diff --git a/BUILD.md b/BUILD.md
index 0c708bde..51f8141e 100644
--- a/BUILD.md
+++ b/BUILD.md
@@ -15,7 +15,7 @@ First, you need to install the required packages:
 sudo apt install ffmpeg libsdl2-2.0-0 adb wget \
                  gcc git pkg-config meson ninja-build libsdl2-dev \
                  libavcodec-dev libavdevice-dev libavformat-dev libavutil-dev \
-                 libusb-1.0-0 libusb-1.0-0-dev
+                 libswresample-dev libusb-1.0-0 libusb-1.0-0-dev
 ```
 
 Then clone the repo and execute the installation script
@@ -94,7 +94,7 @@ sudo apt install ffmpeg libsdl2-2.0-0 adb libusb-1.0-0
 # client build dependencies
 sudo apt install gcc git pkg-config meson ninja-build libsdl2-dev \
                  libavcodec-dev libavdevice-dev libavformat-dev libavutil-dev \
-                 libusb-1.0-0-dev
+                 libswresample-dev libusb-1.0-0-dev
 
 # server build dependencies
 sudo apt install openjdk-11-jdk
diff --git a/app/meson.build b/app/meson.build
index 392fa6d0..723274c9 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -4,6 +4,7 @@ src = [
     'src/adb/adb_device.c',
     'src/adb/adb_parser.c',
     'src/adb/adb_tunnel.c',
+    'src/audio_player.c',
     'src/cli.c',
     'src/clock.c',
     'src/compat.c',
@@ -32,6 +33,7 @@ src = [
     'src/trait/frame_source.c',
     'src/trait/packet_source.c',
     'src/util/acksync.c',
+    'src/util/average.c',
     'src/util/bytebuf.c',
     'src/util/file.c',
     'src/util/intmap.c',
@@ -103,6 +105,7 @@ if not crossbuild_windows
         dependency('libavformat', version: '>= 57.33'),
         dependency('libavcodec', version: '>= 57.37'),
         dependency('libavutil'),
+        dependency('libswresample'),
         dependency('sdl2', version: '>= 2.0.5'),
     ]
 
@@ -138,6 +141,7 @@ else
             cc.find_library('avcodec-60', dirs: ffmpeg_bin_dir),
             cc.find_library('avformat-60', dirs: ffmpeg_bin_dir),
             cc.find_library('avutil-58', dirs: ffmpeg_bin_dir),
+            cc.find_library('swresample-4', dirs: ffmpeg_bin_dir),
         ],
         include_directories: include_directories(ffmpeg_include_dir)
     )
diff --git a/app/src/audio_player.c b/app/src/audio_player.c
new file mode 100644
index 00000000..78a8ffe1
--- /dev/null
+++ b/app/src/audio_player.c
@@ -0,0 +1,409 @@
+#include "audio_player.h"
+
+#include <libavutil/opt.h>
+
+#include "util/log.h"
+
+#define SC_AUDIO_PLAYER_NDEBUG // comment to debug
+
+/** Downcast frame_sink to sc_audio_player */
+#define DOWNCAST(SINK) container_of(SINK, struct sc_audio_player, frame_sink)
+
+#define SC_AV_SAMPLE_FMT AV_SAMPLE_FMT_FLT
+#define SC_SDL_SAMPLE_FMT AUDIO_F32
+
+#define SC_AUDIO_OUTPUT_BUFFER_SAMPLES 240 // 5ms at 48000Hz
+
+static inline uint32_t
+bytes_to_samples(struct sc_audio_player *ap, size_t bytes) {
+    assert(!(bytes % (ap->out_bytes_per_sample * ap->nb_channels)));
+    return bytes / (ap->out_bytes_per_sample * ap->nb_channels);
+}
+
+static inline size_t
+samples_to_bytes(struct sc_audio_player *ap, uint32_t samples) {
+    return (size_t) samples * ap->nb_channels * ap->out_bytes_per_sample;
+}
+
+/**
+ * SDL audio callback: fill `stream` with `len_int` bytes, read from the
+ * ring-buffer when available and completed with silence otherwise.
+ *
+ * It is called with the lock used by SDL_AudioDeviceLock(), so the bytebuf
+ * is protected.
+ */
+static void SDLCALL
+sc_audio_player_sdl_callback(void *userdata, uint8_t *stream, int len_int) {
+    struct sc_audio_player *ap = userdata;
+
+    assert(len_int > 0);
+    size_t requested = len_int;
+
+#ifndef SC_AUDIO_PLAYER_NDEBUG
+    LOGD("[Audio] SDL callback requests %" PRIu32 " samples",
+         bytes_to_samples(ap, requested));
+#endif
+
+    size_t available = sc_bytebuf_read_available(&ap->buf);
+    if (!ap->played) {
+        // Playback start is delayed with silence until the target buffering
+        // is almost reached; the remaining margin (30ms, arbitrary but large
+        // compared to one request) is left to the clock compensation.
+        uint32_t buffered = bytes_to_samples(ap, available);
+        uint32_t margin = 30 * ap->sample_rate / 1000;
+        if (buffered + margin < ap->target_buffering) {
+            LOGV("[Audio] Inserting initial buffering silence: %" PRIu32
+                 " samples", bytes_to_samples(ap, requested));
+            memset(stream, 0, requested);
+            return;
+        }
+    }
+
+    size_t copied = MIN(available, requested);
+    if (copied) {
+        sc_bytebuf_read(&ap->buf, stream, copied);
+    }
+
+    size_t missing = requested - copied;
+    if (missing) {
+        // Underflow: complete the request with silence. In theory, the real
+        // samples (which will arrive later) should then be dropped to keep
+        // the latency minimal, but this would cause very audible glitches, so
+        // let the clock compensation restore the target latency.
+        uint32_t silence_samples = bytes_to_samples(ap, missing);
+        LOGD("[Audio] Buffer underflow, inserting silence: %" PRIu32 " samples",
+             silence_samples);
+        memset(stream + copied, 0, missing);
+
+        if (ap->received) {
+            // The inserted samples immediately increase the buffering level
+            ap->avg_buffering.avg += silence_samples;
+        }
+    }
+
+    ap->played = true;
+}
+
+/** Return a buffer for min_samples samples, or NULL on allocation failure */
+static uint8_t *
+sc_audio_player_get_swr_buf(struct sc_audio_player *ap, uint32_t min_samples) {
+    size_t required = samples_to_bytes(ap, min_samples);
+    if (required <= ap->swr_buf_alloc_size) {
+        return ap->swr_buf; // the current buffer is already large enough
+    }
+    size_t grown_size = required + 4096; // over-allocate by 4096 bytes
+    uint8_t *grown = realloc(ap->swr_buf, grown_size);
+    if (!grown) {
+        LOG_OOM();
+        return NULL; // ap->swr_buf is left untouched
+    }
+    ap->swr_buf = grown;
+    ap->swr_buf_alloc_size = grown_size;
+    return grown;
+}
+
+/**
+ * Frame sink push: resample the frame, queue the result for the SDL callback,
+ * then update the buffering estimation and the resampler compensation.
+ *
+ * Return false if the resampling buffer allocation or the resampling failed.
+ */
+static bool
+sc_audio_player_frame_sink_push(struct sc_frame_sink *sink,
+                                const AVFrame *frame) {
+    struct sc_audio_player *ap = DOWNCAST(sink);
+    SwrContext *swr_ctx = ap->swr_ctx;
+
+    int64_t swr_delay = swr_get_delay(swr_ctx, ap->sample_rate);
+    // No need to av_rescale_rnd(), input and output sample rates are the same.
+    // Add more space (256) for clock compensation.
+    int dst_nb_samples = swr_delay + frame->nb_samples + 256;
+
+    uint8_t *swr_buf = sc_audio_player_get_swr_buf(ap, dst_nb_samples);
+    if (!swr_buf) {
+        return false;
+    }
+
+    int ret = swr_convert(swr_ctx, &swr_buf, dst_nb_samples,
+                          (const uint8_t **) frame->data, frame->nb_samples);
+    if (ret < 0) {
+        LOGE("Resampling failed: %d", ret);
+        return false;
+    }
+
+    // swr_convert() returns the number of samples which would have been
+    // written if the buffer was big enough.
+    uint32_t samples_written = MIN(ret, dst_nb_samples);
+    size_t swr_buf_size = samples_to_bytes(ap, samples_written);
+#ifndef SC_AUDIO_PLAYER_NDEBUG
+    LOGD("[Audio] %" PRIu32 " samples written to buffer", samples_written);
+#endif
+
+    // This function is the only writer, so the available space is at least
+    // the previous one: it should almost always be possible to write lockless.
+    bool lockless_write = swr_buf_size <= ap->previous_write_avail;
+    if (lockless_write) {
+        sc_bytebuf_prepare_write(&ap->buf, swr_buf, swr_buf_size);
+    }
+
+    SDL_LockAudioDevice(ap->device);
+
+    size_t read_avail = sc_bytebuf_read_available(&ap->buf);
+    uint32_t buffered_samples = bytes_to_samples(ap, read_avail);
+
+    if (lockless_write) {
+        sc_bytebuf_commit_write(&ap->buf, swr_buf_size);
+    } else {
+        // Take care to keep full samples
+        size_t align = ap->nb_channels * ap->out_bytes_per_sample;
+        size_t write_avail =
+            sc_bytebuf_write_available(&ap->buf) / align * align;
+        if (swr_buf_size > write_avail) {
+            // Very unlikely: the ring-buffer (bytebuf) is allocated with a
+            // size sufficient to store 1 second more than the target
+            // buffering. If this happens, though, old samples are skipped.
+            size_t cap = sc_bytebuf_capacity(&ap->buf) / align * align;
+            if (swr_buf_size > cap) {
+                // Very very unlikely: a single resampled frame should never
+                // exceed the ring-buffer size (or something is very wrong).
+                // Ignore the first bytes in swr_buf, and remove them from
+                // samples_written (before truncating swr_buf_size): this
+                // impacts buffered_samples and instant_compensation below.
+                size_t dropped_bytes = swr_buf_size - cap;
+                samples_written -= bytes_to_samples(ap, dropped_bytes);
+                swr_buf += dropped_bytes;
+                swr_buf_size = cap;
+            }
+
+            assert(swr_buf_size >= write_avail);
+            if (swr_buf_size > write_avail) {
+                sc_bytebuf_skip(&ap->buf, swr_buf_size - write_avail);
+                uint32_t skip_samples =
+                    bytes_to_samples(ap, swr_buf_size - write_avail);
+                assert(buffered_samples >= skip_samples);
+                buffered_samples -= skip_samples;
+                if (ap->played) {
+                    // Dropping input samples instantly decreases buffering
+                    ap->avg_buffering.avg -= skip_samples;
+                }
+            }
+
+            // Exactly the room needed for the new samples should remain
+            assert((sc_bytebuf_write_available(&ap->buf) / align * align)
+                    == swr_buf_size);
+        }
+
+        sc_bytebuf_write(&ap->buf, swr_buf, swr_buf_size);
+    }
+
+    buffered_samples += samples_written;
+    assert(samples_to_bytes(ap, buffered_samples)
+            == sc_bytebuf_read_available(&ap->buf));
+
+    // Read with lock held, to be used after unlocking
+    bool played = ap->played;
+    float avg = 0; // only set (and used) if played
+    if (played) {
+        uint32_t max_buffered_samples = ap->target_buffering
+                                      + 12 * SC_AUDIO_OUTPUT_BUFFER_SAMPLES
+                                      + ap->target_buffering / 10;
+        if (buffered_samples > max_buffered_samples) {
+            uint32_t skip_samples = buffered_samples - max_buffered_samples;
+            size_t skip_bytes = samples_to_bytes(ap, skip_samples);
+            sc_bytebuf_skip(&ap->buf, skip_bytes);
+#ifndef SC_AUDIO_PLAYER_NDEBUG
+            LOGD("[Audio] Buffering threshold exceeded, skipping %" PRIu32
+                 " samples", skip_samples);
+#endif
+        }
+
+        // Number of samples added (or removed, if negative) for compensation
+        int32_t instant_compensation =
+            (int32_t) samples_written - frame->nb_samples;
+        // The compensation must apply instantly, it must not be smoothed
+        ap->avg_buffering.avg += instant_compensation;
+
+        // However, the buffering level must be smoothed
+        sc_average_push(&ap->avg_buffering, buffered_samples);
+        // The SDL callback also updates the average: read it with lock held
+        avg = sc_average_get(&ap->avg_buffering);
+#ifndef SC_AUDIO_PLAYER_NDEBUG
+        LOGD("[Audio] buffered_samples=%" PRIu32 " avg_buffering=%f",
+             buffered_samples, avg);
+#endif
+    } else {
+        // Playback not started yet: buffering more than max_initial_buffering
+        // would cause unnecessary delay (and glitches to compensate) on start
+        uint32_t max_initial_buffering = ap->target_buffering
+                                       + 2 * SC_AUDIO_OUTPUT_BUFFER_SAMPLES;
+        if (buffered_samples > max_initial_buffering) {
+            uint32_t skip_samples = buffered_samples - max_initial_buffering;
+            size_t skip_bytes = samples_to_bytes(ap, skip_samples);
+            sc_bytebuf_skip(&ap->buf, skip_bytes);
+#ifndef SC_AUDIO_PLAYER_NDEBUG
+            LOGD("[Audio] Playback not started, skipping %" PRIu32 " samples",
+                 skip_samples);
+#endif
+        }
+    }
+
+    ap->previous_write_avail = sc_bytebuf_write_available(&ap->buf);
+    ap->received = true;
+
+    SDL_UnlockAudioDevice(ap->device);
+
+    if (played) {
+        ap->samples_since_resync += samples_written;
+        if (ap->samples_since_resync >= ap->sample_rate) {
+            // Recompute compensation every second
+            ap->samples_since_resync = 0;
+            int diff = ap->target_buffering - avg;
+            if (diff < 0 && buffered_samples < ap->target_buffering) {
+                // Do not accelerate if the instant buffering level is below
+                // the average, this would increase underflow
+                diff = 0;
+            }
+            // Compensate the diff over 4 seconds (recomputed after 1 second)
+            int distance = 4 * ap->sample_rate;
+            // Limit compensation rate to 2%
+            int abs_max_diff = distance / 50;
+            diff = CLAMP(diff, -abs_max_diff, abs_max_diff);
+            LOGV("[Audio] Buffering: target=%" PRIu32 " avg=%f cur=%" PRIu32
+                 " compensation=%d", ap->target_buffering, avg,
+                 buffered_samples, diff);
+            int ret = swr_set_compensation(swr_ctx, diff, distance);
+            if (ret < 0) {
+                LOGW("Resampling compensation failed: %d", ret);
+                // not fatal
+            }
+        }
+    }
+
+    return true;
+}
+
+/**
+ * Frame sink open: open and start the SDL audio device, and initialize the
+ * resampler, the ring-buffer and the buffering state from the codec context.
+ *
+ * Return false on error (everything initialized so far is released).
+ */
+static bool
+sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
+                                const AVCodecContext *ctx) {
+    struct sc_audio_player *ap = DOWNCAST(sink);
+
+    SDL_AudioSpec obtained;
+    SDL_AudioSpec desired = {
+        .freq = ctx->sample_rate,
+        .format = SC_SDL_SAMPLE_FMT,
+        .channels = ctx->ch_layout.nb_channels,
+        .samples = SC_AUDIO_OUTPUT_BUFFER_SAMPLES,
+        .callback = sc_audio_player_sdl_callback,
+        .userdata = ap,
+    };
+    ap->device = SDL_OpenAudioDevice(NULL, 0, &desired, &obtained, 0);
+    if (!ap->device) {
+        LOGE("Could not open audio device: %s", SDL_GetError());
+        return false;
+    }
+
+    SwrContext *swr = swr_alloc();
+    if (!swr) {
+        LOG_OOM();
+        goto error_close_audio_device;
+    }
+    ap->swr_ctx = swr;
+
+    assert(ctx->sample_rate > 0);
+    assert(ctx->ch_layout.nb_channels > 0);
+    assert(!av_sample_fmt_is_planar(SC_AV_SAMPLE_FMT));
+    int sample_size = av_get_bytes_per_sample(SC_AV_SAMPLE_FMT);
+    assert(sample_size > 0);
+
+    // Same channel layout and sample rate for input and output
+    av_opt_set_chlayout(swr, "in_chlayout", &ctx->ch_layout, 0);
+    av_opt_set_chlayout(swr, "out_chlayout", &ctx->ch_layout, 0);
+    av_opt_set_int(swr, "in_sample_rate", ctx->sample_rate, 0);
+    av_opt_set_int(swr, "out_sample_rate", ctx->sample_rate, 0);
+    av_opt_set_sample_fmt(swr, "in_sample_fmt", ctx->sample_fmt, 0);
+    av_opt_set_sample_fmt(swr, "out_sample_fmt", SC_AV_SAMPLE_FMT, 0);
+
+    if (swr_init(swr)) {
+        LOGE("Failed to initialize the resampling context");
+        goto error_free_swr_ctx;
+    }
+
+    ap->sample_rate = ctx->sample_rate;
+    ap->nb_channels = ctx->ch_layout.nb_channels;
+    ap->out_bytes_per_sample = sample_size;
+
+    // Convert the target buffering delay (in ticks) to a number of samples
+    ap->target_buffering =
+        ap->target_buffering_delay * ap->sample_rate / SC_TICK_FREQ;
+
+    // The ring-buffer between the producer and the consumer holds the target
+    // buffering plus 1 second. It's too big on purpose, to guarantee that both
+    // sides will be able to access it in parallel without locking.
+    size_t bytebuf_samples = ap->target_buffering + ap->sample_rate;
+    if (!sc_bytebuf_init(&ap->buf, samples_to_bytes(ap, bytebuf_samples))) {
+        goto error_free_swr_ctx;
+    }
+
+    size_t initial_swr_buf_size = samples_to_bytes(ap, 4096);
+    ap->swr_buf = malloc(initial_swr_buf_size);
+    if (!ap->swr_buf) {
+        LOG_OOM();
+        goto error_destroy_bytebuf;
+    }
+    ap->swr_buf_alloc_size = initial_swr_buf_size;
+
+    ap->previous_write_avail = sc_bytebuf_write_available(&ap->buf);
+
+    // Samples are produced and consumed by blocks, so the buffering must be
+    // smoothed to get a relatively stable value.
+    sc_average_init(&ap->avg_buffering, 32);
+    ap->samples_since_resync = 0;
+
+    ap->received = false;
+    ap->played = false;
+
+    SDL_PauseAudioDevice(ap->device, 0); // 0: unpause, playback starts
+
+    return true;
+
+error_destroy_bytebuf:
+    sc_bytebuf_destroy(&ap->buf);
+error_free_swr_ctx:
+    swr_free(&ap->swr_ctx);
+error_close_audio_device:
+    SDL_CloseAudioDevice(ap->device);
+
+    return false;
+}
+
+static void
+sc_audio_player_frame_sink_close(struct sc_frame_sink *sink) {
+    struct sc_audio_player *ap = DOWNCAST(sink);
+    // Frame sink close: pause and close the SDL audio device first...
+    assert(ap->device);
+    SDL_PauseAudioDevice(ap->device, 1);
+    SDL_CloseAudioDevice(ap->device);
+    // ...then release the resampling buffer, the ring-buffer and the resampler
+    free(ap->swr_buf);
+    sc_bytebuf_destroy(&ap->buf);
+    swr_free(&ap->swr_ctx);
+}
+
+/** Initialize the player: set its target buffering and its frame sink ops */
+void
+sc_audio_player_init(struct sc_audio_player *ap, sc_tick target_buffering) {
+    static const struct sc_frame_sink_ops ops = {
+        .open = sc_audio_player_frame_sink_open,
+        .close = sc_audio_player_frame_sink_close,
+        .push = sc_audio_player_frame_sink_push,
+    };
+    ap->frame_sink.ops = &ops;
+    // Converted to a number of samples once the sample rate is known (open)
+    ap->target_buffering_delay = target_buffering;
+}
diff --git a/app/src/audio_player.h b/app/src/audio_player.h
new file mode 100644
index 00000000..c64760ec
--- /dev/null
+++ b/app/src/audio_player.h
@@ -0,0 +1,78 @@
+#ifndef SC_AUDIO_PLAYER_H
+#define SC_AUDIO_PLAYER_H
+
+#include "common.h"
+
+#include <stdbool.h>
+#include "trait/frame_sink.h"
+#include <util/average.h>
+#include <util/bytebuf.h>
+#include <util/thread.h>
+#include <util/tick.h>
+
+#include <libavformat/avformat.h>
+#include <libswresample/swresample.h>
+#include <SDL2/SDL.h>
+
+struct sc_audio_player {
+    struct sc_frame_sink frame_sink; // frame sink trait
+
+    SDL_AudioDeviceID device;
+
+    // The target buffering between the producer and the consumer. This value
+    // is directly used for compensation.
+    // Since audio capture and/or encoding on the device typically produce
+    // blocks of 960 samples (20ms) or 1024 samples (~21.3ms), this target
+    // value should be higher.
+    sc_tick target_buffering_delay;
+    uint32_t target_buffering; // in samples
+
+    // Audio buffer to communicate between the receiver and the SDL audio
+    // callback (protected by SDL_AudioDeviceLock())
+    struct sc_bytebuf buf;
+
+    // The previous number of bytes available for writing in the buffer (only
+    // used by the receiver thread)
+    size_t previous_write_avail;
+
+    // Resampler (only used from the receiver thread)
+    struct SwrContext *swr_ctx;
+
+    // The sample rate is the same for input and output
+    unsigned sample_rate;
+    // The number of channels is the same for input and output
+    unsigned nb_channels;
+    // The number of bytes per sample for a single channel
+    unsigned out_bytes_per_sample;
+
+    // Target buffer for resampling (only used by the receiver thread)
+    uint8_t *swr_buf;
+    size_t swr_buf_alloc_size;
+
+    // Number of buffered samples, smoothed (may be negative on underflow).
+    // Written by the receiver thread and, on underflow, by the SDL callback
+    struct sc_average avg_buffering;
+    // Count the number of samples to trigger a compensation update regularly
+    // (only used by the receiver thread)
+    uint32_t samples_since_resync;
+
+    // Set to true the first time a sample is received (protected by
+    // SDL_AudioDeviceLock())
+    bool received;
+
+    // Set to true the first time the SDL callback gets past the initial
+    // buffering silence (protected by SDL_AudioDeviceLock())
+    bool played;
+    // NOTE(review): not set nor used in audio_player.c, confirm it is needed
+    const struct sc_audio_player_callbacks *cbs;
+    void *cbs_userdata;
+};
+
+struct sc_audio_player_callbacks { // NOTE(review): unused by audio_player.c
+    void (*on_ended)(struct sc_audio_player *ap, bool success, void *userdata);
+};
+
+void
+sc_audio_player_init(struct sc_audio_player *ap, sc_tick target_buffering);
+
+#endif
diff --git a/app/src/decoder.c b/app/src/decoder.c
index a8168f66..4384186d 100644
--- a/app/src/decoder.c
+++ b/app/src/decoder.c
@@ -2,6 +2,7 @@
 
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
+#include <libavutil/channel_layout.h>
 
 #include "events.h"
 #include "trait/frame_sink.h"
@@ -23,6 +24,11 @@ sc_decoder_open(struct sc_decoder *decoder, const AVCodec *codec) {
     if (codec->type == AVMEDIA_TYPE_VIDEO) {
         // Hardcoded video properties
         decoder->codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
+    } else {
+        // Hardcoded audio properties
+        decoder->codec_ctx->ch_layout =
+            (AVChannelLayout) AV_CHANNEL_LAYOUT_STEREO;
+        decoder->codec_ctx->sample_rate = 48000;
     }
 
     if (avcodec_open2(decoder->codec_ctx, codec, NULL) < 0) {
diff --git a/app/src/scrcpy.c b/app/src/scrcpy.c
index dba1bad9..3f3a34f0 100644
--- a/app/src/scrcpy.c
+++ b/app/src/scrcpy.c
@@ -13,6 +13,7 @@
 # include <windows.h>
 #endif
 
+#include "audio_player.h"
 #include "controller.h"
 #include "decoder.h"
 #include "delay_buffer.h"
@@ -41,6 +42,7 @@
 struct scrcpy {
     struct sc_server server;
     struct sc_screen screen;
+    struct sc_audio_player audio_player;
     struct sc_demuxer video_demuxer;
     struct sc_demuxer audio_demuxer;
     struct sc_decoder video_decoder;
@@ -386,9 +388,16 @@ scrcpy(struct scrcpy_options *options) {
     }
 
     // Initialize SDL video in addition if display is enabled
-    if (options->display && SDL_Init(SDL_INIT_VIDEO)) {
-        LOGE("Could not initialize SDL: %s", SDL_GetError());
-        goto end;
+    if (options->display) {
+        if (SDL_Init(SDL_INIT_VIDEO)) {
+            LOGE("Could not initialize SDL video: %s", SDL_GetError());
+            goto end;
+        }
+
+        if (options->audio && SDL_Init(SDL_INIT_AUDIO)) {
+            LOGE("Could not initialize SDL audio: %s", SDL_GetError());
+            goto end;
+        }
     }
 
     sdl_configure(options->display, options->disable_screensaver);
@@ -676,6 +685,12 @@ aoa_hid_end:
         }
 
         sc_frame_source_add_sink(src, &s->screen.frame_sink);
+
+        if (options->audio) {
+            sc_audio_player_init(&s->audio_player, SC_TICK_FROM_MS(50));
+            sc_frame_source_add_sink(&s->audio_decoder.frame_source,
+                                     &s->audio_player.frame_sink);
+        }
     }
 
 #ifdef HAVE_V4L2
diff --git a/app/src/util/average.c b/app/src/util/average.c
new file mode 100644
index 00000000..ace23d45
--- /dev/null
+++ b/app/src/util/average.c
@@ -0,0 +1,26 @@
+#include "average.h"
+
+#include <assert.h>
+
+/** Reset the average (no value pushed yet) to smooth over range values */
+void
+sc_average_init(struct sc_average *avg, unsigned range) {
+    // Fields not listed (avg and count) are implicitly initialized to 0
+    *avg = (struct sc_average) { .range = range };
+}
+
+// Update the average with value: plain mean until range values have been
+// pushed, then each new value weighs 1/range
+void
+sc_average_push(struct sc_average *avg, float value) {
+    avg->count += avg->count < avg->range; // count saturates at range
+    assert(avg->count);
+    unsigned n = avg->count;
+    avg->avg = ((n - 1) * avg->avg + value) / n;
+}
+
+float
+sc_average_get(struct sc_average *avg) {
+    assert(avg->count); // undefined until a value has been pushed
+    return avg->avg;
+}
diff --git a/app/src/util/average.h b/app/src/util/average.h
new file mode 100644
index 00000000..59fae7d1
--- /dev/null
+++ b/app/src/util/average.h
@@ -0,0 +1,40 @@
+#ifndef SC_AVERAGE
+#define SC_AVERAGE
+
+#include "common.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct sc_average {
+    // Current average value
+    float avg;
+
+    // Target range, to update the average as follows:
+    //     avg = ((range - 1) * avg + new_value) / range
+    unsigned range;
+
+    // Number of values pushed so far, capped to range (count <= range).
+    // The purpose is to handle the first (range - 1) values properly.
+    unsigned count;
+};
+
+void
+sc_average_init(struct sc_average *avg, unsigned range);
+
+/**
+ * Push a new value to update the "rolling" average
+ */
+void
+sc_average_push(struct sc_average *avg, float value);
+
+/**
+ * Get the current average value
+ *
+ * It is an error to call this function if sc_average_push() has not been
+ * called at least once.
+ */
+float
+sc_average_get(struct sc_average *avg);
+
+#endif