Review notes (free text placed before the first "diff --git" header):
- The line structure was rebuilt from the +/- markers of a dump whose newlines
  had been collapsed; every hunk keeps its original line counts.
- Leading whitespace was lost in that dump and has been re-indented
  conventionally, so context lines may not match the target tree byte for byte.
- NOTE(review): the header names of the angle-bracket #include lines were
  already missing from the dump. They are left as found and must be restored
  from the original commit before this patch is applied.
- audio_player.c: when a resampled frame exceeds the ring-buffer capacity,
  samples_written is now adjusted before swr_buf_size is truncated (the
  adjustment previously always evaluated to 0 samples).

diff --git a/BUILD.md b/BUILD.md
index 0c708bde..51f8141e 100644
--- a/BUILD.md
+++ b/BUILD.md
@@ -15,7 +15,7 @@ First, you need to install the required packages:
 sudo apt install ffmpeg libsdl2-2.0-0 adb wget \
                  gcc git pkg-config meson ninja-build libsdl2-dev \
                  libavcodec-dev libavdevice-dev libavformat-dev libavutil-dev \
-                 libusb-1.0-0 libusb-1.0-0-dev
+                 libswresample-dev libusb-1.0-0 libusb-1.0-0-dev
 ```
 
 Then clone the repo and execute the installation script
@@ -94,7 +94,7 @@ sudo apt install ffmpeg libsdl2-2.0-0 adb libusb-1.0-0
 # client build dependencies
 sudo apt install gcc git pkg-config meson ninja-build libsdl2-dev \
                  libavcodec-dev libavdevice-dev libavformat-dev libavutil-dev \
-                 libusb-1.0-0-dev
+                 libswresample-dev libusb-1.0-0-dev
 
 # server build dependencies
 sudo apt install openjdk-11-jdk
diff --git a/app/meson.build b/app/meson.build
index 392fa6d0..723274c9 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -4,6 +4,7 @@ src = [
     'src/adb/adb_device.c',
     'src/adb/adb_parser.c',
     'src/adb/adb_tunnel.c',
+    'src/audio_player.c',
     'src/cli.c',
     'src/clock.c',
     'src/compat.c',
@@ -32,6 +33,7 @@ src = [
     'src/trait/frame_source.c',
     'src/trait/packet_source.c',
     'src/util/acksync.c',
+    'src/util/average.c',
     'src/util/bytebuf.c',
     'src/util/file.c',
     'src/util/intmap.c',
@@ -103,6 +105,7 @@ if not crossbuild_windows
         dependency('libavformat', version: '>= 57.33'),
         dependency('libavcodec', version: '>= 57.37'),
         dependency('libavutil'),
+        dependency('libswresample'),
         dependency('sdl2', version: '>= 2.0.5'),
     ]
 
@@ -138,6 +141,7 @@ else
             cc.find_library('avcodec-60', dirs: ffmpeg_bin_dir),
             cc.find_library('avformat-60', dirs: ffmpeg_bin_dir),
             cc.find_library('avutil-58', dirs: ffmpeg_bin_dir),
+            cc.find_library('swresample-4', dirs: ffmpeg_bin_dir),
         ],
         include_directories: include_directories(ffmpeg_include_dir)
     )
diff --git a/app/src/audio_player.c b/app/src/audio_player.c
new file mode 100644
index 00000000..78a8ffe1
--- /dev/null
+++ b/app/src/audio_player.c
@@ -0,0 +1,409 @@
+#include "audio_player.h"
+
+#include
+
+#include "util/log.h"
+
+#define SC_AUDIO_PLAYER_NDEBUG // comment to debug
+
+/** Downcast frame_sink to sc_audio_player */
+#define DOWNCAST(SINK) container_of(SINK, struct sc_audio_player, frame_sink)
+
+#define SC_AV_SAMPLE_FMT AV_SAMPLE_FMT_FLT
+#define SC_SDL_SAMPLE_FMT AUDIO_F32
+
+#define SC_AUDIO_OUTPUT_BUFFER_SAMPLES 240 // 5ms at 48000Hz
+
+static inline uint32_t
+bytes_to_samples(struct sc_audio_player *ap, size_t bytes) {
+    assert(bytes % (ap->nb_channels * ap->out_bytes_per_sample) == 0);
+    return bytes / (ap->nb_channels * ap->out_bytes_per_sample);
+}
+
+static inline size_t
+samples_to_bytes(struct sc_audio_player *ap, uint32_t samples) {
+    return samples * ap->nb_channels * ap->out_bytes_per_sample;
+}
+
+static void SDLCALL
+sc_audio_player_sdl_callback(void *userdata, uint8_t *stream, int len_int) {
+    struct sc_audio_player *ap = userdata;
+
+    // This callback is called with the lock used by SDL_AudioDeviceLock(), so
+    // the bytebuf is protected
+
+    assert(len_int > 0);
+    size_t len = len_int;
+
+#ifndef SC_AUDIO_PLAYER_NDEBUG
+    LOGD("[Audio] SDL callback requests %" PRIu32 " samples",
+         bytes_to_samples(ap, len));
+#endif
+
+    size_t read_avail = sc_bytebuf_read_available(&ap->buf);
+    if (!ap->played) {
+        uint32_t buffered_samples = bytes_to_samples(ap, read_avail);
+
+        // Part of the buffering is handled by inserting initial silence. The
+        // remaining (margin) last samples will be handled by compensation.
+        uint32_t margin = 30 * ap->sample_rate / 1000; // 30ms
+        if (buffered_samples + margin < ap->target_buffering) {
+            LOGV("[Audio] Inserting initial buffering silence: %" PRIu32
+                 " samples", bytes_to_samples(ap, len));
+            // Delay playback starting to reach the target buffering. Fill the
+            // whole buffer with silence (len is small compared to the
+            // arbitrary margin value).
+            memset(stream, 0, len);
+            return;
+        }
+    }
+
+    size_t read = MIN(read_avail, len);
+    if (read) {
+        sc_bytebuf_read(&ap->buf, stream, read);
+    }
+
+    if (read < len) {
+        size_t silence_bytes = len - read;
+        uint32_t silence_samples = bytes_to_samples(ap, silence_bytes);
+        // Insert silence. In theory, the inserted silent samples replace the
+        // missing real samples, which will arrive later, so they should be
+        // dropped to keep the latency minimal. However, this would cause very
+        // audible glitches, so let the clock compensation restore the target
+        // latency.
+        LOGD("[Audio] Buffer underflow, inserting silence: %" PRIu32 " samples",
+             silence_samples);
+        memset(stream + read, 0, silence_bytes);
+
+        if (ap->received) {
+            // Inserting additional samples immediately increases buffering
+            ap->avg_buffering.avg += silence_samples;
+        }
+    }
+
+    ap->played = true;
+}
+
+static uint8_t *
+sc_audio_player_get_swr_buf(struct sc_audio_player *ap, uint32_t min_samples) {
+    size_t min_buf_size = samples_to_bytes(ap, min_samples);
+    if (min_buf_size > ap->swr_buf_alloc_size) {
+        size_t new_size = min_buf_size + 4096;
+        uint8_t *buf = realloc(ap->swr_buf, new_size);
+        if (!buf) {
+            LOG_OOM();
+            // Could not realloc to the requested size
+            return NULL;
+        }
+        ap->swr_buf = buf;
+        ap->swr_buf_alloc_size = new_size;
+    }
+
+    return ap->swr_buf;
+}
+
+static bool
+sc_audio_player_frame_sink_push(struct sc_frame_sink *sink,
+                                const AVFrame *frame) {
+    struct sc_audio_player *ap = DOWNCAST(sink);
+
+    SwrContext *swr_ctx = ap->swr_ctx;
+
+    int64_t swr_delay = swr_get_delay(swr_ctx, ap->sample_rate);
+    // No need to av_rescale_rnd(), input and output sample rates are the same.
+    // Add more space (256) for clock compensation.
+    int dst_nb_samples = swr_delay + frame->nb_samples + 256;
+
+    uint8_t *swr_buf = sc_audio_player_get_swr_buf(ap, dst_nb_samples);
+    if (!swr_buf) {
+        return false;
+    }
+
+    int ret = swr_convert(swr_ctx, &swr_buf, dst_nb_samples,
+                          (const uint8_t **) frame->data, frame->nb_samples);
+    if (ret < 0) {
+        LOGE("Resampling failed: %d", ret);
+        return false;
+    }
+
+    // swr_convert() returns the number of samples which would have been
+    // written if the buffer was big enough.
+    uint32_t samples_written = MIN(ret, dst_nb_samples);
+    size_t swr_buf_size = samples_to_bytes(ap, samples_written);
+#ifndef SC_AUDIO_PLAYER_NDEBUG
+    LOGD("[Audio] %" PRIu32 " samples written to buffer", samples_written);
+#endif
+
+    // Since this function is the only writer, the current available space is
+    // at least the previous available space. In practice, it should almost
+    // always be possible to write without lock.
+    bool lockless_write = swr_buf_size <= ap->previous_write_avail;
+    if (lockless_write) {
+        sc_bytebuf_prepare_write(&ap->buf, swr_buf, swr_buf_size);
+    }
+
+    SDL_LockAudioDevice(ap->device);
+
+    size_t read_avail = sc_bytebuf_read_available(&ap->buf);
+    uint32_t buffered_samples = bytes_to_samples(ap, read_avail);
+
+    if (lockless_write) {
+        sc_bytebuf_commit_write(&ap->buf, swr_buf_size);
+    } else {
+        // Take care to keep full samples
+        size_t align = ap->nb_channels * ap->out_bytes_per_sample;
+        size_t write_avail =
+            sc_bytebuf_write_available(&ap->buf) / align * align;
+        if (swr_buf_size > write_avail) {
+            // Entering this branch is very unlikely, the ring-buffer (bytebuf)
+            // is allocated with a size sufficient to store 1 second more than
+            // the target buffering. If this happens, though, we have to skip
+            // old samples.
+            size_t cap = sc_bytebuf_capacity(&ap->buf) / align * align;
+            if (swr_buf_size > cap) {
+                // Very very unlikely: a single resampled frame should never
+                // exceed the ring-buffer size (or something is very wrong).
+                // Ignore the first bytes in swr_buf
+                swr_buf += swr_buf_size - cap;
+                // Adjust samples_written before truncating swr_buf_size (this
+                // change will impact the instant_compensation below)
+                samples_written -= bytes_to_samples(ap, swr_buf_size - cap);
+                swr_buf_size = cap;
+            }
+
+            assert(swr_buf_size >= write_avail);
+            if (swr_buf_size > write_avail) {
+                sc_bytebuf_skip(&ap->buf, swr_buf_size - write_avail);
+                uint32_t skip_samples =
+                    bytes_to_samples(ap, swr_buf_size - write_avail);
+                assert(buffered_samples >= skip_samples);
+                buffered_samples -= skip_samples;
+                if (ap->played) {
+                    // Dropping input samples instantly decreases buffering
+                    ap->avg_buffering.avg -= skip_samples;
+                }
+            }
+
+            // It should remain exactly the expected size to write the new
+            // samples.
+            assert((sc_bytebuf_write_available(&ap->buf) / align * align)
+                    == swr_buf_size);
+        }
+
+        sc_bytebuf_write(&ap->buf, swr_buf, swr_buf_size);
+    }
+
+    buffered_samples += samples_written;
+    assert(samples_to_bytes(ap, buffered_samples)
+            == sc_bytebuf_read_available(&ap->buf));
+
+    // Read with lock held, to be used after unlocking
+    bool played = ap->played;
+    if (played) {
+        uint32_t max_buffered_samples = ap->target_buffering
+                                      + 12 * SC_AUDIO_OUTPUT_BUFFER_SAMPLES
+                                      + ap->target_buffering / 10;
+        if (buffered_samples > max_buffered_samples) {
+            uint32_t skip_samples = buffered_samples - max_buffered_samples;
+            size_t skip_bytes = samples_to_bytes(ap, skip_samples);
+            sc_bytebuf_skip(&ap->buf, skip_bytes);
+#ifndef SC_AUDIO_PLAYER_NDEBUG
+            LOGD("[Audio] Buffering threshold exceeded, skipping %" PRIu32
+                 " samples", skip_samples);
+#endif
+        }
+
+        // Number of samples added (or removed, if negative) for compensation
+        int32_t instant_compensation =
+            (int32_t) samples_written - frame->nb_samples;
+
+        // The compensation must apply instantly, it must not be smoothed
+        ap->avg_buffering.avg += instant_compensation;
+
+        // However, the buffering level must be smoothed
+        sc_average_push(&ap->avg_buffering, buffered_samples);
+
+#ifndef SC_AUDIO_PLAYER_NDEBUG
+        LOGD("[Audio] buffered_samples=%" PRIu32 " avg_buffering=%f",
+             buffered_samples, sc_average_get(&ap->avg_buffering));
+#endif
+    } else {
+        // SDL playback not started yet, do not accumulate more than
+        // max_initial_buffering samples, this would cause unnecessary delay
+        // (and glitches to compensate) on start.
+        uint32_t max_initial_buffering = ap->target_buffering
+                                       + 2 * SC_AUDIO_OUTPUT_BUFFER_SAMPLES;
+        if (buffered_samples > max_initial_buffering) {
+            uint32_t skip_samples = buffered_samples - max_initial_buffering;
+            size_t skip_bytes = samples_to_bytes(ap, skip_samples);
+            sc_bytebuf_skip(&ap->buf, skip_bytes);
+#ifndef SC_AUDIO_PLAYER_NDEBUG
+            LOGD("[Audio] Playback not started, skipping %" PRIu32 " samples",
+                 skip_samples);
+#endif
+        }
+    }
+
+    ap->previous_write_avail = sc_bytebuf_write_available(&ap->buf);
+    ap->received = true;
+
+    SDL_UnlockAudioDevice(ap->device);
+
+    if (played) {
+        ap->samples_since_resync += samples_written;
+        if (ap->samples_since_resync >= ap->sample_rate) {
+            // Recompute compensation every second
+            ap->samples_since_resync = 0;
+
+            float avg = sc_average_get(&ap->avg_buffering);
+            int diff = ap->target_buffering - avg;
+            if (diff < 0 && buffered_samples < ap->target_buffering) {
+                // Do not accelerate if the instant buffering level is below
+                // the target, this would increase underflow
+                diff = 0;
+            }
+            // Compensate the diff over 4 seconds (but will be recomputed after
+            // 1 second)
+            int distance = 4 * ap->sample_rate;
+            // Limit compensation rate to 2%
+            int abs_max_diff = distance / 50;
+            diff = CLAMP(diff, -abs_max_diff, abs_max_diff);
+            LOGV("[Audio] Buffering: target=%" PRIu32 " avg=%f cur=%" PRIu32
+                 " compensation=%d", ap->target_buffering, avg,
+                 buffered_samples, diff);
+            int ret = swr_set_compensation(swr_ctx, diff, distance);
+            if (ret < 0) {
+                LOGW("Resampling compensation failed: %d", ret);
+                // not fatal
+            }
+        }
+    }
+
+    return true;
+}
+
+static bool
+sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
+                                const AVCodecContext *ctx) {
+    struct sc_audio_player *ap = DOWNCAST(sink);
+
+    SDL_AudioSpec desired = {
+        .freq = ctx->sample_rate,
+        .format = SC_SDL_SAMPLE_FMT,
+        .channels = ctx->ch_layout.nb_channels,
+        .samples = SC_AUDIO_OUTPUT_BUFFER_SAMPLES,
+        .callback = sc_audio_player_sdl_callback,
+        .userdata = ap,
+    };
+    SDL_AudioSpec obtained;
+
+    ap->device = SDL_OpenAudioDevice(NULL, 0, &desired, &obtained, 0);
+    if (!ap->device) {
+        LOGE("Could not open audio device: %s", SDL_GetError());
+        return false;
+    }
+
+    SwrContext *swr_ctx = swr_alloc();
+    if (!swr_ctx) {
+        LOG_OOM();
+        goto error_close_audio_device;
+    }
+    ap->swr_ctx = swr_ctx;
+
+    assert(ctx->sample_rate > 0);
+    assert(ctx->ch_layout.nb_channels > 0);
+    assert(!av_sample_fmt_is_planar(SC_AV_SAMPLE_FMT));
+    int out_bytes_per_sample = av_get_bytes_per_sample(SC_AV_SAMPLE_FMT);
+    assert(out_bytes_per_sample > 0);
+
+    av_opt_set_chlayout(swr_ctx, "in_chlayout", &ctx->ch_layout, 0);
+    av_opt_set_chlayout(swr_ctx, "out_chlayout", &ctx->ch_layout, 0);
+
+    av_opt_set_int(swr_ctx, "in_sample_rate", ctx->sample_rate, 0);
+    av_opt_set_int(swr_ctx, "out_sample_rate", ctx->sample_rate, 0);
+
+    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", ctx->sample_fmt, 0);
+    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", SC_AV_SAMPLE_FMT, 0);
+
+    int ret = swr_init(swr_ctx);
+    if (ret) {
+        LOGE("Failed to initialize the resampling context");
+        goto error_free_swr_ctx;
+    }
+
+    ap->sample_rate = ctx->sample_rate;
+    ap->nb_channels = ctx->ch_layout.nb_channels;
+    ap->out_bytes_per_sample = out_bytes_per_sample;
+
+    ap->target_buffering = ap->target_buffering_delay * ap->sample_rate
+                                                      / SC_TICK_FREQ;
+
+    // Use a ring-buffer of the target buffering size plus 1 second between the
+    // producer and the consumer. It's too big on purpose, to guarantee that
+    // the producer and the consumer will be able to access it in parallel
+    // without locking.
+    size_t bytebuf_samples = ap->target_buffering + ap->sample_rate;
+    size_t bytebuf_size = samples_to_bytes(ap, bytebuf_samples);
+
+    bool ok = sc_bytebuf_init(&ap->buf, bytebuf_size);
+    if (!ok) {
+        goto error_free_swr_ctx;
+    }
+
+    size_t initial_swr_buf_size = samples_to_bytes(ap, 4096);
+    ap->swr_buf = malloc(initial_swr_buf_size);
+    if (!ap->swr_buf) {
+        LOG_OOM();
+        goto error_destroy_bytebuf;
+    }
+    ap->swr_buf_alloc_size = initial_swr_buf_size;
+
+    ap->previous_write_avail = sc_bytebuf_write_available(&ap->buf);
+
+    // Samples are produced and consumed by blocks, so the buffering must be
+    // smoothed to get a relatively stable value.
+    sc_average_init(&ap->avg_buffering, 32);
+    ap->samples_since_resync = 0;
+
+    ap->received = false;
+    ap->played = false;
+
+    SDL_PauseAudioDevice(ap->device, 0);
+
+    return true;
+
+error_destroy_bytebuf:
+    sc_bytebuf_destroy(&ap->buf);
+error_free_swr_ctx:
+    swr_free(&ap->swr_ctx);
+error_close_audio_device:
+    SDL_CloseAudioDevice(ap->device);
+
+    return false;
+}
+
+static void
+sc_audio_player_frame_sink_close(struct sc_frame_sink *sink) {
+    struct sc_audio_player *ap = DOWNCAST(sink);
+
+    assert(ap->device);
+    SDL_PauseAudioDevice(ap->device, 1);
+    SDL_CloseAudioDevice(ap->device);
+
+    free(ap->swr_buf);
+    sc_bytebuf_destroy(&ap->buf);
+    swr_free(&ap->swr_ctx);
+}
+
+void
+sc_audio_player_init(struct sc_audio_player *ap, sc_tick target_buffering) {
+    ap->target_buffering_delay = target_buffering;
+
+    static const struct sc_frame_sink_ops ops = {
+        .open = sc_audio_player_frame_sink_open,
+        .close = sc_audio_player_frame_sink_close,
+        .push = sc_audio_player_frame_sink_push,
+    };
+
+    ap->frame_sink.ops = &ops;
+}
diff --git a/app/src/audio_player.h b/app/src/audio_player.h
new file mode 100644
index 00000000..c64760ec
--- /dev/null
+++ b/app/src/audio_player.h
@@ -0,0 +1,78 @@
+#ifndef SC_AUDIO_PLAYER_H
+#define SC_AUDIO_PLAYER_H
+
+#include "common.h"
+
+#include
+#include "trait/frame_sink.h"
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+struct sc_audio_player {
+    struct sc_frame_sink frame_sink;
+
+    SDL_AudioDeviceID device;
+
+    // The target buffering between the producer and the consumer. This value
+    // is directly used for compensation.
+    // Since audio capture and/or encoding on the device typically produce
+    // blocks of 960 samples (20ms) or 1024 samples (~21.3ms), this target
+    // value should be higher.
+    sc_tick target_buffering_delay;
+    uint32_t target_buffering; // in samples
+
+    // Audio buffer to communicate between the receiver and the SDL audio
+    // callback (protected by SDL_AudioDeviceLock())
+    struct sc_bytebuf buf;
+
+    // The previous number of bytes available in the buffer (only used by the
+    // receiver thread)
+    size_t previous_write_avail;
+
+    // Resampler (only used from the receiver thread)
+    struct SwrContext *swr_ctx;
+
+    // The sample rate is the same for input and output
+    unsigned sample_rate;
+    // The number of channels is the same for input and output
+    unsigned nb_channels;
+    // The number of bytes per sample for a single channel
+    unsigned out_bytes_per_sample;
+
+    // Target buffer for resampling (only used by the receiver thread)
+    uint8_t *swr_buf;
+    size_t swr_buf_alloc_size;
+
+    // Smoothed number of buffered samples (may be negative on underflow)
+    // (updated by both the receiver thread and the SDL audio callback)
+    struct sc_average avg_buffering;
+    // Count the number of samples to trigger a compensation update regularly
+    // (only used by the receiver thread)
+    uint32_t samples_since_resync;
+
+    // Set to true the first time a sample is received (protected by
+    // SDL_AudioDeviceLock())
+    bool received;
+
+    // Set to true the first time the SDL callback is called (protected by
+    // SDL_AudioDeviceLock())
+    bool played;
+
+    const struct sc_audio_player_callbacks *cbs;
+    void *cbs_userdata;
+};
+
+struct sc_audio_player_callbacks {
+    void (*on_ended)(struct sc_audio_player *ap, bool success, void *userdata);
+};
+
+void
+sc_audio_player_init(struct sc_audio_player *ap, sc_tick target_buffering);
+
+#endif
diff --git a/app/src/decoder.c b/app/src/decoder.c
index a8168f66..4384186d 100644
--- a/app/src/decoder.c
+++ b/app/src/decoder.c
@@ -2,6 +2,7 @@
 
 #include
 #include
+#include
 
 #include "events.h"
 #include "trait/frame_sink.h"
@@ -23,6 +24,11 @@ sc_decoder_open(struct sc_decoder *decoder, const AVCodec *codec) {
     if (codec->type == AVMEDIA_TYPE_VIDEO) {
         // Hardcoded video properties
         decoder->codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
+    } else {
+        // Hardcoded audio properties
+        decoder->codec_ctx->ch_layout =
+            (AVChannelLayout) AV_CHANNEL_LAYOUT_STEREO;
+        decoder->codec_ctx->sample_rate = 48000;
     }
 
     if (avcodec_open2(decoder->codec_ctx, codec, NULL) < 0) {
diff --git a/app/src/scrcpy.c b/app/src/scrcpy.c
index dba1bad9..3f3a34f0 100644
--- a/app/src/scrcpy.c
+++ b/app/src/scrcpy.c
@@ -13,6 +13,7 @@
 # include
 #endif
 
+#include "audio_player.h"
 #include "controller.h"
 #include "decoder.h"
 #include "delay_buffer.h"
@@ -41,6 +42,7 @@
 struct scrcpy {
     struct sc_server server;
     struct sc_screen screen;
+    struct sc_audio_player audio_player;
     struct sc_demuxer video_demuxer;
     struct sc_demuxer audio_demuxer;
     struct sc_decoder video_decoder;
@@ -386,9 +388,16 @@ scrcpy(struct scrcpy_options *options) {
     }
 
     // Initialize SDL video in addition if display is enabled
-    if (options->display && SDL_Init(SDL_INIT_VIDEO)) {
-        LOGE("Could not initialize SDL: %s", SDL_GetError());
-        goto end;
+    if (options->display) {
+        if (SDL_Init(SDL_INIT_VIDEO)) {
+            LOGE("Could not initialize SDL video: %s", SDL_GetError());
+            goto end;
+        }
+
+        if (options->audio && SDL_Init(SDL_INIT_AUDIO)) {
+            LOGE("Could not initialize SDL audio: %s", SDL_GetError());
+            goto end;
+        }
     }
 
     sdl_configure(options->display, options->disable_screensaver);
@@ -676,6 +685,12 @@ aoa_hid_end:
         }
 
         sc_frame_source_add_sink(src, &s->screen.frame_sink);
+
+        if (options->audio) {
+            sc_audio_player_init(&s->audio_player, SC_TICK_FROM_MS(50));
+            sc_frame_source_add_sink(&s->audio_decoder.frame_source,
+                                     &s->audio_player.frame_sink);
+        }
     }
 
 #ifdef HAVE_V4L2
diff --git a/app/src/util/average.c b/app/src/util/average.c
new file mode 100644
index 00000000..ace23d45
--- /dev/null
+++ b/app/src/util/average.c
@@ -0,0 +1,26 @@
+#include "average.h"
+
+#include
+
+void
+sc_average_init(struct sc_average *avg, unsigned range) {
+    avg->range = range;
+    avg->avg = 0;
+    avg->count = 0;
+}
+
+void
+sc_average_push(struct sc_average *avg, float value) {
+    if (avg->count < avg->range) {
+        ++avg->count;
+    }
+
+    assert(avg->count);
+    avg->avg = ((avg->count - 1) * avg->avg + value) / avg->count;
+}
+
+float
+sc_average_get(struct sc_average *avg) {
+    assert(avg->count);
+    return avg->avg;
+}
diff --git a/app/src/util/average.h b/app/src/util/average.h
new file mode 100644
index 00000000..59fae7d1
--- /dev/null
+++ b/app/src/util/average.h
@@ -0,0 +1,40 @@
+#ifndef SC_AVERAGE_H
+#define SC_AVERAGE_H
+
+#include "common.h"
+
+#include
+#include
+
+struct sc_average {
+    // Current average value
+    float avg;
+
+    // Target range, to update the average as follows:
+    //     avg = ((range - 1) * avg + new_value) / range
+    unsigned range;
+
+    // Number of values pushed when less than range (count <= range).
+    // The purpose is to handle the first (range - 1) values properly.
+    unsigned count;
+};
+
+void
+sc_average_init(struct sc_average *avg, unsigned range);
+
+/**
+ * Push a new value to update the "rolling" average
+ */
+void
+sc_average_push(struct sc_average *avg, float value);
+
+/**
+ * Get the current average value
+ *
+ * It is an error to call this function if sc_average_push() has not been
+ * called at least once.
+ */
+float
+sc_average_get(struct sc_average *avg);
+
+#endif