6#include <ascii-chat/media/ffmpeg_decoder.h>
7#include <ascii-chat/common.h>
8#include <ascii-chat/log/logging.h>
9#include <ascii-chat/asciichat_errno.h>
10#include <ascii-chat/video/image.h>
11#include <ascii-chat/platform/system.h>
12#include <ascii-chat/platform/thread.h>
13#include <ascii-chat/util/time.h>
14#include <ascii-chat/util/url.h>
15#include <ascii-chat/options/options.h>
17#include <libavformat/avformat.h>
18#include <libavcodec/avcodec.h>
19#include <libavutil/imgutils.h>
20#include <libavutil/opt.h>
21#include <libavutil/log.h>
22#include <libswscale/swscale.h>
23#include <libswresample/swresample.h>
/*
 * Log callback with the (avcl, level, fmt, vl) parameter shape taken by
 * av_log_set_callback(); this file installs it right after calling
 * av_log_set_level(AV_LOG_QUIET).
 * NOTE(review): the body is not visible in this excerpt - presumably it
 * discards every message, as the name suggests; confirm.
 */
35static void ffmpeg_silent_log_callback(
void *avcl,
int level,
const char *fmt, va_list vl) {
/* Sample-rate constant (48000). NOTE(review): presumably the resampler's output rate in Hz - use site not visible here; confirm. */
48#define TARGET_SAMPLE_RATE 48000
/* Channel-count constant (1). NOTE(review): presumably pairs with the mono output layout set on the resampler - confirm. */
51#define TARGET_CHANNELS 1
/* 64 KiB. NOTE(review): presumably the size of the custom AVIO buffer used for stdin input - use site not visible here; confirm. */
54#define AVIO_BUFFER_SIZE (64 * 1024)
/*
 * Read callback: pulls up to buf_size bytes from stdin into buf with fread()
 * and returns the number of bytes read, cast to int.
 * A zero-byte read takes a separate branch whose body is not visible in this
 * excerpt.
 * NOTE(review): the parameter shape matches an AVIO read_packet callback, and
 * `opaque` is not used in the visible lines - confirm against the elided code.
 */
133static int stdin_read_packet(
void *opaque, uint8_t *buf,
int buf_size) {
136 size_t bytes_read = fread(buf, 1, (
size_t)buf_size, stdin);
137 if (bytes_read == 0) {
144 return (
int)bytes_read;
/*
 * Converts an AVRational to a double as num / den.
 * Returns 0.0 instead of dividing when the denominator is zero.
 */
154static inline double av_q2d_safe(AVRational r) {
155 return (r.den != 0) ? ((double)r.num / (
double)r.den) : 0.0;
/*
 * Returns frame->pts multiplied by time_base (converted with av_q2d_safe, so
 * a zero denominator yields 0.0 rather than a division by zero).
 * Frames whose pts is AV_NOPTS_VALUE are handled by a separate branch; the
 * value it returns is not visible in this excerpt.
 */
161static double get_frame_pts_seconds(AVFrame *frame, AVRational time_base) {
162 if (frame->pts == AV_NOPTS_VALUE) {
165 return (
double)frame->pts * av_q2d_safe(time_base);
/*
 * Callback installed as format_ctx->interrupt_callback.callback, with the
 * decoder pointer registered as its opaque argument.
 * The body is not visible in this excerpt.
 * NOTE(review): FFmpeg aborts blocking I/O when an interrupt callback returns
 * non-zero - confirm which decoder state this one inspects.
 */
174static int ffmpeg_interrupt_callback(
void *opaque) {
/*
 * Thread entry point for video prefetching (it logs when it starts and stops).
 * Many lines are elided in this excerpt; the visible steps are:
 *   - packet handling with EOF / EAGAIN / error branches,
 *   - reallocation of the decode buffer when the frame size changes,
 *   - creation of the swscale context if it was not created up front,
 *   - conversion of the decoded frame into decode_buffer->pixels,
 *   - timing of the decode and a flip of use_image_a per iteration.
 * NOTE(review): `arg` is presumably the ffmpeg_decoder_t being serviced; the
 * cast is not visible here.
 */
190static void *ffmpeg_decoder_prefetch_thread_func(
void *arg) {
196 log_debug(
"Video prefetch thread started");
// Selects which of two buffers is used next; toggled near the end of the visible body.
197 bool use_image_a =
true;
228 bool frame_decoded =
false;
238 if (ret == AVERROR_EOF) {
246 av_packet_unref(decoder->
packet);
252 av_packet_unref(decoder->
packet);
260 if (ret == AVERROR(EAGAIN)) {
262 }
else if (ret < 0) {
// Frame size differs from the current decode buffer: allocate a replacement.
280 if (decode_buffer->w != width || decode_buffer->h != height) {
282 decode_buffer =
image_new((
size_t)width, (
size_t)height);
283 if (!decode_buffer) {
284 log_error(
"Failed to allocate prefetch image buffer");
// Single packed destination plane with a row stride of width * 3 bytes.
// NOTE(review): libswscale is believed to read four plane pointers/strides
// from the dst arrays; one-element arrays may be over-read - consider
// four-element arrays and confirm against the sws_scale documentation.
296 uint8_t *dst_data[1] = {(uint8_t *)decode_buffer->pixels};
297 int dst_linesize[1] = {width * 3};
// Scaler setup deferred to decode time; only attempted with positive
// dimensions and a known source pixel format.
301 if (width > 0 && height > 0 && decoder->
video_codec_ctx->pix_fmt != AV_PIX_FMT_NONE) {
303 AV_PIX_FMT_RGB24, SWS_BILINEAR, NULL, NULL, NULL);
305 log_error(
"Failed to create swscale context on first frame");
308 log_debug(
"Lazy initialized swscale context with %dx%d", width, height);
310 log_error(
"Cannot initialize swscale: invalid dimensions or pixel format");
// Convert all `height` rows of the decoded frame into the destination plane.
315 sws_scale(decoder->
sws_ctx, (
const uint8_t *
const *)decoder->
frame->data, decoder->
frame->linesize, 0, height,
316 dst_data, dst_linesize);
318 frame_decoded =
true;
// Nanoseconds to milliseconds, for the rate-limited log line below.
334 double read_ms = (double)read_time_ns / NS_PER_MS;
345 log_dev_every(5 * US_PER_SEC_INT,
"PREFETCH: decoded frame in %.2f ms", read_ms);
348 use_image_a = !use_image_a;
355 log_debug(
"Video prefetch thread stopped");
/*
 * Opens a decoder for the best stream of the given media type.
 *
 * Visible steps: pick a stream with av_find_best_stream(), look up a decoder
 * for that stream's codec id, allocate a codec context, copy the stream's
 * codec parameters into it, then open the codec.
 *
 * fmt_ctx    - container whose streams are searched.
 * type       - media type to look for.
 * stream_idx - output parameter; the line that writes it is not visible in
 *              this excerpt.
 * codec_ctx  - output; receives the allocated context. It is released with
 *              avcodec_free_context() when parameter copy or codec open fails.
 *
 * Error codes visible here: ERROR_MEDIA_DECODE (no decoder found, parameter
 * copy failed, codec open failed) and ERROR_MEMORY (context allocation
 * failed). Handling of a negative av_find_best_stream() result is elided.
 */
362static asciichat_error_t open_codec_context(AVFormatContext *fmt_ctx,
enum AVMediaType type,
int *stream_idx,
363 AVCodecContext **codec_ctx) {
364 int ret = av_find_best_stream(fmt_ctx, type, -1, -1, NULL, 0);
// From here on `ret` is used as the index of the selected stream.
373 AVStream *stream = fmt_ctx->streams[ret];
376 const AVCodec *codec = avcodec_find_decoder(stream->codecpar->codec_id);
378 return SET_ERRNO(ERROR_MEDIA_DECODE,
"Codec not found for stream %d", ret);
382 *codec_ctx = avcodec_alloc_context3(codec);
384 return SET_ERRNO(ERROR_MEMORY,
"Failed to allocate codec context");
388 if (avcodec_parameters_to_context(*codec_ctx, stream->codecpar) < 0) {
389 avcodec_free_context(codec_ctx);
390 return SET_ERRNO(ERROR_MEDIA_DECODE,
"Failed to copy codec parameters");
394 if (avcodec_open2(*codec_ctx, codec, NULL) < 0) {
395 avcodec_free_context(codec_ctx);
396 return SET_ERRNO(ERROR_MEDIA_DECODE,
"Failed to open codec");
/*
 * Fragment: the enclosing function's signature is not visible in this excerpt.
 * NOTE(review): it reports on `path` and opens it with avformat_open_input(),
 * so it appears to be the path-based constructor (ffmpeg_decoder_create) -
 * confirm.
 * Visible stages: argument check, one-time FFmpeg log setup, decoder
 * allocation and zeroing, input open with an options dictionary, stream
 * probing, codec setup, frame/packet allocation, swscale and swresample
 * setup, audio and prefetch buffer allocation, prefetch mutex/condition init.
 * Each visible failure records an error with SET_ERRNO; what is cleaned up and
 * returned afterwards is elided.
 */
408 SET_ERRNO(ERROR_INVALID_PARAM,
"Path is NULL");
// FFmpeg logging is configured once, guarded by a function-static flag.
// NOTE(review): the flag is a plain bool with no visible synchronisation -
// confirm this is never reached concurrently from several threads.
414 static bool ffmpeg_log_level_set =
false;
415 if (!ffmpeg_log_level_set) {
416 av_log_set_level(AV_LOG_QUIET);
417 av_log_set_callback(ffmpeg_silent_log_callback);
418 ffmpeg_log_level_set =
true;
423 SET_ERRNO(ERROR_MEMORY,
"Failed to allocate decoder");
427 memset(decoder, 0,
sizeof(*decoder));
// stdout/stderr are redirected to null while the input is opened and probed;
// every visible exit from that window restores them with stdio_handle.
435 platform_stderr_redirect_handle_t stdio_handle = platform_stdout_stderr_redirect_to_null();
// Options handed to avformat_open_input() below.
// NOTE(review): FFmpeg documents analyzeduration and rw_timeout in
// microseconds (0.1 s and 10 s here) - confirm; the unit and availability of
// connect_timeout depend on the protocol. Entries that FFmpeg does not consume
// stay in the dictionary and are discarded by av_dict_free(), so unsupported
// options go unnoticed. Any conditions guarding individual options are elided.
438 AVDictionary *options = NULL;
443 av_dict_set(&options,
"probesize",
"32768", 0);
445 av_dict_set(&options,
"analyzeduration",
"100000", 0);
447 av_dict_set(&options,
"reconnect",
"1", 0);
449 av_dict_set(&options,
"reconnect_streamed",
"1", 0);
451 av_dict_set(&options,
"rw_timeout",
"10000000", 0);
453 av_dict_set(&options,
"http_persistent",
"1", 0);
455 av_dict_set(&options,
"connect_timeout",
"5000000", 0);
459 int ret = avformat_open_input(&decoder->
format_ctx, path, NULL, &options);
460 av_dict_free(&options);
463 platform_stdout_stderr_restore(stdio_handle);
464 SET_ERRNO(ERROR_MEDIA_OPEN,
"Failed to open media file: %s", path);
470 if (avformat_find_stream_info(decoder->
format_ctx, NULL) < 0) {
471 platform_stdout_stderr_restore(stdio_handle);
472 SET_ERRNO(ERROR_MEDIA_DECODE,
"Failed to find stream info");
// Interrupt callback registered with the decoder as its opaque argument.
// NOTE(review): in the visible order this happens after avformat_open_input()
// and avformat_find_stream_info(), so it cannot interrupt those two calls -
// confirm whether that is intended.
481 decoder->
format_ctx->interrupt_callback.callback = ffmpeg_interrupt_callback;
482 decoder->
format_ctx->interrupt_callback.opaque = decoder;
486 platform_stdout_stderr_restore(stdio_handle);
// A failed video or audio codec open is only logged here; the later
// "No video or audio streams found" error covers the case where neither exists.
491 if (err != ASCIICHAT_OK) {
492 log_warn(
"Failed to open video codec (file may be audio-only)");
499 if (err != ASCIICHAT_OK) {
500 log_debug(
"No audio codec found (file may be video-only or audio codec not available)");
507 SET_ERRNO(ERROR_MEDIA_DECODE,
"No video or audio streams found");
513 decoder->
frame = av_frame_alloc();
514 decoder->
packet = av_packet_alloc();
516 SET_ERRNO(ERROR_MEMORY,
"Failed to allocate frame/packet");
// With unusable codec dimensions or pixel format, scaler creation is deferred
// to the first decoded frame (per the warnings below).
526 log_warn(
"Video codec has invalid dimensions (%dx%d), will initialize swscale on first frame",
530 log_warn(
"Video codec has invalid pixel format, will initialize swscale on first frame");
537 decoder->
video_codec_ctx->height, AV_PIX_FMT_RGB24, SWS_BILINEAR, NULL, NULL, NULL);
539 SET_ERRNO(ERROR_MEDIA_DECODE,
"Failed to create swscale context");
552 decoder->
swr_ctx = swr_alloc();
554 SET_ERRNO(ERROR_MEMORY,
"Failed to allocate swresample context");
// Resampler output: mono channel layout, float (AV_SAMPLE_FMT_FLT) samples.
564 AVChannelLayout out_ch_layout = AV_CHANNEL_LAYOUT_MONO;
565 av_opt_set_chlayout(decoder->
swr_ctx,
"out_chlayout", &out_ch_layout, 0);
567 av_opt_set_sample_fmt(decoder->
swr_ctx,
"out_sample_fmt", AV_SAMPLE_FMT_FLT, 0);
570 if (swr_init(decoder->
swr_ctx) < 0) {
571 SET_ERRNO(ERROR_MEDIA_DECODE,
"Failed to initialize swresample context");
580 SET_ERRNO(ERROR_MEMORY,
"Failed to allocate audio buffer");
595 SET_ERRNO(ERROR_MEMORY,
"Failed to allocate prefetch image buffers");
605 SET_ERRNO(ERROR_MEMORY,
"Failed to initialize prefetch mutex");
611 SET_ERRNO(ERROR_MEMORY,
"Failed to initialize prefetch condition variable");
621 log_debug(
"FFmpeg decoder opened: %s (video=%s, audio=%s)", path, decoder->
video_stream_idx >= 0 ?
"yes" :
"no",
/*
 * Fragment: the enclosing function's signature is not visible in this excerpt.
 * NOTE(review): the messages refer to stdin and a custom AVIO context, so this
 * appears to be the stdin-based constructor (ffmpeg_decoder_create_stdin) -
 * confirm.
 * Visible stages: decoder allocation and zeroing, AVIO buffer and context
 * creation, format context allocation, input open with a NULL URL, stream
 * probing, codec setup (audio only when the audio_enabled option is set),
 * frame/packet allocation, swscale and swresample setup, audio buffer
 * allocation.
 */
630 SET_ERRNO(ERROR_MEMORY,
"Failed to allocate decoder");
634 memset(decoder, 0,
sizeof(*decoder));
644 SET_ERRNO(ERROR_MEMORY,
"Failed to allocate AVIO buffer");
659 SET_ERRNO(ERROR_MEMORY,
"Failed to create AVIO context");
666 decoder->
format_ctx = avformat_alloc_context();
// On format-context allocation failure the AVIO buffer and context are released.
668 SET_ERRNO(ERROR_MEMORY,
"Failed to allocate format context");
669 av_freep(&decoder->
avio_ctx->buffer);
670 avio_context_free(&decoder->
avio_ctx);
// stdout/stderr are redirected to null while the input is opened and probed;
// every visible exit from that window restores them with stdio_handle.
678 platform_stderr_redirect_handle_t stdio_handle = platform_stdout_stderr_redirect_to_null();
681 if (avformat_open_input(&decoder->
format_ctx, NULL, NULL, NULL) < 0) {
682 platform_stdout_stderr_restore(stdio_handle);
683 SET_ERRNO(ERROR_MEDIA_OPEN,
"Failed to open stdin");
684 av_freep(&decoder->
avio_ctx->buffer);
685 avio_context_free(&decoder->
avio_ctx);
692 if (avformat_find_stream_info(decoder->
format_ctx, NULL) < 0) {
693 platform_stdout_stderr_restore(stdio_handle);
694 SET_ERRNO(ERROR_MEDIA_DECODE,
"Failed to find stream info from stdin");
699 platform_stdout_stderr_restore(stdio_handle);
704 if (err != ASCIICHAT_OK) {
705 log_warn(
"Failed to open video codec from stdin");
// Audio codec setup is attempted only when the audio_enabled option is set.
708 if (GET_OPTION(audio_enabled)) {
711 if (err != ASCIICHAT_OK) {
712 log_warn(
"Failed to open audio codec from stdin");
717 log_debug(
"Audio decoding disabled by user option");
721 SET_ERRNO(ERROR_MEDIA_DECODE,
"No video or audio streams found in stdin");
727 decoder->
frame = av_frame_alloc();
728 decoder->
packet = av_packet_alloc();
730 SET_ERRNO(ERROR_MEMORY,
"Failed to allocate frame/packet");
// With unusable codec dimensions or pixel format, scaler creation is deferred
// to the first decoded frame (per the warnings below).
740 log_warn(
"Video codec has invalid dimensions (%dx%d), will initialize swscale on first frame",
744 log_warn(
"Video codec has invalid pixel format, will initialize swscale on first frame");
751 decoder->
video_codec_ctx->height, AV_PIX_FMT_RGB24, SWS_BILINEAR, NULL, NULL, NULL);
753 SET_ERRNO(ERROR_MEDIA_DECODE,
"Failed to create swscale context");
761 decoder->
swr_ctx = swr_alloc();
763 SET_ERRNO(ERROR_MEMORY,
"Failed to allocate swresample context");
// Resampler output: mono channel layout, float (AV_SAMPLE_FMT_FLT) samples.
772 AVChannelLayout out_ch_layout = AV_CHANNEL_LAYOUT_MONO;
773 av_opt_set_chlayout(decoder->
swr_ctx,
"out_chlayout", &out_ch_layout, 0);
775 av_opt_set_sample_fmt(decoder->
swr_ctx,
"out_sample_fmt", AV_SAMPLE_FMT_FLT, 0);
777 if (swr_init(decoder->
swr_ctx) < 0) {
778 SET_ERRNO(ERROR_MEDIA_DECODE,
"Failed to initialize swresample context");
786 SET_ERRNO(ERROR_MEMORY,
"Failed to allocate audio buffer");
792 log_debug(
"FFmpeg decoder opened from stdin (video=%s, audio=%s)", decoder->
video_stream_idx >= 0 ?
"yes" :
"no",
/*
 * Fragment: the enclosing function's signature is not visible in this excerpt.
 * NOTE(review): these are teardown calls, so this appears to be
 * ffmpeg_decoder_destroy - confirm.
 * Visible releases: swscale context, reusable frame and packet, and the custom
 * AVIO buffer followed by its context. The guards around most of these calls
 * are elided.
 */
837 sws_freeContext(decoder->
sws_ctx);
847 if (decoder->
frame) {
848 av_frame_free(&decoder->
frame);
851 av_packet_free(&decoder->
packet);
// The AVIO buffer is freed through avio_ctx->buffer before the context itself.
869 av_freep(&decoder->
avio_ctx->buffer);
870 avio_context_free(&decoder->
avio_ctx);
/*
 * Fragment: the enclosing function's signature is not visible in this excerpt.
 * NOTE(review): appears to be the frame-read path (ffmpeg_decoder_read_video_frame)
 * - confirm.
 * Two rate-limited log lines: one when a prefetched frame is used, one when no
 * prefetched frame is ready yet. What is returned in either case is elided.
 */
910 log_dev_every(5 * US_PER_SEC_INT,
"Using prefetched frame");
918 log_dev_every(5 * US_PER_SEC_INT,
919 "Prefetch frame not ready, skipping to next iteration (allow prefetch to catch up)");
/*
 * Fragment: the enclosing function's signature is not visible in this excerpt.
 * NOTE(review): appears to be ffmpeg_decoder_start_prefetch - confirm.
 * Two early ERROR_INVALID_PARAM returns (their conditions are elided), then
 * ERROR_THREAD when thread creation reports a non-zero result.
 */
932 return ERROR_INVALID_PARAM;
936 return ERROR_INVALID_PARAM;
948 if (thread_err != 0) {
949 return SET_ERRNO(ERROR_THREAD,
"Failed to create video prefetch thread");
/*
 * Fragment: the enclosing function's signature is not visible in this excerpt.
 * NOTE(review): appears to be ffmpeg_decoder_stop_prefetch - confirm.
 * Joins the prefetch thread with a bounded wait; a result of 0 is treated as
 * a distinct case whose handling is elided.
 * NOTE(review): the timeout is presumably 2000 ms expressed in nanoseconds -
 * confirm the unit of NS_PER_MS_INT.
 */
967 int join_result = asciichat_thread_join_timeout(&decoder->
prefetch_thread, NULL, 2000 * NS_PER_MS_INT);
969 if (join_result == 0) {
/*
 * Fragment: only two return statements of the enclosing function are visible.
 * NOTE(review): by position and return type this is probably
 * ffmpeg_decoder_get_video_dimensions - confirm.
 * An early ERROR_INVALID_PARAM return, then ASCIICHAT_OK on success.
 */
993 return ERROR_INVALID_PARAM;
1003 return ASCIICHAT_OK;
/*
 * Fragment: the enclosing function's signature is not visible in this excerpt.
 * NOTE(review): appears to be ffmpeg_decoder_get_video_fps - confirm.
 * Frame rate is taken from the stream's avg_frame_rate first; r_frame_rate is
 * assigned afterwards, presumably only as a fallback (the condition is elided).
 */
1014 double fps = av_q2d_safe(stream->avg_frame_rate);
1020 fps = av_q2d_safe(stream->r_frame_rate);
/*
 * Fragment: the enclosing function's signature is not visible in this excerpt.
 * NOTE(review): it uses decoder, buffer and num_samples, so it appears to be
 * ffmpeg_decoder_read_audio_samples - confirm.
 * Visible flow: reject missing arguments, a decoder without an audio stream
 * or a zero-length request; copy samples from decoder->audio_buffer first;
 * then read and decode packets, converting each decoded frame into the
 * caller's float buffer with swr_convert() until num_samples are written;
 * finally drain the resampler and return the number of samples written.
 */
1031 if (!decoder || decoder->
audio_stream_idx < 0 || !buffer || num_samples == 0) {
1035 size_t samples_written = 0;
// Copy the smaller of what is buffered and what was requested.
1040 size_t to_copy = (available < num_samples) ? available : num_samples;
1042 memcpy(buffer, decoder->
audio_buffer, to_copy *
sizeof(
float));
1043 samples_written += to_copy;
1046 if (to_copy < available) {
1051 if (samples_written >= num_samples) {
1052 return samples_written;
// Function-static counter: shared by every call and every decoder instance.
// NOTE(review): it is updated with no visible synchronisation - confirm this
// path is single-threaded.
1057 static uint64_t packet_count = 0;
1058 while (samples_written < num_samples) {
1061 if (ret == AVERROR_EOF) {
1069 av_packet_unref(decoder->
packet);
// NOTE(review): %lu is paired with a uint64_t here, which mismatches on
// platforms where unsigned long is 32 bits; PRIu64 from <inttypes.h> is the
// portable spelling. The pts/dts arguments are elided - if they are int64_t,
// %ld has the same problem (PRId64).
1073 log_info_every(50 * US_PER_MS_INT,
"Audio packet #%lu: pts=%ld dts=%ld duration=%d size=%d", packet_count++,
1078 av_packet_unref(decoder->
packet);
1081 log_warn(
"Error sending audio packet to decoder");
1089 if (ret == AVERROR(EAGAIN)) {
1091 }
else if (ret < 0) {
1092 log_warn(
"Error receiving audio frame from decoder");
1093 goto audio_read_done;
// Convert straight into the caller's buffer, capped at the space remaining.
1101 float *out_buf = buffer + samples_written;
1102 int out_samples = (int)(num_samples - samples_written);
1104 uint8_t *out_ptr = (uint8_t *)out_buf;
1105 int converted = swr_convert(decoder->
swr_ctx, &out_ptr, out_samples, (
const uint8_t **)decoder->
frame->data,
1106 decoder->
frame->nb_samples);
1108 if (converted > 0) {
1109 samples_written += (size_t)converted;
1112 if (samples_written >= num_samples) {
1113 goto audio_read_done;
// Request is still short: call swr_convert() with no input to drain samples
// held inside the resampler.
// NOTE(review): swr_convert() returns a negative value on error; the line
// between the call and the addition is elided - confirm `flushed` is checked
// before being cast to size_t.
1121 if (samples_written < num_samples) {
1122 int remaining_space = (int)(num_samples - samples_written);
1123 uint8_t *out_ptr = (uint8_t *)(buffer + samples_written);
1124 int flushed = swr_convert(decoder->
swr_ctx, &out_ptr, remaining_space, NULL, 0);
1126 samples_written += (size_t)flushed;
1133 return samples_written;
/*
 * Fragment: the enclosing function's signature is not visible in this excerpt.
 * NOTE(review): it seeks to timestamp 0, so it appears to be
 * ffmpeg_decoder_rewind - confirm.
 * Early exits with ERROR_INVALID_PARAM and ERROR_NOT_SUPPORTED (conditions
 * elided), then a backward seek to 0 on the default stream (-1).
 * Returns ERROR_MEDIA_SEEK if that seek fails, ASCIICHAT_OK otherwise.
 */
1146 return ERROR_INVALID_PARAM;
1150 return ERROR_NOT_SUPPORTED;
1162 if (av_seek_frame(decoder->
format_ctx, -1, 0, AVSEEK_FLAG_BACKWARD) < 0) {
1163 return SET_ERRNO(ERROR_MEDIA_SEEK,
"Failed to seek to beginning");
1172 return ASCIICHAT_OK;
/*
 * Fragment: the enclosing function's signature is not visible in this excerpt.
 * NOTE(review): it uses timestamp_sec, so it appears to be
 * ffmpeg_decoder_seek_to_timestamp - confirm.
 * Early exits with ERROR_INVALID_PARAM and ERROR_NOT_SUPPORTED (conditions
 * elided). The target is converted from seconds to AV_TIME_BASE units, a
 * backward seek is tried first, and a second seek with flags 0 follows,
 * presumably only when the first one failed (the condition is elided).
 * Returns ERROR_MEDIA_SEEK on failure, ASCIICHAT_OK otherwise.
 */
1177 return ERROR_INVALID_PARAM;
1181 return ERROR_NOT_SUPPORTED;
1191 int64_t target_ts = (int64_t)(timestamp_sec * AV_TIME_BASE);
1195 int seek_ret = av_seek_frame(decoder->
format_ctx, -1, target_ts, AVSEEK_FLAG_BACKWARD);
1198 seek_ret = av_seek_frame(decoder->
format_ctx, -1, target_ts, 0);
1205 return SET_ERRNO(ERROR_MEDIA_SEEK,
"Failed to seek to timestamp %.2f seconds", timestamp_sec);
1243 return ASCIICHAT_OK;
/*
 * Fragment: the enclosing function's signature is not visible in this excerpt.
 * NOTE(review): appears to be ffmpeg_decoder_get_duration - confirm.
 * Returns the container duration divided by AV_TIME_BASE, as a double.
 * A duration of AV_NOPTS_VALUE takes a separate branch whose return value is
 * elided.
 */
1255 if (decoder->
format_ctx->duration == AV_NOPTS_VALUE) {
1259 return (
double)decoder->
format_ctx->duration / AV_TIME_BASE;
bool ffmpeg_decoder_at_end(ffmpeg_decoder_t *decoder)
double ffmpeg_decoder_get_position(ffmpeg_decoder_t *decoder)
void ffmpeg_decoder_stop_prefetch(ffmpeg_decoder_t *decoder)
Stop the background frame prefetching thread.
image_t * ffmpeg_decoder_read_video_frame(ffmpeg_decoder_t *decoder)
bool ffmpeg_decoder_has_video(ffmpeg_decoder_t *decoder)
double ffmpeg_decoder_get_video_fps(ffmpeg_decoder_t *decoder)
asciichat_error_t ffmpeg_decoder_rewind(ffmpeg_decoder_t *decoder)
asciichat_error_t ffmpeg_decoder_get_video_dimensions(ffmpeg_decoder_t *decoder, int *width, int *height)
bool ffmpeg_decoder_has_audio(ffmpeg_decoder_t *decoder)
bool ffmpeg_decoder_is_prefetch_running(ffmpeg_decoder_t *decoder)
asciichat_error_t ffmpeg_decoder_start_prefetch(ffmpeg_decoder_t *decoder)
Start the background frame prefetching thread.
asciichat_error_t ffmpeg_decoder_seek_to_timestamp(ffmpeg_decoder_t *decoder, double timestamp_sec)
#define TARGET_SAMPLE_RATE
void ffmpeg_decoder_destroy(ffmpeg_decoder_t *decoder)
ffmpeg_decoder_t * ffmpeg_decoder_create(const char *path)
size_t ffmpeg_decoder_read_audio_samples(ffmpeg_decoder_t *decoder, float *buffer, size_t num_samples)
ffmpeg_decoder_t * ffmpeg_decoder_create_stdin(void)
double ffmpeg_decoder_get_duration(ffmpeg_decoder_t *decoder)
FFmpeg decoder state for video and audio decoding.
mutex_t prefetch_mutex
Protect prefetch state and FFmpeg decoder access.
uint64_t audio_samples_read
Total audio samples decoded and output.
AVCodecContext * video_codec_ctx
Video codec context.
image_t * prefetch_image_a
First prefetch buffer.
AVFormatContext * format_ctx
FFmpeg format/container context.
image_t * current_read_buffer
Buffer main thread is currently reading/rendering.
bool prefetch_thread_running
Whether prefetch thread is active.
size_t audio_buffer_offset
Current offset in audio buffer.
bool buffer_b_in_use
Whether prefetch_image_b is being read by main thread.
image_t * prefetch_image_b
Second prefetch buffer.
cond_t prefetch_cond
Condition variable for pausing during seek.
AVIOContext * avio_ctx
Custom I/O context for stdin.
struct SwrContext * swr_ctx
Software resampler for format conversion.
bool prefetch_frame_ready
Whether current_prefetch_image has valid data.
AVPacket * packet
Reusable packet for reading.
int video_stream_idx
Video stream index (-1 if none)
bool eof_reached
Whether end of file was reached.
unsigned char * avio_buffer
Buffer for custom I/O.
double last_audio_pts
Last audio presentation timestamp.
bool seeking_in_progress
Signal to pause prefetch thread during seek.
AVCodecContext * audio_codec_ctx
Audio codec context.
bool is_stdin
Whether reading from stdin.
int audio_sample_rate
Audio sample rate (Hz)
size_t audio_buffer_size
Total size of audio buffer.
image_t * current_prefetch_image
Currently available prefetched frame.
bool buffer_a_in_use
Whether prefetch_image_a is being read by main thread.
float * audio_buffer
Buffer for partial audio frames.
double last_video_pts
Last video presentation timestamp.
bool prefetch_should_stop
Signal to stop prefetch thread.
asciichat_thread_t prefetch_thread
Prefetch thread handle.
int audio_stream_idx
Audio stream index (-1 if none)
AVFrame * frame
Reusable frame for decoding.
image_t * current_image
Working buffer for decoding.
struct SwsContext * sws_ctx
Software scaler for format conversion.
int mutex_init(mutex_t *mutex)
int asciichat_thread_create(asciichat_thread_t *thread, void *(*start_routine)(void *), void *arg)
int asciichat_thread_join(asciichat_thread_t *thread, void **retval)
int mutex_destroy(mutex_t *mutex)
bool url_is_valid(const char *url)
uint64_t time_get_ns(void)
uint64_t time_elapsed_ns(uint64_t start_ns, uint64_t end_ns)
void image_destroy(image_t *p)
image_t * image_new(size_t width, size_t height)