From d224adf4abffec10554002cf2c68a3f8e7f23e3d Mon Sep 17 00:00:00 2001 From: Anton Mitrofanov Date: Thu, 10 Jan 2013 23:38:34 +0400 Subject: [PATCH] Use L-SMASH for mp4 muxing --- Makefile | 7 +- configure | 31 +- output/mp4.c | 571 +++-- output/mp4/a52.c | 633 ++++ output/mp4/a52.h | 75 + output/mp4/alac.c | 113 + output/mp4/box.c | 250 ++ output/mp4/box.h | 2248 +++++++++++++ output/mp4/chapter.c | 367 +++ output/mp4/description.c | 2821 +++++++++++++++++ output/mp4/description.h | 40 + output/mp4/dts.c | 1331 ++++++++ output/mp4/dts.h | 113 + output/mp4/h264.c | 2237 +++++++++++++ output/mp4/h264.h | 226 ++ output/mp4/importer.c | 3716 ++++++++++++++++++++++ output/mp4/importer.h | 47 + output/mp4/internal.h | 37 + output/mp4/isom.c | 7912 ++++++++++++++++++++++++++++++++++++++++++++++ output/mp4/isom.h | 34 + output/mp4/lsmash.h | 2294 ++++++++++++++ output/mp4/meta.c | 461 +++ output/mp4/mp4a.c | 935 ++++++ output/mp4/mp4a.h | 138 + output/mp4/mp4sys.c | 1354 ++++++++ output/mp4/mp4sys.h | 211 ++ output/mp4/summary.c | 305 ++ output/mp4/utils.c | 841 +++++ output/mp4/utils.h | 229 ++ output/mp4/vc1.c | 985 ++++++ output/mp4/vc1.h | 137 + output/mp4/write.c | 1918 +++++++++++ output/mp4/write.h | 39 + x264.c | 20 +- 34 files changed, 32418 insertions(+), 258 deletions(-) create mode 100644 output/mp4/a52.c create mode 100644 output/mp4/a52.h create mode 100644 output/mp4/alac.c create mode 100644 output/mp4/box.c create mode 100644 output/mp4/box.h create mode 100644 output/mp4/chapter.c create mode 100644 output/mp4/description.c create mode 100644 output/mp4/description.h create mode 100644 output/mp4/dts.c create mode 100644 output/mp4/dts.h create mode 100644 output/mp4/h264.c create mode 100644 output/mp4/h264.h create mode 100644 output/mp4/importer.c create mode 100644 output/mp4/importer.h create mode 100644 output/mp4/internal.h create mode 100644 output/mp4/isom.c create mode 100644 output/mp4/isom.h create mode 100644 output/mp4/lsmash.h create mode 
100644 output/mp4/meta.c create mode 100644 output/mp4/mp4a.c create mode 100644 output/mp4/mp4a.h create mode 100644 output/mp4/mp4sys.c create mode 100644 output/mp4/mp4sys.h create mode 100644 output/mp4/summary.c create mode 100644 output/mp4/utils.c create mode 100644 output/mp4/utils.h create mode 100644 output/mp4/vc1.c create mode 100644 output/mp4/vc1.h create mode 100644 output/mp4/write.c create mode 100644 output/mp4/write.h diff --git a/Makefile b/Makefile index 7a22c42..a7ebe74 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,9 @@ SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \ filters/video/resize.c filters/video/cache.c filters/video/fix_vfr_pts.c \ filters/video/select_every.c filters/video/crop.c filters/video/depth.c +SRCCLI += output/mp4.c +SRCCLI += $(addprefix output/mp4/, isom.c utils.c write.c importer.c mp4sys.c mp4a.c summary.c chapter.c dts.c a52.c h264.c vc1.c alac.c meta.c description.c box.c) + SRCSO = OBJS = OBJSO = @@ -63,10 +66,6 @@ ifneq ($(findstring HAVE_FFMS 1, $(CONFIG)),) SRCCLI += input/ffms.c endif -ifneq ($(findstring HAVE_GPAC 1, $(CONFIG)),) -SRCCLI += output/mp4.c -endif - # Visualization sources ifneq ($(findstring HAVE_VISUALIZE 1, $(CONFIG)),) SRCS += common/visualize.c common/display-x11.c diff --git a/configure b/configure index cb8f669..19638ea 100755 --- a/configure +++ b/configure @@ -50,7 +50,6 @@ External library support: --disable-swscale disable swscale support --disable-lavf disable libavformat support --disable-ffms disable ffmpegsource support - --disable-gpac disable gpac support EOF exit 1 @@ -105,7 +104,6 @@ icl_ldflags() { arg=${arg/pthreadGC/pthreadVC} [ "$arg" = avifil32.lib ] && arg=vfw32.lib - [ "$arg" = gpac_static.lib ] && arg=libgpac_static.lib [ -n "$arg" ] && echo -n "$arg " done @@ -259,7 +257,6 @@ static="no" avs="auto" lavf="auto" ffms="auto" -gpac="auto" gpl="yes" thread="auto" swscale="auto" @@ -285,7 +282,7 @@ cross_prefix="" EXE="" # list of all preprocessor 
HAVE values we can define -CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL VECTOREXT INTERLACED CPU_COUNT" +CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS AVS GPL VECTOREXT INTERLACED CPU_COUNT" # parse options @@ -334,9 +331,6 @@ for opt do --disable-ffms) ffms="no" ;; - --disable-gpac) - gpac="no" - ;; --disable-gpl) gpl="no" ;; @@ -891,28 +885,6 @@ if [ "$swscale" = "yes" ]; then fi fi -if [ "$gpac" = "auto" ] ; then - gpac="no" - cc_check "" -lz && GPAC_LIBS="-lgpac_static -lz" || GPAC_LIBS="-lgpac_static" - if [ "$SYS" = "WINDOWS" ] ; then - GPAC_LIBS="$GPAC_LIBS -lwinmm" - fi - if cc_check gpac/isomedia.h "$GPAC_LIBS" ; then - if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_set_pixel_aspect_ratio(0,0,0,0,0);" ; then - gpac="yes" - else - echo "Warning: gpac is too old, update to 2007-06-21 UTC or later" - fi - fi -fi -if [ "$gpac" = "yes" ] ; then - define HAVE_GPAC - if cc_check gpac/isomedia.h "-Werror $GPAC_LIBS" "void *p; p = gf_malloc(1); gf_free(p);" ; then - define HAVE_GF_MALLOC - fi - LDFLAGSCLI="$GPAC_LIBS $LDFLAGSCLI" -fi - if [ "$avs" = "auto" ] ; then avs="no" # cygwin can use avisynth if it can use LoadLibrary @@ -1183,7 +1155,6 @@ interlaced: $interlaced avs: $avs lavf: $lavf ffms: $ffms -gpac: $gpac gpl: $gpl thread: $thread filters: $filters diff --git a/output/mp4.c b/output/mp4.c index ee54e66..62c1777 100644 --- a/output/mp4.c +++ b/output/mp4.c @@ -1,10 +1,13 @@ /***************************************************************************** - * mp4.c: mp4 muxer + * mp4.c: mp4 muxer using L-SMASH ***************************************************************************** * Copyright (C) 2003-2013 x264 project * * Authors: Laurent Aimar * Loren Merritt + * Yusuke Nakamura + * Takashi Hirata + * golgol7777 * * This program is free 
software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,88 +28,88 @@ *****************************************************************************/ #include "output.h" -#include +#include "mp4/lsmash.h" +#include "mp4/importer.h" -#if HAVE_GF_MALLOC -#undef malloc -#undef free -#undef realloc -#define malloc gf_malloc -#define free gf_free -#define realloc gf_realloc -#endif +#define H264_NALU_LENGTH_SIZE 4 + +/*******************/ + +#define MP4_LOG_ERROR( ... ) x264_cli_log( "mp4", X264_LOG_ERROR, __VA_ARGS__ ) +#define MP4_LOG_WARNING( ... ) x264_cli_log( "mp4", X264_LOG_WARNING, __VA_ARGS__ ) +#define MP4_LOG_INFO( ... ) x264_cli_log( "mp4", X264_LOG_INFO, __VA_ARGS__ ) +//#define MP4_RETURN_IF_ERR( cond, ret, ... ) RETURN_IF_ERR( cond, "mp4", ret, __VA_ARGS__ ) +#define MP4_FAIL_IF_ERR( cond, ... ) FAIL_IF_ERR( cond, "mp4", __VA_ARGS__ ) + +/* For close_file() */ +#define MP4_LOG_IF_ERR( cond, ... )\ +if( cond )\ +{\ + MP4_LOG_ERROR( __VA_ARGS__ );\ +} + +/* For open_file() */ +#define MP4_FAIL_IF_ERR_EX( cond, ... 
)\ +if( cond )\ +{\ + remove_mp4_hnd( p_mp4 );\ + MP4_LOG_ERROR( __VA_ARGS__ );\ + return -1;\ +} + +/*******************/ typedef struct { - GF_ISOFile *p_file; - GF_AVCConfig *p_config; - GF_ISOSample *p_sample; - int i_track; - uint32_t i_descidx; - uint64_t i_time_res; - int64_t i_time_inc; - int64_t i_delay_time; - int64_t i_init_delta; + lsmash_root_t *p_root; + lsmash_brand_type major_brand; + lsmash_video_summary_t *summary; + int i_brand_3gpp; + int b_brand_qt; + int b_stdout; + uint32_t i_movie_timescale; + uint32_t i_video_timescale; + uint32_t i_track; + uint32_t i_sample_entry; + uint64_t i_time_inc; + int64_t i_start_offset; + uint64_t i_first_cts; + uint64_t i_prev_dts; + uint32_t i_sei_size; + uint8_t *p_sei_buffer; int i_numframe; + int64_t i_init_delta; int i_delay_frames; int b_dts_compress; int i_dts_compress_multiplier; - int i_data_size; + int b_use_recovery; + int b_no_pasp; + int b_fragments; + lsmash_scale_method scale_method; } mp4_hnd_t; -static void recompute_bitrate_mp4( GF_ISOFile *p_file, int i_track ) -{ - u32 count, di, timescale, time_wnd, rate; - u64 offset; - Double br; - GF_ESD *esd; +/*******************/ - esd = gf_isom_get_esd( p_file, i_track, 1 ); - if( !esd ) +static void remove_mp4_hnd( hnd_t handle ) +{ + mp4_hnd_t *p_mp4 = handle; + if( !p_mp4 ) return; - - esd->decoderConfig->avgBitrate = 0; - esd->decoderConfig->maxBitrate = 0; - rate = time_wnd = 0; - - timescale = gf_isom_get_media_timescale( p_file, i_track ); - count = gf_isom_get_sample_count( p_file, i_track ); - for( u32 i = 0; i < count; i++ ) + if( p_mp4->p_sei_buffer ) { - GF_ISOSample *samp = gf_isom_get_sample_info( p_file, i_track, i+1, &di, &offset ); - if( !samp ) - { - x264_cli_log( "mp4", X264_LOG_ERROR, "failure reading back frame %u\n", i ); - break; - } - - if( esd->decoderConfig->bufferSizeDB < samp->dataLength ) - esd->decoderConfig->bufferSizeDB = samp->dataLength; - - esd->decoderConfig->avgBitrate += samp->dataLength; - rate += 
samp->dataLength; - if( samp->DTS > time_wnd + timescale ) - { - if( rate > esd->decoderConfig->maxBitrate ) - esd->decoderConfig->maxBitrate = rate; - time_wnd = samp->DTS; - rate = 0; - } - - gf_isom_sample_del( &samp ); + free( p_mp4->p_sei_buffer ); + p_mp4->p_sei_buffer = NULL; } - - br = (Double)(s64)gf_isom_get_media_duration( p_file, i_track ); - br /= timescale; - esd->decoderConfig->avgBitrate = (u32)(esd->decoderConfig->avgBitrate / br); - /*move to bps*/ - esd->decoderConfig->avgBitrate *= 8; - esd->decoderConfig->maxBitrate *= 8; - - gf_isom_change_mpeg4_description( p_file, i_track, 1, esd ); - gf_odf_desc_del( (GF_Descriptor*)esd ); + if( p_mp4->p_root ) + { + lsmash_destroy_root( p_mp4->p_root ); + p_mp4->p_root = NULL; + } + free( p_mp4 ); } +/*******************/ + static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest_pts ) { mp4_hnd_t *p_mp4 = handle; @@ -114,56 +117,51 @@ static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest if( !p_mp4 ) return 0; - if( p_mp4->p_config ) - gf_odf_avc_cfg_del( p_mp4->p_config ); - - if( p_mp4->p_sample ) - { - if( p_mp4->p_sample->data ) - free( p_mp4->p_sample->data ); - - p_mp4->p_sample->dataLength = 0; - gf_isom_sample_del( &p_mp4->p_sample ); - } - - if( p_mp4->p_file ) + if( p_mp4->p_root ) { + double actual_duration = 0; /* FIXME: This may be inside block of "if( p_mp4->i_track )" if audio does not use this. */ if( p_mp4->i_track ) { - /* The mdhd duration is defined as CTS[final] - CTS[0] + duration of last frame. - * The mdhd duration (in seconds) should be able to be longer than the tkhd duration since the track is managed by edts. - * So, if mdhd duration is equal to the last DTS or less, we give the last composition time delta to the last sample duration. - * And then, the mdhd duration is updated, but it time-wise doesn't give the actual duration. - * The tkhd duration is the actual track duration. 
*/ - uint64_t mdhd_duration = (2 * largest_pts - second_largest_pts) * p_mp4->i_time_inc; - if( mdhd_duration != gf_isom_get_media_duration( p_mp4->p_file, p_mp4->i_track ) ) - { - uint64_t last_dts = gf_isom_get_sample_dts( p_mp4->p_file, p_mp4->i_track, p_mp4->i_numframe ); - uint32_t last_duration = (uint32_t)( mdhd_duration > last_dts ? mdhd_duration - last_dts : (largest_pts - second_largest_pts) * p_mp4->i_time_inc ); - gf_isom_set_last_sample_duration( p_mp4->p_file, p_mp4->i_track, last_duration ); - } - - /* Write an Edit Box if the first CTS offset is positive. - * A media_time is given by not the mvhd timescale but rather the mdhd timescale. - * The reason is that an Edit Box maps the presentation time-line to the media time-line. - * Any demuxers should follow the Edit Box if it exists. */ - GF_ISOSample *sample = gf_isom_get_sample_info( p_mp4->p_file, p_mp4->i_track, 1, NULL, NULL ); - if( sample && sample->CTS_Offset > 0 ) + /* Flush the rest of samples and add the last sample_delta. */ + uint32_t last_delta = largest_pts - second_largest_pts; + MP4_LOG_IF_ERR( lsmash_flush_pooled_samples( p_mp4->p_root, p_mp4->i_track, (last_delta ? last_delta : 1) * p_mp4->i_time_inc ), + "failed to flush the rest of samples.\n" ); + + if( p_mp4->i_movie_timescale != 0 && p_mp4->i_video_timescale != 0 ) /* avoid zero division */ + actual_duration = ((double)((largest_pts + last_delta) * p_mp4->i_time_inc) / p_mp4->i_video_timescale) * p_mp4->i_movie_timescale; + else + MP4_LOG_ERROR( "timescale is broken.\n" ); + + /* + * Declare the explicit time-line mapping. + * A segment_duration is given by movie timescale, while a media_time that is the start time of this segment + * is given by not the movie timescale but rather the media timescale. + * The reason is that ISO media have two time-lines, presentation and media time-line, + * and an edit maps the presentation time-line to the media time-line. 
+ * According to QuickTime file format specification and the actual playback in QuickTime Player, + * if the Edit Box doesn't exist in the track, the ratio of the summation of sample durations and track's duration becomes + * the track's media_rate so that the entire media can be used by the track. + * So, we add Edit Box here to avoid this implicit media_rate could distort track's presentation timestamps slightly. + * Note: Any demuxers should follow the Edit List Box if it exists. + */ + lsmash_edit_t edit; + edit.duration = actual_duration; + edit.start_time = p_mp4->i_first_cts; + edit.rate = ISOM_EDIT_MODE_NORMAL; + if( !p_mp4->b_fragments ) { - uint32_t mvhd_timescale = gf_isom_get_timescale( p_mp4->p_file ); - uint64_t tkhd_duration = (uint64_t)( mdhd_duration * ( (double)mvhd_timescale / p_mp4->i_time_res ) ); - gf_isom_append_edit_segment( p_mp4->p_file, p_mp4->i_track, tkhd_duration, sample->CTS_Offset, GF_ISOM_EDIT_NORMAL ); + MP4_LOG_IF_ERR( lsmash_create_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, edit ), + "failed to set timeline map for video.\n" ); } - gf_isom_sample_del( &sample ); - - recompute_bitrate_mp4( p_mp4->p_file, p_mp4->i_track ); + else if( !p_mp4->b_stdout ) + MP4_LOG_IF_ERR( lsmash_modify_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, 1, edit ), + "failed to update timeline map for video.\n" ); } - gf_isom_set_pl_indication( p_mp4->p_file, GF_ISOM_PL_VISUAL, 0x15 ); - gf_isom_set_storage_mode( p_mp4->p_file, GF_ISOM_STORE_FLAT ); - gf_isom_close( p_mp4->p_file ); + + MP4_LOG_IF_ERR( lsmash_finish_movie( p_mp4->p_root, NULL ), "failed to finish movie.\n" ); } - free( p_mp4 ); + remove_mp4_hnd( p_mp4 ); /* including lsmash_destroy_root( p_mp4->p_root ); */ return 0; } @@ -173,27 +171,54 @@ static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt mp4_hnd_t *p_mp4; *p_handle = NULL; - FILE *fh = fopen( psz_filename, "w" ); - if( !fh ) - return -1; - FAIL_IF_ERR( !x264_is_regular_file( fh ), "mp4", 
"MP4 output is incompatible with non-regular file `%s'\n", psz_filename ) - fclose( fh ); - if( !(p_mp4 = malloc( sizeof(mp4_hnd_t) )) ) - return -1; + int b_regular = strcmp( psz_filename, "-" ); + b_regular = b_regular && x264_is_regular_file_path( psz_filename ); + if( b_regular ) + { + FILE *fh = fopen( psz_filename, "wb" ); + MP4_FAIL_IF_ERR( !fh, "cannot open output file `%s'.\n", psz_filename ); + b_regular = x264_is_regular_file( fh ); + fclose( fh ); + } + p_mp4 = malloc( sizeof(mp4_hnd_t) ); + MP4_FAIL_IF_ERR( !p_mp4, "failed to allocate memory for muxer information.\n" ); memset( p_mp4, 0, sizeof(mp4_hnd_t) ); - p_mp4->p_file = gf_isom_open( psz_filename, GF_ISOM_OPEN_WRITE, NULL ); p_mp4->b_dts_compress = opt->use_dts_compress; - - if( !(p_mp4->p_sample = gf_isom_sample_new()) ) + p_mp4->b_use_recovery = 0; + p_mp4->b_no_pasp = 0; + p_mp4->scale_method = ISOM_SCALE_METHOD_MEET; + p_mp4->b_fragments = !b_regular; + p_mp4->b_stdout = !strcmp( psz_filename, "-" ); + + char* ext = get_filename_extension( psz_filename ); + if( !strcmp( ext, "mov" ) || !strcmp( ext, "qt" ) ) { - close_file( p_mp4, 0, 0 ); - return -1; + p_mp4->major_brand = ISOM_BRAND_TYPE_QT; + p_mp4->b_brand_qt = 1; + } + else if( !strcmp( ext, "3gp" ) ) + { + p_mp4->major_brand = ISOM_BRAND_TYPE_3GP6; + p_mp4->i_brand_3gpp = 1; + } + else if( !strcmp( ext, "3g2" ) ) + { + p_mp4->major_brand = ISOM_BRAND_TYPE_3G2A; + p_mp4->i_brand_3gpp = 2; } + else + p_mp4->major_brand = ISOM_BRAND_TYPE_MP42; + + p_mp4->p_root = lsmash_open_movie( psz_filename, p_mp4->b_fragments ? 
LSMASH_FILE_MODE_WRITE_FRAGMENTED : LSMASH_FILE_MODE_WRITE ); + MP4_FAIL_IF_ERR_EX( !p_mp4->p_root, "failed to create root.\n" ); - gf_isom_set_brand_info( p_mp4->p_file, GF_ISOM_BRAND_AVC1, 0 ); + p_mp4->summary = (lsmash_video_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_VIDEO ); + MP4_FAIL_IF_ERR_EX( !p_mp4->summary, + "failed to allocate memory for summary information of video.\n" ); + p_mp4->summary->sample_type = ISOM_CODEC_TYPE_AVC1_VIDEO; *p_handle = p_mp4; @@ -203,111 +228,193 @@ static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt static int set_param( hnd_t handle, x264_param_t *p_param ) { mp4_hnd_t *p_mp4 = handle; + uint64_t i_media_timescale; p_mp4->i_delay_frames = p_param->i_bframe ? (p_param->i_bframe_pyramid ? 2 : 1) : 0; p_mp4->i_dts_compress_multiplier = p_mp4->b_dts_compress * p_mp4->i_delay_frames + 1; - p_mp4->i_time_res = (uint64_t)p_param->i_timebase_den * p_mp4->i_dts_compress_multiplier; + i_media_timescale = (uint64_t)p_param->i_timebase_den * p_mp4->i_dts_compress_multiplier; p_mp4->i_time_inc = (uint64_t)p_param->i_timebase_num * p_mp4->i_dts_compress_multiplier; - FAIL_IF_ERR( p_mp4->i_time_res > UINT32_MAX, "mp4", "MP4 media timescale %"PRIu64" exceeds maximum\n", p_mp4->i_time_res ) - - p_mp4->i_track = gf_isom_new_track( p_mp4->p_file, 0, GF_ISOM_MEDIA_VISUAL, - p_mp4->i_time_res ); + MP4_FAIL_IF_ERR( i_media_timescale > UINT32_MAX, "MP4 media timescale %"PRIu64" exceeds maximum\n", i_media_timescale ); - p_mp4->p_config = gf_odf_avc_cfg_new(); - gf_isom_avc_config_new( p_mp4->p_file, p_mp4->i_track, p_mp4->p_config, - NULL, NULL, &p_mp4->i_descidx ); - - gf_isom_set_track_enabled( p_mp4->p_file, p_mp4->i_track, 1 ); - - gf_isom_set_visual_info( p_mp4->p_file, p_mp4->i_track, p_mp4->i_descidx, - p_param->i_width, p_param->i_height ); + /* Select brands. 
*/ + lsmash_brand_type brands[11] = { 0 }; + uint32_t minor_version = 0; + uint32_t brand_count = 0; + if( p_mp4->b_brand_qt ) + { + brands[brand_count++] = ISOM_BRAND_TYPE_QT; + p_mp4->i_brand_3gpp = 0; + p_mp4->b_use_recovery = 0; /* Disable sample grouping. */ + } + else + { + if( p_mp4->i_brand_3gpp >= 1 ) + brands[brand_count++] = ISOM_BRAND_TYPE_3GP6; + if( p_mp4->i_brand_3gpp == 2 ) + { + brands[brand_count++] = ISOM_BRAND_TYPE_3G2A; + minor_version = 0x00010000; + } + brands[brand_count++] = ISOM_BRAND_TYPE_MP42; + brands[brand_count++] = ISOM_BRAND_TYPE_MP41; + brands[brand_count++] = ISOM_BRAND_TYPE_ISOM; + if( p_mp4->b_use_recovery ) + { + brands[brand_count++] = ISOM_BRAND_TYPE_AVC1; /* sdtp, sgpd, sbgp and visual roll recovery grouping */ + if( p_param->b_open_gop ) + brands[brand_count++] = ISOM_BRAND_TYPE_ISO6; /* cslg and visual random access grouping */ + } + } + /* Set movie parameters. */ + lsmash_movie_parameters_t movie_param; + lsmash_initialize_movie_parameters( &movie_param ); + movie_param.major_brand = p_mp4->major_brand; + movie_param.brands = brands; + movie_param.number_of_brands = brand_count; + movie_param.minor_version = minor_version; + MP4_FAIL_IF_ERR( lsmash_set_movie_parameters( p_mp4->p_root, &movie_param ), + "failed to set movie parameters.\n" ); + p_mp4->i_movie_timescale = lsmash_get_movie_timescale( p_mp4->p_root ); + MP4_FAIL_IF_ERR( !p_mp4->i_movie_timescale, "movie timescale is broken.\n" ); + + /* Create a video track. 
*/ + p_mp4->i_track = lsmash_create_track( p_mp4->p_root, ISOM_MEDIA_HANDLER_TYPE_VIDEO_TRACK ); + MP4_FAIL_IF_ERR( !p_mp4->i_track, "failed to create a video track.\n" ); + + p_mp4->summary->width = p_param->i_width; + p_mp4->summary->height = p_param->i_height; + uint32_t i_display_width = p_param->i_width << 16; + uint32_t i_display_height = p_param->i_height << 16; if( p_param->vui.i_sar_width && p_param->vui.i_sar_height ) { - uint64_t dw = p_param->i_width << 16; - uint64_t dh = p_param->i_height << 16; double sar = (double)p_param->vui.i_sar_width / p_param->vui.i_sar_height; if( sar > 1.0 ) - dw *= sar ; + i_display_width *= sar; else - dh /= sar; - gf_isom_set_pixel_aspect_ratio( p_mp4->p_file, p_mp4->i_track, p_mp4->i_descidx, p_param->vui.i_sar_width, p_param->vui.i_sar_height ); - gf_isom_set_track_layout_info( p_mp4->p_file, p_mp4->i_track, dw, dh, 0, 0, 0 ); + i_display_height /= sar; + if( !p_mp4->b_no_pasp ) + { + p_mp4->summary->par_h = p_param->vui.i_sar_width; + p_mp4->summary->par_v = p_param->vui.i_sar_height; + } } - - p_mp4->i_data_size = p_param->i_width * p_param->i_height * 3 / 2; - p_mp4->p_sample->data = malloc( p_mp4->i_data_size ); - if( !p_mp4->p_sample->data ) + p_mp4->summary->color.primaries_index = p_param->vui.i_colorprim; + p_mp4->summary->color.transfer_index = p_param->vui.i_transfer; + p_mp4->summary->color.matrix_index = p_param->vui.i_colmatrix >= 0 ? p_param->vui.i_colmatrix : ISOM_MATRIX_INDEX_UNSPECIFIED; + p_mp4->summary->color.full_range = p_param->vui.b_fullrange >= 0 ? p_param->vui.b_fullrange : 0; + + /* Set video track parameters. 
*/ + lsmash_track_parameters_t track_param; + lsmash_initialize_track_parameters( &track_param ); + lsmash_track_mode track_mode = ISOM_TRACK_ENABLED | ISOM_TRACK_IN_MOVIE | ISOM_TRACK_IN_PREVIEW; + if( p_mp4->b_brand_qt ) + track_mode |= QT_TRACK_IN_POSTER; + track_param.mode = track_mode; + track_param.display_width = i_display_width; + track_param.display_height = i_display_height; + track_param.aperture_modes = p_mp4->b_brand_qt && !p_mp4->b_no_pasp; + MP4_FAIL_IF_ERR( lsmash_set_track_parameters( p_mp4->p_root, p_mp4->i_track, &track_param ), + "failed to set track parameters for video.\n" ); + + /* Set video media parameters. */ + lsmash_media_parameters_t media_param; + lsmash_initialize_media_parameters( &media_param ); + media_param.timescale = i_media_timescale; + media_param.media_handler_name = "L-SMASH Video Media Handler"; + if( p_mp4->b_brand_qt ) + media_param.data_handler_name = "L-SMASH URL Data Handler"; + if( p_mp4->b_use_recovery ) { - p_mp4->i_data_size = 0; - return -1; + media_param.roll_grouping = p_param->b_intra_refresh; + media_param.rap_grouping = p_param->b_open_gop; } + MP4_FAIL_IF_ERR( lsmash_set_media_parameters( p_mp4->p_root, p_mp4->i_track, &media_param ), + "failed to set media parameters for video.\n" ); + p_mp4->i_video_timescale = lsmash_get_media_timescale( p_mp4->p_root, p_mp4->i_track ); + MP4_FAIL_IF_ERR( !p_mp4->i_video_timescale, "media timescale for video is broken.\n" ); return 0; } -static int check_buffer( mp4_hnd_t *p_mp4, int needed_size ) -{ - if( needed_size > p_mp4->i_data_size ) - { - void *ptr = realloc( p_mp4->p_sample->data, needed_size ); - if( !ptr ) - return -1; - p_mp4->p_sample->data = ptr; - p_mp4->i_data_size = needed_size; - } - return 0; -} - static int write_headers( hnd_t handle, x264_nal_t *p_nal ) { mp4_hnd_t *p_mp4 = handle; - GF_AVCConfigSlot *p_slot; - int sps_size = p_nal[0].i_payload - 4; - int pps_size = p_nal[1].i_payload - 4; - int sei_size = p_nal[2].i_payload; + uint32_t sps_size = 
p_nal[0].i_payload - H264_NALU_LENGTH_SIZE; + uint32_t pps_size = p_nal[1].i_payload - H264_NALU_LENGTH_SIZE; + uint32_t sei_size = p_nal[2].i_payload; - uint8_t *sps = p_nal[0].p_payload + 4; - uint8_t *pps = p_nal[1].p_payload + 4; + uint8_t *sps = p_nal[0].p_payload + H264_NALU_LENGTH_SIZE; + uint8_t *pps = p_nal[1].p_payload + H264_NALU_LENGTH_SIZE; uint8_t *sei = p_nal[2].p_payload; - // SPS + lsmash_codec_specific_t *cs = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); - p_mp4->p_config->configurationVersion = 1; - p_mp4->p_config->AVCProfileIndication = sps[1]; - p_mp4->p_config->profile_compatibility = sps[2]; - p_mp4->p_config->AVCLevelIndication = sps[3]; - p_slot = malloc( sizeof(GF_AVCConfigSlot) ); - if( !p_slot ) - return -1; - p_slot->size = sps_size; - p_slot->data = malloc( p_slot->size ); - if( !p_slot->data ) - return -1; - memcpy( p_slot->data, sps, sps_size ); - gf_list_add( p_mp4->p_config->sequenceParameterSets, p_slot ); + lsmash_h264_specific_parameters_t *param = (lsmash_h264_specific_parameters_t *)cs->data.structured; + param->lengthSizeMinusOne = H264_NALU_LENGTH_SIZE - 1; - // PPS + /* SPS + * The remaining parameters are automatically set by SPS. 
*/ + if( lsmash_append_h264_parameter_set( param, H264_PARAMETER_SET_TYPE_SPS, sps, sps_size ) ) + { + MP4_LOG_ERROR( "failed to append SPS.\n" ); + return -1; + } - p_slot = malloc( sizeof(GF_AVCConfigSlot) ); - if( !p_slot ) + /* PPS */ + if( lsmash_append_h264_parameter_set( param, H264_PARAMETER_SET_TYPE_PPS, pps, pps_size ) ) + { + MP4_LOG_ERROR( "failed to append PPS.\n" ); return -1; - p_slot->size = pps_size; - p_slot->data = malloc( p_slot->size ); - if( !p_slot->data ) + } + + if( lsmash_add_codec_specific_data( (lsmash_summary_t *)p_mp4->summary, cs ) ) + { + MP4_LOG_ERROR( "failed to add H.264 specific info.\n" ); return -1; - memcpy( p_slot->data, pps, pps_size ); - gf_list_add( p_mp4->p_config->pictureParameterSets, p_slot ); - gf_isom_avc_config_update( p_mp4->p_file, p_mp4->i_track, 1, p_mp4->p_config ); + } - // SEI + lsmash_destroy_codec_specific_data( cs ); - if( check_buffer( p_mp4, p_mp4->p_sample->dataLength + sei_size ) ) - return -1; - memcpy( p_mp4->p_sample->data + p_mp4->p_sample->dataLength, sei, sei_size ); - p_mp4->p_sample->dataLength += sei_size; + /* Additional extensions */ + if( p_mp4->major_brand != ISOM_BRAND_TYPE_QT ) + { + /* Bitrate info */ + cs = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264_BITRATE, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( cs ) + lsmash_add_codec_specific_data( (lsmash_summary_t *)p_mp4->summary, cs ); + lsmash_destroy_codec_specific_data( cs ); + + if( !p_mp4->b_no_pasp ) + { + /* Sample scale method */ + cs = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_SAMPLE_SCALE, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( cs ) + { + lsmash_isom_sample_scale_t *data = (lsmash_isom_sample_scale_t *)cs->data.structured; + data->scale_method = p_mp4->scale_method; + data->constraint_flag = 1; + lsmash_add_codec_specific_data( (lsmash_summary_t *)p_mp4->summary, cs ); + } + lsmash_destroy_codec_specific_data( cs ); + } + } + + 
p_mp4->i_sample_entry = lsmash_add_sample_entry( p_mp4->p_root, p_mp4->i_track, p_mp4->summary ); + MP4_FAIL_IF_ERR( !p_mp4->i_sample_entry, + "failed to add sample entry for video.\n" ); + + /* SEI */ + p_mp4->p_sei_buffer = malloc( sei_size ); + MP4_FAIL_IF_ERR( !p_mp4->p_sei_buffer, + "failed to allocate sei transition buffer.\n" ); + memcpy( p_mp4->p_sei_buffer, sei, sei_size ); + p_mp4->i_sei_size = sei_size; return sei_size + sps_size + pps_size; } @@ -315,21 +422,41 @@ static int write_headers( hnd_t handle, x264_nal_t *p_nal ) static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_t *p_picture ) { mp4_hnd_t *p_mp4 = handle; - int64_t dts; - int64_t cts; - - if( check_buffer( p_mp4, p_mp4->p_sample->dataLength + i_size ) ) - return -1; - memcpy( p_mp4->p_sample->data + p_mp4->p_sample->dataLength, p_nalu, i_size ); - p_mp4->p_sample->dataLength += i_size; + uint64_t dts, cts; if( !p_mp4->i_numframe ) - p_mp4->i_delay_time = p_picture->i_dts * -1; + { + p_mp4->i_start_offset = p_picture->i_dts * -1; + p_mp4->i_first_cts = p_mp4->b_dts_compress ? 0 : p_mp4->i_start_offset * p_mp4->i_time_inc; + if( p_mp4->b_fragments ) + { + lsmash_edit_t edit; + edit.duration = ISOM_EDIT_DURATION_UNKNOWN32; /* QuickTime doesn't support 64bit duration. 
*/ + edit.start_time = p_mp4->i_first_cts; + edit.rate = ISOM_EDIT_MODE_NORMAL; + MP4_LOG_IF_ERR( lsmash_create_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, edit ), + "failed to set timeline map for video.\n" ); + } + } + + lsmash_sample_t *p_sample = lsmash_create_sample( i_size + p_mp4->i_sei_size ); + MP4_FAIL_IF_ERR( !p_sample, + "failed to create a video sample data.\n" ); + + if( p_mp4->p_sei_buffer ) + { + memcpy( p_sample->data, p_mp4->p_sei_buffer, p_mp4->i_sei_size ); + free( p_mp4->p_sei_buffer ); + p_mp4->p_sei_buffer = NULL; + } + + memcpy( p_sample->data + p_mp4->i_sei_size, p_nalu, i_size ); + p_mp4->i_sei_size = 0; if( p_mp4->b_dts_compress ) { if( p_mp4->i_numframe == 1 ) - p_mp4->i_init_delta = (p_picture->i_dts + p_mp4->i_delay_time) * p_mp4->i_time_inc; + p_mp4->i_init_delta = (p_picture->i_dts + p_mp4->i_start_offset) * p_mp4->i_time_inc; dts = p_mp4->i_numframe > p_mp4->i_delay_frames ? p_picture->i_dts * p_mp4->i_time_inc : p_mp4->i_numframe * (p_mp4->i_init_delta / p_mp4->i_dts_compress_multiplier); @@ -337,16 +464,40 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_ } else { - dts = (p_picture->i_dts + p_mp4->i_delay_time) * p_mp4->i_time_inc; - cts = (p_picture->i_pts + p_mp4->i_delay_time) * p_mp4->i_time_inc; + dts = (p_picture->i_dts + p_mp4->i_start_offset) * p_mp4->i_time_inc; + cts = (p_picture->i_pts + p_mp4->i_start_offset) * p_mp4->i_time_inc; + } + + p_sample->dts = dts; + p_sample->cts = cts; + p_sample->index = p_mp4->i_sample_entry; + p_sample->prop.ra_flags = p_picture->b_keyframe ? ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC : ISOM_SAMPLE_RANDOM_ACCESS_FLAG_NONE; + if( p_mp4->b_brand_qt ) + { + p_sample->prop.independent = IS_X264_TYPE_I( p_picture->i_type ) ? ISOM_SAMPLE_IS_INDEPENDENT : ISOM_SAMPLE_IS_NOT_INDEPENDENT; + p_sample->prop.disposable = p_picture->i_type == X264_TYPE_B ? 
ISOM_SAMPLE_IS_DISPOSABLE : ISOM_SAMPLE_IS_NOT_DISPOSABLE; + p_sample->prop.redundant = ISOM_SAMPLE_HAS_NO_REDUNDANCY; + if( p_picture->i_type == X264_TYPE_I || p_picture->i_type == X264_TYPE_P || p_picture->i_type == X264_TYPE_BREF ) + p_sample->prop.allow_earlier = QT_SAMPLE_EARLIER_PTS_ALLOWED; +/* + if( p_picture->i_type == X264_TYPE_I && p_picture->b_keyframe ) + p_sample->prop.ra_flags = ISOM_SAMPLE_RANDOM_ACCESS_FLAG_OPEN_RAP; +*/ + } + + if( p_mp4->b_fragments && p_mp4->i_numframe && p_sample->prop.ra_flags != ISOM_SAMPLE_RANDOM_ACCESS_FLAG_NONE ) + { + MP4_FAIL_IF_ERR( lsmash_flush_pooled_samples( p_mp4->p_root, p_mp4->i_track, p_sample->dts - p_mp4->i_prev_dts ), + "failed to flush the rest of samples.\n" ); + MP4_FAIL_IF_ERR( lsmash_create_fragment_movie( p_mp4->p_root ), + "failed to create a movie fragment.\n" ); } - p_mp4->p_sample->IsRAP = p_picture->b_keyframe; - p_mp4->p_sample->DTS = dts; - p_mp4->p_sample->CTS_Offset = (uint32_t)(cts - dts); - gf_isom_add_sample( p_mp4->p_file, p_mp4->i_track, p_mp4->i_descidx, p_mp4->p_sample ); + /* Append data per sample. */ + MP4_FAIL_IF_ERR( lsmash_append_sample( p_mp4->p_root, p_mp4->i_track, p_sample ), + "failed to append a video frame.\n" ); - p_mp4->p_sample->dataLength = 0; + p_mp4->i_prev_dts = dts; p_mp4->i_numframe++; return i_size; diff --git a/output/mp4/a52.c b/output/mp4/a52.c new file mode 100644 index 0000000..0f5718f --- /dev/null +++ b/output/mp4/a52.c @@ -0,0 +1,633 @@ +/***************************************************************************** + * a52.c: + ***************************************************************************** + * Copyright (C) 2012 L-SMASH project + * + * Authors: Yusuke Nakamura + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *****************************************************************************/ + +/* This file is available under an ISC license. */ + +#include "internal.h" /* must be placed first */ + +#include +#include +#include + +#include "box.h" + +static const char *bit_stream_mode[] = + { + "Main audio service: complete main (CM)", + "Main audio service: music and effects (ME)", + "Associated service: visually impaired (VI)", + "Associated service: hearing impaired (HI)", + "Associated service: dialogue (D)", + "Associated service: commentary (C)", + "Associated service: emergency (E)", + "Undefined service", + "Associated service: voice over (VO)", /* only if acmod == 0b001 */ + "Main audio service: karaoke" + }; + +/* For karaoke mode, C->M, S->V1, SL->V1 and SR->V2. 
*/ +static const char *audio_coding_mode[] = + { + "1 + 1: Dual mono", + "1/0: C", + "2/0: L, R", + "3/0: L, C, R", + "2/1: L, R, S", + "3/1: L, C, R, S", + "2/2: L, R, SL, SR", + "3/2: L, C, R, SL, SR", + "Undefined audio coding mode", + "Undefined audio coding mode", + "2/0: L, R", + "3/0: L, M, R", + "2/1: L, R, V1", + "3/1: L, M, R, V1", + "2/2: L, R, V1, V2", + "3/2: L, M, R, V1, V2" + }; + +/*************************************************************************** + AC-3 tools + ETSI TS 102 366 V1.2.1 (2008-08) +***************************************************************************/ +#include "a52.h" + +#define AC3_SPECIFIC_BOX_LENGTH 11 + +uint8_t *lsmash_create_ac3_specific_info( lsmash_ac3_specific_parameters_t *param, uint32_t *data_length ) +{ + lsmash_bits_t bits = { 0 }; + lsmash_bs_t bs = { 0 }; + lsmash_bits_init( &bits, &bs ); + uint8_t buffer[AC3_SPECIFIC_BOX_LENGTH] = { 0 }; + bs.data = buffer; + bs.alloc = AC3_SPECIFIC_BOX_LENGTH; + lsmash_bits_put( &bits, 32, AC3_SPECIFIC_BOX_LENGTH ); /* box size */ + lsmash_bits_put( &bits, 32, ISOM_BOX_TYPE_DAC3.fourcc ); /* box type: 'dac3' */ + lsmash_bits_put( &bits, 2, param->fscod ); + lsmash_bits_put( &bits, 5, param->bsid ); + lsmash_bits_put( &bits, 3, param->bsmod ); + lsmash_bits_put( &bits, 3, param->acmod ); + lsmash_bits_put( &bits, 1, param->lfeon ); + lsmash_bits_put( &bits, 5, param->frmsizecod >> 1 ); + lsmash_bits_put( &bits, 5, 0 ); + uint8_t *data = lsmash_bits_export_data( &bits, data_length ); + lsmash_bits_empty( &bits ); + return data; +} + +int lsmash_setup_ac3_specific_parameters_from_syncframe( lsmash_ac3_specific_parameters_t *param, uint8_t *data, uint32_t data_length ) +{ + if( !data || data_length < AC3_MIN_SYNCFRAME_LENGTH ) + return -1; + IF_A52_SYNCWORD( data ) + return -1; + lsmash_bits_t bits = { 0 }; + lsmash_bs_t bs = { 0 }; + uint8_t buffer[AC3_MAX_SYNCFRAME_LENGTH] = { 0 }; + bs.data = buffer; + bs.alloc = AC3_MAX_SYNCFRAME_LENGTH; + ac3_info_t handler = { { 0 } 
}; + ac3_info_t *info = &handler; + memcpy( info->buffer, data, LSMASH_MIN( data_length, AC3_MAX_SYNCFRAME_LENGTH ) ); + info->bits = &bits; + lsmash_bits_init( &bits, &bs ); + if( ac3_parse_syncframe_header( info, info->buffer ) ) + return -1; + *param = info->dac3_param; + return 0; +} + +static int ac3_check_syncframe_header( lsmash_ac3_specific_parameters_t *param ) +{ + if( param->fscod == 0x3 ) + return -1; /* unknown Sample Rate Code */ + if( param->frmsizecod > 0x25 ) + return -1; /* unknown Frame Size Code */ + if( param->bsid >= 10 ) + return -1; /* might be EAC-3 */ + return 0; +} + +int ac3_parse_syncframe_header( ac3_info_t *info, uint8_t *data ) +{ + lsmash_bits_t *bits = info->bits; + if( lsmash_bits_import_data( bits, data, AC3_MIN_SYNCFRAME_LENGTH ) ) + return -1; + lsmash_ac3_specific_parameters_t *param = &info->dac3_param; + lsmash_bits_get( bits, 32 ); /* syncword + crc1 */ + param->fscod = lsmash_bits_get( bits, 2 ); + param->frmsizecod = lsmash_bits_get( bits, 6 ); + param->bsid = lsmash_bits_get( bits, 5 ); + param->bsmod = lsmash_bits_get( bits, 3 ); + param->acmod = lsmash_bits_get( bits, 3 ); + if( (param->acmod & 0x01) && (param->acmod != 0x01) ) + lsmash_bits_get( bits, 2 ); /* cmixlev */ + if( param->acmod & 0x04 ) + lsmash_bits_get( bits, 2 ); /* surmixlev */ + if( param->acmod == 0x02 ) + lsmash_bits_get( bits, 2 ); /* dsurmod */ + param->lfeon = lsmash_bits_get( bits, 1 ); + lsmash_bits_empty( bits ); + return ac3_check_syncframe_header( param ); +} + +int ac3_construct_specific_parameters( lsmash_codec_specific_t *dst, lsmash_codec_specific_t *src ) +{ + assert( dst && dst->data.structured && src && src->data.unstructured ); + if( src->size < AC3_SPECIFIC_BOX_LENGTH ) + return -1; + lsmash_ac3_specific_parameters_t *param = (lsmash_ac3_specific_parameters_t *)dst->data.structured; + uint8_t *data = src->data.unstructured; + uint64_t size = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; + data += 
ISOM_BASEBOX_COMMON_SIZE; + if( size == 1 ) + { + size = ((uint64_t)data[0] << 56) | ((uint64_t)data[1] << 48) | ((uint64_t)data[2] << 40) | ((uint64_t)data[3] << 32) + | ((uint64_t)data[4] << 24) | ((uint64_t)data[5] << 16) | ((uint64_t)data[6] << 8) | (uint64_t)data[7]; + data += 8; + } + if( size != src->size ) + return -1; + param->fscod = (data[0] >> 6) & 0x03; /* XXxx xxxx xxxx xxxx xxxx xxxx */ + param->bsid = (data[0] >> 1) & 0x1F; /* xxXX XXXx xxxx xxxx xxxx xxxx */ + param->bsmod = ((data[0] & 0x01) << 2) | ((data[1] >> 6) & 0x03); /* xxxx xxxX XXxx xxxx xxxx xxxx; the low two bits live in the second payload byte */ + param->acmod = (data[1] >> 3) & 0x07; /* xxxx xxxx xxXX Xxxx xxxx xxxx */ + param->lfeon = (data[1] >> 2) & 0x01; /* xxxx xxxx xxxx xXxx xxxx xxxx */ + param->frmsizecod = ((data[1] & 0x03) << 3) | ((data[2] >> 5) & 0x07); /* xxxx xxxx xxxx xxXX XXXx xxxx; the low three bits live in the third payload byte */ + param->frmsizecod <<= 1; /* the box stores frmsizecod >> 1 in 5 bits; rebuild the 6-bit code with its LSB cleared */ + return 0; +} + +int ac3_print_codec_specific( FILE *fp, lsmash_root_t *root, isom_box_t *box, int level ) +{ + assert( fp && root && box ); + int indent = level; + lsmash_ifprintf( fp, indent++, "[%s: AC3 Specific Box]\n", isom_4cc2str( box->type.fourcc ) ); + lsmash_ifprintf( fp, indent, "position = %"PRIu64"\n", box->pos ); + lsmash_ifprintf( fp, indent, "size = %"PRIu64"\n", box->size ); + if( box->size < AC3_SPECIFIC_BOX_LENGTH ) + return -1; + isom_extension_box_t *ext = (isom_extension_box_t *)box; + assert( ext->format == EXTENSION_FORMAT_BINARY ); + uint8_t *data = ext->form.binary; + isom_skip_box_common( &data ); + uint8_t fscod = (data[0] >> 6) & 0x03; + uint8_t bsid = (data[0] >> 1) & 0x1F; + uint8_t bsmod = ((data[0] & 0x01) << 2) | ((data[1] >> 6) & 0x03); + uint8_t acmod = (data[1] >> 3) & 0x07; + uint8_t lfeon = (data[1] >> 2) & 0x01; + uint8_t bit_rate_code = ((data[1] & 0x03) << 3) | ((data[2] >> 5) & 0x07); + if( fscod != 0x03 ) + lsmash_ifprintf( fp, indent, "fscod = %"PRIu8" (%"PRIu32" Hz)\n", fscod, ac3_sample_rate_table[fscod] ); + else + lsmash_ifprintf( fp, indent, "fscod = 0x03 
(reserved)\n" ); + lsmash_ifprintf( fp, indent, "bsid = %"PRIu8"\n", bsid ); + lsmash_ifprintf( fp, indent, "bsmod = %"PRIu8" (%s)\n", bsmod, bit_stream_mode[bsmod + (acmod == 0x01 ? 1 : acmod > 0x01 ? 2 : 0)] ); + lsmash_ifprintf( fp, indent, "acmod = %"PRIu8" (%s)\n", acmod, audio_coding_mode[acmod + (bsmod == 0x07 ? 8 : 0)] ); + lsmash_ifprintf( fp, indent, "lfeon = %s\n", lfeon ? "1 (LFE)" : "0" ); + static const uint32_t bit_rate[] = + { + 32, 40, 48, 56, 64, 80, 96, 112, 128, + 160, 192, 224, 256, 320, 384, 448, 512, 576, 640, + 0 /* undefined */ + }; + lsmash_ifprintf( fp, indent, "bit_rate_code = 0x%02"PRIx8" (%"PRIu32" kbit/s)\n", bit_rate_code, bit_rate[bit_rate_code] ); + lsmash_ifprintf( fp, indent, "reserved = 0x%02"PRIx8"\n", data[2] & 0x1F ); + return 0; +} + +#undef AC3_SPECIFIC_BOX_LENGTH + +/*************************************************************************** + Enhanced AC-3 tools + ETSI TS 102 366 V1.2.1 (2008-08) +***************************************************************************/ + +uint8_t *lsmash_create_eac3_specific_info( lsmash_eac3_specific_parameters_t *param, uint32_t *data_length ) +{ +#define EAC3_SPECIFIC_BOX_MAX_LENGTH 42 + if( param->num_ind_sub > 7 ) + return NULL; + lsmash_bits_t bits = { 0 }; + lsmash_bs_t bs = { 0 }; + lsmash_bits_init( &bits, &bs ); + uint8_t buffer[EAC3_SPECIFIC_BOX_MAX_LENGTH] = { 0 }; + bs.data = buffer; + bs.alloc = EAC3_SPECIFIC_BOX_MAX_LENGTH; + lsmash_bits_put( &bits, 32, 0 ); /* box size */ + lsmash_bits_put( &bits, 32, ISOM_BOX_TYPE_DEC3.fourcc ); /* box type: 'dec3' */ + lsmash_bits_put( &bits, 13, param->data_rate ); /* data_rate; setup by isom_update_bitrate_description */ + lsmash_bits_put( &bits, 3, param->num_ind_sub ); + /* Apparently, the condition of this loop defined in ETSI TS 102 366 V1.2.1 (2008-08) is wrong. 
*/ + for( int i = 0; i <= param->num_ind_sub; i++ ) + { + lsmash_eac3_substream_info_t *independent_info = ¶m->independent_info[i]; + lsmash_bits_put( &bits, 2, independent_info->fscod ); + lsmash_bits_put( &bits, 5, independent_info->bsid ); + lsmash_bits_put( &bits, 5, independent_info->bsmod ); + lsmash_bits_put( &bits, 3, independent_info->acmod ); + lsmash_bits_put( &bits, 1, independent_info->lfeon ); + lsmash_bits_put( &bits, 3, 0 ); /* reserved */ + lsmash_bits_put( &bits, 4, independent_info->num_dep_sub ); + if( independent_info->num_dep_sub > 0 ) + lsmash_bits_put( &bits, 9, independent_info->chan_loc ); + else + lsmash_bits_put( &bits, 1, 0 ); /* reserved */ + } + uint8_t *data = lsmash_bits_export_data( &bits, data_length ); + lsmash_bits_empty( &bits ); + /* Update box size. */ + data[0] = ((*data_length) >> 24) & 0xff; + data[1] = ((*data_length) >> 16) & 0xff; + data[2] = ((*data_length) >> 8) & 0xff; + data[3] = (*data_length) & 0xff; + return data; +#undef EAC3_SPECIFIC_BOX_MAX_LENGTH +} + +/* Return -1 if incomplete Enhanced AC-3 sample is given. */ +int lsmash_setup_eac3_specific_parameters_from_frame( lsmash_eac3_specific_parameters_t *param, uint8_t *data, uint32_t data_length ) +{ + if( !data || data_length < 5 ) + return -1; + lsmash_bits_t bits = { 0 }; + lsmash_bs_t bs = { 0 }; + uint8_t buffer[EAC3_MAX_SYNCFRAME_LENGTH] = { 0 }; + bs.data = buffer; + bs.alloc = EAC3_MAX_SYNCFRAME_LENGTH; + eac3_info_t handler = { { 0 } }; + eac3_info_t *info = &handler; + uint32_t overall_wasted_data_length = 0; + info->buffer_pos = info->buffer; + info->buffer_end = info->buffer; + info->bits = &bits; + lsmash_bits_init( &bits, &bs ); + while( 1 ) + { + /* Check the remainder length of the input data. + * If there is enough length, then parse the syncframe in it. + * The length 5 is the required byte length to get frame size. 
*/ + uint32_t remainder_length = info->buffer_end - info->buffer_pos; + if( !info->no_more_read && remainder_length < EAC3_MAX_SYNCFRAME_LENGTH ) + { + if( remainder_length ) + memmove( info->buffer, info->buffer_pos, remainder_length ); + uint32_t wasted_data_length = LSMASH_MIN( data_length, EAC3_MAX_SYNCFRAME_LENGTH ); + data_length -= wasted_data_length; + memcpy( info->buffer + remainder_length, data + overall_wasted_data_length, wasted_data_length ); + overall_wasted_data_length += wasted_data_length; + remainder_length += wasted_data_length; + info->buffer_pos = info->buffer; + info->buffer_end = info->buffer + remainder_length; + info->no_more_read = (data_length < 5); + } + if( remainder_length < 5 && info->no_more_read ) + goto setup_param; /* No more valid data. */ + /* Parse syncframe. */ + IF_A52_SYNCWORD( info->buffer_pos ) + goto setup_param; + info->frame_size = 0; + if( eac3_parse_syncframe( info, info->buffer_pos, LSMASH_MIN( remainder_length, EAC3_MAX_SYNCFRAME_LENGTH ) ) ) + goto setup_param; + if( remainder_length < info->frame_size ) + goto setup_param; + int independent = info->strmtyp != 0x1; + if( independent && info->substreamid == 0x0 ) + { + if( info->number_of_audio_blocks == 6 ) + { + /* Encountered the first syncframe of the next access unit. */ + info->number_of_audio_blocks = 0; + goto setup_param; + } + else if( info->number_of_audio_blocks > 6 ) + goto setup_param; + info->number_of_audio_blocks += eac3_audio_block_table[ info->numblkscod ]; + info->number_of_independent_substreams = 0; + } + else if( info->syncframe_count == 0 ) + /* The first syncframe in an AU must be independent and assigned substream ID 0. 
*/ + return -2; + if( independent ) + info->independent_info[info->number_of_independent_substreams ++].num_dep_sub = 0; + else + ++ info->independent_info[info->number_of_independent_substreams - 1].num_dep_sub; + info->buffer_pos += info->frame_size; + ++ info->syncframe_count; + } +setup_param: + if( info->number_of_independent_substreams == 0 || info->number_of_independent_substreams > 8 ) + return -1; + if( !info->dec3_param_initialized ) + eac3_update_specific_param( info ); + *param = info->dec3_param; + return info->number_of_audio_blocks == 6 ? 0 : -1; +} + +uint16_t lsmash_eac3_get_chan_loc_from_chanmap( uint16_t chanmap ) +{ + return ((chanmap & 0x7f8) >> 2) | ((chanmap & 0x2) >> 1); +} + +static int eac3_check_syncframe_header( eac3_info_t *info ) +{ + if( info->strmtyp == 0x3 ) + return -1; /* unknown Stream type */ + lsmash_eac3_substream_info_t *substream_info; + if( info->strmtyp != 0x1 ) + substream_info = &info->independent_info[ info->current_independent_substream_id ]; + else + substream_info = &info->dependent_info; + if( substream_info->fscod == 0x3 && substream_info->fscod2 == 0x3 ) + return -1; /* unknown Sample Rate Code */ + if( substream_info->bsid < 10 || substream_info->bsid > 16 ) + return -1; /* not EAC-3 */ + return 0; +} + +int eac3_parse_syncframe( eac3_info_t *info, uint8_t *data, uint32_t data_length ) +{ + lsmash_bits_t *bits = info->bits; + if( lsmash_bits_import_data( bits, data, data_length ) ) + return -1; + lsmash_bits_get( bits, 16 ); /* syncword (16) */ + info->strmtyp = lsmash_bits_get( bits, 2 ); /* strmtyp (2) */ + info->substreamid = lsmash_bits_get( bits, 3 ); /* substreamid (3) */ + lsmash_eac3_substream_info_t *substream_info; + if( info->strmtyp != 0x1 ) + { + if( info->substreamid == 0x0 && info->number_of_independent_substreams ) + eac3_update_specific_param( info ); + info->current_independent_substream_id = info->substreamid; + substream_info = &info->independent_info[ info->current_independent_substream_id ]; 
+ substream_info->chan_loc = 0; + } + else + substream_info = &info->dependent_info; + info->frame_size = 2 * (lsmash_bits_get( bits, 11 ) + 1); /* frmsiz (11) */ + substream_info->fscod = lsmash_bits_get( bits, 2 ); /* fscod (2) */ + if( substream_info->fscod == 0x3 ) + { + substream_info->fscod2 = lsmash_bits_get( bits, 2 ); /* fscod2 (2) */ + info->numblkscod = 0x3; + } + else + info->numblkscod = lsmash_bits_get( bits, 2 ); /* numblkscod (2) */ + substream_info->acmod = lsmash_bits_get( bits, 3 ); /* acmod (3) */ + substream_info->lfeon = lsmash_bits_get( bits, 1 ); /* lfeon (1) */ + substream_info->bsid = lsmash_bits_get( bits, 5 ); /* bsid (5) */ + lsmash_bits_get( bits, 5 ); /* dialnorm (5) */ + if( lsmash_bits_get( bits, 1 ) ) /* compre (1) */ + lsmash_bits_get( bits, 8 ); /* compr (8) */ + if( substream_info->acmod == 0x0 ) + { + lsmash_bits_get( bits, 5 ); /* dialnorm2 (5) */ + if( lsmash_bits_get( bits, 1 ) ) /* compre2 (1) */ + lsmash_bits_get( bits, 8 ); /* compr2 (8) */ + } + if( info->strmtyp == 0x1 && lsmash_bits_get( bits, 1 ) ) /* chanmape (1) */ + { + uint16_t chanmap = lsmash_bits_get( bits, 16 ); /* chanmap (16) */ + info->independent_info[ info->current_independent_substream_id ].chan_loc |= lsmash_eac3_get_chan_loc_from_chanmap( chanmap ); + } + if( lsmash_bits_get( bits, 1 ) ) /* mixmdate (1) */ + { + if( substream_info->acmod > 0x2 ) + lsmash_bits_get( bits, 2 ); /* dmixmod (2) */ + if( ((substream_info->acmod & 0x1) && (substream_info->acmod > 0x2)) || (substream_info->acmod & 0x4) ) + lsmash_bits_get( bits, 6 ); /* ltrt[c/sur]mixlev (3) + * loro[c/sur]mixlev (3) */ + if( substream_info->lfeon && lsmash_bits_get( bits, 1 ) ) /* lfemixlevcode (1) */ + lsmash_bits_get( bits, 5 ); /* lfemixlevcod (5) */ + if( info->strmtyp == 0x0 ) + { + if( lsmash_bits_get( bits, 1 ) ) /* pgmscle (1) */ + lsmash_bits_get( bits, 6 ); /* pgmscl (6) */ + if( substream_info->acmod == 0x0 && lsmash_bits_get( bits, 1 ) ) /* pgmscle2 (1) */ + lsmash_bits_get( bits, 
6 ); /* pgmscl2 (6) */ + if( lsmash_bits_get( bits, 1 ) ) /* extpgmscle (1) */ + lsmash_bits_get( bits, 6 ); /* extpgmscl (6) */ + uint8_t mixdef = lsmash_bits_get( bits, 2 ); /* mixdef (2) */ + if( mixdef == 0x1 ) + lsmash_bits_get( bits, 5 ); /* premixcmpsel (1) + * drcsrc (1) + * premixcmpscl (3) */ + else if( mixdef == 0x2 ) + lsmash_bits_get( bits, 12 ); /* mixdata (12) */ + else if( mixdef == 0x3 ) + { + uint8_t mixdeflen = lsmash_bits_get( bits, 5 ); /* mixdeflen (5) */ + lsmash_bits_get( bits, 8 * (mixdeflen + 2) ); /* mixdata (8 * (mixdeflen + 2)) + * mixdatafill (0-7) */ + } + if( substream_info->acmod < 0x2 ) + { + if( lsmash_bits_get( bits, 1 ) ) /* paninfoe (1) */ + lsmash_bits_get( bits, 14 ); /* panmean (8) + * paninfo (6) */ + if( substream_info->acmod == 0x0 && lsmash_bits_get( bits, 1 ) ) /* paninfo2e (1) */ + lsmash_bits_get( bits, 14 ); /* panmean2 (8) + * paninfo2 (6) */ + } + if( lsmash_bits_get( bits, 1 ) ) /* frmmixcfginfoe (1) */ + { + if( info->numblkscod == 0x0 ) + lsmash_bits_get( bits, 5 ); /* blkmixcfginfo[0] (5) */ + else + { + int number_of_blocks_per_syncframe = ((int []){ 1, 2, 3, 6 })[ info->numblkscod ]; + for( int blk = 0; blk < number_of_blocks_per_syncframe; blk++ ) + if( lsmash_bits_get( bits, 1 ) ) /* blkmixcfginfoe (1)*/ + lsmash_bits_get( bits, 5 ); /* blkmixcfginfo[blk] (5) */ + } + } + } + } + if( lsmash_bits_get( bits, 1 ) ) /* infomdate (1) */ + { + substream_info->bsmod = lsmash_bits_get( bits, 3 ); /* bsmod (3) */ + lsmash_bits_get( bits, 1 ); /* copyrightb (1) */ + lsmash_bits_get( bits, 1 ); /* origbs (1) */ + if( substream_info->acmod == 0x2 ) + lsmash_bits_get( bits, 4 ); /* dsurmod (2) + * dheadphonmod (2) */ + else if( substream_info->acmod >= 0x6 ) + lsmash_bits_get( bits, 2 ); /* dsurexmod (2) */ + if( lsmash_bits_get( bits, 1 ) ) /* audprodie (1) */ + lsmash_bits_get( bits, 8 ); /* mixlevel (5) + * roomtyp (2) + * adconvtyp (1) */ + if( substream_info->acmod == 0x0 && lsmash_bits_get( bits, 1 ) ) /* 
audprodie2 (1) */ + lsmash_bits_get( bits, 8 ); /* mixlevel2 (5) + * roomtyp2 (2) + * adconvtyp2 (1) */ + if( substream_info->fscod < 0x3 ) + lsmash_bits_get( bits, 1 ); /* sourcefscod (1) */ + } + else + substream_info->bsmod = 0; + if( info->strmtyp == 0x0 && info->numblkscod != 0x3 ) + lsmash_bits_get( bits, 1 ); /* convsync (1) */ + if( info->strmtyp == 0x2 ) + { + int blkid; + if( info->numblkscod == 0x3 ) + blkid = 1; + else + blkid = lsmash_bits_get( bits, 1 ); /* blkid (1) */ + if( blkid ) + lsmash_bits_get( bits, 6 ); /* frmsizecod (6) */ + } + if( lsmash_bits_get( bits, 1 ) ) /* addbsie (1) */ + { + uint8_t addbsil = lsmash_bits_get( bits, 6 ); /* addbsil (6) */ + lsmash_bits_get( bits, (addbsil + 1) * 8 ); /* addbsi ((addbsil + 1) * 8) */ + } + lsmash_bits_empty( bits ); + return eac3_check_syncframe_header( info ); +} + +void eac3_update_specific_param( eac3_info_t *info ) +{ + lsmash_eac3_specific_parameters_t *param = &info->dec3_param; + param->data_rate = 0; + param->num_ind_sub = info->number_of_independent_substreams - 1; + for( uint8_t i = 0; i <= param->num_ind_sub; i++ ) + param->independent_info[i] = info->independent_info[i]; + info->dec3_param_initialized = 1; +} + +#define EAC3_SPECIFIC_BOX_MIN_LENGTH 13 + +int eac3_construct_specific_parameters( lsmash_codec_specific_t *dst, lsmash_codec_specific_t *src ) +{ + assert( dst && dst->data.structured && src && src->data.unstructured ); + if( src->size < EAC3_SPECIFIC_BOX_MIN_LENGTH ) + return -1; + lsmash_eac3_specific_parameters_t *param = (lsmash_eac3_specific_parameters_t *)dst->data.structured; + uint8_t *data = src->data.unstructured; + uint64_t size = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; + data += ISOM_BASEBOX_COMMON_SIZE; + if( size == 1 ) + { + size = ((uint64_t)data[0] << 56) | ((uint64_t)data[1] << 48) | ((uint64_t)data[2] << 40) | ((uint64_t)data[3] << 32) + | ((uint64_t)data[4] << 24) | ((uint64_t)data[5] << 16) | ((uint64_t)data[6] << 8) | 
(uint64_t)data[7]; + data += 8; + } + if( size != src->size ) + return -1; + param->data_rate = (data[0] << 5) | ((data[1] >> 3) & 0x1F); /* XXXX XXXX XXXX Xxxx */ + param->num_ind_sub = data[1] & 0x07; /* xxxx xxxx xxxx xXXX */ + data += 2; + size -= LSMASH_MIN( size, (uint64_t)(data - (uint8_t *)src->data.unstructured) ); /* size still counted the box header; keep only the bytes not yet read so the checks below are real bounds */ + for( int i = 0; i <= param->num_ind_sub; i++ ) + { + if( size < 3 ) + return -1; + lsmash_eac3_substream_info_t *independent_info = &param->independent_info[i]; + independent_info->fscod = (data[0] >> 6) & 0x03; /* XXxx xxxx xxxx xxxx xxxx xxxx */ + independent_info->bsid = (data[0] >> 1) & 0x1F; /* xxXX XXXx xxxx xxxx xxxx xxxx */ + independent_info->bsmod = ((data[0] & 0x01) << 4) | ((data[1] >> 4) & 0x0F); /* xxxx xxxX XXXX xxxx xxxx xxxx */ + independent_info->acmod = (data[1] >> 1) & 0x07; /* xxxx xxxx xxxx XXXx xxxx xxxx */ + independent_info->lfeon = data[1] & 0x01; /* xxxx xxxx xxxx xxxX xxxx xxxx */ + independent_info->num_dep_sub = (data[2] >> 1) & 0x0F; /* xxxx xxxx xxxx xxxx xxxX XXXx */ + data += 3; + size -= 3; + if( independent_info->num_dep_sub > 0 ) + { + if( size < 1 ) + return -1; + independent_info->chan_loc = ((data[-1] & 0x01) << 8) | data[0]; /* xxxx xxxX XXXX XXXX */ + data += 1; + size -= 1; + } + } + return 0; +} + +int eac3_print_codec_specific( FILE *fp, lsmash_root_t *root, isom_box_t *box, int level ) +{ + assert( fp && root && box ); + int indent = level; + lsmash_ifprintf( fp, indent++, "[%s: EC3 Specific Box]\n", isom_4cc2str( box->type.fourcc ) ); + lsmash_ifprintf( fp, indent, "position = %"PRIu64"\n", box->pos ); + lsmash_ifprintf( fp, indent, "size = %"PRIu64"\n", box->size ); + if( box->size < EAC3_SPECIFIC_BOX_MIN_LENGTH ) + return -1; + isom_extension_box_t *ext = (isom_extension_box_t *)box; + assert( ext->format == EXTENSION_FORMAT_BINARY ); + uint8_t *data = ext->form.binary; + isom_skip_box_common( &data ); + lsmash_ifprintf( fp, indent, "data_rate = %"PRIu16" kbit/s\n", (data[0] << 5) | ((data[1] >> 3) & 0x1F) ); + uint8_t num_ind_sub = data[1] & 0x07; + lsmash_ifprintf( 
fp, indent, "num_ind_sub = %"PRIu8"\n", num_ind_sub ); + data += 2; + for( int i = 0; i <= num_ind_sub; i++ ) + { + lsmash_ifprintf( fp, indent, "independent_substream[%d]\n", i ); + int sub_indent = indent + 1; + uint8_t fscod = (data[0] >> 6) & 0x03; + uint8_t bsid = (data[0] >> 1) & 0x1F; + uint8_t bsmod = ((data[0] & 0x01) << 4) | ((data[1] >> 4) & 0x0F); + uint8_t acmod = (data[1] >> 1) & 0x07; + uint8_t lfeon = data[1] & 0x01; + uint8_t num_dep_sub = (data[2] >> 1) & 0x0F; + if( fscod != 0x03 ) + lsmash_ifprintf( fp, sub_indent, "fscod = %"PRIu8" (%"PRIu32" Hz)\n", fscod, ac3_sample_rate_table[fscod] ); + else + lsmash_ifprintf( fp, sub_indent, "fscod = 0x03 (reduced sample rate)\n" ); + lsmash_ifprintf( fp, sub_indent, "bsid = %"PRIu8"\n", bsid ); + if( bsmod < 0x08 ) + lsmash_ifprintf( fp, sub_indent, "bsmod = %"PRIu8" (%s)\n", bsmod, bit_stream_mode[bsmod + (acmod == 0x01 ? 1 : acmod > 0x01 ? 2 : 0)] ); + else + lsmash_ifprintf( fp, sub_indent, "bsmod = %"PRIu8" (Undefined service)\n" ); + lsmash_ifprintf( fp, sub_indent, "acmod = %"PRIu8" (%s)\n", acmod, audio_coding_mode[acmod + (bsmod == 0x07 ? 8 : 0)] ); + lsmash_ifprintf( fp, sub_indent, "lfeon = %s\n", lfeon ? 
"1 (LFE)" : "0" ); + lsmash_ifprintf( fp, sub_indent, "num_dep_sub = %"PRIu8"\n", num_dep_sub ); + data += 3; + if( num_dep_sub > 0 ) + { + static const char *channel_location[] = + { + "LFE2", + "Cvh", + "Lvh/Rvh pair", + "Lw/Rw pair", + "Lsd/Rsd pair", + "Ts", + "Cs", + "Lrs/Rrs pair", + "Lc/Rc pair" + }; + uint16_t chan_loc = ((data[-1] & 0x01) << 8) | data[0]; + lsmash_ifprintf( fp, sub_indent, "chan_loc = 0x%04"PRIu16"\n", chan_loc ); + for( int j = 0; j < 9; j++ ) + if( (chan_loc >> j & 0x01) ) + lsmash_ifprintf( fp, sub_indent + 1, "%s\n", channel_location[j] ); + data += 1; + } + else + lsmash_ifprintf( fp, sub_indent, "reserved = %"PRIu8"\n", data[2] & 0x01 ); + } + return 0; +} + +#undef EAC3_SPECIFIC_BOX_MIN_LENGTH diff --git a/output/mp4/a52.h b/output/mp4/a52.h new file mode 100644 index 0000000..72e4866 --- /dev/null +++ b/output/mp4/a52.h @@ -0,0 +1,75 @@ +/***************************************************************************** + * a52.h: + ***************************************************************************** + * Copyright (C) 2012 L-SMASH project + * + * Authors: Yusuke Nakamura + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ *****************************************************************************/ + +/* This file is available under an ISC license. */ + +#define AC3_MIN_SYNCFRAME_LENGTH 128 +#define AC3_MAX_SYNCFRAME_LENGTH 3840 +#define EAC3_MAX_SYNCFRAME_LENGTH 4096 + +#define IF_A52_SYNCWORD( x ) if( (x)[0] != 0x0b || (x)[1] != 0x77 ) + + +typedef struct +{ + lsmash_ac3_specific_parameters_t dac3_param; + lsmash_bits_t *bits; + uint8_t buffer[AC3_MAX_SYNCFRAME_LENGTH]; + uint8_t *next_dac3; + uint32_t au_number; +} ac3_info_t; + +typedef struct +{ + lsmash_eac3_specific_parameters_t dec3_param; + lsmash_eac3_substream_info_t independent_info[8]; + lsmash_eac3_substream_info_t dependent_info; + uint8_t dec3_param_initialized; + uint8_t strmtyp; + uint8_t substreamid; + uint8_t current_independent_substream_id; + uint8_t numblkscod; + uint8_t number_of_audio_blocks; + uint8_t frmsizecod; + uint8_t number_of_independent_substreams; + uint8_t no_more_read; + uint8_t *next_dec3; + uint32_t next_dec3_length; + uint32_t syncframe_count; + uint32_t syncframe_count_in_au; + uint32_t frame_size; + uint8_t buffer[2 * EAC3_MAX_SYNCFRAME_LENGTH]; + uint8_t *buffer_pos; + uint8_t *buffer_end; + lsmash_bits_t *bits; + lsmash_multiple_buffers_t *au_buffers; + uint8_t *au; + uint32_t au_length; + uint8_t *incomplete_au; + uint32_t incomplete_au_length; + uint32_t au_number; +} eac3_info_t; + +static const uint32_t ac3_sample_rate_table[4] = { 48000, 44100, 32000, 0 }; +static const uint8_t eac3_audio_block_table[4] = { 1, 2, 3, 6 }; + +int ac3_parse_syncframe_header( ac3_info_t *info, uint8_t *data ); +int eac3_parse_syncframe( eac3_info_t *info, uint8_t *data, uint32_t data_length ); +void eac3_update_specific_param( eac3_info_t *info ); diff --git a/output/mp4/alac.c b/output/mp4/alac.c new file mode 100644 index 0000000..8d20af9 --- /dev/null +++ b/output/mp4/alac.c @@ -0,0 +1,113 @@ +/***************************************************************************** + * alac.c: + 
***************************************************************************** + * Copyright (C) 2012 L-SMASH project + * + * Authors: Yusuke Nakamura + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *****************************************************************************/ + +/* This file is available under an ISC license. 
*/ + +#include "internal.h" /* must be placed first */ + +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> + +#include "box.h" + +#define ALAC_SPECIFIC_BOX_LENGTH 36 + +uint8_t *lsmash_create_alac_specific_info( lsmash_alac_specific_parameters_t *param, uint32_t *data_length ) +{ + uint8_t buffer[ALAC_SPECIFIC_BOX_LENGTH]; /* left uninitialized: the puts below write all 36 bytes */ + lsmash_bs_t bs = { 0 }; + bs.data = buffer; + bs.alloc = ALAC_SPECIFIC_BOX_LENGTH; + lsmash_bs_put_be32( &bs, ALAC_SPECIFIC_BOX_LENGTH ); /* box size */ + lsmash_bs_put_be32( &bs, ISOM_BOX_TYPE_ALAC.fourcc ); /* box type: 'alac' */ + lsmash_bs_put_be32( &bs, 0 ); /* version + flags */ + lsmash_bs_put_be32( &bs, param->frameLength ); + lsmash_bs_put_byte( &bs, 0 ); /* compatibleVersion */ + lsmash_bs_put_byte( &bs, param->bitDepth ); + lsmash_bs_put_byte( &bs, 40 ); /* pb */ + lsmash_bs_put_byte( &bs, 14 ); /* mb */ + lsmash_bs_put_byte( &bs, 10 ); /* kb */ + lsmash_bs_put_byte( &bs, param->numChannels ); + lsmash_bs_put_be16( &bs, 255 ); /* maxRun */ + lsmash_bs_put_be32( &bs, param->maxFrameBytes ); + lsmash_bs_put_be32( &bs, param->avgBitrate ); + lsmash_bs_put_be32( &bs, param->sampleRate ); + return lsmash_bs_export_data( &bs, data_length ); +} + +int alac_construct_specific_parameters( lsmash_codec_specific_t *dst, lsmash_codec_specific_t *src ) +{ + assert( dst && dst->data.structured && src && src->data.unstructured ); + if( src->size < ALAC_SPECIFIC_BOX_LENGTH ) + return -1; + lsmash_alac_specific_parameters_t *param = (lsmash_alac_specific_parameters_t *)dst->data.structured; + uint8_t *data = src->data.unstructured; + uint64_t size = ((uint32_t)data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; /* shift in unsigned so a top byte >= 0x80 neither overflows int nor sign-extends into size */ + data += ISOM_BASEBOX_COMMON_SIZE; + if( size == 1 ) + { + size = ((uint64_t)data[0] << 56) | ((uint64_t)data[1] << 48) | ((uint64_t)data[2] << 40) | ((uint64_t)data[3] << 32) + | ((uint64_t)data[4] << 24) | ((uint64_t)data[5] << 16) | ((uint64_t)data[6] << 8) | (uint64_t)data[7]; + data += 8; + } + if( size != src->size ) + return -1; + data += 4; /* Skip 
version and flags. */ + param->frameLength = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; + param->bitDepth = data[5]; + param->numChannels = data[9]; + param->maxFrameBytes = (data[12] << 24) | (data[13] << 16) | (data[14] << 8) | data[15]; + param->avgBitrate = (data[16] << 24) | (data[17] << 16) | (data[18] << 8) | data[19]; + param->sampleRate = (data[20] << 24) | (data[21] << 16) | (data[22] << 8) | data[23]; + return 0; +} + +int alac_print_codec_specific( FILE *fp, lsmash_root_t *root, isom_box_t *box, int level ) +{ + assert( fp && root && box ); + int indent = level; + lsmash_ifprintf( fp, indent++, "[%s: ALAC Specific Box]\n", isom_4cc2str( box->type.fourcc ) ); + lsmash_ifprintf( fp, indent, "position = %"PRIu64"\n", box->pos ); + lsmash_ifprintf( fp, indent, "size = %"PRIu64"\n", box->size ); + if( box->size < ALAC_SPECIFIC_BOX_LENGTH ) + return -1; + isom_extension_box_t *ext = (isom_extension_box_t *)box; + assert( ext->format == EXTENSION_FORMAT_BINARY ); + uint8_t *data = ext->form.binary; + isom_skip_box_common( &data ); + lsmash_ifprintf( fp, indent, "version = %"PRIu8"\n", data[0] ); + lsmash_ifprintf( fp, indent, "flags = 0x%06"PRIx32"\n", (data[1] << 16) | (data[2] << 8) | data[3] ); + data += 4; + lsmash_ifprintf( fp, indent, "frameLength = %"PRIu32"\n", (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3] ); + lsmash_ifprintf( fp, indent, "compatibleVersion = %"PRIu8"\n", data[4] ); + lsmash_ifprintf( fp, indent, "bitDepth = %"PRIu8"\n", data[5] ); + lsmash_ifprintf( fp, indent, "pb = %"PRIu8"\n", data[6] ); + lsmash_ifprintf( fp, indent, "mb = %"PRIu8"\n", data[7] ); + lsmash_ifprintf( fp, indent, "kb = %"PRIu8"\n", data[8] ); + lsmash_ifprintf( fp, indent, "numChannels = %"PRIu8"\n", data[9] ); + lsmash_ifprintf( fp, indent, "maxRun = %"PRIu16"\n", (data[10] << 8) | data[11] ); + lsmash_ifprintf( fp, indent, "maxFrameBytes = %"PRIu32"\n", (data[12] << 24) | (data[13] << 16) | (data[14] << 8) | data[15] ); + 
lsmash_ifprintf( fp, indent, "avgBitrate = %"PRIu32"\n", (data[16] << 24) | (data[17] << 16) | (data[18] << 8) | data[19] ); + lsmash_ifprintf( fp, indent, "sampleRate = %"PRIu32"\n", (data[20] << 24) | (data[21] << 16) | (data[22] << 8) | data[23] ); + return 0; +} + +#undef ALAC_SPECIFIC_BOX_LENGTH diff --git a/output/mp4/box.c b/output/mp4/box.c new file mode 100644 index 0000000..2b3496d --- /dev/null +++ b/output/mp4/box.c @@ -0,0 +1,250 @@ +/***************************************************************************** + * box.c: + ***************************************************************************** + * Copyright (C) 2012 L-SMASH project + * + * Authors: Yusuke Nakamura + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *****************************************************************************/ + +/* This file is available under an ISC license. 
*/
+
+#include "internal.h" /* must be placed first */
+
+#include "box.h"
+
+/* Pack a fourcc and 12 identifier bytes into an extended box type.
+ * All 12 bytes of 'id' are read and copied by value. */
+lsmash_extended_box_type_t lsmash_form_extended_box_type( uint32_t fourcc, const uint8_t id[12] )
+{
+    return (lsmash_extended_box_type_t){ fourcc, { id[0], id[1], id[2], id[3], id[4], id[5],
+                                                   id[6], id[7], id[8], id[9], id[10], id[11] } };
+}
+
+/* Form a box type from a fourcc, using LSMASH_ISO_12_BYTES as the 12 identifier bytes. */
+lsmash_box_type_t lsmash_form_iso_box_type( uint32_t fourcc )
+{
+    return (lsmash_box_type_t){ fourcc, lsmash_form_extended_box_type( fourcc, LSMASH_ISO_12_BYTES ) };
+}
+
+/* Form a box type from a fourcc, using LSMASH_QTFF_12_BYTES as the 12 identifier bytes. */
+lsmash_box_type_t lsmash_form_qtff_box_type( uint32_t fourcc )
+{
+    return (lsmash_box_type_t){ fourcc, lsmash_form_extended_box_type( fourcc, LSMASH_QTFF_12_BYTES ) };
+}
+
+/* Non-zero if 'a' and 'b' have the same fourcc, the same user fourcc and the same 12 user id bytes.
+ * Both arguments and the whole replacement list are parenthesized so that the macro stays correct
+ * when an argument is an expression, or when the result is negated or combined with other operators.
+ * Each argument is evaluated several times; pass side-effect-free expressions only. */
+#define CHECK_BOX_TYPE_IDENTICAL( a, b ) \
+    (  (a).fourcc      == (b).fourcc      \
+    && (a).user.fourcc == (b).user.fourcc \
+    && (a).user.id[0]  == (b).user.id[0]  \
+    && (a).user.id[1]  == (b).user.id[1]  \
+    && (a).user.id[2]  == (b).user.id[2]  \
+    && (a).user.id[3]  == (b).user.id[3]  \
+    && (a).user.id[4]  == (b).user.id[4]  \
+    && (a).user.id[5]  == (b).user.id[5]  \
+    && (a).user.id[6]  == (b).user.id[6]  \
+    && (a).user.id[7]  == (b).user.id[7]  \
+    && (a).user.id[8]  == (b).user.id[8]  \
+    && (a).user.id[9]  == (b).user.id[9]  \
+    && (a).user.id[10] == (b).user.id[10] \
+    && (a).user.id[11] == (b).user.id[11] )
+
+/* Return 1 if the two box types are identical, otherwise return 0. */
+int lsmash_check_box_type_identical( lsmash_box_type_t a, lsmash_box_type_t b )
+{
+    return CHECK_BOX_TYPE_IDENTICAL( a, b );
+}
+
+/* Return 1 if the two codec types are identical, otherwise return 0. */
+int lsmash_check_codec_type_identical( lsmash_codec_type_t a, lsmash_codec_type_t b )
+{
+    return CHECK_BOX_TYPE_IDENTICAL( a, b );
+}
+
+/* Return 1 if any field of the box type (fourcc, user fourcc or any user id byte) is non-zero.
+ * Return 0 if every field is zero or if 'box_type' is NULL. */
+int lsmash_check_box_type_specified( lsmash_box_type_t *box_type )
+{
+    assert( box_type );
+    if( !box_type )
+        return 0;   /* still reached when assert() is compiled out */
+    return !!(box_type->fourcc
+            | box_type->user.fourcc
+            | box_type->user.id[0] | box_type->user.id[1] | box_type->user.id[2]  | box_type->user.id[3]
+            | box_type->user.id[4] | box_type->user.id[5] | box_type->user.id[6]  | box_type->user.id[7]
+            | box_type->user.id[8] | box_type->user.id[9] | box_type->user.id[10] | box_type->user.id[11]);
+}
+
+/* Initialize the fields shared by every box: inherit the root from the parent, link the parent,
+ * reset the size to 0 and set the type.
+ * version and flags are cleared only when the parent is not ISOM_BOX_TYPE_STSD and
+ * isom_is_fullbox() reports the box as a full box; otherwise they are left untouched.
+ * '_box', '_parent' and the parent's root must be non-NULL (asserted). */
+void isom_init_box_common( void *_box, void *_parent, lsmash_box_type_t box_type )
+{
+    isom_box_t *box    = (isom_box_t *)_box;
+    isom_box_t *parent = (isom_box_t *)_parent;
+    assert( box && parent && parent->root );
+    box->root   = parent->root;
+    box->parent = parent;
+    box->size   = 0;
+    box->type   = box_type;
+    if( lsmash_check_box_type_identical( parent->type, ISOM_BOX_TYPE_STSD ) || !isom_is_fullbox( box ) )
+        return;
+    box->version = 0;
+    box->flags   = 0;
+}
+
+/* Write the base box header: size and type.
+ * A size above UINT32_MAX is written as size = 1 followed by the type and a 64-bit largesize.
+ * When the fourcc equals that of ISOM_BOX_TYPE_UUID, the user fourcc and the 12 user id bytes follow. */
+void isom_bs_put_basebox_common( lsmash_bs_t *bs, isom_box_t *box )
+{
+    if( box->size > UINT32_MAX )
+    {
+        lsmash_bs_put_be32( bs, 1 );
+        lsmash_bs_put_be32( bs, box->type.fourcc );
+        lsmash_bs_put_be64( bs, box->size );    /* largesize */
+    }
+    else
+    {
+        lsmash_bs_put_be32( bs, (uint32_t)box->size );
+        lsmash_bs_put_be32( bs, box->type.fourcc );
+    }
+    if( box->type.fourcc == ISOM_BOX_TYPE_UUID.fourcc )
+    {
+        lsmash_bs_put_be32( bs, box->type.user.fourcc );
+        lsmash_bs_put_bytes( bs, 12, box->type.user.id );
+    }
+}
+
+/* Write the full box header: the base box header followed by an 8-bit version and 24-bit flags. */
+void isom_bs_put_fullbox_common( lsmash_bs_t *bs, isom_box_t *box )
+{
+    isom_bs_put_basebox_common( bs, box );
+    lsmash_bs_put_byte( bs, box->version );
+    lsmash_bs_put_be24( bs, box->flags );
+}
+
+/* Write the header that fits the given box.
+ * A NULL box sets bs->error and writes nothing.
+ * A box whose parent is ISOM_BOX_TYPE_STSD always gets the base box header;
+ * any other box gets the full box header when isom_is_fullbox() returns 1. */
+void isom_bs_put_box_common( lsmash_bs_t *bs, void *box )
+{
+    if( !box )
+    {
+        bs->error = 1;
+        return;
+    }
+    isom_box_t *parent = ((isom_box_t *)box)->parent;
+    if( parent && lsmash_check_box_type_identical( parent->type, ISOM_BOX_TYPE_STSD ) )
+    {
+        isom_bs_put_basebox_common( bs, (isom_box_t *)box );
+        return;
+    }
+    if( isom_is_fullbox( box ) )
+        isom_bs_put_fullbox_common( bs, (isom_box_t *)box );
+    else
+        isom_bs_put_basebox_common( bs, (isom_box_t *)box );
+}
+
+/* Return 1 if the box is fullbox, Otherwise return 0.
+ * The lookup table is filled on the first call and terminated by LSMASH_BOX_TYPE_UNSPECIFIED;
+ * it has room for 50 entries, of which 46 types plus the terminator are used.
+ * No locking is performed around the first-call initialization.
+ * ISOM_BOX_TYPE_CPRT is treated as a full box only when its parent is ISOM_BOX_TYPE_UDTA. */
+int isom_is_fullbox( void *box )
+{
+    isom_box_t *current = (isom_box_t *)box;
+    lsmash_box_type_t type = current->type;
+    static lsmash_box_type_t fullbox_type_table[50] = { LSMASH_BOX_TYPE_INITIALIZER };
+    if( !lsmash_check_box_type_specified( &fullbox_type_table[0] ) )
+    {
+        /* Initialize the table. */
+        int i = 0;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_MVHD;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_TKHD;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_IODS;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_ESDS;
+        fullbox_type_table[i++] = QT_BOX_TYPE_ESDS;
+        fullbox_type_table[i++] = QT_BOX_TYPE_CLEF;
+        fullbox_type_table[i++] = QT_BOX_TYPE_PROF;
+        fullbox_type_table[i++] = QT_BOX_TYPE_ENOF;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_ELST;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_MDHD;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_HDLR;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_VMHD;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_SMHD;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_HMHD;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_NMHD;
+        fullbox_type_table[i++] = QT_BOX_TYPE_GMIN;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_DREF;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_URL;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_STSD;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_STSL;
+        fullbox_type_table[i++] = QT_BOX_TYPE_CHAN;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_STTS;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_CTTS;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_CSLG;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_STSS;
+        fullbox_type_table[i++] = QT_BOX_TYPE_STPS;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_SDTP;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_STSC;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_STSZ;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_STCO;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_CO64;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_SGPD;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_SBGP;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_CHPL;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_META;
+        fullbox_type_table[i++] = QT_BOX_TYPE_KEYS;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_MEAN;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_NAME;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_MEHD;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_TREX;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_MFHD;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_TFHD;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_TFDT;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_TRUN;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_TFRA;
+        fullbox_type_table[i++] = ISOM_BOX_TYPE_MFRO;
+        fullbox_type_table[i]   = LSMASH_BOX_TYPE_UNSPECIFIED;
+    }
+    for( int i = 0; lsmash_check_box_type_specified( &fullbox_type_table[i] ); i++ )
+        if( lsmash_check_box_type_identical( type, fullbox_type_table[i] ) )
+            return 1;
+    return lsmash_check_box_type_identical( type, ISOM_BOX_TYPE_CPRT )
+        && current->parent && lsmash_check_box_type_identical( current->parent->type, ISOM_BOX_TYPE_UDTA );
+}
+
+/* Return 1 if the sample type is LPCM audio, Otherwise return 0.
+ * QT_CODEC_TYPE_RAW_AUDIO counts only when the box's manager has LSMASH_AUDIO_DESCRIPTION set. */
+int isom_is_lpcm_audio( void *box )
+{
+    isom_box_t *current = (isom_box_t *)box;
+    lsmash_box_type_t type = current->type;
+    return lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_23NI_AUDIO )
+        || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_NONE_AUDIO )
+        || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_LPCM_AUDIO )
+        || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_SOWT_AUDIO )
+        || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_TWOS_AUDIO )
+        || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_FL32_AUDIO )
+        || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_FL64_AUDIO )
+        || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_IN24_AUDIO )
+        || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_IN32_AUDIO )
+        || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_NOT_SPECIFIED )
+        || (lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_RAW_AUDIO ) && (current->manager & LSMASH_AUDIO_DESCRIPTION));
+}
+
+/* Return 1 if the sample type is uncompressed Y'CbCr video, Otherwise return 0.
*/
+int isom_is_uncompressed_ycbcr( lsmash_box_type_t type )
+{
+    return lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_V210_VIDEO )
+        || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_V216_VIDEO )
+        || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_V308_VIDEO )
+        || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_V408_VIDEO )
+        || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_V410_VIDEO )
+        || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_YUV2_VIDEO );
+}
+
+/* Advance *p_data past the size and type fields at the start of a serialized box.
+ * When the big-endian 32-bit size field equals 1, the 8-byte largesize that follows is skipped too.
+ * Nothing else is skipped here (no user type, no version/flags).
+ * Returns the number of bytes skipped: ISOM_BASEBOX_COMMON_SIZE, or ISOM_BASEBOX_COMMON_SIZE + 8.
+ * The caller must guarantee that this many bytes are readable at *p_data. */
+size_t isom_skip_box_common( uint8_t **p_data )
+{
+    uint8_t *orig = *p_data;
+    uint8_t *data = *p_data;
+    /* Widen every byte before shifting: a uint8_t is promoted to int, and shifting a value
+     * of 0x80 or more left by 24 bits does not fit in a 32-bit int. */
+    uint32_t size = ((uint32_t)data[0] << 24) | ((uint32_t)data[1] << 16) | ((uint32_t)data[2] << 8) | (uint32_t)data[3];
+    data += ISOM_BASEBOX_COMMON_SIZE;
+    if( size == 1 )
+        data += 8;      /* 64-bit largesize; only its length matters here, not its value */
+    *p_data = data;
+    return data - orig;
+}
diff --git a/output/mp4/box.h b/output/mp4/box.h
new file mode 100644
index 0000000..35ae30a
--- /dev/null
+++ b/output/mp4/box.h
@@ -0,0 +1,2248 @@
+/*****************************************************************************
+ * box.h:
+ *****************************************************************************
+ * Copyright (C) 2010-2012 L-SMASH project
+ *
+ * Authors: Yusuke Nakamura
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *****************************************************************************/
+
+/* This file is available under an ISC license. */
+
+#ifndef LSMASH_BOX_H
+#define LSMASH_BOX_H
+
+/* For generating creation_time and modification_time.
+ * According to ISO/IEC-14496-5-2001, the difference between Unix time and Mac OS time is 2082758400.
+ * However this is wrong and 2082844800 is correct. */
+#include <time.h>
+#define ISOM_MAC_EPOCH_OFFSET 2082844800
+
+#include "utils.h"
+
+typedef struct isom_box_tag isom_box_t;
+
+/* Members shared by every box structure; expand this macro first inside a box struct.
+ * If size is 1, then largesize is actual size.
+ * If size is 0, then this box is the last one in the file. */
+#define ISOM_BASEBOX_COMMON \
+    lsmash_root_t *root;            /* pointer of root */ \
+    isom_box_t *parent;             /* pointer of the parent box of this box */ \
+    uint32_t manager;               /* flags for L-SMASH */ \
+    uint64_t pos;                   /* starting position of this box in the file */ \
+    lsmash_entry_list_t extensions; /* extension boxes */ \
+    uint64_t size;                  /* the number of bytes in this box */ \
+    lsmash_box_type_t type
+
+#define ISOM_FULLBOX_COMMON \
+    ISOM_BASEBOX_COMMON; \
+    uint8_t version;    /* Basically, version is either 0 or 1 */ \
+    uint32_t flags      /* In the actual structure of box, flags is 24 bits.
*/ + +#define ISOM_BASEBOX_COMMON_SIZE 8 +#define ISOM_FULLBOX_COMMON_SIZE 12 +#define ISOM_LIST_FULLBOX_COMMON_SIZE 16 + +#define LSMASH_UNKNOWN_BOX 0x01 +#define LSMASH_ABSENT_IN_ROOT 0x02 +#define LSMASH_QTFF_BASE 0x04 +#define LSMASH_VIDEO_DESCRIPTION 0x08 +#define LSMASH_AUDIO_DESCRIPTION 0x10 +#define LSMASH_FULLBOX 0x20 +#define LSMASH_LAST_BOX 0x40 +#define LSMASH_INCOMPLETE_BOX 0x80 + +/* 12-byte ISO reserved value: + * 0xXXXXXXXX-0011-0010-8000-00AA00389B71 */ +static const uint8_t static_lsmash_iso_12_bytes[12] + = { 0x00, 0x11, 0x00, 0x10, 0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71 }; +#define LSMASH_ISO_12_BYTES static_lsmash_iso_12_bytes + +/* L-SMASH original 12-byte QuickTime file format value for CODEC discrimination mainly: + * 0xXXXXXXXX-0F11-4DA5-BF4E-F2C48C6AA11E */ +static const uint8_t static_lsmash_qtff_12_bytes[12] + = { 0x0F, 0x11, 0x4D, 0xA5, 0xBF, 0x4E, 0xF2, 0xC4, 0x8C, 0x6A, 0xA1, 0x1E }; +#define LSMASH_QTFF_12_BYTES static_lsmash_qtff_12_bytes + +struct isom_box_tag +{ + ISOM_FULLBOX_COMMON; +}; + +/* Unknown Box + * This structure is for boxes we don't know or define yet. + * This box must be always appended as an extension box. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint32_t unknown_size; + uint8_t *unknown_field; +} isom_unknown_box_t; + +/* Extension structure */ +typedef enum +{ + EXTENSION_FORMAT_BINARY = 0, + EXTENSION_FORMAT_BOX = 1 +} isom_extension_format; + +typedef void (*isom_extension_destructor_t)( void *extension_data ); + +typedef struct +{ + ISOM_BASEBOX_COMMON; + isom_extension_format format; + isom_extension_destructor_t destruct; + union + { + uint8_t *binary; + void *box; + } form; +} isom_extension_box_t; + +/* File Type Box + * This box identifies the specifications to which this file complies. + * This box shall occur before any variable-length box. + * In the absence of this box, the file is QuickTime file format or MP4 version 1 file format. 
+ * In MP4 version 1 file format, Object Descriptor Box is mandatory. + * In QuickTime file format, Object Descriptor Box isn't defined. + * Therefore, if this box and an Object Descriptor Box are absent in the file, the file shall be QuickTime file format. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint32_t major_brand; /* brand identifier */ + uint32_t minor_version; /* the minor version of the major brand */ + uint32_t *compatible_brands; /* a list, to the end of the box, of brands */ + + uint32_t brand_count; /* the number of factors in compatible_brands array */ +} isom_ftyp_t; + +/* Color Table Box + * This box defines a list of preferred colors for displaying the movie on devices that support only 256 colors. + * The list may contain up to 256 colors. This box contains a Macintosh color table data structure. + * This box is defined in QuickTime File Format Specification. + * The color table structure is also defined in struct ColorTable defined in Quickdraw.h. */ +typedef struct +{ + /* An array of colors. + * Each color is made of four unsigned 16-bit integers. */ + uint16_t value; /* index or other value + * Must be set to 0. */ + /* true color */ + uint16_t r; /* magnitude of red component */ + uint16_t g; /* magnitude of green component */ + uint16_t b; /* magnitude of blue component */ +} isom_qt_color_array_t; + +typedef struct +{ + uint32_t seed; /* unique identifier for table + * Must be set to 0. */ + uint16_t flags; /* high bit: 0 = PixMap; 1 = device + * Must be set to 0x8000. */ + uint16_t size; /* the number of colors in the following color array + * This is a zero-relative value; + * setting this field to 0 means that there is one color in the array. */ + isom_qt_color_array_t *array; +} isom_qt_color_table_t; + +typedef struct +{ + ISOM_BASEBOX_COMMON; + isom_qt_color_table_t color_table; +} isom_ctab_t; + +/* Track Header Box + * This box specifies the characteristics of a single track. 
*/ +typedef struct +{ + /* version is either 0 or 1 + * flags + * 0x000001: Indicates that the track is enabled. + * A disabled track is treated as if it were not present. + * 0x000002: Indicates that the track is used in the presentation. + * 0x000004: Indicates that the track is used when previewing the presentation. + * 0x000008: Indicates that the track is used in the movie's poster. (only defined in QuickTime file format) + * ISOM: If in a presentation all tracks have neither track_in_movie nor track_in_preview set, + * then all tracks shall be treated as if both flags were set on all tracks. */ + ISOM_FULLBOX_COMMON; + /* version == 0: uint64_t -> uint32_t */ + uint64_t creation_time; /* the creation time of this track (in seconds since midnight, Jan. 1, 1904, in UTC time) */ + uint64_t modification_time; /* the most recent time the track was modified (in seconds since midnight, Jan. 1, 1904, in UTC time) */ + uint32_t track_ID; /* an integer that uniquely identifies the track + * Track IDs are never re-used and cannot be zero. */ + uint32_t reserved1; + uint64_t duration; /* the duration of this track expressed in the movie timescale units */ + /* The following fields are treated as + * ISOM: template fields. + * MP41: reserved fields. + * MP42: ignored fields since compositions are done using BIFS system. + * 3GPP: ignored fields except for alternate_group. + * QTFF: usable fields. */ + uint32_t reserved2[2]; + int16_t layer; /* the front-to-back ordering of video tracks; tracks with lower numbers are closer to the viewer. */ + int16_t alternate_group; /* an integer that specifies a group or collection of tracks + * If this field is not 0, it should be the same for tracks that contain alternate data for one another + * and different for tracks belonging to different such groups. + * Only one track within an alternate group should be played or streamed at any one time. */ + int16_t volume; /* fixed point 8.8 number. 0x0100 is full volume.
*/ + uint16_t reserved3; + int32_t matrix[9]; /* transformation matrix for the video */ + /* track's visual presentation size + * All images in the sequence are scaled to this size, before any overall transformation of the track represented by the matrix. + * Note: these fields are treated as reserved in MP4 version 1. */ + uint32_t width; /* fixed point 16.16 number */ + uint32_t height; /* fixed point 16.16 number */ + /* */ +} isom_tkhd_t; + +/* Track Clean Aperture Dimensions Box + * A presentation mode where clap and pasp are reflected. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint32_t width; /* fixed point 16.16 number */ + uint32_t height; /* fixed point 16.16 number */ +} isom_clef_t; + +/* Track Production Aperture Dimensions Box + * A presentation mode where pasp is reflected. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint32_t width; /* fixed point 16.16 number */ + uint32_t height; /* fixed point 16.16 number */ +} isom_prof_t; + +/* Track Encoded Pixels Dimensions Box + * A presentation mode where clap and pasp are not reflected. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint32_t width; /* fixed point 16.16 number */ + uint32_t height; /* fixed point 16.16 number */ +} isom_enof_t; + +/* Track Aperture Mode Dimensions Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + isom_clef_t *clef; /* Track Clean Aperture Dimensions Box */ + isom_prof_t *prof; /* Track Production Aperture Dimensions Box */ + isom_enof_t *enof; /* Track Encoded Pixels Dimensions Box */ +} isom_tapt_t; + +/* Edit List Box + * This box contains an explicit timeline map. + * Each entry defines part of the track timeline: by mapping part of the media timeline, or by indicating 'empty' time, + * or by defining a 'dwell', where a single time-point in the media is held for a period. + * The last edit in a track shall never be an empty edit.
+ * Any difference between the duration in the Movie Header Box, and the track's duration is expressed as an implicit empty edit at the end. + * It is recommended that any edits, explicit or implied, not select any portion of the composition timeline that doesn't map to a sample. + * Therefore, if the first sample in the track has non-zero CTS, then this track should have at least one edit and the start time in it should + * correspond to the value of the CTS the first sample has or more not to exceed the largest CTS in this track. */ +typedef struct +{ + /* This entry is called Timeline Mapping Edit (TME) entry in UltraViolet Common File Format. + * version == 0: 64bits -> 32bits */ + uint64_t segment_duration; /* the duration of this edit expressed in the movie timescale units */ + int64_t media_time; /* the starting composition time within the media of this edit segment + * If this field is set to -1, it is an empty edit. */ + int32_t media_rate; /* the relative rate at which to play the media corresponding to this edit segment + * If this value is 0, then the edit is specifying a 'dwell': + * the media at media_time is presented for the segment_duration. + * This field is expressed as 16.16 fixed-point number. */ +} isom_elst_entry_t; + +typedef struct +{ + ISOM_FULLBOX_COMMON; /* version is either 0 or 1 */ + lsmash_entry_list_t *list; +} isom_elst_t; + +/* Edit Box + * This optional box maps the presentation time-line to the media time-line as it is stored in the file. + * In the absence of this box, there is an implicit one-to-one mapping of these time-lines, + * and the presentation of a track starts at the beginning of the presentation. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + isom_elst_t *elst; /* Edit List Box */ +} isom_edts_t; + +/* Track Reference Box + * The Track Reference Box contains Track Reference Type Boxes. + * Track Reference Type Boxes define relationships between tracks. 
+ * They allow one track to specify how it is related to other tracks. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint32_t *track_ID; /* track_IDs of reference tracks / Zero value must not be used */ + + uint32_t ref_count; /* number of reference tracks */ +} isom_tref_type_t; + +typedef struct +{ + ISOM_BASEBOX_COMMON; + lsmash_entry_list_t *ref_list; /* Track Reference Type Boxes */ +} isom_tref_t; + +/* Media Header Box + * This box declares overall information that is media-independent, and relevant to characteristics of the media in a track.*/ +typedef struct +{ + ISOM_FULLBOX_COMMON; /* version is either 0 or 1 */ + /* version == 0: uint64_t -> uint32_t */ + uint64_t creation_time; /* the creation time of the media in this track (in seconds since midnight, Jan. 1, 1904, in UTC time) */ + uint64_t modification_time; /* the most recent time the media in this track was modified (in seconds since midnight, Jan. 1, 1904, in UTC time) */ + uint32_t timescale; /* media timescale: timescale for this media */ + uint64_t duration; /* the duration of this media expressed in the timescale indicated in this box */ + /* */ + uint16_t language; /* ISOM: ISO-639-2/T language codes. Most significant 1-bit is 0. + * Each character is packed as the difference between its ASCII value and 0x60. + * QTFF: Macintosh language codes is usually used. + * Mac's value is less than 0x800 while ISO's value is 0x800 or greater. */ + int16_t quality; /* ISOM: pre_defined / QTFF: the media's playback quality */ +} isom_mdhd_t; + +/* Handler Reference Box + * In Media Box, this box is mandatory and (ISOM: should/QTFF: must) come before Media Information Box. + * ISOM: this box might be also in Meta Box. + * QTFF: this box might be also in Media Information Box. If this box is present there, it must come before Data Information Box. 
*/ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint32_t componentType; /* ISOM: pre_defined = 0 + * QTFF: 'mhlr' for Media Handler Reference Box and 'dhlr' for Data Handler Reference Box */ + uint32_t componentSubtype; /* Both ISOM and QT: when present in Media Handler Reference Box, this field defines the type of media data. + * ISOM: when present in Metadata Handler Reference Box, this field defines the format of the meta box contents. + * QTFF: when present in Data Handler Reference Box, this field defines the data reference type. */ + /* The following fields are defined in QTFF however these fields aren't mentioned in QuickTime SDK and are reserved in the specification. + * In ISOM, these fields are still defined as reserved. */ + uint32_t componentManufacturer; /* vendor identification / A value of 0 matches any manufacturer. */ + uint32_t componentFlags; /* flags describing required component capabilities + * The high-order 8 bits should be set to 0. + * The low-order 24 bits are specific to each component type. */ + uint32_t componentFlagsMask; /* This field indicates which flags in the componentFlags field are relevant to this operation. */ + /* */ + uint8_t *componentName; /* ISOM: a null-terminated string in UTF-8 characters + * QTFF: Pascal string */ + + uint32_t componentName_length; +} isom_hdlr_t; + + +/** Media Information Header Boxes + ** There is a different media information header for each track type + ** (corresponding to the media handler-type); the matching header shall be present. **/ +/* Video Media Header Box + * This box contains general presentation information, independent of the coding, for video media. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; /* flags is 1 */ + uint16_t graphicsmode; /* template: graphicsmode = 0 */ + uint16_t opcolor[3]; /* template: opcolor = { 0, 0, 0 } */ +} isom_vmhd_t; + +/* Sound Media Header Box + * This box contains general presentation information, independent of the coding, for audio media.
*/ +typedef struct +{ + ISOM_FULLBOX_COMMON; + int16_t balance; /* a fixed-point 8.8 number that places mono audio tracks in a stereo space. template: balance = 0 */ + uint16_t reserved; +} isom_smhd_t; + +/* Hint Media Header Box + * This box contains general information, independent of the protocol, for hint tracks. (A PDU is a Protocol Data Unit.) */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint16_t maxPDUsize; /* the size in bytes of the largest PDU in this (hint) stream */ + uint16_t avgPDUsize; /* the average size of a PDU over the entire presentation */ + uint32_t maxbitrate; /* the maximum rate in bits/second over any window of one second */ + uint32_t avgbitrate; /* the average rate in bits/second over the entire presentation */ + uint32_t reserved; +} isom_hmhd_t; + +/* Null Media Header Box + * This box may be used for streams other than visual and audio (e.g., timed metadata streams). */ +typedef struct +{ + /* Streams other than visual and audio may use a Null Media Header Box */ + ISOM_FULLBOX_COMMON; /* flags is currently all zero */ +} isom_nmhd_t; + +/* Generic Media Information Box */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint16_t graphicsmode; + uint16_t opcolor[3]; + int16_t balance; /* This field is normally set to 0. */ + uint16_t reserved; /* Reserved for use by Apple. Set this field to 0. */ +} isom_gmin_t; + +/* Text Media Information Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + int32_t matrix[9]; /* Unknown fields. Default values are probably: + * { 0x00010000, 0, 0, 0, 0x00010000, 0, 0, 0, 0x40000000 } */ +} isom_text_t; + +/* Generic Media Information Header Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + isom_gmin_t *gmin; /* Generic Media Information Box */ + isom_text_t *text; /* Text Media Information Box */ +} isom_gmhd_t; +/** **/ + +/* Data Reference Box + * name and location fields are expressed in null-terminated string using UTF-8 characters.
*/ +typedef struct +{ + /* This box is DataEntryUrlBox or DataEntryUrnBox */ + ISOM_FULLBOX_COMMON; /* flags == 0x000001 means that the media data is in the same file + * as the Movie Box containing this data reference. */ + char *name; /* only for DataEntryUrnBox */ + char *location; /* a location to find the resource with the given name */ + + uint32_t name_length; + uint32_t location_length; +} isom_dref_entry_t; + +typedef struct +{ + ISOM_FULLBOX_COMMON; + lsmash_entry_list_t *list; +} isom_dref_t; + +/* Data Information Box */ +typedef struct +{ + /* This box is in Media Information Box or Meta Box */ + ISOM_BASEBOX_COMMON; + isom_dref_t *dref; /* Data Reference Box */ +} isom_dinf_t; + +/** Sample Description **/ +/* ES Descriptor Box */ +struct mp4sys_ES_Descriptor_t; /* FIXME: I think these structs using mp4sys should be placed in isom.c */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + struct mp4sys_ES_Descriptor_t *ES; +} isom_esds_t; + +/* AVCDecoderConfigurationRecord */ +typedef struct +{ +#define ISOM_REQUIRES_AVCC_EXTENSION( x ) ((x) == 100 || (x) == 110 || (x) == 122 || (x) == 144) + ISOM_BASEBOX_COMMON; + uint8_t configurationVersion; /* 1 */ + uint8_t AVCProfileIndication; /* profile_idc in SPS */ + uint8_t profile_compatibility; + uint8_t AVCLevelIndication; /* level_idc in SPS */ + uint8_t lengthSizeMinusOne; /* in bytes of the NALUnitLength field. 
upper 6-bits are reserved as 111111b */ + uint8_t numOfSequenceParameterSets; /* upper 3-bits are reserved as 111b */ + lsmash_entry_list_t *sequenceParameterSets; /* SPSs */ + uint8_t numOfPictureParameterSets; + lsmash_entry_list_t *pictureParameterSets; /* PPSs */ + /* if( ISOM_REQUIRES_AVCC_EXTENSION( AVCProfileIndication ) ) */ + uint8_t chroma_format; /* chroma_format_idc in SPS / upper 6-bits are reserved as 111111b */ + uint8_t bit_depth_luma_minus8; /* shall be in the range of 0 to 4 / upper 5-bits are reserved as 11111b */ + uint8_t bit_depth_chroma_minus8; /* shall be in the range of 0 to 4 / upper 5-bits are reserved as 11111b */ + uint8_t numOfSequenceParameterSetExt; + lsmash_entry_list_t *sequenceParameterSetExt; /* SPSExts */ + /* */ +} isom_avcC_t; + +/* Parameter Set Entry */ +typedef struct +{ + uint16_t parameterSetLength; + uint8_t *parameterSetNALUnit; +} isom_avcC_ps_entry_t; + +/* MPEG-4 Bit Rate Box + * This box signals the bit rate information of the AVC video stream. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint32_t bufferSizeDB; /* the size of the decoding buffer for the elementary stream in bytes */ + uint32_t maxBitrate; /* the maximum rate in bits/second over any window of one second */ + uint32_t avgBitrate; /* the average rate in bits/second over the entire presentation */ +} isom_btrt_t; + +/* Global Header Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint32_t header_size; + uint8_t *header_data; +} isom_glbl_t; + +/* Clean Aperture Box + * There are notionally four values in this box and these parameters are represented as a fraction N/D. + * Here, we refer to the pair of parameters fooN and fooD as foo. + * Considering the pixel dimensions as defined by the VisualSampleEntry width and height. 
+ * If picture centre of the image is at pcX and pcY, then horizOff and vertOff are defined as follows: + * pcX = horizOff + (width - 1)/2; + * pcY = vertOff + (height - 1)/2; + * The leftmost/rightmost pixel and the topmost/bottommost line of the clean aperture fall at: + * pcX +/- (cleanApertureWidth - 1)/2; + * pcY +/- (cleanApertureHeight - 1)/2; + * QTFF: this box is a mandatory extension for all uncompressed Y'CbCr data formats. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint32_t cleanApertureWidthN; + uint32_t cleanApertureWidthD; + uint32_t cleanApertureHeightN; + uint32_t cleanApertureHeightD; + int32_t horizOffN; + uint32_t horizOffD; + int32_t vertOffN; + uint32_t vertOffD; +} isom_clap_t; + +/* Pixel Aspect Ratio Box + * This box specifies the aspect ratio of a pixel, in arbitrary units. + * If a pixel appears H wide and V tall, then hSpacing/vSpacing is equal to H/V. + * When adjusting pixel aspect ratio, normally, the horizontal dimension of the video is scaled, if needed. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint32_t hSpacing; /* horizontal spacing */ + uint32_t vSpacing; /* vertical spacing */ +} isom_pasp_t; + +/* ISOM: Colour Information Box / QTFF: Color Parameter Box + * This box is used to map the numerical values of pixels in the file to a common representation of color + * in which images can be correctly compared, combined, and displayed. + * If colour information is supplied in both this box, and also in the video bitstream, + * this box takes precedence, and over-rides the information in the bitstream. + * For QuickTime file format: + * This box ('colr') supersedes the Gamma Level Box ('gama'). + * Writers of QTFF should never write both into an Image Description, and readers of QTFF should ignore 'gama' if 'colr' is present. + * Note: this box is a mandatory extension for all uncompressed Y'CbCr data formats. 
+ * For ISO Base Media file format: + * Colour information may be supplied in one or more Colour Information Boxes placed in a VisualSampleEntry. + * These should be placed in order in the sample entry starting with the most accurate (and potentially the most difficult to process), in progression to the least. + * These are advisory and concern rendering and colour conversion, and there is no normative behaviour associated with them; a reader may choose to use the most suitable. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint32_t color_parameter_type; /* QTFF: 'nclc' or 'prof' + * ISOM: 'nclx', 'rICC' or 'prof' */ + /* for 'nclc' and 'nclx' */ + uint16_t primaries_index; /* CIE 1931 xy chromaticity coordinates */ + uint16_t transfer_function_index; /* nonlinear transfer function from RGB to ErEgEb */ + uint16_t matrix_index; /* matrix from ErEgEb to EyEcbEcr */ + /* for 'nclx' */ + unsigned full_range_flag : 1; + unsigned reserved : 7; +} isom_colr_t; + +/* Gamma Level Box + * This box is used to indicate that the decompressor corrects gamma level at display time. + * This box is defined in QuickTime File Format Specification and ImageCompression.h. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint32_t level; /* A fixed-point 16.16 number indicating the gamma level at which the image was captured. + * Zero value indicates platform's standard gamma. */ +} isom_gama_t; + +/* Field/Frame Information Box + * This box is used by applications to modify decompressed image data or by decompressor components to determine field display order. + * This box is defined in QuickTime File Format Specification, dispatch019 and ImageCodec.h. + * Note: this box is a mandatory extension for all uncompressed Y'CbCr data formats. 
*/ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint8_t fields; /* the number of fields per frame + * 1: progressive scan + * 2: 2:1 interlaced */ + uint8_t detail; /* field ordering */ +} isom_fiel_t; + +/* Colorspace Box + * This box is defined in ImageCompression.h. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint32_t pixel_format; /* the native pixel format of an image */ +} isom_cspc_t; + +/* Significant Bits Box + * This box is defined in Letters from the Ice Floe dispatch019. + * Note: this box is a mandatory extension for 'v216' (Uncompressed Y'CbCr, 10, 12, 14, or 16-bit-per-component 4:2:2). */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint8_t significantBits; /* the number of significant bits per component */ +} isom_sgbt_t; + +/* Sample Scale Box + * If this box is present and can be interpreted by the decoder, + * all samples shall be displayed according to the scaling behaviour that is specified in this box. + * Otherwise, all samples are scaled to the size that is indicated by the width and height fields in the Track Header Box. + * This box is defined in the ISO Base Media file format. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint8_t constraint_flag; /* The upper 7 bits are reserved. + * If this flag is set, all samples described by this sample entry shall be scaled + * according to the method specified by the field 'scale_method'. */ + uint8_t scale_method; /* The semantics of the values for scale_method are as specified for the 'fit' attribute of regions in SMIL 1.0. 
*/ + int16_t display_center_x; + int16_t display_center_y; +} isom_stsl_t; + +/* Sample Entry: the leading fields shared by every sample entry struct. The macro ends without a semicolon; each user supplies it. */ +#define ISOM_SAMPLE_ENTRY \ + ISOM_BASEBOX_COMMON; \ + uint8_t reserved[6]; \ + uint16_t data_reference_index + +typedef struct +{ + ISOM_SAMPLE_ENTRY; +} isom_sample_entry_t; + +/* MPEG Sample Entry: contains only the common sample entry fields */ +typedef struct +{ + ISOM_SAMPLE_ENTRY; +} isom_mp4s_entry_t; + +/* ISOM: Visual Sample Entry / QTFF: Image Description + * For maximum compatibility, the following extension boxes should follow, not precede, + * any extension boxes defined in or required by derived specifications. + * Clean Aperture Box + * Pixel Aspect Ratio Box */ +typedef struct +{ + ISOM_SAMPLE_ENTRY; + int16_t version; /* ISOM: pre_defined / QTFF: sample description version */ + int16_t revision_level; /* ISOM: reserved / QTFF: version of the CODEC */ + int32_t vendor; /* ISOM: pre_defined / QTFF: whose CODEC */ + uint32_t temporalQuality; /* ISOM: pre_defined / QTFF: the temporal quality factor */ + uint32_t spatialQuality; /* ISOM: pre_defined / QTFF: the spatial quality factor */ + /* The width and height are the maximum pixel counts that the codec will deliver. + * Since these are counts they do not take into account pixel aspect ratio. 
*/ + uint16_t width; + uint16_t height; + /* */ + uint32_t horizresolution; /* 16.16 fixed-point / template: horizresolution = 0x00480000 / 72 dpi */ + uint32_t vertresolution; /* 16.16 fixed-point / template: vertresolution = 0x00480000 / 72 dpi */ + uint32_t dataSize; /* ISOM: reserved / QTFF: if known, the size of data for this descriptor */ + uint16_t frame_count; /* frames per sample / template: frame_count = 1 */ + char compressorname[33]; /* a fixed 32-byte field, with the first byte set to the number of bytes to be displayed; the array here has 33 elements (the extra byte is presumably for a terminating NUL -- confirm) */ + uint16_t depth; /* ISOM: template: depth = 0x0018 + * AVC : 0x0018: colour with no alpha + * 0x0028: grayscale with no alpha + * 0x0020: gray or colour with alpha + * QTFF: depth of this data (1-32) or (33-40 grayscale) */ + int16_t color_table_ID; /* ISOM: template: pre_defined = -1 + * QTFF: color table ID + * If this field is set to -1, the default color table should be used for the specified depth. + * If the color table ID is set to 0, a color table is contained within the sample description itself. + * The color table immediately follows the color table ID field. */ + /* Color table follows color_table_ID only when color_table_ID is set to 0. */ + isom_qt_color_table_t color_table; /* a list of preferred colors for displaying the movie on devices that support only 256 colors */ +} isom_visual_entry_t; + +/* Format Box + * This box shows the data format of the stored sound media. + * ISO base media file format also defines the same four-character-code for the type field, + * however, that is used to indicate the original sample description of the media when a protected sample entry is used. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint32_t data_format; /* copy of sample description type */ +} isom_frma_t; + +/* Audio Endian Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + int16_t littleEndian; +} isom_enda_t; + +/* MPEG-4 Audio Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint32_t unknown; /* always 0? 
*/ +} isom_mp4a_t; + +/* Terminator Box + * This box is present to indicate the end of the sound description. It contains no data. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; /* size = 8, type = 0x00000000 */ +} isom_terminator_t; + +/* Sound Information Decompression Parameters Box + * This box is defined in the QuickTime file format. + * This box provides the ability to store data specific to a given audio decompressor in the sound description. + * The contents of this box are dependent on the audio decompressor. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + isom_frma_t *frma; /* Format Box */ + isom_enda_t *enda; /* Audio Endian Box */ + isom_mp4a_t *mp4a; /* MPEG-4 Audio Box */ + isom_terminator_t *terminator; /* Terminator Box */ +} isom_wave_t; + +/* Audio Channel Layout Box + * This box is defined in QuickTime file format or Apple Lossless Audio inside ISO Base Media. */ +typedef struct +{ + uint32_t channelLabel; /* the channelLabel that describes the channel */ + uint32_t channelFlags; /* flags that control the interpretation of coordinates */ + uint32_t coordinates[3]; /* an ordered triple that specifies a precise speaker location / 32-bit floating point values held in uint32_t storage */ +} isom_channel_description_t; + +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint32_t channelLayoutTag; /* a tag that indicates the channel layout */ + uint32_t channelBitmap; /* If channelLayoutTag is set to 0x00010000, this field is the channel usage bitmap. */ + uint32_t numberChannelDescriptions; /* the number of items in the Channel Descriptions array */ + /* Channel Descriptions array (numberChannelDescriptions items) */ + isom_channel_description_t *channelDescriptions; +} isom_chan_t; + +/* ISOM: Audio Sample Entry / QTFF: Sound Description */ +typedef struct +{ + ISOM_SAMPLE_ENTRY; + int16_t version; /* ISOM: reserved + * QTFF: sample description version + * version = 0 supports only 'raw ' or 'twos' audio format. + * version = 1 is used to support out-of-band configuration settings for decompression. 
+ * version = 2 is used to support high samplerate or 3 or more multichannel audio. */ + int16_t revision_level; /* ISOM: reserved / QTFF: version of the CODEC */ + int32_t vendor; /* ISOM: reserved / QTFF: whose CODEC */ + uint16_t channelcount; /* ISOM: template: channelcount = 2 + * QTFF: the number of audio channels + * Allowable values are 1 (mono) or 2 (stereo). + * For more than 2, set this field to 3 and use numAudioChannels instead of this field. */ + uint16_t samplesize; /* ISOM: template: samplesize = 16 + * QTFF: the number of bits in each uncompressed sample for a single channel + * Allowable values are 8 or 16. + * For sizes that are not a multiple of 8, set this field to 16 and use constBitsPerChannel instead of this field. + * For more than 16, set this field to 16 and use bytesPerPacket instead of this field. */ + int16_t compression_ID; /* ISOM: pre_defined + * QTFF: version = 0 -> must be set to 0. + * version = 2 -> must be set to -2. */ + uint16_t packet_size; /* ISOM: reserved / QTFF: must be set to 0. */ + uint32_t samplerate; /* the sampling rate expressed as a 16.16 fixed-point number + * ISOM: template: samplerate = {default samplerate of media}<<16 + * QTFF: the integer portion should match the media's timescale. + * If the sampling rate is too high to be represented in this field, + * then set this field to 0x00010000 and use audioSampleRate instead of this field. */ + /* version 1 fields + * These fields describe the compression ratio of fixed-ratio audio compression algorithms. + * If these fields are not used, they are set to 0. */ + uint32_t samplesPerPacket; /* For compressed audio, be set to the number of uncompressed frames generated by a compressed frame. + * For uncompressed audio, shall be set to 1. 
*/ + uint32_t bytesPerPacket; /* the number of bytes in a sample for a single channel */ + uint32_t bytesPerFrame; /* the number of bytes in a frame */ + uint32_t bytesPerSample; /* 8-bit audio: 1, other audio: 2 */ + /* version 2 fields + * LPCMFrame: one sample from each channel. + * AudioPacket: For uncompressed audio, an AudioPacket is simply one LPCMFrame. + * For compressed audio, an AudioPacket is the natural compressed access unit of that format. */ + uint32_t sizeOfStructOnly; /* offset to extensions */ + uint64_t audioSampleRate; /* 64-bit floating point, kept as its raw 64-bit pattern in a uint64_t */ + uint32_t numAudioChannels; /* any channel assignment info will be in Audio Channel Layout Box. */ + int32_t always7F000000; /* always 0x7F000000 */ + uint32_t constBitsPerChannel; /* only set if constant (and only for uncompressed audio) */ + uint32_t formatSpecificFlags; + uint32_t constBytesPerAudioPacket; /* only set if constant */ + uint32_t constLPCMFramesPerAudioPacket; /* only set if constant */ + + lsmash_audio_summary_t summary; +} isom_audio_entry_t; + +/* Hint Sample Entry: the common sample entry fields followed by a data pointer. Like ISOM_SAMPLE_ENTRY, the macro ends without a semicolon; each user supplies it, which avoids an empty declaration inside the struct. */ +#define ISOM_HINT_SAMPLE_ENTRY \ + ISOM_SAMPLE_ENTRY; \ + uint8_t *data + +typedef struct +{ + ISOM_HINT_SAMPLE_ENTRY; + uint32_t data_length; +} isom_hint_entry_t; + +/* Metadata Sample Entry: currently just the common sample entry fields. Like ISOM_SAMPLE_ENTRY, the macro ends without a semicolon; each user supplies it. */ +#define ISOM_METADATA_SAMPLE_ENTRY \ + ISOM_SAMPLE_ENTRY + +typedef struct +{ + ISOM_METADATA_SAMPLE_ENTRY; +} isom_metadata_entry_t; + +/* QuickTime Text Sample Description */ +typedef struct +{ + ISOM_SAMPLE_ENTRY; + int32_t displayFlags; + int32_t textJustification; + uint16_t bgColor[3]; /* background RGB color */ + /* defaultTextBox */ + int16_t top; + int16_t left; + int16_t bottom; + int16_t right; + /* defaultStyle */ + int32_t scrpStartChar; /* starting character position */ + int16_t scrpHeight; + int16_t scrpAscent; + int16_t scrpFont; + uint16_t scrpFace; /* only the first 8 bits are used */ + int16_t scrpSize; + uint16_t scrpColor[3]; /* foreground RGB color */ + /* defaultFontName is Pascal string */ + uint8_t 
font_name_length; + char *font_name; +} isom_text_entry_t; + +/* FontRecord */ +typedef struct +{ + uint16_t font_ID; + /* font name, stored as a Pascal string (a length byte followed by the characters) */ + uint8_t font_name_length; + char *font_name; +} isom_font_record_t; + +/* Font Table Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + /* list of FontRecord entries + * entry_count is uint16_t. */ + lsmash_entry_list_t *list; +} isom_ftab_t; + +/* Timed Text Sample Entry */ +typedef struct +{ + ISOM_SAMPLE_ENTRY; + uint32_t displayFlags; + int8_t horizontal_justification; + int8_t vertical_justification; + uint8_t background_color_rgba[4]; + /* BoxRecord default_text_box */ + int16_t top; + int16_t left; + int16_t bottom; + int16_t right; + /* StyleRecord default_style */ + uint16_t startChar; /* always 0 */ + uint16_t endChar; /* always 0 */ + uint16_t font_ID; + uint8_t face_style_flags; + uint8_t font_size; + uint8_t text_color_rgba[4]; + /* Font Table Box font_table */ + isom_ftab_t *ftab; +} isom_tx3g_entry_t; + +/* Sample Description Box */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint32_t entry_count; /* print only */ + lsmash_entry_list_t *list; /* list of the sample entries */ +} isom_stsd_t; +/** **/ + +/* Decoding Time to Sample Box + * This box contains a compact version of a table that allows indexing from decoding time to sample number. + * Each entry in the table gives the number of consecutive samples with the same time delta, and the delta of those samples. + * By adding the deltas a complete time-to-sample map may be built. + * All samples must have non-zero durations except for the last one. + * The sum of all deltas gives the media duration in the track (not mapped to the movie timescale, and not considering any edit list). + * DTS is an abbreviation of 'decoding time stamp'. 
*/ +typedef struct +{ + uint32_t sample_count; /* number of consecutive samples that have the given sample_delta */ + uint32_t sample_delta; /* DTS[0] = 0; DTS[n+1] = DTS[n] + sample_delta[n]; */ +} isom_stts_entry_t; + +typedef struct +{ + ISOM_FULLBOX_COMMON; + lsmash_entry_list_t *list; +} isom_stts_t; + +/* Composition Time to Sample Box + * This box provides the offset between decoding time and composition time. + * CTS is an abbreviation of 'composition time stamp'. + * This box is optional and must only be present if DTS and CTS differ for any samples. */ +typedef struct +{ + uint32_t sample_count; /* number of consecutive samples that have the given sample_offset */ + uint32_t sample_offset; /* CTS[n] = DTS[n] + sample_offset[n]; + * ISOM: if version is set to 1, sample_offset is a signed 32-bit integer. + * QTFF: sample_offset is always a signed 32-bit integer. The field is declared uint32_t here, so a signed offset is held as its 32-bit pattern. */ +} isom_ctts_entry_t; + +typedef struct +{ + ISOM_FULLBOX_COMMON; + lsmash_entry_list_t *list; +} isom_ctts_t; + +/* Composition to Decode Box (Composition Shift Least Greatest Box) + * This box may be used to relate the composition and decoding timelines, + * and deal with some of the ambiguities that signed composition offsets introduce. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + int32_t compositionToDTSShift; /* If this value is added to the composition times (as calculated by the CTS offsets from the DTS), + * then for all samples, their CTS is guaranteed to be greater than or equal to their DTS, + * and the buffer model implied by the indicated profile/level will be honoured; + * if leastDecodeToDisplayDelta is positive or zero, this field can be 0; + * otherwise it should be at least (- leastDecodeToDisplayDelta). 
*/ + int32_t leastDecodeToDisplayDelta; /* the smallest sample_offset in this track */ + int32_t greatestDecodeToDisplayDelta; /* the largest sample_offset in this track */ + int32_t compositionStartTime; /* the smallest CTS for any sample */ + int32_t compositionEndTime; /* the CTS plus the composition duration, of the sample with the largest CTS in this track */ +} isom_cslg_t; + +/* Sample Size Box + * This box contains the sample count and a table giving the size in bytes of each sample. + * The total number of samples in the media is always indicated in the sample_count. + * Note: a sample size of zero is not prohibited in general, but it must be valid and defined for the coding system, + * as defined by the sample entry, that the sample belongs to. */ +typedef struct +{ + uint32_t entry_size; /* the size of a sample in bytes */ +} isom_stsz_entry_t; + +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint32_t sample_size; /* If this field is set to 0, then the samples have different sizes. */ + uint32_t sample_count; /* the number of samples in the track */ + lsmash_entry_list_t *list; /* per-sample sizes; available if sample_size == 0 */ +} isom_stsz_t; + +/* Sync Sample Box + * If this box is not present, every sample is a random access point. + * In AVC streams, this box cannot point to non-IDR samples. + * The table is arranged in strictly increasing order of sample number. */ +typedef struct +{ + uint32_t sample_number; /* the number of a sample that is a random access point in the stream */ +} isom_stss_entry_t; + +typedef struct +{ + ISOM_FULLBOX_COMMON; + lsmash_entry_list_t *list; +} isom_stss_t; + +/* Partial Sync Sample Box + * Tip from QT engineering - Open-GOP intra frames need to be marked as "partial sync samples". 
+ * Partial sync frames perform a partial reset of inter-frame dependencies; + * decoding two partial sync frames and the non-droppable difference frames between them is + * sufficient to prepare a decompressor for correctly decoding the difference frames that follow. */ +typedef struct +{ + uint32_t sample_number; /* the number of a sample that is a partial sync sample in the stream */ +} isom_stps_entry_t; + +typedef struct +{ + ISOM_FULLBOX_COMMON; + lsmash_entry_list_t *list; +} isom_stps_t; + +/* Independent and Disposable Samples Box: four 2-bit fields per sample */ +typedef struct +{ + unsigned is_leading : 2; /* ISOM: leading / QTFF: samples later in decode order may have earlier display times */ + unsigned sample_depends_on : 2; /* independency: whether this sample depends on other samples */ + unsigned sample_is_depended_on : 2; /* disposable: whether other samples depend on this sample */ + unsigned sample_has_redundancy : 2; /* redundancy */ +} isom_sdtp_entry_t; + +typedef struct +{ + ISOM_FULLBOX_COMMON; + /* According to the specification, the size of the table, sample_count, doesn't exist in this box. + * Instead, it is taken from the sample_count in the stsz or the stz2 box. */ + lsmash_entry_list_t *list; +} isom_sdtp_t; + +/* Sample To Chunk Box + * This box can be used to find the chunk that contains a sample, its position, and the associated sample description. + * The table is compactly coded. Each entry gives the index of the first chunk of a run of chunks with the same characteristics. + * By subtracting one entry here from the previous one, you can compute how many chunks are in this run. + * You can convert this to a sample count by multiplying by the appropriate samples_per_chunk. 
*/ +typedef struct +{ + uint32_t first_chunk; /* the index of the first chunk in this run of chunks that share the same samples_per_chunk and sample_description_index */ + uint32_t samples_per_chunk; /* the number of samples in each of these chunks */ + uint32_t sample_description_index; /* the index of the sample entry that describes the samples in this chunk */ +} isom_stsc_entry_t; + +typedef struct +{ + ISOM_FULLBOX_COMMON; + lsmash_entry_list_t *list; +} isom_stsc_t; + +/* Chunk Offset Box + * chunk_offset is the offset of the start of a chunk into its containing media file. + * Offsets are file offsets, not the offset into any box within the file. */ +typedef struct +{ + uint32_t chunk_offset; /* 32-bit offset */ +} isom_stco_entry_t; + +typedef struct +{ + /* 64-bit offset, for large presentations */ + uint64_t chunk_offset; +} isom_co64_entry_t; + +typedef struct +{ + ISOM_FULLBOX_COMMON; /* type = 'stco': 32-bit chunk offsets / type = 'co64': 64-bit chunk offsets */ + lsmash_entry_list_t *list; + + uint8_t large_presentation; /* Set this to 1 if 64-bit chunk offsets are needed. */ +} isom_stco_t; /* shared with the co64 box */ + +/* Sample Group Description Box + * This box gives information about the characteristics of sample groups. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; /* Use of version 0 entries is deprecated. */ + uint32_t grouping_type; /* an integer that identifies the sbgp that is associated with this sample group description */ + uint32_t default_length; /* the length of every group entry (if the length is constant), or zero (if it is variable) + * This field is available only if version == 1. */ + lsmash_entry_list_t *list; +} isom_sgpd_entry_t; + +/* Random Access Entry + * Samples marked by this group must be random access points, and may also be sync points. */ +typedef struct +{ + /* grouping_type is 'rap ' */ + uint32_t description_length; /* This field is available only if version == 1 and default_length == 0. 
*/ + unsigned num_leading_samples_known : 1; /* A value of one indicates that the number of leading samples is known for each sample in this group, + * and the number is specified by num_leading_samples. */ + unsigned num_leading_samples : 7; /* the number of leading samples for each sample in this group + * Note: when num_leading_samples_known is equal to 0, this field should be ignored. */ +} isom_rap_entry_t; + +/* Roll Recovery Entry + * This grouping type is defined as the group of samples having the same roll distance. */ +typedef struct +{ + /* grouping_type is 'roll' */ + uint32_t description_length; /* This field is available only if version == 1 and default_length == 0. */ + int16_t roll_distance; /* the number of samples that must be decoded in order for a sample to be decoded correctly + * A positive value indicates post-roll, and a negative value indicates pre-roll. + * The value zero must not be used. */ +} isom_roll_entry_t; + +/* Sample to Group Box + * This box is used to find the group that a sample belongs to and the associated description of that sample group. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint32_t grouping_type; /* Links this box to its sample group description table with the same value for grouping type. */ + uint32_t grouping_type_parameter; /* an indication of the sub-type of the grouping + * This field is available only if version == 1. */ + lsmash_entry_list_t *list; +} isom_sbgp_entry_t; + +typedef struct +{ + uint32_t sample_count; /* the number of consecutive samples with the same sample group descriptor */ + uint32_t group_description_index; /* the index of the sample group entry which describes the samples in this group + * The index ranges from 1 to the number of sample group entries in the Sample Group Description Box, + * or takes the value 0 to indicate that this sample is a member of no group of this type. 
*/ +} isom_group_assignment_entry_t; + +/* Sample Table Box: container holding pointers to the sample description, timing, sync, chunk and size boxes plus the sample group lists */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + isom_stsd_t *stsd; /* Sample Description Box */ + isom_stts_t *stts; /* Decoding Time to Sample Box */ + isom_ctts_t *ctts; /* Composition Time to Sample Box */ + isom_cslg_t *cslg; /* ISOM: Composition to Decode Box / QTFF: Composition Shift Least Greatest Box */ + isom_stss_t *stss; /* Sync Sample Box */ + isom_stps_t *stps; /* ISOM: null / QTFF: Partial Sync Sample Box */ + isom_sdtp_t *sdtp; /* Independent and Disposable Samples Box */ + isom_stsc_t *stsc; /* Sample To Chunk Box */ + isom_stsz_t *stsz; /* Sample Size Box */ + isom_stco_t *stco; /* Chunk Offset Box (isom_stco_t serves both stco and co64) */ + lsmash_entry_list_t *sgpd_list; /* list of Sample Group Description Boxes */ + lsmash_entry_list_t *sbgp_list; /* list of Sample To Group Boxes */ +} isom_stbl_t; + +/* Media Information Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + /* Media Information Header Boxes */ + isom_vmhd_t *vmhd; /* Video Media Header Box */ + isom_smhd_t *smhd; /* Sound Media Header Box */ + isom_hmhd_t *hmhd; /* ISOM: Hint Media Header Box / QTFF: null */ + isom_nmhd_t *nmhd; /* ISOM: Null Media Header Box / QTFF: null */ + isom_gmhd_t *gmhd; /* ISOM: null / QTFF: Generic Media Information Header Box */ + /* */ + isom_hdlr_t *hdlr; /* ISOM: null / QTFF: Data Handler Reference Box + * Note: this box must come before the Data Information Box. */ + isom_dinf_t *dinf; /* Data Information Box */ + isom_stbl_t *stbl; /* Sample Table Box */ +} isom_minf_t; + +/* Media Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + isom_mdhd_t *mdhd; /* Media Header Box */ + isom_hdlr_t *hdlr; /* ISOM: Handler Reference Box / QTFF: Media Handler Reference Box + * Note: this box must come before the Media Information Box. */ + isom_minf_t *minf; /* Media Information Box */ +} isom_mdia_t; + +/* Movie Header Box + * This box defines overall information which is media-independent, and relevant to the entire presentation considered as a whole. 
*/ +typedef struct +{ + ISOM_FULLBOX_COMMON; /* version is either 0 or 1 */ + /* version == 0: uint64_t -> uint32_t */ + uint64_t creation_time; /* the creation time of the presentation (in seconds since midnight, Jan. 1, 1904, in UTC time) */ + uint64_t modification_time; /* the most recent time the presentation was modified (in seconds since midnight, Jan. 1, 1904, in UTC time) */ + uint32_t timescale; /* movie timescale: timescale for the entire presentation */ + uint64_t duration; /* the duration, expressed in movie timescale, of the longest track */ + /* The following fields are treated as + * ISOM: template fields. + * MP41: reserved fields. + * MP42: ignored fields since compositions are done using the BIFS system. + * 3GPP: ignored fields. + * QTFF: usable fields. */ + int32_t rate; /* fixed point 16.16 number. 0x00010000 is normal forward playback. */ + int16_t volume; /* fixed point 8.8 number. 0x0100 is full volume. */ + int16_t reserved; + int32_t preferredLong[2]; /* ISOM: reserved / QTFF: unknown */ + int32_t matrix[9]; /* transformation matrix for the video */ + /* The following fields are defined in the QuickTime file format. + * In the ISO Base Media file format, these fields are treated as pre_defined. */ + int32_t previewTime; /* the time value in the movie at which the preview begins */ + int32_t previewDuration; /* the duration of the movie preview in movie timescale units */ + int32_t posterTime; /* the time value of the time of the movie poster */ + int32_t selectionTime; /* the time value for the start time of the current selection */ + int32_t selectionDuration; /* the duration of the current selection in movie timescale units */ + int32_t currentTime; /* the time value for the current time position within the movie */ + /* */ + uint32_t next_track_ID; /* larger than the largest track-ID in use */ +} isom_mvhd_t; + +/* Object Descriptor Box + * Note that this box is mandatory under 14496-1:2001 (mp41) while not mandatory under 14496-14:2003 (mp42). 
*/ +struct mp4sys_ObjectDescriptor_t; /* FIXME: I think these structs using mp4sys should be placed in isom.c */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + struct mp4sys_ObjectDescriptor_t *OD; /* opaque at this point: only the forward declaration above is visible */ +} isom_iods_t; + +/* Media Data Box + * This box contains the media data. + * A presentation may contain zero or more Media Data Boxes. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; /* If size is 0, then this box is the last box. */ + + uint64_t placeholder_pos; /* placeholder position for largesize */ +} isom_mdat_t; + +/* Free Space Box + * The contents of a free-space box are irrelevant and may be ignored without affecting the presentation. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; /* type is 'free' or 'skip' */ + uint32_t length; + uint8_t *data; +} isom_free_t; + +typedef isom_free_t isom_skip_t; + +/* Chapter List Box + * This box is NOT defined in the ISO/MPEG-4 specs. */ +typedef struct +{ + uint64_t start_time; /* version = 0: expressed in movie timescale units + * version = 1: expressed in units of 100 nanoseconds */ + /* The chapter name is a Pascal string */ + uint8_t chapter_name_length; + char *chapter_name; +} isom_chpl_entry_t; + +typedef struct +{ + ISOM_FULLBOX_COMMON; /* version = 0 is defined in the F4V file format. */ + uint8_t unknown; /* only available under version = 1 */ + lsmash_entry_list_t *list; /* if version is set to 0, entry_count is uint8_t. */ +} isom_chpl_t; + +typedef struct +{ + char *chapter_name; + uint64_t start_time; +} isom_chapter_entry_t; + +/* Metadata Item Keys Box */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + lsmash_entry_list_t *list; +} isom_keys_t; + +typedef struct +{ + uint32_t key_size; /* the size of the entire structure containing a key definition + * key_size = sizeof(key_size) + sizeof(key_namespace) + sizeof(key_value) */ + uint32_t key_namespace; /* a naming scheme used for metadata keys + * Location metadata keys, for example, use the 'mdta' key namespace. 
*/ + uint8_t *key_value; /* the actual name of the metadata key + * Keys with the 'mdta' namespace use a reverse DNS naming convention. */ +} isom_keys_entry_t; + +/* Meaning Box */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint8_t *meaning_string; /* to fill the box */ + + uint32_t meaning_string_length; /* length of meaning_string (unit presumably bytes -- confirm) */ +} isom_mean_t; + +/* Name Box */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint8_t *name; /* to fill the box */ + + uint32_t name_length; /* length of name (unit presumably bytes -- confirm) */ +} isom_name_t; + +/* Data Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + /* type indicator: the reserved, type_set_identifier and type_code fields below */ + uint16_t reserved; /* always 0 */ + uint8_t type_set_identifier; /* 0: type set of the common basic data types */ + uint8_t type_code; /* code identifying the type of the data */ + /* */ + uint32_t the_locale; /* reserved to be 0 */ + uint8_t *value; /* to fill the box */ + + uint32_t value_length; /* length of value (unit presumably bytes -- confirm) */ +} isom_data_t; + +/* Metadata Item Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + isom_mean_t *mean; /* Meaning Box */ + isom_name_t *name; /* Name Box */ + isom_data_t *data; /* Data Box */ +} isom_metaitem_t; + +/* Metadata Item List Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + lsmash_entry_list_t *item_list; /* Metadata Item Box List + * There is no entry_count field. 
*/ +} isom_ilst_t; + +/* Meta Box */ +typedef struct +{ + ISOM_FULLBOX_COMMON; /* ISOM: FullBox / QTFF: BaseBox */ + isom_hdlr_t *hdlr; /* Metadata Handler Reference Box */ + isom_dinf_t *dinf; /* ISOM: Data Information Box / QTFF: null */ + isom_keys_t *keys; /* ISOM: null / QTFF: Metadata Item Keys Box */ + isom_ilst_t *ilst; /* Metadata Item List Box (only defined in Apple MPEG-4 and QTFF) */ +} isom_meta_t; + +/* Window Location Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + /* default window location for the movie */ + uint16_t x; + uint16_t y; +} isom_WLOC_t; + +/* Looping Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint32_t looping_mode; /* 0 for none, 1 for looping, 2 for palindromic looping */ +} isom_LOOP_t; + +/* Play Selection Only Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint8_t selection_only; /* whether only the selected area of the movie should be played */ +} isom_SelO_t; + +/* Play All Frames Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + uint8_t play_all_frames; /* whether all frames of video should be played, regardless of timing */ +} isom_AllF_t; + +/* Copyright Box + * The Copyright box contains a copyright declaration which applies to the entire presentation, + * when contained within the Movie Box, or, when contained in a track, to that entire track. + * There may be multiple copyright boxes using different language codes. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint16_t language; /* ISO-639-2/T language code. The most significant bit is 0. + * Each character is packed as the difference between its ASCII value and 0x60. */ + uint8_t *notice; /* a null-terminated string in either UTF-8 or UTF-16 characters, giving a copyright notice. + * If UTF-16 is used, the string shall start with the BYTE ORDER MARK (0xFEFF), to distinguish it from a UTF-8 string. + * This mark does not form part of the final string. 
*/ + uint32_t notice_length; +} isom_cprt_t; + +/* User Data Box + * This box is a container box for informative user-data. + * This user data is formatted as a set of boxes with more specific box types, which declare their content more precisely. + * QTFF: for historical reasons, this box is optionally terminated by a 32-bit integer set to 0. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + isom_chpl_t *chpl; /* Chapter List Box */ + isom_meta_t *meta; /* Meta Box extended by Apple for iTunes movie */ + /* QuickTime user data */ + isom_WLOC_t *WLOC; /* Window Location Box */ + isom_LOOP_t *LOOP; /* Looping Box */ + isom_SelO_t *SelO; /* Play Selection Only Box */ + isom_AllF_t *AllF; /* Play All Frames Box */ + /* Copyright Box List */ + lsmash_entry_list_t *cprt_list; /* Copyright Boxes are defined in the ISO Base Media and 3GPP file formats */ +} isom_udta_t; + +/** Caches for handling tracks **/ +typedef struct +{ + uint64_t alloc; /* total buffer size for the pool */ + uint64_t size; /* total size of samples in the pool */ + uint32_t sample_count; /* number of samples in the pool */ + uint8_t *data; /* actual data of samples in the pool */ +} isom_sample_pool_t; + +typedef struct +{ + uint32_t chunk_number; /* chunk number */ + uint32_t sample_description_index; /* sample description index */ + uint64_t first_dts; /* the first DTS in the chunk */ + isom_sample_pool_t *pool; /* samples pooled to interleave */ +} isom_chunk_t; + +typedef struct +{ + uint64_t dts; /* decoding time stamp */ + uint64_t cts; /* composition time stamp */ + int32_t ctd_shift; +} isom_timestamp_t; + +typedef struct +{ + isom_group_assignment_entry_t *assignment; /* the address corresponding to the entry in Sample to Group Box */ + isom_group_assignment_entry_t *prev_assignment; /* the address of the previous assignment */ + isom_rap_entry_t *random_access; /* the address corresponding to the random access entry in Sample Group Description Box */ + uint8_t is_prev_rap; /* whether the previous sample is a random access point or not */ +} 
isom_rap_group_t; + +typedef struct +{ + isom_group_assignment_entry_t *assignment; /* the address corresponding to the entry in Sample to Group Box */ + uint32_t first_sample; /* the number of the first sample of the group */ + uint32_t recovery_point; /* the identifier necessary for the recovery from its starting point to be completed */ + uint8_t delimited; /* flag: whether the sample_count is determined */ + uint8_t described; /* flag: whether the group description is determined */ + uint8_t prev_is_recovery_start; /* whether the previous sample is a starting point of recovery or not */ +} isom_roll_group_t; + +typedef struct +{ + lsmash_entry_list_t *pool; /* grouping pooled to delimit and describe */ +} isom_grouping_t; + +typedef struct +{ + uint8_t has_samples; + uint32_t traf_number; + uint32_t last_duration; /* the last sample duration in this track fragment */ + uint64_t largest_cts; /* the largest CTS in this track fragment */ +} isom_fragment_t; + +typedef struct +{ + uint8_t all_sync; /* whether all samples are sync samples */ + isom_chunk_t chunk; + isom_timestamp_t timestamp; + isom_grouping_t roll; + isom_rap_group_t *rap; + isom_fragment_t *fragment; +} isom_cache_t; + +/** Movie Fragment Boxes **/ +/* Track Fragment Flags ('tf_flags') */ +typedef enum +{ + ISOM_TF_FLAGS_BASE_DATA_OFFSET_PRESENT = 0x000001, /* base_data_offset field exists. */ + ISOM_TF_FLAGS_SAMPLE_DESCRIPTION_INDEX_PRESENT = 0x000002, /* sample_description_index field exists. */ + ISOM_TF_FLAGS_DEFAULT_SAMPLE_DURATION_PRESENT = 0x000008, /* default_sample_duration field exists. */ + ISOM_TF_FLAGS_DEFAULT_SAMPLE_SIZE_PRESENT = 0x000010, /* default_sample_size field exists. */ + ISOM_TF_FLAGS_DEFAULT_SAMPLE_FLAGS_PRESENT = 0x000020, /* default_sample_flags field exists. */ + ISOM_TF_FLAGS_DURATION_IS_EMPTY = 0x010000, /* There are no samples for this time interval.
*/ +} isom_tf_flags_code; + +/* Track Run Flags ('tr_flags') */ +typedef enum +{ + ISOM_TR_FLAGS_DATA_OFFSET_PRESENT = 0x000001, /* data_offset field exists. */ + ISOM_TR_FLAGS_FIRST_SAMPLE_FLAGS_PRESENT = 0x000004, /* first_sample_flags field exists. */ + ISOM_TR_FLAGS_SAMPLE_DURATION_PRESENT = 0x000100, /* sample_duration field exists. */ + ISOM_TR_FLAGS_SAMPLE_SIZE_PRESENT = 0x000200, /* sample_size field exists. */ + ISOM_TR_FLAGS_SAMPLE_FLAGS_PRESENT = 0x000400, /* sample_flags field exists. */ + ISOM_TR_FLAGS_SAMPLE_COMPOSITION_TIME_OFFSET_PRESENT = 0x000800, /* sample_composition_time_offset field exists. */ +} isom_tr_flags_code; + +/* Sample Flags */ +typedef struct +{ + unsigned reserved : 4; + /* The following fields are defined in the same way as in the Independent and Disposable Samples Box. */ + unsigned is_leading : 2; + unsigned sample_depends_on : 2; + unsigned sample_is_depended_on : 2; + unsigned sample_has_redundancy : 2; + /* */ + unsigned sample_padding_value : 3; /* the number of bits at the end of this sample */ + unsigned sample_is_non_sync_sample : 1; /* A value of 0 means this sample is a sync sample. */ + uint16_t sample_degradation_priority; +} isom_sample_flags_t; + +/* Movie Extends Header Box + * This box is omitted when used in live streaming. + * If this box is not present, the overall duration must be computed by examining each fragment. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + /* version == 0: uint64_t -> uint32_t */ + uint64_t fragment_duration; /* the duration of the longest track, in the timescale indicated in the Movie Header Box, including movie fragments. */ +} isom_mehd_t; + +/* Track Extends Box + * This box sets up default values used by the movie fragments.
*/ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint32_t track_ID; /* identifier of the track; this shall be the track ID of a track in the Movie Box */ + uint32_t default_sample_description_index; /* default that sample_description_index in the Track Fragment Header Box overrides */ + uint32_t default_sample_duration; /* default that default_sample_duration in the Track Fragment Header Box overrides */ + uint32_t default_sample_size; /* default that default_sample_size in the Track Fragment Header Box overrides */ + isom_sample_flags_t default_sample_flags; /* default that default_sample_flags in the Track Fragment Header Box overrides */ +} isom_trex_entry_t; + +/* Movie Extends Box + * This box warns readers that there might be Movie Fragment Boxes in this file. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + isom_mehd_t *mehd; /* Movie Extends Header Box / omitted when used in live streaming */ + lsmash_entry_list_t *trex_list; /* Track Extends Box List */ + + uint64_t placeholder_pos; /* placeholder position for Movie Extends Header Box */ +} isom_mvex_t; + +/* Movie Fragment Header Box + * This box contains a sequence number, as a safety check. + * The sequence number 'usually' starts at 1 and must increase for each movie fragment in the file, in the order in which they occur. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint32_t sequence_number; /* the ordinal number of this fragment, in increasing order */ +} isom_mfhd_t; + +/* Track Fragment Header Box + * Each movie fragment can contain zero or more fragments for each track; + * and a track fragment can contain zero or more contiguous runs of samples. + * This box sets up information and defaults used for those runs of samples. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; /* flags field is used for 'tf_flags'. */ + uint32_t track_ID; + /* all the following are optional fields */ + uint64_t base_data_offset; /* an explicit anchor for the data offsets in each track run + * Offsets are file offsets, like chunk_offset in the Chunk Offset Box. + * If not provided, the base_data_offset for the first track in the movie fragment is the position + * of the first byte of the enclosing Movie Fragment Box, and for second and subsequent track fragments, + * the default is the end of the data defined by the preceding fragment.
+ * To avoid the case this field might overflow, e.g. semi-permanent live streaming and broadcasting, + * you shall not use this optional field. */ + uint32_t sample_description_index; /* overrides default_sample_description_index in Track Extends Box */ + uint32_t default_sample_duration; /* overrides default_sample_duration in Track Extends Box */ + uint32_t default_sample_size; /* overrides default_sample_size in Track Extends Box */ + isom_sample_flags_t default_sample_flags; /* overrides default_sample_flags in Track Extends Box */ +} isom_tfhd_t; + +/* Track Fragment Base Media Decode Time Box + * This box provides the absolute decode time, measured on the media timeline, of the first sample in decode order in the track fragment. + * This can be useful, for example, when performing random access in a file; + * it is not necessary to sum the sample durations of all preceding samples in previous fragments to find this value + * (where the sample durations are the deltas in the Decoding Time to Sample Box and the sample_durations in the preceding track runs). + * This box, if present, shall be positioned after the Track Fragment Header Box and before the first Track Fragment Run Box. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; /* version is either 0 or 1 */ + /* version == 0: 64 bits -> 32 bits */ + uint64_t baseMediaDecodeTime; /* an integer equal to the sum of the decode durations of all earlier samples in the media, expressed in the media's timescale + * It does not include the samples added in the enclosing track fragment. + * NOTE: the decode timeline is a media timeline, established before any explicit or implied mapping of media time to presentation time, + * for example by an edit list or similar structure. */ +} isom_tfdt_t; + +/* Track Fragment Run Box + * Within the Track Fragment Box, there are zero or more Track Fragment Run Boxes. + * If the duration-is-empty flag is set in the tf_flags, there are no track runs.
+ * A track run documents a contiguous set of samples for a track. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; /* flags field is used for 'tr_flags'. */ + uint32_t sample_count; /* the number of samples being added in this run; also the number of rows in the following table */ + /* The following are optional fields. */ + int32_t data_offset; /* This value is added to the implicit or explicit data_offset established in the Track Fragment Header Box. + * If this field is not present, then the data for this run starts immediately after the data of the previous run, + * or at the base_data_offset defined by the Track Fragment Header Box if this is the first run in a track fragment. */ + isom_sample_flags_t first_sample_flags; /* a set of flags for the first sample only of this run */ + lsmash_entry_list_t *optional; /* all fields in this list are optional. + * NOTE(review): entries are presumably isom_trun_optional_row_t -- confirm */ +} isom_trun_entry_t; + +typedef struct +{ + /* If any of the following fields are present, each one overrides the default value described in the Track Fragment Header Box or Track Extends Box. */ + uint32_t sample_duration; /* overrides default_sample_duration */ + uint32_t sample_size; /* overrides default_sample_size */ + isom_sample_flags_t sample_flags; /* overrides default_sample_flags */ + /* */ + uint32_t sample_composition_time_offset; /* composition time offset + * If version == 0, unsigned 32-bit integer. + * Otherwise, signed 32-bit integer. */ +} isom_trun_optional_row_t; + +/* Track Fragment Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + isom_tfhd_t *tfhd; /* Track Fragment Header Box */ + isom_tfdt_t *tfdt; /* Track Fragment Base Media Decode Time Box */ + lsmash_entry_list_t *trun_list; /* Track Fragment Run Box List + * If the duration-is-empty flag is set in the tf_flags, there are no track runs.
*/ + isom_sdtp_t *sdtp; /* Independent and Disposable Samples Box */ + + isom_cache_t *cache; +} isom_traf_entry_t; + +/* Movie Fragment Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + isom_mfhd_t *mfhd; /* Movie Fragment Header Box */ + lsmash_entry_list_t *traf_list; /* Track Fragment Box List */ +} isom_moof_entry_t; + +/* Track Fragment Random Access Box + * Each entry in this box contains the location and the presentation time of the sync sample. + * Note that not every sync sample in the track needs to be listed in the table. + * The absence of this box does not mean that all the samples are sync samples. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint32_t track_ID; + unsigned int reserved : 26; + unsigned int length_size_of_traf_num : 2; /* the length in bytes of the traf_number field minus one */ + unsigned int length_size_of_trun_num : 2; /* the length in bytes of the trun_number field minus one */ + unsigned int length_size_of_sample_num : 2; /* the length in bytes of the sample_number field minus one */ + uint32_t number_of_entry; /* the number of entries for this track + * Value zero indicates that every sample is a sync sample and no table entry follows. */ + lsmash_entry_list_t *list; /* entry_count corresponds to number_of_entry. */ +} isom_tfra_entry_t; + +typedef struct +{ + /* version == 0: 64 bits -> 32 bits */ + uint64_t time; /* the presentation time of the sync sample in units defined in the Media Header Box of the associated track + * For segments based on movie sample tables or movie fragments, presentation times are in the movie timeline, + * that is, they are composition times after the application of any edit list for the track. + * Note: a segment is defined as a portion of an ISO base media file format file, consisting of either + * (a) a movie box, with its associated media data (if any) and other associated boxes + * or + * (b) one or more movie fragment boxes, with their associated media data, and other associated boxes.
*/ + uint64_t moof_offset; /* the offset of the Movie Fragment Box used in this entry + * Offset is the byte-offset between the beginning of the file and the beginning of the Movie Fragment Box. */ + /* */ + uint32_t traf_number; /* the Track Fragment Box ('traf') number that contains the sync sample + * Numbering starts from 1 in each Movie Fragment Box ('moof'). */ + uint32_t trun_number; /* the Track Fragment Run Box ('trun') number that contains the sync sample + * Numbering starts from 1 in each Track Fragment Box ('traf'). */ + uint32_t sample_number; /* the sample number of the sync sample + * Numbering starts from 1 in each Track Fragment Run Box ('trun'). */ +} isom_tfra_location_time_entry_t; + +/* Movie Fragment Random Access Offset Box + * This box provides a copy of the length field from the enclosing Movie Fragment Random Access Box. */ +typedef struct +{ + ISOM_FULLBOX_COMMON; + uint32_t length; /* an integer giving the number of bytes of the enclosing Movie Fragment Random Access Box + * This field is placed at the end of the enclosing box to assist readers scanning + * from the end of the file in finding the Movie Fragment Random Access Box. */ +} isom_mfro_t; + +/* Movie Fragment Random Access Box + * This box provides a table which may assist readers in finding sync samples in a file using movie fragments, + * and is usually placed at or near the end of the file. + * The last box within the Movie Fragment Random Access Box, which is called the Movie Fragment Random Access Offset Box, + * provides a copy of the length field from the Movie Fragment Random Access Box. */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + lsmash_entry_list_t *tfra_list; /* Track Fragment Random Access Box List */ + isom_mfro_t *mfro; /* Movie Fragment Random Access Offset Box */ +} isom_mfra_t; + +/* Movie fragment manager + * The presence of this means we use the structure of movie fragments.
*/ +typedef struct +{ + isom_moof_entry_t *movie; /* the address corresponding to the current Movie Fragment Box */ + uint64_t fragment_count; /* the number of movie fragments created so far */ + uint64_t pool_size; /* NOTE(review): presumably the total size of the samples held in pool -- confirm */ + lsmash_entry_list_t *pool; /* samples pooled to interleave for the current movie fragment */ +} isom_fragment_manager_t; + +/** **/ + +/* Movie Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + isom_mvhd_t *mvhd; /* Movie Header Box */ + isom_iods_t *iods; /* ISOM: Object Descriptor Box / QTFF: null */ + lsmash_entry_list_t *trak_list; /* Track Box List */ + isom_udta_t *udta; /* User Data Box */ + isom_ctab_t *ctab; /* ISOM: null / QTFF: Color Table Box */ + isom_meta_t *meta; /* Meta Box */ + isom_mvex_t *mvex; /* Movie Extends Box */ +} isom_moov_t; + +/* ROOT: holds the file-level boxes plus the per-file settings and state listed below */ +struct lsmash_root_tag +{ + ISOM_FULLBOX_COMMON; /* the size field expresses the total file size; + * the flags field expresses the file mode */ + isom_ftyp_t *ftyp; /* File Type Box */ + isom_moov_t *moov; /* Movie Box */ + lsmash_entry_list_t *moof_list; /* Movie Fragment Box List */ + isom_mdat_t *mdat; /* Media Data Box */ + isom_free_t *free; /* Free Space Box */ + isom_meta_t *meta; /* Meta Box */ + isom_mfra_t *mfra; /* Movie Fragment Random Access Box */ + + lsmash_bs_t *bs; /* bytestream manager */ + isom_fragment_manager_t *fragment; /* movie fragment manager */ + double max_chunk_duration; /* max duration per chunk in seconds */ + double max_async_tolerance; /* max tolerance, in seconds, for amount of interleaving asynchronization between tracks */ + uint64_t max_chunk_size; /* max size per chunk in bytes. */ + uint64_t max_read_size; /* max size of reading from a chunk at a time.
*/ + uint8_t file_type_written; /* whether File Type Box was written */ + uint8_t qt_compatible; /* compatibility with QuickTime file format */ + uint8_t isom_compatible; /* compatibility with ISO Base Media file format */ + uint8_t avc_extensions; /* compatibility with AVC extensions */ + uint8_t mp4_version1; /* compatibility with MP4 ver.1 file format */ + uint8_t mp4_version2; /* compatibility with MP4 ver.2 file format */ + uint8_t itunes_movie; /* compatibility with iTunes Movie */ + uint8_t max_3gpp_version; /* maximum 3GPP version */ + uint8_t max_isom_version; /* maximum ISO Base Media file format version */ + lsmash_entry_list_t *print; /* NOTE(review): undocumented; presumably entries collected for printing the box structure -- confirm */ + lsmash_entry_list_t *timeline; /* NOTE(review): undocumented; presumably per-track timelines -- confirm */ +}; + +/* Track Box */ +typedef struct +{ + ISOM_BASEBOX_COMMON; + isom_tkhd_t *tkhd; /* Track Header Box */ + isom_tapt_t *tapt; /* ISOM: null / QTFF: Track Aperture Mode Dimensions Box */ + isom_edts_t *edts; /* Edit Box */ + isom_tref_t *tref; /* Track Reference Box */ + isom_mdia_t *mdia; /* Media Box */ + isom_udta_t *udta; /* User Data Box */ + isom_meta_t *meta; /* Meta Box */ + + isom_cache_t *cache; /* cache for handling this track */ + uint32_t related_track_ID; /* NOTE(review): undocumented; presumably the track_ID of an associated track -- confirm */ + uint8_t is_chapter; /* NOTE(review): presumably whether this track is a chapter track -- confirm */ +} isom_trak_entry_t; +/** **/ + +/* Box types: four-character codes wrapped by lsmash_form_iso_box_type() / lsmash_form_qtff_box_type() */ +#define ISOM_BOX_TYPE_ID32 lsmash_form_iso_box_type( LSMASH_4CC( 'I', 'D', '3', '2' ) ) +#define ISOM_BOX_TYPE_ALBM lsmash_form_iso_box_type( LSMASH_4CC( 'a', 'l', 'b', 'm' ) ) +#define ISOM_BOX_TYPE_AUTH lsmash_form_iso_box_type( LSMASH_4CC( 'a', 'u', 't', 'h' ) ) +#define ISOM_BOX_TYPE_BPCC lsmash_form_iso_box_type( LSMASH_4CC( 'b', 'p', 'c', 'c' ) ) +#define ISOM_BOX_TYPE_BUFF lsmash_form_iso_box_type( LSMASH_4CC( 'b', 'u', 'f', 'f' ) ) +#define ISOM_BOX_TYPE_BXML lsmash_form_iso_box_type( LSMASH_4CC( 'b', 'x', 'm', 'l' ) ) +#define ISOM_BOX_TYPE_CCID lsmash_form_iso_box_type( LSMASH_4CC( 'c', 'c', 'i', 'd' ) ) +#define ISOM_BOX_TYPE_CDEF lsmash_form_iso_box_type( LSMASH_4CC( 'c', 'd', 'e', 'f' ) ) +#define ISOM_BOX_TYPE_CLSF lsmash_form_iso_box_type( LSMASH_4CC( 'c', 'l', 's', 'f' )
) +#define ISOM_BOX_TYPE_CMAP lsmash_form_iso_box_type( LSMASH_4CC( 'c', 'm', 'a', 'p' ) ) +#define ISOM_BOX_TYPE_CO64 lsmash_form_iso_box_type( LSMASH_4CC( 'c', 'o', '6', '4' ) ) +#define ISOM_BOX_TYPE_COLR lsmash_form_iso_box_type( LSMASH_4CC( 'c', 'o', 'l', 'r' ) ) +#define ISOM_BOX_TYPE_CPRT lsmash_form_iso_box_type( LSMASH_4CC( 'c', 'p', 'r', 't' ) ) +#define ISOM_BOX_TYPE_CSLG lsmash_form_iso_box_type( LSMASH_4CC( 'c', 's', 'l', 'g' ) ) +#define ISOM_BOX_TYPE_CTTS lsmash_form_iso_box_type( LSMASH_4CC( 'c', 't', 't', 's' ) ) +#define ISOM_BOX_TYPE_CVRU lsmash_form_iso_box_type( LSMASH_4CC( 'c', 'v', 'r', 'u' ) ) +#define ISOM_BOX_TYPE_DCFD lsmash_form_iso_box_type( LSMASH_4CC( 'd', 'c', 'f', 'D' ) ) +#define ISOM_BOX_TYPE_DINF lsmash_form_iso_box_type( LSMASH_4CC( 'd', 'i', 'n', 'f' ) ) +#define ISOM_BOX_TYPE_DREF lsmash_form_iso_box_type( LSMASH_4CC( 'd', 'r', 'e', 'f' ) ) +#define ISOM_BOX_TYPE_DSCP lsmash_form_iso_box_type( LSMASH_4CC( 'd', 's', 'c', 'p' ) ) +#define ISOM_BOX_TYPE_DSGD lsmash_form_iso_box_type( LSMASH_4CC( 'd', 's', 'g', 'd' ) ) +#define ISOM_BOX_TYPE_DSTG lsmash_form_iso_box_type( LSMASH_4CC( 'd', 's', 't', 'g' ) ) +#define ISOM_BOX_TYPE_EDTS lsmash_form_iso_box_type( LSMASH_4CC( 'e', 'd', 't', 's' ) ) +#define ISOM_BOX_TYPE_ELST lsmash_form_iso_box_type( LSMASH_4CC( 'e', 'l', 's', 't' ) ) +#define ISOM_BOX_TYPE_FECI lsmash_form_iso_box_type( LSMASH_4CC( 'f', 'e', 'c', 'i' ) ) +#define ISOM_BOX_TYPE_FECR lsmash_form_iso_box_type( LSMASH_4CC( 'f', 'e', 'c', 'r' ) ) +#define ISOM_BOX_TYPE_FIIN lsmash_form_iso_box_type( LSMASH_4CC( 'f', 'i', 'i', 'n' ) ) +#define ISOM_BOX_TYPE_FIRE lsmash_form_iso_box_type( LSMASH_4CC( 'f', 'i', 'r', 'e' ) ) +#define ISOM_BOX_TYPE_FPAR lsmash_form_iso_box_type( LSMASH_4CC( 'f', 'p', 'a', 'r' ) ) +#define ISOM_BOX_TYPE_FREE lsmash_form_iso_box_type( LSMASH_4CC( 'f', 'r', 'e', 'e' ) ) +#define ISOM_BOX_TYPE_FRMA lsmash_form_iso_box_type( LSMASH_4CC( 'f', 'r', 'm', 'a' ) ) +#define ISOM_BOX_TYPE_FTYP 
lsmash_form_iso_box_type( LSMASH_4CC( 'f', 't', 'y', 'p' ) ) +#define ISOM_BOX_TYPE_GITN lsmash_form_iso_box_type( LSMASH_4CC( 'g', 'i', 't', 'n' ) ) +#define ISOM_BOX_TYPE_GNRE lsmash_form_iso_box_type( LSMASH_4CC( 'g', 'n', 'r', 'e' ) ) +#define ISOM_BOX_TYPE_GRPI lsmash_form_iso_box_type( LSMASH_4CC( 'g', 'r', 'p', 'i' ) ) +#define ISOM_BOX_TYPE_HDLR lsmash_form_iso_box_type( LSMASH_4CC( 'h', 'd', 'l', 'r' ) ) +#define ISOM_BOX_TYPE_HMHD lsmash_form_iso_box_type( LSMASH_4CC( 'h', 'm', 'h', 'd' ) ) +#define ISOM_BOX_TYPE_ICNU lsmash_form_iso_box_type( LSMASH_4CC( 'i', 'c', 'n', 'u' ) ) +#define ISOM_BOX_TYPE_IDAT lsmash_form_iso_box_type( LSMASH_4CC( 'i', 'd', 'a', 't' ) ) +#define ISOM_BOX_TYPE_IHDR lsmash_form_iso_box_type( LSMASH_4CC( 'i', 'h', 'd', 'r' ) ) +#define ISOM_BOX_TYPE_IINF lsmash_form_iso_box_type( LSMASH_4CC( 'i', 'i', 'n', 'f' ) ) +#define ISOM_BOX_TYPE_ILOC lsmash_form_iso_box_type( LSMASH_4CC( 'i', 'l', 'o', 'c' ) ) +#define ISOM_BOX_TYPE_IMIF lsmash_form_iso_box_type( LSMASH_4CC( 'i', 'm', 'i', 'f' ) ) +#define ISOM_BOX_TYPE_INFU lsmash_form_iso_box_type( LSMASH_4CC( 'i', 'n', 'f', 'u' ) ) +#define ISOM_BOX_TYPE_IODS lsmash_form_iso_box_type( LSMASH_4CC( 'i', 'o', 'd', 's' ) ) +#define ISOM_BOX_TYPE_IPHD lsmash_form_iso_box_type( LSMASH_4CC( 'i', 'p', 'h', 'd' ) ) +#define ISOM_BOX_TYPE_IPMC lsmash_form_iso_box_type( LSMASH_4CC( 'i', 'p', 'm', 'c' ) ) +#define ISOM_BOX_TYPE_IPRO lsmash_form_iso_box_type( LSMASH_4CC( 'i', 'p', 'r', 'o' ) ) +#define ISOM_BOX_TYPE_IREF lsmash_form_iso_box_type( LSMASH_4CC( 'i', 'r', 'e', 'f' ) ) +#define ISOM_BOX_TYPE_JP lsmash_form_iso_box_type( LSMASH_4CC( 'j', 'p', ' ', ' ' ) ) +#define ISOM_BOX_TYPE_JP2C lsmash_form_iso_box_type( LSMASH_4CC( 'j', 'p', '2', 'c' ) ) +#define ISOM_BOX_TYPE_JP2H lsmash_form_iso_box_type( LSMASH_4CC( 'j', 'p', '2', 'h' ) ) +#define ISOM_BOX_TYPE_JP2I lsmash_form_iso_box_type( LSMASH_4CC( 'j', 'p', '2', 'i' ) ) +#define ISOM_BOX_TYPE_KYWD lsmash_form_iso_box_type( LSMASH_4CC( 'k', 
'y', 'w', 'd' ) ) +#define ISOM_BOX_TYPE_LOCI lsmash_form_iso_box_type( LSMASH_4CC( 'l', 'o', 'c', 'i' ) ) +#define ISOM_BOX_TYPE_LRCU lsmash_form_iso_box_type( LSMASH_4CC( 'l', 'r', 'c', 'u' ) ) +#define ISOM_BOX_TYPE_MDAT lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'd', 'a', 't' ) ) +#define ISOM_BOX_TYPE_MDHD lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'd', 'h', 'd' ) ) +#define ISOM_BOX_TYPE_MDIA lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'd', 'i', 'a' ) ) +#define ISOM_BOX_TYPE_MDRI lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'd', 'r', 'i' ) ) +#define ISOM_BOX_TYPE_MECO lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'e', 'c', 'o' ) ) +#define ISOM_BOX_TYPE_MEHD lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'e', 'h', 'd' ) ) +#define ISOM_BOX_TYPE_M7HD lsmash_form_iso_box_type( LSMASH_4CC( 'm', '7', 'h', 'd' ) ) +#define ISOM_BOX_TYPE_MERE lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'e', 'r', 'e' ) ) +#define ISOM_BOX_TYPE_META lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'e', 't', 'a' ) ) +#define ISOM_BOX_TYPE_MFHD lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'f', 'h', 'd' ) ) +#define ISOM_BOX_TYPE_MFRA lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'f', 'r', 'a' ) ) +#define ISOM_BOX_TYPE_MFRO lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'f', 'r', 'o' ) ) +#define ISOM_BOX_TYPE_MINF lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'i', 'n', 'f' ) ) +#define ISOM_BOX_TYPE_MJHD lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'j', 'h', 'd' ) ) +#define ISOM_BOX_TYPE_MOOF lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'o', 'o', 'f' ) ) +#define ISOM_BOX_TYPE_MOOV lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'o', 'o', 'v' ) ) +#define ISOM_BOX_TYPE_MVCG lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'v', 'c', 'g' ) ) +#define ISOM_BOX_TYPE_MVCI lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'v', 'c', 'i' ) ) +#define ISOM_BOX_TYPE_MVEX lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'v', 'e', 'x' ) ) +#define ISOM_BOX_TYPE_MVHD lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'v', 'h', 'd' ) ) +#define 
ISOM_BOX_TYPE_MVRA lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'v', 'r', 'a' ) ) +#define ISOM_BOX_TYPE_NMHD lsmash_form_iso_box_type( LSMASH_4CC( 'n', 'm', 'h', 'd' ) ) +#define ISOM_BOX_TYPE_OCHD lsmash_form_iso_box_type( LSMASH_4CC( 'o', 'c', 'h', 'd' ) ) +#define ISOM_BOX_TYPE_ODAF lsmash_form_iso_box_type( LSMASH_4CC( 'o', 'd', 'a', 'f' ) ) +#define ISOM_BOX_TYPE_ODDA lsmash_form_iso_box_type( LSMASH_4CC( 'o', 'd', 'd', 'a' ) ) +#define ISOM_BOX_TYPE_ODHD lsmash_form_iso_box_type( LSMASH_4CC( 'o', 'd', 'h', 'd' ) ) +#define ISOM_BOX_TYPE_ODHE lsmash_form_iso_box_type( LSMASH_4CC( 'o', 'd', 'h', 'e' ) ) +#define ISOM_BOX_TYPE_ODRB lsmash_form_iso_box_type( LSMASH_4CC( 'o', 'd', 'r', 'b' ) ) +#define ISOM_BOX_TYPE_ODRM lsmash_form_iso_box_type( LSMASH_4CC( 'o', 'd', 'r', 'm' ) ) +#define ISOM_BOX_TYPE_ODTT lsmash_form_iso_box_type( LSMASH_4CC( 'o', 'd', 't', 't' ) ) +#define ISOM_BOX_TYPE_OHDR lsmash_form_iso_box_type( LSMASH_4CC( 'o', 'h', 'd', 'r' ) ) +#define ISOM_BOX_TYPE_PADB lsmash_form_iso_box_type( LSMASH_4CC( 'p', 'a', 'd', 'b' ) ) +#define ISOM_BOX_TYPE_PAEN lsmash_form_iso_box_type( LSMASH_4CC( 'p', 'a', 'e', 'n' ) ) +#define ISOM_BOX_TYPE_PCLR lsmash_form_iso_box_type( LSMASH_4CC( 'p', 'c', 'l', 'r' ) ) +#define ISOM_BOX_TYPE_PDIN lsmash_form_iso_box_type( LSMASH_4CC( 'p', 'd', 'i', 'n' ) ) +#define ISOM_BOX_TYPE_PERF lsmash_form_iso_box_type( LSMASH_4CC( 'p', 'e', 'r', 'f' ) ) +#define ISOM_BOX_TYPE_PITM lsmash_form_iso_box_type( LSMASH_4CC( 'p', 'i', 't', 'm' ) ) +#define ISOM_BOX_TYPE_RES lsmash_form_iso_box_type( LSMASH_4CC( 'r', 'e', 's', ' ' ) ) +#define ISOM_BOX_TYPE_RESC lsmash_form_iso_box_type( LSMASH_4CC( 'r', 'e', 's', 'c' ) ) +#define ISOM_BOX_TYPE_RESD lsmash_form_iso_box_type( LSMASH_4CC( 'r', 'e', 's', 'd' ) ) +#define ISOM_BOX_TYPE_RTNG lsmash_form_iso_box_type( LSMASH_4CC( 'r', 't', 'n', 'g' ) ) +#define ISOM_BOX_TYPE_SBGP lsmash_form_iso_box_type( LSMASH_4CC( 's', 'b', 'g', 'p' ) ) +#define ISOM_BOX_TYPE_SCHI 
lsmash_form_iso_box_type( LSMASH_4CC( 's', 'c', 'h', 'i' ) ) +#define ISOM_BOX_TYPE_SCHM lsmash_form_iso_box_type( LSMASH_4CC( 's', 'c', 'h', 'm' ) ) +#define ISOM_BOX_TYPE_SDEP lsmash_form_iso_box_type( LSMASH_4CC( 's', 'd', 'e', 'p' ) ) +#define ISOM_BOX_TYPE_SDHD lsmash_form_iso_box_type( LSMASH_4CC( 's', 'd', 'h', 'd' ) ) +#define ISOM_BOX_TYPE_SDTP lsmash_form_iso_box_type( LSMASH_4CC( 's', 'd', 't', 'p' ) ) +#define ISOM_BOX_TYPE_SDVP lsmash_form_iso_box_type( LSMASH_4CC( 's', 'd', 'v', 'p' ) ) +#define ISOM_BOX_TYPE_SEGR lsmash_form_iso_box_type( LSMASH_4CC( 's', 'e', 'g', 'r' ) ) +#define ISOM_BOX_TYPE_SGPD lsmash_form_iso_box_type( LSMASH_4CC( 's', 'g', 'p', 'd' ) ) +#define ISOM_BOX_TYPE_SINF lsmash_form_iso_box_type( LSMASH_4CC( 's', 'i', 'n', 'f' ) ) +#define ISOM_BOX_TYPE_SKIP lsmash_form_iso_box_type( LSMASH_4CC( 's', 'k', 'i', 'p' ) ) +#define ISOM_BOX_TYPE_SMHD lsmash_form_iso_box_type( LSMASH_4CC( 's', 'm', 'h', 'd' ) ) +#define ISOM_BOX_TYPE_SRMB lsmash_form_iso_box_type( LSMASH_4CC( 's', 'r', 'm', 'b' ) ) +#define ISOM_BOX_TYPE_SRMC lsmash_form_iso_box_type( LSMASH_4CC( 's', 'r', 'm', 'c' ) ) +#define ISOM_BOX_TYPE_SRPP lsmash_form_iso_box_type( LSMASH_4CC( 's', 'r', 'p', 'p' ) ) +#define ISOM_BOX_TYPE_STBL lsmash_form_iso_box_type( LSMASH_4CC( 's', 't', 'b', 'l' ) ) +#define ISOM_BOX_TYPE_STCO lsmash_form_iso_box_type( LSMASH_4CC( 's', 't', 'c', 'o' ) ) +#define ISOM_BOX_TYPE_STDP lsmash_form_iso_box_type( LSMASH_4CC( 's', 't', 'd', 'p' ) ) +#define ISOM_BOX_TYPE_STSC lsmash_form_iso_box_type( LSMASH_4CC( 's', 't', 's', 'c' ) ) +#define ISOM_BOX_TYPE_STSD lsmash_form_iso_box_type( LSMASH_4CC( 's', 't', 's', 'd' ) ) +#define ISOM_BOX_TYPE_STSH lsmash_form_iso_box_type( LSMASH_4CC( 's', 't', 's', 'h' ) ) +#define ISOM_BOX_TYPE_STSS lsmash_form_iso_box_type( LSMASH_4CC( 's', 't', 's', 's' ) ) +#define ISOM_BOX_TYPE_STSZ lsmash_form_iso_box_type( LSMASH_4CC( 's', 't', 's', 'z' ) ) +#define ISOM_BOX_TYPE_STTS lsmash_form_iso_box_type( LSMASH_4CC( 
's', 't', 't', 's' ) ) +#define ISOM_BOX_TYPE_STZ2 lsmash_form_iso_box_type( LSMASH_4CC( 's', 't', 'z', '2' ) ) +#define ISOM_BOX_TYPE_SUBS lsmash_form_iso_box_type( LSMASH_4CC( 's', 'u', 'b', 's' ) ) +#define ISOM_BOX_TYPE_SWTC lsmash_form_iso_box_type( LSMASH_4CC( 's', 'w', 't', 'c' ) ) +#define ISOM_BOX_TYPE_TFHD lsmash_form_iso_box_type( LSMASH_4CC( 't', 'f', 'h', 'd' ) ) +#define ISOM_BOX_TYPE_TFDT lsmash_form_iso_box_type( LSMASH_4CC( 't', 'f', 'd', 't' ) ) +#define ISOM_BOX_TYPE_TFRA lsmash_form_iso_box_type( LSMASH_4CC( 't', 'f', 'r', 'a' ) ) +#define ISOM_BOX_TYPE_TIBR lsmash_form_iso_box_type( LSMASH_4CC( 't', 'i', 'b', 'r' ) ) +#define ISOM_BOX_TYPE_TIRI lsmash_form_iso_box_type( LSMASH_4CC( 't', 'i', 'r', 'i' ) ) +#define ISOM_BOX_TYPE_TITL lsmash_form_iso_box_type( LSMASH_4CC( 't', 'i', 't', 'l' ) ) +#define ISOM_BOX_TYPE_TKHD lsmash_form_iso_box_type( LSMASH_4CC( 't', 'k', 'h', 'd' ) ) +#define ISOM_BOX_TYPE_TRAF lsmash_form_iso_box_type( LSMASH_4CC( 't', 'r', 'a', 'f' ) ) +#define ISOM_BOX_TYPE_TRAK lsmash_form_iso_box_type( LSMASH_4CC( 't', 'r', 'a', 'k' ) ) +#define ISOM_BOX_TYPE_TREF lsmash_form_iso_box_type( LSMASH_4CC( 't', 'r', 'e', 'f' ) ) +#define ISOM_BOX_TYPE_TREX lsmash_form_iso_box_type( LSMASH_4CC( 't', 'r', 'e', 'x' ) ) +#define ISOM_BOX_TYPE_TRGR lsmash_form_iso_box_type( LSMASH_4CC( 't', 'r', 'g', 'r' ) ) +#define ISOM_BOX_TYPE_TRUN lsmash_form_iso_box_type( LSMASH_4CC( 't', 'r', 'u', 'n' ) ) +#define ISOM_BOX_TYPE_TSEL lsmash_form_iso_box_type( LSMASH_4CC( 't', 's', 'e', 'l' ) ) +#define ISOM_BOX_TYPE_UDTA lsmash_form_iso_box_type( LSMASH_4CC( 'u', 'd', 't', 'a' ) ) +#define ISOM_BOX_TYPE_UINF lsmash_form_iso_box_type( LSMASH_4CC( 'u', 'i', 'n', 'f' ) ) +#define ISOM_BOX_TYPE_ULST lsmash_form_iso_box_type( LSMASH_4CC( 'u', 'l', 's', 't' ) ) +#define ISOM_BOX_TYPE_URL lsmash_form_iso_box_type( LSMASH_4CC( 'u', 'r', 'l', ' ' ) ) +#define ISOM_BOX_TYPE_URN lsmash_form_iso_box_type( LSMASH_4CC( 'u', 'r', 'n', ' ' ) ) +#define 
ISOM_BOX_TYPE_UUID lsmash_form_iso_box_type( LSMASH_4CC( 'u', 'u', 'i', 'd' ) ) +#define ISOM_BOX_TYPE_VMHD lsmash_form_iso_box_type( LSMASH_4CC( 'v', 'm', 'h', 'd' ) ) +#define ISOM_BOX_TYPE_VWDI lsmash_form_iso_box_type( LSMASH_4CC( 'v', 'w', 'd', 'i' ) ) +#define ISOM_BOX_TYPE_XML lsmash_form_iso_box_type( LSMASH_4CC( 'x', 'm', 'l', ' ' ) ) +#define ISOM_BOX_TYPE_YRRC lsmash_form_iso_box_type( LSMASH_4CC( 'y', 'r', 'r', 'c' ) ) + +#define ISOM_BOX_TYPE_BTRT lsmash_form_iso_box_type( LSMASH_4CC( 'b', 't', 'r', 't' ) ) +#define ISOM_BOX_TYPE_CLAP lsmash_form_iso_box_type( LSMASH_4CC( 'c', 'l', 'a', 'p' ) ) +#define ISOM_BOX_TYPE_PASP lsmash_form_iso_box_type( LSMASH_4CC( 'p', 'a', 's', 'p' ) ) +#define ISOM_BOX_TYPE_STSL lsmash_form_iso_box_type( LSMASH_4CC( 's', 't', 's', 'l' ) ) + +#define ISOM_BOX_TYPE_FTAB lsmash_form_iso_box_type( LSMASH_4CC( 'f', 't', 'a', 'b' ) ) + +/* iTunes Metadata */ +#define ISOM_BOX_TYPE_DATA lsmash_form_iso_box_type( LSMASH_4CC( 'd', 'a', 't', 'a' ) ) +#define ISOM_BOX_TYPE_ILST lsmash_form_iso_box_type( LSMASH_4CC( 'i', 'l', 's', 't' ) ) +#define ISOM_BOX_TYPE_MEAN lsmash_form_iso_box_type( LSMASH_4CC( 'm', 'e', 'a', 'n' ) ) +#define ISOM_BOX_TYPE_NAME lsmash_form_iso_box_type( LSMASH_4CC( 'n', 'a', 'm', 'e' ) ) + +/* Tyrant extension */ +#define ISOM_BOX_TYPE_CHPL lsmash_form_iso_box_type( LSMASH_4CC( 'c', 'h', 'p', 'l' ) ) + +/* Decoder Specific Info */ +#define ISOM_BOX_TYPE_ALAC lsmash_form_iso_box_type( LSMASH_4CC( 'a', 'l', 'a', 'c' ) ) +#define ISOM_BOX_TYPE_AVCC lsmash_form_iso_box_type( LSMASH_4CC( 'a', 'v', 'c', 'C' ) ) +#define ISOM_BOX_TYPE_DAC3 lsmash_form_iso_box_type( LSMASH_4CC( 'd', 'a', 'c', '3' ) ) +#define ISOM_BOX_TYPE_DAMR lsmash_form_iso_box_type( LSMASH_4CC( 'd', 'a', 'm', 'r' ) ) +#define ISOM_BOX_TYPE_DDTS lsmash_form_iso_box_type( LSMASH_4CC( 'd', 'd', 't', 's' ) ) +#define ISOM_BOX_TYPE_DEC3 lsmash_form_iso_box_type( LSMASH_4CC( 'd', 'e', 'c', '3' ) ) +#define ISOM_BOX_TYPE_DVC1 lsmash_form_iso_box_type( 
LSMASH_4CC( 'd', 'v', 'c', '1' ) ) +#define ISOM_BOX_TYPE_ESDS lsmash_form_iso_box_type( LSMASH_4CC( 'e', 's', 'd', 's' ) ) + +#define QT_BOX_TYPE_ALLF lsmash_form_qtff_box_type( LSMASH_4CC( 'A', 'l', 'l', 'F' ) ) +#define QT_BOX_TYPE_CLEF lsmash_form_qtff_box_type( LSMASH_4CC( 'c', 'l', 'e', 'f' ) ) +#define QT_BOX_TYPE_CLIP lsmash_form_qtff_box_type( LSMASH_4CC( 'c', 'l', 'i', 'p' ) ) +#define QT_BOX_TYPE_CRGN lsmash_form_qtff_box_type( LSMASH_4CC( 'c', 'r', 'g', 'n' ) ) +#define QT_BOX_TYPE_CTAB lsmash_form_qtff_box_type( LSMASH_4CC( 'c', 't', 'a', 'b' ) ) +#define QT_BOX_TYPE_ENOF lsmash_form_qtff_box_type( LSMASH_4CC( 'e', 'n', 'o', 'f' ) ) +#define QT_BOX_TYPE_GMHD lsmash_form_qtff_box_type( LSMASH_4CC( 'g', 'm', 'h', 'd' ) ) +#define QT_BOX_TYPE_GMIN lsmash_form_qtff_box_type( LSMASH_4CC( 'g', 'm', 'i', 'n' ) ) +#define QT_BOX_TYPE_ILST lsmash_form_qtff_box_type( LSMASH_4CC( 'i', 'l', 's', 't' ) ) +#define QT_BOX_TYPE_IMAP lsmash_form_qtff_box_type( LSMASH_4CC( 'i', 'm', 'a', 'p' ) ) +#define QT_BOX_TYPE_KEYS lsmash_form_qtff_box_type( LSMASH_4CC( 'k', 'e', 'y', 's' ) ) +#define QT_BOX_TYPE_KMAT lsmash_form_qtff_box_type( LSMASH_4CC( 'k', 'm', 'a', 't' ) ) +#define QT_BOX_TYPE_LOAD lsmash_form_qtff_box_type( LSMASH_4CC( 'l', 'o', 'a', 'd' ) ) +#define QT_BOX_TYPE_LOOP lsmash_form_qtff_box_type( LSMASH_4CC( 'L', 'O', 'O', 'P' ) ) +#define QT_BOX_TYPE_MATT lsmash_form_qtff_box_type( LSMASH_4CC( 'm', 'a', 't', 't' ) ) +#define QT_BOX_TYPE_META lsmash_form_qtff_box_type( LSMASH_4CC( 'm', 'e', 't', 'a' ) ) +#define QT_BOX_TYPE_PNOT lsmash_form_qtff_box_type( LSMASH_4CC( 'p', 'n', 'o', 't' ) ) +#define QT_BOX_TYPE_PROF lsmash_form_qtff_box_type( LSMASH_4CC( 'p', 'r', 'o', 'f' ) ) +#define QT_BOX_TYPE_SELO lsmash_form_qtff_box_type( LSMASH_4CC( 'S', 'e', 'l', 'O' ) ) +#define QT_BOX_TYPE_STPS lsmash_form_qtff_box_type( LSMASH_4CC( 's', 't', 'p', 's' ) ) +#define QT_BOX_TYPE_TAPT lsmash_form_qtff_box_type( LSMASH_4CC( 't', 'a', 'p', 't' ) ) +#define QT_BOX_TYPE_TEXT 
lsmash_form_qtff_box_type( LSMASH_4CC( 't', 'e', 'x', 't' ) ) +#define QT_BOX_TYPE_WLOC lsmash_form_qtff_box_type( LSMASH_4CC( 'W', 'L', 'O', 'C' ) ) + +#define QT_BOX_TYPE_CHAN lsmash_form_qtff_box_type( LSMASH_4CC( 'c', 'h', 'a', 'n' ) ) +#define QT_BOX_TYPE_COLR lsmash_form_qtff_box_type( LSMASH_4CC( 'c', 'o', 'l', 'r' ) ) +#define QT_BOX_TYPE_CSPC lsmash_form_qtff_box_type( LSMASH_4CC( 'c', 's', 'p', 'c' ) ) +#define QT_BOX_TYPE_ENDA lsmash_form_qtff_box_type( LSMASH_4CC( 'e', 'n', 'd', 'a' ) ) +#define QT_BOX_TYPE_FIEL lsmash_form_qtff_box_type( LSMASH_4CC( 'f', 'i', 'e', 'l' ) ) +#define QT_BOX_TYPE_FRMA lsmash_form_qtff_box_type( LSMASH_4CC( 'f', 'r', 'm', 'a' ) ) +#define QT_BOX_TYPE_GAMA lsmash_form_qtff_box_type( LSMASH_4CC( 'g', 'a', 'm', 'a' ) ) +#define QT_BOX_TYPE_SGBT lsmash_form_qtff_box_type( LSMASH_4CC( 's', 'g', 'b', 't' ) ) +#define QT_BOX_TYPE_WAVE lsmash_form_qtff_box_type( LSMASH_4CC( 'w', 'a', 'v', 'e' ) ) +#define QT_BOX_TYPE_TERMINATOR lsmash_form_qtff_box_type( 0x00000000 ) + +/* Decoder Specific Info */ +#define QT_BOX_TYPE_ALAC lsmash_form_qtff_box_type( LSMASH_4CC( 'a', 'l', 'a', 'c' ) ) +#define QT_BOX_TYPE_ESDS lsmash_form_qtff_box_type( LSMASH_4CC( 'e', 's', 'd', 's' ) ) +#define QT_BOX_TYPE_GLBL lsmash_form_qtff_box_type( LSMASH_4CC( 'g', 'l', 'b', 'l' ) ) +#define QT_BOX_TYPE_MP4A lsmash_form_qtff_box_type( LSMASH_4CC( 'm', 'p', '4', 'a' ) ) + +/* Track reference types */ +typedef enum +{ + ISOM_TREF_TYPE_AVCP = LSMASH_4CC( 'a', 'v', 'c', 'p' ), /* AVC parameter set stream link */ + ISOM_TREF_TYPE_CDSC = LSMASH_4CC( 'c', 'd', 's', 'c' ), /* This track describes the referenced track. */ + ISOM_TREF_TYPE_DPND = LSMASH_4CC( 'd', 'p', 'n', 'd' ), /* This track has an MPEG-4 dependency on the referenced track. 
*/ + ISOM_TREF_TYPE_HIND = LSMASH_4CC( 'h', 'i', 'n', 'd' ), /* Hint dependency */ + ISOM_TREF_TYPE_HINT = LSMASH_4CC( 'h', 'i', 'n', 't' ), /* Links hint track to original media track */ + ISOM_TREF_TYPE_IPIR = LSMASH_4CC( 'i', 'p', 'i', 'r' ), /* This track contains IPI declarations for the referenced track. */ + ISOM_TREF_TYPE_MPOD = LSMASH_4CC( 'm', 'p', 'o', 'd' ), /* This track is an OD track which uses the referenced track as an included elementary stream track. */ + ISOM_TREF_TYPE_SBAS = LSMASH_4CC( 's', 'b', 'a', 's' ), /* Scalable base */ + ISOM_TREF_TYPE_SCAL = LSMASH_4CC( 's', 'c', 'a', 'l' ), /* Scalable extraction */ + ISOM_TREF_TYPE_SWFR = LSMASH_4CC( 's', 'w', 'f', 'r' ), /* AVC Switch from */ + ISOM_TREF_TYPE_SWTO = LSMASH_4CC( 's', 'w', 't', 'o' ), /* AVC Switch to */ + ISOM_TREF_TYPE_SYNC = LSMASH_4CC( 's', 'y', 'n', 'c' ), /* This track uses the referenced track as its synchronization source. */ + ISOM_TREF_TYPE_VDEP = LSMASH_4CC( 'v', 'd', 'e', 'p' ), /* Auxiliary video depth */ + ISOM_TREF_TYPE_VPLX = LSMASH_4CC( 'v', 'p', 'l', 'x' ), /* Auxiliary video parallax */ + + QT_TREF_TYPE_CHAP = LSMASH_4CC( 'c', 'h', 'a', 'p' ), /* Chapter or scene list. Usually references a text track. */ + QT_TREF_TYPE_SCPT = LSMASH_4CC( 's', 'c', 'p', 't' ), /* Transcript. Usually references a text track. */ + QT_TREF_TYPE_SSRC = LSMASH_4CC( 's', 's', 'r', 'c' ), /* Nonprimary source. Indicates that the referenced track should send its data to this track, rather than presenting it. */ + QT_TREF_TYPE_TMCD = LSMASH_4CC( 't', 'm', 'c', 'd' ), /* Time code. Usually references a time code track. 
*/ +} isom_track_reference_type; + +/* Handler types */ +enum isom_handler_type +{ + QT_HANDLER_TYPE_DATA = LSMASH_4CC( 'd', 'h', 'l', 'r' ), + QT_HANDLER_TYPE_MEDIA = LSMASH_4CC( 'm', 'h', 'l', 'r' ), +}; + +enum isom_meta_type +{ + ISOM_META_HANDLER_TYPE_ITUNES_METADATA = LSMASH_4CC( 'm', 'd', 'i', 'r' ), +}; + +/* Data reference types */ +enum isom_data_reference_type +{ + ISOM_REFERENCE_HANDLER_TYPE_URL = LSMASH_4CC( 'u', 'r', 'l', ' ' ), + ISOM_REFERENCE_HANDLER_TYPE_URN = LSMASH_4CC( 'u', 'r', 'n', ' ' ), + + QT_REFERENCE_HANDLER_TYPE_ALIAS = LSMASH_4CC( 'a', 'l', 'i', 's' ), + QT_REFERENCE_HANDLER_TYPE_RESOURCE = LSMASH_4CC( 'r', 's', 'r', 'c' ), + QT_REFERENCE_HANDLER_TYPE_URL = LSMASH_4CC( 'u', 'r', 'l', ' ' ), +}; + +/* Language codes */ +typedef struct +{ + uint16_t mac_value; + uint16_t iso_name; +} isom_language_t; + +/* Pairs each mac_value with an ISOM_LANGUAGE_CODE_* value. + * NOTE(review): the final { UINT16_MAX, 0 } entry looks like an end-of-table sentinel - confirm against the lookup code. + * Some mac_value numbers (e.g. 31, 50, 58, 86) have no entry in this table. */ +static const isom_language_t isom_languages[] = +{ + { 0, ISOM_LANGUAGE_CODE_ENGLISH }, + { 1, ISOM_LANGUAGE_CODE_FRENCH }, + { 2, ISOM_LANGUAGE_CODE_GERMAN }, + { 3, ISOM_LANGUAGE_CODE_ITALIAN }, + { 4, ISOM_LANGUAGE_CODE_DUTCH_M }, + { 5, ISOM_LANGUAGE_CODE_SWEDISH }, + { 6, ISOM_LANGUAGE_CODE_SPANISH }, + { 7, ISOM_LANGUAGE_CODE_DANISH }, + { 8, ISOM_LANGUAGE_CODE_PORTUGUESE }, + { 9, ISOM_LANGUAGE_CODE_NORWEGIAN }, + { 10, ISOM_LANGUAGE_CODE_HEBREW }, + { 11, ISOM_LANGUAGE_CODE_JAPANESE }, + { 12, ISOM_LANGUAGE_CODE_ARABIC }, + { 13, ISOM_LANGUAGE_CODE_FINNISH }, + { 14, ISOM_LANGUAGE_CODE_GREEK }, + { 15, ISOM_LANGUAGE_CODE_ICELANDIC }, + { 16, ISOM_LANGUAGE_CODE_MALTESE }, + { 17, ISOM_LANGUAGE_CODE_TURKISH }, + { 18, ISOM_LANGUAGE_CODE_CROATIAN }, + { 19, ISOM_LANGUAGE_CODE_CHINESE }, + { 20, ISOM_LANGUAGE_CODE_URDU }, + { 21, ISOM_LANGUAGE_CODE_HINDI }, + { 22, ISOM_LANGUAGE_CODE_THAI }, + { 23, ISOM_LANGUAGE_CODE_KOREAN }, + { 24, ISOM_LANGUAGE_CODE_LITHUANIAN }, + { 25, ISOM_LANGUAGE_CODE_POLISH }, + { 26, ISOM_LANGUAGE_CODE_HUNGARIAN }, + { 27, ISOM_LANGUAGE_CODE_ESTONIAN }, + { 28, ISOM_LANGUAGE_CODE_LATVIAN }, + { 29, 
ISOM_LANGUAGE_CODE_SAMI }, + { 30, ISOM_LANGUAGE_CODE_FAROESE }, + { 32, ISOM_LANGUAGE_CODE_RUSSIAN }, + { 33, ISOM_LANGUAGE_CODE_CHINESE }, + { 34, ISOM_LANGUAGE_CODE_DUTCH }, + { 35, ISOM_LANGUAGE_CODE_IRISH }, + { 36, ISOM_LANGUAGE_CODE_ALBANIAN }, + { 37, ISOM_LANGUAGE_CODE_ROMANIAN }, + { 38, ISOM_LANGUAGE_CODE_CZECH }, + { 39, ISOM_LANGUAGE_CODE_SLOVAK }, + { 40, ISOM_LANGUAGE_CODE_SLOVENIA }, + { 41, ISOM_LANGUAGE_CODE_YIDDISH }, + { 42, ISOM_LANGUAGE_CODE_SERBIAN }, + { 43, ISOM_LANGUAGE_CODE_MACEDONIAN }, + { 44, ISOM_LANGUAGE_CODE_BULGARIAN }, + { 45, ISOM_LANGUAGE_CODE_UKRAINIAN }, + { 46, ISOM_LANGUAGE_CODE_BELARUSIAN }, + { 47, ISOM_LANGUAGE_CODE_UZBEK }, + { 48, ISOM_LANGUAGE_CODE_KAZAKH }, + { 49, ISOM_LANGUAGE_CODE_AZERBAIJANI }, + { 51, ISOM_LANGUAGE_CODE_ARMENIAN }, + { 52, ISOM_LANGUAGE_CODE_GEORGIAN }, + { 53, ISOM_LANGUAGE_CODE_MOLDAVIAN }, + { 54, ISOM_LANGUAGE_CODE_KIRGHIZ }, + { 55, ISOM_LANGUAGE_CODE_TAJIK }, + { 56, ISOM_LANGUAGE_CODE_TURKMEN }, + { 57, ISOM_LANGUAGE_CODE_MONGOLIAN }, + { 59, ISOM_LANGUAGE_CODE_PASHTO }, + { 60, ISOM_LANGUAGE_CODE_KURDISH }, + { 61, ISOM_LANGUAGE_CODE_KASHMIRI }, + { 62, ISOM_LANGUAGE_CODE_SINDHI }, + { 63, ISOM_LANGUAGE_CODE_TIBETAN }, + { 64, ISOM_LANGUAGE_CODE_NEPALI }, + { 65, ISOM_LANGUAGE_CODE_SANSKRIT }, + { 66, ISOM_LANGUAGE_CODE_MARATHI }, + { 67, ISOM_LANGUAGE_CODE_BENGALI }, + { 68, ISOM_LANGUAGE_CODE_ASSAMESE }, + { 69, ISOM_LANGUAGE_CODE_GUJARATI }, + { 70, ISOM_LANGUAGE_CODE_PUNJABI }, + { 71, ISOM_LANGUAGE_CODE_ORIYA }, + { 72, ISOM_LANGUAGE_CODE_MALAYALAM }, + { 73, ISOM_LANGUAGE_CODE_KANNADA }, + { 74, ISOM_LANGUAGE_CODE_TAMIL }, + { 75, ISOM_LANGUAGE_CODE_TELUGU }, + { 76, ISOM_LANGUAGE_CODE_SINHALESE }, + { 77, ISOM_LANGUAGE_CODE_BURMESE }, + { 78, ISOM_LANGUAGE_CODE_KHMER }, + { 79, ISOM_LANGUAGE_CODE_LAO }, + { 80, ISOM_LANGUAGE_CODE_VIETNAMESE }, + { 81, ISOM_LANGUAGE_CODE_INDONESIAN }, + { 82, ISOM_LANGUAGE_CODE_TAGALOG }, + { 83, ISOM_LANGUAGE_CODE_MALAY_ROMAN }, + { 84, 
ISOM_LANGUAGE_CODE_MAYAY_ARABIC }, + { 85, ISOM_LANGUAGE_CODE_AMHARIC }, + { 87, ISOM_LANGUAGE_CODE_OROMO }, + { 88, ISOM_LANGUAGE_CODE_SOMALI }, + { 89, ISOM_LANGUAGE_CODE_SWAHILI }, + { 90, ISOM_LANGUAGE_CODE_KINYARWANDA }, + { 91, ISOM_LANGUAGE_CODE_RUNDI }, + { 92, ISOM_LANGUAGE_CODE_CHEWA }, + { 93, ISOM_LANGUAGE_CODE_MALAGASY }, + { 94, ISOM_LANGUAGE_CODE_ESPERANTO }, + { 128, ISOM_LANGUAGE_CODE_WELSH }, + { 129, ISOM_LANGUAGE_CODE_BASQUE }, + { 130, ISOM_LANGUAGE_CODE_CATALAN }, + { 131, ISOM_LANGUAGE_CODE_LATIN }, + { 132, ISOM_LANGUAGE_CODE_QUECHUA }, + { 133, ISOM_LANGUAGE_CODE_GUARANI }, + { 134, ISOM_LANGUAGE_CODE_AYMARA }, + { 135, ISOM_LANGUAGE_CODE_TATAR }, + { 136, ISOM_LANGUAGE_CODE_UIGHUR }, + { 137, ISOM_LANGUAGE_CODE_DZONGKHA }, + { 138, ISOM_LANGUAGE_CODE_JAVANESE }, + { UINT16_MAX, 0 } +}; + +/* Color parameters */ +enum isom_color_patameter_type +{ + ISOM_COLOR_PARAMETER_TYPE_NCLX = LSMASH_4CC( 'n', 'c', 'l', 'x' ), /* on-screen colours */ + ISOM_COLOR_PARAMETER_TYPE_RICC = LSMASH_4CC( 'r', 'I', 'C', 'C' ), /* restricted ICC profile */ + ISOM_COLOR_PARAMETER_TYPE_PROF = LSMASH_4CC( 'p', 'r', 'o', 'f' ), /* unrestricted ICC profile */ + + QT_COLOR_PARAMETER_TYPE_NCLC = LSMASH_4CC( 'n', 'c', 'l', 'c' ), /* NonConstant Luminance Coding */ + QT_COLOR_PARAMETER_TYPE_PROF = LSMASH_4CC( 'p', 'r', 'o', 'f' ), /* ICC profile */ +}; + +/* Sample grouping types */ +typedef enum +{ + ISOM_GROUP_TYPE_3GAG = LSMASH_4CC( '3', 'g', 'a', 'g' ), /* Text track3GPP PSS Annex G video buffer parameters */ + ISOM_GROUP_TYPE_ALST = LSMASH_4CC( 'a', 'l', 's', 't' ), /* Alternative startup sequence */ + ISOM_GROUP_TYPE_AVCB = LSMASH_4CC( 'a', 'v', 'c', 'b' ), /* AVC HRD parameters */ + ISOM_GROUP_TYPE_AVLL = LSMASH_4CC( 'a', 'v', 'l', 'l' ), /* AVC Layer */ + ISOM_GROUP_TYPE_AVSS = LSMASH_4CC( 'a', 'v', 's', 's' ), /* AVC Sub Sequence */ + ISOM_GROUP_TYPE_DTRT = LSMASH_4CC( 'd', 't', 'r', 't' ), /* Decode re-timing */ + ISOM_GROUP_TYPE_MVIF = LSMASH_4CC( 'm', 'v', 
'i', 'f' ), /* MVC Scalability Information */ + ISOM_GROUP_TYPE_RAP = LSMASH_4CC( 'r', 'a', 'p', ' ' ), /* Random Access Point */ + ISOM_GROUP_TYPE_RASH = LSMASH_4CC( 'r', 'a', 's', 'h' ), /* Rate Share */ + ISOM_GROUP_TYPE_ROLL = LSMASH_4CC( 'r', 'o', 'l', 'l' ), /* Random Access Recovery Point */ + ISOM_GROUP_TYPE_SCIF = LSMASH_4CC( 's', 'c', 'i', 'f' ), /* SVC Scalability Information */ + ISOM_GROUP_TYPE_SCNM = LSMASH_4CC( 's', 'c', 'n', 'm' ), /* AVC/SVC/MVC map groups */ + ISOM_GROUP_TYPE_VIPR = LSMASH_4CC( 'v', 'i', 'p', 'r' ), /* View priority */ +} isom_grouping_type; + +int isom_is_fullbox( void *box ); +int isom_is_lpcm_audio( void *box ); +int isom_is_uncompressed_ycbcr( lsmash_box_type_t type ); + +void isom_init_box_common( void *box, void *parent, lsmash_box_type_t box_type ); +size_t isom_skip_box_common( uint8_t **p_data ); + +void isom_bs_put_basebox_common( lsmash_bs_t *bs, isom_box_t *box ); +void isom_bs_put_fullbox_common( lsmash_bs_t *bs, isom_box_t *box ); +void isom_bs_put_box_common( lsmash_bs_t *bs, void *box ); + +int isom_check_compatibility( lsmash_root_t *root ); + +char *isom_4cc2str( uint32_t fourcc ); + +isom_trak_entry_t *isom_get_trak( lsmash_root_t *root, uint32_t track_ID ); +isom_trex_entry_t *isom_get_trex( isom_mvex_t *mvex, uint32_t track_ID ); +isom_tfra_entry_t *isom_get_tfra( isom_mfra_t *mfra, uint32_t track_ID ); +isom_sgpd_entry_t *isom_get_sample_group_description( isom_stbl_t *stbl, uint32_t grouping_type ); +isom_sbgp_entry_t *isom_get_sample_to_group( isom_stbl_t *stbl, uint32_t grouping_type ); + +isom_avcC_ps_entry_t *isom_create_ps_entry( uint8_t *ps, uint32_t ps_size ); +void isom_remove_avcC_ps( isom_avcC_ps_entry_t *ps ); + +int isom_add_edts( isom_trak_entry_t *trak ); +int isom_add_elst( isom_edts_t *edts ); +int isom_add_clap( isom_visual_entry_t *visual ); +int isom_add_pasp( isom_visual_entry_t *visual ); +int isom_add_glbl( isom_visual_entry_t *visual ); +int isom_add_colr( isom_visual_entry_t *visual 
); +int isom_add_gama( isom_visual_entry_t *visual ); +int isom_add_fiel( isom_visual_entry_t *visual ); +int isom_add_cspc( isom_visual_entry_t *visual ); +int isom_add_sgbt( isom_visual_entry_t *visual ); +int isom_add_stsl( isom_visual_entry_t *visual ); +int isom_add_avcC( isom_visual_entry_t *visual ); +int isom_add_btrt( isom_visual_entry_t *visual ); +int isom_add_wave( isom_audio_entry_t *audio ); +int isom_add_frma( isom_wave_t *wave ); +int isom_add_enda( isom_wave_t *wave ); +int isom_add_mp4a( isom_wave_t *wave ); +int isom_add_terminator( isom_wave_t *wave ); +int isom_add_chan( isom_audio_entry_t *audio ); +int isom_add_ftab( isom_tx3g_entry_t *tx3g ); +int isom_add_hdlr( isom_mdia_t *mdia, isom_meta_t *meta, isom_minf_t *minf, uint32_t media_type ); +int isom_add_metaitem( isom_ilst_t *ilst, lsmash_itunes_metadata_item item ); +int isom_add_mean( isom_metaitem_t *metaitem ); +int isom_add_name( isom_metaitem_t *metaitem ); +int isom_add_data( isom_metaitem_t *metaitem ); +int isom_add_ilst( isom_moov_t *moov ); +int isom_add_meta( isom_box_t *parent ); +int isom_add_udta( lsmash_root_t *root, uint32_t track_ID ); + +void isom_remove_ctab( isom_ctab_t *ctab ); +void isom_remove_tapt( isom_tapt_t *tapt ); +void isom_remove_clap( isom_clap_t *clap ); +void isom_remove_pasp( isom_pasp_t *pasp ); +void isom_remove_glbl( isom_glbl_t *glbl ); +void isom_remove_colr( isom_colr_t *colr ); +void isom_remove_gama( isom_gama_t *gama ); +void isom_remove_cspc( isom_cspc_t *cspc ); +void isom_remove_fiel( isom_fiel_t *fiel ); +void isom_remove_sgbt( isom_sgbt_t *sgbt ); +void isom_remove_stsl( isom_stsl_t *stsl ); +void isom_remove_esds( isom_esds_t *esds ); +void isom_remove_avcC( isom_avcC_t *avcC ); +void isom_remove_btrt( isom_btrt_t *btrt ); +void isom_remove_frma( isom_frma_t *frma ); +void isom_remove_enda( isom_enda_t *enda ); +void isom_remove_mp4a( isom_mp4a_t *mp4a ); +void isom_remove_terminator( isom_terminator_t *terminator ); +void isom_remove_wave( 
isom_wave_t *wave ); +void isom_remove_chan( isom_chan_t *chan ); +void isom_remove_ftab( isom_ftab_t *ftab ); +void isom_remove_mean( isom_mean_t *mean ); +void isom_remove_name( isom_name_t *name ); +void isom_remove_data( isom_data_t *data ); +void isom_remove_metaitem( isom_metaitem_t *metaitem ); +void isom_remove_ilst( isom_ilst_t *ilst ); +void isom_remove_sample_description( isom_sample_entry_t *sample ); +void isom_remove_unknown_box( isom_unknown_box_t *unknown_box ); + +/* Declares a local pointer named box_name of type isom_<box_name>_t, zero-allocates it with lsmash_malloc_zero, + * and then calls isom_init_box_common on it. + * On allocation failure the macro executes `return -1;` in the ENCLOSING function, + * so it may only be used inside functions that return int. The expansion is several statements, not one expression. */ +#define isom_create_box( box_name, parent_name, box_type ) \ + isom_##box_name##_t *(box_name) = lsmash_malloc_zero( sizeof(isom_##box_name##_t) ); \ + if( !box_name ) \ + return -1; \ + isom_init_box_common( box_name, parent_name, box_type ) + +/* Same as isom_create_box, and additionally creates box_name->list with lsmash_create_entry_list. + * If the list cannot be created, the box is freed and the enclosing function returns -1. */ +#define isom_create_list_box( box_name, parent_name, box_type ) \ + isom_create_box( box_name, parent_name, box_type ); \ + box_name->list = lsmash_create_entry_list(); \ + if( !box_name->list ) \ + { \ + free( box_name ); \ + return -1; \ + } + +/* Copies *src->box_name over *dst->box_name while keeping dst's own root, parent and pos fields. + * The expansion declares locals named root, parent and pos, so it can be used at most once per scope + * and the caller must not already have variables with those names. */ +#define isom_copy_fields( dst, src, box_name ) \ + lsmash_root_t *root = dst->box_name->root; \ + isom_box_t *parent = dst->box_name->parent; \ + uint64_t pos = dst->box_name->pos; \ + *dst->box_name = *src->box_name; \ + dst->box_name->root = root; \ + dst->box_name->parent = parent; \ + dst->box_name->pos = pos + +#endif diff --git a/output/mp4/chapter.c b/output/mp4/chapter.c new file mode 100644 index 0000000..7bfc8b2 --- /dev/null +++ b/output/mp4/chapter.c @@ -0,0 +1,367 @@ +/***************************************************************************** + * chapter.c: + ***************************************************************************** + * Copyright (C) 2010-2012 L-SMASH project + * + * Authors: Yusuke Nakamura + * Contributors: Takashi Hirata + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *****************************************************************************/ + +/* This file is available under an ISC license. */ + +#include "internal.h" /* must be placed first */ + +#include +#include +#include +#include + +#include "box.h" +#include "isom.h" + +#define CHAPTER_BUFSIZE 512 +#define UTF8_BOM "\xEF\xBB\xBF" +#define UTF8_BOM_LENGTH 3 + +static int isom_get_start_time( char *chap_time, isom_chapter_entry_t *data ) +{ + uint64_t hh, mm; + double ss; + if( sscanf( chap_time, "%"SCNu64":%2"SCNu64":%lf", &hh, &mm, &ss ) != 3 ) + return -1; + /* check overflow */ + if( hh >= 5124095 || mm >= 60 || ss >= 60 ) + return -1; + /* 1ns timescale */ + data->start_time = (hh * 3600 + mm * 60 + ss) * 1e9; + return 0; +} + +static int isom_lumber_line( char *buff, int bufsize, FILE *chapter ) +{ + char *tail; + /* remove newline codes and skip empty line */ + do{ + if( fgets( buff, bufsize, chapter ) == NULL ) + return -1; + tail = &buff[ strlen( buff ) - 1 ]; + while( tail >= buff && ( *tail == '\n' || *tail == '\r' ) ) + *tail-- = '\0'; + }while( tail < buff ); + return 0; +} + +static int isom_read_simple_chapter( FILE *chapter, isom_chapter_entry_t *data ) +{ + char buff[CHAPTER_BUFSIZE]; + int len; + + /* get start_time */ + if( isom_lumber_line( buff, CHAPTER_BUFSIZE, chapter ) ) + return -1; + char *chapter_time = strchr( buff, '=' ); /* find separator */ + if( !chapter_time++ + || isom_get_start_time( chapter_time, data ) + || 
isom_lumber_line( buff, CHAPTER_BUFSIZE, chapter ) ) /* get chapter_name */ + return -1; + char *chapter_name = strchr( buff, '=' ); /* find separator */ + if( !chapter_name++ ) + return -1; + len = LSMASH_MIN( 255, strlen( chapter_name ) ); /* We support length of chapter_name up to 255 */ + data->chapter_name = (char *)malloc( len + 1 ); + if( !data->chapter_name ) + return -1; + memcpy( data->chapter_name, chapter_name, len ); + data->chapter_name[len] = '\0'; + return 0; +} + +static int isom_read_minimum_chapter( FILE *chapter, isom_chapter_entry_t *data ) +{ + char buff[CHAPTER_BUFSIZE]; + int len; + + if( isom_lumber_line( buff, CHAPTER_BUFSIZE, chapter ) ) /* read newline */ + return -1; + char *p_buff = !memcmp( buff, UTF8_BOM, UTF8_BOM_LENGTH ) ? &buff[UTF8_BOM_LENGTH] : &buff[0]; /* BOM detection */ + if( isom_get_start_time( p_buff, data ) ) /* get start_time */ + return -1; + /* get chapter_name */ + char *chapter_name = strchr( buff, ' ' ); /* find separator */ + if( !chapter_name++ ) + return -1; + len = LSMASH_MIN( 255, strlen( chapter_name ) ); /* We support length of chapter_name up to 255 */ + data->chapter_name = (char *)malloc( len + 1 ); + if( !data->chapter_name ) + return -1; + memcpy( data->chapter_name, chapter_name, len ); + data->chapter_name[len] = '\0'; + return 0; +} + +typedef int (*fn_get_chapter_data)( FILE *, isom_chapter_entry_t * ); + +static fn_get_chapter_data isom_check_chap_line( char *file_name ) +{ + char buff[CHAPTER_BUFSIZE]; + FILE *fp = fopen( file_name, "rb" ); + if( !fp ) + { + lsmash_log( LSMASH_LOG_ERROR, "failed to open the chapter file \"%s\".\n", file_name ); + return NULL; + } + fn_get_chapter_data fnc = NULL; + if( fgets( buff, CHAPTER_BUFSIZE, fp ) != NULL ) + { + char *p_buff = !memcmp( buff, UTF8_BOM, UTF8_BOM_LENGTH ) ? 
&buff[UTF8_BOM_LENGTH] : &buff[0]; /* BOM detection */ + if( !strncmp( p_buff, "CHAPTER", 7 ) ) + fnc = isom_read_simple_chapter; + else if( isdigit( p_buff[0] ) && isdigit( p_buff[1] ) && p_buff[2] == ':' + && isdigit( p_buff[3] ) && isdigit( p_buff[4] ) && p_buff[5] == ':' ) + fnc = isom_read_minimum_chapter; + else + lsmash_log( LSMASH_LOG_ERROR, "the chapter file is malformed.\n" ); + } + fclose( fp ); + return fnc; +} + +int lsmash_set_tyrant_chapter( lsmash_root_t *root, char *file_name, int add_bom ) +{ + /* This function should be called after updating of the latest movie duration. */ + if( !root || !root->moov || !root->moov->mvhd || !root->moov->mvhd->timescale || !root->moov->mvhd->duration ) + goto error_message; + /* check each line format */ + fn_get_chapter_data fnc = isom_check_chap_line( file_name ); + if( !fnc ) + goto error_message; + FILE *chapter = fopen( file_name, "rb" ); + if( !chapter ) + { + lsmash_log( LSMASH_LOG_ERROR, "failed to open the chapter file \"%s\".\n", file_name ); + goto error_message; + } + if( isom_add_udta( root, 0 ) || isom_add_chpl( root->moov ) ) + goto fail; + isom_chapter_entry_t data = {0}; + while( !fnc( chapter, &data ) ) + { + if( add_bom ) + { + char *chapter_name_with_bom = (char *)malloc( strlen( data.chapter_name ) + 1 + UTF8_BOM_LENGTH ); + if( !chapter_name_with_bom ) + goto fail2; + sprintf( chapter_name_with_bom, "%s%s", UTF8_BOM, data.chapter_name ); + free( data.chapter_name ); + data.chapter_name = chapter_name_with_bom; + } + data.start_time = (data.start_time + 50) / 100; /* convert to 100ns unit */ + if( data.start_time / 1e7 > (double)root->moov->mvhd->duration / root->moov->mvhd->timescale ) + { + lsmash_log( LSMASH_LOG_WARNING, "a chapter point exceeding the actual duration detected. 
This chapter point and the following ones (if any) will be cut off.\n" ); + free( data.chapter_name ); + break; + } + if( isom_add_chpl_entry( root->moov->udta->chpl, &data ) ) + goto fail2; + free( data.chapter_name ); + data.chapter_name = NULL; + } + fclose( chapter ); + return 0; +fail2: + if( data.chapter_name ) + free( data.chapter_name ); +fail: + fclose( chapter ); +error_message: + lsmash_log( LSMASH_LOG_ERROR, "failed to set chapter list.\n" ); + return -1; +} + +int lsmash_create_reference_chapter_track( lsmash_root_t *root, uint32_t track_ID, char *file_name ) +{ + if( !root || !root->moov || !root->moov->mvhd || !root->moov->trak_list ) + goto error_message; + if( !root->qt_compatible && !root->itunes_movie ) + { + lsmash_log( LSMASH_LOG_ERROR, "reference chapter is not available for this file.\n" ); + goto error_message; + } + FILE *chapter = NULL; /* shut up 'uninitialized' warning */ + /* Create a Track Reference Box. */ + isom_trak_entry_t *trak = isom_get_trak( root, track_ID ); + if( !trak ) + { + lsmash_log( LSMASH_LOG_ERROR, "the specified track ID to apply the chapter doesn't exist.\n" ); + goto error_message; + } + if( isom_add_tref( trak ) ) + goto error_message; + /* Create a track_ID for a new chapter track. */ + uint32_t *id = (uint32_t *)malloc( sizeof(uint32_t) ); + if( !id ) + goto error_message; + uint32_t chapter_track_ID = *id = root->moov->mvhd->next_track_ID; + /* Create a Track Reference Type Box. */ + isom_tref_type_t *chap = isom_add_track_reference_type( trak->tref, QT_TREF_TYPE_CHAP, 1, id ); + if( !chap ) + goto error_message; /* no need to free id */ + /* Create a reference chapter track. */ + if( chapter_track_ID != lsmash_create_track( root, ISOM_MEDIA_HANDLER_TYPE_TEXT_TRACK ) ) + goto error_message; + /* Set track parameters. 
*/ + lsmash_track_parameters_t track_param; + lsmash_initialize_track_parameters( &track_param ); + track_param.mode = ISOM_TRACK_IN_MOVIE | ISOM_TRACK_IN_PREVIEW; + if( lsmash_set_track_parameters( root, chapter_track_ID, &track_param ) ) + goto fail; + /* Set media parameters. */ + uint64_t media_timescale = lsmash_get_media_timescale( root, track_ID ); + if( !media_timescale ) + goto fail; + lsmash_media_parameters_t media_param; + lsmash_initialize_media_parameters( &media_param ); + media_param.timescale = media_timescale; + media_param.ISO_language = root->max_3gpp_version >= 6 || root->itunes_movie ? ISOM_LANGUAGE_CODE_UNDEFINED : 0; + media_param.MAC_language = 0; + if( lsmash_set_media_parameters( root, chapter_track_ID, &media_param ) ) + goto fail; + /* Create a sample description. */ + lsmash_codec_type_t sample_type = root->max_3gpp_version >= 6 || root->itunes_movie + ? ISOM_CODEC_TYPE_TX3G_TEXT + : QT_CODEC_TYPE_TEXT_TEXT; + lsmash_summary_t summary = { .sample_type = sample_type }; + uint32_t sample_entry = lsmash_add_sample_entry( root, chapter_track_ID, &summary ); + if( !sample_entry ) + goto fail; + /* Check each line format. */ + fn_get_chapter_data fnc = isom_check_chap_line( file_name ); + if( !fnc ) + goto fail; + /* Open chapter format file. */ + chapter = fopen( file_name, "rb" ); + if( !chapter ) + { + lsmash_log( LSMASH_LOG_ERROR, "failed to open the chapter file \"%s\".\n", file_name ); + goto fail; + } + /* Parse the file and write text samples. 
*/ + isom_chapter_entry_t data; + while( !fnc( chapter, &data ) ) + { + /* set start_time */ + data.start_time = data.start_time * 1e-9 * media_timescale + 0.5; + /* write a text sample here */ + int is_qt_text = lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_TEXT_TEXT ); + uint16_t name_length = strlen( data.chapter_name ); + lsmash_sample_t *sample = lsmash_create_sample( 2 + name_length + 12 * is_qt_text ); + if( !sample ) + { + free( data.chapter_name ); + goto fail; + } + sample->data[0] = (name_length >> 8) & 0xff; + sample->data[1] = name_length & 0xff; + memcpy( sample->data + 2, data.chapter_name, name_length ); + if( is_qt_text ) + { + /* QuickTime Player requires Text Encoding Attribute Box ('encd') if media language is ISO language codes : undefined. + * Also this box can avoid garbling if the QuickTime text sample is encoded by Unicode characters. + * Note: 3GPP Timed Text supports only UTF-8 or UTF-16, so this box isn't needed. */ + static const uint8_t encd[12] = + { + 0x00, 0x00, 0x00, 0x0C, /* size: 12 */ + 0x65, 0x6E, 0x63, 0x64, /* type: 'encd' */ + 0x00, 0x00, 0x01, 0x00 /* Unicode Encoding */ + }; + memcpy( sample->data + 2 + name_length, encd, 12 ); + } + sample->dts = sample->cts = data.start_time; + sample->prop.ra_flags = ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC; + sample->index = sample_entry; + if( lsmash_append_sample( root, chapter_track_ID, sample ) ) + { + free( data.chapter_name ); + goto fail; + } + free( data.chapter_name ); + data.chapter_name = NULL; + } + if( lsmash_flush_pooled_samples( root, chapter_track_ID, 0 ) ) + goto fail; + isom_trak_entry_t *chapter_trak = isom_get_trak( root, chapter_track_ID ); + if( !chapter_trak ) + goto fail; + fclose( chapter ); + chapter_trak->is_chapter = 1; + chapter_trak->related_track_ID = track_ID; + return 0; +fail: + if( chapter ) + fclose( chapter ); + /* Remove chapter track reference. 
*/ + lsmash_remove_entry_direct( trak->tref->ref_list, trak->tref->ref_list->tail, isom_remove_track_reference_type ); + if( trak->tref->ref_list->entry_count == 0 ) + isom_remove_tref( trak->tref ); + /* Remove the reference chapter track attached at tail of the list. */ + lsmash_remove_entry_direct( root->moov->trak_list, root->moov->trak_list->tail, isom_remove_trak ); +error_message: + lsmash_log( LSMASH_LOG_ERROR, "failed to set reference chapter.\n" ); + return -1; +} + +int lsmash_print_chapter_list( lsmash_root_t *root ) +{ + if( !root || !(root->flags & LSMASH_FILE_MODE_READ) ) + return -1; + if( root->moov && root->moov->udta && root->moov->udta->chpl ) + { + isom_chpl_t *chpl = root->moov->udta->chpl; + uint32_t timescale; + if( !chpl->version ) + { + if( !root->moov && !root->moov->mvhd ) + return -1; + timescale = root->moov->mvhd->timescale; + } + else + timescale = 10000000; + uint32_t i = 1; + for( lsmash_entry_t *entry = chpl->list->head; entry; entry = entry->next ) + { + isom_chpl_entry_t *data = (isom_chpl_entry_t *)entry->data; + int64_t start_time = data->start_time / timescale; + int hh = start_time / 3600; + int mm = (start_time / 60) % 60; + int ss = start_time % 60; + int ms = ((data->start_time / (double)timescale) - hh * 3600 - mm * 60 - ss) * 1e3 + 0.5; + if( !memcmp( data->chapter_name, UTF8_BOM, UTF8_BOM_LENGTH ) ) /* detect BOM */ + { + data->chapter_name += UTF8_BOM_LENGTH; +#ifdef _WIN32 + if( i == 1 ) + printf( UTF8_BOM ); /* add BOM on Windows */ +#endif + } + printf( "CHAPTER%02"PRIu32"=%02d:%02d:%02d.%03d\n", i, hh, mm, ss, ms ); + printf( "CHAPTER%02"PRIu32"NAME=%s\n", i++, data->chapter_name ); + } + return 0; + } + else + lsmash_log( LSMASH_LOG_ERROR, "this file doesn't have a chapter list.\n" ); + return -1; +} diff --git a/output/mp4/description.c b/output/mp4/description.c new file mode 100644 index 0000000..26410cd --- /dev/null +++ b/output/mp4/description.c @@ -0,0 +1,2821 @@ 
+/***************************************************************************** + * description.c: + ***************************************************************************** + * Copyright (C) 2012 L-SMASH project + * + * Authors: Yusuke Nakamura + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *****************************************************************************/ + +/* This file is available under an ISC license. 
*/ + +#include "internal.h" /* must be placed first */ + +#include +#include + +#include "box.h" +#include "mp4a.h" +#include "mp4sys.h" +#include "description.h" + +typedef isom_wave_t lsmash_qt_decoder_parameters_t; + +/* Release a lsmash_codec_global_header_t passed as an untyped pointer: + * frees its header_data buffer (when set) and then the structure itself. A NULL argument is ignored. */ +static void global_destruct_specific_data( void *data ) +{ + if( !data ) + return; + lsmash_codec_global_header_t *global = (lsmash_codec_global_header_t *)data; + if( global->header_data ) + free( global->header_data ); + free( global ); +} + +/* Return non-zero if type is identical (per lsmash_check_codec_type_identical) to any of the + * QT_CODEC_TYPE_*_VIDEO codec types listed below, and zero otherwise. */ +static int isom_is_qt_video( lsmash_codec_type_t type ) +{ + return lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_APCH_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_APCN_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_APCS_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_APCO_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_AP4H_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_CFHD_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_CIVD_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DVC_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DVCP_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DVPP_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DV5N_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DV5P_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DVH2_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DVH3_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DVH5_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DVH6_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DVHP_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DVHQ_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DV10_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DVOO_VIDEO ) + || 
lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DVOR_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DVTV_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DVVT_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_FLIC_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_GIF_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_H261_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_H263_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_HD10_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_JPEG_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_M105_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_MJPA_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_MJPB_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_PNG_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_PNTG_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_RAW_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_RLE_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_RPZA_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_SHR0_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_SHR1_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_SHR2_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_SHR3_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_SHR4_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_SVQ1_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_SVQ3_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_TGA_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_TIFF_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_ULRA_VIDEO ) + || 
lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_ULRG_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_ULY2_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_ULY0_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_V210_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_V216_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_V308_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_V408_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_V410_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_YUV2_VIDEO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_WRLE_VIDEO ); +} + +/* Return non-zero if type is identical (per lsmash_check_codec_type_identical) to any of the + * QT_CODEC_TYPE_*_AUDIO codec types listed below, and zero otherwise. + * Note that QT_CODEC_TYPE_NOT_SPECIFIED is also accepted (last entry of the list). */ +static int isom_is_qt_audio( lsmash_codec_type_t type ) +{ + return lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_23NI_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_MAC3_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_MAC6_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_NONE_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_QDM2_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_QDMC_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_QCLP_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_AC_3_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_AGSM_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_ALAC_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_ALAW_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_CDX2_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_CDX4_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DVCA_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_DVI_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_FL32_AUDIO ) + || 
lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_FL64_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_IMA4_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_IN24_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_IN32_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_LPCM_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_MP4A_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_RAW_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_SOWT_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_TWOS_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_ULAW_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_VDVA_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_FULLMP3_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_MP3_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_ADPCM2_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_ADPCM17_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_GSM49_AUDIO ) + || lsmash_check_codec_type_identical( type, QT_CODEC_TYPE_NOT_SPECIFIED ); +} + +/* Return non-zero if type is identical to ISOM_CODEC_TYPE_AVC1_VIDEO, ISOM_CODEC_TYPE_AVC2_VIDEO + * or ISOM_CODEC_TYPE_AVCP_VIDEO, and zero otherwise. */ +static int isom_is_avc( lsmash_codec_type_t type ) +{ + return lsmash_check_codec_type_identical( type, ISOM_CODEC_TYPE_AVC1_VIDEO ) + || lsmash_check_codec_type_identical( type, ISOM_CODEC_TYPE_AVC2_VIDEO ) + || lsmash_check_codec_type_identical( type, ISOM_CODEC_TYPE_AVCP_VIDEO ); +} + +int lsmash_convert_crop_into_clap( lsmash_crop_t crop, uint32_t width, uint32_t height, lsmash_clap_t *clap ) +{ + if( !clap || crop.top.d == 0 || crop.bottom.d == 0 || crop.left.d == 0 || crop.right.d == 0 ) + return -1; + uint64_t vertical_crop_lcm = lsmash_get_lcm( crop.top.d, crop.bottom.d ); + uint64_t horizontal_crop_lcm = lsmash_get_lcm( crop.left.d, crop.right.d ); + lsmash_rational_u64_t clap_height; + lsmash_rational_u64_t clap_width; + 
lsmash_rational_s64_t clap_horizontal_offset; + lsmash_rational_s64_t clap_vertical_offset; + clap_height.d = vertical_crop_lcm; + clap_width.d = horizontal_crop_lcm; + clap_horizontal_offset.d = 2 * vertical_crop_lcm; + clap_vertical_offset.d = 2 * horizontal_crop_lcm; + clap_height.n = height * vertical_crop_lcm + - (crop.top.n * (vertical_crop_lcm / crop.top.d) + crop.bottom.n * (vertical_crop_lcm / crop.bottom.d)); + clap_width.n = width * horizontal_crop_lcm + - (crop.left.n * (horizontal_crop_lcm / crop.left.d) + crop.right.n * (horizontal_crop_lcm / crop.right.d)); + clap_horizontal_offset.n = (int64_t)(crop.left.n * (horizontal_crop_lcm / crop.left.d)) + - crop.right.n * (horizontal_crop_lcm / crop.right.d); + clap_vertical_offset.n = (int64_t)(crop.top.n * (vertical_crop_lcm / crop.top.d)) + - crop.bottom.n * (vertical_crop_lcm / crop.bottom.d); + lsmash_reduce_fraction( &clap_height.n, &clap_height.d ); + lsmash_reduce_fraction( &clap_width.n, &clap_width.d ); + lsmash_reduce_fraction_su( &clap_vertical_offset.n, &clap_vertical_offset.d ); + lsmash_reduce_fraction_su( &clap_horizontal_offset.n, &clap_horizontal_offset.d ); + clap->height = (lsmash_rational_u32_t){ clap_height.n, clap_height.d }; + clap->width = (lsmash_rational_u32_t){ clap_width.n, clap_width.d }; + clap->vertical_offset = (lsmash_rational_s32_t){ clap_vertical_offset.n, clap_vertical_offset.d }; + clap->horizontal_offset = (lsmash_rational_s32_t){ clap_horizontal_offset.n, clap_horizontal_offset.d }; + return 0; +} + +int lsmash_convert_clap_into_crop( lsmash_clap_t clap, uint32_t width, uint32_t height, lsmash_crop_t *crop ) +{ + if( !crop || clap.height.d == 0 || clap.vertical_offset.d == 0 || clap.width.d == 0 || clap.horizontal_offset.d == 0 ) + return -1; + uint64_t clap_vertical_lcm = lsmash_get_lcm( clap.height.d, clap.vertical_offset.d ); + uint64_t clap_horizontal_lcm = lsmash_get_lcm( clap.width.d, clap.horizontal_offset.d ); + lsmash_rational_u64_t crop_top; + 
lsmash_rational_u64_t crop_bottom; + lsmash_rational_u64_t crop_left; + lsmash_rational_u64_t crop_right; + crop_top.d = 2 * clap_vertical_lcm; + crop_bottom.d = 2 * clap_vertical_lcm; + crop_left.d = 2 * clap_horizontal_lcm; + crop_right.d = 2 * clap_horizontal_lcm; + crop_top.n = (height * crop_top.d - clap.height.n * (crop_top.d / clap.height.d)) / 2 + + clap.vertical_offset.n * (crop_top.d / clap.vertical_offset.d); + crop_bottom.n = (height * crop_bottom.d - clap.height.n * (crop_bottom.d / clap.height.d)) / 2 + - clap.vertical_offset.n * (crop_bottom.d / clap.vertical_offset.d); + crop_left.n = (width * crop_left.d - clap.width.n * (crop_left.d / clap.width.d)) / 2 + + clap.horizontal_offset.n * (crop_left.d / clap.horizontal_offset.d); + crop_right.n = (width * crop_right.d - clap.width.n * (crop_right.d / clap.width.d)) / 2 + - clap.horizontal_offset.n * (crop_right.d / clap.horizontal_offset.d); + lsmash_reduce_fraction( &crop_top.n, &crop_top.d ); + lsmash_reduce_fraction( &crop_bottom.n, &crop_bottom.d ); + lsmash_reduce_fraction( &crop_left.n, &crop_left.d ); + lsmash_reduce_fraction( &crop_right.n, &crop_right.d ); + crop->top = (lsmash_rational_u32_t){ crop_top.n, crop_top.d }; + crop->bottom = (lsmash_rational_u32_t){ crop_bottom.n, crop_bottom.d }; + crop->left = (lsmash_rational_u32_t){ crop_left.n, crop_left.d }; + crop->right = (lsmash_rational_u32_t){ crop_right.n, crop_right.d }; + return 0; +} + +static void isom_destruct_nothing( void *data ) +{ + /* Do nothing. 
*/; +} + +static int isom_initialize_structured_codec_specific_data( lsmash_codec_specific_t *specific ) +{ + extern void mp4sys_destruct_decoder_config( void * ); + extern void h264_destruct_specific_data( void * ); + extern void vc1_destruct_specific_data( void * ); + extern void dts_destruct_specific_data( void * ); + switch( specific->type ) + { + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_MP4SYS_DECODER_CONFIG : + specific->size = sizeof(lsmash_mp4sys_decoder_parameters_t); + specific->destruct = mp4sys_destruct_decoder_config; + break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264 : + specific->size = sizeof(lsmash_h264_specific_parameters_t); + specific->destruct = h264_destruct_specific_data; + break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_VC_1 : + specific->size = sizeof(lsmash_vc1_specific_parameters_t); + specific->destruct = vc1_destruct_specific_data; + break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_AC_3 : + specific->size = sizeof(lsmash_ac3_specific_parameters_t); + specific->destruct = free; + break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_EC_3 : + specific->size = sizeof(lsmash_eac3_specific_parameters_t); + specific->destruct = free; + break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_DTS : + specific->size = sizeof(lsmash_dts_specific_parameters_t); + specific->destruct = dts_destruct_specific_data; + break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_ALAC : + specific->size = sizeof(lsmash_alac_specific_parameters_t); + specific->destruct = free; + break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_SAMPLE_SCALE : + specific->size = sizeof(lsmash_isom_sample_scale_t); + specific->destruct = free; + break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264_BITRATE : + specific->size = sizeof(lsmash_h264_bitrate_t); + specific->destruct = free; + break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_COMMON : + specific->size = sizeof(lsmash_qt_video_common_t); + specific->destruct = free; + 
break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_COMMON : + specific->size = sizeof(lsmash_qt_audio_common_t); + specific->destruct = free; + break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_FORMAT_SPECIFIC_FLAGS : + specific->size = sizeof(lsmash_qt_audio_format_specific_flags_t); + specific->destruct = free; + break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_CODEC_GLOBAL_HEADER : + specific->size = sizeof(lsmash_codec_global_header_t); + specific->destruct = global_destruct_specific_data; + break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_FIELD_INFO : + specific->size = sizeof(lsmash_qt_field_info_t); + specific->destruct = free; + break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_PIXEL_FORMAT : + specific->size = sizeof(lsmash_qt_pixel_format_t); + specific->destruct = free; + break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_SIGNIFICANT_BITS : + specific->size = sizeof(lsmash_qt_significant_bits_t); + specific->destruct = free; + break; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_CHANNEL_LAYOUT : + specific->size = sizeof(lsmash_qt_audio_channel_layout_t); + specific->destruct = free; + break; + default : + specific->size = 0; + specific->destruct = isom_destruct_nothing; + return 0; + } + specific->data.structured = lsmash_malloc_zero( specific->size ); + if( !specific->data.structured ) + { + specific->size = 0; + specific->destruct = NULL; + return -1; + } + return 0; +} + +static inline int isom_initialize_codec_specific_data( lsmash_codec_specific_t *specific, + lsmash_codec_specific_data_type type, + lsmash_codec_specific_format format ) +{ + specific->type = type; + specific->format = format; + if( format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ) + { + if( isom_initialize_structured_codec_specific_data( specific ) ) + return -1; + } + else + { + specific->data.unstructured = NULL; + specific->size = 0; + specific->destruct = (lsmash_codec_specific_destructor_t)free; + } + return 0; +} + +void 
lsmash_destroy_codec_specific_data( lsmash_codec_specific_t *specific ) +{ + if( !specific ) + return; + if( specific->destruct ) + { + if( specific->format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ) + { + if( specific->data.structured ) + specific->destruct( specific->data.structured ); + } + else + { + if( specific->data.unstructured ) + specific->destruct( specific->data.unstructured ); + } + } + free( specific ); +} + +lsmash_codec_specific_t *lsmash_create_codec_specific_data( lsmash_codec_specific_data_type type, lsmash_codec_specific_format format ) +{ + lsmash_codec_specific_t *specific = malloc( sizeof(lsmash_codec_specific_t) ); + if( !specific ) + return NULL; + if( isom_initialize_codec_specific_data( specific, type, format ) ) + { + lsmash_destroy_codec_specific_data( specific ); + return NULL; + } + return specific; +} + +static int isom_duplicate_structured_specific_data( lsmash_codec_specific_t *dst, lsmash_codec_specific_t *src ) +{ + extern int mp4sys_copy_decoder_config( lsmash_codec_specific_t *, lsmash_codec_specific_t * ); + extern int h264_copy_codec_specific( lsmash_codec_specific_t *, lsmash_codec_specific_t * ); + extern int vc1_copy_codec_specific( lsmash_codec_specific_t *, lsmash_codec_specific_t * ); + extern int dts_copy_codec_specific( lsmash_codec_specific_t *, lsmash_codec_specific_t * ); + void *src_data = src->data.structured; + void *dst_data = dst->data.structured; + switch( src->type ) + { + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_MP4SYS_DECODER_CONFIG : + return mp4sys_copy_decoder_config( dst, src ); + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264 : + return h264_copy_codec_specific( dst, src ); + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_VC_1 : + return vc1_copy_codec_specific( dst, src ); + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_AC_3 : + *(lsmash_ac3_specific_parameters_t *)dst_data = *(lsmash_ac3_specific_parameters_t *)src_data; + return 0; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_EC_3 : + 
*(lsmash_eac3_specific_parameters_t *)dst_data = *(lsmash_eac3_specific_parameters_t *)src_data; + return 0; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_DTS : + return dts_copy_codec_specific( dst, src ); + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_ALAC : + *(lsmash_alac_specific_parameters_t *)dst_data = *(lsmash_alac_specific_parameters_t *)src_data; + return 0; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_SAMPLE_SCALE : + *(lsmash_isom_sample_scale_t *)dst_data = *(lsmash_isom_sample_scale_t *)src_data; + return 0; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264_BITRATE : + *(lsmash_h264_bitrate_t *)dst_data = *(lsmash_h264_bitrate_t *)src_data; + return 0; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_COMMON : + *(lsmash_qt_video_common_t *)dst_data = *(lsmash_qt_video_common_t *)src_data; + return 0; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_COMMON : + *(lsmash_qt_audio_common_t *)dst_data = *(lsmash_qt_audio_common_t *)src_data; + return 0; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_FORMAT_SPECIFIC_FLAGS : + *(lsmash_qt_audio_format_specific_flags_t *)dst_data = *(lsmash_qt_audio_format_specific_flags_t *)src_data; + return 0; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_CODEC_GLOBAL_HEADER : + { + lsmash_codec_global_header_t *src_global = (lsmash_codec_global_header_t *)src_data; + if( src_global->header_data && src_global->header_size ) + { + lsmash_codec_global_header_t *dst_global = (lsmash_codec_global_header_t *)dst_data; + dst_global->header_data = lsmash_memdup( src_global->header_data, src_global->header_size ); + if( !dst_global->header_data ) + return -1; + dst_global->header_size = src_global->header_size; + } + return 0; + } + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_FIELD_INFO : + *(lsmash_qt_field_info_t *)dst_data = *(lsmash_qt_field_info_t *)src_data; + return 0; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_PIXEL_FORMAT : + *(lsmash_qt_pixel_format_t *)dst_data = *(lsmash_qt_pixel_format_t *)src_data; + 
return 0; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_SIGNIFICANT_BITS : + *(lsmash_qt_significant_bits_t *)dst_data = *(lsmash_qt_significant_bits_t *)src_data; + return 0; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_GAMMA_LEVEL : + *(lsmash_qt_gamma_t *)dst_data = *(lsmash_qt_gamma_t *)src_data; + return 0; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_CHANNEL_LAYOUT : + *(lsmash_qt_audio_channel_layout_t *)dst_data = *(lsmash_qt_audio_channel_layout_t *)src_data; + return 0; + default : + return -1; + } +} + +lsmash_codec_specific_t *isom_duplicate_codec_specific_data( lsmash_codec_specific_t *specific ) +{ + if( !specific ) + return NULL; + lsmash_codec_specific_t *dup_data = lsmash_create_codec_specific_data( specific->type, specific->format ); + if( !dup_data ) + return NULL; + if( specific->format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ) + { + if( isom_duplicate_structured_specific_data( dup_data, specific ) ) + { + lsmash_destroy_codec_specific_data( dup_data ); + return NULL; + } + } + else + { + dup_data->data.unstructured = lsmash_memdup( specific->data.unstructured, specific->size ); + if( !dup_data->data.unstructured ) + { + lsmash_destroy_codec_specific_data( dup_data ); + return NULL; + } + } + dup_data->size = specific->size; + return dup_data; +} + +static size_t isom_description_read_box_common( uint8_t **p_data, uint64_t *size, lsmash_box_type_t *type ) +{ + uint8_t *orig = *p_data; + uint8_t *data = *p_data; + *size = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; + type->fourcc = (data[4] << 24) | (data[5] << 16) | (data[6] << 8) | data[7]; + data += ISOM_BASEBOX_COMMON_SIZE; + if( *size == 1 ) + { + *size = ((uint64_t)data[0] << 56) | ((uint64_t)data[1] << 48) | ((uint64_t)data[2] << 40) | ((uint64_t)data[3] << 32) + | ((uint64_t)data[4] << 24) | ((uint64_t)data[5] << 16) | ((uint64_t)data[6] << 8) | (uint64_t)data[7]; + data += 8; + } + *p_data = data; + if( type->fourcc == ISOM_BOX_TYPE_UUID.fourcc ) + { + 
type->user.fourcc = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; + memcpy( type->user.id, &data[4], 12 ); + } + return data - orig; +} + +uint8_t *isom_get_child_box_position( uint8_t *parent_data, uint32_t parent_size, lsmash_box_type_t child_type, uint32_t *child_size ) +{ + if( !parent_data || !child_size || parent_size < ISOM_BASEBOX_COMMON_SIZE ) + return NULL; + uint8_t *data = parent_data; + uint64_t size; + lsmash_box_type_t type; + uint32_t offset = isom_description_read_box_common( &data, &size, &type ); + if( size != parent_size ) + return NULL; + uint8_t *end = parent_data + parent_size; + for( uint8_t *pos = data; pos + ISOM_BASEBOX_COMMON_SIZE <= end; ) + { + offset = isom_description_read_box_common( &pos, &size, &type ); + if( lsmash_check_box_type_identical( type, child_type ) ) + { + *child_size = size; + return pos - offset; + } + pos += size - offset; /* Move to the next box. */ + } + return NULL; +} + +static int isom_construct_global_specific_header( lsmash_codec_specific_t *dst, lsmash_codec_specific_t *src ) +{ + if( src->size < ISOM_BASEBOX_COMMON_SIZE ) + return -1; + lsmash_codec_global_header_t *global = (lsmash_codec_global_header_t *)dst->data.structured; + uint8_t *data = src->data.unstructured; + uint64_t size = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; + data += ISOM_BASEBOX_COMMON_SIZE; + global->header_size = size - ISOM_BASEBOX_COMMON_SIZE; + if( size == 1 ) + { + size = ((uint64_t)data[0] << 56) | ((uint64_t)data[1] << 48) | ((uint64_t)data[2] << 40) | ((uint64_t)data[3] << 32) + | ((uint64_t)data[4] << 24) | ((uint64_t)data[5] << 16) | ((uint64_t)data[6] << 8) | (uint64_t)data[7]; + data += 8; + global->header_size -= 8; + } + if( size != src->size ) + return -1; + if( global->header_size ) + { + global->header_data = lsmash_memdup( data, global->header_size ); + if( !global->header_data ) + return -1; + } + return 0; +} + +static int isom_construct_audio_channel_layout( 
lsmash_codec_specific_t *dst, lsmash_codec_specific_t *src ) +{ + if( src->size < ISOM_FULLBOX_COMMON_SIZE + 12 ) + return -1; + lsmash_qt_audio_channel_layout_t *layout = (lsmash_qt_audio_channel_layout_t *)dst->data.structured; + uint8_t *data = src->data.unstructured; + uint64_t size = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; + data += ISOM_FULLBOX_COMMON_SIZE; + if( size == 1 ) + { + size = ((uint64_t)data[0] << 56) | ((uint64_t)data[1] << 48) | ((uint64_t)data[2] << 40) | ((uint64_t)data[3] << 32) + | ((uint64_t)data[4] << 24) | ((uint64_t)data[5] << 16) | ((uint64_t)data[6] << 8) | (uint64_t)data[7]; + data += 8; + } + if( size != src->size ) + return -1; + layout->channelLayoutTag = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; + layout->channelBitmap = (data[4] << 24) | (data[5] << 16) | (data[6] << 8) | data[7]; + return 0; +} + +#if 0 +static int codec_construct_qt_audio_decompression_info( lsmash_codec_specific_t *dst, lsmash_codec_specific_t *src ) +{ + if( src->size < ISOM_BASEBOX_COMMON_SIZE ) + return -1; + uint8_t *data = src->data.unstructured; + uint64_t size; + uint32_t type; + uint32_t offset = isom_description_read_box_common( &data, &size, &type ); + if( size != src->size ) + return -1; + uint8_t *end = src->data.unstructured + src->size; + isom_wave_t *wave = lsmash_malloc_zero( sizeof(isom_wave_t) ); + if( !wave ) + return -1; + wave->type = QT_BOX_TYPE_WAVE; + for( uint8_t *pos = data; pos + ISOM_BASEBOX_COMMON_SIZE <= end; ) + { + offset = isom_description_read_box_common( &pos, &size, &type ); + switch( type ) + { + case QT_BOX_TYPE_FRMA : + { + if( pos + 4 > end ) + return -1; + isom_frma_t *frma = lsmash_malloc_zero( sizeof(isom_frma_t) ); + if( !frma ) + return -1; + isom_init_box_common( frma, wave, QT_BOX_TYPE_FRMA ); + frma->data_format = (pos[0] << 24) | (pos[1] << 16) | (pos[2] << 8) | pos[3]; + pos += 4; + wave->frma = frma; + break; + } + case QT_BOX_TYPE_ENDA : + { + if( pos + 2 > end ) + 
return -1; + isom_enda_t *enda = lsmash_malloc_zero( sizeof(isom_enda_t) ); + if( !enda ) + return -1; + isom_init_box_common( enda, wave, QT_BOX_TYPE_ENDA ); + enda->littleEndian = (pos[0] << 8) | pos[1]; + pos += 2; + wave->enda = enda; + break; + } + case QT_BOX_TYPE_MP4A : + { + if( pos + 4 > end ) + return -1; + isom_mp4a_t *mp4a = lsmash_malloc_zero( sizeof(isom_mp4a_t) ); + if( !mp4a ) + return -1; + isom_init_box_common( mp4a, wave, QT_BOX_TYPE_MP4A ); + mp4a->unknown = (pos[0] << 24) | (pos[1] << 16) | (pos[2] << 8) | pos[3]; + pos += 4; + wave->mp4a = mp4a; + break; + } + case QT_BOX_TYPE_TERMINATOR : + { + isom_terminator_t *terminator = lsmash_malloc_zero( sizeof(isom_terminator_t) ); + if( !terminator ) + return -1; + isom_init_box_common( terminator, wave, QT_BOX_TYPE_TERMINATOR ); + wave->terminator = terminator; + break; + } + default : + { + isom_unknown_box_t *box = lsmash_malloc_zero( sizeof(isom_unknown_box_t) ); + if( !box ) + return -1; + isom_init_box_common( box, wave, type ); + box->unknown_size = size - offset; + box->unknown_field = lsmash_memdup( pos, box->unknown_size ); + if( !box->unknown_field ) + { + free( box ); + return -1; + } + if( isom_add_extension_box( &wave->extensions, box, isom_remove_unknown_box ) ) + { + isom_remove_unknown_box( box ); + return -1; + } + pos += box->unknown_size; + break; + } + } + } + return 0; +} +#endif + +/* structured <-> unstructured conversion might be irreversible by CODEC + * since structured formats we defined don't always have all contents included in unstructured data. 
*/ +lsmash_codec_specific_t *lsmash_convert_codec_specific_format( lsmash_codec_specific_t *specific, lsmash_codec_specific_format format ) +{ + if( !specific || format == LSMASH_CODEC_SPECIFIC_FORMAT_UNSPECIFIED ) + return NULL; + if( format == specific->format ) + return isom_duplicate_codec_specific_data( specific ); + lsmash_codec_specific_t *dst = lsmash_create_codec_specific_data( specific->type, format ); + if( format == LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED ) + /* structured -> unstructured */ + switch( specific->type ) + { + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_MP4SYS_DECODER_CONFIG : + dst->data.unstructured = lsmash_create_mp4sys_decoder_config( (lsmash_mp4sys_decoder_parameters_t *)specific->data.structured, &dst->size ); + if( !dst->data.unstructured ) + goto fail; + return dst; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264 : + dst->data.unstructured = lsmash_create_h264_specific_info( (lsmash_h264_specific_parameters_t *)specific->data.structured, &dst->size ); + if( !dst->data.unstructured ) + goto fail; + return dst; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_VC_1 : + dst->data.unstructured = lsmash_create_vc1_specific_info( (lsmash_vc1_specific_parameters_t *)specific->data.structured, &dst->size ); + if( !dst->data.unstructured ) + goto fail; + return dst; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_AC_3 : + dst->data.unstructured = lsmash_create_ac3_specific_info( (lsmash_ac3_specific_parameters_t *)specific->data.structured, &dst->size ); + if( !dst->data.unstructured ) + goto fail; + return dst; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_EC_3 : + dst->data.unstructured = lsmash_create_eac3_specific_info( (lsmash_eac3_specific_parameters_t *)specific->data.structured, &dst->size ); + if( !dst->data.unstructured ) + goto fail; + return dst; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_DTS : + dst->data.unstructured = lsmash_create_dts_specific_info( (lsmash_dts_specific_parameters_t 
*)specific->data.structured, &dst->size ); + if( !dst->data.unstructured ) + goto fail; + return dst; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_ALAC : + dst->data.unstructured = lsmash_create_alac_specific_info( (lsmash_alac_specific_parameters_t *)specific->data.structured, &dst->size ); + if( !dst->data.unstructured ) + goto fail; + return dst; + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_CODEC_GLOBAL_HEADER : + { + lsmash_bs_t *bs = lsmash_bs_create( NULL ); + if( !bs ) + goto fail; + lsmash_codec_global_header_t *global = specific->data.structured; + lsmash_bs_put_be32( bs, ISOM_BASEBOX_COMMON_SIZE + global->header_size ); + lsmash_bs_put_be32( bs, QT_BOX_TYPE_GLBL.fourcc ); + lsmash_bs_put_bytes( bs, global->header_size, global->header_data ); + dst->data.unstructured = lsmash_bs_export_data( bs, &dst->size ); + lsmash_bs_cleanup( bs ); + if( !dst->data.unstructured || dst->size != (ISOM_BASEBOX_COMMON_SIZE + global->header_size) ) + goto fail; + return dst; + } + default : + break; + } + else if( format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ) + { + /* unstructured -> structured */ + extern int mp4sys_construct_decoder_config( lsmash_codec_specific_t *, lsmash_codec_specific_t * ); + extern int h264_construct_specific_parameters( lsmash_codec_specific_t *, lsmash_codec_specific_t * ); + extern int vc1_construct_specific_parameters( lsmash_codec_specific_t *, lsmash_codec_specific_t * ); + extern int ac3_construct_specific_parameters( lsmash_codec_specific_t *, lsmash_codec_specific_t * ); + extern int eac3_construct_specific_parameters( lsmash_codec_specific_t *, lsmash_codec_specific_t * ); + extern int dts_construct_specific_parameters( lsmash_codec_specific_t *, lsmash_codec_specific_t * ); + extern int alac_construct_specific_parameters( lsmash_codec_specific_t *, lsmash_codec_specific_t * ); + static const struct + { + lsmash_codec_specific_data_type data_type; + int (*constructor)( lsmash_codec_specific_t *, lsmash_codec_specific_t * ); + } 
codec_specific_format_constructor_table[] = + { + { LSMASH_CODEC_SPECIFIC_DATA_TYPE_MP4SYS_DECODER_CONFIG, mp4sys_construct_decoder_config }, + { LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264, h264_construct_specific_parameters }, + { LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_VC_1, vc1_construct_specific_parameters }, + { LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_AC_3, ac3_construct_specific_parameters }, + { LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_EC_3, eac3_construct_specific_parameters }, + { LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_DTS, dts_construct_specific_parameters }, + { LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_ALAC, alac_construct_specific_parameters }, + { LSMASH_CODEC_SPECIFIC_DATA_TYPE_CODEC_GLOBAL_HEADER, isom_construct_global_specific_header }, + { LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_CHANNEL_LAYOUT, isom_construct_audio_channel_layout }, + { LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNKNOWN, NULL } + }; + int (*constructor)( lsmash_codec_specific_t *, lsmash_codec_specific_t * ) = NULL; + for( int i = 0; codec_specific_format_constructor_table[i].constructor; i++ ) + if( specific->type == codec_specific_format_constructor_table[i].data_type ) + { + constructor = codec_specific_format_constructor_table[i].constructor; + break; + } + if( constructor && !constructor( dst, specific ) ) + return dst; + } +fail: + lsmash_destroy_codec_specific_data( dst ); + return NULL; +} + +void isom_remove_sample_description_extension( isom_extension_box_t *ext ) +{ + if( !ext ) + return; + if( ext->destruct ) + { + if( ext->format == EXTENSION_FORMAT_BINARY ) + { + if( ext->form.binary ) + ext->destruct( ext->form.binary ); + } + else + { + if( ext->form.box ) + ext->destruct( ext->form.box ); + } + } + free( ext ); +} + +void isom_remove_sample_description_extensions( lsmash_entry_list_t *extensions ) +{ + lsmash_remove_entries( extensions, isom_remove_sample_description_extension ); +} + +static inline void isom_set_default_compressorname( char 
*compressorname, lsmash_codec_type_t sample_type ) +{ + static struct compressorname_table_tag + { + lsmash_codec_type_t type; + char name[33]; + } compressorname_table[32] = { { LSMASH_CODEC_TYPE_INITIALIZER, { '\0' } } }; + if( compressorname_table[0].name[0] == '\0' ) + { + int i = 0; +#define ADD_COMPRESSORNAME_TABLE( type, name ) compressorname_table[i++] = (struct compressorname_table_tag){ type, name } + ADD_COMPRESSORNAME_TABLE( ISOM_CODEC_TYPE_AVC1_VIDEO, "\012AVC Coding" ); + ADD_COMPRESSORNAME_TABLE( ISOM_CODEC_TYPE_AVC1_VIDEO, "\012AVC Coding" ); + ADD_COMPRESSORNAME_TABLE( ISOM_CODEC_TYPE_AVC2_VIDEO, "\012AVC Coding" ); + ADD_COMPRESSORNAME_TABLE( ISOM_CODEC_TYPE_AVCP_VIDEO, "\016AVC Parameters" ); + ADD_COMPRESSORNAME_TABLE( ISOM_CODEC_TYPE_SVC1_VIDEO, "\012SVC Coding" ); + ADD_COMPRESSORNAME_TABLE( ISOM_CODEC_TYPE_MVC1_VIDEO, "\012MVC Coding" ); + ADD_COMPRESSORNAME_TABLE( ISOM_CODEC_TYPE_MVC2_VIDEO, "\012MVC Coding" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_APCH_VIDEO, "\023Apple ProRes 422 (HQ)" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_APCN_VIDEO, "\023Apple ProRes 422 (SD)" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_APCS_VIDEO, "\023Apple ProRes 422 (LT)" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_APCO_VIDEO, "\026Apple ProRes 422 (Proxy)" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_AP4H_VIDEO, "\019Apple ProRes 4444" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_DVPP_VIDEO, "\014DVCPRO - PAL" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_DV5N_VIDEO, "\017DVCPRO50 - NTSC" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_DV5P_VIDEO, "\016DVCPRO50 - PAL" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_DVH2_VIDEO, "\019DVCPRO HD 1080p25" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_DVH3_VIDEO, "\019DVCPRO HD 1080p30" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_DVH5_VIDEO, "\019DVCPRO HD 1080i50" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_DVH6_VIDEO, "\019DVCPRO HD 1080i60" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_DVHP_VIDEO, "\018DVCPRO HD 
720p60" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_DVHQ_VIDEO, "\018DVCPRO HD 720p50" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_ULRA_VIDEO, "\017Ut Video (ULRA)" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_ULRG_VIDEO, "\017Ut Video (ULRG)" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_ULY0_VIDEO, "\017Ut Video (ULY0)" ); + ADD_COMPRESSORNAME_TABLE( QT_CODEC_TYPE_ULY2_VIDEO, "\017Ut Video (ULY2)" ); + ADD_COMPRESSORNAME_TABLE( LSMASH_CODEC_TYPE_UNSPECIFIED, { '\0' } ); +#undef ADD_COMPRESSORNAME_TABLE + } + for( int i = 0; compressorname_table[i].name[0] != '\0'; i++ ) + if( lsmash_check_codec_type_identical( sample_type, compressorname_table[i].type ) ) + { + strcpy( compressorname, compressorname_table[i].name ); + return; + } +} + +int isom_add_extension_box( lsmash_entry_list_t *extensions, void *box, void *eliminator ) +{ + if( !box ) + return -1; + isom_extension_box_t *ext = lsmash_malloc_zero( sizeof(isom_extension_box_t) ); + if( !ext ) + return -1; + ext->type = ((isom_box_t *)box)->type; + ext->format = EXTENSION_FORMAT_BOX; + ext->form.box = box; + ext->destruct = eliminator ? eliminator : free; + if( lsmash_add_entry( extensions, ext ) ) + { + ext->destruct( ext ); + return -1; + } + return 0; +} + +lsmash_codec_specific_t *isom_get_codec_specific( lsmash_codec_specific_list_t *opaque, lsmash_codec_specific_data_type type ) +{ + for( lsmash_entry_t *entry = opaque->list.head; entry; entry = entry->next ) + { + lsmash_codec_specific_t *specific = (lsmash_codec_specific_t *)entry->data; + if( !specific || specific->type != type ) + continue; + return specific; + } + return NULL; +} + +static int isom_check_valid_summary( lsmash_summary_t *summary ) +{ + if( !summary ) + return -1; + isom_box_t temp_box; + temp_box.type = summary->sample_type; + temp_box.manager = summary->summary_type == LSMASH_SUMMARY_TYPE_AUDIO ? 
LSMASH_AUDIO_DESCRIPTION: 0; + if( isom_is_lpcm_audio( &temp_box ) ) + { + if( isom_get_codec_specific( summary->opaque, LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_FORMAT_SPECIFIC_FLAGS ) ) + return 0; + return -1; + } + if( isom_is_uncompressed_ycbcr( summary->sample_type ) ) + { + if( isom_get_codec_specific( summary->opaque, LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_FIELD_INFO ) ) + { + if( !lsmash_check_codec_type_identical( summary->sample_type, QT_CODEC_TYPE_V216_VIDEO ) ) + return 0; + } + else + return -1; + } + lsmash_codec_type_t sample_type = summary->sample_type; + lsmash_codec_specific_data_type required_data_type = LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNSPECIFIED; + if( lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_AVC1_VIDEO ) ) + required_data_type = LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264; + else if( lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_VC_1_VIDEO ) ) + required_data_type = LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_VC_1 ; + else if( lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_ULRA_VIDEO ) + || lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_ULRG_VIDEO ) + || lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_ULY0_VIDEO ) + || lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_ULY2_VIDEO ) ) + required_data_type = LSMASH_CODEC_SPECIFIC_DATA_TYPE_CODEC_GLOBAL_HEADER; + else if( lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_V216_VIDEO ) ) + required_data_type = LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_SIGNIFICANT_BITS; + else if( lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_MP4V_VIDEO ) + || lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_MP4A_AUDIO ) + || lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_MP4A_AUDIO ) ) + required_data_type = LSMASH_CODEC_SPECIFIC_DATA_TYPE_MP4SYS_DECODER_CONFIG; + else if( lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_AC_3_AUDIO ) ) + 
required_data_type = LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_AC_3; + else if( lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_EC_3_AUDIO ) ) + required_data_type = LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_EC_3; + else if( lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_DTSC_AUDIO ) + || lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_DTSE_AUDIO ) + || lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_DTSH_AUDIO ) + || lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_DTSL_AUDIO ) ) + required_data_type = LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_DTS; + else if( lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_ALAC_AUDIO ) + || lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_ALAC_AUDIO ) ) + required_data_type = LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_ALAC; + if( required_data_type == LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNSPECIFIED ) + return 0; + return isom_get_codec_specific( summary->opaque, required_data_type ) ? 
0 : -1; +} + +static lsmash_box_type_t isom_guess_video_codec_specific_box_type( lsmash_codec_type_t active_codec_type, lsmash_compact_box_type_t fourcc ) +{ + lsmash_box_type_t box_type = LSMASH_BOX_TYPE_INITIALIZER; + box_type.fourcc = fourcc; +#define GUESS_VIDEO_CODEC_SPECIFIC_BOX_TYPE( codec_type, predefined_box_type ) \ + else if( (codec_type.user.fourcc == 0 \ + || lsmash_check_codec_type_identical( active_codec_type, codec_type )) \ + && box_type.fourcc == predefined_box_type.fourcc ) \ + box_type = predefined_box_type + if( 0 ); + GUESS_VIDEO_CODEC_SPECIFIC_BOX_TYPE( ISOM_CODEC_TYPE_AVC1_VIDEO, ISOM_BOX_TYPE_AVCC ); + GUESS_VIDEO_CODEC_SPECIFIC_BOX_TYPE( ISOM_CODEC_TYPE_AVC2_VIDEO, ISOM_BOX_TYPE_AVCC ); + GUESS_VIDEO_CODEC_SPECIFIC_BOX_TYPE( ISOM_CODEC_TYPE_AVCP_VIDEO, ISOM_BOX_TYPE_AVCC ); + GUESS_VIDEO_CODEC_SPECIFIC_BOX_TYPE( ISOM_CODEC_TYPE_VC_1_VIDEO, ISOM_BOX_TYPE_DVC1 ); + GUESS_VIDEO_CODEC_SPECIFIC_BOX_TYPE( ISOM_CODEC_TYPE_MP4V_VIDEO, ISOM_BOX_TYPE_ESDS ); + GUESS_VIDEO_CODEC_SPECIFIC_BOX_TYPE( LSMASH_CODEC_TYPE_UNSPECIFIED, ISOM_BOX_TYPE_BTRT ); + GUESS_VIDEO_CODEC_SPECIFIC_BOX_TYPE( LSMASH_CODEC_TYPE_UNSPECIFIED, QT_BOX_TYPE_FIEL ); + GUESS_VIDEO_CODEC_SPECIFIC_BOX_TYPE( LSMASH_CODEC_TYPE_UNSPECIFIED, QT_BOX_TYPE_CSPC ); + GUESS_VIDEO_CODEC_SPECIFIC_BOX_TYPE( LSMASH_CODEC_TYPE_UNSPECIFIED, QT_BOX_TYPE_SGBT ); + GUESS_VIDEO_CODEC_SPECIFIC_BOX_TYPE( LSMASH_CODEC_TYPE_UNSPECIFIED, QT_BOX_TYPE_GAMA ); + GUESS_VIDEO_CODEC_SPECIFIC_BOX_TYPE( LSMASH_CODEC_TYPE_UNSPECIFIED, QT_BOX_TYPE_GLBL ); +#undef GUESS_VIDEO_CODEC_SPECIFIC_BOX_TYPE + return box_type; +} + +int isom_setup_visual_description( isom_stsd_t *stsd, lsmash_codec_type_t sample_type, lsmash_video_summary_t *summary ) +{ + if( !summary || !stsd || !stsd->list || !stsd->parent || !stsd->parent->parent + || !stsd->parent->parent->parent || !stsd->parent->parent->parent->parent ) + return -1; + if( isom_check_valid_summary( (lsmash_summary_t *)summary ) ) + return -1; + lsmash_entry_list_t 
*list = stsd->list; + isom_visual_entry_t *visual = lsmash_malloc_zero( sizeof(isom_visual_entry_t) ); + if( !visual ) + return -1; + isom_init_box_common( visual, stsd, sample_type ); + visual->manager |= LSMASH_VIDEO_DESCRIPTION; + visual->data_reference_index = 1; + visual->version = 0; + visual->revision_level = 0; + visual->vendor = 0; + visual->temporalQuality = 0; + visual->spatialQuality = 0; + visual->width = (uint16_t)summary->width; + visual->height = (uint16_t)summary->height; + visual->horizresolution = 0x00480000; + visual->vertresolution = 0x00480000; + visual->dataSize = 0; + visual->frame_count = 1; + visual->depth = isom_is_qt_video( summary->sample_type ) || isom_is_avc( summary->sample_type ) ? summary->depth : 0x0018; + visual->color_table_ID = -1; + if( summary->compressorname[0] == '\0' ) + isom_set_default_compressorname( visual->compressorname, sample_type ); + else + { + memcpy( visual->compressorname, summary->compressorname, 32 ); + visual->compressorname[32] = '\0'; + } + for( lsmash_entry_t *entry = summary->opaque->list.head; entry; entry = entry->next ) + { + lsmash_codec_specific_t *specific = (lsmash_codec_specific_t *)entry->data; + if( !specific ) + goto fail; + if( specific->type == LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNKNOWN + && specific->format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ) + continue; /* LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNKNOWN + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED is not supported. 
*/ + switch( specific->type ) + { + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_COMMON : + { + if( specific->format == LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED ) + continue; + lsmash_qt_video_common_t *data = (lsmash_qt_video_common_t *)specific->data.structured; + visual->revision_level = data->revision_level; + visual->vendor = data->vendor; + visual->temporalQuality = data->temporalQuality; + visual->spatialQuality = data->spatialQuality; + visual->horizresolution = data->horizontal_resolution; + visual->vertresolution = data->vertical_resolution; + visual->dataSize = data->dataSize; + visual->frame_count = data->frame_count; + visual->color_table_ID = data->color_table_ID; + if( data->color_table_ID == 0 ) + { + lsmash_qt_color_table_t *src_ct = &data->color_table; + uint16_t element_count = LSMASH_MIN( src_ct->size + 1, 256 ); + isom_qt_color_array_t *dst_array = lsmash_malloc_zero( element_count * sizeof(isom_qt_color_array_t) ); + if( !dst_array ) + goto fail; + isom_qt_color_table_t *dst_ct = &visual->color_table; + dst_ct->array = dst_array; + dst_ct->seed = src_ct->seed; + dst_ct->flags = src_ct->flags; + dst_ct->size = src_ct->size; + for( uint16_t i = 0; i < element_count; i++ ) + { + dst_array[i].value = src_ct->array[i].unused; + dst_array[i].r = src_ct->array[i].r; + dst_array[i].g = src_ct->array[i].g; + dst_array[i].b = src_ct->array[i].b; + } + } + break; + } + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_SAMPLE_SCALE : + { + lsmash_codec_specific_t *cs = lsmash_convert_codec_specific_format( specific, LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !cs ) + goto fail; + lsmash_isom_sample_scale_t *data = (lsmash_isom_sample_scale_t *)cs->data.structured; + isom_stsl_t *box = lsmash_malloc_zero( sizeof(isom_stsl_t) ); + if( !box ) + { + lsmash_destroy_codec_specific_data( cs ); + goto fail; + } + isom_init_box_common( box, visual, ISOM_BOX_TYPE_STSL ); + box->constraint_flag = data->constraint_flag; + box->scale_method = data->scale_method; 
+ box->display_center_x = data->display_center_x; + box->display_center_y = data->display_center_y; + lsmash_destroy_codec_specific_data( cs ); + if( isom_add_extension_box( &visual->extensions, box, isom_remove_stsl ) ) + { + free( box ); + goto fail; + } + break; + } + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264_BITRATE : + { + lsmash_codec_specific_t *cs = lsmash_convert_codec_specific_format( specific, LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !cs ) + goto fail; + lsmash_h264_bitrate_t *data = (lsmash_h264_bitrate_t *)cs->data.structured; + isom_btrt_t *box = lsmash_malloc_zero( sizeof(isom_btrt_t) ); + if( !box ) + { + lsmash_destroy_codec_specific_data( cs ); + goto fail; + } + isom_init_box_common( box, visual, ISOM_BOX_TYPE_BTRT ); + box->bufferSizeDB = data->bufferSizeDB; + box->maxBitrate = data->maxBitrate; + box->avgBitrate = data->avgBitrate; + lsmash_destroy_codec_specific_data( cs ); + if( isom_add_extension_box( &visual->extensions, box, isom_remove_btrt ) ) + { + free( box ); + goto fail; + } + break; + } + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_FIELD_INFO : + { + lsmash_codec_specific_t *cs = lsmash_convert_codec_specific_format( specific, LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !cs ) + goto fail; + lsmash_qt_field_info_t *data = (lsmash_qt_field_info_t *)cs->data.structured; + isom_fiel_t *box = lsmash_malloc_zero( sizeof(isom_fiel_t) ); + if( !box ) + { + lsmash_destroy_codec_specific_data( cs ); + goto fail; + } + isom_init_box_common( box, visual, QT_BOX_TYPE_FIEL ); + box->fields = data->fields; + box->detail = data->detail; + lsmash_destroy_codec_specific_data( cs ); + if( isom_add_extension_box( &visual->extensions, box, isom_remove_fiel ) ) + { + free( box ); + goto fail; + } + break; + } + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_PIXEL_FORMAT : + { + lsmash_codec_specific_t *cs = lsmash_convert_codec_specific_format( specific, LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !cs ) + goto fail; + 
lsmash_qt_pixel_format_t *data = (lsmash_qt_pixel_format_t *)cs->data.structured; + isom_cspc_t *box = lsmash_malloc_zero( sizeof(isom_cspc_t) ); + if( !box ) + { + lsmash_destroy_codec_specific_data( cs ); + goto fail; + } + isom_init_box_common( box, visual, QT_BOX_TYPE_CSPC ); + box->pixel_format = data->pixel_format; + lsmash_destroy_codec_specific_data( cs ); + if( isom_add_extension_box( &visual->extensions, box, isom_remove_cspc ) ) + { + free( box ); + goto fail; + } + break; + } + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_SIGNIFICANT_BITS : + { + lsmash_codec_specific_t *cs = lsmash_convert_codec_specific_format( specific, LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !cs ) + goto fail; + lsmash_qt_significant_bits_t *data = (lsmash_qt_significant_bits_t *)cs->data.structured; + isom_sgbt_t *box = lsmash_malloc_zero( sizeof(isom_sgbt_t) ); + if( !box ) + { + lsmash_destroy_codec_specific_data( cs ); + goto fail; + } + isom_init_box_common( box, visual, QT_BOX_TYPE_SGBT ); + box->significantBits = data->significantBits; + lsmash_destroy_codec_specific_data( cs ); + if( isom_add_extension_box( &visual->extensions, box, isom_remove_sgbt ) ) + { + free( box ); + goto fail; + } + break; + } + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_GAMMA_LEVEL : + { + lsmash_codec_specific_t *cs = lsmash_convert_codec_specific_format( specific, LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !cs ) + goto fail; + lsmash_qt_gamma_t *data = (lsmash_qt_gamma_t *)cs->data.structured; + isom_gama_t *box = lsmash_malloc_zero( sizeof(isom_gama_t) ); + if( !box ) + { + lsmash_destroy_codec_specific_data( cs ); + goto fail; + } + isom_init_box_common( box, visual, QT_BOX_TYPE_GAMA ); + box->level = data->level; + lsmash_destroy_codec_specific_data( cs ); + if( isom_add_extension_box( &visual->extensions, box, isom_remove_gama ) ) + { + free( box ); + goto fail; + } + break; + } + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_CODEC_GLOBAL_HEADER : + { + lsmash_codec_specific_t *cs = 
lsmash_convert_codec_specific_format( specific, LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !cs ) + goto fail; + lsmash_codec_global_header_t *data = (lsmash_codec_global_header_t *)cs->data.structured; + isom_glbl_t *box = lsmash_malloc_zero( sizeof(isom_glbl_t) ); + if( !box ) + { + lsmash_destroy_codec_specific_data( cs ); + goto fail; + } + isom_init_box_common( box, visual, QT_BOX_TYPE_GLBL ); + box->header_size = data->header_size; + box->header_data = lsmash_memdup( data->header_data, data->header_size ); + lsmash_destroy_codec_specific_data( cs ); + if( !box->header_data + || isom_add_extension_box( &visual->extensions, box, isom_remove_glbl ) ) + { + free( box ); + goto fail; + } + break; + } + default : + { + lsmash_codec_specific_t *cs = lsmash_convert_codec_specific_format( specific, LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED ); + if( !cs || cs->size < ISOM_BASEBOX_COMMON_SIZE ) + goto fail; + isom_extension_box_t *extension = malloc( sizeof(isom_extension_box_t) ); + if( !extension ) + { + lsmash_destroy_codec_specific_data( cs ); + goto fail; + } + uint8_t *data = cs->data.unstructured; + lsmash_compact_box_type_t fourcc = LSMASH_4CC( data[4], data[5], data[6], data[7] ); + lsmash_box_type_t box_type = isom_guess_video_codec_specific_box_type( (lsmash_codec_type_t)visual->type, fourcc ); + /* Set up the extension. */ + extension->size = cs->size; + extension->type = box_type; + extension->format = EXTENSION_FORMAT_BINARY; + extension->form.binary = data; + extension->destruct = free; + cs->data.unstructured = NULL; /* Avoid freeing the binary data of the extension. 
*/ + lsmash_destroy_codec_specific_data( cs ); + if( lsmash_add_entry( &visual->extensions, extension ) ) + { + extension->destruct( extension ); + goto fail; + } + break; + } + } + } + isom_trak_entry_t *trak = (isom_trak_entry_t *)visual->parent->parent->parent->parent->parent; + int qt_compatible = trak->root->qt_compatible; + isom_tapt_t *tapt = trak->tapt; + isom_stsl_t *stsl = (isom_stsl_t *)isom_get_extension_box( &visual->extensions, ISOM_BOX_TYPE_STSL ); + int set_aperture_modes = qt_compatible /* Track Aperture Modes is only available under QuickTime file format. */ + && (!stsl || stsl->scale_method == 0) /* Sample scaling method might conflict with this feature. */ + && tapt && tapt->clef && tapt->prof && tapt->enof /* Check if required boxes exist. */ + && !((isom_stsd_t *)visual->parent)->list->entry_count; /* Multiple sample description might conflict with this, so in that case, disable this feature. + * Note: this sample description isn't added yet here. */ + if( !set_aperture_modes ) + isom_remove_tapt( trak->tapt ); + int uncompressed_ycbcr = qt_compatible && isom_is_uncompressed_ycbcr( visual->type ); + /* Set up Clean Aperture. 
*/ + if( set_aperture_modes || uncompressed_ycbcr + || (summary->clap.width.d && summary->clap.height.d && summary->clap.horizontal_offset.d && summary->clap.vertical_offset.d) ) + { + isom_clap_t *box = lsmash_malloc_zero( sizeof(isom_clap_t) ); + if( !box ) + goto fail; + isom_init_box_common( box, visual, ISOM_BOX_TYPE_CLAP ); + if( summary->clap.width.d && summary->clap.height.d && summary->clap.horizontal_offset.d && summary->clap.vertical_offset.d ) + { + box->cleanApertureWidthN = summary->clap.width.n; + box->cleanApertureWidthD = summary->clap.width.d; + box->cleanApertureHeightN = summary->clap.height.n; + box->cleanApertureHeightD = summary->clap.height.d; + box->horizOffN = summary->clap.horizontal_offset.n; + box->horizOffD = summary->clap.horizontal_offset.d; + box->vertOffN = summary->clap.vertical_offset.n; + box->vertOffD = summary->clap.vertical_offset.d; + } + else + { + box->cleanApertureWidthN = summary->width; + box->cleanApertureWidthD = 1; + box->cleanApertureHeightN = summary->height; + box->cleanApertureHeightD = 1; + box->horizOffN = 0; + box->horizOffD = 1; + box->vertOffN = 0; + box->vertOffD = 1; + } + if( isom_add_extension_box( &visual->extensions, box, isom_remove_clap ) ) + { + free( box ); + goto fail; + } + } + /* Set up Pixel Aspect Ratio. */ + if( set_aperture_modes || (summary->par_h && summary->par_v) ) + { + isom_pasp_t *box = lsmash_malloc_zero( sizeof(isom_pasp_t) ); + if( !box ) + goto fail; + isom_init_box_common( box, visual, ISOM_BOX_TYPE_PASP ); + box->hSpacing = LSMASH_MAX( summary->par_h, 1 ); + box->vSpacing = LSMASH_MAX( summary->par_v, 1 ); + if( isom_add_extension_box( &visual->extensions, box, isom_remove_pasp ) ) + { + free( box ); + goto fail; + } + } + /* Set up Color Parameter. 
*/ + if( uncompressed_ycbcr + || summary->color.primaries_index + || summary->color.transfer_index + || summary->color.matrix_index + || (trak->root->isom_compatible && summary->color.full_range) ) + { + isom_colr_t *box = lsmash_malloc_zero( sizeof(isom_colr_t) ); + if( !box ) + goto fail; + isom_init_box_common( box, visual, ISOM_BOX_TYPE_COLR ); + /* Set 'nclc' to parameter type, we don't support 'prof'. */ + uint16_t primaries = summary->color.primaries_index; + uint16_t transfer = summary->color.transfer_index; + uint16_t matrix = summary->color.matrix_index; + if( qt_compatible && !trak->root->isom_compatible ) + { + box->manager |= LSMASH_QTFF_BASE; + box->color_parameter_type = QT_COLOR_PARAMETER_TYPE_NCLC; + box->primaries_index = (primaries == 1 || primaries == 5 || primaries == 6) + ? primaries : QT_PRIMARIES_INDEX_UNSPECIFIED; + box->transfer_function_index = (transfer == 1 || transfer == 7) + ? transfer : QT_TRANSFER_INDEX_UNSPECIFIED; + box->matrix_index = (matrix == 1 || matrix == 6 || matrix == 7) + ? matrix : QT_MATRIX_INDEX_UNSPECIFIED; + } + else + { + box->color_parameter_type = ISOM_COLOR_PARAMETER_TYPE_NCLX; + box->primaries_index = (primaries == 1 || (primaries >= 4 && primaries <= 7)) + ? primaries : ISOM_PRIMARIES_INDEX_UNSPECIFIED; + box->transfer_function_index = (transfer == 1 || (transfer >= 4 && transfer <= 8) || (transfer >= 11 && transfer <= 13)) + ? transfer : ISOM_TRANSFER_INDEX_UNSPECIFIED; + box->matrix_index = (matrix == 1 || (matrix >= 4 && matrix <= 8)) + ? matrix : ISOM_MATRIX_INDEX_UNSPECIFIED; + box->full_range_flag = summary->color.full_range; + } + if( isom_add_extension_box( &visual->extensions, box, isom_remove_colr ) ) + { + free( box ); + goto fail; + } + } + /* Set up Track Apeture Modes. 
*/ + if( set_aperture_modes ) + { + uint32_t width = visual->width << 16; + uint32_t height = visual->height << 16; + isom_clap_t *clap = (isom_clap_t *)isom_get_extension_box( &visual->extensions, ISOM_BOX_TYPE_CLAP ); + isom_pasp_t *pasp = (isom_pasp_t *)isom_get_extension_box( &visual->extensions, ISOM_BOX_TYPE_PASP ); + double clap_width = ((double)clap->cleanApertureWidthN / clap->cleanApertureWidthD) * (1<<16); + double clap_height = ((double)clap->cleanApertureHeightN / clap->cleanApertureHeightD) * (1<<16); + double par = (double)pasp->hSpacing / pasp->vSpacing; + if( par >= 1.0 ) + { + tapt->clef->width = clap_width * par; + tapt->clef->height = clap_height; + tapt->prof->width = width * par; + tapt->prof->height = height; + } + else + { + tapt->clef->width = clap_width; + tapt->clef->height = clap_height / par; + tapt->prof->width = width; + tapt->prof->height = height / par; + } + tapt->enof->width = width; + tapt->enof->height = height; + } + if( !lsmash_add_entry( list, visual ) ) + return 0; /* successed */ +fail: + isom_remove_sample_description_extensions( &visual->extensions ); + free( visual ); + return -1; +} + +static int isom_append_audio_es_descriptor_extension( isom_box_t *box, lsmash_audio_summary_t *summary ) +{ + uint32_t esds_size = 0; + uint8_t *esds_data = NULL; + lsmash_codec_specific_t *specific = isom_get_codec_specific( summary->opaque, LSMASH_CODEC_SPECIFIC_DATA_TYPE_MP4SYS_DECODER_CONFIG ); + if( !specific ) + return -1; + if( specific->format == LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED ) + { + esds_size = specific->size; + esds_data = lsmash_memdup( specific->data.unstructured, specific->size ); + if( !esds_data ) + return -1; + } + else + { + esds_data = lsmash_create_mp4sys_decoder_config( (lsmash_mp4sys_decoder_parameters_t *)specific->data.structured, &esds_size ); + if( !esds_data ) + return -1; + } + isom_esds_t *esds = lsmash_malloc_zero( sizeof(isom_esds_t) ); + if( !esds ) + { + free( esds_data ); + return -1; + } + 
isom_init_box_common( esds, box, ISOM_BOX_TYPE_ESDS ); + lsmash_bs_t bs = { 0 }; + bs.data = esds_data + ISOM_FULLBOX_COMMON_SIZE; + bs.alloc = esds_size - ISOM_FULLBOX_COMMON_SIZE; + bs.store = bs.alloc; + esds->ES = mp4sys_get_ES_Descriptor( &bs ); + free( esds_data ); + if( !esds->ES ) + { + free( esds ); + return -1; + } + if( isom_add_extension_box( &box->extensions, esds, isom_remove_esds ) ) + { + isom_remove_esds( esds ); + return -1; + } + return 0; +} + +static int isom_append_channel_layout_extension( lsmash_codec_specific_t *specific, void *parent, uint32_t channels ) +{ + assert( parent ); + if( isom_get_sample_description_extension( &((isom_box_t *)parent)->extensions, QT_BOX_TYPE_CHAN ) ) + return 0; /* Audio Channel Layout Box is already present. */ + lsmash_codec_specific_t *cs = lsmash_convert_codec_specific_format( specific, LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !cs ) + return -1; + lsmash_qt_audio_channel_layout_t *data = (lsmash_qt_audio_channel_layout_t *)cs->data.structured; + lsmash_channel_layout_tag channelLayoutTag = data->channelLayoutTag; + lsmash_channel_bitmap channelBitmap = data->channelBitmap; + if( channelLayoutTag == QT_CHANNEL_LAYOUT_USE_CHANNEL_DESCRIPTIONS /* We don't support the feature of Channel Descriptions. */ + || (channelLayoutTag == QT_CHANNEL_LAYOUT_USE_CHANNEL_BITMAP && (!channelBitmap || channelBitmap > QT_CHANNEL_BIT_FULL)) ) + { + channelLayoutTag = data->channelLayoutTag = QT_CHANNEL_LAYOUT_UNKNOWN | channels; + channelBitmap = data->channelBitmap = 0; + } + /* Don't create Audio Channel Layout Box if the channel layout is unknown. 
*/ + if( (channelLayoutTag ^ QT_CHANNEL_LAYOUT_UNKNOWN) >> 16 ) + { + isom_chan_t *box = lsmash_malloc_zero( sizeof(isom_chan_t) ); + if( !box ) + { + lsmash_destroy_codec_specific_data( cs ); + return -1; + } + isom_box_t *parent_box = parent; + isom_init_box_common( box, parent_box, QT_BOX_TYPE_CHAN ); + box->channelLayoutTag = channelLayoutTag; + box->channelBitmap = channelBitmap; + box->numberChannelDescriptions = 0; + box->channelDescriptions = NULL; + lsmash_destroy_codec_specific_data( cs ); + if( isom_add_extension_box( &parent_box->extensions, box, isom_remove_chan ) ) + { + free( box ); + return -1; + } + } + return 0; +} + +static int isom_set_qtff_mp4a_description( isom_audio_entry_t *audio, lsmash_audio_summary_t *summary ) +{ + isom_wave_t *wave = lsmash_malloc_zero( sizeof(isom_wave_t) ); + if( !wave ) + return -1; + isom_init_box_common( wave, audio, QT_BOX_TYPE_WAVE ); + if( isom_add_frma( wave ) + || isom_add_mp4a( wave ) + || isom_add_terminator( wave ) + || isom_add_extension_box( &audio->extensions, wave, isom_remove_wave ) ) + { + isom_remove_wave( wave ); + return -1; + } + wave->frma->data_format = audio->type.fourcc; + /* Add ES Descriptor Box. */ + if( isom_append_audio_es_descriptor_extension( (isom_box_t *)wave, summary ) ) + return -1; + /* */ + audio->type = QT_CODEC_TYPE_MP4A_AUDIO; + audio->version = (summary->channels > 2 || summary->frequency > UINT16_MAX) ? 2 : 1; + audio->channelcount = audio->version == 2 ? 3 : LSMASH_MIN( summary->channels, 2 ); + audio->samplesize = 16; + audio->compression_ID = QT_AUDIO_COMPRESSION_ID_VARIABLE_COMPRESSION; + audio->packet_size = 0; + if( audio->version == 1 ) + { + audio->samplerate = summary->frequency << 16; + audio->samplesPerPacket = summary->samples_in_frame; + audio->bytesPerPacket = 1; /* Apparently, this field is set to 1. 
*/ + audio->bytesPerFrame = audio->bytesPerPacket * summary->channels; + audio->bytesPerSample = 2; + } + else /* audio->version == 2 */ + { + audio->samplerate = 0x00010000; + audio->sizeOfStructOnly = 72; + audio->audioSampleRate = (union {double d; uint64_t i;}){summary->frequency}.i; + audio->numAudioChannels = summary->channels; + audio->always7F000000 = 0x7F000000; + audio->constBitsPerChannel = 0; /* compressed audio */ + audio->formatSpecificFlags = 0; + audio->constBytesPerAudioPacket = 0; /* variable */ + audio->constLPCMFramesPerAudioPacket = summary->samples_in_frame; + } + return 0; +} + +static int isom_set_isom_mp4a_description( isom_audio_entry_t *audio, lsmash_audio_summary_t *summary ) +{ + if( summary->summary_type != LSMASH_SUMMARY_TYPE_AUDIO ) + return -1; + /* Check objectTypeIndication. */ + lsmash_mp4sys_object_type_indication objectTypeIndication = lsmash_mp4sys_get_object_type_indication( (lsmash_summary_t *)summary ); + switch( objectTypeIndication ) + { + case MP4SYS_OBJECT_TYPE_Audio_ISO_14496_3: + case MP4SYS_OBJECT_TYPE_Audio_ISO_13818_7_Main_Profile: + case MP4SYS_OBJECT_TYPE_Audio_ISO_13818_7_LC_Profile: + case MP4SYS_OBJECT_TYPE_Audio_ISO_13818_7_SSR_Profile: + case MP4SYS_OBJECT_TYPE_Audio_ISO_13818_3: /* Legacy Interface */ + case MP4SYS_OBJECT_TYPE_Audio_ISO_11172_3: /* Legacy Interface */ + break; + default: + return -1; + } + /* Add ES Descriptor Box. */ + if( isom_append_audio_es_descriptor_extension( (isom_box_t *)audio, summary ) ) + return -1; + /* In pure mp4 file, these "template" fields shall be default values according to the spec. + But not pure - hybrid with other spec - mp4 file can take other values. + Which is to say, these template values shall be ignored in terms of mp4, except some object_type_indications. + see 14496-14, "Template fields used". 
*/ + audio->type = ISOM_CODEC_TYPE_MP4A_AUDIO; + audio->version = 0; + audio->revision_level = 0; + audio->vendor = 0; + audio->channelcount = 2; + audio->samplesize = 16; + audio->compression_ID = 0; + audio->packet_size = 0; + /* WARNING: This field cannot retain frequency above 65535Hz. + This is not "FIXME", I just honestly implemented what the spec says. + BTW, who ever expects sampling frequency takes fixed-point decimal??? */ + audio->samplerate = summary->frequency <= UINT16_MAX ? summary->frequency << 16 : 0; + return 0; +} + +static int isom_set_qtff_lpcm_description( isom_audio_entry_t *audio, lsmash_audio_summary_t *summary ) +{ + lsmash_qt_audio_format_specific_flags_t *lpcm = NULL; + for( lsmash_entry_t *entry = summary->opaque->list.head; entry; entry = entry->next ) + { + lsmash_codec_specific_t *specific = (lsmash_codec_specific_t *)entry->data; + if( !specific ) + continue; + if( specific->type == LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_FORMAT_SPECIFIC_FLAGS + && specific->format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ) + { + lpcm = (lsmash_qt_audio_format_specific_flags_t *)specific->data.structured; + break; + } + } + if( !lpcm ) + return -1; + lsmash_codec_type_t sample_type = audio->type; + /* Convert the sample type into 'lpcm' if the description doesn't match the format or version = 2 fields are needed. 
*/ + if( (lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_RAW_AUDIO ) + && (summary->sample_size != 8 || (lpcm->format_flags & QT_LPCM_FORMAT_FLAG_FLOAT))) + || (lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_FL32_AUDIO ) + && (summary->sample_size != 32 || !(lpcm->format_flags & QT_LPCM_FORMAT_FLAG_FLOAT))) + || (lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_FL64_AUDIO ) + && (summary->sample_size != 64 || !(lpcm->format_flags & QT_LPCM_FORMAT_FLAG_FLOAT))) + || (lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_IN24_AUDIO ) + && (summary->sample_size != 24 || (lpcm->format_flags & QT_LPCM_FORMAT_FLAG_FLOAT))) + || (lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_IN32_AUDIO ) + && (summary->sample_size != 32 || (lpcm->format_flags & QT_LPCM_FORMAT_FLAG_FLOAT))) + || (lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_23NI_AUDIO ) + && (summary->sample_size != 32 || (lpcm->format_flags & QT_LPCM_FORMAT_FLAG_FLOAT) || (lpcm->format_flags & QT_LPCM_FORMAT_FLAG_BIG_ENDIAN))) + || (lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_SOWT_AUDIO ) + && (summary->sample_size != 16 || (lpcm->format_flags & QT_LPCM_FORMAT_FLAG_FLOAT) || (lpcm->format_flags & QT_LPCM_FORMAT_FLAG_BIG_ENDIAN))) + || (lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_TWOS_AUDIO ) + && ((summary->sample_size != 16 && summary->sample_size != 8) || (lpcm->format_flags & QT_LPCM_FORMAT_FLAG_FLOAT) || !(lpcm->format_flags & QT_LPCM_FORMAT_FLAG_BIG_ENDIAN))) + || (lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_NONE_AUDIO ) + && ((summary->sample_size != 16 && summary->sample_size != 8) || (lpcm->format_flags & QT_LPCM_FORMAT_FLAG_FLOAT) || !(lpcm->format_flags & QT_LPCM_FORMAT_FLAG_BIG_ENDIAN))) + || (lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_NOT_SPECIFIED ) + && ((summary->sample_size != 16 && summary->sample_size != 8) || (lpcm->format_flags & 
QT_LPCM_FORMAT_FLAG_FLOAT) || !(lpcm->format_flags & QT_LPCM_FORMAT_FLAG_BIG_ENDIAN))) + || (summary->channels > 2 || summary->frequency > UINT16_MAX || summary->sample_size % 8) ) + { + audio->type = QT_CODEC_TYPE_LPCM_AUDIO; + audio->version = 2; + } + else if( lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_LPCM_AUDIO ) ) + audio->version = 2; + else if( summary->sample_size > 16 + || (!lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_RAW_AUDIO ) + && !lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_TWOS_AUDIO ) + && !lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_NONE_AUDIO ) + && !lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_NOT_SPECIFIED )) ) + audio->version = 1; + /* Set up constBytesPerAudioPacket field. + * We use constBytesPerAudioPacket as the actual size of LPCM audio frame even when version is not 2. */ + audio->constBytesPerAudioPacket = (summary->sample_size * summary->channels) / 8; + /* Set up other fields in this description by its version. 
*/ + if( audio->version == 2 ) + { + audio->channelcount = 3; + audio->samplesize = 16; + audio->compression_ID = -2; + audio->samplerate = 0x00010000; + audio->sizeOfStructOnly = 72; + audio->audioSampleRate = (union {double d; uint64_t i;}){summary->frequency}.i; + audio->numAudioChannels = summary->channels; + audio->always7F000000 = 0x7F000000; + audio->constBitsPerChannel = summary->sample_size; + audio->constLPCMFramesPerAudioPacket = 1; + audio->formatSpecificFlags = lpcm->format_flags; + if( lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_TWOS_AUDIO ) && summary->sample_size != 8 ) + audio->formatSpecificFlags |= QT_LPCM_FORMAT_FLAG_BIG_ENDIAN; + if( lpcm->format_flags & QT_LPCM_FORMAT_FLAG_FLOAT ) + audio->formatSpecificFlags &= ~QT_LPCM_FORMAT_FLAG_SIGNED_INTEGER; + if( lpcm->format_flags & QT_LPCM_FORMAT_FLAG_PACKED ) + audio->formatSpecificFlags &= ~QT_LPCM_FORMAT_FLAG_ALIGNED_HIGH; + } + else if( audio->version == 1 ) + { + audio->channelcount = summary->channels; + audio->samplesize = 16; + /* Audio formats other than 'raw ' and 'twos' are treated as compressed audio. */ + if( lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_RAW_AUDIO ) + || lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_TWOS_AUDIO ) ) + audio->compression_ID = QT_AUDIO_COMPRESSION_ID_NOT_COMPRESSED; + else + audio->compression_ID = QT_AUDIO_COMPRESSION_ID_FIXED_COMPRESSION; + audio->samplerate = summary->frequency << 16; + audio->samplesPerPacket = 1; + audio->bytesPerPacket = summary->sample_size / 8; + audio->bytesPerFrame = audio->bytesPerPacket * summary->channels; /* sample_size field in stsz box is NOT used. 
*/ + audio->bytesPerSample = 1 + (summary->sample_size != 8); + if( lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_FL32_AUDIO ) + || lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_FL64_AUDIO ) + || lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_IN24_AUDIO ) + || lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_IN32_AUDIO ) ) + { + isom_wave_t *wave = lsmash_malloc_zero( sizeof(isom_wave_t) ); + if( !wave ) + return -1; + isom_init_box_common( wave, audio, QT_BOX_TYPE_WAVE ); + if( isom_add_frma( wave ) + || isom_add_enda( wave ) + || isom_add_terminator( wave ) + || isom_add_extension_box( &audio->extensions, wave, isom_remove_wave ) ) + { + isom_remove_wave( wave ); + return -1; + } + wave->frma->data_format = sample_type.fourcc; + wave->enda->littleEndian = !(lpcm->format_flags & QT_LPCM_FORMAT_FLAG_BIG_ENDIAN); + } + } + else /* audio->version == 0 */ + { + audio->channelcount = summary->channels; + audio->samplesize = summary->sample_size; + audio->compression_ID = QT_AUDIO_COMPRESSION_ID_NOT_COMPRESSED; + audio->samplerate = summary->frequency << 16; + } + return 0; +} + +static int isom_set_isom_dts_description( isom_audio_entry_t *audio, lsmash_audio_summary_t *summary ) +{ + audio->version = 0; + audio->revision_level = 0; + audio->vendor = 0; + audio->channelcount = summary->channels; + audio->samplesize = 16; + audio->compression_ID = 0; + audio->packet_size = 0; + switch( summary->frequency ) + { + case 12000 : /* Invalid? (No reference in the spec) */ + case 24000 : + case 48000 : + case 96000 : + case 192000 : + case 384000 : /* Invalid? (No reference in the spec) */ + audio->samplerate = 48000 << 16; + break; + case 22050 : + case 44100 : + case 88200 : + case 176400 : + case 352800 : /* Invalid? (No reference in the spec) */ + audio->samplerate = 44100 << 16; + break; + case 8000 : /* Invalid? 
(No reference in the spec) */ + case 16000 : + case 32000 : + case 64000 : + case 128000 : + audio->samplerate = 32000 << 16; + break; + default : + audio->samplerate = 0; + break; + } + return 0; +} + +static lsmash_box_type_t isom_guess_audio_codec_specific_box_type( lsmash_codec_type_t active_codec_type, lsmash_compact_box_type_t fourcc ) +{ + lsmash_box_type_t box_type = LSMASH_BOX_TYPE_INITIALIZER; + box_type.fourcc = fourcc; +#define GUESS_AUDIO_CODEC_SPECIFIC_BOX_TYPE( codec_type, predefined_box_type ) \ + else if( (codec_type.user.fourcc == 0 \ + || lsmash_check_codec_type_identical( active_codec_type, codec_type )) \ + && box_type.fourcc == predefined_box_type.fourcc ) \ + box_type = predefined_box_type + if( 0 ); + GUESS_AUDIO_CODEC_SPECIFIC_BOX_TYPE( ISOM_CODEC_TYPE_AC_3_AUDIO, ISOM_BOX_TYPE_DAC3 ); + GUESS_AUDIO_CODEC_SPECIFIC_BOX_TYPE( ISOM_CODEC_TYPE_EC_3_AUDIO, ISOM_BOX_TYPE_DEC3 ); + GUESS_AUDIO_CODEC_SPECIFIC_BOX_TYPE( ISOM_CODEC_TYPE_DTSC_AUDIO, ISOM_BOX_TYPE_DDTS ); + GUESS_AUDIO_CODEC_SPECIFIC_BOX_TYPE( ISOM_CODEC_TYPE_DTSE_AUDIO, ISOM_BOX_TYPE_DDTS ); + GUESS_AUDIO_CODEC_SPECIFIC_BOX_TYPE( ISOM_CODEC_TYPE_DTSH_AUDIO, ISOM_BOX_TYPE_DDTS ); + GUESS_AUDIO_CODEC_SPECIFIC_BOX_TYPE( ISOM_CODEC_TYPE_DTSL_AUDIO, ISOM_BOX_TYPE_DDTS ); + GUESS_AUDIO_CODEC_SPECIFIC_BOX_TYPE( ISOM_CODEC_TYPE_ALAC_AUDIO, ISOM_BOX_TYPE_ALAC ); + GUESS_AUDIO_CODEC_SPECIFIC_BOX_TYPE( ISOM_CODEC_TYPE_MP4A_AUDIO, ISOM_BOX_TYPE_ESDS ); + GUESS_AUDIO_CODEC_SPECIFIC_BOX_TYPE( QT_CODEC_TYPE_ALAC_AUDIO, QT_BOX_TYPE_ALAC ); + GUESS_AUDIO_CODEC_SPECIFIC_BOX_TYPE( QT_CODEC_TYPE_MP4A_AUDIO, QT_BOX_TYPE_ESDS ); + GUESS_AUDIO_CODEC_SPECIFIC_BOX_TYPE( LSMASH_CODEC_TYPE_UNSPECIFIED, QT_BOX_TYPE_CHAN ); + GUESS_AUDIO_CODEC_SPECIFIC_BOX_TYPE( LSMASH_CODEC_TYPE_UNSPECIFIED, QT_BOX_TYPE_GLBL ); + GUESS_AUDIO_CODEC_SPECIFIC_BOX_TYPE( LSMASH_CODEC_TYPE_UNSPECIFIED, QT_BOX_TYPE_WAVE ); +#undef GUESS_AUDIO_CODEC_SPECIFIC_BOX_TYPE + return box_type; +} + +static int 
isom_set_qtff_template_audio_description( isom_audio_entry_t *audio, lsmash_audio_summary_t *summary )
{
    /* Set up a QuickTime style audio sample description for a CODEC that has no dedicated setup routine.
     *
     * Steps:
     *   1. Convert audio->type to its QTFF form and look up the structured
     *      "format specific flags" record in summary->opaque, if there is one.
     *   2. Create a 'wave' extension holding 'frma' and a terminator, and attach it to the description.
     *   3. Move every other suitable CODEC specific record into the 'wave' extension as a binary box.
     *   4. Fill in the version 1 or version 2 sound description fields.
     * Returns 0 on success and -1 on failure. */
    audio->type = lsmash_form_qtff_box_type( audio->type.fourcc );
    lsmash_qt_audio_format_specific_flags_t *specific_data = NULL;
    for( lsmash_entry_t *entry = summary->opaque->list.head; entry; entry = entry->next )
    {
        lsmash_codec_specific_t *specific = (lsmash_codec_specific_t *)entry->data;
        if( !specific )
            continue;
        if( specific->type   == LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_FORMAT_SPECIFIC_FLAGS
         && specific->format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED )
        {
            specific_data = (lsmash_qt_audio_format_specific_flags_t *)specific->data.structured;
            break;
        }
    }
    /* A 'wave' extension itself shall be absent in the opaque CODEC specific info list.
     * So, create a 'wave' extension here and append it as an extension to the audio sample description. */
    isom_wave_t *wave = lsmash_malloc_zero( sizeof(isom_wave_t) );
    if( !wave )
        return -1;
    isom_init_box_common( wave, audio, QT_BOX_TYPE_WAVE );
    if( isom_add_frma( wave )
     || isom_add_terminator( wave )
     || isom_add_extension_box( &audio->extensions, wave, isom_remove_wave ) )
    {
        isom_remove_wave( wave );
        return -1;
    }
    wave->frma->data_format = audio->type.fourcc;
    /* Append extensions from the opaque CODEC specific info list to 'wave' extension. */
    for( lsmash_entry_t *entry = summary->opaque->list.head; entry; entry = entry->next )
    {
        lsmash_codec_specific_t *specific = (lsmash_codec_specific_t *)entry->data;
        if( !specific )
            return -1;
        if( specific->type   == LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNKNOWN
         && specific->format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED )
            continue;   /* LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNKNOWN + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED is not supported. */
        switch( specific->type )
        {
            case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_COMMON :
            case LSMASH_CODEC_SPECIFIC_DATA_TYPE_CODEC_GLOBAL_HEADER :
            case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_FORMAT_SPECIFIC_FLAGS :
                continue;   /* These cannot be an extension for 'wave' extension. */
            case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_CHANNEL_LAYOUT :
                /* (Legacy?) ALAC might have an Audio Channel Layout Box inside 'wave' extension. */
#if 1
                continue;
#else
                if( lsmash_check_codec_type_identical( audio->type, QT_CODEC_TYPE_ALAC_AUDIO ) )
                    continue;
                if( isom_append_channel_layout_extension( specific, wave, summary->channels ) )
                    return -1;
                break;
#endif
            default :
            {
                assert( specific->format == LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED || specific->type == LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_DECOMPRESSION_PARAMETERS );
                lsmash_codec_specific_t *cs = lsmash_convert_codec_specific_format( specific, LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED );
                if( !cs )
                    return -1;
                if( cs->size < ISOM_BASEBOX_COMMON_SIZE )
                {
                    /* Too short to hold a box header.  Release the converted copy before skipping it;
                     * every other exit from this case does the same. */
                    lsmash_destroy_codec_specific_data( cs );
                    continue;
                }
                uint8_t *box_data = cs->data.unstructured;
                uint64_t box_size = cs->size;
                /* The box type is stored in bytes 4..7 of the binary box image. */
                lsmash_compact_box_type_t fourcc = LSMASH_4CC( box_data[4], box_data[5], box_data[6], box_data[7] );
                lsmash_box_type_t box_type = isom_guess_audio_codec_specific_box_type( (lsmash_codec_type_t)audio->type, fourcc );
                if( lsmash_check_box_type_identical( box_type, QT_BOX_TYPE_WAVE ) )
                {
                    /* It is insane to append a 'wave' extension to a 'wave' extension. */
                    lsmash_destroy_codec_specific_data( cs );
                    continue;
                }
                isom_extension_box_t *extension = lsmash_malloc_zero( sizeof(isom_extension_box_t) );
                if( !extension )
                {
                    lsmash_destroy_codec_specific_data( cs );
                    return -1;
                }
                extension->size        = box_size;
                extension->type        = box_type;
                extension->format      = EXTENSION_FORMAT_BINARY;
                extension->form.binary = box_data;
                extension->destruct    = free;
                cs->data.unstructured = NULL;   /* Avoid freeing the binary data of the extension. */
                lsmash_destroy_codec_specific_data( cs );
                if( lsmash_add_entry( &wave->extensions, extension ) )
                {
                    /* The binary payload was detached from 'cs' above, so it is owned only by
                     * 'extension' now and has to be released here as well. */
                    free( box_data );
                    extension->destruct( extension );
                    return -1;
                }
                break;
            }
        }
    }
    /* Set up common audio description fields. */
    audio->version        = (summary->channels > 2 || summary->frequency > UINT16_MAX) ? 2 : 1;
    audio->channelcount   = audio->version == 2 ? 3 : LSMASH_MIN( summary->channels, 2 );
    audio->samplesize     = 16;
    audio->compression_ID = QT_AUDIO_COMPRESSION_ID_VARIABLE_COMPRESSION;
    audio->packet_size    = 0;
    if( audio->version == 2 )
    {
        audio->channelcount                  = 3;
        audio->compression_ID                = -2;
        audio->samplerate                    = 0x00010000;
        audio->sizeOfStructOnly              = 72;
        /* Store the bit pattern of the sample rate expressed as a double. */
        audio->audioSampleRate               = (union {double d; uint64_t i;}){summary->frequency}.i;
        audio->numAudioChannels              = summary->channels;
        audio->always7F000000                = 0x7F000000;
        audio->constBitsPerChannel           = 0;
        audio->constBytesPerAudioPacket      = 0;
        audio->constLPCMFramesPerAudioPacket = summary->samples_in_frame;
        if( lsmash_check_codec_type_identical( (lsmash_codec_type_t)audio->type, QT_CODEC_TYPE_ALAC_AUDIO ) )
        {
            /* For ALAC the format specific flags carry the bit depth of the source data. */
            switch( summary->sample_size )
            {
                case 16 :
                    audio->formatSpecificFlags = QT_ALAC_FORMAT_FLAG_16BIT_SOURCE_DATA;
                    break;
                case 20 :
                    audio->formatSpecificFlags = QT_ALAC_FORMAT_FLAG_20BIT_SOURCE_DATA;
                    break;
                case 24 :
                    audio->formatSpecificFlags = QT_ALAC_FORMAT_FLAG_24BIT_SOURCE_DATA;
                    break;
                case 32 :
                    audio->formatSpecificFlags = QT_ALAC_FORMAT_FLAG_32BIT_SOURCE_DATA;
                    break;
                default :
                    break;
            }
        }
        else
        {
            if( specific_data )
            {
                audio->formatSpecificFlags = specific_data->format_flags;
                if( specific_data->format_flags & QT_AUDIO_FORMAT_FLAG_FLOAT )
                    audio->formatSpecificFlags &= ~QT_AUDIO_FORMAT_FLAG_SIGNED_INTEGER;
                if( specific_data->format_flags & QT_AUDIO_FORMAT_FLAG_PACKED )
                    audio->formatSpecificFlags &= ~QT_AUDIO_FORMAT_FLAG_ALIGNED_HIGH;
            }
            else
                audio->formatSpecificFlags = 0;
        }
    }
    else    /* if( audio->version == 1 ) */
    {
audio->channelcount = LSMASH_MIN( summary->channels, 2 ); + audio->samplerate = summary->frequency << 16; + audio->samplesPerPacket = summary->samples_in_frame; + audio->bytesPerPacket = summary->sample_size / 8; + audio->bytesPerFrame = audio->bytesPerPacket * summary->channels; /* sample_size field in stsz box is NOT used. */ + audio->bytesPerSample = 1 + (summary->sample_size != 8); + if( specific_data ) + { + if( wave ) + { + if( isom_add_enda( wave ) ) + return -1; + wave->enda->littleEndian = !(specific_data->format_flags & QT_LPCM_FORMAT_FLAG_BIG_ENDIAN); + } + else + { + isom_extension_box_t *ext = isom_get_sample_description_extension( &audio->extensions, QT_BOX_TYPE_WAVE ); + assert( ext && ext->format == EXTENSION_FORMAT_BINARY ); + uint32_t enda_size; + uint8_t *enda = isom_get_child_box_position( ext->form.binary, ext->size, QT_BOX_TYPE_ENDA, &enda_size ); + if( !enda ) + { + uint32_t wave_size = ext->size; + uint8_t *wave_data = ext->form.binary; + uint32_t frma_size; + uint8_t *frma_data = isom_get_child_box_position( ext->form.binary, ext->size, QT_BOX_TYPE_FRMA, &frma_size ); + uint8_t *frma_end = frma_data + frma_size; + uint32_t remainder_size = ext->size - (frma_end - wave_data); + uint32_t enda_offset = wave_data - frma_end; + enda_size = ISOM_BASEBOX_COMMON_SIZE + 2; + wave_data = lsmash_memdup( wave_data, wave_size + enda_size ); + enda = wave_data + enda_offset; + enda[0] = (enda_size >> 24) & 0xff; + enda[1] = (enda_size >> 16) & 0xff; + enda[2] = (enda_size >> 8) & 0xff; + enda[3] = enda_size & 0xff; + enda[4] = 'e'; + enda[5] = 'n'; + enda[6] = 'd'; + enda[7] = 'a'; + enda[8] = 0; + enda[9] = !(specific_data->format_flags & QT_LPCM_FORMAT_FLAG_BIG_ENDIAN); + memcpy( wave_data + enda_offset + enda_size, frma_end, remainder_size ); + free( ext->form.binary ); + ext->form.binary = wave_data; + ext->size += enda_size; + } + else + { + if( enda_size < ISOM_BASEBOX_COMMON_SIZE + 2 ) + return -1; + if( specific_data->format_flags & 
QT_LPCM_FORMAT_FLAG_BIG_ENDIAN ) + enda[9] &= ~0x01; + else + enda[9] |= 0x01; + } + } + } + } + return 0; +} + +static int isom_set_isom_template_audio_description( isom_audio_entry_t *audio, lsmash_audio_summary_t *summary ) +{ + audio->version = 0; + audio->revision_level = 0; + audio->vendor = 0; + audio->channelcount = summary->channels; + audio->samplesize = 16; + audio->compression_ID = 0; + audio->packet_size = 0; + audio->samplerate = summary->frequency <= UINT16_MAX ? summary->frequency << 16 : 0; + return 0; +} + +int isom_setup_audio_description( isom_stsd_t *stsd, lsmash_codec_type_t sample_type, lsmash_audio_summary_t *summary ) +{ + if( !stsd || !stsd->list || !stsd->root || !summary ) + return -1; + if( isom_check_valid_summary( (lsmash_summary_t *)summary ) ) + return -1; + lsmash_entry_list_t *list = stsd->list; + isom_audio_entry_t *audio = lsmash_malloc_zero( sizeof(isom_audio_entry_t) ); + if( !audio ) + return -1; + isom_init_box_common( audio, stsd, sample_type ); + audio->manager |= LSMASH_AUDIO_DESCRIPTION; + audio->data_reference_index = 1; + lsmash_root_t *root = stsd->root; + lsmash_codec_type_t audio_type = (lsmash_codec_type_t)audio->type; + int ret; + if( lsmash_check_codec_type_identical( audio_type, ISOM_CODEC_TYPE_MP4A_AUDIO ) + || lsmash_check_codec_type_identical( audio_type, QT_CODEC_TYPE_MP4A_AUDIO ) ) + { + if( root->ftyp && root->ftyp->major_brand == ISOM_BRAND_TYPE_QT ) + ret = isom_set_qtff_mp4a_description( audio, summary ); + else + ret = isom_set_isom_mp4a_description( audio, summary ); + } + else if( isom_is_lpcm_audio( audio ) ) + ret = isom_set_qtff_lpcm_description( audio, summary ); + else if( lsmash_check_codec_type_identical( audio_type, ISOM_CODEC_TYPE_DTSC_AUDIO ) + || lsmash_check_codec_type_identical( audio_type, ISOM_CODEC_TYPE_DTSE_AUDIO ) + || lsmash_check_codec_type_identical( audio_type, ISOM_CODEC_TYPE_DTSH_AUDIO ) + || lsmash_check_codec_type_identical( audio_type, ISOM_CODEC_TYPE_DTSL_AUDIO ) ) + ret = 
isom_set_isom_dts_description( audio, summary ); + else if( root->qt_compatible ) + ret = isom_set_qtff_template_audio_description( audio, summary ); + else + ret = isom_set_isom_template_audio_description( audio, summary ); + if( ret ) + goto fail; + /* Don't use audio_type since audio->type might have changed. */ + for( lsmash_entry_t *entry = summary->opaque->list.head; entry; entry = entry->next ) + { + lsmash_codec_specific_t *specific = (lsmash_codec_specific_t *)entry->data; + if( !specific ) + goto fail; + if( specific->type == LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNKNOWN + && specific->format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ) + continue; /* LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNKNOWN + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED is not supported. */ + switch( specific->type ) + { + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_COMMON : + { + if( specific->format == LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED ) + continue; /* Ignore since not fatal. */ + lsmash_qt_audio_common_t *data = (lsmash_qt_audio_common_t *)specific->data.structured; + audio->revision_level = data->revision_level; + audio->vendor = data->vendor; + audio->compression_ID = data->compression_ID; + break; + } + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_CHANNEL_LAYOUT : + { + if( !root->qt_compatible + && !lsmash_check_codec_type_identical( (lsmash_codec_type_t)audio->type, ISOM_CODEC_TYPE_ALAC_AUDIO ) + && !lsmash_check_codec_type_identical( (lsmash_codec_type_t)audio->type, QT_CODEC_TYPE_ALAC_AUDIO ) ) + continue; + if( isom_append_channel_layout_extension( specific, audio, summary->channels ) ) + goto fail; + break; + } + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_CODEC_GLOBAL_HEADER : + { + lsmash_codec_specific_t *cs = lsmash_convert_codec_specific_format( specific, LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !cs ) + goto fail; + lsmash_codec_global_header_t *data = (lsmash_codec_global_header_t *)cs->data.structured; + isom_glbl_t *box = lsmash_malloc_zero( sizeof(isom_glbl_t) ); + 
if( !box ) + { + lsmash_destroy_codec_specific_data( cs ); + goto fail; + } + isom_init_box_common( box, audio, QT_BOX_TYPE_GLBL ); + box->header_size = data->header_size; + box->header_data = lsmash_memdup( data->header_data, data->header_size ); + lsmash_destroy_codec_specific_data( cs ); + if( !box->header_data + || isom_add_extension_box( &audio->extensions, box, isom_remove_glbl ) ) + { + free( box ); + goto fail; + } + break; + } + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_FORMAT_SPECIFIC_FLAGS : + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_DECOMPRESSION_PARAMETERS : + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_MP4SYS_DECODER_CONFIG : + break; /* shall be set up already */ + case LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_ALAC : + if( root->qt_compatible ) + continue; /* shall be set up already */ + default : + { + lsmash_codec_specific_t *cs = lsmash_convert_codec_specific_format( specific, LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED ); + if( !cs ) + goto fail; + if( cs->size < ISOM_BASEBOX_COMMON_SIZE ) + continue; + uint8_t *box_data = cs->data.unstructured; + uint64_t box_size = cs->size; + lsmash_compact_box_type_t fourcc = LSMASH_4CC( box_data[4], box_data[5], box_data[6], box_data[7] ); + lsmash_box_type_t box_type = isom_guess_audio_codec_specific_box_type( (lsmash_codec_type_t)audio->type, fourcc ); + if( lsmash_check_box_type_identical( box_type, QT_BOX_TYPE_WAVE ) ) + { + /* CODEC specific info shall be already inside 'wave' extension. */ + lsmash_destroy_codec_specific_data( cs ); + continue; + } + /* Set up the extension. */ + isom_extension_box_t *extension = lsmash_malloc_zero( sizeof(isom_extension_box_t) ); + if( !extension ) + { + lsmash_destroy_codec_specific_data( cs ); + goto fail; + } + extension->size = box_size; + extension->type = box_type; + extension->format = EXTENSION_FORMAT_BINARY; + extension->form.binary = box_data; + extension->destruct = free; + cs->data.unstructured = NULL; /* Avoid freeing the binary data of the extension. 
*/ + lsmash_destroy_codec_specific_data( cs ); + if( lsmash_add_entry( &audio->extensions, extension ) ) + { + extension->destruct( extension ); + goto fail; + } + break; + } + } + } + if( audio->version == 0 ) + audio->compression_ID = QT_AUDIO_COMPRESSION_ID_NOT_COMPRESSED; + else if( audio->version == 2 ) + audio->compression_ID = QT_AUDIO_COMPRESSION_ID_VARIABLE_COMPRESSION; + if( !lsmash_add_entry( list, audio ) ) + return 0; /* successed */ +fail: + isom_remove_sample_description_extensions( &audio->extensions ); + free( audio ); + return -1; +} + +isom_extension_box_t *isom_get_sample_description_extension( lsmash_entry_list_t *extensions, lsmash_box_type_t box_type ) +{ + for( lsmash_entry_t *entry = extensions->head; entry; entry = entry->next ) + { + isom_extension_box_t *ext = (isom_extension_box_t *)entry->data; + if( !ext ) + continue; + if( lsmash_check_box_type_identical( ext->type, box_type ) ) + return ext; + } + return NULL; +} + +void *isom_get_extension_box( lsmash_entry_list_t *extensions, lsmash_box_type_t box_type ) +{ + for( lsmash_entry_t *entry = extensions->head; entry; entry = entry->next ) + { + isom_extension_box_t *ext = (isom_extension_box_t *)entry->data; + if( !ext || ext->format != EXTENSION_FORMAT_BOX || !lsmash_check_box_type_identical( ext->type, box_type ) ) + continue; + return ext->form.box; + } + return NULL; +} + +static lsmash_codec_specific_data_type isom_get_codec_specific_data_type( lsmash_compact_box_type_t extension_fourcc ) +{ + static struct codec_specific_data_type_table_tag + { + lsmash_compact_box_type_t extension_fourcc; + lsmash_codec_specific_data_type data_type; + } codec_specific_data_type_table[32] = { { 0, LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNKNOWN } }; + if( codec_specific_data_type_table[0].data_type == LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNKNOWN ) + { + int i = 0; +#define ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( extension_type, data_type ) \ + codec_specific_data_type_table[i++] = (struct 
codec_specific_data_type_table_tag){ extension_type.fourcc, data_type } + ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( ISOM_BOX_TYPE_AVCC, LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264 ); + ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( ISOM_BOX_TYPE_DVC1, LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_VC_1 ); + ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( ISOM_BOX_TYPE_DAC3, LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_AC_3 ); + ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( ISOM_BOX_TYPE_DEC3, LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_EC_3 ); + ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( ISOM_BOX_TYPE_DDTS, LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_DTS ); + ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( ISOM_BOX_TYPE_ALAC, LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_ALAC ); + ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( ISOM_BOX_TYPE_ESDS, LSMASH_CODEC_SPECIFIC_DATA_TYPE_MP4SYS_DECODER_CONFIG ); + ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( ISOM_BOX_TYPE_STSL, LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_SAMPLE_SCALE ); + ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( ISOM_BOX_TYPE_BTRT, LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264_BITRATE ); + //ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( QT_BOX_TYPE_ALAC, LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_ALAC ); + //ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( QT_BOX_TYPE_ESDS, LSMASH_CODEC_SPECIFIC_DATA_TYPE_MP4SYS_DECODER_CONFIG ); + ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( QT_BOX_TYPE_FIEL, LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_FIELD_INFO ); + ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( QT_BOX_TYPE_CSPC, LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_PIXEL_FORMAT ); + ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( QT_BOX_TYPE_SGBT, LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_SIGNIFICANT_BITS ); + ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( QT_BOX_TYPE_GAMA, LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_GAMMA_LEVEL ); + ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( QT_BOX_TYPE_CHAN, LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_CHANNEL_LAYOUT ); + 
ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( QT_BOX_TYPE_GLBL, LSMASH_CODEC_SPECIFIC_DATA_TYPE_CODEC_GLOBAL_HEADER ); + ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT( LSMASH_BOX_TYPE_UNSPECIFIED, LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNKNOWN ); +#undef ADD_CODEC_SPECIFIC_DATA_TYPE_TABLE_ELEMENT + } + lsmash_codec_specific_data_type data_type = LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNKNOWN; + for( int i = 0; codec_specific_data_type_table[i].data_type != LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNKNOWN; i++ ) + if( extension_fourcc == codec_specific_data_type_table[i].extension_fourcc ) + { + data_type = codec_specific_data_type_table[i].data_type; + break; + } + return data_type; +} + +lsmash_summary_t *isom_create_video_summary_from_description( isom_sample_entry_t *sample_entry ) +{ + if( !sample_entry ) + return NULL; + isom_visual_entry_t *visual = (isom_visual_entry_t *)sample_entry; + lsmash_video_summary_t *summary = (lsmash_video_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_VIDEO ); + if( !summary ) + return NULL; + summary->sample_type = visual->type; + summary->width = visual->width; + summary->height = visual->height; + summary->depth = visual->depth; + memcpy( summary->compressorname, visual->compressorname, 32 ); + summary->compressorname[32] = '\0'; + if( isom_is_qt_video( summary->sample_type ) ) + { + lsmash_codec_specific_t *specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_COMMON, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !specific ) + goto fail; + lsmash_qt_video_common_t *data = (lsmash_qt_video_common_t *)specific->data.structured; + data->revision_level = visual->revision_level; + data->vendor = visual->vendor; + data->temporalQuality = visual->temporalQuality; + data->spatialQuality = visual->spatialQuality; + data->horizontal_resolution = visual->horizresolution; + data->vertical_resolution = visual->vertresolution; + data->dataSize = visual->dataSize; + data->frame_count = visual->frame_count; + 
data->color_table_ID = visual->color_table_ID; + if( visual->color_table_ID == 0 ) + { + isom_qt_color_table_t *src_ct = &visual->color_table; + if( !src_ct->array ) + goto fail; + uint16_t element_count = LSMASH_MIN( src_ct->size + 1, 256 ); + lsmash_qt_color_table_t *dst_ct = &data->color_table; + dst_ct->seed = src_ct->seed; + dst_ct->flags = src_ct->flags; + dst_ct->size = src_ct->size; + for( uint16_t i = 0; i < element_count; i++ ) + { + dst_ct->array[i].unused = src_ct->array[i].value; + dst_ct->array[i].r = src_ct->array[i].r; + dst_ct->array[i].g = src_ct->array[i].g; + dst_ct->array[i].b = src_ct->array[i].b; + } + } + if( lsmash_add_entry( &summary->opaque->list, specific ) ) + { + lsmash_destroy_codec_specific_data( specific ); + goto fail; + } + } + for( lsmash_entry_t *entry = visual->extensions.head; entry; entry = entry->next ) + { + isom_extension_box_t *ext = (isom_extension_box_t *)entry->data; + if( !ext ) + continue; + if( ext->format == EXTENSION_FORMAT_BOX ) + { + if( !ext->form.box ) + continue; + lsmash_codec_specific_t *specific = NULL; + if( lsmash_check_box_type_identical( ext->type, ISOM_BOX_TYPE_CLAP ) ) + { + isom_clap_t *clap = (isom_clap_t *)ext->form.box; + summary->clap.width.n = clap->cleanApertureWidthN; + summary->clap.width.d = clap->cleanApertureWidthD; + summary->clap.height.n = clap->cleanApertureHeightN; + summary->clap.height.d = clap->cleanApertureHeightD; + summary->clap.horizontal_offset.n = clap->horizOffN; + summary->clap.horizontal_offset.d = clap->horizOffD; + summary->clap.vertical_offset.n = clap->vertOffN; + summary->clap.vertical_offset.d = clap->vertOffD; + continue; + } + else if( lsmash_check_box_type_identical( ext->type, ISOM_BOX_TYPE_PASP ) ) + { + isom_pasp_t *pasp = (isom_pasp_t *)ext->form.box; + summary->par_h = pasp->hSpacing; + summary->par_v = pasp->vSpacing; + continue; + } + else if( lsmash_check_box_type_identical( ext->type, ISOM_BOX_TYPE_COLR ) + || lsmash_check_box_type_identical( ext->type, 
QT_BOX_TYPE_COLR ) ) + { + isom_colr_t *colr = (isom_colr_t *)ext->form.box; + summary->color.primaries_index = colr->primaries_index; + summary->color.transfer_index = colr->transfer_function_index; + summary->color.matrix_index = colr->matrix_index; + summary->color.full_range = colr->full_range_flag; + continue; + } + else if( lsmash_check_box_type_identical( ext->type, ISOM_BOX_TYPE_STSL ) ) + { + specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_SAMPLE_SCALE, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !specific ) + goto fail; + isom_stsl_t *stsl = (isom_stsl_t *)ext->form.box; + lsmash_isom_sample_scale_t *data = (lsmash_isom_sample_scale_t *)specific->data.structured; + data->constraint_flag = stsl->constraint_flag; + data->scale_method = stsl->scale_method; + data->display_center_x = stsl->display_center_x; + data->display_center_y = stsl->display_center_y; + } + else if( lsmash_check_box_type_identical( ext->type, ISOM_BOX_TYPE_BTRT ) ) + { + specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264_BITRATE, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !specific ) + goto fail; + isom_btrt_t *btrt = (isom_btrt_t *)ext->form.box; + lsmash_h264_bitrate_t *data = (lsmash_h264_bitrate_t *)specific->data.structured; + data->bufferSizeDB = btrt->bufferSizeDB; + data->maxBitrate = btrt->maxBitrate; + data->avgBitrate = btrt->avgBitrate; + } + else if( lsmash_check_box_type_identical( ext->type, QT_BOX_TYPE_FIEL ) ) + { + specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_FIELD_INFO, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !specific ) + goto fail; + isom_fiel_t *fiel = (isom_fiel_t *)ext->form.box; + lsmash_qt_field_info_t *data = (lsmash_qt_field_info_t *)specific->data.structured; + data->fields = fiel->fields; + data->detail = fiel->detail; + } + else if( lsmash_check_box_type_identical( ext->type, QT_BOX_TYPE_CSPC ) ) + { + 
specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_PIXEL_FORMAT, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !specific ) + goto fail; + isom_cspc_t *cspc = (isom_cspc_t *)ext->form.box; + lsmash_qt_pixel_format_t *data = (lsmash_qt_pixel_format_t *)specific->data.structured; + data->pixel_format = cspc->pixel_format; + } + else if( lsmash_check_box_type_identical( ext->type, QT_BOX_TYPE_SGBT ) ) + { + specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_SIGNIFICANT_BITS, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !specific ) + goto fail; + isom_sgbt_t *sgbt = (isom_sgbt_t *)ext->form.box; + lsmash_qt_significant_bits_t *data = (lsmash_qt_significant_bits_t *)specific->data.structured; + data->significantBits = sgbt->significantBits; + } + else if( lsmash_check_box_type_identical( ext->type, QT_BOX_TYPE_GLBL ) ) + { + specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_CODEC_GLOBAL_HEADER, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !specific ) + goto fail; + isom_glbl_t *glbl = (isom_glbl_t *)ext->form.box; + lsmash_codec_global_header_t *data = (lsmash_codec_global_header_t *)specific->data.structured; + data->header_size = glbl->header_size; + data->header_data = lsmash_memdup( glbl->header_data, glbl->header_size ); + if( !data->header_data ) + { + lsmash_destroy_codec_specific_data( specific ); + goto fail; + } + } + else + continue; + if( lsmash_add_entry( &summary->opaque->list, specific ) ) + { + lsmash_destroy_codec_specific_data( specific ); + goto fail; + } + } + else + { + if( ext->size < ISOM_BASEBOX_COMMON_SIZE ) + continue; + uint8_t *data = ext->form.binary; + lsmash_compact_box_type_t fourcc = LSMASH_4CC( data[4], data[5], data[6], data[7] ); + lsmash_codec_specific_data_type type = isom_get_codec_specific_data_type( fourcc ); + lsmash_codec_specific_t *specific = lsmash_create_codec_specific_data( type, 
LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED ); + if( !specific ) + goto fail; + specific->size = ext->size; + specific->data.unstructured = lsmash_memdup( ext->form.binary, ext->size ); + if( !specific->data.unstructured + || lsmash_add_entry( &summary->opaque->list, specific ) ) + { + lsmash_destroy_codec_specific_data( specific ); + goto fail; + } + } + } + return (lsmash_summary_t *)summary; +fail: + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + return NULL; +} + +static int isom_append_structured_mp4sys_decoder_config( lsmash_codec_specific_list_t *opaque, isom_esds_t *esds ) +{ + lsmash_bs_t *bs = lsmash_bs_create( NULL ); + if( !bs ) + return -1; + /* Put box size, type, version and flags fields. */ + lsmash_bs_put_be32( bs, 0 ); + lsmash_bs_put_be32( bs, ISOM_BOX_TYPE_ESDS.fourcc ); + lsmash_bs_put_be32( bs, 0 ); + /* Put ES Descriptor. */ + mp4sys_put_ES_Descriptor( bs, esds->ES ); + /* Export ES Descriptor Box as binary string. */ + uint32_t esds_size; + uint8_t *esds_data = lsmash_bs_export_data( bs, &esds_size ); + lsmash_bs_cleanup( bs ); + if( !esds_data ) + return -1; + /* Update box size. */ + esds_data[0] = ((esds_size) >> 24) & 0xff; + esds_data[1] = ((esds_size) >> 16) & 0xff; + esds_data[2] = ((esds_size) >> 8) & 0xff; + esds_data[3] = (esds_size) & 0xff; + lsmash_codec_specific_data_type type = isom_get_codec_specific_data_type( ISOM_BOX_TYPE_ESDS.fourcc ); + lsmash_codec_specific_t *specific = lsmash_create_codec_specific_data( type, LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED ); + if( !specific ) + { + free( esds_data ); + return -1; + } + specific->data.unstructured = esds_data; + specific->size = esds_size; + /* Convert unstructured CODEC specific data format into structured, and append it to the opaque list. 
*/ + lsmash_codec_specific_t *conv = lsmash_convert_codec_specific_format( specific, LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + lsmash_destroy_codec_specific_data( specific ); + if( !conv ) + return -1; + if( lsmash_add_entry( &opaque->list, conv ) ) + { + lsmash_destroy_codec_specific_data( conv ); + return -1; + } + return 0; +} + +lsmash_summary_t *isom_create_audio_summary_from_description( isom_sample_entry_t *sample_entry ) +{ + if( !sample_entry || !sample_entry->root ) + return NULL; + isom_audio_entry_t *audio = (isom_audio_entry_t *)sample_entry; + lsmash_audio_summary_t *summary = (lsmash_audio_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_AUDIO ); + if( !summary ) + return NULL; + summary->sample_type = audio->type; + summary->sample_size = audio->samplesize; + summary->channels = audio->channelcount; + summary->frequency = audio->samplerate >> 16; + if( audio->root->qt_compatible + && isom_is_qt_audio( (lsmash_codec_type_t)audio->type ) ) + { + if( audio->version == 1 ) + { + summary->channels = audio->bytesPerFrame / audio->bytesPerPacket; + summary->sample_size = audio->bytesPerPacket * 8; + summary->samples_in_frame = audio->samplesPerPacket; + } + else if( audio->version == 2 ) + { + summary->frequency = (union {uint64_t i; double d;}){audio->audioSampleRate}.d; + summary->channels = audio->numAudioChannels; + summary->sample_size = audio->constBitsPerChannel; + summary->samples_in_frame = audio->constLPCMFramesPerAudioPacket; + } + lsmash_codec_specific_t *specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_COMMON, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !specific ) + goto fail; + lsmash_qt_audio_common_t *common = (lsmash_qt_audio_common_t *)specific->data.structured; + common->revision_level = audio->revision_level; + common->vendor = audio->vendor; + common->compression_ID = audio->compression_ID; + if( lsmash_add_entry( &summary->opaque->list, specific ) ) + { + 
lsmash_destroy_codec_specific_data( specific ); + goto fail; + } + if( isom_is_lpcm_audio( audio ) ) + { + specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_FORMAT_SPECIFIC_FLAGS, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !specific ) + goto fail; + lsmash_qt_audio_format_specific_flags_t *data = (lsmash_qt_audio_format_specific_flags_t *)specific->data.structured; + if( audio->version == 2 ) + data->format_flags = audio->formatSpecificFlags; + else + { + data->format_flags = 0; + /* Here, don't override samplesize. + * We should trust samplesize field in the description for misused CODEC identifier. */ + lsmash_codec_type_t audio_type = (lsmash_codec_type_t)audio->type; + /* Floating point LPCM: check the 32-bit and the 64-bit identifier. */ + if( lsmash_check_codec_type_identical( audio_type, QT_CODEC_TYPE_FL32_AUDIO ) + || lsmash_check_codec_type_identical( audio_type, QT_CODEC_TYPE_FL64_AUDIO ) ) + data->format_flags = QT_LPCM_FORMAT_FLAG_FLOAT; + else if( lsmash_check_codec_type_identical( audio_type, QT_CODEC_TYPE_TWOS_AUDIO ) + || lsmash_check_codec_type_identical( audio_type, QT_CODEC_TYPE_NONE_AUDIO ) + || lsmash_check_codec_type_identical( audio_type, QT_CODEC_TYPE_NOT_SPECIFIED ) ) + { + if( lsmash_check_codec_type_identical( audio_type, QT_CODEC_TYPE_TWOS_AUDIO ) ) + data->format_flags = QT_LPCM_FORMAT_FLAG_BIG_ENDIAN | QT_AUDIO_FORMAT_FLAG_SIGNED_INTEGER; + if( summary->sample_size > 8 ) + data->format_flags = QT_LPCM_FORMAT_FLAG_BIG_ENDIAN; + } + } + isom_wave_t *wave = (isom_wave_t *)isom_get_extension_box( &audio->extensions, QT_BOX_TYPE_WAVE ); + if( wave && wave->enda && !wave->enda->littleEndian ) + data->format_flags |= QT_LPCM_FORMAT_FLAG_BIG_ENDIAN; + if( lsmash_add_entry( &summary->opaque->list, specific ) ) + { + lsmash_destroy_codec_specific_data( specific ); + goto fail; + } + } + else if( audio->version == 2 + && (lsmash_check_codec_type_identical( (lsmash_codec_type_t)audio->type, ISOM_CODEC_TYPE_ALAC_AUDIO ) + || lsmash_check_codec_type_identical( 
(lsmash_codec_type_t)audio->type, QT_CODEC_TYPE_ALAC_AUDIO )) ) + switch( audio->formatSpecificFlags ) + { + case QT_ALAC_FORMAT_FLAG_16BIT_SOURCE_DATA : + summary->sample_size = 16; + break; + case QT_ALAC_FORMAT_FLAG_20BIT_SOURCE_DATA : + summary->sample_size = 20; + break; + case QT_ALAC_FORMAT_FLAG_24BIT_SOURCE_DATA : + summary->sample_size = 24; + break; + case QT_ALAC_FORMAT_FLAG_32BIT_SOURCE_DATA : + summary->sample_size = 32; + break; + default : + break; + } + } + for( lsmash_entry_t *entry = audio->extensions.head; entry; entry = entry->next ) + { + isom_extension_box_t *ext = (isom_extension_box_t *)entry->data; + if( !ext ) + continue; + if( ext->format == EXTENSION_FORMAT_BOX ) + { + if( !ext->form.box ) + continue; + if( lsmash_check_box_type_identical( ext->type, QT_BOX_TYPE_CHAN ) ) + { + lsmash_codec_specific_t *specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_CHANNEL_LAYOUT, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !specific ) + goto fail; + isom_chan_t *chan = (isom_chan_t *)ext->form.box; + lsmash_qt_audio_channel_layout_t *data = (lsmash_qt_audio_channel_layout_t *)specific->data.structured; + data->channelLayoutTag = chan->channelLayoutTag; + data->channelBitmap = chan->channelBitmap; + if( lsmash_add_entry( &summary->opaque->list, specific ) ) + { + lsmash_destroy_codec_specific_data( specific ); + goto fail; + } + } + else if( lsmash_check_box_type_identical( ext->type, ISOM_BOX_TYPE_ESDS ) + || lsmash_check_box_type_identical( ext->type, QT_BOX_TYPE_ESDS ) ) + { + isom_esds_t *esds = (isom_esds_t *)ext->form.box; + if( !esds + || mp4sys_setup_summary_from_DecoderSpecificInfo( summary, esds->ES ) + || isom_append_structured_mp4sys_decoder_config( summary->opaque, esds ) ) + goto fail; + } + else if( lsmash_check_box_type_identical( ext->type, QT_BOX_TYPE_WAVE ) ) + { + /* Don't append 'wave' extension itself to the opaque CODEC specific info list. 
*/ + isom_wave_t *wave = (isom_wave_t *)ext->form.box; + lsmash_bs_t *bs = lsmash_bs_create( NULL ); + if( !bs ) + goto fail; + for( lsmash_entry_t *wave_entry = wave->extensions.head; wave_entry; wave_entry = wave_entry->next ) + { + isom_extension_box_t *wave_ext = (isom_extension_box_t *)wave_entry->data; + if( !wave_ext ) + continue; + lsmash_box_type_t box_type = LSMASH_BOX_TYPE_INITIALIZER; + if( wave_ext->format == EXTENSION_FORMAT_BOX ) + { + if( !wave_ext->form.box ) + continue; + box_type = ((isom_box_t *)wave_ext->form.box)->type; + if( lsmash_check_box_type_identical( wave_ext->type, QT_BOX_TYPE_ENDA ) ) + { + isom_enda_t *enda = wave_ext->form.box; + isom_bs_put_box_common( bs, enda ); + lsmash_bs_put_be16( bs, enda->littleEndian ); + } + else if( lsmash_check_box_type_identical( wave_ext->type, QT_BOX_TYPE_MP4A ) ) + { + isom_mp4a_t *mp4a = wave_ext->form.box; + isom_bs_put_box_common( bs, mp4a ); + lsmash_bs_put_be32( bs, mp4a->unknown ); + } + else if( lsmash_check_box_type_identical( wave_ext->type, QT_BOX_TYPE_CHAN ) ) + { + isom_chan_t *chan = wave_ext->form.box; + isom_bs_put_box_common( bs, chan ); + lsmash_bs_put_be32( bs, chan->channelLayoutTag ); + lsmash_bs_put_be32( bs, chan->channelBitmap ); + lsmash_bs_put_be32( bs, chan->numberChannelDescriptions ); + if( chan->channelDescriptions ) + for( uint32_t i = 0; i < chan->numberChannelDescriptions; i++ ) + { + isom_channel_description_t *channelDescriptions = (isom_channel_description_t *)(&chan->channelDescriptions[i]); + if( !channelDescriptions ) + { + lsmash_bs_cleanup( bs ); + goto fail; + } + lsmash_bs_put_be32( bs, channelDescriptions->channelLabel ); + lsmash_bs_put_be32( bs, channelDescriptions->channelFlags ); + lsmash_bs_put_be32( bs, channelDescriptions->coordinates[0] ); + lsmash_bs_put_be32( bs, channelDescriptions->coordinates[1] ); + lsmash_bs_put_be32( bs, channelDescriptions->coordinates[2] ); + } + } + else if( lsmash_check_box_type_identical( wave_ext->type, 
QT_BOX_TYPE_ESDS ) ) + { + isom_esds_t *esds = (isom_esds_t *)wave_ext->form.box; + if( !esds + || mp4sys_setup_summary_from_DecoderSpecificInfo( summary, esds->ES ) + || isom_append_structured_mp4sys_decoder_config( summary->opaque, esds ) ) + { + lsmash_bs_cleanup( bs ); + goto fail; + } + continue; + } + else + /* Skip Format Box and Terminator Box since they are mandatory and fixed structure. */ + continue; + } + else + { + if( wave_ext->size < ISOM_BASEBOX_COMMON_SIZE ) + continue; + uint8_t *data = wave_ext->form.binary; + box_type.fourcc = LSMASH_4CC( data[4], data[5], data[6], data[7] ); + lsmash_bs_put_bytes( bs, wave_ext->size, wave_ext->form.binary ); + } + /* Export as binary string. */ + uint32_t box_size; + uint8_t *box_data = lsmash_bs_export_data( bs, &box_size ); + lsmash_bs_empty( bs ); + if( !box_data ) + { + lsmash_bs_cleanup( bs ); + goto fail; + } + /* Append as an unstructured CODEC specific info. */ + lsmash_codec_specific_data_type type; + if( box_type.fourcc == QT_BOX_TYPE_CHAN.fourcc ) + /* Complete audio channel layout is stored as binary string. + * We distinguish it from one of the outside of 'wave' extension here. 
*/ + type = LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_DECOMPRESSION_PARAMETERS; + else + { + type = isom_get_codec_specific_data_type( box_type.fourcc ); + if( type == LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNKNOWN ) + type = LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_DECOMPRESSION_PARAMETERS; + } + lsmash_codec_specific_t *specific = lsmash_create_codec_specific_data( type, LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED ); + if( !specific ) + { + /* The exported binary string is still owned here; release it before bailing out. */ + free( box_data ); + lsmash_bs_cleanup( bs ); + goto fail; + } + /* From here on the exported binary string is owned by 'specific'. */ + specific->data.unstructured = box_data; + specific->size = box_size; + if( lsmash_add_entry( &summary->opaque->list, specific ) ) + { + lsmash_destroy_codec_specific_data( specific ); + lsmash_bs_cleanup( bs ); + goto fail; + } + } + lsmash_bs_cleanup( bs ); + } + } + else + { + if( ext->size < ISOM_BASEBOX_COMMON_SIZE ) + continue; + uint8_t *data = ext->form.binary; + lsmash_compact_box_type_t fourcc = LSMASH_4CC( data[4], data[5], data[6], data[7] ); + lsmash_codec_specific_data_type type = isom_get_codec_specific_data_type( fourcc ); + lsmash_codec_specific_t *specific = lsmash_create_codec_specific_data( type, LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED ); + if( !specific ) + goto fail; + specific->size = ext->size; + specific->data.unstructured = lsmash_memdup( ext->form.binary, ext->size ); + if( !specific->data.unstructured + || lsmash_add_entry( &summary->opaque->list, specific ) ) + { + lsmash_destroy_codec_specific_data( specific ); + goto fail; + } + if( specific->type == LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_DTS ) + { + /* The unstructured copy stays in the opaque list; the structured conversion is a temporary used only to read the parameters. */ + specific = lsmash_convert_codec_specific_format( specific, LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !specific ) + goto fail; + lsmash_dts_specific_parameters_t *param = (lsmash_dts_specific_parameters_t *)specific->data.structured; + summary->sample_size = param->pcmSampleDepth; + /* DTSSamplingFrequency comes from the stored box; leave samples_in_frame untouched rather than divide by zero. */ + if( param->DTSSamplingFrequency ) + summary->samples_in_frame = (summary->frequency * (512 << param->FrameDuration)) / param->DTSSamplingFrequency; + lsmash_destroy_codec_specific_data( specific ); + } + } + } + 
return (lsmash_summary_t *)summary; +fail: + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + return NULL; +} + +lsmash_codec_specific_t *lsmash_get_codec_specific_data( lsmash_summary_t *summary, uint32_t extension_number ) +{ + if( !summary || !summary->opaque ) + return NULL; + uint32_t i = 0; + for( lsmash_entry_t *entry = summary->opaque->list.head; entry; entry = entry->next ) + if( ++i == extension_number ) + return (lsmash_codec_specific_t *)entry->data; + return NULL; +} + +uint32_t lsmash_count_codec_specific_data( lsmash_summary_t *summary ) +{ + if( !summary || !summary->opaque ) + return 0; + return summary->opaque->list.entry_count; +} + +int isom_compare_opaque_extensions( lsmash_summary_t *a, lsmash_summary_t *b ) +{ + assert( a && b ); + uint32_t in_number_of_extensions = lsmash_count_codec_specific_data( a ); + uint32_t out_number_of_extensions = lsmash_count_codec_specific_data( b ); + if( out_number_of_extensions != in_number_of_extensions ) + return 1; + uint32_t active_number_of_extensions = in_number_of_extensions; + uint32_t identical_count = 0; + for( uint32_t j = 1; j <= in_number_of_extensions; j++ ) + { + lsmash_codec_specific_t *in_cs_orig = lsmash_get_codec_specific_data( a, j ); + lsmash_codec_specific_t *in_cs; + lsmash_codec_specific_format compare_format = LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED; + if( in_cs_orig->format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ) + { + if( in_cs_orig->type == LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_COMMON + || in_cs_orig->type == LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_COMMON + || in_cs_orig->type == LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_FORMAT_SPECIFIC_FLAGS ) + { + compare_format = LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED; + in_cs = in_cs_orig; + } + else + { + in_cs = lsmash_convert_codec_specific_format( in_cs_orig, LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED ); + if( !in_cs ) + { + /* We don't support the format converter of this data type. 
*/ + --active_number_of_extensions; + continue; + } + } + } + else + in_cs = in_cs_orig; + for( uint32_t k = 1; k <= out_number_of_extensions; k++ ) + { + lsmash_codec_specific_t *out_cs_orig = lsmash_get_codec_specific_data( b, k ); + if( out_cs_orig->type != in_cs_orig->type ) + continue; + lsmash_codec_specific_t *out_cs; + if( out_cs_orig->format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ) + { + if( compare_format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ) + out_cs = out_cs_orig; + else + { + out_cs = lsmash_convert_codec_specific_format( out_cs_orig, LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED ); + if( !out_cs ) + continue; + } + } + else + out_cs = out_cs_orig; + int identical; + if( compare_format == LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED ) + identical = out_cs->size == in_cs->size && !memcmp( out_cs->data.unstructured, in_cs->data.unstructured, in_cs->size ); + else + { + if( in_cs->type == LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_VIDEO_COMMON ) + { + lsmash_qt_video_common_t *in_data = (lsmash_qt_video_common_t *)in_cs->data.structured; + lsmash_qt_video_common_t *out_data = (lsmash_qt_video_common_t *)out_cs->data.structured; + identical = in_data->revision_level == out_data->revision_level + && in_data->vendor == out_data->vendor + && in_data->temporalQuality == out_data->temporalQuality + && in_data->spatialQuality == out_data->spatialQuality + && in_data->horizontal_resolution == out_data->horizontal_resolution + && in_data->vertical_resolution == out_data->vertical_resolution + && in_data->dataSize == out_data->dataSize + && in_data->frame_count == out_data->frame_count + && in_data->color_table_ID == out_data->color_table_ID; + } + else if( in_cs->type == LSMASH_CODEC_SPECIFIC_DATA_TYPE_QT_AUDIO_COMMON ) + { + lsmash_qt_audio_common_t *in_data = (lsmash_qt_audio_common_t *)in_cs->data.structured; + lsmash_qt_audio_common_t *out_data = (lsmash_qt_audio_common_t *)out_cs->data.structured; + identical = in_data->revision_level == out_data->revision_level 
+ && in_data->vendor == out_data->vendor + && in_data->compression_ID == out_data->compression_ID; + } + else + { + lsmash_qt_audio_format_specific_flags_t *in_data = (lsmash_qt_audio_format_specific_flags_t *)in_cs->data.structured; + lsmash_qt_audio_format_specific_flags_t *out_data = (lsmash_qt_audio_format_specific_flags_t *)out_cs->data.structured; + identical = (in_data->format_flags == out_data->format_flags); + } + } + if( out_cs != out_cs_orig ) + lsmash_destroy_codec_specific_data( out_cs ); + if( identical ) + { + ++identical_count; + break; + } + } + if( in_cs != in_cs_orig ) + lsmash_destroy_codec_specific_data( in_cs ); + } + return (identical_count != active_number_of_extensions); +} diff --git a/output/mp4/description.h b/output/mp4/description.h new file mode 100644 index 0000000..52b5b0a --- /dev/null +++ b/output/mp4/description.h @@ -0,0 +1,40 @@ +/***************************************************************************** + * description.h: + ***************************************************************************** + * Copyright (C) 2012 L-SMASH project + * + * Authors: Yusuke Nakamura + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ *****************************************************************************/ + +/* This file is available under an ISC license. */ + +struct lsmash_codec_specific_list_tag +{ + lsmash_entry_list_t list; +}; + +lsmash_codec_specific_t *isom_duplicate_codec_specific_data( lsmash_codec_specific_t *specific ); +isom_extension_box_t *isom_get_sample_description_extension( lsmash_entry_list_t *extensions, lsmash_box_type_t box_type ); +lsmash_codec_specific_t *isom_get_codec_specific( lsmash_codec_specific_list_t *opaque, lsmash_codec_specific_data_type type ); +uint8_t *isom_get_child_box_position( uint8_t *parent_data, uint32_t parent_size, lsmash_box_type_t child_type, uint32_t *child_size ); +void *isom_get_extension_box( lsmash_entry_list_t *exetnsion, lsmash_box_type_t box_type ); +int isom_add_extension_box( lsmash_entry_list_t *extensions, void *box, void *eliminator ); +void isom_remove_sample_description_extensions( lsmash_entry_list_t *extensions ); +void isom_remove_sample_description_extension( isom_extension_box_t *ext ); +int isom_setup_visual_description( isom_stsd_t *stsd, lsmash_codec_type_t sample_type, lsmash_video_summary_t *summary ); +int isom_setup_audio_description( isom_stsd_t *stsd, lsmash_codec_type_t sample_type, lsmash_audio_summary_t *summary ); +lsmash_summary_t *isom_create_video_summary_from_description( isom_sample_entry_t *sample_entry ); +lsmash_summary_t *isom_create_audio_summary_from_description( isom_sample_entry_t *sample_entry ); +int isom_compare_opaque_extensions( lsmash_summary_t *a, lsmash_summary_t *b ); diff --git a/output/mp4/dts.c b/output/mp4/dts.c new file mode 100644 index 0000000..1a1f22f --- /dev/null +++ b/output/mp4/dts.c @@ -0,0 +1,1331 @@ +/***************************************************************************** + * dts.c: + ***************************************************************************** + * Copyright (C) 2012 L-SMASH project + * + * Authors: Yusuke Nakamura + * + * Permission to use, 
copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *****************************************************************************/ + +/* This file is available under an ISC license. */ + +#include "internal.h" /* must be placed first */ + +#include +#include +#include + +#include "box.h" + +/*************************************************************************** + ETSI TS 102 114 V1.2.1 (2002-12) + ETSI TS 102 114 V1.3.1 (2011-08) + + IMPLEMENTATION OF DTS AUDIO IN MEDIA FILES BASED ON ISO/IEC 14496 + Document No.: 9302J81100 + Revision: E + Version: 1.3 +***************************************************************************/ +#include "dts.h" + +#define DTS_MIN_CORE_SIZE 96 +#define DTS_MAX_STREAM_CONSTRUCTION 21 +#define DTS_SPECIFIC_BOX_MIN_LENGTH 28 + +typedef enum +{ + DTS_SYNCWORD_CORE = 0x7FFE8001, + DTS_SYNCWORD_XCH = 0x5A5A5A5A, + DTS_SYNCWORD_XXCH = 0x47004A03, + DTS_SYNCWORD_X96K = 0x1D95F262, + DTS_SYNCWORD_XBR = 0x655E315E, + DTS_SYNCWORD_LBR = 0x0A801921, + DTS_SYNCWORD_XLL = 0x41A29547, + DTS_SYNCWORD_SUBSTREAM = 0x64582025, + DTS_SYNCWORD_SUBSTREAM_CORE = 0x02b09261, +} dts_syncword; + +typedef enum +{ + DTS_XXCH_LOUDSPEAKER_MASK_C = 0x00000001, /* Centre in front of listener */ + DTS_XXCH_LOUDSPEAKER_MASK_L = 0x00000002, /* Left in front */ + DTS_XXCH_LOUDSPEAKER_MASK_R = 0x00000004, 
/* Right in front */ + DTS_XXCH_LOUDSPEAKER_MASK_LS = 0x00000008, /* Left surround on side in rear */ + DTS_XXCH_LOUDSPEAKER_MASK_RS = 0x00000010, /* Right surround on side in rear */ + DTS_XXCH_LOUDSPEAKER_MASK_LFE1 = 0x00000020, /* Low frequency effects subwoofer */ + DTS_XXCH_LOUDSPEAKER_MASK_CS = 0x00000040, /* Centre surround in rear */ + DTS_XXCH_LOUDSPEAKER_MASK_LSR = 0x00000080, /* Left surround in rear */ + DTS_XXCH_LOUDSPEAKER_MASK_RSR = 0x00000100, /* Right surround in rear */ + DTS_XXCH_LOUDSPEAKER_MASK_LSS = 0x00000200, /* Left surround on side */ + DTS_XXCH_LOUDSPEAKER_MASK_RSS = 0x00000400, /* Right surround on side */ + DTS_XXCH_LOUDSPEAKER_MASK_LC = 0x00000800, /* Between left and centre in front */ + DTS_XXCH_LOUDSPEAKER_MASK_RC = 0x00001000, /* Between right and centre in front */ + DTS_XXCH_LOUDSPEAKER_MASK_LH = 0x00002000, /* Left height in front */ + DTS_XXCH_LOUDSPEAKER_MASK_CH = 0x00004000, /* Centre Height in front */ + DTS_XXCH_LOUDSPEAKER_MASK_RH = 0x00008000, /* Right Height in front */ + DTS_XXCH_LOUDSPEAKER_MASK_LFE2 = 0x00010000, /* Second low frequency effects subwoofer */ + DTS_XXCH_LOUDSPEAKER_MASK_LW = 0x00020000, /* Left on side in front */ + DTS_XXCH_LOUDSPEAKER_MASK_RW = 0x00040000, /* Right on side in front */ + DTS_XXCH_LOUDSPEAKER_MASK_OH = 0x00080000, /* Over the listener's head */ + DTS_XXCH_LOUDSPEAKER_MASK_LHS = 0x00100000, /* Left height on side */ + DTS_XXCH_LOUDSPEAKER_MASK_RHS = 0x00200000, /* Right height on side */ + DTS_XXCH_LOUDSPEAKER_MASK_CHR = 0x00400000, /* Centre height in rear */ + DTS_XXCH_LOUDSPEAKER_MASK_LHR = 0x00800000, /* Left height in rear */ + DTS_XXCH_LOUDSPEAKER_MASK_RHR = 0x01000000, /* Right height in rear */ + DTS_XXCH_LOUDSPEAKER_MASK_CL = 0x02000000, /* Centre in the plane lower than listener's ears */ + DTS_XXCH_LOUDSPEAKER_MASK_LL = 0x04000000, /* Left in the plane lower than listener's ears */ + DTS_XXCH_LOUDSPEAKER_MASK_RL = 0x08000000, /* Right in the plane lower than listener's ears */ 
+} dts_loudspeaker_mask; + +typedef enum +{ + DTS_CHANNEL_LAYOUT_C = 0x0001, /* Centre in front of listener */ + DTS_CHANNEL_LAYOUT_L_R = 0x0002, /* Left/Right in front */ + DTS_CHANNEL_LAYOUT_LS_RS = 0x0004, /* Left/Right surround on side in rear */ + DTS_CHANNEL_LAYOUT_LFE1 = 0x0008, /* Low frequency effects subwoofer */ + DTS_CHANNEL_LAYOUT_CS = 0x0010, /* Centre surround in rear */ + DTS_CHANNEL_LAYOUT_LH_RH = 0x0020, /* Left/Right height in front */ + DTS_CHANNEL_LAYOUT_LSR_RSR = 0x0040, /* Left/Right surround in rear */ + DTS_CHANNEL_LAYOUT_CH = 0x0080, /* Centre height in front */ + DTS_CHANNEL_LAYOUT_OH = 0x0100, /* Over the listener's head */ + DTS_CHANNEL_LAYOUT_LC_RC = 0x0200, /* Between left/right and centre in front */ + DTS_CHANNEL_LAYOUT_LW_RW = 0x0400, /* Left/Right on side in front */ + DTS_CHANNEL_LAYOUT_LSS_RSS = 0x0800, /* Left/Right surround on side */ + DTS_CHANNEL_LAYOUT_LFE2 = 0x1000, /* Second low frequency effects subwoofer */ + DTS_CHANNEL_LAYOUT_LHS_RHS = 0x2000, /* Left/Right height on side */ + DTS_CHANNEL_LAYOUT_CHR = 0x4000, /* Centre height in rear */ + DTS_CHANNEL_LAYOUT_LHR_RHR = 0x8000, /* Left/Right height in rear */ +} dts_channel_layout; + +static const lsmash_dts_construction_flag construction_info[DTS_MAX_STREAM_CONSTRUCTION + 1] = + { + 0, + DTS_CORE_SUBSTREAM_CORE_FLAG, + DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XCH_FLAG, + DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XXCH_FLAG, + DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_X96_FLAG, + DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XXCH_FLAG, + DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XBR_FLAG, + DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XCH_FLAG | DTS_EXT_SUBSTREAM_XBR_FLAG, + DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XXCH_FLAG | DTS_EXT_SUBSTREAM_XBR_FLAG, + DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XXCH_FLAG | DTS_EXT_SUBSTREAM_XBR_FLAG, + DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_X96_FLAG, + 
DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XCH_FLAG | DTS_EXT_SUBSTREAM_X96_FLAG, + DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XXCH_FLAG | DTS_EXT_SUBSTREAM_X96_FLAG, + DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XXCH_FLAG | DTS_EXT_SUBSTREAM_X96_FLAG, + DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XLL_FLAG, + DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XCH_FLAG | DTS_EXT_SUBSTREAM_XLL_FLAG, + DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_X96_FLAG | DTS_EXT_SUBSTREAM_XLL_FLAG, + DTS_EXT_SUBSTREAM_XLL_FLAG, + DTS_EXT_SUBSTREAM_LBR_FLAG, + DTS_EXT_SUBSTREAM_CORE_FLAG, + DTS_EXT_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XXCH_FLAG, + DTS_EXT_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XLL_FLAG , + }; + +struct lsmash_dts_reserved_box_tag +{ + uint32_t size; + uint8_t *data; +}; + +int lsmash_append_dts_reserved_box( lsmash_dts_specific_parameters_t *param, uint8_t *box_data, uint32_t box_size ) +{ + if( !param || !box_data || box_size == 0 ) + return -1; + param->box = malloc( sizeof(lsmash_dts_reserved_box_t) ); + if( !param->box ) + return -1; + param->box->data = lsmash_memdup( box_data, box_size ); + if( !param->box->data ) + { + free( param->box ); + param->box = NULL; + return -1; + } + param->box->size = box_size; + return 0; +} + +/* Release the reserved box attached to param and reset the pointer; a NULL param or an absent box is ignored. */ +void lsmash_remove_dts_reserved_box( lsmash_dts_specific_parameters_t *param ) +{ + if( !param || !param->box ) + return; + if( param->box->data ) + free( param->box->data ); + free( param->box ); + param->box = NULL; +} + +void dts_destruct_specific_data( void *data ) +{ + if( !data ) + return; + lsmash_remove_dts_reserved_box( data ); + free( data ); +} + +uint8_t lsmash_dts_get_stream_construction( lsmash_dts_construction_flag flags ) +{ + uint8_t StreamConstruction; + for( StreamConstruction = 1; StreamConstruction <= DTS_MAX_STREAM_CONSTRUCTION; StreamConstruction++ ) + if( flags == construction_info[StreamConstruction] ) + break; + /* For any stream type not listed in the above table, + * StreamConstruction 
shall be set to 0 and the codingname shall default to 'dtsh'. */ + return StreamConstruction <= DTS_MAX_STREAM_CONSTRUCTION ? StreamConstruction : 0; +} + +lsmash_dts_construction_flag lsmash_dts_get_construction_flags( uint8_t stream_construction ) +{ + if( stream_construction <= DTS_MAX_STREAM_CONSTRUCTION ) + return construction_info[stream_construction]; + return 0; +} + +lsmash_codec_type_t lsmash_dts_get_codingname( lsmash_dts_specific_parameters_t *param ) +{ + assert( param->StreamConstruction <= DTS_MAX_STREAM_CONSTRUCTION ); + if( param->MultiAssetFlag ) + return ISOM_CODEC_TYPE_DTSH_AUDIO; /* Multiple asset streams shall use the 'dtsh' coding_name. */ + static lsmash_codec_type_t codingname_table[DTS_MAX_STREAM_CONSTRUCTION + 1] = { LSMASH_CODEC_TYPE_INITIALIZER }; + if( lsmash_check_codec_type_identical( codingname_table[0], LSMASH_CODEC_TYPE_UNSPECIFIED ) ) + { + int i = 0; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO; /* Undefined stream types shall be set to 0 and the codingname shall default to 'dtsh'. 
*/ + codingname_table[i++] = ISOM_CODEC_TYPE_DTSC_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSC_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSC_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSL_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSL_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSL_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSL_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSE_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO; + codingname_table[i++] = ISOM_CODEC_TYPE_DTSL_AUDIO; + } + return codingname_table[ param->StreamConstruction ]; +} + +uint8_t *lsmash_create_dts_specific_info( lsmash_dts_specific_parameters_t *param, uint32_t *data_length ) +{ + lsmash_bits_t bits = { 0 }; + lsmash_bs_t bs = { 0 }; + lsmash_bits_init( &bits, &bs ); + int reserved_box_present = (param->box && param->box->data && param->box->size); + uint32_t buffer_length = DTS_SPECIFIC_BOX_MIN_LENGTH + (reserved_box_present ? param->box->size : 0); + uint8_t buffer[buffer_length]; + memset( buffer, 0, buffer_length ); + bs.data = buffer; + bs.alloc = buffer_length; + /* Create a DTSSpecificBox. 
*/ + lsmash_bits_put( &bits, 32, 0 ); /* box size */ + lsmash_bits_put( &bits, 32, ISOM_BOX_TYPE_DDTS.fourcc ); /* box type: 'ddts' */ + lsmash_bits_put( &bits, 32, param->DTSSamplingFrequency ); + lsmash_bits_put( &bits, 32, param->maxBitrate ); /* maxBitrate; setup by isom_update_bitrate_description */ + lsmash_bits_put( &bits, 32, param->avgBitrate ); /* avgBitrate; setup by isom_update_bitrate_description */ + lsmash_bits_put( &bits, 8, param->pcmSampleDepth ); + lsmash_bits_put( &bits, 2, param->FrameDuration ); + lsmash_bits_put( &bits, 5, param->StreamConstruction ); + lsmash_bits_put( &bits, 1, param->CoreLFEPresent ); + lsmash_bits_put( &bits, 6, param->CoreLayout ); + lsmash_bits_put( &bits, 14, param->CoreSize ); + lsmash_bits_put( &bits, 1, param->StereoDownmix ); + lsmash_bits_put( &bits, 3, param->RepresentationType ); + lsmash_bits_put( &bits, 16, param->ChannelLayout ); + lsmash_bits_put( &bits, 1, param->MultiAssetFlag ); + lsmash_bits_put( &bits, 1, param->LBRDurationMod ); + lsmash_bits_put( &bits, 1, reserved_box_present ); + lsmash_bits_put( &bits, 5, 0 ); /* Reserved */ + /* ReservedBox */ + if( reserved_box_present ) + for( uint32_t i = 0; i < param->box->size; i++ ) + lsmash_bits_put( &bits, 8, param->box->data[i] ); + /* Export the serialized box; the size field is patched into the exported copy below. */ + uint8_t *data = lsmash_bits_export_data( &bits, data_length ); + /* Exporting can fail; report it to the caller instead of writing through a NULL pointer. */ + if( !data ) + return NULL; + /* Update box size. 
*/ + data[0] = ((*data_length) >> 24) & 0xff; + data[1] = ((*data_length) >> 16) & 0xff; + data[2] = ((*data_length) >> 8) & 0xff; + data[3] = (*data_length) & 0xff; + return data; +} + +int lsmash_setup_dts_specific_parameters_from_frame( lsmash_dts_specific_parameters_t *param, uint8_t *data, uint32_t data_length ) +{ + lsmash_bits_t bits = { 0 }; + lsmash_bs_t bs = { 0 }; + uint8_t buffer[DTS_MAX_EXTENSION_SIZE] = { 0 }; + bs.data = buffer; + bs.alloc = DTS_MAX_EXTENSION_SIZE; + dts_info_t handler = { 0 }; + dts_info_t *info = &handler; + uint32_t overall_wasted_data_length = 0; + info->buffer_pos = info->buffer; + info->buffer_end = info->buffer; + info->bits = &bits; + lsmash_bits_init( &bits, &bs ); + while( 1 ) + { + /* Check the remainder length of the buffer. + * If there is enough length, then continue to parse the frame in it. + * The length 10 is the required byte length to get frame size. */ + uint32_t remainder_length = info->buffer_end - info->buffer_pos; + if( !info->no_more_read && remainder_length < DTS_MAX_EXTENSION_SIZE ) + { + if( remainder_length ) + memmove( info->buffer, info->buffer_pos, remainder_length ); + uint32_t wasted_data_length = LSMASH_MIN( data_length, DTS_MAX_EXTENSION_SIZE ); + memcpy( info->buffer + remainder_length, data + overall_wasted_data_length, wasted_data_length ); + data_length -= wasted_data_length; + overall_wasted_data_length += wasted_data_length; + remainder_length += wasted_data_length; + info->buffer_pos = info->buffer; + info->buffer_end = info->buffer + remainder_length; + info->no_more_read = (data_length < 10); + } + if( remainder_length < 10 && info->no_more_read ) + goto setup_param; /* No more valid data. */ + /* Parse substream frame. 
*/ + dts_substream_type prev_substream_type = info->substream_type; + info->substream_type = dts_get_substream_type( info ); + int (*dts_parse_frame)( dts_info_t *, uint8_t *, uint32_t ) = NULL; + switch( info->substream_type ) + { + /* Decide substream frame parser and check if this frame and the previous frame belong to the same AU. */ + case DTS_SUBSTREAM_TYPE_CORE : + if( prev_substream_type != DTS_SUBSTREAM_TYPE_NONE ) + goto setup_param; + dts_parse_frame = dts_parse_core_substream; + break; + case DTS_SUBSTREAM_TYPE_EXTENSION : + { + uint8_t prev_extension_index = info->extension_index; + if( dts_get_extension_index( info, &info->extension_index ) ) + return -1; + if( prev_substream_type == DTS_SUBSTREAM_TYPE_EXTENSION && info->extension_index <= prev_extension_index ) + goto setup_param; + dts_parse_frame = dts_parse_extension_substream; + break; + } + default : + return -1; + } + info->frame_size = 0; + if( dts_parse_frame( info, info->buffer_pos, LSMASH_MIN( remainder_length, DTS_MAX_EXTENSION_SIZE ) ) ) + return -1; /* Failed to parse. 
*/ + info->buffer_pos += info->frame_size; + } +setup_param: + dts_update_specific_param( info ); + *param = info->ddts_param; + return 0; +} + +static uint32_t dts_bits_get( lsmash_bits_t *bits, uint32_t width, uint64_t *bits_pos ) +{ + *bits_pos += width; + return lsmash_bits_get( bits, width ); +} + +int dts_get_channel_count_from_channel_layout( uint16_t channel_layout ) +{ +#define DTS_CHANNEL_PAIR_MASK \ + (DTS_CHANNEL_LAYOUT_L_R \ + | DTS_CHANNEL_LAYOUT_LS_RS \ + | DTS_CHANNEL_LAYOUT_LH_RH \ + | DTS_CHANNEL_LAYOUT_LSR_RSR \ + | DTS_CHANNEL_LAYOUT_LC_RC \ + | DTS_CHANNEL_LAYOUT_LW_RW \ + | DTS_CHANNEL_LAYOUT_LSS_RSS \ + | DTS_CHANNEL_LAYOUT_LHS_RHS \ + | DTS_CHANNEL_LAYOUT_LHR_RHR) + return lsmash_count_bits( channel_layout ) + + lsmash_count_bits( channel_layout & DTS_CHANNEL_PAIR_MASK ); +#undef DTS_CHANNEL_PAIR_MASK +} + +static uint32_t dts_get_channel_layout_from_xxch_mask( uint32_t mask ) +{ + uint32_t layout = 0; + if( mask & DTS_XXCH_LOUDSPEAKER_MASK_C ) + layout |= DTS_CHANNEL_LAYOUT_C; + if( mask & (DTS_XXCH_LOUDSPEAKER_MASK_L | DTS_XXCH_LOUDSPEAKER_MASK_R) ) + layout |= DTS_CHANNEL_LAYOUT_L_R; + if( mask & (DTS_XXCH_LOUDSPEAKER_MASK_LS | DTS_XXCH_LOUDSPEAKER_MASK_RS) ) + layout |= DTS_CHANNEL_LAYOUT_LS_RS; + if( mask & DTS_XXCH_LOUDSPEAKER_MASK_LFE1 ) + layout |= DTS_CHANNEL_LAYOUT_LFE1; + if( mask & DTS_XXCH_LOUDSPEAKER_MASK_CS ) + layout |= DTS_CHANNEL_LAYOUT_CS; + if( mask & (DTS_XXCH_LOUDSPEAKER_MASK_LH | DTS_XXCH_LOUDSPEAKER_MASK_RH) ) + layout |= DTS_CHANNEL_LAYOUT_LH_RH; + if( mask & (DTS_XXCH_LOUDSPEAKER_MASK_LSR | DTS_XXCH_LOUDSPEAKER_MASK_RSR) ) + layout |= DTS_CHANNEL_LAYOUT_LSR_RSR; + if( mask & DTS_XXCH_LOUDSPEAKER_MASK_CH ) + layout |= DTS_CHANNEL_LAYOUT_CH; + if( mask & DTS_XXCH_LOUDSPEAKER_MASK_OH ) + layout |= DTS_CHANNEL_LAYOUT_OH; + if( mask & (DTS_XXCH_LOUDSPEAKER_MASK_LC | DTS_XXCH_LOUDSPEAKER_MASK_RC) ) + layout |= DTS_CHANNEL_LAYOUT_LC_RC; + if( mask & (DTS_XXCH_LOUDSPEAKER_MASK_LW | DTS_XXCH_LOUDSPEAKER_MASK_RW) ) + layout 
|= DTS_CHANNEL_LAYOUT_LW_RW; + if( mask & (DTS_XXCH_LOUDSPEAKER_MASK_LSS | DTS_XXCH_LOUDSPEAKER_MASK_RSS) ) + layout |= DTS_CHANNEL_LAYOUT_LSS_RSS; + if( mask & DTS_XXCH_LOUDSPEAKER_MASK_LFE2 ) + layout |= DTS_CHANNEL_LAYOUT_LFE2; + if( mask & (DTS_XXCH_LOUDSPEAKER_MASK_LHS | DTS_XXCH_LOUDSPEAKER_MASK_RHS) ) + layout |= DTS_CHANNEL_LAYOUT_LHS_RHS; + if( mask & DTS_XXCH_LOUDSPEAKER_MASK_CHR ) + layout |= DTS_CHANNEL_LAYOUT_CHR; + if( mask & (DTS_XXCH_LOUDSPEAKER_MASK_LHR | DTS_XXCH_LOUDSPEAKER_MASK_RHR) ) + layout |= DTS_CHANNEL_LAYOUT_LHR_RHR; + return layout; +} + +static int dts_parse_asset_descriptor( dts_info_t *info, uint64_t *bits_pos ) +{ + lsmash_bits_t *bits = info->bits; + /* Audio asset descriptor */ + uint64_t asset_descriptor_pos = *bits_pos; + int nuAssetDescriptFsize = dts_bits_get( bits, 9, bits_pos ); /* nuAssetDescriptFsize (9) */ + dts_bits_get( bits, 3, bits_pos ); /* nuAssetIndex (3) */ + /* Static metadata */ + int bEmbeddedStereoFlag = 0; + int bEmbeddedSixChFlag = 0; + int nuTotalNumChs = 0; + if( info->extension.bStaticFieldsPresent ) + { + if( dts_bits_get( bits, 1, bits_pos ) ) /* bAssetTypeDescrPresent (1)*/ + dts_bits_get( bits, 4, bits_pos ); /* nuAssetTypeDescriptor (4) */ + if( dts_bits_get( bits, 1, bits_pos ) ) /* bLanguageDescrPresent (1) */ + dts_bits_get( bits, 24, bits_pos ); /* LanguageDescriptor (24) */ + if( dts_bits_get( bits, 1, bits_pos ) ) + { + int nuInfoTextByteSize = dts_bits_get( bits, 10, bits_pos ) + 1; /* nuInfoTextByteSize (10) */ + dts_bits_get( bits, nuInfoTextByteSize * 8, bits_pos ); /* InfoTextString (nuInfoTextByteSize) */ + } + int nuBitResolution = dts_bits_get( bits, 5, bits_pos ) + 1; /* nuBitResolution (5) */ + info->extension.bit_resolution = LSMASH_MAX( info->extension.bit_resolution, nuBitResolution ); + int nuMaxSampleRate = dts_bits_get( bits, 4, bits_pos ); /* nuMaxSampleRate (4) */ + static const uint32_t source_sample_rate_table[16] = + { + 8000, 16000, 32000, 64000, 128000, + 22050, 44100, 
88200, 176400, 352800, + 12000, 24000, 48000, 96000, 192000, 384000 + }; + info->extension.sampling_frequency = LSMASH_MAX( info->extension.sampling_frequency, source_sample_rate_table[nuMaxSampleRate] ); + nuTotalNumChs = dts_bits_get( bits, 8, bits_pos ) + 1; /* nuTotalNumChs (8) */ + info->extension.bOne2OneMapChannels2Speakers = dts_bits_get( bits, 1, bits_pos ); /* bOne2OneMapChannels2Speakers (1) */ + if( info->extension.bOne2OneMapChannels2Speakers ) + { + if( nuTotalNumChs > 2 ) + { + bEmbeddedStereoFlag = dts_bits_get( bits, 1, bits_pos ); /* bEmbeddedStereoFlag (1) */ + info->extension.stereo_downmix |= bEmbeddedStereoFlag; + } + if( nuTotalNumChs > 6 ) + bEmbeddedSixChFlag = dts_bits_get( bits, 1, bits_pos ); /* bEmbeddedSixChFlag (1) */ + int nuNumBits4SAMask; + if( dts_bits_get( bits, 1, bits_pos ) ) /* bSpkrMaskEnabled (1) */ + { + nuNumBits4SAMask = (dts_bits_get( bits, 2, bits_pos ) + 1) << 2; /* nuNumBits4SAMask (2) */ + info->extension.channel_layout |= dts_bits_get( bits, nuNumBits4SAMask, bits_pos ); /* nuSpkrActivityMask (nuNumBits4SAMask) */ + } + else + /* The specification doesn't mention the value of nuNumBits4SAMask if bSpkrMaskEnabled is set to 0. 
*/ + nuNumBits4SAMask = 0; + int nuNumSpkrRemapSets = dts_bits_get( bits, 3, bits_pos ); + int nuStndrSpkrLayoutMask[8] = { 0 }; + for( int ns = 0; ns < nuNumSpkrRemapSets; ns++ ) + nuStndrSpkrLayoutMask[ns] = dts_bits_get( bits, nuNumBits4SAMask, bits_pos ); + for( int ns = 0; ns < nuNumSpkrRemapSets; ns++ ) + { + int nuNumSpeakers = dts_get_channel_count_from_channel_layout( nuStndrSpkrLayoutMask[ns] ); + int nuNumDecCh4Remap = dts_bits_get( bits, 5, bits_pos ) + 1; /* nuNumDecCh4Remap[ns] (5) */ + for( int nCh = 0; nCh < nuNumSpeakers; nCh++ ) + { + uint32_t nuRemapDecChMask = dts_bits_get( bits, nuNumDecCh4Remap, bits_pos ); + int nCoef = lsmash_count_bits( nuRemapDecChMask ); + for( int nc = 0; nc < nCoef; nc++ ) + dts_bits_get( bits, 5, bits_pos ); /* nuSpkrRemapCodes[ns][nCh][nc] (5) */ + } + } + } + else + { + info->extension.representation_type = dts_bits_get( bits, 3, bits_pos ); /* nuRepresentationType (3) */ + if( info->extension.representation_type == 2 || info->extension.representation_type == 3 ) + nuTotalNumChs = 2; + } + } + /* Dynamic metadata */ + int bDRCCoefPresent = dts_bits_get( bits, 1, bits_pos ); /* bDRCCoefPresent (1) */ + if( bDRCCoefPresent ) + dts_bits_get( bits, 8, bits_pos ); /* nuDRCCode (8) */ + if( dts_bits_get( bits, 1, bits_pos ) ) /* bDialNormPresent (1) */ + dts_bits_get( bits, 5, bits_pos ); /* nuDialNormCode (5) */ + if( bDRCCoefPresent && bEmbeddedStereoFlag ) + dts_bits_get( bits, 8, bits_pos ); /* nuDRC2ChDmixCode (8) */ + int bMixMetadataPresent; + if( info->extension.bMixMetadataEnbl ) + bMixMetadataPresent = dts_bits_get( bits, 1, bits_pos ); /* bMixMetadataPresent (1) */ + else + bMixMetadataPresent = 0; + if( bMixMetadataPresent ) + { + dts_bits_get( bits, 7, bits_pos ); /* bExternalMixFlag (1) + * nuPostMixGainAdjCode (7) */ + if( dts_bits_get( bits, 2, bits_pos ) < 3 ) /* nuControlMixerDRC (2) */ + dts_bits_get( bits, 3, bits_pos ); /* nuLimit4EmbeddedDRC (3) */ + else + dts_bits_get( bits, 8, bits_pos ); /* 
nuCustomDRCCode (8) */ + int bEnblPerChMainAudioScale = dts_bits_get( bits, 1, bits_pos ); /* bEnblPerChMainAudioScale (1) */ + for( uint8_t ns = 0; ns < info->extension.nuNumMixOutConfigs; ns++ ) + if( bEnblPerChMainAudioScale ) + for( uint8_t nCh = 0; nCh < info->extension.nNumMixOutCh[ns]; nCh++ ) + dts_bits_get( bits, 6, bits_pos ); /* nuMainAudioScaleCode[ns][nCh] (6) */ + else + dts_bits_get( bits, 6, bits_pos ); /* nuMainAudioScaleCode[ns][0] (6) */ + int nEmDM = 1; + int nDecCh[3] = { nuTotalNumChs, 0, 0 }; + if( bEmbeddedSixChFlag ) + { + nDecCh[nEmDM] = 6; + ++nEmDM; + } + if( bEmbeddedStereoFlag ) + { + nDecCh[nEmDM] = 2; + ++nEmDM; + } + for( uint8_t ns = 0; ns < info->extension.nuNumMixOutConfigs; ns++ ) + for( int nE = 0; nE < nEmDM; nE++ ) + for( int nCh = 0; nCh < nDecCh[nE]; nCh++ ) + { + int nuMixMapMask = dts_bits_get( bits, info->extension.nNumMixOutCh[ns], bits_pos ); /* nuMixMapMask (nNumMixOutCh[ns]) */ + int nuNumMixCoefs = lsmash_count_bits( nuMixMapMask ); + for( int nC = 0; nC < nuNumMixCoefs; nC++ ) + dts_bits_get( bits, 6, bits_pos ); /* nuMixCoeffs[ns][nE][nCh][nC] (6) */ + } + } + /* Decoder navigation data */ + if( dts_bits_get( bits, 2, bits_pos ) == 0 ) /* nuCodingMode (2) */ + { + int nuCoreExtensionMask = dts_bits_get( bits, 12, bits_pos ); /* nuCoreExtensionMask (12) */ + if( nuCoreExtensionMask & DTS_EXT_SUBSTREAM_CORE_FLAG ) + info->flags |= DTS_EXT_SUBSTREAM_CORE_FLAG; + } + dts_bits_get( bits, nuAssetDescriptFsize * 8 - (*bits_pos - asset_descriptor_pos), bits_pos ); /* Skip remaining part of Audio asset descriptor. */ + return bits->bs->error ? 
-1 : 0; +} + +static int dts_parse_xxch( dts_info_t *info, uint64_t *bits_pos, int extension ) +{ + lsmash_bits_t *bits = info->bits; + /* XXCH Frame Header */ + uint64_t xxch_pos = *bits_pos - 32; /* SYNCXXCh (32) */ + if( !extension && (info->core.extension_audio_descriptor == 0 || info->core.extension_audio_descriptor == 3) ) + return -1; + uint64_t nuHeaderSizeXXCh = dts_bits_get( bits, 6, bits_pos ) + 1; /* nuHeaderSizeXXCh (6) */ + dts_bits_get( bits, 1, bits_pos ); /* bCRCPresent4ChSetHeaderXXCh (1) */ + int nuBits4SpkrMaskXXCh = dts_bits_get( bits, 5, bits_pos ) + 1; /* nuBits4SpkrMaskXXCh (5) */ + int nuNumChSetsInXXCh = dts_bits_get( bits, 2, bits_pos ) + 1; /* nuNumChSetsInXXCh (2) */ + int pnuChSetFsizeXXCh[4]; + for( int nChSet = 0; nChSet < nuNumChSetsInXXCh; nChSet++ ) + pnuChSetFsizeXXCh[nChSet] = dts_bits_get( bits, 14, bits_pos ) + 1; /* pnuChSetFsizeXXCh[nChSet] (14) */ + uint32_t xxch_mask = dts_bits_get( bits, nuBits4SpkrMaskXXCh, bits_pos ); /* nuCoreSpkrActivityMask (nuBits4SpkrMaskXXCh) */ + uint16_t *channel_layout = extension ? &info->extension.channel_layout : &info->core.channel_layout; + *channel_layout |= dts_get_channel_layout_from_xxch_mask( xxch_mask ); + uint8_t *xxch_lower_planes = extension ? &info->extension.xxch_lower_planes : &info->core.xxch_lower_planes; + *xxch_lower_planes = (xxch_mask >> 25) & 0x7; + dts_bits_get( bits, nuHeaderSizeXXCh * 8 - (*bits_pos - xxch_pos), bits_pos ); /* Skip remaining part of XXCH Frame Header. 
*/ + for( int nChSet = 0; nChSet < nuNumChSetsInXXCh; nChSet++ ) + { + /* XXCH Channel Set Header */ + xxch_pos = *bits_pos; + uint64_t nuXXChChSetHeaderSize = dts_bits_get( bits, 7, bits_pos ) + 1; /* nuXXChChSetHeaderSize (7)*/ + dts_bits_get( bits, 3, bits_pos ); /* nuChInChSetXXCh (3) */ + if( nuBits4SpkrMaskXXCh > 6 ) + { + xxch_mask = dts_bits_get( bits, nuBits4SpkrMaskXXCh - 6, bits_pos ) << 6; /* nuXXChSpkrLayoutMask (nuBits4SpkrMaskXXCh - 6) */ + *channel_layout |= dts_get_channel_layout_from_xxch_mask( xxch_mask ); + *xxch_lower_planes |= (xxch_mask >> 25) & 0x7; + } +#if 0 /* FIXME: Can we detect stereo downmixing from only XXCH data within the core substream? */ + if( dts_bits_get( bits, 1, bits_pos ) ) /* bDownMixCoeffCodeEmbedded (1) */ + { + int bDownMixEmbedded = dts_bits_get( bits, 1, bits_pos ); /* bDownMixEmbedded (1) */ + dts_bits_get( bits, 6, bits_pos ); /* nDmixScaleFactor (6) */ + uint32_t DownMixChMapMask[8]; + for( int nCh = 0; nCh < nuChInChSetXXCh; nCh++ ) + DownMixChMapMask[nCh] = dts_bits_get( bits, nuBits4SpkrMaskXXCh, bits_pos ); + } +#endif + dts_bits_get( bits, nuXXChChSetHeaderSize * 8 - (*bits_pos - xxch_pos), bits_pos ); /* Skip remaining part of XXCH Channel Set Header. */ + } + info->flags |= extension ? DTS_EXT_SUBSTREAM_XXCH_FLAG : DTS_CORE_SUBSTREAM_XXCH_FLAG; + return bits->bs->error ? -1 : 0; +} + +static int dts_parse_core_x96( dts_info_t *info, uint64_t *bits_pos ) +{ + lsmash_bits_t *bits = info->bits; + /* DTS_BCCORE_X96 Frame Header */ + /* SYNCX96 (32) */ + if( info->core.extension_audio_descriptor != 2 && info->core.extension_audio_descriptor != 3 ) + return 0; /* Probably, encountered four emulation bytes (pseudo sync word). */ + dts_bits_get( bits, 16, bits_pos ); /* FSIZE96 (12) + * REVNO (4) */ + info->core.sampling_frequency *= 2; + info->core.frame_duration *= 2; + info->flags |= DTS_CORE_SUBSTREAM_X96_FLAG; + return bits->bs->error ? 
-1 : 0; +} + +static int dts_parse_core_xch( dts_info_t *info, uint64_t *bits_pos ) +{ + lsmash_bits_t *bits = info->bits; + /* XCH Frame Header */ + /* XChSYNC (32) */ + uint64_t XChFSIZE = (lsmash_bs_show_byte( bits->bs, 0 ) << 2) + | ((lsmash_bs_show_byte( bits->bs, 1 ) >> 6) & 0x03); /* XChFSIZE (10) */ + if( (*bits_pos - 32 + (XChFSIZE + 1) * 8) != info->frame_size * 8 ) + return 0; /* Encountered four emulation bytes (pseudo sync word). */ + if( info->core.extension_audio_descriptor != 0 && info->core.extension_audio_descriptor != 3 ) + return -1; + dts_bits_get( bits, 10, bits_pos ); + if( dts_bits_get( bits, 4, bits_pos ) != 1 ) /* AMODE (4) */ + return -1; /* At present, only centre surround channel extension is defined. */ + dts_bits_get( bits, 2, bits_pos ); /* for bytes align */ + info->core.channel_layout |= DTS_CHANNEL_LAYOUT_CS; + info->flags |= DTS_CORE_SUBSTREAM_XCH_FLAG; + return bits->bs->error ? -1 : 0; +} + +static int dts_parse_exsub_xbr( dts_info_t *info, uint64_t *bits_pos ) +{ + lsmash_bits_t *bits = info->bits; + /* XBR Frame Header */ + uint64_t xbr_pos = *bits_pos - 32; /* SYNCXBR (32) */ + uint64_t nHeaderSizeXBR = dts_bits_get( bits, 6, bits_pos ) + 1; /* nHeaderSizeXBR (6) */ + dts_bits_get( bits, nHeaderSizeXBR * 8 - (*bits_pos - xbr_pos), bits_pos ); /* Skip the remaining bits in XBR Frame Header. */ + info->flags |= DTS_EXT_SUBSTREAM_XBR_FLAG; + return bits->bs->error ? -1 : 0; +} + +static int dts_parse_exsub_x96( dts_info_t *info, uint64_t *bits_pos ) +{ + lsmash_bits_t *bits = info->bits; + /* DTS_EXSUB_STREAM_X96 Frame Header */ + uint64_t x96_pos = *bits_pos - 32; /* SYNCX96 (32) */ + uint64_t nHeaderSizeX96 = dts_bits_get( bits, 6, bits_pos ) + 1; /* nHeaderSizeXBR (6) */ + dts_bits_get( bits, nHeaderSizeX96 * 8 - (*bits_pos - x96_pos), bits_pos ); /* Skip the remaining bits in DTS_EXSUB_STREAM_X96 Frame Header. */ + /* What the fuck! The specification drops 'if' sentence. + * We assume the same behaviour for core substream. 
*/ + info->core.sampling_frequency *= 2; + info->core.frame_duration *= 2; + info->flags |= DTS_EXT_SUBSTREAM_X96_FLAG; + return bits->bs->error ? -1 : 0; +} + +static int dts_parse_exsub_lbr( dts_info_t *info, uint64_t *bits_pos ) +{ + lsmash_bits_t *bits = info->bits; + int ucFmtInfoCode = dts_bits_get( bits, 8, bits_pos ); + if( ucFmtInfoCode == 2 ) + { + /* LBR decoder initialization data */ + int nLBRSampleRateCode = dts_bits_get( bits, 8, bits_pos ); /* nLBRSampleRateCode (8) */ + int usLBRSpkrMask = dts_bits_get( bits, 16, bits_pos ); /* usLBRSpkrMask (16) */ + dts_bits_get( bits, 16, bits_pos ); /* nLBRversion (16) */ + int nLBRCompressedFlags = dts_bits_get( bits, 8, bits_pos ); /* nLBRCompressedFlags (8) */ + dts_bits_get( bits, 40, bits_pos ); /* nLBRBitRateMSnybbles (8) + * nLBROriginalBitRate_LSW (16) + * nLBRScaledBitRate_LSW (16) */ + static const uint32_t source_sample_rate_table[16] = + { + 8000, 16000, 32000, 0, 0, + 11025, 22050, 44100, 0, 0, + 12000, 24000, 48000, 0, 0, 0 + }; + info->lbr.sampling_frequency = source_sample_rate_table[nLBRSampleRateCode]; + if( info->lbr.sampling_frequency < 16000 ) + info->lbr.frame_duration = 1024; + else if( info->lbr.sampling_frequency < 32000 ) + info->lbr.frame_duration = 2048; + else + info->lbr.frame_duration = 4096; + info->lbr.channel_layout = ((usLBRSpkrMask >> 8) & 0xff) | ((usLBRSpkrMask << 8) & 0xff00); /* usLBRSpkrMask is little-endian. */ + info->lbr.stereo_downmix |= !!(nLBRCompressedFlags & 0x20); + info->lbr.lfe_present |= !!(nLBRCompressedFlags & 0x02); + info->lbr.duration_modifier |= (nLBRCompressedFlags & 0x04) || (nLBRCompressedFlags & 0x0C); + info->lbr.sample_size = nLBRCompressedFlags & 0x01 ? 24 : 16; + } + else if( ucFmtInfoCode != 1 ) + return -1; /* unknown */ + info->flags |= DTS_EXT_SUBSTREAM_LBR_FLAG; + return bits->bs->error ? 
-1 : 0; +} + +static int dts_parse_exsub_xll( dts_info_t *info, uint64_t *bits_pos ) +{ + lsmash_bits_t *bits = info->bits; + /* Common Header */ + uint64_t xll_pos = *bits_pos - 32; /* SYNCXLL (32) */ + dts_bits_get( bits, 4, bits_pos ); /* nVersion (4) */ + uint64_t nHeaderSize = dts_bits_get( bits, 8, bits_pos ) + 1; /* nHeaderSize (8) */ + int nBits4FrameFsize = dts_bits_get( bits, 5, bits_pos ) + 1; /* nBits4FrameFsize (5) */ + dts_bits_get( bits, nBits4FrameFsize, bits_pos ); /* nLLFrameSize (nBits4FrameFsize) */ + int nNumChSetsInFrame = dts_bits_get( bits, 4, bits_pos ) + 1; /* nNumChSetsInFrame (4) */ + uint16_t nSegmentsInFrame = 1 << dts_bits_get( bits, 4, bits_pos ); /* nSegmentsInFrame (4) */ + uint16_t nSmplInSeg = 1 << dts_bits_get( bits, 4, bits_pos ); /* nSmplInSeg (4) */ + dts_bits_get( bits, 5, bits_pos ); /* nBits4SSize (5) */ + dts_bits_get( bits, 3, bits_pos ); /* nBandDataCRCEn (2) + * bScalableLSBs (1) */ + int nBits4ChMask = dts_bits_get( bits, 5, bits_pos ) + 1; /* nBits4ChMask (5) */ + dts_bits_get( bits, nHeaderSize * 8 - (*bits_pos - xll_pos), bits_pos ); /* Skip the remaining bits in Common Header. */ + int sum_nChSetLLChannel = 0; + uint32_t nFs1 = 0; + int nNumFreqBands1 = 0; + info->lossless.channel_layout = 0; + for( int nChSet = 0; nChSet < nNumChSetsInFrame; nChSet++ ) + { + /* Channel Set Sub-Header */ + xll_pos = *bits_pos; + uint64_t nChSetHeaderSize = dts_bits_get( bits, 10, bits_pos ) + 1; /* nChSetHeaderSize (10) */ + int nChSetLLChannel = dts_bits_get( bits, 4, bits_pos ) + 1; /* nChSetLLChannel (4) */ + dts_bits_get( bits, nChSetLLChannel + 5, bits_pos ); /* nResidualChEncode (nChSetLLChannel) + * nBitResolution (5) */ + int nBitWidth = dts_bits_get( bits, 5, bits_pos ) < 16 ? 
16 : 24; /* nBitWidth (5) */ + info->lossless.bit_width = LSMASH_MAX( info->lossless.bit_width, nBitWidth ); + static const uint32_t source_sample_rate_table[16] = + { + 8000, 16000, 32000, 64000, 128000, + 22050, 44100, 88200, 176400, 352800, + 12000, 24000, 48000, 96000, 192000, 384000 + }; + int sFreqIndex = dts_bits_get( bits, 4, bits_pos ); /* sFreqIndex (4) */ + uint32_t nFs = source_sample_rate_table[sFreqIndex]; + dts_bits_get( bits, 2, bits_pos ); /* nFsInterpolate (2) */ + int nReplacementSet = dts_bits_get( bits, 2, bits_pos ); /* nReplacementSet (2) */ + if( nReplacementSet > 0 ) + dts_bits_get( bits, 1, bits_pos ); /* bActiveReplaceSet (1) */ + if( info->extension.bOne2OneMapChannels2Speakers ) + { + int bPrimaryChSet = dts_bits_get( bits, 1, bits_pos ); /* bPrimaryChSet (1) */ + int bDownmixCoeffCodeEmbedded = dts_bits_get( bits, 1, bits_pos ); /* bDownmixCoeffCodeEmbedded (1) */ + int nLLDownmixType = 0x7; + if( bDownmixCoeffCodeEmbedded ) + { + dts_bits_get( bits, 1, bits_pos ); /* bDownmixEmbedded (1) */ + if( bPrimaryChSet ) + nLLDownmixType = dts_bits_get( bits, 3, bits_pos ); /* nLLDownmixType (3) */ + } + dts_bits_get( bits, 1, bits_pos ); /* bHierChSet (1) */ + if( bDownmixCoeffCodeEmbedded ) + { + static const int downmix_channel_count_table[8] = { 1, 2, 2, 3, 3, 4, 4, 0 }; + int N = nChSetLLChannel + 1; + int M = bPrimaryChSet ? 
downmix_channel_count_table[nLLDownmixType] : sum_nChSetLLChannel; + int nDownmixCoeffs = N * M; + /* Each downmix coefficient occupies 9 bits, so skip nDownmixCoeffs * 9 bits; skipping fewer would misalign the bChMaskEnabled / nSpkrMask reads below. */ + dts_bits_get( bits, nDownmixCoeffs * 9, bits_pos ); /* DownmixCoeffs (nDownmixCoeffs * 9) */ + } + sum_nChSetLLChannel += nChSetLLChannel; + if( dts_bits_get( bits, 1, bits_pos ) ) /* bChMaskEnabled (1) */ + info->lossless.channel_layout |= dts_bits_get( bits, nBits4ChMask, bits_pos ); /* nSpkrMask[nSpkrConf] (nBits4ChMask) */ + } + else + { + if( dts_bits_get( bits, 1, bits_pos ) ) /* bMappingCoeffsPresent (1) */ + { + int nBitsCh2SpkrCoef = 6 + 2 * dts_bits_get( bits, 3, bits_pos ); /* nBitsCh2SpkrCoef (3) */ + int nNumSpeakerConfigs = dts_bits_get( bits, 2, bits_pos ) + 1; /* nNumSpeakerConfigs (2) */ + for( int nSpkrConf = 0; nSpkrConf < nNumSpeakerConfigs; nSpkrConf++ ) + { + int pnActiveChannelMask = dts_bits_get( bits, nChSetLLChannel, bits_pos ); /* pnActiveChannelMask[nSpkrConf] (nChSetLLChannel) */ + int pnNumSpeakers = dts_bits_get( bits, 6, bits_pos ) + 1; /* pnNumSpeakers[nSpkrConf] (6) */ + int bSpkrMaskEnabled = dts_bits_get( bits, 1, bits_pos ); /* bSpkrMaskEnabled (1) */ + if( bSpkrMaskEnabled ) + info->lossless.channel_layout |= dts_bits_get( bits, nBits4ChMask, bits_pos ); /* nSpkrMask[nSpkrConf] (nBits4ChMask) */ + for( int nSpkr = 0; nSpkr < pnNumSpeakers; nSpkr++ ) + { + if( !bSpkrMaskEnabled ) + dts_bits_get( bits, 25, bits_pos ); /* ChSetSpeakerConfiguration (25) */ + for( int nCh = 0; nCh < nChSetLLChannel; nCh++ ) + if( pnActiveChannelMask & (1 << nCh) ) + dts_bits_get( bits, nBitsCh2SpkrCoef, bits_pos ); /* pnCh2SpkrMapCoeff (nBitsCh2SpkrCoef) */ + } + } + } + } + int nNumFreqBands; + if( nFs > 96000 ) + { + if( dts_bits_get( bits, 1, bits_pos ) ) /* bXtraFreqBands (1) */ + nNumFreqBands = nFs > 192000 ? 4 : 2; + else + nNumFreqBands = nFs > 192000 ? 
2 : 1; + } + else + nNumFreqBands = 1; + uint32_t nSmplInSeg_nChSet; + if( nChSet == 0 ) + { + nFs1 = nFs; + nNumFreqBands1 = nNumFreqBands; + nSmplInSeg_nChSet = nSmplInSeg; + } + else + nSmplInSeg_nChSet = (nSmplInSeg * (nFs * nNumFreqBands1)) / (nFs1 * nNumFreqBands); + if( info->lossless.sampling_frequency < nFs ) + { + info->lossless.sampling_frequency = nFs; + uint32_t samples_per_band_in_frame = nSegmentsInFrame * nSmplInSeg_nChSet; + info->lossless.frame_duration = samples_per_band_in_frame * nNumFreqBands; + } + dts_bits_get( bits, nChSetHeaderSize * 8 - (*bits_pos - xll_pos), bits_pos ); /* Skip the remaining bits in Channel Set Sub-Header. */ + } + info->flags |= DTS_EXT_SUBSTREAM_XLL_FLAG; + return bits->bs->error ? -1 : 0; +} + +static uint16_t dts_generate_channel_layout_from_core( int channel_arrangement ) +{ + static const uint16_t channel_layout_map_table[] = + { + DTS_CHANNEL_LAYOUT_C, + DTS_CHANNEL_LAYOUT_L_R, /* dual mono */ + DTS_CHANNEL_LAYOUT_L_R, /* stereo */ + DTS_CHANNEL_LAYOUT_L_R, /* sum-difference */ + DTS_CHANNEL_LAYOUT_L_R, /* Lt/Rt */ + DTS_CHANNEL_LAYOUT_C | DTS_CHANNEL_LAYOUT_L_R, + DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_CS, + DTS_CHANNEL_LAYOUT_C | DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_CS, + DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LS_RS, + DTS_CHANNEL_LAYOUT_C | DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LS_RS, + DTS_CHANNEL_LAYOUT_LC_RC | DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LS_RS, + DTS_CHANNEL_LAYOUT_C | DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LSR_RSR | DTS_CHANNEL_LAYOUT_OH, + DTS_CHANNEL_LAYOUT_C | DTS_CHANNEL_LAYOUT_CS | DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LSR_RSR, + DTS_CHANNEL_LAYOUT_C | DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LC_RC | DTS_CHANNEL_LAYOUT_LS_RS, + DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LC_RC | DTS_CHANNEL_LAYOUT_LS_RS | DTS_CHANNEL_LAYOUT_LSR_RSR, + DTS_CHANNEL_LAYOUT_C | DTS_CHANNEL_LAYOUT_CS | DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LC_RC | 
DTS_CHANNEL_LAYOUT_LS_RS + }; + return channel_arrangement < 16 ? channel_layout_map_table[channel_arrangement] : 0; +} + +int dts_parse_core_substream( dts_info_t *info, uint8_t *data, uint32_t data_length ) +{ + lsmash_bits_t *bits = info->bits; + if( lsmash_bits_import_data( info->bits, data, data_length ) ) + return -1; + uint64_t bits_pos = 0; + dts_bits_get( bits, 32, &bits_pos ); /* SYNC (32) */ + int frame_type = dts_bits_get( bits, 1, &bits_pos ); /* FTYPE (1) */ + int deficit_sample_count = dts_bits_get( bits, 5, &bits_pos ); /* SHORT (5) */ + if( frame_type == 1 && deficit_sample_count != 31 ) + goto parse_fail; /* Any normal frame (FTYPE == 1) must have SHORT == 31. */ + int crc_present_flag = dts_bits_get( bits, 1, &bits_pos ); /* CPF (1) */ + int num_of_pcm_sample_blocks = dts_bits_get( bits, 7, &bits_pos ) + 1; /* NBLKS (7) */ + if( num_of_pcm_sample_blocks <= 5 ) + goto parse_fail; + info->core.frame_duration = 32 * num_of_pcm_sample_blocks; + if( frame_type == 1 && info->core.frame_duration != 256 + && info->core.frame_duration != 512 && info->core.frame_duration != 1024 + && info->core.frame_duration != 2048 && info->core.frame_duration != 4096 ) + goto parse_fail; /* For any normal frame, the actual number of PCM core samples per channel must be + * either 4096, 2048, 1024, 512, or 256 samples per channel. 
*/ + info->core.frame_size = dts_bits_get( bits, 14, &bits_pos ); /* FSIZE (14) */ + info->frame_size = info->core.frame_size + 1; + if( info->frame_size < DTS_MIN_CORE_SIZE ) + goto parse_fail; + info->core.channel_arrangement = dts_bits_get( bits, 6, &bits_pos ); /* AMODE (6) */ + info->core.channel_layout = dts_generate_channel_layout_from_core( info->core.channel_arrangement ); + int core_audio_sampling_frequency = dts_bits_get( bits, 4, &bits_pos ); /* SFREQ (4) */ + static const uint32_t sampling_frequency_table[16] = + { + 0, + 8000, 16000, 32000, 0, 0, + 11025, 22050, 44100, 0, 0, + 12000, 24000, 48000, 0, 0 + }; + info->core.sampling_frequency = sampling_frequency_table[core_audio_sampling_frequency]; + if( info->core.sampling_frequency == 0 ) + goto parse_fail; /* invalid */ + dts_bits_get( bits, 10, &bits_pos ); /* Skip remainder 10 bits. + * RATE (5) + * MIX (1) + * DYNF (1) + * TIMEF (1) + * AUXF (1) + * HDCD (1) */ + info->core.extension_audio_descriptor = dts_bits_get( bits, 3, &bits_pos ); /* EXT_AUDIO_ID (3) + * Note: EXT_AUDIO_ID == 3 is defined in V1.2.1. + * However, its definition disappears and is reserved in V1.3.1. 
*/ + int extended_coding_flag = dts_bits_get( bits, 1, &bits_pos ); /* EXT_AUDIO (1) */ + dts_bits_get( bits, 1, &bits_pos ); /* ASPF (1) */ + int low_frequency_effects_flag = dts_bits_get( bits, 2, &bits_pos ); /* LFF (2) */ + if( low_frequency_effects_flag == 0x3 ) + goto parse_fail; /* invalid */ + if( low_frequency_effects_flag ) + info->core.channel_layout |= DTS_CHANNEL_LAYOUT_LFE1; + dts_bits_get( bits, 8 + crc_present_flag * 16, &bits_pos ); /* HFLAG (1) + * HCRC (16) + * FILTS (1) + * VERNUM (4) + * CHIST (2) */ + int PCMR = dts_bits_get( bits, 3, &bits_pos ); /* PCMR (3) */ + static const uint8_t source_resolution_table[8] = { 16, 16, 20, 20, 0, 24, 24, 0 }; + info->core.pcm_resolution = source_resolution_table[PCMR]; + if( info->core.pcm_resolution == 0 ) + goto parse_fail; /* invalid */ + dts_bits_get( bits, 6, &bits_pos ); /* SUMF (1) + * SUMS (1) + * DIALNORM/UNSPEC (4) */ + if( extended_coding_flag ) + { + uint32_t syncword = dts_bits_get( bits, 24, &bits_pos ); + uint64_t frame_size_bits = info->frame_size * 8; + while( (bits_pos + 24) < frame_size_bits ) + { + syncword = ((syncword << 8) & 0xffffff00) | dts_bits_get( bits, 8, &bits_pos ); + switch( syncword ) + { + case DTS_SYNCWORD_XXCH : + if( dts_parse_xxch( info, &bits_pos, 0 ) ) + goto parse_fail; + syncword = dts_bits_get( bits, 24, &bits_pos ); + break; + case DTS_SYNCWORD_X96K : + if( dts_parse_core_x96( info, &bits_pos ) ) + goto parse_fail; + syncword = dts_bits_get( bits, 24, &bits_pos ); + break; + case DTS_SYNCWORD_XCH : + if( dts_parse_core_xch( info, &bits_pos ) ) + goto parse_fail; + break; + default : + continue; + } + } + } + info->flags |= DTS_CORE_SUBSTREAM_CORE_FLAG; + info->extension_substream_count = 0; + lsmash_bits_empty( bits ); + return 0; +parse_fail: + lsmash_bits_empty( bits ); + return -1; +} + +int dts_parse_extension_substream( dts_info_t *info, uint8_t *data, uint32_t data_length ) +{ + lsmash_bits_t *bits = info->bits; + if( lsmash_bits_import_data( info->bits, 
data, data_length ) ) + return -1; + uint64_t bits_pos = 0; + dts_bits_get( bits, 40, &bits_pos ); /* SYNCEXTSSH (32) + * UserDefinedBits (8) */ + int nExtSSIndex = info->extension_index = dts_bits_get( bits, 2, &bits_pos ); /* nExtSSIndex (2) */ + int bHeaderSizeType = dts_bits_get( bits, 1, &bits_pos ); /* bHeaderSizeType (1) */ + int nuBits4Header = 8 + bHeaderSizeType * 4; + int nuBits4ExSSFsize = 16 + bHeaderSizeType * 4; + uint32_t nuExtSSHeaderSize = dts_bits_get( bits, nuBits4Header, &bits_pos ) + 1; /* nuExtSSHeaderSize (8 or 12) */ + info->frame_size = dts_bits_get( bits, nuBits4ExSSFsize, &bits_pos ) + 1; /* nuExtSSFsize (16 or 20) */ + if( info->frame_size < 10 ) + return -1; + int nuNumAssets; + info->extension.bStaticFieldsPresent = dts_bits_get( bits, 1, &bits_pos ); /* bStaticFieldsPresent (1) */ + if( info->extension.bStaticFieldsPresent ) + { + dts_bits_get( bits, 2, &bits_pos ); /* nuRefClockCode (2) */ + info->extension.frame_duration = 512 * (dts_bits_get( bits, 3, &bits_pos ) + 1); /* nuExSSFrameDurationCode (3) */ + if( dts_bits_get( bits, 1, &bits_pos ) ) /* bTimeStampFlag (1) */ + dts_bits_get( bits, 36, &bits_pos ); /* nuTimeStamp (32) + * nLSB (4) */ + int nuNumAudioPresnt = dts_bits_get( bits, 3, &bits_pos ) + 1; /* nuNumAudioPresnt (3) */ + nuNumAssets = dts_bits_get( bits, 3, &bits_pos ) + 1; /* nuNumAssets (3) */ + int nuActiveExSSMask[nuNumAudioPresnt]; + for( int nAuPr = 0; nAuPr < nuNumAudioPresnt; nAuPr++ ) + nuActiveExSSMask[nAuPr] = dts_bits_get( bits, nExtSSIndex + 1, &bits_pos ); /* nuActiveExSSMask[nAuPr] (nExtSSIndex + 1) */ + int nuActiveAssetMask[nuNumAudioPresnt][nExtSSIndex + 1]; + for( int nAuPr = 0; nAuPr < nuNumAudioPresnt; nAuPr++ ) + for( int nSS = 0; nSS < nExtSSIndex + 1; nSS++ ) + if( ((nuActiveExSSMask[nAuPr] >> nSS) & 0x1) == 1 ) + nuActiveAssetMask[nAuPr][nSS] = dts_bits_get( bits, 8, &bits_pos ); /* nuActiveAssetMask[nAuPr][nSS] (8) */ + else + nuActiveAssetMask[nAuPr][nSS] = 0; + 
info->extension.bMixMetadataEnbl = dts_bits_get( bits, 1, &bits_pos ); /* bMixMetadataEnbl (1) */ + if( info->extension.bMixMetadataEnbl ) + { + dts_bits_get( bits, 2, &bits_pos ); /* nuMixMetadataAdjLevel (2) */ + int nuBits4MixOutMask = (dts_bits_get( bits, 2, &bits_pos ) + 1) << 2; /* nuBits4MixOutMask (2) */ + info->extension.nuNumMixOutConfigs = dts_bits_get( bits, 2, &bits_pos ) + 1; /* nuNumMixOutConfigs (2) */ + for( int ns = 0; ns < info->extension.nuNumMixOutConfigs; ns++ ) + { + int nuMixOutChMask = dts_bits_get( bits, nuBits4MixOutMask, &bits_pos ); /* nuMixOutChMask[ns] (nuBits4MixOutMask) */ + info->extension.nNumMixOutCh[ns] = dts_get_channel_count_from_channel_layout( nuMixOutChMask ); + } + } + } + else + { + nuNumAssets = 1; + info->extension.bMixMetadataEnbl = 0; + info->extension.nuNumMixOutConfigs = 0; + } + info->extension.number_of_assets = nuNumAssets; + uint32_t nuAssetFsize[8]; + for( int nAst = 0; nAst < nuNumAssets; nAst++ ) + nuAssetFsize[nAst] = dts_bits_get( bits, nuBits4ExSSFsize, &bits_pos ) + 1; /* nuAssetFsize[nAst] (nuBits4ExSSFsize) */ + for( int nAst = 0; nAst < nuNumAssets; nAst++ ) + if( dts_parse_asset_descriptor( info, &bits_pos ) ) + goto parse_fail; + dts_bits_get( bits, nuExtSSHeaderSize * 8 - bits_pos, &bits_pos ); + uint32_t syncword = dts_bits_get( bits, 24, &bits_pos ); + uint64_t frame_size_bits = info->frame_size * 8; + while( (bits_pos + 24) < frame_size_bits ) + { + syncword = ((syncword << 8) & 0xffffff00) | dts_bits_get( bits, 8, &bits_pos ); + switch( syncword ) + { + case DTS_SYNCWORD_XBR : + if( dts_parse_exsub_xbr( info, &bits_pos ) ) + goto parse_fail; + break; + case DTS_SYNCWORD_XXCH : + if( dts_parse_xxch( info, &bits_pos, 1 ) ) + goto parse_fail; + break; + case DTS_SYNCWORD_X96K : + if( dts_parse_exsub_x96( info, &bits_pos ) ) + goto parse_fail; + break; + case DTS_SYNCWORD_LBR : + if( dts_parse_exsub_lbr( info, &bits_pos ) ) + goto parse_fail; + break; + case DTS_SYNCWORD_XLL : + if( 
dts_parse_exsub_xll( info, &bits_pos ) ) + goto parse_fail; + break; + default : + continue; + } + syncword = dts_bits_get( bits, 24, &bits_pos ); + } + ++ info->extension_substream_count; + lsmash_bits_empty( bits ); + return 0; +parse_fail: + lsmash_bits_empty( bits ); + return -1; +} + +dts_substream_type dts_get_substream_type( dts_info_t *info ) +{ + if( info->buffer_end - info->buffer_pos < 4 ) + return DTS_SUBSTREAM_TYPE_NONE; + uint8_t *buffer = info->buffer_pos; + uint32_t syncword = LSMASH_4CC( buffer[0], buffer[1], buffer[2], buffer[3] ); + switch( syncword ) + { + case DTS_SYNCWORD_CORE : + return DTS_SUBSTREAM_TYPE_CORE; + case DTS_SYNCWORD_SUBSTREAM : + return DTS_SUBSTREAM_TYPE_EXTENSION; + default : + return DTS_SUBSTREAM_TYPE_NONE; + } +} + +int dts_get_extension_index( dts_info_t *info, uint8_t *extension_index ) +{ + if( info->buffer_end - info->buffer_pos < 6 ) + return -1; + *extension_index = info->buffer_pos[5] >> 6; + return 0; +} + +void dts_update_specific_param( dts_info_t *info ) +{ + lsmash_dts_specific_parameters_t *param = &info->ddts_param; + /* DTSSamplingFrequency and FrameDuration */ + if( info->flags & DTS_CORE_SUBSTREAM_CORE_FLAG ) + { + param->DTSSamplingFrequency = info->core.sampling_frequency; + info->frame_duration = info->core.frame_duration; + } + if( param->DTSSamplingFrequency <= info->extension.sampling_frequency ) + { + param->DTSSamplingFrequency = info->extension.sampling_frequency; + info->frame_duration = info->extension.frame_duration; + } + if( param->DTSSamplingFrequency <= info->lbr.sampling_frequency ) + { + param->DTSSamplingFrequency = info->lbr.sampling_frequency; + info->frame_duration = info->lbr.frame_duration; + } + if( param->DTSSamplingFrequency <= info->lossless.sampling_frequency ) + { + param->DTSSamplingFrequency = info->lossless.sampling_frequency; + info->frame_duration = info->lossless.frame_duration; + } + param->FrameDuration = 0; + for( uint32_t frame_duration = info->frame_duration >> 10; 
frame_duration; frame_duration >>= 1 ) + ++ param->FrameDuration; + /* pcmSampleDepth */ + param->pcmSampleDepth = info->core.pcm_resolution; + param->pcmSampleDepth = LSMASH_MAX( param->pcmSampleDepth, info->extension.bit_resolution ); + param->pcmSampleDepth = LSMASH_MAX( param->pcmSampleDepth, info->lbr.sample_size ); + param->pcmSampleDepth = LSMASH_MAX( param->pcmSampleDepth, info->lossless.bit_width ); + param->pcmSampleDepth = param->pcmSampleDepth > 16 ? 24 : 16; + /* StreamConstruction */ + param->StreamConstruction = lsmash_dts_get_stream_construction( info->flags ); + /* CoreLFEPresent */ + param->CoreLFEPresent = !!(info->core.channel_layout & DTS_CHANNEL_LAYOUT_LFE1); + /* CoreLayout */ + if( param->StreamConstruction == 0 || param->StreamConstruction >= 19 ) + param->CoreLayout = 31; /* Use ChannelLayout. */ + else + { + if( info->core.channel_arrangement != 1 + && info->core.channel_arrangement != 3 + && info->core.channel_arrangement <= 9 ) + param->CoreLayout = info->core.channel_arrangement; + else + param->CoreLayout = 31; /* Use ChannelLayout. */ + } + /* CoreSize + * The specification says this field is the size of a core substream AU in bytes. + * If we don't assume CoreSize is the copy of FSIZE, when FSIZE equals 0x3FFF, this field overflows and becomes 0. */ + param->CoreSize = LSMASH_MIN( info->core.frame_size, 0x3FFF ); + /* StereoDownmix */ + param->StereoDownmix = info->extension.stereo_downmix | info->lbr.stereo_downmix; + /* RepresentationType */ + param->RepresentationType = info->extension.representation_type; + /* ChannelLayout */ + param->ChannelLayout = info->core.channel_layout + | info->extension.channel_layout + | info->lbr.channel_layout + | info->lossless.channel_layout; + /* MultiAssetFlag + * When multiple assets exist, the remaining parameters in the DTSSpecificBox only reflect the coding parameters of the first asset. 
*/ + param->MultiAssetFlag = 1 < info->extension.number_of_assets; + /* LBRDurationMod */ + param->LBRDurationMod = param->MultiAssetFlag + ? info->lbr.duration_modifier && !(info->flags & DTS_CORE_SUBSTREAM_CORE_FLAG) + : info->lbr.duration_modifier; + info->ddts_param_initialized = 1; +} + +int dts_construct_specific_parameters( lsmash_codec_specific_t *dst, lsmash_codec_specific_t *src ) +{ + assert( dst && dst->data.structured && src && src->data.unstructured ); + if( src->size < DTS_SPECIFIC_BOX_MIN_LENGTH ) + return -1; + lsmash_dts_specific_parameters_t *param = (lsmash_dts_specific_parameters_t *)dst->data.structured; + uint8_t *data = src->data.unstructured; + uint64_t size = ((uint32_t)data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; /* Cast before shifting: an int shift of a byte >= 0x80 overflows and sign-extends into the 64-bit size. */ + int dts_specific_box_min_length = DTS_SPECIFIC_BOX_MIN_LENGTH; + data += ISOM_BASEBOX_COMMON_SIZE; + if( size == 1 ) + { + size = ((uint64_t)data[0] << 56) | ((uint64_t)data[1] << 48) | ((uint64_t)data[2] << 40) | ((uint64_t)data[3] << 32) + | ((uint64_t)data[4] << 24) | ((uint64_t)data[5] << 16) | ((uint64_t)data[6] << 8) | (uint64_t)data[7]; + dts_specific_box_min_length += 8; + data += 8; + } + if( size != src->size || size < (uint64_t)dts_specific_box_min_length ) /* The 20 fixed bytes read below must lie inside the box, also when the 8-byte extended size was consumed. */ + return -1; + param->DTSSamplingFrequency = ((uint32_t)data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; + param->maxBitrate = ((uint32_t)data[4] << 24) | (data[5] << 16) | (data[6] << 8) | data[7]; + param->avgBitrate = ((uint32_t)data[8] << 24) | (data[9] << 16) | (data[10] << 8) | data[11]; + param->pcmSampleDepth = data[12]; + param->FrameDuration = (data[13] >> 6) & 0x03; + param->StreamConstruction = (data[13] >> 1) & 0x1F; + param->CoreLFEPresent = data[13] & 0x01; + param->CoreLayout = (data[14] >> 2) & 0x3F; + param->CoreSize = ((data[14] & 0x03) << 12) | (data[15] << 4) | ((data[16] >> 4) & 0x0F); + param->StereoDownmix = (data[16] >> 3) & 0x01; + param->RepresentationType = data[16] & 0x07; + param->ChannelLayout = (data[17] << 8) | data[18]; + param->MultiAssetFlag = (data[19] >> 7) & 0x01; +
param->LBRDurationMod = (data[19] >> 6) & 0x01; + int reserved_box_present = ((data[19] >> 5) & 0x01) && (size > (uint64_t)dts_specific_box_min_length); /* Compare against the header length actually consumed (8 more bytes when the 64-bit size was read). */ + if( reserved_box_present ) + lsmash_append_dts_reserved_box( param, data + 20, size - (uint64_t)dts_specific_box_min_length ); + return 0; +} + +int dts_copy_codec_specific( lsmash_codec_specific_t *dst, lsmash_codec_specific_t *src ) +{ + assert( src && src->format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED && src->data.structured ); + assert( dst && dst->format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED && dst->data.structured ); + lsmash_dts_specific_parameters_t *src_data = (lsmash_dts_specific_parameters_t *)src->data.structured; + lsmash_dts_specific_parameters_t *dst_data = (lsmash_dts_specific_parameters_t *)dst->data.structured; + lsmash_remove_dts_reserved_box( dst_data ); + *dst_data = *src_data; /* NOTE(review): dst_data->box now holds the same pointer as src_data->box - confirm lsmash_append_dts_reserved_box() copes with that. */ + if( !src_data->box ) /* No reserved box to duplicate. The former test dereferenced the NULL pointer here and was always false for a non-NULL box. */ + return 0; + return lsmash_append_dts_reserved_box( dst_data, src_data->box->data, src_data->box->size ); +} + +int dts_print_codec_specific( FILE *fp, lsmash_root_t *root, isom_box_t *box, int level ) +{ + assert( fp && root && box ); + int indent = level; + lsmash_ifprintf( fp, indent++, "[%s: DTS Specific Box]\n", isom_4cc2str( box->type.fourcc ) ); + lsmash_ifprintf( fp, indent, "position = %"PRIu64"\n", box->pos ); + lsmash_ifprintf( fp, indent, "size = %"PRIu64"\n", box->size ); + if( box->size < DTS_SPECIFIC_BOX_MIN_LENGTH ) + return -1; + isom_extension_box_t *ext = (isom_extension_box_t *)box; + assert( ext->format == EXTENSION_FORMAT_BINARY ); + uint8_t *data = ext->form.binary; + isom_skip_box_common( &data ); + uint32_t DTSSamplingFrequency = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; + uint32_t maxBitrate = (data[4] << 24) | (data[5] << 16) | (data[6] << 8) | data[7]; + uint32_t avgBitrate = (data[8] << 24) | (data[9] << 16) | (data[10] << 8) | data[11]; + uint8_t pcmSampleDepth = data[12]; + uint8_t FrameDuration = (data[13] >>
6) & 0x03; + uint8_t StreamConstruction = (data[13] >> 1) & 0x1F; + uint8_t CoreLFEPresent = data[13] & 0x01; + uint8_t CoreLayout = (data[14] >> 2) & 0x3F; + uint16_t CoreSize = ((data[14] & 0x03) << 12) | (data[15] << 4) | ((data[16] >> 4) & 0x0F); + uint8_t StereoDownmix = (data[16] >> 3) & 0x01; + uint8_t RepresentationType = data[16] & 0x07; + uint16_t ChannelLayout = (data[17] << 8) | data[18]; + uint8_t MultiAssetFlag = (data[19] >> 7) & 0x01; + uint8_t LBRDurationMod = (data[19] >> 6) & 0x01; + uint8_t ReservedBoxPresent = (data[19] >> 5) & 0x01; + uint8_t Reserved = data[19] & 0x1F; + uint32_t frame_duration = 512 << FrameDuration; + int construction_flags = StreamConstruction <= DTS_MAX_STREAM_CONSTRUCTION ? construction_info[StreamConstruction] : 0; + static const char *core_layout_description[64] = + { + "Mono (1/0)", + "Undefined", + "Stereo (2/0)", + "Undefined", + "LT,RT (2/0)", + "L, C, R (3/0)", + "L, R, S (2/1)", + "L, C, R, S (3/1)", + "L, R, LS, RS (2/2)", + "L, C, R, LS, RS (3/2)", + [31] = "use ChannelLayout" + }; + static const char *representation_type_description[8] = + { + "Audio asset designated for mixing with another audio asset", + "Reserved", + "Lt/Rt Encoded for matrix surround decoding", + "Audio processed for headphone playback", + "Reserved", + "Reserved", + "Reserved", + "Reserved" + }; + static const char *channel_layout_description[16] = + { + "Center in front of listener", + "Left/Right in front", + "Left/Right surround on side in rear", + "Low frequency effects subwoofer", + "Center surround in rear", + "Left/Right height in front", + "Left/Right surround in rear", + "Center Height in front", + "Over the listenerfs head", + "Between left/right and center in front", + "Left/Right on side in front", + "Left/Right surround on side", + "Second low frequency effects subwoofer", + "Left/Right height on side", + "Center height in rear", + "Left/Right height in rear" + }; + lsmash_ifprintf( fp, indent, "DTSSamplingFrequency = 
%"PRIu32" Hz\n", DTSSamplingFrequency ); + lsmash_ifprintf( fp, indent, "maxBitrate = %"PRIu32" bit/s\n", maxBitrate ); + lsmash_ifprintf( fp, indent, "avgBitrate = %"PRIu32" bit/s\n", avgBitrate ); + lsmash_ifprintf( fp, indent, "pcmSampleDepth = %"PRIu8" bits\n", pcmSampleDepth ); + lsmash_ifprintf( fp, indent, "FrameDuration = %"PRIu8" (%"PRIu32" samples)\n", FrameDuration, frame_duration ); + lsmash_ifprintf( fp, indent, "StreamConstruction = 0x%02"PRIx8"\n", StreamConstruction ); + if( construction_flags & (DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XCH_FLAG | DTS_CORE_SUBSTREAM_X96_FLAG | DTS_CORE_SUBSTREAM_XXCH_FLAG) ) + { + lsmash_ifprintf( fp, indent + 1, "Core substream\n" ); + if( construction_flags & DTS_CORE_SUBSTREAM_CORE_FLAG ) + lsmash_ifprintf( fp, indent + 2, "Core\n" ); + if( construction_flags & DTS_CORE_SUBSTREAM_XCH_FLAG ) + lsmash_ifprintf( fp, indent + 2, "XCH\n" ); + if( construction_flags & DTS_CORE_SUBSTREAM_X96_FLAG ) + lsmash_ifprintf( fp, indent + 2, "X96\n" ); + if( construction_flags & DTS_CORE_SUBSTREAM_XXCH_FLAG ) + lsmash_ifprintf( fp, indent + 2, "XXCH\n" ); + } + if( construction_flags & (DTS_EXT_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XXCH_FLAG | DTS_EXT_SUBSTREAM_X96_FLAG + | DTS_EXT_SUBSTREAM_XBR_FLAG | DTS_EXT_SUBSTREAM_XLL_FLAG | DTS_EXT_SUBSTREAM_LBR_FLAG) ) + { + lsmash_ifprintf( fp, indent + 1, "Extension substream\n" ); + if( construction_flags & DTS_EXT_SUBSTREAM_CORE_FLAG ) + lsmash_ifprintf( fp, indent + 2, "Core\n" ); + if( construction_flags & DTS_EXT_SUBSTREAM_XXCH_FLAG ) + lsmash_ifprintf( fp, indent + 2, "XXCH\n" ); + if( construction_flags & DTS_EXT_SUBSTREAM_X96_FLAG ) + lsmash_ifprintf( fp, indent + 2, "X96\n" ); + if( construction_flags & DTS_EXT_SUBSTREAM_XBR_FLAG ) + lsmash_ifprintf( fp, indent + 2, "XBR\n" ); + if( construction_flags & DTS_EXT_SUBSTREAM_XLL_FLAG ) + lsmash_ifprintf( fp, indent + 2, "XLL\n" ); + if( construction_flags & DTS_EXT_SUBSTREAM_LBR_FLAG ) + lsmash_ifprintf( fp, indent + 2, 
"LBR\n" ); + } + lsmash_ifprintf( fp, indent, "CoreLFEPresent = %s\n", CoreLFEPresent ? "1 (LFE exists)" : "0 (no LFE)" ); + if( core_layout_description[CoreLayout] ) + lsmash_ifprintf( fp, indent, "CoreLayout = %"PRIu8" (%s)\n", CoreLayout, core_layout_description[CoreLayout] ); + else + lsmash_ifprintf( fp, indent, "CoreLayout = %"PRIu8" (Undefined)\n", CoreLayout ); + if( CoreSize ) + lsmash_ifprintf( fp, indent, "CoreSize = %"PRIu16"\n", CoreSize ); + else + lsmash_ifprintf( fp, indent, "CoreSize = 0 (no core substream exists)\n" ); + lsmash_ifprintf( fp, indent, "StereoDownmix = %s\n", StereoDownmix ? "1 (embedded downmix present)" : "0 (no embedded downmix)" ); + lsmash_ifprintf( fp, indent, "RepresentationType = %"PRIu8" (%s)\n", RepresentationType, representation_type_description[RepresentationType] ); + lsmash_ifprintf( fp, indent, "ChannelLayout = 0x%04"PRIx16"\n", ChannelLayout ); + if( ChannelLayout ) + for( int i = 0; i < 16; i++ ) + if( (ChannelLayout >> i) & 0x01 ) + lsmash_ifprintf( fp, indent + 1, "%s\n", channel_layout_description[i] ); + lsmash_ifprintf( fp, indent, "MultiAssetFlag = %s\n", MultiAssetFlag ? "1 (multiple asset)" : "0 (single asset)" ); + if( LBRDurationMod ) + lsmash_ifprintf( fp, indent, "LBRDurationMod = 1 (%"PRIu32" -> %"PRIu32" samples)\n", frame_duration, (frame_duration * 3) / 2 ); + else + lsmash_ifprintf( fp, indent, "LBRDurationMod = 0 (no LBR duration modifier)\n" ); + lsmash_ifprintf( fp, indent, "ReservedBoxPresent = %s\n", ReservedBoxPresent ? 
"1 (ReservedBox present)" : "0 (no ReservedBox)" ); + lsmash_ifprintf( fp, indent, "Reserved = 0x%02"PRIx8"\n", Reserved ); + return 0; +} diff --git a/output/mp4/dts.h b/output/mp4/dts.h new file mode 100644 index 0000000..fcb70db --- /dev/null +++ b/output/mp4/dts.h @@ -0,0 +1,113 @@ +/***************************************************************************** + * dts.h: + ***************************************************************************** + * Copyright (C) 2012 L-SMASH project + * + * Authors: Yusuke Nakamura + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *****************************************************************************/ + +/* This file is available under an ISC license. 
*/ + +#define DTS_MAX_CORE_SIZE 16384 +#define DTS_MAX_EXTENSION_SIZE 32768 + +typedef enum +{ + DTS_SUBSTREAM_TYPE_NONE = 0, + DTS_SUBSTREAM_TYPE_CORE = 1, + DTS_SUBSTREAM_TYPE_EXTENSION = 2, +} dts_substream_type; + +typedef struct +{ + uint32_t sampling_frequency; + uint32_t frame_duration; + uint16_t frame_size; + uint16_t channel_layout; + uint8_t channel_arrangement; + uint8_t xxch_lower_planes; + uint8_t extension_audio_descriptor; + uint8_t pcm_resolution; +} dts_core_info_t; + +typedef struct +{ + uint32_t sampling_frequency; + uint32_t frame_duration; + uint16_t channel_layout; + uint8_t xxch_lower_planes; + uint8_t bStaticFieldsPresent; + uint8_t bMixMetadataEnbl; + uint8_t bOne2OneMapChannels2Speakers; + uint8_t nuNumMixOutConfigs; + uint8_t nNumMixOutCh[4]; + uint8_t number_of_assets; + uint8_t stereo_downmix; + uint8_t representation_type; + uint8_t bit_resolution; +} dts_extension_info_t; + +typedef struct +{ + uint32_t sampling_frequency; + uint32_t frame_duration; + uint16_t channel_layout; + uint8_t bit_width; +} dts_lossless_info_t; + +typedef struct +{ + uint32_t sampling_frequency; + uint32_t frame_duration; + uint16_t channel_layout; + uint8_t stereo_downmix; + uint8_t lfe_present; + uint8_t duration_modifier; + uint8_t sample_size; +} dts_lbr_info_t; + +typedef struct +{ + dts_substream_type substream_type; + lsmash_dts_construction_flag flags; + lsmash_dts_specific_parameters_t ddts_param; + dts_core_info_t core; + dts_extension_info_t extension; + dts_lossless_info_t lossless; + dts_lbr_info_t lbr; + uint8_t ddts_param_initialized; + uint8_t no_more_read; + uint8_t extension_index; + uint8_t extension_substream_count; + uint32_t frame_duration; + uint32_t frame_size; + uint8_t buffer[2 * DTS_MAX_EXTENSION_SIZE]; + uint8_t *buffer_pos; + uint8_t *buffer_end; + lsmash_bits_t *bits; + lsmash_multiple_buffers_t *au_buffers; + uint8_t *au; + uint32_t au_length; + uint8_t *incomplete_au; + uint32_t incomplete_au_length; + uint32_t au_number; +} 
dts_info_t; + +int dts_parse_core_substream( dts_info_t *info, uint8_t *data, uint32_t data_length ); +int dts_parse_extension_substream( dts_info_t *info, uint8_t *data, uint32_t data_length ); +int dts_get_channel_count_from_channel_layout( uint16_t channel_layout ); +dts_substream_type dts_get_substream_type( dts_info_t *info ); +int dts_get_extension_index( dts_info_t *info, uint8_t *extension_index ); +void dts_update_specific_param( dts_info_t *info ); diff --git a/output/mp4/h264.c b/output/mp4/h264.c new file mode 100644 index 0000000..ec89879 --- /dev/null +++ b/output/mp4/h264.c @@ -0,0 +1,2237 @@ +/***************************************************************************** + * h264.c: + ***************************************************************************** + * Copyright (C) 2012 L-SMASH project + * + * Authors: Yusuke Nakamura + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *****************************************************************************/ + +/* This file is available under an ISC license. 
*/ + +#include "internal.h" + +#include +#include +#include +#include + +#include "box.h" + +/*************************************************************************** + ITU-T Recommendation H.264 (03/10) + ISO/IEC 14496-15:2010 +***************************************************************************/ +#include "h264.h" + +#define IF_INVALID_VALUE( x ) if( x ) +#define IF_EXCEED_INT32( x ) if( (x) < INT32_MIN || (x) > INT32_MAX ) +#define H264_POC_DEBUG_PRINT 0 + +typedef struct +{ + uint8_t *data; + uint32_t remainder_length; + uint32_t overall_wasted_length; +} h264_data_stream_handler_t; + +typedef enum +{ + H264_SLICE_TYPE_P = 0, + H264_SLICE_TYPE_B = 1, + H264_SLICE_TYPE_I = 2, + H264_SLICE_TYPE_SP = 3, + H264_SLICE_TYPE_SI = 4 +} h264_slice_type; + +void lsmash_destroy_h264_parameter_sets( lsmash_h264_specific_parameters_t *param ) +{ + if( !param || !param->parameter_sets ) + return; + lsmash_remove_entries( param->parameter_sets->sps_list, isom_remove_avcC_ps ); + lsmash_remove_entries( param->parameter_sets->pps_list, isom_remove_avcC_ps ); + lsmash_remove_entries( param->parameter_sets->spsext_list, isom_remove_avcC_ps ); + free( param->parameter_sets ); + param->parameter_sets = NULL; +} + +void h264_destruct_specific_data( void *data ) +{ + if( !data ) + return; + lsmash_destroy_h264_parameter_sets( data ); + free( data ); +} + +void h264_cleanup_parser( h264_info_t *info ) +{ + if( !info ) + return; + lsmash_remove_entries( info->sps_list, NULL ); + lsmash_remove_entries( info->pps_list, NULL ); + lsmash_remove_entries( info->slice_list, NULL ); + lsmash_destroy_h264_parameter_sets( &info->avcC_param ); + lsmash_destroy_multiple_buffers( info->buffer.bank ); + lsmash_bits_adhoc_cleanup( info->bits ); + info->buffer.bank = NULL; + info->bits = NULL; +} + +int h264_setup_parser( h264_info_t *info, int parse_only, uint32_t (*update)( h264_info_t *, void *, uint32_t ) ) +{ + if( !info ) + return -1; + memset( info, 0, sizeof(h264_info_t) ); + 
info->avcC_param.lengthSizeMinusOne = H264_DEFAULT_NALU_LENGTH_SIZE - 1; + h264_stream_buffer_t *buffer = &info->buffer; + buffer->bank = lsmash_create_multiple_buffers( parse_only ? 2 : 4, H264_DEFAULT_BUFFER_SIZE ); + if( !buffer->bank ) + return -1; + buffer->start = lsmash_withdraw_buffer( buffer->bank, 1 ); + buffer->rbsp = lsmash_withdraw_buffer( buffer->bank, 2 ); + buffer->pos = buffer->start; + buffer->end = buffer->start; + buffer->update = update; + if( !parse_only ) + { + info->picture.au = lsmash_withdraw_buffer( buffer->bank, 3 ); + info->picture.incomplete_au = lsmash_withdraw_buffer( buffer->bank, 4 ); + } + info->bits = lsmash_bits_adhoc_create(); + if( !info->bits ) + { + lsmash_destroy_multiple_buffers( info->buffer.bank ); + info->buffer.bank = NULL; + return -1; + } + lsmash_init_entry_list( info->sps_list ); + lsmash_init_entry_list( info->pps_list ); + lsmash_init_entry_list( info->slice_list ); + return 0; +} + +static h264_sps_t *h264_get_sps( lsmash_entry_list_t *sps_list, uint8_t sps_id ) +{ + if( !sps_list || sps_id > 31 ) + return NULL; + for( lsmash_entry_t *entry = sps_list->head; entry; entry = entry->next ) + { + h264_sps_t *sps = (h264_sps_t *)entry->data; + if( !sps ) + return NULL; + if( sps->seq_parameter_set_id == sps_id ) + return sps; + } + h264_sps_t *sps = lsmash_malloc_zero( sizeof(h264_sps_t) ); + if( !sps ) + return NULL; + sps->seq_parameter_set_id = sps_id; + if( lsmash_add_entry( sps_list, sps ) ) + { + free( sps ); + return NULL; + } + return sps; +} + +static h264_pps_t *h264_get_pps( lsmash_entry_list_t *pps_list, uint8_t pps_id ) +{ + if( !pps_list ) + return NULL; + for( lsmash_entry_t *entry = pps_list->head; entry; entry = entry->next ) + { + h264_pps_t *pps = (h264_pps_t *)entry->data; + if( !pps ) + return NULL; + if( pps->pic_parameter_set_id == pps_id ) + return pps; + } + h264_pps_t *pps = lsmash_malloc_zero( sizeof(h264_pps_t) ); + if( !pps ) + return NULL; + pps->pic_parameter_set_id = pps_id; + if( 
lsmash_add_entry( pps_list, pps ) ) + { + free( pps ); + return NULL; + } + return pps; +} + +static h264_slice_info_t *h264_get_slice_info( lsmash_entry_list_t *slice_list, uint8_t slice_id ) +{ + if( !slice_list ) + return NULL; + for( lsmash_entry_t *entry = slice_list->head; entry; entry = entry->next ) + { + h264_slice_info_t *slice = (h264_slice_info_t *)entry->data; + if( !slice ) + return NULL; + if( slice->slice_id == slice_id ) + return slice; + } + h264_slice_info_t *slice = lsmash_malloc_zero( sizeof(h264_slice_info_t) ); + if( !slice ) + return NULL; + slice->slice_id = slice_id; + if( lsmash_add_entry( slice_list, slice ) ) + { + free( slice ); + return NULL; + } + return slice; +} + +int h264_calculate_poc( h264_info_t *info, h264_picture_info_t *picture, h264_picture_info_t *prev_picture ) +{ +#if H264_POC_DEBUG_PRINT + fprintf( stderr, "PictureOrderCount\n" ); +#endif + h264_pps_t *pps = h264_get_pps( info->pps_list, picture->pic_parameter_set_id ); + if( !pps ) + return -1; + h264_sps_t *sps = h264_get_sps( info->sps_list, pps->seq_parameter_set_id ); + if( !sps ) + return -1; + int64_t TopFieldOrderCnt = 0; + int64_t BottomFieldOrderCnt = 0; + if( sps->pic_order_cnt_type == 0 ) + { + int32_t prevPicOrderCntMsb; + int32_t prevPicOrderCntLsb; + if( picture->idr ) + { + prevPicOrderCntMsb = 0; + prevPicOrderCntLsb = 0; + } + else if( prev_picture->ref_pic_has_mmco5 ) + { + prevPicOrderCntMsb = 0; + prevPicOrderCntLsb = prev_picture->ref_pic_bottom_field_flag ? 
0 : prev_picture->ref_pic_TopFieldOrderCnt; + } + else + { + prevPicOrderCntMsb = prev_picture->ref_pic_PicOrderCntMsb; + prevPicOrderCntLsb = prev_picture->ref_pic_PicOrderCntLsb; + } + int64_t PicOrderCntMsb; + int32_t pic_order_cnt_lsb = picture->pic_order_cnt_lsb; + uint64_t MaxPicOrderCntLsb = sps->MaxPicOrderCntLsb; + if( (pic_order_cnt_lsb < prevPicOrderCntLsb) + && ((prevPicOrderCntLsb - pic_order_cnt_lsb) >= (MaxPicOrderCntLsb / 2)) ) + PicOrderCntMsb = prevPicOrderCntMsb + MaxPicOrderCntLsb; + else if( (pic_order_cnt_lsb > prevPicOrderCntLsb) + && ((pic_order_cnt_lsb - prevPicOrderCntLsb) > (MaxPicOrderCntLsb / 2)) ) + PicOrderCntMsb = prevPicOrderCntMsb - MaxPicOrderCntLsb; + else + PicOrderCntMsb = prevPicOrderCntMsb; + IF_EXCEED_INT32( PicOrderCntMsb ) + return -1; + BottomFieldOrderCnt = TopFieldOrderCnt = PicOrderCntMsb + pic_order_cnt_lsb; + if( !picture->field_pic_flag ) + BottomFieldOrderCnt += picture->delta_pic_order_cnt_bottom; + IF_EXCEED_INT32( TopFieldOrderCnt ) + return -1; + IF_EXCEED_INT32( BottomFieldOrderCnt ) + return -1; + if( !picture->disposable ) + { + picture->ref_pic_has_mmco5 = picture->has_mmco5; + picture->ref_pic_bottom_field_flag = picture->bottom_field_flag; + picture->ref_pic_TopFieldOrderCnt = TopFieldOrderCnt; + picture->ref_pic_PicOrderCntMsb = PicOrderCntMsb; + picture->ref_pic_PicOrderCntLsb = pic_order_cnt_lsb; + } +#if H264_POC_DEBUG_PRINT + fprintf( stderr, " prevPicOrderCntMsb: %"PRId32"\n", prevPicOrderCntMsb ); + fprintf( stderr, " prevPicOrderCntLsb: %"PRId32"\n", prevPicOrderCntLsb ); + fprintf( stderr, " PicOrderCntMsb: %"PRId64"\n", PicOrderCntMsb ); + fprintf( stderr, " pic_order_cnt_lsb: %"PRId32"\n", pic_order_cnt_lsb ); + fprintf( stderr, " MaxPicOrderCntLsb: %"PRIu64"\n", MaxPicOrderCntLsb ); +#endif + } + else if( sps->pic_order_cnt_type == 1 ) + { + uint32_t frame_num = picture->frame_num; + uint32_t prevFrameNum = prev_picture->frame_num; + uint32_t prevFrameNumOffset = prev_picture->has_mmco5 ? 
0 : prev_picture->FrameNumOffset; + uint64_t FrameNumOffset = picture->idr ? 0 : prevFrameNumOffset + (prevFrameNum > frame_num ? sps->MaxFrameNum : 0); + IF_INVALID_VALUE( FrameNumOffset > INT32_MAX ) + return -1; + int64_t expectedPicOrderCnt; + if( sps->num_ref_frames_in_pic_order_cnt_cycle ) + { + uint64_t absFrameNum = FrameNumOffset + frame_num; + absFrameNum -= picture->disposable && absFrameNum > 0; + if( absFrameNum ) + { + uint64_t picOrderCntCycleCnt = (absFrameNum - 1) / sps->num_ref_frames_in_pic_order_cnt_cycle; + uint8_t frameNumInPicOrderCntCycle = (absFrameNum - 1) % sps->num_ref_frames_in_pic_order_cnt_cycle; + expectedPicOrderCnt = picOrderCntCycleCnt * sps->ExpectedDeltaPerPicOrderCntCycle; + for( uint8_t i = 0; i <= frameNumInPicOrderCntCycle; i++ ) + expectedPicOrderCnt += sps->offset_for_ref_frame[i]; + } + else + expectedPicOrderCnt = 0; + } + else + expectedPicOrderCnt = 0; + if( picture->disposable ) + expectedPicOrderCnt += sps->offset_for_non_ref_pic; + TopFieldOrderCnt = expectedPicOrderCnt + picture->delta_pic_order_cnt[0]; + BottomFieldOrderCnt = TopFieldOrderCnt + sps->offset_for_top_to_bottom_field; + if( !picture->field_pic_flag ) + BottomFieldOrderCnt += picture->delta_pic_order_cnt[1]; + IF_EXCEED_INT32( TopFieldOrderCnt ) + return -1; + IF_EXCEED_INT32( BottomFieldOrderCnt ) + return -1; + picture->FrameNumOffset = FrameNumOffset; + } + else if( sps->pic_order_cnt_type == 2 ) + { + uint32_t frame_num = picture->frame_num; + uint32_t prevFrameNum = prev_picture->frame_num; + int32_t prevFrameNumOffset = prev_picture->has_mmco5 ? 0 : prev_picture->FrameNumOffset; + int64_t FrameNumOffset; + int64_t tempPicOrderCnt; + if( picture->idr ) + { + FrameNumOffset = 0; + tempPicOrderCnt = 0; + } + else + { + FrameNumOffset = prevFrameNumOffset + (prevFrameNum > frame_num ? 
sps->MaxFrameNum : 0); + tempPicOrderCnt = 2 * (FrameNumOffset + frame_num) - picture->disposable; + IF_EXCEED_INT32( FrameNumOffset ) + return -1; + IF_EXCEED_INT32( tempPicOrderCnt ) + return -1; + } + BottomFieldOrderCnt = TopFieldOrderCnt = tempPicOrderCnt; + picture->FrameNumOffset = FrameNumOffset; + } + if( !picture->field_pic_flag ) + picture->PicOrderCnt = LSMASH_MIN( TopFieldOrderCnt, BottomFieldOrderCnt ); + else + picture->PicOrderCnt = picture->bottom_field_flag ? BottomFieldOrderCnt : TopFieldOrderCnt; +#if H264_POC_DEBUG_PRINT + if( picture->field_pic_flag ) + { + if( !picture->bottom_field_flag ) + fprintf( stderr, " TopFieldOrderCnt: %"PRId64"\n", TopFieldOrderCnt ); + else + fprintf( stderr, " BottomFieldOrderCnt: %"PRId64"\n", BottomFieldOrderCnt ); + } + fprintf( stderr, " POC: %"PRId32"\n", picture->PicOrderCnt ); +#endif + return 0; +} + +int h264_check_nalu_header( h264_nalu_header_t *nalu_header, uint8_t **p_buf_pos, int use_long_start_code ) +{ + uint8_t *buf_pos = *p_buf_pos; + uint8_t forbidden_zero_bit = (*buf_pos >> 7) & 0x01; + uint8_t nal_ref_idc = nalu_header->nal_ref_idc = (*buf_pos >> 5) & 0x03; + uint8_t nal_unit_type = nalu_header->nal_unit_type = *buf_pos & 0x1f; + nalu_header->length = 1; + *p_buf_pos = buf_pos + nalu_header->length; + if( nal_unit_type == 14 || nal_unit_type == 20 ) + return -1; /* We don't support yet. */ + IF_INVALID_VALUE( forbidden_zero_bit ) + return -1; + /* SPS and PPS require long start code (0x00000001). + * Also AU delimiter requires it too because this type of NALU shall be the first NALU of any AU if present. */ + IF_INVALID_VALUE( !use_long_start_code && (nal_unit_type == 7 || nal_unit_type == 8 || nal_unit_type == 9) ) + return -1; + if( nal_ref_idc ) + { + /* nal_ref_idc shall be equal to 0 for all NALUs having nal_unit_type equal to 6, 9, 10, 11, or 12. 
*/ + IF_INVALID_VALUE( nal_unit_type == 6 || nal_unit_type == 9 || nal_unit_type == 10 || nal_unit_type == 11 || nal_unit_type == 12 ) + return -1; + } + else + /* nal_ref_idc shall not be equal to 0 for NALUs with nal_unit_type equal to 5. */ + IF_INVALID_VALUE( nal_unit_type == 5 ) + return -1; + return 0; +} + +static inline uint64_t h264_get_codeNum( lsmash_bits_t *bits ) +{ + uint32_t leadingZeroBits = 0; + for( int b = 0; !b; leadingZeroBits++ ) + b = lsmash_bits_get( bits, 1 ); + --leadingZeroBits; /* The loop also counted the terminating 1 bit. */ + return ((uint64_t)1 << leadingZeroBits) - 1 + lsmash_bits_get( bits, leadingZeroBits ); +} + +static inline uint64_t h264_decode_exp_golomb_ue( uint64_t codeNum ) +{ + return codeNum; +} + +static inline int64_t h264_decode_exp_golomb_se( uint64_t codeNum ) +{ + if( codeNum & 1 ) + return (int64_t)((codeNum >> 1) + 1); + return -1 * (int64_t)(codeNum >> 1); +} + +static uint64_t h264_get_exp_golomb_ue( lsmash_bits_t *bits ) +{ + uint64_t codeNum = h264_get_codeNum( bits ); + return h264_decode_exp_golomb_ue( codeNum ); +} + +static int64_t h264_get_exp_golomb_se( lsmash_bits_t *bits ) /* Signed return type so that negative se(v) values are not passed through an unsigned type. */ +{ + uint64_t codeNum = h264_get_codeNum( bits ); + return h264_decode_exp_golomb_se( codeNum ); +} + +/* Convert EBSP (Encapsulated Byte Sequence Payload) to RBSP (Raw Byte Sequence Payload). */ +static uint8_t *h264_remove_emulation_prevention( uint8_t *src, uint64_t src_length, uint8_t *dst ) +{ + uint8_t *src_end = src + src_length; + while( src < src_end ) + if( ((src + 2) < src_end) && !src[0] && !src[1] && (src[2] == 0x03) ) + { + /* 0x000003 -> 0x0000 */ + *dst++ = *src++; + *dst++ = *src++; + src++; /* Skip emulation_prevention_three_byte (0x03).
*/ + } + else + *dst++ = *src++; + return dst; +} + +static int h264_import_rbsp_from_ebsp( lsmash_bits_t *bits, uint8_t *rbsp_buffer, uint8_t *ebsp, uint64_t ebsp_size ) +{ + uint8_t *rbsp_start = rbsp_buffer; + uint8_t *rbsp_end = h264_remove_emulation_prevention( ebsp, ebsp_size, rbsp_buffer ); + uint64_t rbsp_length = rbsp_end - rbsp_start; + return lsmash_bits_import_data( bits, rbsp_start, rbsp_length ); +} + +static int h264_check_more_rbsp_data( lsmash_bits_t *bits ) +{ + lsmash_bs_t *bs = bits->bs; + if( bs->pos < bs->store && !(bits->store == 0 && (bs->store == bs->pos + 1)) ) + return 1; /* rbsp_trailing_bits will be placed at the next or later byte. + * Note: bs->pos points at the next byte if bits->store isn't empty. */ + if( bits->store == 0 ) + { + if( bs->store == bs->pos + 1 ) + return bs->data[ bs->pos ] != 0x80; + /* No rbsp_trailing_bits is present in RBSP data. */ + bs->error = 1; + return 0; + } + /* Check whether remainder of bits is identical to rbsp_trailing_bits. 
*/ + uint8_t remainder_bits = bits->cache & ~(~0U << bits->store); + uint8_t rbsp_trailing_bits = 1U << (bits->store - 1); + return remainder_bits != rbsp_trailing_bits; +} + +static int h264_parse_scaling_list( lsmash_bits_t *bits, int sizeOfScalingList ) +{ + /* scaling_list( scalingList, sizeOfScalingList, useDefaultScalingMatrixFlag ) */ + int nextScale = 8; + for( int i = 0; i < sizeOfScalingList; i++ ) + { + int64_t delta_scale = h264_get_exp_golomb_se( bits ); + IF_INVALID_VALUE( delta_scale < -128 || delta_scale > 127 ) + return -1; + nextScale = (nextScale + delta_scale + 256) % 256; + if( nextScale == 0 ) + break; + } + return 0; +} + +static int h264_parse_hrd_parameters( lsmash_bits_t *bits ) +{ + /* hrd_parameters() */ + uint64_t cpb_cnt_minus1 = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( cpb_cnt_minus1 > 31 ) + return -1; + lsmash_bits_get( bits, 4 ); /* bit_rate_scale */ + lsmash_bits_get( bits, 4 ); /* cpb_size_scale */ + for( uint64_t SchedSelIdx = 0; SchedSelIdx <= cpb_cnt_minus1; SchedSelIdx++ ) + { + h264_get_exp_golomb_ue( bits ); /* bit_rate_value_minus1[ SchedSelIdx ] */ + h264_get_exp_golomb_ue( bits ); /* cpb_size_value_minus1[ SchedSelIdx ] */ + lsmash_bits_get( bits, 1 ); /* cbr_flag [ SchedSelIdx ] */ + } + lsmash_bits_get( bits, 5 ); /* initial_cpb_removal_delay_length_minus1 */ + lsmash_bits_get( bits, 5 ); /* cpb_removal_delay_length_minus1 */ + lsmash_bits_get( bits, 5 ); /* dpb_output_delay_length_minus1 */ + lsmash_bits_get( bits, 5 ); /* time_offset_length */ + return 0; +} + +static int h264_parse_sps_easy( lsmash_bits_t *bits, h264_sps_t *sps, uint8_t *rbsp_buffer, uint8_t *ebsp, uint64_t ebsp_size ) +{ + if( h264_import_rbsp_from_ebsp( bits, rbsp_buffer, ebsp, ebsp_size ) ) + return -1; + memset( sps, 0, sizeof(h264_sps_t) ); + sps->profile_idc = lsmash_bits_get( bits, 8 ); + sps->constraint_set_flags = lsmash_bits_get( bits, 8 ); + sps->level_idc = lsmash_bits_get( bits, 8 ); + uint64_t seq_parameter_set_id = 
h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( seq_parameter_set_id > 31 ) + return -1; + sps->seq_parameter_set_id = seq_parameter_set_id; + if( sps->profile_idc == 100 || sps->profile_idc == 110 || sps->profile_idc == 122 + || sps->profile_idc == 244 || sps->profile_idc == 44 || sps->profile_idc == 83 + || sps->profile_idc == 86 || sps->profile_idc == 118 || sps->profile_idc == 128 ) + { + sps->chroma_format_idc = h264_get_exp_golomb_ue( bits ); + if( sps->chroma_format_idc == 3 ) + sps->separate_colour_plane_flag = lsmash_bits_get( bits, 1 ); + uint64_t bit_depth_luma_minus8 = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( bit_depth_luma_minus8 > 6 ) + return -1; + uint64_t bit_depth_chroma_minus8 = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( bit_depth_chroma_minus8 > 6 ) + return -1; + sps->bit_depth_luma_minus8 = bit_depth_luma_minus8; + sps->bit_depth_chroma_minus8 = bit_depth_chroma_minus8; + lsmash_bits_get( bits, 1 ); /* qpprime_y_zero_transform_bypass_flag */ + if( lsmash_bits_get( bits, 1 ) ) /* seq_scaling_matrix_present_flag */ + { + int num_loops = sps->chroma_format_idc != 3 ? 8 : 12; + for( int i = 0; i < num_loops; i++ ) + if( lsmash_bits_get( bits, 1 ) /* seq_scaling_list_present_flag[i] */ + && h264_parse_scaling_list( bits, i < 6 ? 16 : 64 ) ) + return -1; + } + } + else + { + sps->chroma_format_idc = 1; + sps->separate_colour_plane_flag = 0; + sps->bit_depth_luma_minus8 = 0; + sps->bit_depth_chroma_minus8 = 0; + } + return bits->bs->error ? 
-1 : 0; +} + +int h264_parse_sps( h264_info_t *info, uint8_t *rbsp_buffer, uint8_t *ebsp, uint64_t ebsp_size ) +{ + lsmash_bits_t *bits = info->bits; + /* seq_parameter_set_data() */ + h264_sps_t temp_sps; + if( h264_parse_sps_easy( bits, &temp_sps, rbsp_buffer, ebsp, ebsp_size ) ) + return -1; + h264_sps_t *sps = h264_get_sps( info->sps_list, temp_sps.seq_parameter_set_id ); + if( !sps ) + return -1; + memset( sps, 0, sizeof(h264_sps_t) ); + sps->profile_idc = temp_sps.profile_idc; + sps->constraint_set_flags = temp_sps.constraint_set_flags; + sps->level_idc = temp_sps.level_idc; + sps->seq_parameter_set_id = temp_sps.seq_parameter_set_id; + sps->chroma_format_idc = temp_sps.chroma_format_idc; + sps->separate_colour_plane_flag = temp_sps.separate_colour_plane_flag; + sps->bit_depth_luma_minus8 = temp_sps.bit_depth_luma_minus8; + sps->bit_depth_chroma_minus8 = temp_sps.bit_depth_chroma_minus8; + sps->ChromaArrayType = sps->separate_colour_plane_flag ? 0 : sps->chroma_format_idc; + uint64_t log2_max_frame_num_minus4 = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( log2_max_frame_num_minus4 > 12 ) + return -1; + sps->log2_max_frame_num = log2_max_frame_num_minus4 + 4; + sps->MaxFrameNum = 1 << sps->log2_max_frame_num; + uint64_t pic_order_cnt_type = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( pic_order_cnt_type > 2 ) + return -1; + sps->pic_order_cnt_type = pic_order_cnt_type; + if( sps->pic_order_cnt_type == 0 ) + { + uint64_t log2_max_pic_order_cnt_lsb_minus4 = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( log2_max_pic_order_cnt_lsb_minus4 > 12 ) + return -1; + sps->log2_max_pic_order_cnt_lsb = log2_max_pic_order_cnt_lsb_minus4 + 4; + sps->MaxPicOrderCntLsb = 1 << sps->log2_max_pic_order_cnt_lsb; + } + else if( sps->pic_order_cnt_type == 1 ) + { + sps->delta_pic_order_always_zero_flag = lsmash_bits_get( bits, 1 ); + int64_t max_value = ((uint64_t)1 << 31) - 1; + int64_t min_value = -((uint64_t)1 << 31) + 1; + int64_t offset_for_non_ref_pic = 
h264_get_exp_golomb_se( bits ); + if( offset_for_non_ref_pic < min_value || offset_for_non_ref_pic > max_value ) + return -1; + sps->offset_for_non_ref_pic = offset_for_non_ref_pic; + int64_t offset_for_top_to_bottom_field = h264_get_exp_golomb_se( bits ); + if( offset_for_top_to_bottom_field < min_value || offset_for_top_to_bottom_field > max_value ) + return -1; + sps->offset_for_top_to_bottom_field = offset_for_top_to_bottom_field; + uint64_t num_ref_frames_in_pic_order_cnt_cycle = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( num_ref_frames_in_pic_order_cnt_cycle > 255 ) + return -1; + sps->num_ref_frames_in_pic_order_cnt_cycle = num_ref_frames_in_pic_order_cnt_cycle; + sps->ExpectedDeltaPerPicOrderCntCycle = 0; + for( int i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++ ) + { + int64_t offset_for_ref_frame = h264_get_exp_golomb_se( bits ); + if( offset_for_ref_frame < min_value || offset_for_ref_frame > max_value ) + return -1; + sps->offset_for_ref_frame[i] = offset_for_ref_frame; + sps->ExpectedDeltaPerPicOrderCntCycle += offset_for_ref_frame; + } + } + sps->max_num_ref_frames = h264_get_exp_golomb_ue( bits ); + lsmash_bits_get( bits, 1 ); /* gaps_in_frame_num_value_allowed_flag */ + uint64_t pic_width_in_mbs_minus1 = h264_get_exp_golomb_ue( bits ); + uint64_t pic_height_in_map_units_minus1 = h264_get_exp_golomb_ue( bits ); + sps->frame_mbs_only_flag = lsmash_bits_get( bits, 1 ); + if( !sps->frame_mbs_only_flag ) + lsmash_bits_get( bits, 1 ); /* mb_adaptive_frame_field_flag */ + lsmash_bits_get( bits, 1 ); /* direct_8x8_inference_flag */ + uint64_t PicWidthInMbs = pic_width_in_mbs_minus1 + 1; + uint64_t PicHeightInMapUnits = pic_height_in_map_units_minus1 + 1; + sps->PicSizeInMapUnits = PicWidthInMbs * PicHeightInMapUnits; + sps->cropped_width = PicWidthInMbs * 16; + sps->cropped_height = (2 - sps->frame_mbs_only_flag) * PicHeightInMapUnits * 16; + if( lsmash_bits_get( bits, 1 ) ) /* frame_cropping_flag */ + { + uint8_t CropUnitX; + uint8_t 
CropUnitY; + if( sps->ChromaArrayType == 0 ) + { + CropUnitX = 1; + CropUnitY = 2 - sps->frame_mbs_only_flag; + } + else + { + static const int SubWidthC [] = { 0, 2, 2, 1 }; + static const int SubHeightC[] = { 0, 2, 1, 1 }; + CropUnitX = SubWidthC [ sps->chroma_format_idc ]; + CropUnitY = SubHeightC[ sps->chroma_format_idc ] * (2 - sps->frame_mbs_only_flag); + } + uint64_t frame_crop_left_offset = h264_get_exp_golomb_ue( bits ); + uint64_t frame_crop_right_offset = h264_get_exp_golomb_ue( bits ); + uint64_t frame_crop_top_offset = h264_get_exp_golomb_ue( bits ); + uint64_t frame_crop_bottom_offset = h264_get_exp_golomb_ue( bits ); + sps->cropped_width -= (frame_crop_left_offset + frame_crop_right_offset) * CropUnitX; + sps->cropped_height -= (frame_crop_top_offset + frame_crop_bottom_offset) * CropUnitY; + } + if( lsmash_bits_get( bits, 1 ) ) /* vui_parameters_present_flag */ + { + /* vui_parameters() */ + if( lsmash_bits_get( bits, 1 ) ) /* aspect_ratio_info_present_flag */ + { + uint8_t aspect_ratio_idc = lsmash_bits_get( bits, 8 ); + if( aspect_ratio_idc == 255 ) + { + /* Extended_SAR */ + sps->vui.sar_width = lsmash_bits_get( bits, 16 ); + sps->vui.sar_height = lsmash_bits_get( bits, 16 ); + } + else + { + static const struct + { + uint16_t sar_width; + uint16_t sar_height; + } pre_defined_sar[] + = { + { 0, 0 }, { 1, 1 }, { 12, 11 }, { 10, 11 }, { 16, 11 }, + { 40, 33 }, { 24, 11 }, { 20, 11 }, { 32, 11 }, { 80, 33 }, + { 18, 11 }, { 15, 11 }, { 64, 33 }, { 160, 99 }, { 4, 3 }, + { 3, 2 }, { 2, 1 } + }; + if( aspect_ratio_idc < (sizeof(pre_defined_sar) / sizeof(pre_defined_sar[0])) ) + { + sps->vui.sar_width = pre_defined_sar[ aspect_ratio_idc ].sar_width; + sps->vui.sar_height = pre_defined_sar[ aspect_ratio_idc ].sar_height; + } + else + { + /* Behavior when unknown aspect_ratio_idc is detected is not specified in the specification. 
*/ + sps->vui.sar_width = 0; + sps->vui.sar_height = 0; + } + } + } + if( lsmash_bits_get( bits, 1 ) ) /* overscan_info_present_flag */ + lsmash_bits_get( bits, 1 ); /* overscan_appropriate_flag */ + if( lsmash_bits_get( bits, 1 ) ) /* video_signal_type_present_flag */ + { + lsmash_bits_get( bits, 3 ); /* video_format */ + sps->vui.video_full_range_flag = lsmash_bits_get( bits, 1 ); + if( lsmash_bits_get( bits, 1 ) ) /* colour_description_present_flag */ + { + sps->vui.colour_primaries = lsmash_bits_get( bits, 8 ); + sps->vui.transfer_characteristics = lsmash_bits_get( bits, 8 ); + sps->vui.matrix_coefficients = lsmash_bits_get( bits, 8 ); + } + } + if( lsmash_bits_get( bits, 1 ) ) /* chroma_loc_info_present_flag */ + { + h264_get_exp_golomb_ue( bits ); /* chroma_sample_loc_type_top_field */ + h264_get_exp_golomb_ue( bits ); /* chroma_sample_loc_type_bottom_field */ + } + if( lsmash_bits_get( bits, 1 ) ) /* timing_info_present_flag */ + { + sps->vui.num_units_in_tick = lsmash_bits_get( bits, 32 ); + sps->vui.time_scale = lsmash_bits_get( bits, 32 ); + sps->vui.fixed_frame_rate_flag = lsmash_bits_get( bits, 1 ); + } + int nal_hrd_parameters_present_flag = lsmash_bits_get( bits, 1 ); + if( nal_hrd_parameters_present_flag + && h264_parse_hrd_parameters( bits ) ) + return -1; + int vcl_hrd_parameters_present_flag = lsmash_bits_get( bits, 1 ); + if( vcl_hrd_parameters_present_flag + && h264_parse_hrd_parameters( bits ) ) + return -1; + if( nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag ) + { + sps->hrd_present = 1; + lsmash_bits_get( bits, 1 ); /* low_delay_hrd_flag */ + } + lsmash_bits_get( bits, 1 ); /* pic_struct_present_flag */ + if( lsmash_bits_get( bits, 1 ) ) /* bitstream_restriction_flag */ + { + lsmash_bits_get( bits, 1 ); /* motion_vectors_over_pic_boundaries_flag */ + h264_get_exp_golomb_ue( bits ); /* max_bytes_per_pic_denom */ + h264_get_exp_golomb_ue( bits ); /* max_bits_per_mb_denom */ + h264_get_exp_golomb_ue( bits ); /* 
log2_max_mv_length_horizontal */ + h264_get_exp_golomb_ue( bits ); /* log2_max_mv_length_vertical */ + h264_get_exp_golomb_ue( bits ); /* max_num_reorder_frames */ + h264_get_exp_golomb_ue( bits ); /* max_dec_frame_buffering */ + } + } + else + { + sps->vui.video_full_range_flag = 0; + sps->vui.num_units_in_tick = 1; + sps->vui.time_scale = 50; + sps->vui.fixed_frame_rate_flag = 0; + } + /* rbsp_trailing_bits() */ + IF_INVALID_VALUE( !lsmash_bits_get( bits, 1 ) ) /* rbsp_stop_one_bit */ + return -1; + lsmash_bits_empty( bits ); + if( bits->bs->error ) + return -1; + sps->present = 1; + info->sps = *sps; + return 0; +} + +static int h264_parse_pps_easy( lsmash_bits_t *bits, h264_pps_t *pps, uint8_t *rbsp_buffer, uint8_t *ebsp, uint64_t ebsp_size ) +{ + if( h264_import_rbsp_from_ebsp( bits, rbsp_buffer, ebsp, ebsp_size ) ) + return -1; + memset( pps, 0, sizeof(h264_pps_t) ); + uint64_t pic_parameter_set_id = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( pic_parameter_set_id > 255 ) + return -1; + pps->pic_parameter_set_id = pic_parameter_set_id; + return bits->bs->error ? 
-1 : 0; +} + +int h264_parse_pps( h264_info_t *info, uint8_t *rbsp_buffer, uint8_t *ebsp, uint64_t ebsp_size ) +{ + lsmash_bits_t *bits = info->bits; + /* pic_parameter_set_rbsp */ + h264_pps_t temp_pps; + if( h264_parse_pps_easy( bits, &temp_pps, rbsp_buffer, ebsp, ebsp_size ) ) + return -1; + h264_pps_t *pps = h264_get_pps( info->pps_list, temp_pps.pic_parameter_set_id ); + if( !pps ) + return -1; + memset( pps, 0, sizeof(h264_pps_t) ); + pps->pic_parameter_set_id = temp_pps.pic_parameter_set_id; + uint64_t seq_parameter_set_id = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( seq_parameter_set_id > 31 ) + return -1; + h264_sps_t *sps = h264_get_sps( info->sps_list, seq_parameter_set_id ); + if( !sps ) + return -1; + pps->seq_parameter_set_id = seq_parameter_set_id; + pps->entropy_coding_mode_flag = lsmash_bits_get( bits, 1 ); + pps->bottom_field_pic_order_in_frame_present_flag = lsmash_bits_get( bits, 1 ); + uint64_t num_slice_groups_minus1 = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( num_slice_groups_minus1 > 7 ) + return -1; + pps->num_slice_groups_minus1 = num_slice_groups_minus1; + if( num_slice_groups_minus1 ) /* num_slice_groups_minus1 */ + { + uint64_t slice_group_map_type = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( slice_group_map_type > 6 ) + return -1; + pps->slice_group_map_type = slice_group_map_type; + if( slice_group_map_type == 0 ) + for( uint64_t iGroup = 0; iGroup <= num_slice_groups_minus1; iGroup++ ) + h264_get_exp_golomb_ue( bits ); /* run_length_minus1[ iGroup ] */ + else if( slice_group_map_type == 2 ) + for( uint64_t iGroup = 0; iGroup < num_slice_groups_minus1; iGroup++ ) + { + h264_get_exp_golomb_ue( bits ); /* top_left [ iGroup ] */ + h264_get_exp_golomb_ue( bits ); /* bottom_right[ iGroup ] */ + } + else if( slice_group_map_type == 3 + || slice_group_map_type == 4 + || slice_group_map_type == 5 ) + { + lsmash_bits_get( bits, 1 ); /* slice_group_change_direction_flag */ + uint64_t slice_group_change_rate_minus1 = 
h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( slice_group_change_rate_minus1 > (sps->PicSizeInMapUnits - 1) ) + return -1; + pps->SliceGroupChangeRate = slice_group_change_rate_minus1 + 1; + } + else if( slice_group_map_type == 6 ) + { + uint64_t pic_size_in_map_units_minus1 = h264_get_exp_golomb_ue( bits ); + /* slice_group_id_length = ceil( log2( num_slice_groups_minus1 + 1 ) ); */ + uint64_t slice_group_id_length; + for( slice_group_id_length = 1; num_slice_groups_minus1 >> slice_group_id_length; slice_group_id_length++ ); + for( uint64_t i = 0; i <= pic_size_in_map_units_minus1; i++ ) + /* slice_group_id */ + IF_INVALID_VALUE( lsmash_bits_get( bits, slice_group_id_length ) > num_slice_groups_minus1 ) + return -1; + } + } + h264_get_exp_golomb_ue( bits ); /* num_ref_idx_l0_default_active_minus1 */ + h264_get_exp_golomb_ue( bits ); /* num_ref_idx_l1_default_active_minus1 */ + pps->weighted_pred_flag = lsmash_bits_get( bits, 1 ); + pps->weighted_bipred_idc = lsmash_bits_get( bits, 2 ); + h264_get_exp_golomb_se( bits ); /* pic_init_qp_minus26 */ + h264_get_exp_golomb_se( bits ); /* pic_init_qs_minus26 */ + h264_get_exp_golomb_se( bits ); /* chroma_qp_index_offset */ + pps->deblocking_filter_control_present_flag = lsmash_bits_get( bits, 1 ); + lsmash_bits_get( bits, 1 ); /* constrained_intra_pred_flag */ + pps->redundant_pic_cnt_present_flag = lsmash_bits_get( bits, 1 ); + if( h264_check_more_rbsp_data( bits ) ) + { + int transform_8x8_mode_flag = lsmash_bits_get( bits, 1 ); + if( lsmash_bits_get( bits, 1 ) ) /* pic_scaling_matrix_present_flag */ + { + int num_loops = 6 + (sps->chroma_format_idc != 3 ? 2 : 6) * transform_8x8_mode_flag; + for( int i = 0; i < num_loops; i++ ) + if( lsmash_bits_get( bits, 1 ) /* pic_scaling_list_present_flag[i] */ + && h264_parse_scaling_list( bits, i < 6 ? 
16 : 64 ) ) + return -1; + } + h264_get_exp_golomb_se( bits ); /* second_chroma_qp_index_offset */ + } + /* rbsp_trailing_bits() */ + IF_INVALID_VALUE( !lsmash_bits_get( bits, 1 ) ) /* rbsp_stop_one_bit */ + return -1; + lsmash_bits_empty( bits ); + if( bits->bs->error ) + return -1; + pps->present = 1; + info->sps = *sps; + info->pps = *pps; + return 0; +} + +int h264_parse_sei( lsmash_bits_t *bits, h264_sei_t *sei, + uint8_t *rbsp_buffer, uint8_t *ebsp, uint64_t ebsp_size ) +{ + if( h264_import_rbsp_from_ebsp( bits, rbsp_buffer, ebsp, ebsp_size ) ) + return -1; + uint8_t *rbsp_start = rbsp_buffer; + uint64_t rbsp_pos = 0; + do + { + /* sei_message() */ + uint32_t payloadType = 0; + for( uint8_t temp = lsmash_bits_get( bits, 8 ); ; temp = lsmash_bits_get( bits, 8 ) ) + { + /* 0xff : ff_byte + * otherwise: last_payload_type_byte */ + payloadType += temp; + ++rbsp_pos; + if( temp != 0xff ) + break; + } + uint32_t payloadSize = 0; + for( uint8_t temp = lsmash_bits_get( bits, 8 ); ; temp = lsmash_bits_get( bits, 8 ) ) + { + /* 0xff : ff_byte + * otherwise: last_payload_size_byte */ + payloadSize += temp; + ++rbsp_pos; + if( temp != 0xff ) + break; + } + if( payloadType == 3 ) + { + /* filler_payload + * AVC file format is forbidden to contain this. */ + return -1; + } + else if( payloadType == 6 ) + { + /* recovery_point */ + sei->present = 1; + sei->random_accessible = 1; + sei->recovery_frame_cnt = h264_get_exp_golomb_ue( bits ); + lsmash_bits_get( bits, 1 ); /* exact_match_flag */ + lsmash_bits_get( bits, 1 ); /* broken_link_flag */ + lsmash_bits_get( bits, 2 ); /* changing_slice_group_idc */ + } + else + lsmash_bits_get( bits, payloadSize * 8 ); + lsmash_bits_get_align( bits ); + rbsp_pos += payloadSize; + } while( *(rbsp_start + rbsp_pos) != 0x80 ); /* All SEI messages are byte aligned at their end. + * Therefore, 0x80 shall be rbsp_trailing_bits(). */ + lsmash_bits_empty( bits ); + return bits->bs->error ? 
-1 : 0; +} + +static int h264_parse_slice_header( h264_info_t *info, h264_nalu_header_t *nalu_header ) +{ + h264_slice_info_t *slice = &info->slice; + memset( slice, 0, sizeof(h264_slice_info_t) ); + /* slice_header() */ + lsmash_bits_t *bits = info->bits; + h264_get_exp_golomb_ue( bits ); /* first_mb_in_slice */ + uint8_t slice_type = slice->type = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( (uint64_t)slice->type > 9 ) + return -1; + if( slice_type > 4 ) + slice_type = slice->type -= 5; + uint64_t pic_parameter_set_id = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( pic_parameter_set_id > 255 ) + return -1; + slice->pic_parameter_set_id = pic_parameter_set_id; + h264_pps_t *pps = h264_get_pps( info->pps_list, pic_parameter_set_id ); + if( !pps ) + return -1; + h264_sps_t *sps = h264_get_sps( info->sps_list, pps->seq_parameter_set_id ); + if( !sps ) + return -1; + slice->nal_ref_idc = nalu_header->nal_ref_idc; + slice->IdrPicFlag = (nalu_header->nal_unit_type == 5); + slice->pic_order_cnt_type = sps->pic_order_cnt_type; + IF_INVALID_VALUE( (slice->IdrPicFlag || sps->max_num_ref_frames == 0) && slice_type != 2 && slice_type != 4 ) + return -1; + if( sps->separate_colour_plane_flag ) + lsmash_bits_get( bits, 2 ); /* colour_plane_id */ + uint64_t frame_num = lsmash_bits_get( bits, sps->log2_max_frame_num ); + IF_INVALID_VALUE( frame_num >= (1 << sps->log2_max_frame_num) || (slice->IdrPicFlag && frame_num) ) + return -1; + slice->frame_num = frame_num; + if( !sps->frame_mbs_only_flag ) + { + slice->field_pic_flag = lsmash_bits_get( bits, 1 ); + if( slice->field_pic_flag ) + slice->bottom_field_flag = lsmash_bits_get( bits, 1 ); + } + if( slice->IdrPicFlag ) + { + uint64_t idr_pic_id = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( idr_pic_id > 65535 ) + return -1; + slice->idr_pic_id = idr_pic_id; + } + if( sps->pic_order_cnt_type == 0 ) + { + uint64_t pic_order_cnt_lsb = lsmash_bits_get( bits, sps->log2_max_pic_order_cnt_lsb ); + IF_INVALID_VALUE( 
pic_order_cnt_lsb >= sps->MaxPicOrderCntLsb ) + return -1; + slice->pic_order_cnt_lsb = pic_order_cnt_lsb; + if( pps->bottom_field_pic_order_in_frame_present_flag && !slice->field_pic_flag ) + slice->delta_pic_order_cnt_bottom = h264_get_exp_golomb_se( bits ); + } + else if( sps->pic_order_cnt_type == 1 && !sps->delta_pic_order_always_zero_flag ) + { + slice->delta_pic_order_cnt[0] = h264_get_exp_golomb_se( bits ); + if( pps->bottom_field_pic_order_in_frame_present_flag && !slice->field_pic_flag ) + slice->delta_pic_order_cnt[1] = h264_get_exp_golomb_se( bits ); + } + if( pps->redundant_pic_cnt_present_flag ) + { + uint64_t redundant_pic_cnt = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( redundant_pic_cnt > 127 ) + return -1; + slice->has_redundancy = !!redundant_pic_cnt; + } + if( slice_type == H264_SLICE_TYPE_B ) + lsmash_bits_get( bits, 1 ); + uint64_t num_ref_idx_l0_active_minus1 = 0; + uint64_t num_ref_idx_l1_active_minus1 = 0; + if( slice_type == H264_SLICE_TYPE_P || slice_type == H264_SLICE_TYPE_SP || slice_type == H264_SLICE_TYPE_B ) + { + if( lsmash_bits_get( bits, 1 ) ) /* num_ref_idx_active_override_flag */ + { + num_ref_idx_l0_active_minus1 = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( num_ref_idx_l0_active_minus1 > 31 ) + return -1; + if( slice_type == H264_SLICE_TYPE_B ) + { + num_ref_idx_l1_active_minus1 = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( num_ref_idx_l1_active_minus1 > 31 ) + return -1; + } + } + } + if( nalu_header->nal_unit_type == 20 ) + { + return -1; /* No support of MVC yet */ +#if 0 + /* ref_pic_list_mvc_modification() */ + if( slice_type == H264_SLICE_TYPE_P || slice_type == H264_SLICE_TYPE_B || slice_type == H264_SLICE_TYPE_SP ) + { + if( lsmash_bits_get( bits, 1 ) ) /* (S)P: ref_pic_list_modification_flag_l0 + * B: ref_pic_list_modification_flag_l1 */ + { + uint64_t modification_of_pic_nums_idc; + do + { + modification_of_pic_nums_idc = h264_get_exp_golomb_ue( bits ); +#if 0 + if( 
modification_of_pic_nums_idc == 0 || modification_of_pic_nums_idc == 1 ) + h264_get_exp_golomb_ue( bits ); /* abs_diff_pic_num_minus1 */ + else if( modification_of_pic_nums_idc == 2 ) + h264_get_exp_golomb_ue( bits ); /* long_term_pic_num */ + else if( modification_of_pic_nums_idc == 4 || modification_of_pic_nums_idc == 5 ) + h264_get_exp_golomb_ue( bits ); /* abs_diff_view_idx_minus1 */ +#else + if( modification_of_pic_nums_idc != 3 ) + h264_get_exp_golomb_ue( bits ); /* abs_diff_pic_num_minus1, long_term_pic_num or abs_diff_view_idx_minus1 */ +#endif + } while( modification_of_pic_nums_idc != 3 ); + } +#endif + } + else + { + /* ref_pic_list_modification() */ + if( slice_type == H264_SLICE_TYPE_P || slice_type == H264_SLICE_TYPE_B || slice_type == H264_SLICE_TYPE_SP ) + { + if( lsmash_bits_get( bits, 1 ) ) /* (S)P: ref_pic_list_modification_flag_l0 + * B: ref_pic_list_modification_flag_l1 */ + { + uint64_t modification_of_pic_nums_idc; + do + { + modification_of_pic_nums_idc = h264_get_exp_golomb_ue( bits ); +#if 0 + if( modification_of_pic_nums_idc == 0 || modification_of_pic_nums_idc == 1 ) + h264_get_exp_golomb_ue( bits ); /* abs_diff_pic_num_minus1 */ + else if( modification_of_pic_nums_idc == 2 ) + h264_get_exp_golomb_ue( bits ); /* long_term_pic_num */ +#else + if( modification_of_pic_nums_idc != 3 ) + h264_get_exp_golomb_ue( bits ); /* abs_diff_pic_num_minus1 or long_term_pic_num */ +#endif + } while( modification_of_pic_nums_idc != 3 ); + } + } + } + if( (pps->weighted_pred_flag && (slice_type == H264_SLICE_TYPE_P || slice_type == H264_SLICE_TYPE_SP)) + || (pps->weighted_bipred_idc == 1 && slice_type == H264_SLICE_TYPE_B) ) + { + /* pred_weight_table() */ + h264_get_exp_golomb_ue( bits ); /* luma_log2_weight_denom */ + if( sps->ChromaArrayType ) + h264_get_exp_golomb_ue( bits ); /* chroma_log2_weight_denom */ + for( uint8_t i = 0; i <= num_ref_idx_l0_active_minus1; i++ ) + { + if( lsmash_bits_get( bits, 1 ) ) /* luma_weight_l0_flag */ + { + 
h264_get_exp_golomb_se( bits ); /* luma_weight_l0[i] */ + h264_get_exp_golomb_se( bits ); /* luma_offset_l0[i] */ + } + if( sps->ChromaArrayType + && lsmash_bits_get( bits, 1 ) /* chroma_weight_l0_flag */ ) + for( int j = 0; j < 2; j++ ) + { + h264_get_exp_golomb_se( bits ); /* chroma_weight_l0[i][j]*/ + h264_get_exp_golomb_se( bits ); /* chroma_offset_l0[i][j] */ + } + } + if( slice_type == H264_SLICE_TYPE_B ) + for( uint8_t i = 0; i <= num_ref_idx_l1_active_minus1; i++ ) + { + if( lsmash_bits_get( bits, 1 ) ) /* luma_weight_l1_flag */ + { + h264_get_exp_golomb_se( bits ); /* luma_weight_l1[i] */ + h264_get_exp_golomb_se( bits ); /* luma_offset_l1[i] */ + } + if( sps->ChromaArrayType + && lsmash_bits_get( bits, 1 ) /* chroma_weight_l1_flag */ ) + for( int j = 0; j < 2; j++ ) + { + h264_get_exp_golomb_se( bits ); /* chroma_weight_l1[i][j]*/ + h264_get_exp_golomb_se( bits ); /* chroma_offset_l1[i][j] */ + } + } + } + if( !nalu_header->nal_ref_idc ) + { + /* dec_ref_pic_marking() */ + if( slice->IdrPicFlag ) + { + lsmash_bits_get( bits, 1 ); /* no_output_of_prior_pics_flag */ + lsmash_bits_get( bits, 1 ); /* long_term_reference_flag */ + } + else if( lsmash_bits_get( bits, 1 ) ) /* adaptive_ref_pic_marking_mode_flag */ + { + uint64_t memory_management_control_operation; + do + { + memory_management_control_operation = h264_get_exp_golomb_ue( bits ); + if( memory_management_control_operation ) + { + if( memory_management_control_operation == 5 ) + slice->has_mmco5 = 1; + h264_get_exp_golomb_ue( bits ); + } + } while( memory_management_control_operation ); + } + } + /* We needn't read more if not slice data partition A. + * Skip slice_data() and rbsp_slice_trailing_bits(). 
*/ + if( nalu_header->nal_unit_type == 2 ) + { + if( pps->entropy_coding_mode_flag && slice_type != H264_SLICE_TYPE_I && slice_type != H264_SLICE_TYPE_SI ) + h264_get_exp_golomb_ue( bits ); /* cabac_init_idc */ + h264_get_exp_golomb_se( bits ); /* slice_qp_delta */ + if( slice_type == H264_SLICE_TYPE_SP || slice_type == H264_SLICE_TYPE_SI ) + { + if( slice_type == H264_SLICE_TYPE_SP ) + lsmash_bits_get( bits, 1 ); /* sp_for_switch_flag */ + h264_get_exp_golomb_se( bits ); /* slice_qs_delta */ + } + if( pps->deblocking_filter_control_present_flag + && h264_get_exp_golomb_ue( bits ) != 1 /* disable_deblocking_filter_idc */ ) + { + int64_t slice_alpha_c0_offset_div2 = h264_get_exp_golomb_se( bits ); + IF_INVALID_VALUE( slice_alpha_c0_offset_div2 < -6 || slice_alpha_c0_offset_div2 > 6 ) + return -1; + int64_t slice_beta_offset_div2 = h264_get_exp_golomb_se( bits ); + IF_INVALID_VALUE( slice_beta_offset_div2 < -6 || slice_beta_offset_div2 > 6 ) + return -1; + } + if( pps->num_slice_groups_minus1 + && (pps->slice_group_map_type == 3 || pps->slice_group_map_type == 4 || pps->slice_group_map_type == 5) ) + { + double temp = (double)sps->PicSizeInMapUnits / pps->SliceGroupChangeRate; + uint64_t slice_group_change_cycle_length = ceil( log( temp + 1 ) / 0.693147180559945 ); + uint64_t slice_group_change_cycle = lsmash_bits_get( bits, slice_group_change_cycle_length ); + IF_INVALID_VALUE( slice_group_change_cycle > (uint64_t)ceil( temp ) ) + return -1; + } + /* end of slice_header() */ + slice->slice_id = h264_get_exp_golomb_ue( bits ); + h264_slice_info_t *slice_part = h264_get_slice_info( info->slice_list, slice->slice_id ); + if( !slice_part ) + return -1; + *slice_part = *slice; + } + lsmash_bits_empty( bits ); + if( bits->bs->error ) + return -1; + info->sps = *sps; + info->pps = *pps; + return 0; +} + +int h264_parse_slice( h264_info_t *info, h264_nalu_header_t *nalu_header, + uint8_t *rbsp_buffer, uint8_t *ebsp, uint64_t ebsp_size ) +{ + lsmash_bits_t *bits = 
info->bits; + if( h264_import_rbsp_from_ebsp( bits, rbsp_buffer, ebsp, ebsp_size ) ) + return -1; + if( nalu_header->nal_unit_type != 3 && nalu_header->nal_unit_type != 4 ) + return h264_parse_slice_header( info, nalu_header ); + /* slice_data_partition_b_layer_rbsp() or slice_data_partition_c_layer_rbsp() */ + uint64_t slice_id = h264_get_exp_golomb_ue( bits ); + h264_slice_info_t *slice = h264_get_slice_info( info->slice_list, slice_id ); + if( !slice ) + return -1; + h264_pps_t *pps = h264_get_pps( info->pps_list, slice->pic_parameter_set_id ); + if( !pps ) + return -1; + h264_sps_t *sps = h264_get_sps( info->sps_list, pps->seq_parameter_set_id ); + if( !sps ) + return -1; + if( sps->separate_colour_plane_flag ) + lsmash_bits_get( bits, 2 ); /* colour_plane_id */ + if( pps->redundant_pic_cnt_present_flag ) + { + uint64_t redundant_pic_cnt = h264_get_exp_golomb_ue( bits ); + IF_INVALID_VALUE( redundant_pic_cnt > 127 ) + return -1; + slice->has_redundancy = !!redundant_pic_cnt; + } + /* Skip slice_data() and rbsp_slice_trailing_bits(). 
*/ + lsmash_bits_empty( bits ); + if( bits->bs->error ) + return -1; + info->sps = *sps; + info->pps = *pps; + return 0; +} + +static inline void h264_update_picture_type( h264_picture_info_t *picture, h264_slice_info_t *slice ) +{ + if( picture->type == H264_PICTURE_TYPE_I_P ) + { + if( slice->type == H264_SLICE_TYPE_B ) + picture->type = H264_PICTURE_TYPE_I_P_B; + else if( slice->type == H264_SLICE_TYPE_SI || slice->type == H264_SLICE_TYPE_SP ) + picture->type = H264_PICTURE_TYPE_I_SI_P_SP; + } + else if( picture->type == H264_PICTURE_TYPE_I_P_B ) + { + if( slice->type != H264_SLICE_TYPE_P && slice->type != H264_SLICE_TYPE_B && slice->type != H264_SLICE_TYPE_I ) + picture->type = H264_PICTURE_TYPE_I_SI_P_SP_B; + } + else if( picture->type == H264_PICTURE_TYPE_I ) + { + if( slice->type == H264_SLICE_TYPE_P ) + picture->type = H264_PICTURE_TYPE_I_P; + else if( slice->type == H264_SLICE_TYPE_B ) + picture->type = H264_PICTURE_TYPE_I_P_B; + else if( slice->type == H264_SLICE_TYPE_SI ) + picture->type = H264_PICTURE_TYPE_I_SI; + else if( slice->type == H264_SLICE_TYPE_SP ) + picture->type = H264_PICTURE_TYPE_I_SI_P_SP; + } + else if( picture->type == H264_PICTURE_TYPE_SI_SP ) + { + if( slice->type == H264_SLICE_TYPE_P || slice->type == H264_SLICE_TYPE_I ) + picture->type = H264_PICTURE_TYPE_I_SI_P_SP; + else if( slice->type == H264_SLICE_TYPE_B ) + picture->type = H264_PICTURE_TYPE_I_SI_P_SP_B; + } + else if( picture->type == H264_PICTURE_TYPE_SI ) + { + if( slice->type == H264_SLICE_TYPE_P ) + picture->type = H264_PICTURE_TYPE_I_SI_P_SP; + else if( slice->type == H264_SLICE_TYPE_B ) + picture->type = H264_PICTURE_TYPE_I_SI_P_SP_B; + else if( slice->type != H264_SLICE_TYPE_I ) + picture->type = H264_PICTURE_TYPE_I_SI; + else if( slice->type == H264_SLICE_TYPE_SP ) + picture->type = H264_PICTURE_TYPE_SI_SP; + } + else if( picture->type == H264_PICTURE_TYPE_I_SI ) + { + if( slice->type == H264_SLICE_TYPE_P || slice->type == H264_SLICE_TYPE_SP ) + picture->type = 
H264_PICTURE_TYPE_I_SI_P_SP; + else if( slice->type == H264_SLICE_TYPE_B ) + picture->type = H264_PICTURE_TYPE_I_SI_P_SP_B; + } + else if( picture->type == H264_PICTURE_TYPE_I_SI_P_SP ) + { + if( slice->type == H264_SLICE_TYPE_B ) + picture->type = H264_PICTURE_TYPE_I_SI_P_SP_B; + } + else if( picture->type == H264_PICTURE_TYPE_NONE ) + { + if( slice->type == H264_SLICE_TYPE_P ) + picture->type = H264_PICTURE_TYPE_I_P; + else if( slice->type == H264_SLICE_TYPE_B ) + picture->type = H264_PICTURE_TYPE_I_P_B; + else if( slice->type == H264_SLICE_TYPE_I ) + picture->type = H264_PICTURE_TYPE_I; + else if( slice->type == H264_SLICE_TYPE_SI ) + picture->type = H264_PICTURE_TYPE_SI; + else if( slice->type == H264_SLICE_TYPE_SP ) + picture->type = H264_PICTURE_TYPE_SI_SP; + } +#if 0 + fprintf( stderr, "Picture type = %s\n", picture->type == H264_PICTURE_TYPE_I_P ? "P" + : picture->type == H264_PICTURE_TYPE_I_P_B ? "B" + : picture->type == H264_PICTURE_TYPE_I ? "I" + : picture->type == H264_PICTURE_TYPE_SI ? "SI" + : picture->type == H264_PICTURE_TYPE_I_SI ? "SI" + : "SP" ); +#endif +} + +/* Shall be called at least once per picture. */ +void h264_update_picture_info_for_slice( h264_picture_info_t *picture, h264_slice_info_t *slice ) +{ + picture->has_mmco5 |= slice->has_mmco5; + picture->has_redundancy |= slice->has_redundancy; + picture->incomplete_au_has_primary |= !slice->has_redundancy; + h264_update_picture_type( picture, slice ); + slice->present = 0; /* Discard this slice info. */ +} + +/* Shall be called exactly once per picture. 
*/ +void h264_update_picture_info( h264_picture_info_t *picture, h264_slice_info_t *slice, h264_sei_t *sei ) +{ + picture->frame_num = slice->frame_num; + picture->pic_order_cnt_lsb = slice->pic_order_cnt_lsb; + picture->delta_pic_order_cnt_bottom = slice->delta_pic_order_cnt_bottom; + picture->delta_pic_order_cnt[0] = slice->delta_pic_order_cnt[0]; + picture->delta_pic_order_cnt[1] = slice->delta_pic_order_cnt[1]; + picture->field_pic_flag = slice->field_pic_flag; + picture->bottom_field_flag = slice->bottom_field_flag; + picture->idr = slice->IdrPicFlag; + picture->pic_parameter_set_id = slice->pic_parameter_set_id; + picture->disposable = (slice->nal_ref_idc == 0); + picture->random_accessible = slice->IdrPicFlag; + h264_update_picture_info_for_slice( picture, slice ); + picture->independent = picture->type == H264_PICTURE_TYPE_I || picture->type == H264_PICTURE_TYPE_I_SI; + if( sei->present ) + { + picture->random_accessible |= sei->random_accessible; + picture->recovery_frame_cnt = sei->recovery_frame_cnt; + sei->present = 0; + } +} + +int h264_find_au_delimit_by_slice_info( h264_slice_info_t *slice, h264_slice_info_t *prev_slice ) +{ + if( slice->frame_num != prev_slice->frame_num + || ((slice->pic_order_cnt_type == 0 && prev_slice->pic_order_cnt_type == 0) + && (slice->pic_order_cnt_lsb != prev_slice->pic_order_cnt_lsb + || slice->delta_pic_order_cnt_bottom != prev_slice->delta_pic_order_cnt_bottom)) + || ((slice->pic_order_cnt_type == 1 && prev_slice->pic_order_cnt_type == 1) + && (slice->delta_pic_order_cnt[0] != prev_slice->delta_pic_order_cnt[0] + || slice->delta_pic_order_cnt[1] != prev_slice->delta_pic_order_cnt[1])) + || slice->field_pic_flag != prev_slice->field_pic_flag + || slice->bottom_field_flag != prev_slice->bottom_field_flag + || slice->IdrPicFlag != prev_slice->IdrPicFlag + || slice->pic_parameter_set_id != prev_slice->pic_parameter_set_id + || ((slice->nal_ref_idc == 0 || prev_slice->nal_ref_idc == 0) + && (slice->nal_ref_idc != 
prev_slice->nal_ref_idc)) + || (slice->IdrPicFlag == 1 && prev_slice->IdrPicFlag == 1 + && slice->idr_pic_id != prev_slice->idr_pic_id) ) + return 1; + return 0; +} + +int h264_find_au_delimit_by_nalu_type( uint8_t nalu_type, uint8_t prev_nalu_type ) +{ + return ((nalu_type >= 6 && nalu_type <= 9) || (nalu_type >= 14 && nalu_type <= 18)) + && ((prev_nalu_type >= 1 && prev_nalu_type <= 5) || prev_nalu_type == 12 || prev_nalu_type == 19); +} + +int h264_supplement_buffer( h264_stream_buffer_t *buffer, h264_picture_info_t *picture, uint32_t size ) +{ + uint32_t buffer_pos_offset = buffer->pos - buffer->start; + uint32_t buffer_valid_length = buffer->end - buffer->start; + lsmash_multiple_buffers_t *bank = lsmash_resize_multiple_buffers( buffer->bank, size ); + if( !bank ) + return -1; + buffer->bank = bank; + buffer->start = lsmash_withdraw_buffer( bank, 1 ); + buffer->rbsp = lsmash_withdraw_buffer( bank, 2 ); + buffer->pos = buffer->start + buffer_pos_offset; + buffer->end = buffer->start + buffer_valid_length; + if( picture && bank->number_of_buffers == 4 ) + { + picture->au = lsmash_withdraw_buffer( bank, 3 ); + picture->incomplete_au = lsmash_withdraw_buffer( bank, 4 ); + } + return 0; +} + +uint32_t h264_update_buffer_from_access_unit( h264_info_t *info, void *src, uint32_t anticipation_bytes ) +{ + h264_stream_buffer_t *buffer = &info->buffer; + assert( anticipation_bytes < buffer->bank->buffer_size ); + uint32_t remainder_bytes = buffer->end - buffer->pos; + if( info->no_more_read ) + return remainder_bytes; + if( remainder_bytes <= anticipation_bytes ) + { + /* Move unused data to the head of buffer. */ + for( uint32_t i = 0; i < remainder_bytes; i++ ) + *(buffer->start + i) = *(buffer->pos + i); + /* Read and store the next data into the buffer. + * Move the position of buffer on the head. 
*/ + h264_data_stream_handler_t *stream = (h264_data_stream_handler_t *)src; + uint32_t wasted_data_length = LSMASH_MIN( stream->remainder_length, buffer->bank->buffer_size - remainder_bytes ); + memcpy( buffer->start + remainder_bytes, stream->data + stream->overall_wasted_length, wasted_data_length ); + stream->remainder_length -= wasted_data_length; + stream->overall_wasted_length += wasted_data_length; + remainder_bytes += wasted_data_length; + buffer->pos = buffer->start; + buffer->end = buffer->start + remainder_bytes; + info->no_more_read = (stream->remainder_length == 0); + } + return remainder_bytes; +} + +static void h264_bs_put_parameter_sets( lsmash_bs_t *bs, lsmash_entry_list_t *ps_list, uint32_t max_ps_count ) +{ + uint32_t ps_count = 0; + for( lsmash_entry_t *entry = ps_list->head; entry && ps_count < max_ps_count; entry = entry->next ) + { + isom_avcC_ps_entry_t *ps = (isom_avcC_ps_entry_t *)entry->data; + if( ps ) + { + lsmash_bs_put_be16( bs, ps->parameterSetLength ); + lsmash_bs_put_bytes( bs, ps->parameterSetLength, ps->parameterSetNALUnit ); + } + else + lsmash_bs_put_be16( bs, 0 ); + ++ps_count; + } +} + +uint8_t *lsmash_create_h264_specific_info( lsmash_h264_specific_parameters_t *param, uint32_t *data_length ) +{ + if( !param || !param->parameter_sets || !data_length ) + return NULL; + if( param->lengthSizeMinusOne != 0 && param->lengthSizeMinusOne != 1 && param->lengthSizeMinusOne != 3 ) + return NULL; + static const uint32_t max_ps_count[3] = { 31, 255, 255 }; + lsmash_entry_list_t *ps_list[3] = + { + param->parameter_sets->sps_list, /* SPS */ + param->parameter_sets->pps_list, /* PPS */ + param->parameter_sets->spsext_list /* SPSExt */ + }; + /* SPS and PPS are mandatory. */ + if( !ps_list[0] || !ps_list[0]->head || ps_list[0]->entry_count == 0 + || !ps_list[1] || !ps_list[1]->head || ps_list[1]->entry_count == 0 ) + return NULL; + /* Calculate enough buffer size. 
*/ + uint32_t buffer_size = ISOM_BASEBOX_COMMON_SIZE + 11; + for( int i = 0; i < 3; i++ ) + if( ps_list[i] ) + { + uint32_t ps_count = 0; + for( lsmash_entry_t *entry = ps_list[i]->head; entry && ps_count < max_ps_count[i]; entry = entry->next ) + { + isom_avcC_ps_entry_t *ps = (isom_avcC_ps_entry_t *)entry->data; + if( !ps ) + return NULL; + buffer_size += 2 + ps->parameterSetLength; + ++ps_count; + } + if( ps_list[i]->entry_count <= max_ps_count[i] && ps_list[i]->entry_count != ps_count ) + return NULL; /* Created specific info will be broken. */ + } + /* Set up bytestream writer. */ + uint8_t buffer[buffer_size]; + lsmash_bs_t bs = { 0 }; + bs.data = buffer; + bs.alloc = buffer_size; + /* Create an AVCConfigurationBox */ + lsmash_bs_put_be32( &bs, 0 ); /* box size */ + lsmash_bs_put_be32( &bs, ISOM_BOX_TYPE_AVCC.fourcc ); /* box type: 'avcC' */ + lsmash_bs_put_byte( &bs, 1 ); /* configurationVersion */ + lsmash_bs_put_byte( &bs, param->AVCProfileIndication ); /* AVCProfileIndication */ + lsmash_bs_put_byte( &bs, param->profile_compatibility ); /* profile_compatibility */ + lsmash_bs_put_byte( &bs, param->AVCLevelIndication ); /* AVCLevelIndication */ + lsmash_bs_put_byte( &bs, param->lengthSizeMinusOne | 0xfc ); /* lengthSizeMinusOne */ + lsmash_bs_put_byte( &bs, LSMASH_MIN( ps_list[0]->entry_count, max_ps_count[0] ) | 0xe0 ); /* numOfSequenceParameterSets */ + h264_bs_put_parameter_sets( &bs, ps_list[0], max_ps_count[0] ); /* sequenceParameterSetLength + * sequenceParameterSetNALUnit */ + lsmash_bs_put_byte( &bs, LSMASH_MIN( ps_list[1]->entry_count, max_ps_count[1] ) ); /* numOfPictureParameterSets */ + h264_bs_put_parameter_sets( &bs, ps_list[1], max_ps_count[1] ); /* pictureParameterSetLength + * pictureParameterSetNALUnit */ + if( ISOM_REQUIRES_AVCC_EXTENSION( param->AVCProfileIndication ) ) + { + lsmash_bs_put_byte( &bs, param->chroma_format | 0xfc ); /* chroma_format */ + lsmash_bs_put_byte( &bs, param->bit_depth_luma_minus8 | 0xf8 ); /* 
bit_depth_luma_minus8 */ + lsmash_bs_put_byte( &bs, param->bit_depth_chroma_minus8 | 0xf8 ); /* bit_depth_chroma_minus8 */ + if( ps_list[2] ) + { + lsmash_bs_put_byte( &bs, LSMASH_MIN( ps_list[2]->entry_count, max_ps_count[2] ) ); /* numOfSequenceParameterSetExt */ + h264_bs_put_parameter_sets( &bs, ps_list[2], max_ps_count[2] ); /* sequenceParameterSetExtLength + * sequenceParameterSetExtNALUnit */ + } + else /* no sequence parameter set extensions */ + lsmash_bs_put_byte( &bs, 0 ); /* numOfSequenceParameterSetExt */ + } + uint8_t *data = lsmash_bs_export_data( &bs, data_length ); + /* Update box size. */ + data[0] = ((*data_length) >> 24) & 0xff; + data[1] = ((*data_length) >> 16) & 0xff; + data[2] = ((*data_length) >> 8) & 0xff; + data[3] = (*data_length) & 0xff; + return data; +} + +static int h264_get_sps_id( uint8_t *ps_ebsp, uint32_t ps_ebsp_length, uint8_t *ps_id ) +{ + /* max number of bits of sps_id = 11: 0b000001XXXXX + * (24 + 11 - 1) / 8 + 1 = 5 bytes + * Why +1? Because there might be an emulation_prevention_three_byte. */ + lsmash_bits_t bits = { 0 }; + lsmash_bs_t bs = { 0 }; + uint8_t rbsp_buffer[6]; + uint8_t buffer [6]; + bs.data = buffer; + bs.alloc = 6; + lsmash_bits_init( &bits, &bs ); + if( h264_import_rbsp_from_ebsp( &bits, rbsp_buffer, ps_ebsp, LSMASH_MIN( ps_ebsp_length, 6 ) ) ) + return -1; + lsmash_bits_get( &bits, 24 ); /* profile_idc, constraint_set_flags and level_idc */ + uint64_t sec_parameter_set_id = h264_get_exp_golomb_ue( &bits ); + IF_INVALID_VALUE( sec_parameter_set_id > 31 ) + return -1; + *ps_id = sec_parameter_set_id; + return bs.error ? -1 : 0; +} + +static int h264_get_pps_id( uint8_t *ps_ebsp, uint32_t ps_ebsp_length, uint8_t *ps_id ) +{ + /* max number of bits of pps_id = 17: 0b000000001XXXXXXXX + * (17 - 1) / 8 + 1 = 3 bytes + * Why +1? Because there might be an emulation_prevention_three_byte. 
*/ + lsmash_bits_t bits = { 0 }; + lsmash_bs_t bs = { 0 }; + uint8_t rbsp_buffer[4]; + uint8_t buffer [4]; + bs.data = buffer; + bs.alloc = 4; + lsmash_bits_init( &bits, &bs ); + if( h264_import_rbsp_from_ebsp( &bits, rbsp_buffer, ps_ebsp, LSMASH_MIN( ps_ebsp_length, 4 ) ) ) + return -1; + uint64_t pic_parameter_set_id = h264_get_exp_golomb_ue( &bits ); + IF_INVALID_VALUE( pic_parameter_set_id > 255 ) + return -1; + *ps_id = pic_parameter_set_id; + return bs.error ? -1 : 0; +} + +static inline int h264_get_ps_id( uint8_t *ps_ebsp, uint32_t ps_ebsp_length, + uint8_t *ps_id, lsmash_h264_parameter_set_type ps_type ) +{ + int (*get_ps_id)( uint8_t *ps_ebsp, uint32_t ps_ebsp_length, uint8_t *ps_id ) + = ps_type == H264_PARAMETER_SET_TYPE_SPS ? h264_get_sps_id + : ps_type == H264_PARAMETER_SET_TYPE_PPS ? h264_get_pps_id + : NULL; + return get_ps_id ? get_ps_id( ps_ebsp, ps_ebsp_length, ps_id ) : -1; +} + +static inline lsmash_entry_list_t *h264_get_parameter_set_list( lsmash_h264_specific_parameters_t *param, + lsmash_h264_parameter_set_type ps_type ) +{ + if( !param->parameter_sets ) + return NULL; + return ps_type == H264_PARAMETER_SET_TYPE_SPS ? param->parameter_sets->sps_list + : ps_type == H264_PARAMETER_SET_TYPE_PPS ? param->parameter_sets->pps_list + : ps_type == H264_PARAMETER_SET_TYPE_SPSEXT ? param->parameter_sets->spsext_list + : NULL; +} + +static lsmash_entry_t *h264_get_ps_entry_from_param( lsmash_h264_specific_parameters_t *param, + lsmash_h264_parameter_set_type ps_type, + uint8_t ps_id ) +{ + int (*get_ps_id)( uint8_t *ps_ebsp, uint32_t ps_ebsp_length, uint8_t *ps_id ) + = ps_type == H264_PARAMETER_SET_TYPE_SPS ? h264_get_sps_id + : ps_type == H264_PARAMETER_SET_TYPE_PPS ? 
h264_get_pps_id + : NULL; + if( !get_ps_id ) + return NULL; + lsmash_entry_list_t *ps_list = h264_get_parameter_set_list( param, ps_type ); + if( !ps_list ) + return NULL; + for( lsmash_entry_t *entry = ps_list->head; entry; entry = entry->next ) + { + isom_avcC_ps_entry_t *ps = (isom_avcC_ps_entry_t *)entry->data; + if( !ps ) + return NULL; + uint8_t param_ps_id; + if( get_ps_id( ps->parameterSetNALUnit + 1, ps->parameterSetLength - 1, &param_ps_id ) ) + return NULL; + if( ps_id == param_ps_id ) + return entry; + } + return NULL; +} + +static inline int h264_get_max_ps_length( lsmash_entry_list_t *ps_list, uint32_t *max_ps_length ) +{ + *max_ps_length = 0; + for( lsmash_entry_t *entry = ps_list->head; entry; entry = entry->next ) + { + isom_avcC_ps_entry_t *ps = (isom_avcC_ps_entry_t *)entry->data; + if( !ps ) + return -1; + *max_ps_length = LSMASH_MAX( *max_ps_length, ps->parameterSetLength ); + } + return 0; +} + +static inline int h264_get_ps_count( lsmash_entry_list_t *ps_list, uint32_t *ps_count ) +{ + *ps_count = 0; + for( lsmash_entry_t *entry = ps_list->head; entry; entry = entry->next ) + { + isom_avcC_ps_entry_t *ps = (isom_avcC_ps_entry_t *)entry->data; + if( !ps ) + return -1; + ++(*ps_count); + } + return 0; +} + +static inline int h264_check_same_ps_existence( lsmash_entry_list_t *ps_list, void *ps_data, uint32_t ps_length ) +{ + for( lsmash_entry_t *entry = ps_list->head; entry; entry = entry->next ) + { + isom_avcC_ps_entry_t *ps = (isom_avcC_ps_entry_t *)entry->data; + if( !ps ) + return -1; + if( ps->parameterSetLength == ps_length && !memcmp( ps->parameterSetNALUnit, ps_data, ps_length ) ) + return 1; /* The same parameter set already exists. 
*/ + } + return 0; +} + +static inline int h264_validate_ps_type( lsmash_h264_parameter_set_type ps_type, void *ps_data, uint32_t ps_length ) +{ + if( !ps_data || ps_length < 2 ) + return -1; + if( ps_type != H264_PARAMETER_SET_TYPE_SPS + && ps_type != H264_PARAMETER_SET_TYPE_PPS + && ps_type != H264_PARAMETER_SET_TYPE_SPSEXT ) + return -1; + uint8_t nalu_type = *((uint8_t *)ps_data) & 0x1f; + if( nalu_type != 7 && nalu_type != 8 && nalu_type != 13 ) + return -1; + if( (ps_type == H264_PARAMETER_SET_TYPE_SPS && nalu_type != 7) + || (ps_type == H264_PARAMETER_SET_TYPE_PPS && nalu_type != 8) + || (ps_type == H264_PARAMETER_SET_TYPE_SPSEXT && nalu_type != 13) ) + return -1; + return 0; +} + +/* Return 1 if a new parameter set is appendable. + * Return 0 if no need to append a new parameter set. + * Return -1 if there is error. + * Return -2 if a new specific info is needed. */ +int lsmash_check_h264_parameter_set_appendable( lsmash_h264_specific_parameters_t *param, + lsmash_h264_parameter_set_type ps_type, + void *ps_data, uint32_t ps_length ) +{ + if( !param ) + return -1; + if( h264_validate_ps_type( ps_type, ps_data, ps_length ) ) + return -1; + if( ps_type == H264_PARAMETER_SET_TYPE_SPSEXT + && !ISOM_REQUIRES_AVCC_EXTENSION( param->AVCProfileIndication ) ) + return 0; + /* Check whether the same parameter set already exists or not. */ + lsmash_entry_list_t *ps_list = h264_get_parameter_set_list( param, ps_type ); + if( !ps_list || !ps_list->head ) + return 1; /* No parameter set */ + switch( h264_check_same_ps_existence( ps_list, ps_data, ps_length ) ) + { + case 0 : break; + case 1 : return 0; /* The same parameter set already exists. */ + default : return -1; /* An error occurred. 
*/ + } + uint32_t max_ps_length; + if( h264_get_max_ps_length( ps_list, &max_ps_length ) ) + return -1; + max_ps_length = LSMASH_MAX( max_ps_length, ps_length ); + uint32_t ps_count; + if( h264_get_ps_count( ps_list, &ps_count ) ) + return -1; + if( (ps_type == H264_PARAMETER_SET_TYPE_SPS && ps_count >= 31) + || (ps_type == H264_PARAMETER_SET_TYPE_PPS && ps_count >= 255) + || (ps_type == H264_PARAMETER_SET_TYPE_SPSEXT && ps_count >= 255) ) + return -2; /* No more appendable parameter sets. */ + if( ps_type == H264_PARAMETER_SET_TYPE_SPSEXT ) + return 1; + /* Check whether a new specific info is needed or not. */ + lsmash_bits_t bits = { 0 }; + lsmash_bs_t bs = { 0 }; + uint8_t rbsp_buffer[max_ps_length]; + uint8_t buffer [max_ps_length]; + bs.data = buffer; + bs.alloc = max_ps_length; + lsmash_bits_init( &bits, &bs ); + if( ps_type == H264_PARAMETER_SET_TYPE_PPS ) + { + /* PPS */ + uint8_t pps_id; + if( h264_get_pps_id( ps_data + 1, ps_length - 1, &pps_id ) ) + return -1; + for( lsmash_entry_t *entry = ps_list->head; entry; entry = entry->next ) + { + isom_avcC_ps_entry_t *ps = (isom_avcC_ps_entry_t *)entry->data; + if( !ps ) + return -1; + uint8_t param_pps_id; + if( h264_get_pps_id( ps->parameterSetNALUnit + 1, ps->parameterSetLength - 1, &param_pps_id ) ) + return -1; + if( pps_id == param_pps_id ) + return -2; /* PPS that has the same pic_parameter_set_id already exists with different form. */ + } + return 0; + } + /* SPS */ + h264_sps_t sps; + if( h264_parse_sps_easy( &bits, &sps, rbsp_buffer, ps_data + 1, ps_length - 1 ) ) + return -1; + lsmash_bits_empty( &bits ); + /* FIXME; If the sequence parameter sets are marked with different profiles, + * and the relevant profile compatibility flags are all zero, + * then the stream may need examination to determine which profile, if any, the stream conforms to. 
+ * If the stream is not examined, or the examination reveals that there is no profile to which the stream conforms, + * then the stream must be split into two or more sub-streams with separate configuration records in which these rules can be met. */ +#if 0 + if( sps.profile_idc != param->AVCProfileIndication && (sps->constraint_set_flags & param->profile_compatibility) ) +#else + if( sps.profile_idc != param->AVCProfileIndication ) +#endif + return -2; + /* The values of chroma_format_idc, bit_depth_luma_minus8 and bit_depth_chroma_minus8 + * must be identical in all SPSs in a single AVC configuration record. */ + if( ISOM_REQUIRES_AVCC_EXTENSION( param->AVCProfileIndication ) + && (sps.chroma_format_idc != param->chroma_format + || sps.bit_depth_luma_minus8 != param->bit_depth_luma_minus8 + || sps.bit_depth_chroma_minus8 != param->bit_depth_chroma_minus8) ) + return -2; + /* Forbidden to duplicate SPS that has the same seq_parameter_set_id with different form within the same configuration record. */ + uint8_t sps_id = sps.seq_parameter_set_id; + for( lsmash_entry_t *entry = ps_list->head; entry; entry = entry->next ) + { + isom_avcC_ps_entry_t *ps = (isom_avcC_ps_entry_t *)entry->data; + if( !ps ) + return -1; + uint8_t param_sps_id; + if( h264_get_sps_id( ps->parameterSetNALUnit + 1, ps->parameterSetLength - 1, &param_sps_id ) ) + return -1; + if( sps_id == param_sps_id ) + return -2; /* SPS that has the same seq_parameter_set_id already exists with different form. 
*/ + } + return 0; +} + +int lsmash_append_h264_parameter_set( lsmash_h264_specific_parameters_t *param, + lsmash_h264_parameter_set_type ps_type, + void *ps_data, uint32_t ps_length ) +{ + if( !param || !ps_data || ps_length < 2 ) + return -1; + if( !param->parameter_sets ) + { + param->parameter_sets = lsmash_malloc_zero( sizeof(lsmash_h264_parameter_sets_t) ); + if( !param->parameter_sets ) + return -1; + } + lsmash_entry_list_t *ps_list = h264_get_parameter_set_list( param, ps_type ); + if( !ps_list ) + return -1; + if( ps_type != H264_PARAMETER_SET_TYPE_SPS + && ps_type != H264_PARAMETER_SET_TYPE_PPS + && ps_type != H264_PARAMETER_SET_TYPE_SPSEXT ) + return -1; + if( ps_type == H264_PARAMETER_SET_TYPE_SPSEXT ) + { + if( !ISOM_REQUIRES_AVCC_EXTENSION( param->AVCProfileIndication ) ) + return 0; + isom_avcC_ps_entry_t *ps = isom_create_ps_entry( ps_data, ps_length ); + if( !ps ) + return -1; + if( lsmash_add_entry( ps_list, ps ) ) + { + isom_remove_avcC_ps( ps ); + return -1; + } + return 0; + } + /* Check if the same parameter set identifier already exists. */ + uint8_t ps_id; + if( h264_get_ps_id( ps_data + 1, ps_length - 1, &ps_id, ps_type ) ) + return -1; + lsmash_entry_t *entry = h264_get_ps_entry_from_param( param, ps_type, ps_id ); + if( entry ) + return -1; /* The same parameter set identifier already exists. */ + isom_avcC_ps_entry_t *ps = isom_create_ps_entry( ps_data, ps_length ); + if( !ps ) + return -1; + if( lsmash_add_entry( ps_list, ps ) ) + { + isom_remove_avcC_ps( ps ); + return -1; + } + if( ps_type == H264_PARAMETER_SET_TYPE_SPS ) + { + /* Update specific info with SPS. 
*/ + lsmash_bits_t bits = { 0 }; + lsmash_bs_t bs = { 0 }; + uint8_t rbsp_buffer[ps_length]; + uint8_t buffer [ps_length]; + bs.data = buffer; + bs.alloc = ps_length; + lsmash_bits_init( &bits, &bs ); + h264_sps_t sps; + if( h264_parse_sps_easy( &bits, &sps, rbsp_buffer, ps_data + 1, ps_length - 1 ) ) + { + lsmash_remove_entry_direct( ps_list, ps_list->tail, isom_remove_avcC_ps ); + return -1; + } + if( ps_list->entry_count == 1 ) + param->profile_compatibility = 0xff; + param->AVCProfileIndication = sps.profile_idc; + param->profile_compatibility &= sps.constraint_set_flags; + param->AVCLevelIndication = LSMASH_MAX( param->AVCLevelIndication, sps.level_idc ); + param->chroma_format = sps.chroma_format_idc; + param->bit_depth_luma_minus8 = sps.bit_depth_luma_minus8; + param->bit_depth_chroma_minus8 = sps.bit_depth_chroma_minus8; + } + /* Add a new parameter set in order of ascending parameter set identifier. */ + int append_head = 0; + if( ps_id ) + for( int i = ps_id - 1; i; i-- ) + { + entry = h264_get_ps_entry_from_param( param, ps_type, i ); + if( entry ) + break; + } + if( ps_id == 0 || !entry ) + { + /* Couldn't find parameter set with lower identifier. + * Next, find parameter set with upper identifier. */ + int max_ps_id = ps_type == H264_PARAMETER_SET_TYPE_SPS ? 31 : 255; + for( int i = ps_id + 1; i <= max_ps_id; i++ ) + { + entry = h264_get_ps_entry_from_param( param, ps_type, i ); + if( entry ) + break; + } + if( entry ) + append_head = 1; + } + if( !entry ) + return 0; /* The new entry was appended to tail. */ + lsmash_entry_t *new_entry = ps_list->tail; + if( append_head ) + { + /* before: entry[i > ps_id] ... -> prev_entry -> new_entry[ps_id] + * after: new_entry[ps_id] -> entry[i > ps_id] -> ... -> prev_entry */ + if( new_entry->prev ) + new_entry->prev->next = NULL; + new_entry->prev = NULL; + entry->prev = new_entry; + new_entry->next = entry; + return 0; + } + /* before: entry[i < ps_id] -> next_entry -> ... 
-> prev_entry -> new_entry[ps_id] + * after: entry[i < ps_id] -> new_entry[ps_id] -> next_entry -> ... -> prev_entry */ + if( new_entry->prev ) + new_entry->prev->next = NULL; + new_entry->prev = entry; + new_entry->next = entry->next; + if( entry->next ) + entry->next->prev = new_entry; + entry->next = new_entry; + return 0; +} + +int h264_try_to_append_parameter_set( h264_info_t *info, lsmash_h264_parameter_set_type ps_type, void *ps_data, uint32_t ps_length ) +{ + lsmash_h264_specific_parameters_t *param = &info->avcC_param; + int ret = lsmash_check_h264_parameter_set_appendable( param, ps_type, ps_data, ps_length ); + switch( ret ) + { + case -1 : /* Error */ + case -2 : /* Mulitiple sample description is needed. */ + return ret; + case 1 : /* Appendable */ + switch( ps_type ) + { + case H264_PARAMETER_SET_TYPE_SPS : + if( h264_parse_sps( info, info->buffer.rbsp, ps_data + 1, ps_length - 1 ) ) + return -1; + break; + case H264_PARAMETER_SET_TYPE_PPS : + if( h264_parse_pps( info, info->buffer.rbsp, ps_data + 1, ps_length - 1 ) ) + return -1; + break; + default : + break; + } + return lsmash_append_h264_parameter_set( param, ps_type, ps_data, ps_length ); + default : /* No need to append */ + return 0; + } +} + +static int h264_parse_succeeded( h264_info_t *info, lsmash_h264_specific_parameters_t *param ) +{ + int ret; + if( info->sps.present && info->pps.present ) + { + *param = info->avcC_param; + /* Avoid freeing parameter sets. 
*/ + info->avcC_param.parameter_sets = NULL; + ret = 0; + } + else + ret = -1; + h264_cleanup_parser( info ); + return ret; +} + +static inline int h264_parse_failed( h264_info_t *info ) +{ + h264_cleanup_parser( info ); + return -1; +} + +int lsmash_setup_h264_specific_parameters_from_access_unit( lsmash_h264_specific_parameters_t *param, uint8_t *data, uint32_t data_length ) +{ + if( !param || !data || data_length == 0 ) + return -1; + h264_info_t handler = { { 0 } }; + h264_info_t *info = &handler; + if( h264_setup_parser( info, 1, h264_update_buffer_from_access_unit ) ) + return h264_parse_failed( info ); + h264_stream_buffer_t *buffer = &info->buffer; + h264_slice_info_t *slice = &info->slice; + h264_data_stream_handler_t stream = { 0 }; + stream.data = data; + stream.remainder_length = data_length; + h264_nalu_header_t nalu_header = { 0 }; + uint64_t consecutive_zero_byte_count = 0; + uint64_t ebsp_length = 0; + int no_more_buf = 0; + int complete_au = 0; + while( 1 ) + { + buffer->update( info, &stream, 2 ); + no_more_buf = buffer->pos >= buffer->end; + int no_more = info->no_more_read && no_more_buf; + if( !h264_check_next_short_start_code( buffer->pos, buffer->end ) && !no_more ) + { + if( *(buffer->pos ++) ) + consecutive_zero_byte_count = 0; + else + ++consecutive_zero_byte_count; + ++ebsp_length; + continue; + } + if( no_more && ebsp_length == 0 ) + /* For the last NALU. This NALU already has been parsed. */ + return h264_parse_succeeded( info, param ); + uint64_t next_nalu_head_pos = info->ebsp_head_pos + ebsp_length + !no_more * H264_SHORT_START_CODE_LENGTH; + uint8_t *next_short_start_code_pos = buffer->pos; /* Memorize position of short start code of the next NALU in buffer. + * This is used when backward reading of stream doesn't occur. */ + uint8_t nalu_type = nalu_header.nal_unit_type; + int read_back = 0; + if( nalu_type == 12 ) + { + /* We don't support streams with both filler and HRD yet. 
+ * Otherwise, just skip filler because elemental streams defined in 14496-15 are forbidden to use filler. */ + if( info->sps.hrd_present ) + return h264_parse_failed( info ); + } + else if( (nalu_type >= 1 && nalu_type <= 13) || nalu_type == 19 ) + { + /* Get the EBSP of the current NALU here. + * AVC elemental stream defined in 14496-15 can recognize from 0 to 13, and 19 of nal_unit_type. + * We don't support SVC and MVC elemental stream defined in 14496-15 yet. */ + ebsp_length -= consecutive_zero_byte_count; /* Any EBSP doesn't have zero bytes at the end. */ + uint64_t nalu_length = nalu_header.length + ebsp_length; + if( buffer->bank->buffer_size < (H264_DEFAULT_NALU_LENGTH_SIZE + nalu_length) ) + { + if( h264_supplement_buffer( buffer, NULL, 2 * (H264_DEFAULT_NALU_LENGTH_SIZE + nalu_length) ) ) + return h264_parse_failed( info ); + next_short_start_code_pos = buffer->pos; + } + /* Move to the first byte of the current NALU. */ + read_back = (buffer->pos - buffer->start) < (nalu_length + consecutive_zero_byte_count); + if( read_back ) + { + memcpy( buffer->start, stream.data + info->ebsp_head_pos - nalu_header.length, nalu_length ); + buffer->pos = buffer->start; + buffer->end = buffer->start + nalu_length; + } + else + buffer->pos -= nalu_length + consecutive_zero_byte_count; + if( nalu_type >= 1 && nalu_type <= 5 ) + { + /* VCL NALU (slice) */ + h264_slice_info_t prev_slice = *slice; + if( h264_parse_slice( info, &nalu_header, buffer->rbsp, + buffer->pos + nalu_header.length, ebsp_length ) ) + return h264_parse_failed( info ); + if( prev_slice.present ) + { + /* Check whether the AU that contains the previous VCL NALU completed or not. */ + if( h264_find_au_delimit_by_slice_info( slice, &prev_slice ) ) + /* The current NALU is the first VCL NALU of the primary coded picture of an new AU. + * Therefore, the previous slice belongs to that new AU. 
*/ + complete_au = 1; + } + slice->present = 1; + } + else + { + if( h264_find_au_delimit_by_nalu_type( nalu_type, info->prev_nalu_type ) ) + { + /* The last slice belongs to the AU you want at this time. */ + slice->present = 0; + complete_au = 1; + } + else if( no_more ) + complete_au = 1; + switch( nalu_type ) + { + case 7 : /* Sequence Parameter Set */ + if( h264_try_to_append_parameter_set( info, H264_PARAMETER_SET_TYPE_SPS, buffer->pos, nalu_length ) ) + return h264_parse_failed( info ); + break; + case 8 : /* Picture Parameter Set */ + if( h264_try_to_append_parameter_set( info, H264_PARAMETER_SET_TYPE_PPS, buffer->pos, nalu_length ) ) + return h264_parse_failed( info ); + break; + case 13 : /* Sequence Parameter Set Extension */ + if( h264_try_to_append_parameter_set( info, H264_PARAMETER_SET_TYPE_SPSEXT, buffer->pos, nalu_length ) ) + return h264_parse_failed( info ); + break; + default : + break; + } + } + } + /* Move to the first byte of the next NALU. */ + if( read_back ) + { + uint64_t wasted_data_length = LSMASH_MIN( stream.remainder_length, buffer->bank->buffer_size ); + memcpy( buffer->start, stream.data + next_nalu_head_pos, wasted_data_length ); + stream.overall_wasted_length = next_nalu_head_pos + wasted_data_length; + stream.remainder_length = data_length - stream.overall_wasted_length; + buffer->pos = buffer->start; + buffer->end = buffer->start + wasted_data_length; + } + else + buffer->pos = next_short_start_code_pos + H264_SHORT_START_CODE_LENGTH; + info->prev_nalu_type = nalu_type; + buffer->update( info, &stream, 0 ); + no_more_buf = buffer->pos >= buffer->end; + ebsp_length = 0; + no_more = info->no_more_read && no_more_buf; + if( !no_more && !complete_au ) + { + /* Check the next NALU header. 
*/ + if( h264_check_nalu_header( &nalu_header, &buffer->pos, !!consecutive_zero_byte_count ) ) + return h264_parse_failed( info ); + info->ebsp_head_pos = next_nalu_head_pos + nalu_header.length; + } + else + return h264_parse_succeeded( info, param ); + consecutive_zero_byte_count = 0; + } +} + +static int isom_get_avcC_ps( lsmash_bs_t *bs, lsmash_entry_list_t *list, uint8_t entry_count ) +{ + for( uint8_t i = 0; i < entry_count; i++ ) + { + isom_avcC_ps_entry_t *data = malloc( sizeof(isom_avcC_ps_entry_t) ); + if( !data ) + return -1; + if( lsmash_add_entry( list, data ) ) + { + free( data ); + return -1; + } + data->parameterSetLength = lsmash_bs_get_be16( bs ); + data->parameterSetNALUnit = lsmash_bs_get_bytes( bs, data->parameterSetLength ); + if( !data->parameterSetNALUnit ) + { + lsmash_remove_entries( list, isom_remove_avcC_ps ); + return -1; + } + } + return 0; +} + +int h264_construct_specific_parameters( lsmash_codec_specific_t *dst, lsmash_codec_specific_t *src ) +{ + assert( dst && dst->data.structured && src && src->data.unstructured ); + if( src->size < ISOM_BASEBOX_COMMON_SIZE + 7 ) + return -1; + lsmash_h264_specific_parameters_t *param = (lsmash_h264_specific_parameters_t *)dst->data.structured; + uint8_t *data = src->data.unstructured; + uint64_t size = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; + data += ISOM_BASEBOX_COMMON_SIZE; + if( size == 1 ) + { + size = ((uint64_t)data[0] << 56) | ((uint64_t)data[1] << 48) | ((uint64_t)data[2] << 40) | ((uint64_t)data[3] << 32) + | ((uint64_t)data[4] << 24) | ((uint64_t)data[5] << 16) | ((uint64_t)data[6] << 8) | (uint64_t)data[7]; + data += 8; + } + if( size != src->size ) + return -1; + if( !param->parameter_sets ) + { + param->parameter_sets = lsmash_malloc_zero( sizeof(lsmash_h264_parameter_sets_t) ); + if( !param->parameter_sets ) + return -1; + } + lsmash_bs_t *bs = lsmash_bs_create( NULL ); + if( !bs ) + return -1; + if( lsmash_bs_import_data( bs, data, src->size - 
(src->data.unstructured - data) ) ) + goto fail; + if( lsmash_bs_get_byte( bs ) != 1 ) + goto fail; /* We don't support configurationVersion other than 1. */ + param->AVCProfileIndication = lsmash_bs_get_byte( bs ); + param->profile_compatibility = lsmash_bs_get_byte( bs ); + param->AVCLevelIndication = lsmash_bs_get_byte( bs ); + param->lengthSizeMinusOne = lsmash_bs_get_byte( bs ) & 0x03; + uint8_t numOfSequenceParameterSets = lsmash_bs_get_byte( bs ) & 0x1F; + if( numOfSequenceParameterSets + && isom_get_avcC_ps( bs, param->parameter_sets->sps_list, numOfSequenceParameterSets ) ) + goto fail; + uint8_t numOfPictureParameterSets = lsmash_bs_get_byte( bs ); + if( numOfPictureParameterSets + && isom_get_avcC_ps( bs, param->parameter_sets->pps_list, numOfPictureParameterSets ) ) + goto fail; + if( ISOM_REQUIRES_AVCC_EXTENSION( param->AVCProfileIndication ) ) + { + param->chroma_format = lsmash_bs_get_byte( bs ) & 0x03; + param->bit_depth_luma_minus8 = lsmash_bs_get_byte( bs ) & 0x07; + param->bit_depth_chroma_minus8 = lsmash_bs_get_byte( bs ) & 0x07; + uint8_t numOfSequenceParameterSetExt = lsmash_bs_get_byte( bs ); + if( numOfSequenceParameterSetExt + && isom_get_avcC_ps( bs, param->parameter_sets->spsext_list, numOfSequenceParameterSetExt ) ) + goto fail; + } + lsmash_bs_cleanup( bs ); + return 0; +fail: + lsmash_bs_cleanup( bs ); + return -1; +} + +int h264_print_codec_specific( FILE *fp, lsmash_root_t *root, isom_box_t *box, int level ) +{ + assert( fp && root && box ); + int indent = level; + lsmash_ifprintf( fp, indent++, "[%s: AVC Configuration Box]\n", isom_4cc2str( box->type.fourcc ) ); + lsmash_ifprintf( fp, indent, "position = %"PRIu64"\n", box->pos ); + lsmash_ifprintf( fp, indent, "size = %"PRIu64"\n", box->size ); + isom_extension_box_t *ext = (isom_extension_box_t *)box; + assert( ext->format == EXTENSION_FORMAT_BINARY ); + uint8_t *data = ext->form.binary; + uint32_t offset = isom_skip_box_common( &data ); + lsmash_bs_t *bs = lsmash_bs_create( NULL 
); + if( !bs ) + return -1; + if( lsmash_bs_import_data( bs, data, ext->size - offset ) ) + { + lsmash_bs_cleanup( bs ); + return -1; + } + lsmash_ifprintf( fp, indent, "configurationVersion = %"PRIu8"\n", lsmash_bs_get_byte( bs ) ); + uint8_t AVCProfileIndication = lsmash_bs_get_byte( bs ); + lsmash_ifprintf( fp, indent, "AVCProfileIndication = %"PRIu8"\n", AVCProfileIndication ); + lsmash_ifprintf( fp, indent, "profile_compatibility = 0x%02"PRIx8"\n", lsmash_bs_get_byte( bs ) ); + lsmash_ifprintf( fp, indent, "AVCLevelIndication = %"PRIu8"\n", lsmash_bs_get_byte( bs ) ); + uint8_t temp8 = lsmash_bs_get_byte( bs ); + lsmash_ifprintf( fp, indent, "reserved = 0x%02"PRIx8"\n", (temp8 >> 2) & 0x3F ); + lsmash_ifprintf( fp, indent, "lengthSizeMinusOne = %"PRIu8"\n", temp8 & 0x03 ); + temp8 = lsmash_bs_get_byte( bs ); + lsmash_ifprintf( fp, indent, "reserved = 0x%02"PRIx8"\n", (temp8 >> 5) & 0x07 ); + uint8_t numOfSequenceParameterSets = temp8 & 0x1f; + lsmash_ifprintf( fp, indent, "numOfSequenceParameterSets = %"PRIu8"\n", numOfSequenceParameterSets ); + for( uint8_t i = 0; i < numOfSequenceParameterSets; i++ ) + { + uint16_t parameterSetLength = lsmash_bs_get_be16( bs ); + lsmash_bs_get_bytes( bs, parameterSetLength ); + } + uint8_t numOfPictureParameterSets = lsmash_bs_get_byte( bs ); + lsmash_ifprintf( fp, indent, "numOfPictureParameterSets = %"PRIu8"\n", numOfPictureParameterSets ); + for( uint8_t i = 0; i < numOfPictureParameterSets; i++ ) + { + uint16_t parameterSetLength = lsmash_bs_get_be16( bs ); + lsmash_bs_get_bytes( bs, parameterSetLength ); + } + /* Note: there are too many files, in the world, that don't contain the following fields. 
*/ + if( ISOM_REQUIRES_AVCC_EXTENSION( AVCProfileIndication ) + && (lsmash_bs_get_pos( bs ) < (ext->size - offset)) ) + { + temp8 = lsmash_bs_get_byte( bs ); + lsmash_ifprintf( fp, indent, "reserved = 0x%02"PRIx8"\n", (temp8 >> 2) & 0x3F ); + lsmash_ifprintf( fp, indent, "chroma_format = %"PRIu8"\n", temp8 & 0x03 ); + temp8 = lsmash_bs_get_byte( bs ); + lsmash_ifprintf( fp, indent, "reserved = 0x%02"PRIx8"\n", (temp8 >> 3) & 0x1F ); + lsmash_ifprintf( fp, indent, "bit_depth_luma_minus8 = %"PRIu8"\n", temp8 & 0x7 ); + temp8 = lsmash_bs_get_byte( bs ); + lsmash_ifprintf( fp, indent, "reserved = 0x%02"PRIx8"\n", (temp8 >> 3) & 0x1F ); + lsmash_ifprintf( fp, indent, "bit_depth_chroma_minus8 = %"PRIu8"\n", temp8 & 0x7 ); + lsmash_ifprintf( fp, indent, "numOfSequenceParameterSetExt = %"PRIu8"\n", lsmash_bs_get_byte( bs ) ); + } + lsmash_bs_cleanup( bs ); + return 0; +} + +int h264_copy_codec_specific( lsmash_codec_specific_t *dst, lsmash_codec_specific_t *src ) +{ + assert( src && src->format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED && src->data.structured ); + assert( dst && dst->format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED && dst->data.structured ); + lsmash_h264_specific_parameters_t *src_data = (lsmash_h264_specific_parameters_t *)src->data.structured; + lsmash_h264_specific_parameters_t *dst_data = (lsmash_h264_specific_parameters_t *)dst->data.structured; + lsmash_destroy_h264_parameter_sets( dst_data ); + *dst_data = *src_data; + if( !src_data->parameter_sets ) + return 0; + dst_data->parameter_sets = lsmash_malloc_zero( sizeof(lsmash_h264_parameter_sets_t) ); + if( !dst_data->parameter_sets ) + return -1; + for( int i = 0; i < 3; i++ ) + { + lsmash_entry_list_t *src_ps_list = h264_get_parameter_set_list( src_data, i ); + lsmash_entry_list_t *dst_ps_list = h264_get_parameter_set_list( dst_data, i ); + assert( src_ps_list && dst_ps_list ); + for( lsmash_entry_t *entry = src_ps_list->head; entry; entry = entry->next ) + { + isom_avcC_ps_entry_t *src_ps = 
(isom_avcC_ps_entry_t *)entry->data; + if( !src_ps ) + continue; + isom_avcC_ps_entry_t *dst_ps = isom_create_ps_entry( src_ps->parameterSetNALUnit, src_ps->parameterSetLength ); + if( !dst_ps ) + { + lsmash_destroy_h264_parameter_sets( dst_data ); + return -1; + } + if( lsmash_add_entry( dst_ps_list, dst_ps ) ) + { + lsmash_destroy_h264_parameter_sets( dst_data ); + isom_remove_avcC_ps( dst_ps ); + return -1; + } + } + } + return 0; +} + +int h264_print_bitrate( FILE *fp, lsmash_root_t *root, isom_box_t *box, int level ) +{ + assert( fp && root && box ); + int indent = level; + lsmash_ifprintf( fp, indent++, "[%s: MPEG-4 Bit Rate Box]\n", isom_4cc2str( box->type.fourcc ) ); + lsmash_ifprintf( fp, indent, "position = %"PRIu64"\n", box->pos ); + lsmash_ifprintf( fp, indent, "size = %"PRIu64"\n", box->size ); + isom_btrt_t *btrt = (isom_btrt_t *)box; + lsmash_ifprintf( fp, indent, "bufferSizeDB = %"PRIu32"\n", btrt->bufferSizeDB ); + lsmash_ifprintf( fp, indent, "maxBitrate = %"PRIu32"\n", btrt->maxBitrate ); + lsmash_ifprintf( fp, indent, "avgBitrate = %"PRIu32"\n", btrt->avgBitrate ); + return 0; +} diff --git a/output/mp4/h264.h b/output/mp4/h264.h new file mode 100644 index 0000000..3dc2be0 --- /dev/null +++ b/output/mp4/h264.h @@ -0,0 +1,226 @@ +/***************************************************************************** + * h264.h: + ***************************************************************************** + * Copyright (C) 2012 L-SMASH project + * + * Authors: Yusuke Nakamura + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *****************************************************************************/ + +/* This file is available under an ISC license. */ + +#define H264_DEFAULT_BUFFER_SIZE (1<<16) +#define H264_DEFAULT_NALU_LENGTH_SIZE 4 /* We always use 4 bytes length. */ +#define H264_SHORT_START_CODE_LENGTH 3 + +struct lsmash_h264_parameter_sets_tag +{ + lsmash_entry_list_t sps_list[1]; + lsmash_entry_list_t pps_list[1]; + lsmash_entry_list_t spsext_list[1]; +}; + +typedef struct +{ + uint8_t nal_ref_idc; + uint8_t nal_unit_type; + uint16_t length; +} h264_nalu_header_t; + +typedef struct +{ + uint16_t sar_width; + uint16_t sar_height; + uint8_t video_full_range_flag; + uint8_t colour_primaries; + uint8_t transfer_characteristics; + uint8_t matrix_coefficients; + uint32_t num_units_in_tick; + uint32_t time_scale; + uint8_t fixed_frame_rate_flag; +} h264_vui_t; + +typedef struct +{ + uint8_t present; + uint8_t profile_idc; + uint8_t constraint_set_flags; + uint8_t level_idc; + uint8_t seq_parameter_set_id; + uint8_t chroma_format_idc; + uint8_t separate_colour_plane_flag; + uint8_t ChromaArrayType; + uint8_t bit_depth_luma_minus8; + uint8_t bit_depth_chroma_minus8; + uint8_t pic_order_cnt_type; + uint8_t delta_pic_order_always_zero_flag; + uint8_t num_ref_frames_in_pic_order_cnt_cycle; + uint8_t frame_mbs_only_flag; + uint8_t hrd_present; + int32_t offset_for_non_ref_pic; + int32_t offset_for_top_to_bottom_field; + int32_t offset_for_ref_frame[255]; + int64_t ExpectedDeltaPerPicOrderCntCycle; + uint32_t max_num_ref_frames; + uint32_t log2_max_frame_num; + uint32_t MaxFrameNum; + uint32_t log2_max_pic_order_cnt_lsb; + uint32_t MaxPicOrderCntLsb; + 
uint32_t PicSizeInMapUnits; + uint32_t cropped_width; + uint32_t cropped_height; + h264_vui_t vui; +} h264_sps_t; + +typedef struct +{ + uint8_t present; + uint8_t pic_parameter_set_id; + uint8_t seq_parameter_set_id; + uint8_t entropy_coding_mode_flag; + uint8_t bottom_field_pic_order_in_frame_present_flag; + uint8_t num_slice_groups_minus1; + uint8_t slice_group_map_type; + uint8_t weighted_pred_flag; + uint8_t weighted_bipred_idc; + uint8_t deblocking_filter_control_present_flag; + uint8_t redundant_pic_cnt_present_flag; + uint32_t SliceGroupChangeRate; +} h264_pps_t; + +typedef struct +{ + uint8_t present; + uint8_t random_accessible; + uint32_t recovery_frame_cnt; +} h264_sei_t; + +typedef struct +{ + uint8_t present; + uint8_t slice_id; /* only for slice data partition */ + uint8_t type; + uint8_t pic_order_cnt_type; + uint8_t nal_ref_idc; + uint8_t IdrPicFlag; + uint8_t pic_parameter_set_id; + uint8_t field_pic_flag; + uint8_t bottom_field_flag; + uint8_t has_mmco5; + uint8_t has_redundancy; + uint16_t idr_pic_id; + uint32_t frame_num; + int32_t pic_order_cnt_lsb; + int32_t delta_pic_order_cnt_bottom; + int32_t delta_pic_order_cnt[2]; +} h264_slice_info_t; + +typedef struct +{ + uint8_t type; + uint8_t idr; + uint8_t random_accessible; + uint8_t independent; + uint8_t disposable; /* 1: nal_ref_idc == 0, 0: otherwise */ + uint8_t has_redundancy; + uint8_t incomplete_au_has_primary; + uint8_t pic_parameter_set_id; + uint8_t field_pic_flag; + uint8_t bottom_field_flag; + /* POC */ + uint8_t has_mmco5; + uint8_t ref_pic_has_mmco5; + uint8_t ref_pic_bottom_field_flag; + int32_t ref_pic_TopFieldOrderCnt; + int32_t ref_pic_PicOrderCntMsb; + int32_t ref_pic_PicOrderCntLsb; + int32_t pic_order_cnt_lsb; + int32_t delta_pic_order_cnt_bottom; + int32_t delta_pic_order_cnt[2]; + int32_t PicOrderCnt; + uint32_t FrameNumOffset; + /* */ + uint32_t recovery_frame_cnt; + uint32_t frame_num; + uint8_t *au; + uint32_t au_length; + uint8_t *incomplete_au; + uint32_t 
incomplete_au_length; + uint32_t au_number; +} h264_picture_info_t; + +typedef struct h264_info_tag h264_info_t; + +typedef struct +{ + lsmash_multiple_buffers_t *bank; + uint8_t *rbsp; + uint8_t *start; + uint8_t *end; + uint8_t *pos; + uint32_t (*update)( h264_info_t *, void *, uint32_t ); +} h264_stream_buffer_t; + +struct h264_info_tag +{ + lsmash_h264_specific_parameters_t avcC_param; + h264_nalu_header_t nalu_header; + lsmash_entry_list_t sps_list[1]; + lsmash_entry_list_t pps_list[1]; + lsmash_entry_list_t slice_list[1]; /* for slice data partition */ + h264_sps_t sps; /* active SPS */ + h264_pps_t pps; /* active PPS */ + h264_sei_t sei; /* active SEI */ + h264_slice_info_t slice; /* active slice */ + h264_picture_info_t picture; + uint8_t prev_nalu_type; + uint8_t no_more_read; + uint64_t ebsp_head_pos; + lsmash_bits_t *bits; + h264_stream_buffer_t buffer; +}; + +typedef enum +{ + H264_PICTURE_TYPE_I = 0, + H264_PICTURE_TYPE_I_P = 1, + H264_PICTURE_TYPE_I_P_B = 2, + H264_PICTURE_TYPE_SI = 3, + H264_PICTURE_TYPE_SI_SP = 4, + H264_PICTURE_TYPE_I_SI = 5, + H264_PICTURE_TYPE_I_SI_P_SP = 6, + H264_PICTURE_TYPE_I_SI_P_SP_B = 7, + H264_PICTURE_TYPE_NONE = 8, +} h264_picture_type; + +int h264_setup_parser( h264_info_t *info, int parse_only, uint32_t (*update)( h264_info_t *, void *, uint32_t ) ); +void h264_cleanup_parser( h264_info_t *info ); +int h264_calculate_poc( h264_info_t *info, h264_picture_info_t *picture, h264_picture_info_t *prev_picture ); +void h264_update_picture_info_for_slice( h264_picture_info_t *picture, h264_slice_info_t *slice ); +void h264_update_picture_info( h264_picture_info_t *picture, h264_slice_info_t *slice, h264_sei_t *sei ); +int h264_find_au_delimit_by_slice_info( h264_slice_info_t *slice, h264_slice_info_t *prev_slice ); +int h264_find_au_delimit_by_nalu_type( uint8_t nalu_type, uint8_t prev_nalu_type ); +int h264_supplement_buffer( h264_stream_buffer_t *buffer, h264_picture_info_t *picture, uint32_t size ); +int 
h264_check_nalu_header( h264_nalu_header_t *nalu_header, uint8_t **p_buf_pos, int use_long_start_code );
+int h264_parse_sps( h264_info_t *info, uint8_t *rbsp_buffer, uint8_t *ebsp, uint64_t ebsp_size );
+int h264_parse_pps( h264_info_t *info, uint8_t *rbsp_buffer, uint8_t *ebsp, uint64_t ebsp_size );
+int h264_parse_sei( lsmash_bits_t *bits, h264_sei_t *sei, uint8_t *rbsp_buffer, uint8_t *ebsp, uint64_t ebsp_size );
+int h264_parse_slice( h264_info_t *info, h264_nalu_header_t *nalu_header,
+                      uint8_t *rbsp_buffer, uint8_t *ebsp, uint64_t ebsp_size );
+int h264_try_to_append_parameter_set( h264_info_t *info, lsmash_h264_parameter_set_type ps_type, void *ps_data, uint32_t ps_length );
+
+/* Returns non-zero when at least three bytes lie in [buf_pos, buf_end) and
+ * those bytes are the short start code 0x00 0x00 0x01; returns 0 otherwise.
+ * No byte is read unless three bytes are available. */
+static inline int h264_check_next_short_start_code( uint8_t *buf_pos, uint8_t *buf_end )
+{
+    /* Compare the remaining length rather than forming buf_pos + 2:
+     * that pointer may lie more than one past the end of the buffer,
+     * which is undefined even if it is never dereferenced. */
+    return ((buf_end - buf_pos) > 2) && !buf_pos[0] && !buf_pos[1] && (buf_pos[2] == 0x01);
+}
diff --git a/output/mp4/importer.c b/output/mp4/importer.c
new file mode 100644
index 0000000..5a66d10
--- /dev/null
+++ b/output/mp4/importer.c
@@ -0,0 +1,3716 @@
+/*****************************************************************************
+ * importer.c:
+ *****************************************************************************
+ * Copyright (C) 2010-2012 L-SMASH project
+ *
+ * Authors: Takashi Hirata
+ * Contributors: Yusuke Nakamura
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *****************************************************************************/ + +/* This file is available under an ISC license. */ + +#include "internal.h" /* must be placed first */ + +#include +#include +#include +#include + +#define LSMASH_IMPORTER_INTERNAL +#include "importer.h" + +#include "mp4a.h" +#include "box.h" +#include "description.h" + +/*************************************************************************** + importer framework +***************************************************************************/ +struct mp4sys_importer_tag; + +typedef void ( *mp4sys_importer_cleanup ) ( struct mp4sys_importer_tag * ); +typedef int ( *mp4sys_importer_get_accessunit ) ( struct mp4sys_importer_tag *, uint32_t, lsmash_sample_t * ); +typedef int ( *mp4sys_importer_probe ) ( struct mp4sys_importer_tag * ); +typedef uint32_t ( *mp4sys_importer_get_last_duration )( struct mp4sys_importer_tag *, uint32_t ); + +typedef struct +{ + const char* name; + int detectable; + mp4sys_importer_probe probe; + mp4sys_importer_get_accessunit get_accessunit; + mp4sys_importer_get_last_duration get_last_delta; + mp4sys_importer_cleanup cleanup; +} mp4sys_importer_functions; + +typedef struct mp4sys_importer_tag +{ + FILE* stream; + int is_stdin; + void* info; /* importer internal status information. 
*/ + mp4sys_importer_functions funcs; + lsmash_entry_list_t* summaries; +} mp4sys_importer_t; + +typedef enum +{ + MP4SYS_IMPORTER_ERROR = -1, + MP4SYS_IMPORTER_OK = 0, + MP4SYS_IMPORTER_CHANGE = 1, + MP4SYS_IMPORTER_EOF = 2, +} mp4sys_importer_status; + +/*************************************************************************** + ADTS importer +***************************************************************************/ +#define MP4SYS_ADTS_FIXED_HEADER_LENGTH 4 /* this is partly a lie. actually 28 bits. */ +#define MP4SYS_ADTS_BASIC_HEADER_LENGTH 7 +#define MP4SYS_ADTS_MAX_FRAME_LENGTH ( ( 1 << 13 ) - 1 ) +#define MP4SYS_ADTS_MAX_RAW_DATA_BLOCKS 4 + +typedef struct +{ + uint16_t syncword; /* 12; */ + uint8_t ID; /* 1; */ + uint8_t layer; /* 2; */ + uint8_t protection_absent; /* 1; */ + uint8_t profile_ObjectType; /* 2; */ + uint8_t sampling_frequency_index; /* 4; */ +// uint8_t private_bit; /* 1; we don't care. */ + uint8_t channel_configuration; /* 3; */ +// uint8_t original_copy; /* 1; we don't care. */ +// uint8_t home; /* 1; we don't care. */ + +} mp4sys_adts_fixed_header_t; + +typedef struct +{ +// uint8_t copyright_identification_bit; /* 1; we don't care. */ +// uint8_t copyright_identification_start; /* 1; we don't care. */ + uint16_t frame_length; /* 13; */ +// uint16_t adts_buffer_fullness; /* 11; we don't care. */ + uint8_t number_of_raw_data_blocks_in_frame; /* 2; */ +// uint16_t adts_error_check; /* we don't support */ +// uint16_t raw_data_block_position[MP4SYS_ADTS_MAX_RAW_DATA_BLOCKS-1]; /* we don't use this directly, and... */ + uint16_t raw_data_block_size[MP4SYS_ADTS_MAX_RAW_DATA_BLOCKS]; /* use this instead of above. 
*/ +// uint16_t adts_header_error_check; /* we don't support, actually crc_check within this */ +// uint16_t adts_raw_data_block_error_check[MP4SYS_ADTS_MAX_RAW_DATA_BLOCKS]; /* we don't support */ +} mp4sys_adts_variable_header_t; + +static void mp4sys_adts_parse_fixed_header( uint8_t* buf, mp4sys_adts_fixed_header_t* header ) +{ + /* FIXME: should we rewrite these code using bitstream reader? */ + header->syncword = (buf[0] << 4) | (buf[1] >> 4); + header->ID = (buf[1] >> 3) & 0x1; + header->layer = (buf[1] >> 1) & 0x3; + header->protection_absent = buf[1] & 0x1; + header->profile_ObjectType = buf[2] >> 6; + header->sampling_frequency_index = (buf[2] >> 2) & 0xF; +// header->private_bit = (buf[2] >> 1) & 0x1; /* we don't care currently. */ + header->channel_configuration = ((buf[2] << 2) | (buf[3] >> 6)) & 0x07; +// header->original_copy = (buf[3] >> 5) & 0x1; /* we don't care currently. */ +// header->home = (buf[3] >> 4) & 0x1; /* we don't care currently. */ +} + +static int mp4sys_adts_check_fixed_header( mp4sys_adts_fixed_header_t* header ) +{ + if( header->syncword != 0xFFF ) return -1; +// if( header->ID != 0x0 ) return -1; /* we don't care. */ + if( header->layer != 0x0 ) return -1; /* must be 0b00 for any type of AAC */ +// if( header->protection_absent != 0x1 ) return -1; /* we don't care. */ + if( header->profile_ObjectType != 0x1 ) return -1; /* FIXME: 0b00=Main, 0b01=LC, 0b10=SSR, 0b11=LTP. */ + if( header->sampling_frequency_index > 0xB ) return -1; /* must not be > 0xB. */ + if( header->channel_configuration == 0x0 ) return -1; /* FIXME: we do not support 0b000 currently. */ + if( header->profile_ObjectType == 0x3 && header->ID != 0x0 ) return -1; /* LTP is valid only if ID==0. */ + return 0; +} + +static int mp4sys_adts_parse_variable_header( FILE* stream, uint8_t* buf, unsigned int protection_absent, mp4sys_adts_variable_header_t* header ) +{ + /* FIXME: should we rewrite these code using bitstream reader? 
*/ +// header->copyright_identification_bit = (buf[3] >> 3) & 0x1; /* we don't care. */ +// header->copyright_identification_start = (buf[3] >> 2) & 0x1; /* we don't care. */ + header->frame_length = ((buf[3] << 11) | (buf[4] << 3) | (buf[5] >> 5)) & 0x1FFF ; +// header->adts_buffer_fullness = ((buf[5] << 6) | (buf[6] >> 2)) 0x7FF ; /* we don't care. */ + header->number_of_raw_data_blocks_in_frame = buf[6] & 0x3; + + if( header->frame_length <= MP4SYS_ADTS_BASIC_HEADER_LENGTH + 2 * (protection_absent == 0) ) + return -1; /* easy error check */ + + /* protection_absent and number_of_raw_data_blocks_in_frame relatives */ + + uint8_t buf2[2]; + unsigned int number_of_blocks = header->number_of_raw_data_blocks_in_frame; + if( number_of_blocks == 0 ) + { + header->raw_data_block_size[0] = header->frame_length - MP4SYS_ADTS_BASIC_HEADER_LENGTH; + /* skip adts_error_check() and subtract that from block_size */ + if( protection_absent == 0 ) + { + header->raw_data_block_size[0] -= 2; + if( fread( buf2, 1, 2, stream ) != 2 ) + return -1; + } + return 0; + } + + /* now we have multiple raw_data_block()s, so evaluate adts_header_error_check() */ + + uint16_t raw_data_block_position[MP4SYS_ADTS_MAX_RAW_DATA_BLOCKS]; + uint16_t first_offset = MP4SYS_ADTS_BASIC_HEADER_LENGTH; + if( protection_absent == 0 ) + { + /* process adts_header_error_check() */ + for( int i = 0 ; i < number_of_blocks ; i++ ) /* 1-based in the spec, but we use 0-based */ + { + if( fread( buf2, 1, 2, stream ) != 2 ) + return -1; + raw_data_block_position[i] = (buf2[0] << 8) | buf2[1]; + } + /* skip crc_check in adts_header_error_check(). 
+ Or might be sizeof( adts_error_check() ) if we share with the case number_of_raw_data_blocks_in_frame == 0 */ + if( fread( buf2, 1, 2, stream ) != 2 ) + return -1; + first_offset += ( 2 * number_of_blocks ) + 2; /* according to above */ + } + else + { + /* + * NOTE: We never support the case where number_of_raw_data_blocks_in_frame != 0 && protection_absent != 0, + * because we have to parse the raw AAC bitstream itself to find boundaries of raw_data_block()s in this case. + * Which is to say, that braindamaged spec requires us (mp4 muxer) to decode AAC once to split frames. + * L-SMASH is NOT AAC DECODER, so that we've just given up for this case. + * This is ISO/IEC 13818-7's sin which defines ADTS format originally. + */ + return -1; + } + + /* convert raw_data_block_position --> raw_data_block_size */ + + /* do conversion for first */ + header->raw_data_block_size[0] = raw_data_block_position[0] - first_offset; + /* set dummy offset to tail for loop, do coversion for rest. */ + raw_data_block_position[number_of_blocks] = header->frame_length; + for( int i = 1 ; i <= number_of_blocks ; i++ ) + header->raw_data_block_size[i] = raw_data_block_position[i] - raw_data_block_position[i-1]; + + /* adjustment for adts_raw_data_block_error_check() */ + if( protection_absent == 0 && number_of_blocks != 0 ) + for( int i = 0 ; i <= number_of_blocks ; i++ ) + header->raw_data_block_size[i] -= 2; + + return 0; +} + +static int mp4sys_adts_parse_headers( FILE* stream, uint8_t* buf, mp4sys_adts_fixed_header_t* header, mp4sys_adts_variable_header_t* variable_header ) +{ + mp4sys_adts_parse_fixed_header( buf, header ); + if( mp4sys_adts_check_fixed_header( header ) ) + return -1; + /* get payload length & skip extra(crc) header */ + return mp4sys_adts_parse_variable_header( stream, buf, header->protection_absent, variable_header ); +} + +static lsmash_audio_summary_t *mp4sys_adts_create_summary( mp4sys_adts_fixed_header_t *header ) +{ + lsmash_audio_summary_t *summary = 
(lsmash_audio_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_AUDIO ); + if( !summary ) + return NULL; + summary->sample_type = ISOM_CODEC_TYPE_MP4A_AUDIO; + summary->max_au_length = MP4SYS_ADTS_MAX_FRAME_LENGTH; + summary->frequency = mp4a_sampling_frequency_table[header->sampling_frequency_index][1]; + summary->channels = header->channel_configuration + ( header->channel_configuration == 0x07 ); /* 0x07 means 7.1ch */ + summary->sample_size = 16; + summary->samples_in_frame = 1024; + summary->aot = header->profile_ObjectType + MP4A_AUDIO_OBJECT_TYPE_AAC_MAIN; + summary->sbr_mode = MP4A_AAC_SBR_NOT_SPECIFIED; +#if 0 /* FIXME: This is very unstable. Many players crash with this. */ + if( header->ID != 0 ) + { + /* + * NOTE: This ADTS seems of ISO/IEC 13818-7 (MPEG-2 AAC). + * It has special object_type_indications, depending on it's profile (Legacy Interface). + * If ADIF header is not available, it should not have decoder specific information, so AudioObjectType neither. + * see ISO/IEC 14496-1, DecoderSpecificInfo and 14496-3 Subpart 9: MPEG-1/2 Audio in MPEG-4. + */ + summary->object_type_indication = header->profile_ObjectType + MP4SYS_OBJECT_TYPE_Audio_ISO_13818_7_Main_Profile; + summary->aot = MP4A_AUDIO_OBJECT_TYPE_NULL; + summary->asc = NULL; + summary->asc_length = 0; + // summary->sbr_mode = MP4A_AAC_SBR_NONE; /* MPEG-2 AAC should not be HE-AAC, but we forgive them. 
*/ + return summary; + } +#endif + uint32_t data_length; + uint8_t *data = mp4a_export_AudioSpecificConfig( header->profile_ObjectType + MP4A_AUDIO_OBJECT_TYPE_AAC_MAIN, + summary->frequency, summary->channels, summary->sbr_mode, + NULL, 0, &data_length ); + if( !data ) + { + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + return NULL; + } + lsmash_codec_specific_t *specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_MP4SYS_DECODER_CONFIG, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !specific ) + { + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + free( data ); + return NULL; + } + lsmash_mp4sys_decoder_parameters_t *param = (lsmash_mp4sys_decoder_parameters_t *)specific->data.structured; + param->objectTypeIndication = MP4SYS_OBJECT_TYPE_Audio_ISO_14496_3; + param->streamType = MP4SYS_STREAM_TYPE_AudioStream; + if( lsmash_set_mp4sys_decoder_specific_info( param, data, data_length ) ) + { + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + lsmash_destroy_codec_specific_data( specific ); + free( data ); + return NULL; + } + free( data ); + if( lsmash_add_entry( &summary->opaque->list, specific ) ) + { + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + lsmash_destroy_codec_specific_data( specific ); + return NULL; + } + return summary; +} + +typedef struct +{ + mp4sys_importer_status status; + unsigned int raw_data_block_idx; + mp4sys_adts_fixed_header_t header; + mp4sys_adts_variable_header_t variable_header; + uint32_t samples_in_frame; + uint32_t au_number; +} mp4sys_adts_info_t; + +static int mp4sys_adts_get_accessunit( mp4sys_importer_t* importer, uint32_t track_number, lsmash_sample_t *buffered_sample ) +{ + debug_if( !importer || !importer->info || !buffered_sample->data || !buffered_sample->length ) + return -1; + if( !importer->info || track_number != 1 ) + return -1; + mp4sys_adts_info_t* info = (mp4sys_adts_info_t*)importer->info; + mp4sys_importer_status current_status = info->status; + 
uint16_t raw_data_block_size = info->variable_header.raw_data_block_size[info->raw_data_block_idx];
+    if( current_status == MP4SYS_IMPORTER_ERROR || buffered_sample->length < raw_data_block_size )
+        return -1;
+    if( current_status == MP4SYS_IMPORTER_EOF )
+    {
+        buffered_sample->length = 0;
+        return 0;
+    }
+    if( current_status == MP4SYS_IMPORTER_CHANGE )
+    {
+        /* A header change was recorded earlier: replace the stored summary with one built from the new header. */
+        lsmash_audio_summary_t* summary = mp4sys_adts_create_summary( &info->header );
+        if( !summary )
+            return -1;
+        lsmash_entry_t* entry = lsmash_get_entry( importer->summaries, track_number );
+        if( !entry || !entry->data )
+        {
+            /* Nothing has taken ownership of the new summary on this path, so release it before bailing out. */
+            lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+            return -1;
+        }
+        lsmash_cleanup_summary( entry->data );
+        entry->data = summary;
+        info->samples_in_frame = summary->samples_in_frame;
+    }
+
+    /* read a raw_data_block(), typically == payload of a ADTS frame */
+    if( fread( buffered_sample->data, 1, raw_data_block_size, importer->stream ) != raw_data_block_size )
+    {
+        info->status = MP4SYS_IMPORTER_ERROR;
+        return -1;
+    }
+    buffered_sample->length = raw_data_block_size;
+    buffered_sample->dts = info->au_number++ * info->samples_in_frame;
+    buffered_sample->cts = buffered_sample->dts;
+    buffered_sample->prop.ra_flags = ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC;
+    buffered_sample->prop.pre_roll.distance = 1; /* MDCT */
+
+    /* now we succeeded to read current frame, so "return" takes 0 always below. */
+
+    /* skip adts_raw_data_block_error_check() */
+    /* The two check bytes are read into a scratch buffer: reading them into
+     * buffered_sample->data would overwrite the first two bytes of the payload
+     * that was just stored there. */
+    uint8_t crc_scratch[2];
+    if( info->header.protection_absent == 0
+     && info->variable_header.number_of_raw_data_blocks_in_frame != 0
+     && fread( crc_scratch, 1, 2, importer->stream ) != 2 )
+    {
+        info->status = MP4SYS_IMPORTER_ERROR;
+        return 0;
+    }
+    /* current adts_frame() has any more raw_data_block()?
*/ + if( info->raw_data_block_idx < info->variable_header.number_of_raw_data_blocks_in_frame ) + { + info->raw_data_block_idx++; + info->status = MP4SYS_IMPORTER_OK; + return 0; + } + info->raw_data_block_idx = 0; + + /* preparation for next frame */ + + uint8_t buf[MP4SYS_ADTS_MAX_FRAME_LENGTH]; + size_t ret = fread( buf, 1, MP4SYS_ADTS_BASIC_HEADER_LENGTH, importer->stream ); + if( ret == 0 ) + { + info->status = MP4SYS_IMPORTER_EOF; + return 0; + } + if( ret != MP4SYS_ADTS_BASIC_HEADER_LENGTH ) + { + info->status = MP4SYS_IMPORTER_ERROR; + return 0; + } + /* + * NOTE: About the spec of ADTS headers. + * By the spec definition, ADTS's fixed header cannot change in the middle of stream. + * But spec of MP4 allows that a stream(track) changes its properties in the middle of it. + */ + /* + * NOTE: About detailed check for ADTS headers. + * We do not ommit detailed check for fixed header by simply testing bits' identification, + * because there're some flags which does not matter to audio_summary (so AudioSpecificConfig neither) + * so that we can take them as no change and never make new ObjectDescriptor. + * I know that can be done with/by bitmask also and that should be fast, but L-SMASH project prefers + * even foolishly straightforward way. + */ + /* + * NOTE: About our reading algorithm for ADTS. + * It's rather simple if we retrieve payload of ADTS (i.e. raw AAC frame) at the same time to + * retrieve headers. + * But then we have to cache and memcpy every frame so that it requires more clocks and memory. + * To avoid them, I adopted this separate retrieving method. + */ + mp4sys_adts_fixed_header_t header = {0}; + mp4sys_adts_variable_header_t variable_header = {0}; + if( mp4sys_adts_parse_headers( importer->stream, buf, &header, &variable_header ) ) + { + info->status = MP4SYS_IMPORTER_ERROR; + return 0; + } + info->variable_header = variable_header; + + /* + * NOTE: About our support for change(s) of properties within an ADTS stream. 
+ * We have to modify these conditions depending on the features we support. + * For example, if we support copyright_identification_* in any way within any feature + * defined by/in any specs, such as ISO/IEC 14496-1 (MPEG-4 Systems), like... + * "8.3 Intellectual Property Management and Protection (IPMP)", or something similar, + * we have to check copyright_identification_* and treat them in audio_summary. + * "Change(s)" may result in MP4SYS_IMPORTER_ERROR or MP4SYS_IMPORTER_CHANGE + * depending on the features we support, and what the spec allows. + * Sometimes the "change(s)" can be allowed, while sometimes they're forbidden. + */ + /* currently UNsupported "change(s)". */ + if( info->header.profile_ObjectType != header.profile_ObjectType /* currently unsupported. */ + || info->header.ID != header.ID /* In strict, this means change of object_type_indication. */ + || info->header.sampling_frequency_index != header.sampling_frequency_index ) /* This may change timebase. */ + { + info->status = MP4SYS_IMPORTER_ERROR; + return 0; + } + /* currently supported "change(s)". */ + if( info->header.channel_configuration != header.channel_configuration ) + { + /* + * FIXME: About conditions of VALID "change(s)". + * we have to check whether any "change(s)" affect to audioProfileLevelIndication + * in InitialObjectDescriptor (MP4_IOD) or not. + * If another type or upper level is required by the change(s), that is forbidden. + * Because ObjectDescriptor does not have audioProfileLevelIndication, + * so that it seems impossible to change audioProfileLevelIndication in the middle of the stream. + * Note also any other properties, such as AudioObjectType, object_type_indication. + */ + /* + * NOTE: updating summary must be done on next call, + * because user may retrieve summary right after this function call of this time, + * and that should be of current, before change, one. 
+ */ + info->header = header; + info->status = MP4SYS_IMPORTER_CHANGE; + return 0; + } + /* no change which matters to mp4 muxing was found */ + info->status = MP4SYS_IMPORTER_OK; + return 0; +} + +static void mp4sys_adts_cleanup( mp4sys_importer_t* importer ) +{ + debug_if( importer && importer->info ) + free( importer->info ); +} + +/* returns 0 if it seems adts. */ +static int mp4sys_adts_probe( mp4sys_importer_t* importer ) +{ + uint8_t buf[MP4SYS_ADTS_MAX_FRAME_LENGTH]; + if( fread( buf, 1, MP4SYS_ADTS_BASIC_HEADER_LENGTH, importer->stream ) != MP4SYS_ADTS_BASIC_HEADER_LENGTH ) + return -1; + + mp4sys_adts_fixed_header_t header = {0}; + mp4sys_adts_variable_header_t variable_header = {0}; + if( mp4sys_adts_parse_headers( importer->stream, buf, &header, &variable_header ) ) + return -1; + + /* now the stream seems valid ADTS */ + + lsmash_audio_summary_t* summary = mp4sys_adts_create_summary( &header ); + if( !summary ) + return -1; + + /* importer status */ + mp4sys_adts_info_t* info = lsmash_malloc_zero( sizeof(mp4sys_adts_info_t) ); + if( !info ) + { + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + return -1; + } + info->status = MP4SYS_IMPORTER_OK; + info->raw_data_block_idx = 0; + info->header = header; + info->variable_header = variable_header; + info->samples_in_frame = summary->samples_in_frame; + + if( lsmash_add_entry( importer->summaries, summary ) ) + { + free( info ); + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + return -1; + } + importer->info = info; + + return 0; +} + +static uint32_t mp4sys_adts_get_last_delta( mp4sys_importer_t* importer, uint32_t track_number ) +{ + debug_if( !importer || !importer->info ) + return 0; + mp4sys_adts_info_t *info = (mp4sys_adts_info_t *)importer->info; + if( !info || track_number != 1 || info->status != MP4SYS_IMPORTER_EOF ) + return 0; + return info->samples_in_frame; +} + +const static mp4sys_importer_functions mp4sys_adts_importer = +{ + "adts", + 1, + mp4sys_adts_probe, + 
mp4sys_adts_get_accessunit,
+    mp4sys_adts_get_last_delta,
+    mp4sys_adts_cleanup
+};
+
+/***************************************************************************
+    mp3 (Legacy Interface) importer
+***************************************************************************/
+
+static void mp4sys_mp3_cleanup( mp4sys_importer_t* importer )
+{
+    debug_if( importer && importer->info )
+        free( importer->info );
+}
+
+/* Fields unpacked from the 4-byte MPEG audio frame header; the number in <> is the field width in bits. */
+typedef struct
+{
+    uint16_t syncword;           /* <12> */
+    uint8_t  ID;                 /* <1> */
+    uint8_t  layer;              /* <2> */
+//  uint8_t  protection_bit;     /* <1> don't care. */
+    uint8_t  bitrate_index;      /* <4> */
+    uint8_t  sampling_frequency; /* <2> */
+    uint8_t  padding_bit;        /* <1> */
+//  uint8_t  private_bit;        /* <1> don't care. */
+    uint8_t  mode;               /* <2> */
+//  uint8_t  mode_extension;     /* <2> don't care. */
+//  uint8_t  copyright;          /* <1> don't care. */
+//  uint8_t  original_copy;      /* <1> don't care. */
+    uint8_t  emphasis;           /* <2> for error check only. */
+
+} mp4sys_mp3_header_t;
+
+/* Unpack the 4 bytes at buf into *header, then validate the fields.
+ * Returns 0 when the header is acceptable and -1 otherwise. */
+static int mp4sys_mp3_parse_header( uint8_t* buf, mp4sys_mp3_header_t* header )
+{
+    /* FIXME: should we rewrite these code using bitstream reader? */
+    /* Widen each byte to uint32_t before shifting: a plain buf[0] << 24 shifts a promoted int,
+     * and a valid header always starts with 0xFF, which would shift into the sign bit. */
+    uint32_t data = ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) | ((uint32_t)buf[2] << 8) | (uint32_t)buf[3];
+    header->syncword = (data >> 20) & 0xFFF; /* NOTE: don't consider what is called MPEG2.5, which last bit is 0. */
+    header->ID = (data >> 19) & 0x1;
+    header->layer = (data >> 17) & 0x3;
+//  header->protection_bit = (data >> 16) & 0x1; /* don't care. */
+    header->bitrate_index = (data >> 12) & 0xF;
+    header->sampling_frequency = (data >> 10) & 0x3;
+    header->padding_bit = (data >> 9) & 0x1;
+//  header->private_bit = (data >> 8) & 0x1; /* don't care. */
+    header->mode = (data >> 6) & 0x3;
+//  header->mode_extension = (data >> 4) & 0x3;
+//  header->copyright = (data >> 3) & 0x1; /* don't care. */
+//  header->original_copy = (data >> 2) & 0x1; /* don't care. */
+    header->emphasis = data & 0x3; /* for error check only.
*/
+
+    if( header->syncword != 0xFFF ) return -1;
+    if( header->layer == 0x0 ) return -1;
+    if( header->bitrate_index == 0x0 || header->bitrate_index == 0xF ) return -1; /* FIXME: "free" bitrate is unsupported currently. */
+    if( header->sampling_frequency == 0x3) return -1;
+    if( header->emphasis == 0x2) return -1;
+    return 0;
+}
+
+#define MP4SYS_MP3_MAX_FRAME_LENGTH (1152*(16/8)*2)
+#define MP4SYS_MP3_HEADER_LENGTH 4
+/* The 2-bit mode field is 0x3 for single channel; every other value carries two channels.
+ * The former (!!~(mode)) was always 1, because ~ acts on the promoted int and can never yield 0
+ * for a value in 0..3, so mono streams were reported as having 2 channels. */
+#define MP4SYS_MODE_IS_2CH( mode ) ((mode) != 0x3)
+#define MP4SYS_LAYER_III 0x1
+#define MP4SYS_LAYER_I 0x3
+
+/* Sampling frequency in Hz, indexed by [ID][sampling_frequency] of the frame header. */
+static const uint32_t mp4sys_mp3_frequency_tbl[2][3] = {
+    { 22050, 24000, 16000 }, /* MPEG-2 BC audio */
+    { 44100, 48000, 32000 }  /* MPEG-1 audio */
+};
+
+/* Build an audio summary from a parsed frame header; returns NULL on failure. */
+static lsmash_audio_summary_t *mp4sys_mp3_create_summary( mp4sys_mp3_header_t *header, int legacy_mode )
+{
+    lsmash_audio_summary_t *summary = (lsmash_audio_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_AUDIO );
+    if( !summary )
+        return NULL;
+    summary->sample_type = ISOM_CODEC_TYPE_MP4A_AUDIO;
+    summary->max_au_length = MP4SYS_MP3_MAX_FRAME_LENGTH;
+    summary->frequency = mp4sys_mp3_frequency_tbl[header->ID][header->sampling_frequency];
+    summary->channels = MP4SYS_MODE_IS_2CH( header->mode ) + 1; /* 1 for single channel, 2 otherwise */
+    summary->sample_size = 16;
+    summary->samples_in_frame = header->layer == MP4SYS_LAYER_I ? 384 : 1152;
+    summary->aot = MP4A_AUDIO_OBJECT_TYPE_Layer_1 + (MP4SYS_LAYER_I - header->layer); /* no effect with Legacy Interface. */
+    summary->sbr_mode = MP4A_AAC_SBR_NOT_SPECIFIED; /* no effect */
+#if 0 /* FIXME: This is very unstable. Many players crash with this.
*/ + if( !legacy_mode ) + { + summary->object_type_indication = MP4SYS_OBJECT_TYPE_Audio_ISO_14496_3; + if( lsmash_setup_AudioSpecificConfig( summary ) ) + { + lsmash_cleanup_summary( summary ); + return NULL; + } + } +#endif + uint32_t data_length; + uint8_t *data = mp4a_export_AudioSpecificConfig( MP4A_AUDIO_OBJECT_TYPE_Layer_1 + (MP4SYS_LAYER_I - header->layer), + summary->frequency, summary->channels, summary->sbr_mode, + NULL, 0, &data_length ); + if( !data ) + { + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + return NULL; + } + lsmash_codec_specific_t *specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_MP4SYS_DECODER_CONFIG, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( !specific ) + { + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + free( data ); + return NULL; + } + lsmash_mp4sys_decoder_parameters_t *param = (lsmash_mp4sys_decoder_parameters_t *)specific->data.structured; + param->objectTypeIndication = header->ID ? MP4SYS_OBJECT_TYPE_Audio_ISO_11172_3 : MP4SYS_OBJECT_TYPE_Audio_ISO_13818_3; + param->streamType = MP4SYS_STREAM_TYPE_AudioStream; + if( lsmash_set_mp4sys_decoder_specific_info( param, data, data_length ) ) + { + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + lsmash_destroy_codec_specific_data( specific ); + free( data ); + return NULL; + } + free( data ); + if( lsmash_add_entry( &summary->opaque->list, specific ) ) + { + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + lsmash_destroy_codec_specific_data( specific ); + return NULL; + } + return summary; +} + +typedef struct +{ + mp4sys_importer_status status; + mp4sys_mp3_header_t header; + uint8_t raw_header[MP4SYS_MP3_HEADER_LENGTH]; + uint32_t samples_in_frame; + uint32_t au_number; +} mp4sys_mp3_info_t; + +static int mp4sys_mp3_get_accessunit( mp4sys_importer_t* importer, uint32_t track_number, lsmash_sample_t *buffered_sample ) +{ + debug_if( !importer || !importer->info || !buffered_sample->data || 
!buffered_sample->length ) + return -1; + if( !importer->info || track_number != 1 ) + return -1; + mp4sys_mp3_info_t* info = (mp4sys_mp3_info_t*)importer->info; + mp4sys_mp3_header_t* header = (mp4sys_mp3_header_t*)&info->header; + mp4sys_importer_status current_status = info->status; + + const uint32_t bitrate_tbl[2][3][16] = { + { /* MPEG-2 BC audio */ + { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, /* Layer III */ + { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, /* Layer II */ + { 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0 } /* Layer I */ + }, + { /* MPEG-1 audio */ + { 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0 }, /* Layer III */ + { 0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0 }, /* Layer II */ + { 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0 } /* Layer I */ + } + }; + uint32_t bitrate = bitrate_tbl[header->ID][header->layer-1][header->bitrate_index]; + uint32_t frequency = mp4sys_mp3_frequency_tbl[header->ID][header->sampling_frequency]; + debug_if( bitrate == 0 || frequency == 0 ) + return -1; + uint32_t frame_size; + if( header->layer == MP4SYS_LAYER_I ) + { + /* mp1's 'slot' is 4 bytes unit. see 11172-3, Audio Sequence General. */ + frame_size = ( 12 * 1000 * bitrate / frequency + header->padding_bit ) * 4; + } + else + { + /* mp2/3's 'slot' is 1 bytes unit. */ + frame_size = 144 * 1000 * bitrate / frequency + header->padding_bit; + } + + if( current_status == MP4SYS_IMPORTER_ERROR || frame_size <= 4 || buffered_sample->length < frame_size ) + return -1; + if( current_status == MP4SYS_IMPORTER_EOF ) + { + buffered_sample->length = 0; + return 0; + } + if( current_status == MP4SYS_IMPORTER_CHANGE ) + { + lsmash_audio_summary_t* summary = mp4sys_mp3_create_summary( header, 1 ); /* FIXME: use legacy mode. 
*/ + if( !summary ) + return -1; + lsmash_entry_t* entry = lsmash_get_entry( importer->summaries, track_number ); + if( !entry || !entry->data ) + return -1; + lsmash_cleanup_summary( entry->data ); + entry->data = summary; + info->samples_in_frame = summary->samples_in_frame; + } + /* read a frame's data. */ + memcpy( buffered_sample->data, info->raw_header, MP4SYS_MP3_HEADER_LENGTH ); + frame_size -= MP4SYS_MP3_HEADER_LENGTH; + if( fread( ((uint8_t*)buffered_sample->data)+MP4SYS_MP3_HEADER_LENGTH, 1, frame_size, importer->stream ) != frame_size ) + { + info->status = MP4SYS_IMPORTER_ERROR; + return -1; + } + buffered_sample->length = MP4SYS_MP3_HEADER_LENGTH + frame_size; + buffered_sample->dts = info->au_number++ * info->samples_in_frame; + buffered_sample->cts = buffered_sample->dts; + buffered_sample->prop.ra_flags = ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC; + buffered_sample->prop.pre_roll.distance = header->layer == MP4SYS_LAYER_III ? 1 : 0; /* Layer III uses MDCT */ + + /* now we succeeded to read current frame, so "return" takes 0 always below. */ + /* preparation for next frame */ + + uint8_t buf[MP4SYS_MP3_HEADER_LENGTH]; + size_t ret = fread( buf, 1, MP4SYS_MP3_HEADER_LENGTH, importer->stream ); + if( ret == 0 ) + { + info->status = MP4SYS_IMPORTER_EOF; + return 0; + } + if( ret == 1 && *buf == 0x00 ) + { + /* NOTE: ugly hack for mp1 stream created with SCMPX. */ + info->status = MP4SYS_IMPORTER_EOF; + return 0; + } + if( ret != MP4SYS_MP3_HEADER_LENGTH ) + { + info->status = MP4SYS_IMPORTER_ERROR; + return 0; + } + + mp4sys_mp3_header_t new_header = {0}; + if( mp4sys_mp3_parse_header( buf, &new_header ) ) + { + info->status = MP4SYS_IMPORTER_ERROR; + return 0; + } + memcpy( info->raw_header, buf, MP4SYS_MP3_HEADER_LENGTH ); + + /* currently UNsupported "change(s)". */ + if( header->layer != new_header.layer /* This means change of object_type_indication with Legacy Interface. 
*/ + || header->sampling_frequency != new_header.sampling_frequency ) /* This may change timescale. */ + { + info->status = MP4SYS_IMPORTER_ERROR; + return 0; + } + + /* currently supported "change(s)". */ + if( MP4SYS_MODE_IS_2CH( header->mode ) != MP4SYS_MODE_IS_2CH( new_header.mode ) ) + info->status = MP4SYS_IMPORTER_CHANGE; + else + info->status = MP4SYS_IMPORTER_OK; /* no change which matters to mp4 muxing was found */ + info->header = new_header; + return 0; +} + +static int mp4sys_mp3_probe( mp4sys_importer_t* importer ) +{ + uint8_t buf[MP4SYS_MP3_HEADER_LENGTH]; + if( fread( buf, 1, MP4SYS_MP3_HEADER_LENGTH, importer->stream ) != MP4SYS_MP3_HEADER_LENGTH ) + return -1; + + mp4sys_mp3_header_t header = {0}; + if( mp4sys_mp3_parse_header( buf, &header ) ) + return -1; + + /* now the stream seems valid mp3 */ + + lsmash_audio_summary_t* summary = mp4sys_mp3_create_summary( &header, 1 ); /* FIXME: use legacy mode. */ + if( !summary ) + return -1; + + /* importer status */ + mp4sys_mp3_info_t* info = lsmash_malloc_zero( sizeof(mp4sys_mp3_info_t) ); + if( !info ) + { + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + return -1; + } + info->status = MP4SYS_IMPORTER_OK; + info->header = header; + info->samples_in_frame = summary->samples_in_frame; + memcpy( info->raw_header, buf, MP4SYS_MP3_HEADER_LENGTH ); + + if( lsmash_add_entry( importer->summaries, summary ) ) + { + free( info ); + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + return -1; + } + importer->info = info; + + return 0; +} + +static uint32_t mp4sys_mp3_get_last_delta( mp4sys_importer_t* importer, uint32_t track_number ) +{ + debug_if( !importer || !importer->info ) + return 0; + mp4sys_mp3_info_t *info = (mp4sys_mp3_info_t *)importer->info; + if( !info || track_number != 1 || info->status != MP4SYS_IMPORTER_EOF ) + return 0; + return info->samples_in_frame; +} + +const static mp4sys_importer_functions mp4sys_mp3_importer = +{ + "MPEG-1/2BC_Audio_Legacy", + 1, + mp4sys_mp3_probe, 
+ mp4sys_mp3_get_accessunit, + mp4sys_mp3_get_last_delta, + mp4sys_mp3_cleanup +}; + +/*************************************************************************** + AMR-NB/WB storage format importer + http://www.ietf.org/rfc/rfc3267.txt (Obsoleted) + http://www.ietf.org/rfc/rfc4867.txt +***************************************************************************/ +static void mp4sys_amr_cleanup( mp4sys_importer_t* importer ) +{ + debug_if( importer && importer->info ) + free( importer->info ); +} + +typedef struct +{ + uint8_t wb; + uint32_t samples_in_frame; + uint32_t au_number; +} mp4sys_amr_info_t; + +static int mp4sys_amr_get_accessunit( mp4sys_importer_t* importer, uint32_t track_number, lsmash_sample_t *buffered_sample ) +{ + debug_if( !importer || !importer->info || !buffered_sample->data || !buffered_sample->length ) + return -1; + if( track_number != 1 ) + return -1; + mp4sys_amr_info_t *info = (mp4sys_amr_info_t *)importer->info; + + uint8_t* buf = buffered_sample->data; + if( fread( buf, 1, 1, importer->stream ) == 0 ) + { + /* EOF */ + buffered_sample->length = 0; + return 0; + } + uint8_t FT = (*buf >> 3) & 0x0F; + + /* AMR-NB has varieties of frame-size table like this. so I'm not sure yet. 
*/ + const int frame_size[2][16] = { + { 13, 14, 16, 18, 20, 21, 27, 32, 5, 5, 5, 5, 0, 0, 0, 1 }, + { 18, 24, 33, 37, 41, 47, 51, 59, 61, 6, 6, 0, 0, 0, 1, 1 } + }; + int read_size = frame_size[info->wb][FT]; + if( read_size == 0 || buffered_sample->length < read_size-- ) + return -1; + if( read_size == 0 ) + buffered_sample->length = 1; + else + { + if( fread( buf+1, 1, read_size, importer->stream ) != read_size ) + return -1; + buffered_sample->length = read_size + 1; + } + buffered_sample->dts = info->au_number++ * info->samples_in_frame; + buffered_sample->cts = buffered_sample->dts; + buffered_sample->prop.ra_flags = ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC; + return 0; +} + +#define MP4SYS_DAMR_LENGTH 17 + +int mp4sys_amr_create_damr( lsmash_audio_summary_t *summary ) +{ + lsmash_bs_t* bs = lsmash_bs_create( NULL ); /* no file writing */ + if( !bs ) + return -1; + lsmash_bs_put_be32( bs, MP4SYS_DAMR_LENGTH ); + lsmash_bs_put_be32( bs, ISOM_BOX_TYPE_DAMR.fourcc ); + /* NOTE: These are specific to each codec vendor, but we're surely not a vendor. + Using dummy data. */ + lsmash_bs_put_be32( bs, 0x20202020 ); /* vendor */ + lsmash_bs_put_byte( bs, 0 ); /* decoder_version */ + /* NOTE: Using safe value for these settings, maybe sub-optimal. */ + lsmash_bs_put_be16( bs, 0x83FF ); /* mode_set, represents for possibly existing frame-type (0x83FF == all). 
*/
+    lsmash_bs_put_byte( bs, 1 ); /* mode_change_period */
+    lsmash_bs_put_byte( bs, 1 ); /* frames_per_sample */
+    lsmash_codec_specific_t *specific = lsmash_malloc_zero( sizeof(lsmash_codec_specific_t) );
+    if( !specific )
+    {
+        /* The summary stays owned by the caller on every failure, exactly as when lsmash_bs_create() fails.
+         * Releasing it here made mp4sys_amr_probe(), which releases it on failure too, free it twice. */
+        lsmash_bs_cleanup( bs );
+        return -1;
+    }
+    specific->type              = LSMASH_CODEC_SPECIFIC_DATA_TYPE_UNKNOWN;
+    specific->format            = LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED;
+    specific->destruct          = (lsmash_codec_specific_destructor_t)free;
+    specific->data.unstructured = lsmash_bs_export_data( bs, &specific->size );
+    specific->size              = MP4SYS_DAMR_LENGTH;
+    lsmash_bs_cleanup( bs );
+    if( !specific->data.unstructured
+     || lsmash_add_entry( &summary->opaque->list, specific ) )
+    {
+        /* Only what was allocated in this function is released; the summary is left to the caller. */
+        lsmash_destroy_codec_specific_data( specific );
+        return -1;
+    }
+    return 0;
+}
+
+#define MP4SYS_AMR_STORAGE_MAGIC_LENGTH 6
+#define MP4SYS_AMRWB_EX_MAGIC_LENGTH    3
+
+/* Check the magic number of the AMR storage format ("#!AMR\n" for AMR-NB, "#!AMR-WB\n" for AMR-WB)
+ * and, if it matches, set up the summary and the importer status.
+ * Return 0 on success and -1 on failure. */
+static int mp4sys_amr_probe( mp4sys_importer_t* importer )
+{
+    uint8_t buf[MP4SYS_AMR_STORAGE_MAGIC_LENGTH];
+    uint8_t wb = 0;
+    if( fread( buf, 1, MP4SYS_AMR_STORAGE_MAGIC_LENGTH, importer->stream ) != MP4SYS_AMR_STORAGE_MAGIC_LENGTH )
+        return -1;
+    if( memcmp( buf, "#!AMR", MP4SYS_AMR_STORAGE_MAGIC_LENGTH-1 ) )
+        return -1;
+    if( buf[MP4SYS_AMR_STORAGE_MAGIC_LENGTH-1] != '\n' )
+    {
+        /* Not AMR-NB. The magic number must continue with "-WB\n" then. */
+        if( buf[MP4SYS_AMR_STORAGE_MAGIC_LENGTH-1] != '-' )
+            return -1;
+        if( fread( buf, 1, MP4SYS_AMRWB_EX_MAGIC_LENGTH, importer->stream ) != MP4SYS_AMRWB_EX_MAGIC_LENGTH )
+            return -1;
+        if( memcmp( buf, "WB\n", MP4SYS_AMRWB_EX_MAGIC_LENGTH ) )
+            return -1;
+        wb = 1;
+    }
+    lsmash_audio_summary_t *summary = (lsmash_audio_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_AUDIO );
+    if( !summary )
+        return -1;
+    summary->sample_type      = wb ? ISOM_CODEC_TYPE_SAWB_AUDIO : ISOM_CODEC_TYPE_SAMR_AUDIO;
+    summary->max_au_length    = wb ?
61 : 32; + summary->aot = MP4A_AUDIO_OBJECT_TYPE_NULL; /* no effect */ + summary->frequency = (8000 << wb); + summary->channels = 1; + summary->sample_size = 16; + summary->samples_in_frame = (160 << wb); + summary->sbr_mode = MP4A_AAC_SBR_NOT_SPECIFIED; /* no effect */ + mp4sys_amr_info_t *info = malloc( sizeof(mp4sys_amr_info_t) ); + if( !info ) + { + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + return -1; + } + info->wb = wb; + info->samples_in_frame = summary->samples_in_frame; + info->au_number = 0; + importer->info = info; + if( mp4sys_amr_create_damr( summary ) || lsmash_add_entry( importer->summaries, summary ) ) + { + free( importer->info ); + importer->info = NULL; + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + return -1; + } + return 0; +} + +static uint32_t mp4sys_amr_get_last_delta( mp4sys_importer_t* importer, uint32_t track_number ) +{ + debug_if( !importer || !importer->info ) + return 0; + mp4sys_amr_info_t *info = (mp4sys_amr_info_t *)importer->info; + if( !info || track_number != 1 ) + return 0; + return info->samples_in_frame; +} + +const static mp4sys_importer_functions mp4sys_amr_importer = +{ + "amr", + 1, + mp4sys_amr_probe, + mp4sys_amr_get_accessunit, + mp4sys_amr_get_last_delta, + mp4sys_amr_cleanup +}; + +/*************************************************************************** + AC-3 importer + ETSI TS 102 366 V1.2.1 (2008-08) +***************************************************************************/ +#include "a52.h" + +#define AC3_SAMPLE_DURATION 1536 /* 256 (samples per audio block) * 6 (audio blocks) */ + +typedef struct +{ + mp4sys_importer_status status; + ac3_info_t info; +} mp4sys_ac3_info_t; + +static void mp4sys_remove_ac3_info( mp4sys_ac3_info_t *info ) +{ + if( !info ) + return; + lsmash_bits_adhoc_cleanup( info->info.bits ); + free( info ); +} + +static mp4sys_ac3_info_t *mp4sys_create_ac3_info( void ) +{ + mp4sys_ac3_info_t *info = (mp4sys_ac3_info_t *)lsmash_malloc_zero( 
sizeof(mp4sys_ac3_info_t) ); + if( !info ) + return NULL; + info->info.bits = lsmash_bits_adhoc_create(); + if( !info->info.bits ) + { + free( info ); + return NULL; + } + return info; +} + +static void mp4sys_ac3_cleanup( mp4sys_importer_t *importer ) +{ + debug_if( importer && importer->info ) + mp4sys_remove_ac3_info( importer->info ); +} + +static const uint32_t ac3_frame_size_table[19][3] = +{ + /* 48, 44.1, 32 */ + { 128, 138, 192 }, + { 160, 174, 240 }, + { 192, 208, 288 }, + { 224, 242, 336 }, + { 256, 278, 384 }, + { 320, 348, 480 }, + { 384, 416, 576 }, + { 448, 486, 672 }, + { 512, 556, 768 }, + { 640, 696, 960 }, + { 768, 834, 1152 }, + { 896, 974, 1344 }, + { 1024, 1114, 1536 }, + { 1280, 1392, 1920 }, + { 1536, 1670, 2304 }, + { 1792, 1950, 2688 }, + { 2048, 2228, 3072 }, + { 2304, 2506, 3456 }, + { 2560, 2786, 3840 } +}; + +static const uint32_t ac3_channel_count_table[8] = { 2, 1, 2, 3, 3, 4, 4, 5 }; + +#if 0 +/* FIXME: though this table is for AC-3 in QTFF, we don't support it yet since the structure of CODEC specific info is unknown. + * ChannelLayout is given by ac3_channel_layout_table[ acmod ][ lfeon ]. 
*/
+static const lsmash_channel_layout_tag ac3_channel_layout_table[8][2] =
+{
+    /*        LFE: off                      LFE: on             */
+    { QT_CHANNEL_LAYOUT_UNKNOWN,    QT_CHANNEL_LAYOUT_UNKNOWN    },     /* FIXME: dual mono */
+    { QT_CHANNEL_LAYOUT_MONO,       QT_CHANNEL_LAYOUT_AC3_1_0_1  },
+    { QT_CHANNEL_LAYOUT_STEREO,     QT_CHANNEL_LAYOUT_DVD_4      },
+    { QT_CHANNEL_LAYOUT_AC3_3_0,    QT_CHANNEL_LAYOUT_AC3_3_0_1  },
+    { QT_CHANNEL_LAYOUT_DVD_2,      QT_CHANNEL_LAYOUT_AC3_2_1_1  },
+    { QT_CHANNEL_LAYOUT_AC3_3_1,    QT_CHANNEL_LAYOUT_AC3_3_1_1  },
+    { QT_CHANNEL_LAYOUT_DVD_3,      QT_CHANNEL_LAYOUT_DVD_18     },
+    { QT_CHANNEL_LAYOUT_MPEG_5_0_C, QT_CHANNEL_LAYOUT_MPEG_5_1_C }
+};
+#endif
+
+/* Create an audio summary from the AC-3 specific parameters held in 'info'.
+ * The AC-3 specific info built from info->dac3_param is attached to the summary as CODEC specific data.
+ * Return the new summary, or NULL on failure. The caller owns the returned summary. */
+static lsmash_audio_summary_t *ac3_create_summary( ac3_info_t *info )
+{
+    lsmash_audio_summary_t *summary = (lsmash_audio_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_AUDIO );
+    if( !summary )
+        return NULL;
+    lsmash_ac3_specific_parameters_t *param = &info->dac3_param;
+    lsmash_codec_specific_t *specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_AC_3,
+                                                                           LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED );
+    if( !specific )
+    {
+        /* The allocation may fail; don't dereference the result before checking it. */
+        lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+        return NULL;
+    }
+    specific->data.unstructured = lsmash_create_ac3_specific_info( &info->dac3_param, &specific->size );
+    if( !specific->data.unstructured
+     || lsmash_add_entry( &summary->opaque->list, specific ) )
+    {
+        lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+        lsmash_destroy_codec_specific_data( specific );
+        return NULL;
+    }
+    summary->sample_type      = ISOM_CODEC_TYPE_AC_3_AUDIO;
+    summary->max_au_length    = AC3_MAX_SYNCFRAME_LENGTH;
+    summary->aot              = MP4A_AUDIO_OBJECT_TYPE_NULL;        /* no effect */
+    summary->frequency        = ac3_sample_rate_table[ param->fscod ];
+    summary->channels         = ac3_channel_count_table[ param->acmod ] + param->lfeon;
+    summary->sample_size      = 16;                                 /* no effect */
+    summary->samples_in_frame = AC3_SAMPLE_DURATION;
+    summary->sbr_mode         = MP4A_AAC_SBR_NOT_SPECIFIED;         /* no effect */
+    return summary;
+}
+
+static int ac3_compare_specific_param( lsmash_ac3_specific_parameters_t *a,
lsmash_ac3_specific_parameters_t *b ) +{ + return (a->fscod != b->fscod) + || (a->bsid != b->bsid) + || (a->bsmod != b->bsmod) + || (a->acmod != b->acmod) + || (a->lfeon != b->lfeon) + || ((a->frmsizecod >> 1) != (b->frmsizecod >> 1)); +} + +static int mp4sys_ac3_get_accessunit( mp4sys_importer_t *importer, uint32_t track_number, lsmash_sample_t *buffered_sample ) +{ + debug_if( !importer || !importer->info || !buffered_sample->data || !buffered_sample->length ) + return -1; + if( !importer->info || track_number != 1 ) + return -1; + lsmash_audio_summary_t *summary = (lsmash_audio_summary_t *)lsmash_get_entry_data( importer->summaries, track_number ); + if( !summary ) + return -1; + mp4sys_ac3_info_t *importer_info = (mp4sys_ac3_info_t *)importer->info; + ac3_info_t *info = &importer_info->info; + mp4sys_importer_status current_status = importer_info->status; + if( current_status == MP4SYS_IMPORTER_ERROR ) + return -1; + if( current_status == MP4SYS_IMPORTER_EOF ) + { + buffered_sample->length = 0; + return 0; + } + lsmash_ac3_specific_parameters_t *param = &info->dac3_param; + uint32_t frame_size = ac3_frame_size_table[ param->frmsizecod >> 1 ][ param->fscod ]; + if( param->fscod == 0x1 && param->frmsizecod & 0x1 ) + frame_size += 2; + if( buffered_sample->length < frame_size ) + return -1; + if( current_status == MP4SYS_IMPORTER_CHANGE ) + { + lsmash_codec_specific_t *specific = isom_get_codec_specific( summary->opaque, LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_AC_3 ); + if( specific ) + { + specific->destruct( specific->data.unstructured ); + specific->data.unstructured = info->next_dac3; + } + summary->frequency = ac3_sample_rate_table[ param->fscod ]; + summary->channels = ac3_channel_count_table[ param->acmod ] + param->lfeon; + //summary->layout_tag = ac3_channel_layout_table[ param->acmod ][ param->lfeon ]; + } + if( frame_size > AC3_MIN_SYNCFRAME_LENGTH ) + { + uint32_t read_size = frame_size - AC3_MIN_SYNCFRAME_LENGTH; + if( fread( info->buffer + 
AC3_MIN_SYNCFRAME_LENGTH, 1, read_size, importer->stream ) != read_size )
+            return -1;
+    }
+    memcpy( buffered_sample->data, info->buffer, frame_size );
+    buffered_sample->length                 = frame_size;
+    buffered_sample->dts                    = info->au_number++ * summary->samples_in_frame;
+    buffered_sample->cts                    = buffered_sample->dts;
+    buffered_sample->prop.ra_flags          = ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC;
+    buffered_sample->prop.pre_roll.distance = 1;    /* MDCT */
+    /* The current syncframe has been delivered. Whatever happens below concerns the next one only,
+     * so it is recorded in importer_info->status and the status of the current syncframe is returned. */
+    if( fread( info->buffer, 1, AC3_MIN_SYNCFRAME_LENGTH, importer->stream ) != AC3_MIN_SYNCFRAME_LENGTH )
+        importer_info->status = MP4SYS_IMPORTER_EOF;
+    else
+    {
+        /* Parse the next syncframe header. */
+        IF_A52_SYNCWORD( info->buffer )
+        {
+            importer_info->status = MP4SYS_IMPORTER_ERROR;
+            return current_status;
+        }
+        lsmash_ac3_specific_parameters_t current_param = info->dac3_param;
+        if( ac3_parse_syncframe_header( info, info->buffer ) )
+        {
+            /* Don't go on with parameters of a header that failed to parse;
+             * they are used to index the frame size table on the next call. */
+            importer_info->status = MP4SYS_IMPORTER_ERROR;
+            return current_status;
+        }
+        if( ac3_compare_specific_param( &current_param, &info->dac3_param ) )
+        {
+            /* The parameters changed. Prepare the AC-3 specific info to be installed on the next call. */
+            uint32_t dummy;
+            uint8_t *dac3 = lsmash_create_ac3_specific_info( &info->dac3_param, &dummy );
+            if( !dac3 )
+            {
+                importer_info->status = MP4SYS_IMPORTER_ERROR;
+                return current_status;
+            }
+            importer_info->status = MP4SYS_IMPORTER_CHANGE;
+            info->next_dac3 = dac3;
+        }
+        else
+            importer_info->status = MP4SYS_IMPORTER_OK;
+    }
+    return current_status;
+}
+
+/* Check whether the stream starts with an AC-3 syncframe and, if so, set up the summary and the importer status.
+ * The header read here is kept in info->info.buffer for the first call of mp4sys_ac3_get_accessunit().
+ * Return 0 on success and -1 on failure. */
+static int mp4sys_ac3_probe( mp4sys_importer_t* importer )
+{
+    uint8_t buf[AC3_MIN_SYNCFRAME_LENGTH];
+    if( fread( buf, 1, AC3_MIN_SYNCFRAME_LENGTH, importer->stream ) != AC3_MIN_SYNCFRAME_LENGTH )
+        return -1;
+    IF_A52_SYNCWORD( buf )
+        return -1;
+    mp4sys_ac3_info_t *info = mp4sys_create_ac3_info();
+    if( !info )
+        return -1;
+    if( ac3_parse_syncframe_header( &info->info, buf ) )
+    {
+        mp4sys_remove_ac3_info( info );
+        return -1;
+    }
+    lsmash_audio_summary_t *summary = ac3_create_summary( &info->info );
+    if( !summary )
+    {
+        mp4sys_remove_ac3_info( info );
+        return -1;
+    }
+    info->status         = MP4SYS_IMPORTER_OK;
+    info->info.au_number = 0;
+    memcpy( info->info.buffer, buf,
AC3_MIN_SYNCFRAME_LENGTH ); + importer->info = info; + if( lsmash_add_entry( importer->summaries, summary ) ) + { + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + mp4sys_remove_ac3_info( importer->info ); + importer->info = NULL; + return -1; + } + return 0; +} + +static uint32_t mp4sys_ac3_get_last_delta( mp4sys_importer_t* importer, uint32_t track_number ) +{ + debug_if( !importer || !importer->info ) + return 0; + mp4sys_ac3_info_t *info = (mp4sys_ac3_info_t *)importer->info; + if( !info || track_number != 1 || info->status != MP4SYS_IMPORTER_EOF ) + return 0; + return AC3_SAMPLE_DURATION; +} + +const static mp4sys_importer_functions mp4sys_ac3_importer = +{ + "AC-3", + 1, + mp4sys_ac3_probe, + mp4sys_ac3_get_accessunit, + mp4sys_ac3_get_last_delta, + mp4sys_ac3_cleanup +}; + +/*************************************************************************** + Enhanced AC-3 importer + ETSI TS 102 366 V1.2.1 (2008-08) +***************************************************************************/ +#define EAC3_MIN_SAMPLE_DURATION 256 + +typedef struct +{ + mp4sys_importer_status status; + eac3_info_t info; +} mp4sys_eac3_info_t; + +static void mp4sys_remove_eac3_info( mp4sys_eac3_info_t *info ) +{ + if( !info ) + return; + lsmash_destroy_multiple_buffers( info->info.au_buffers ); + lsmash_bits_adhoc_cleanup( info->info.bits ); + free( info ); +} + +static mp4sys_eac3_info_t *mp4sys_create_eac3_info( void ) +{ + mp4sys_eac3_info_t *info = (mp4sys_eac3_info_t *)lsmash_malloc_zero( sizeof(mp4sys_eac3_info_t) ); + if( !info ) + return NULL; + eac3_info_t *eac3_info = &info->info; + eac3_info->buffer_pos = eac3_info->buffer; + eac3_info->buffer_end = eac3_info->buffer; + eac3_info->bits = lsmash_bits_adhoc_create(); + if( !eac3_info->bits ) + { + free( info ); + return NULL; + } + eac3_info->au_buffers = lsmash_create_multiple_buffers( 2, EAC3_MAX_SYNCFRAME_LENGTH ); + if( !eac3_info->au_buffers ) + { + lsmash_bits_adhoc_cleanup( eac3_info->bits ); + free( info ); + 
return NULL; + } + eac3_info->au = lsmash_withdraw_buffer( eac3_info->au_buffers, 1 ); + eac3_info->incomplete_au = lsmash_withdraw_buffer( eac3_info->au_buffers, 2 ); + return info; +} + +static void mp4sys_eac3_cleanup( mp4sys_importer_t *importer ) +{ + debug_if( importer && importer->info ) + mp4sys_remove_eac3_info( importer->info ); +} + +static void eac3_update_sample_rate( lsmash_audio_summary_t *summary, lsmash_eac3_specific_parameters_t *dec3_param ) +{ + /* Additional independent substreams 1 to 7 must be encoded at the same sample rate as independent substream 0. */ + summary->frequency = ac3_sample_rate_table[ dec3_param->independent_info[0].fscod ]; + if( summary->frequency == 0 ) + { + static const uint32_t eac3_reduced_sample_rate_table[4] = { 24000, 22050, 16000, 0 }; + summary->frequency = eac3_reduced_sample_rate_table[ dec3_param->independent_info[0].fscod2 ]; + } +} + +#if 0 +/* FIXME: though this table is for EAC-3 in QTFF, we don't support it yet since the structure of CODEC specific info is unknown. */ +static void eac3_update_channel_layout( lsmash_audio_summary_t *summary, lsmash_eac3_substream_info_t *independent_info ) +{ + if( independent_info->chan_loc == 0 ) + { + summary->layout_tag = ac3_channel_layout_table[ independent_info->acmod ][ independent_info->lfeon ]; + return; + } + else if( independent_info->acmod != 0x7 ) + { + summary->layout_tag = QT_CHANNEL_LAYOUT_UNKNOWN; + return; + } + /* OK. All L, C, R, Ls and Rs exsist. */ + if( !independent_info->lfeon ) + { + if( independent_info->chan_loc == 0x80 ) + summary->layout_tag = QT_CHANNEL_LAYOUT_EAC_7_0_A; + else if( independent_info->chan_loc == 0x40 ) + summary->layout_tag = QT_CHANNEL_LAYOUT_EAC_6_0_A; + else + summary->layout_tag = QT_CHANNEL_LAYOUT_UNKNOWN; + return; + } + /* Also LFE exsists. 
*/ + static const struct + { + uint16_t chan_loc; + lsmash_channel_layout_tag tag; + } eac3_channel_layout_table[] + = { + { 0x100, QT_CHANNEL_LAYOUT_EAC3_7_1_B }, + { 0x80, QT_CHANNEL_LAYOUT_EAC3_7_1_A }, + { 0x40, QT_CHANNEL_LAYOUT_EAC3_6_1_A }, + { 0x20, QT_CHANNEL_LAYOUT_EAC3_6_1_B }, + { 0x10, QT_CHANNEL_LAYOUT_EAC3_7_1_C }, + { 0x10, QT_CHANNEL_LAYOUT_EAC3_7_1_D }, + { 0x4, QT_CHANNEL_LAYOUT_EAC3_7_1_E }, + { 0x2, QT_CHANNEL_LAYOUT_EAC3_6_1_C }, + { 0x60, QT_CHANNEL_LAYOUT_EAC3_7_1_F }, + { 0x42, QT_CHANNEL_LAYOUT_EAC3_7_1_G }, + { 0x22, QT_CHANNEL_LAYOUT_EAC3_7_1_H }, + { 0 } + }; + for( int i = 0; eac3_channel_layout_table[i].chan_loc; i++ ) + if( independent_info->chan_loc == eac3_channel_layout_table[i].chan_loc ) + { + summary->layout_tag = eac3_channel_layout_table[i].tag; + return; + } + summary->layout_tag = QT_CHANNEL_LAYOUT_UNKNOWN; +} +#endif + +static void eac3_update_channel_info( lsmash_audio_summary_t *summary, lsmash_eac3_specific_parameters_t *dec3_param ) +{ + summary->channels = 0; + for( int i = 0; i <= dec3_param->num_ind_sub; i++ ) + { + int channel_count = 0; + lsmash_eac3_substream_info_t *independent_info = &dec3_param->independent_info[i]; + channel_count = ac3_channel_count_table[ independent_info->acmod ] /* L/C/R/Ls/Rs combination */ + + 2 * !!(independent_info->chan_loc & 0x100) /* Lc/Rc pair */ + + 2 * !!(independent_info->chan_loc & 0x80) /* Lrs/Rrs pair */ + + !!(independent_info->chan_loc & 0x40) /* Cs */ + + !!(independent_info->chan_loc & 0x20) /* Ts */ + + 2 * !!(independent_info->chan_loc & 0x10) /* Lsd/Rsd pair */ + + 2 * !!(independent_info->chan_loc & 0x8) /* Lw/Rw pair */ + + 2 * !!(independent_info->chan_loc & 0x4) /* Lvh/Rvh pair */ + + !!(independent_info->chan_loc & 0x2) /* Cvh */ + + !!(independent_info->chan_loc & 0x1) /* LFE2 */ + + independent_info->lfeon; /* LFE */ + if( channel_count > summary->channels ) + { + /* Pick the maximum number of channels. 
*/ + summary->channels = channel_count; + //eac3_update_channel_layout( summary, independent_info ); + } + } +} + +static int eac3_get_next_accessunit_internal( mp4sys_importer_t *importer ) +{ + int complete_au = 0; + mp4sys_eac3_info_t *importer_info = (mp4sys_eac3_info_t *)importer->info; + eac3_info_t *info = &importer_info->info; + while( !complete_au ) + { + /* Read data from the stream if needed. */ + uint32_t remainder_length = info->buffer_end - info->buffer_pos; + if( !info->no_more_read && remainder_length < EAC3_MAX_SYNCFRAME_LENGTH ) + { + if( remainder_length ) + memmove( info->buffer, info->buffer_pos, remainder_length ); + uint32_t read_size = fread( info->buffer + remainder_length, 1, EAC3_MAX_SYNCFRAME_LENGTH, importer->stream ); + remainder_length += read_size; + info->buffer_pos = info->buffer; + info->buffer_end = info->buffer + remainder_length; + info->no_more_read = read_size == 0 ? feof( importer->stream ) : 0; + } + /* Check the remainder length of the buffer. + * If there is enough length, then parse the syncframe in it. + * The length 5 is the required byte length to get frame size. */ + if( remainder_length < 5 ) + { + /* Reached the end of stream. + * According to ETSI TS 102 366 V1.2.1 (2008-08), + * one access unit consists of 6 audio blocks and begins with independent substream 0. + * The specification doesn't mention the case where a enhanced AC-3 stream ends at non-mod6 audio blocks. + * At the end of the stream, therefore, we might make an access unit which has less than 6 audio blocks anyway. */ + importer_info->status = MP4SYS_IMPORTER_EOF; + complete_au = !!info->incomplete_au_length; + if( !complete_au ) + return remainder_length ? -1 : 0; /* No more access units in the stream. */ + if( !info->dec3_param_initialized ) + eac3_update_specific_param( info ); + } + else + { + /* Parse syncframe. 
*/ + IF_A52_SYNCWORD( info->buffer_pos ) + return -1; + info->frame_size = 0; + if( eac3_parse_syncframe( info, info->buffer_pos, LSMASH_MIN( remainder_length, EAC3_MAX_SYNCFRAME_LENGTH ) ) ) + return -1; + if( remainder_length < info->frame_size ) + return -1; + int independent = info->strmtyp != 0x1; + if( independent && info->substreamid == 0x0 ) + { + if( info->number_of_audio_blocks == 6 ) + { + /* Encountered the first syncframe of the next access unit. */ + info->number_of_audio_blocks = 0; + complete_au = 1; + } + else if( info->number_of_audio_blocks > 6 ) + return -1; + info->number_of_audio_blocks += eac3_audio_block_table[ info->numblkscod ]; + info->number_of_independent_substreams = 0; + } + else if( info->syncframe_count == 0 ) + /* The first syncframe in an AU must be independent and assigned substream ID 0. */ + return -1; + if( independent ) + info->independent_info[info->number_of_independent_substreams ++].num_dep_sub = 0; + else + ++ info->independent_info[info->number_of_independent_substreams - 1].num_dep_sub; + } + if( complete_au ) + { + memcpy( info->au, info->incomplete_au, info->incomplete_au_length ); + info->au_length = info->incomplete_au_length; + info->incomplete_au_length = 0; + info->syncframe_count_in_au = info->syncframe_count; + info->syncframe_count = 0; + if( importer_info->status == MP4SYS_IMPORTER_EOF ) + break; + } + /* Increase buffer size to store AU if short. */ + if( info->incomplete_au_length + info->frame_size > info->au_buffers->buffer_size ) + { + lsmash_multiple_buffers_t *temp = lsmash_resize_multiple_buffers( info->au_buffers, info->au_buffers->buffer_size + EAC3_MAX_SYNCFRAME_LENGTH ); + if( !temp ) + return -1; + info->au_buffers = temp; + info->au = lsmash_withdraw_buffer( info->au_buffers, 1 ); + info->incomplete_au = lsmash_withdraw_buffer( info->au_buffers, 2 ); + } + /* Append syncframe data. 
*/ + memcpy( info->incomplete_au + info->incomplete_au_length, info->buffer_pos, info->frame_size ); + info->incomplete_au_length += info->frame_size; + info->buffer_pos += info->frame_size; + ++ info->syncframe_count; + } + return info->bits->bs->error ? -1 : 0; +} + +static int mp4sys_eac3_get_accessunit( mp4sys_importer_t *importer, uint32_t track_number, lsmash_sample_t *buffered_sample ) +{ + debug_if( !importer || !importer->info || !buffered_sample->data || !buffered_sample->length ) + return -1; + if( !importer->info || track_number != 1 ) + return -1; + lsmash_audio_summary_t *summary = (lsmash_audio_summary_t *)lsmash_get_entry_data( importer->summaries, track_number ); + if( !summary ) + return -1; + mp4sys_eac3_info_t *importer_info = (mp4sys_eac3_info_t *)importer->info; + eac3_info_t *info = &importer_info->info; + mp4sys_importer_status current_status = importer_info->status; + if( current_status == MP4SYS_IMPORTER_ERROR || buffered_sample->length < info->au_length ) + return -1; + if( current_status == MP4SYS_IMPORTER_EOF && info->au_length == 0 ) + { + buffered_sample->length = 0; + return 0; + } + if( current_status == MP4SYS_IMPORTER_CHANGE ) + { + lsmash_codec_specific_t *specific = isom_get_codec_specific( summary->opaque, LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_EC_3 ); + if( specific ) + { + specific->destruct( specific->data.unstructured ); + specific->data.unstructured = info->next_dec3; + specific->size = info->next_dec3_length; + } + summary->max_au_length = info->syncframe_count_in_au * EAC3_MAX_SYNCFRAME_LENGTH; + eac3_update_sample_rate( summary, &info->dec3_param ); + eac3_update_channel_info( summary, &info->dec3_param ); + } + memcpy( buffered_sample->data, info->au, info->au_length ); + buffered_sample->length = info->au_length; + buffered_sample->dts = info->au_number++ * summary->samples_in_frame; + buffered_sample->cts = buffered_sample->dts; + buffered_sample->prop.ra_flags = ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC; + 
buffered_sample->prop.pre_roll.distance = 1;    /* MDCT */
+    if( importer_info->status == MP4SYS_IMPORTER_EOF )
+    {
+        /* The access unit just delivered was the last one. */
+        info->au_length = 0;
+        return 0;
+    }
+    uint32_t old_syncframe_count_in_au = info->syncframe_count_in_au;
+    if( eac3_get_next_accessunit_internal( importer ) )
+    {
+        importer_info->status = MP4SYS_IMPORTER_ERROR;
+        return current_status;
+    }
+    if( info->syncframe_count_in_au )
+    {
+        /* Check sample description change. */
+        uint32_t new_length;
+        uint8_t *dec3 = lsmash_create_eac3_specific_info( &info->dec3_param, &new_length );
+        if( !dec3 )
+        {
+            importer_info->status = MP4SYS_IMPORTER_ERROR;
+            return current_status;
+        }
+        lsmash_codec_specific_t *specific = isom_get_codec_specific( summary->opaque, LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_EC_3 );
+        if( (info->syncframe_count_in_au > old_syncframe_count_in_au)
+         || (specific && (new_length != specific->size || memcmp( dec3, specific->data.unstructured, specific->size ))) )
+        {
+            /* Keep the new EC-3 specific info; it is installed on the next call. */
+            importer_info->status = MP4SYS_IMPORTER_CHANGE;
+            info->next_dec3        = dec3;
+            info->next_dec3_length = new_length;
+        }
+        else
+        {
+            if( importer_info->status != MP4SYS_IMPORTER_EOF )
+                importer_info->status = MP4SYS_IMPORTER_OK;
+            free( dec3 );
+        }
+    }
+    return current_status;
+}
+
+/* Create an audio summary from the Enhanced AC-3 specific parameters held in 'info'.
+ * The EC-3 specific info built from info->dec3_param is attached to the summary as CODEC specific data.
+ * Return the new summary, or NULL on failure. The caller owns the returned summary. */
+static lsmash_audio_summary_t *eac3_create_summary( eac3_info_t *info )
+{
+    lsmash_audio_summary_t *summary = (lsmash_audio_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_AUDIO );
+    if( !summary )
+        return NULL;
+    lsmash_codec_specific_t *specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_EC_3,
+                                                                           LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED );
+    if( !specific )
+    {
+        /* The allocation may fail; don't dereference the result before checking it. */
+        lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+        return NULL;
+    }
+    specific->data.unstructured = lsmash_create_eac3_specific_info( &info->dec3_param, &specific->size );
+    if( !specific->data.unstructured
+     || lsmash_add_entry( &summary->opaque->list, specific ) )
+    {
+        lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+        lsmash_destroy_codec_specific_data( specific );
+        return NULL;
+    }
+    summary->sample_type      =
ISOM_CODEC_TYPE_EC_3_AUDIO; + summary->max_au_length = info->syncframe_count_in_au * EAC3_MAX_SYNCFRAME_LENGTH; + summary->aot = MP4A_AUDIO_OBJECT_TYPE_NULL; /* no effect */ + summary->sample_size = 16; /* no effect */ + summary->samples_in_frame = EAC3_MIN_SAMPLE_DURATION * 6; /* 256 (samples per audio block) * 6 (audio blocks) */ + summary->sbr_mode = MP4A_AAC_SBR_NOT_SPECIFIED; /* no effect */ + eac3_update_sample_rate( summary, &info->dec3_param ); + eac3_update_channel_info( summary, &info->dec3_param ); + return summary; +} + +static int mp4sys_eac3_probe( mp4sys_importer_t* importer ) +{ + mp4sys_eac3_info_t *info = mp4sys_create_eac3_info(); + if( !info ) + return -1; + importer->info = info; + if( eac3_get_next_accessunit_internal( importer ) ) + { + mp4sys_remove_eac3_info( importer->info ); + importer->info = NULL; + return -1; + } + lsmash_audio_summary_t *summary = eac3_create_summary( &info->info ); + if( !summary ) + { + mp4sys_remove_eac3_info( importer->info ); + importer->info = NULL; + return -1; + } + if( info->status != MP4SYS_IMPORTER_EOF ) + info->status = MP4SYS_IMPORTER_OK; + info->info.au_number = 0; + if( lsmash_add_entry( importer->summaries, summary ) ) + { + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + mp4sys_remove_eac3_info( importer->info ); + importer->info = NULL; + return -1; + } + return 0; +} + +static uint32_t mp4sys_eac3_get_last_delta( mp4sys_importer_t* importer, uint32_t track_number ) +{ + debug_if( !importer || !importer->info ) + return 0; + mp4sys_eac3_info_t *info = (mp4sys_eac3_info_t *)importer->info; + if( !info || track_number != 1 || info->status != MP4SYS_IMPORTER_EOF || info->info.au_length ) + return 0; + return EAC3_MIN_SAMPLE_DURATION * info->info.number_of_audio_blocks; +} + +const static mp4sys_importer_functions mp4sys_eac3_importer = +{ + "Enhanced AC-3", + 1, + mp4sys_eac3_probe, + mp4sys_eac3_get_accessunit, + mp4sys_eac3_get_last_delta, + mp4sys_eac3_cleanup +}; + 
+/***************************************************************************
+    MPEG-4 ALS importer
+    ISO/IEC 14496-3 2009 Fourth edition
+***************************************************************************/
+#define ALSSC_TWELVE_LENGTH 22
+
+/* Fields picked out of ALSSpecificConfig plus the data derived from them. */
+typedef struct
+{
+    uint32_t size;                  /* byte length of sc_data */
+    uint32_t samp_freq;
+    uint32_t samples;
+    uint32_t channels;
+    uint16_t frame_length;
+    uint8_t resolution;
+    uint8_t random_access;
+    uint8_t ra_flag;
+    uint32_t access_unit_size;
+    uint32_t number_of_ra_units;
+    uint32_t *ra_unit_size;         /* heap-allocated, may be NULL */
+    uint8_t *sc_data;               /* heap-allocated copy of ALSSpecificConfig */
+} als_specific_config_t;
+
+typedef struct
+{
+    mp4sys_importer_status status;
+    als_specific_config_t alssc;
+    uint32_t samples_in_frame;
+    uint32_t au_number;
+} mp4sys_als_info_t;
+
+/* Growable buffer filled from 'stream'; 'pos' is the current parsing/writing offset. */
+typedef struct
+{
+    FILE *stream;
+    uint32_t pos;
+    uint32_t buffer_size;
+    uint8_t *buffer;
+    uint8_t *end;
+} als_stream_manager;
+
+/* Free an ALS importer info together with the buffers owned by its ALSSpecificConfig.
+ * NULL is accepted, as in the DTS and H.264 counterparts. */
+static void mp4sys_remove_als_info( mp4sys_als_info_t *info )
+{
+    if( !info )
+        return;
+    free( info->alssc.ra_unit_size );   /* free( NULL ) is a no-op */
+    free( info->alssc.sc_data );
+    free( info );
+}
+
+static void mp4sys_als_cleanup( mp4sys_importer_t *importer )
+{
+    debug_if( importer && importer->info )
+        mp4sys_remove_als_info( importer->info );
+}
+
+/* Read exactly read_size bytes from the stream into the buffer at offset manager->pos.
+ * The buffer is enlarged when [pos, pos + read_size) does not fit.
+ * manager->pos is not advanced; manager->end is set just past the bytes read.
+ * Return 0 on success, -1 on allocation failure or when fewer bytes than requested
+ * could be read. A request for 0 bytes succeeds without touching the stream. */
+static int als_stream_read( als_stream_manager *manager, uint32_t read_size )
+{
+    if( read_size > UINT32_MAX - manager->pos )
+        return -1;  /* pos + read_size would overflow */
+    uint32_t required_size = manager->pos + read_size;
+    if( required_size > manager->buffer_size )
+    {
+        uint8_t *temp = realloc( manager->buffer, required_size );
+        if( !temp )
+            return -1;
+        manager->buffer = temp;
+        manager->buffer_size = required_size;
+    }
+    if( read_size == 0 )
+    {
+        /* Nothing to read, e.g. an empty original header/trailer. */
+        manager->end = manager->buffer + manager->pos;
+        return 0;
+    }
+    size_t actual_read_size = fread( manager->buffer + manager->pos, 1, read_size, manager->stream );
+    if( actual_read_size != read_size )
+        return -1;  /* truncated stream or read error */
+    manager->end = manager->buffer + manager->pos + actual_read_size;
+    return 0;
+}
+
+/* Release the buffer of the stream manager.
+ * Always returns -1 so that error paths can write 'return als_cleanup_stream_manager( ... );'. */
+static int als_cleanup_stream_manager( als_stream_manager *manager )
+{
+    free( manager->buffer );
+    return -1;
+}
+
+static uint32_t als_get_be32( als_stream_manager *manager )
+{
+    uint32_t value =
(manager->buffer[ manager->pos ] << 24) + | (manager->buffer[ manager->pos + 1 ] << 16) + | (manager->buffer[ manager->pos + 2 ] << 8) + | manager->buffer[ manager->pos + 3 ]; + manager->pos += 4; + return value; +} + +static int als_parse_specific_config( mp4sys_importer_t *importer, uint8_t *buf, als_specific_config_t *alssc ) +{ + alssc->samp_freq = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7]; + alssc->samples = (buf[8] << 24) | (buf[9] << 16) | (buf[10] << 8) | buf[11]; + if( alssc->samples == 0xffffffff ) + return -1; /* We don't support this case. */ + alssc->channels = (buf[12] << 8) | buf[13]; + alssc->resolution = (buf[14] & 0x1c) >> 2; + if( alssc->resolution > 3 ) + return -1; /* reserved */ + alssc->frame_length = (buf[15] << 8) | buf[16]; + alssc->random_access = buf[17]; + alssc->ra_flag = (buf[18] & 0xc0) >> 6; + if( alssc->ra_flag == 0 ) + return -1; /* We don't support this case. */ + buf[18] &= 0x3f; /* Set 0 to ra_flag. We will remove ra_unit_size in each access unit. */ +#if 0 + if( alssc->samples == 0xffffffff && alssc->ra_flag == 2 ) + return -1; +#endif + int chan_sort = !!(buf[20] & 0x1); + if( alssc->channels == 0 ) + { + if( buf[20] & 0x8 ) + return -1; /* If channels = 0 (mono), joint_stereo = 0. */ + else if( buf[20] & 0x4 ) + return -1; /* If channels = 0 (mono), mc_coding = 0. */ + else if( chan_sort ) + return -1; /* If channels = 0 (mono), chan_sort = 0. */ + } + int chan_config = !!(buf[20] & 0x2); + int crc_enabled = !!(buf[21] & 0x80); + int aux_data_enabled = !!(buf[21] & 0x1); + uint32_t read_size = 0; + if( chan_config ) + read_size += 2; /* chan_config_info */ + if( chan_sort ) + { + uint32_t ChBits; + for( ChBits = 1; alssc->channels >> ChBits; ChBits++ ); + uint32_t chan_pos_length = (alssc->channels + 1) * ChBits; + read_size += chan_pos_length / 8 + !!(chan_pos_length % 8); + } + /* Set up stream manager. 
*/ + als_stream_manager manager; + manager.stream = importer->stream; + manager.buffer_size = ALSSC_TWELVE_LENGTH; + manager.buffer = malloc( manager.buffer_size ); + if( !manager.buffer ) + return -1; + manager.pos = ALSSC_TWELVE_LENGTH + read_size; + manager.end = manager.buffer + manager.buffer_size; + memcpy( manager.buffer, buf, ALSSC_TWELVE_LENGTH ); + /* Continue to read and parse. */ + read_size += 8; /* header_size and trailer_size */ + if( als_stream_read( &manager, read_size ) ) + return als_cleanup_stream_manager( &manager ); + uint32_t header_size = als_get_be32( &manager ); + uint32_t trailer_size = als_get_be32( &manager ); + read_size = header_size * (header_size != 0xffffffff) + trailer_size * (trailer_size != 0xffffffff) + 4 * crc_enabled; + if( als_stream_read( &manager, read_size ) ) + return -1; + manager.pos += read_size; /* Skip orig_header, orig_trailer and crc. */ + /* Random access unit */ + uint32_t number_of_frames = (alssc->samples / (alssc->frame_length + 1)) + !!(alssc->samples % (alssc->frame_length + 1)); + if( alssc->random_access != 0 ) + alssc->number_of_ra_units = number_of_frames / alssc->random_access + !!(number_of_frames % alssc->random_access); + else + alssc->number_of_ra_units = 0; + if( alssc->ra_flag == 2 && alssc->random_access != 0 ) + { + uint32_t pos = manager.pos; + read_size = alssc->number_of_ra_units * 4; + if( als_stream_read( &manager, read_size ) ) + return als_cleanup_stream_manager( &manager ); + alssc->ra_unit_size = malloc( alssc->number_of_ra_units * sizeof(uint32_t) ); + if( !alssc->ra_unit_size ) + return als_cleanup_stream_manager( &manager ); + for( uint32_t i = 0; i < alssc->number_of_ra_units; i++ ) + alssc->ra_unit_size[i] = als_get_be32( &manager ); + manager.pos = pos; /* Remove ra_unit_size. 
*/
+    }
+    else
+        alssc->ra_unit_size = NULL;
+    /* auxiliary data */
+    if( aux_data_enabled )
+    {
+        if( als_stream_read( &manager, 4 ) )
+            return als_cleanup_stream_manager( &manager );
+        uint32_t aux_size = als_get_be32( &manager );
+        read_size = aux_size * (aux_size != 0xffffffff);
+        /* Empty auxiliary data is valid; don't treat "nothing to read" as a failure. */
+        if( read_size && als_stream_read( &manager, read_size ) )
+            return als_cleanup_stream_manager( &manager );
+        manager.pos += read_size;
+    }
+    /* Copy ALSSpecificConfig. */
+    alssc->size = manager.pos;
+    alssc->sc_data = malloc( alssc->size );
+    if( !alssc->sc_data )
+        return als_cleanup_stream_manager( &manager );
+    memcpy( alssc->sc_data, manager.buffer, alssc->size );
+    als_cleanup_stream_manager( &manager );
+    return 0;
+}
+
+/* Read the next ALS access unit (one random access unit) from the stream into buffered_sample.
+ * Only track_number 1 is accepted.
+ * buffered_sample->length is the capacity of buffered_sample->data on entry and the length of
+ * the access unit on return; it is set to 0 once EOF has been reached.
+ * Return 0 on success, -1 on error. */
+static int mp4sys_als_get_accessunit( mp4sys_importer_t *importer, uint32_t track_number, lsmash_sample_t *buffered_sample )
+{
+    debug_if( !importer || !importer->info || !buffered_sample->data || !buffered_sample->length )
+        return -1;
+    if( !importer->info || track_number != 1 )
+        return -1;
+    lsmash_audio_summary_t *summary = (lsmash_audio_summary_t *)lsmash_get_entry_data( importer->summaries, track_number );
+    if( !summary )
+        return -1;
+    mp4sys_als_info_t *info = (mp4sys_als_info_t *)importer->info;
+    mp4sys_importer_status current_status = info->status;
+    if( current_status == MP4SYS_IMPORTER_EOF )
+    {
+        buffered_sample->length = 0;
+        return 0;
+    }
+    als_specific_config_t *alssc = &info->alssc;
+    if( alssc->number_of_ra_units == 0 )
+    {
+        /* No random access: the rest of the stream is delivered as a single access unit.
+         * Refuse to write past the end of the destination, as the general path does. */
+        if( buffered_sample->length < alssc->access_unit_size )
+            return -1;
+        if( fread( buffered_sample->data, 1, alssc->access_unit_size, importer->stream ) != alssc->access_unit_size )
+            return -1;
+        buffered_sample->length = alssc->access_unit_size;
+        buffered_sample->cts = buffered_sample->dts = 0;
+        buffered_sample->prop.ra_flags = ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC;
+        info->status = MP4SYS_IMPORTER_EOF;
+        return 0;
+    }
+    uint32_t au_length;
+    if( alssc->ra_flag == 2 )
+        au_length = alssc->ra_unit_size[info->au_number]; /* sizes were taken from the header */
+    else /* if( alssc->ra_flag == 1 ) */
+    {
+        /* The size precedes each random access unit in the stream. */
+        uint8_t temp[4];
+        if( fread( temp,
1, 4, importer->stream ) != 4 )
+            return -1;
+        /* Cast before shifting: temp[0] is promoted to int and '<< 24' may hit the sign bit. */
+        au_length = ((uint32_t)temp[0] << 24) | ((uint32_t)temp[1] << 16) | ((uint32_t)temp[2] << 8) | (uint32_t)temp[3]; /* We remove ra_unit_size. */
+    }
+    if( buffered_sample->length < au_length )
+        return -1;
+    if( fread( buffered_sample->data, 1, au_length, importer->stream ) != au_length )
+        return -1;
+    buffered_sample->length = au_length;
+    buffered_sample->dts = info->au_number++ * info->samples_in_frame;
+    buffered_sample->cts = buffered_sample->dts;
+    buffered_sample->prop.ra_flags = ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC;
+    if( info->au_number == alssc->number_of_ra_units )
+        info->status = MP4SYS_IMPORTER_EOF;
+    return 0;
+}
+
+/* Create an audio summary for an MPEG-4 ALS stream from the parsed ALSSpecificConfig.
+ * When random access is disabled, alssc->access_unit_size is set to the number of bytes
+ * left in the stream; the stream position is restored afterwards.
+ * Return NULL on failure. */
+static lsmash_audio_summary_t *als_create_summary( mp4sys_importer_t *importer, als_specific_config_t *alssc )
+{
+    lsmash_audio_summary_t *summary = (lsmash_audio_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_AUDIO );
+    if( !summary )
+        return NULL;
+    summary->sample_type = ISOM_CODEC_TYPE_MP4A_AUDIO;
+    summary->aot = MP4A_AUDIO_OBJECT_TYPE_ALS;
+    summary->frequency = alssc->samp_freq;
+    summary->channels = alssc->channels + 1;
+    summary->sample_size = (alssc->resolution + 1) * 8;
+    summary->sbr_mode = MP4A_AAC_SBR_NOT_SPECIFIED; /* no effect */
+    if( alssc->random_access != 0 )
+    {
+        /* One access unit holds 'random_access' frames. */
+        summary->samples_in_frame = (alssc->frame_length + 1) * alssc->random_access;
+        summary->max_au_length = summary->channels * (summary->sample_size / 8) * summary->samples_in_frame;
+    }
+    else
+    {
+        summary->samples_in_frame = 0; /* hack for mp4sys_als_get_last_delta */
+        uint64_t pos = lsmash_ftell( importer->stream );
+        lsmash_fseek( importer->stream, 0, SEEK_END );
+        summary->max_au_length = alssc->access_unit_size = lsmash_ftell( importer->stream ) - pos;
+        lsmash_fseek( importer->stream, pos, SEEK_SET );
+    }
+    uint32_t data_length;
+    uint8_t *data = mp4a_export_AudioSpecificConfig( MP4A_AUDIO_OBJECT_TYPE_ALS,
+                                                     summary->frequency, summary->channels, summary->sbr_mode,
+                                                     alssc->sc_data, alssc->size, &data_length );
+    if( !data )
+    {
+
lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+        return NULL;
+    }
+    lsmash_codec_specific_t *specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_MP4SYS_DECODER_CONFIG,
+                                                                           LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED );
+    if( !specific )
+    {
+        lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+        free( data );
+        return NULL;
+    }
+    lsmash_mp4sys_decoder_parameters_t *param = (lsmash_mp4sys_decoder_parameters_t *)specific->data.structured;
+    param->objectTypeIndication = MP4SYS_OBJECT_TYPE_Audio_ISO_14496_3;
+    param->streamType = MP4SYS_STREAM_TYPE_AudioStream;
+    if( lsmash_set_mp4sys_decoder_specific_info( param, data, data_length ) )
+    {
+        lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+        lsmash_destroy_codec_specific_data( specific );
+        free( data );
+        return NULL;
+    }
+    free( data ); /* 'data' is released here on the success path as well */
+    if( lsmash_add_entry( &summary->opaque->list, specific ) )
+    {
+        lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+        lsmash_destroy_codec_specific_data( specific );
+        return NULL;
+    }
+    return summary;
+}
+/* Probe the stream as MPEG-4 ALS.
+ * The first ALSSC_TWELVE_LENGTH bytes are read from importer->stream and must start
+ * with the ALS identifier. Return 0 on success, -1 otherwise. */
+static int mp4sys_als_probe( mp4sys_importer_t *importer )
+{
+    uint8_t buf[ALSSC_TWELVE_LENGTH];
+    if( fread( buf, 1, ALSSC_TWELVE_LENGTH, importer->stream ) != ALSSC_TWELVE_LENGTH )
+        return -1;
+    /* Check ALS identifier( = 0x414C5300).
*/ + if( buf[0] != 0x41 || buf[1] != 0x4C || buf[2] != 0x53 || buf[3] != 0x00 ) + return -1; + als_specific_config_t alssc; + if( als_parse_specific_config( importer, buf, &alssc ) ) + return -1; + lsmash_audio_summary_t *summary = als_create_summary( importer, &alssc ); + if( !summary ) + return -1; + /* importer status */ + mp4sys_als_info_t *info = lsmash_malloc_zero( sizeof(mp4sys_als_info_t) ); + if( !info ) + { + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + return -1; + } + info->status = MP4SYS_IMPORTER_OK; + info->alssc = alssc; + info->samples_in_frame = summary->samples_in_frame; + if( lsmash_add_entry( importer->summaries, summary ) ) + { + free( info ); + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + return -1; + } + importer->info = info; + return 0; +} + +static uint32_t mp4sys_als_get_last_delta( mp4sys_importer_t* importer, uint32_t track_number ) +{ + debug_if( !importer || !importer->info ) + return 0; + mp4sys_als_info_t *info = (mp4sys_als_info_t *)importer->info; + if( !info || track_number != 1 || info->status != MP4SYS_IMPORTER_EOF ) + return 0; + als_specific_config_t *alssc = &info->alssc; + /* If alssc->number_of_ra_units == 0, then the last sample duration is just alssc->samples + * since als_create_summary sets 0 to summary->samples_in_frame i.e. info->samples_in_frame. 
*/ + return alssc->samples - (alssc->number_of_ra_units - 1) * info->samples_in_frame; +} + +const static mp4sys_importer_functions mp4sys_als_importer = +{ + "MPEG-4 ALS", + 1, + mp4sys_als_probe, + mp4sys_als_get_accessunit, + mp4sys_als_get_last_delta, + mp4sys_als_cleanup +}; + +/*************************************************************************** + DTS importer + ETSI TS 102 114 V1.2.1 (2002-12) + ETSI TS 102 114 V1.3.1 (2011-08) +***************************************************************************/ +#include "dts.h" + +typedef struct +{ + mp4sys_importer_status status; + dts_info_t info; +} mp4sys_dts_info_t; + +static void mp4sys_remove_dts_info( mp4sys_dts_info_t *info ) +{ + if( !info ) + return; + lsmash_destroy_multiple_buffers( info->info.au_buffers ); + lsmash_bits_adhoc_cleanup( info->info.bits ); + free( info ); +} + +static mp4sys_dts_info_t *mp4sys_create_dts_info( void ) +{ + mp4sys_dts_info_t *info = (mp4sys_dts_info_t *)lsmash_malloc_zero( sizeof(mp4sys_dts_info_t) ); + if( !info ) + return NULL; + dts_info_t *dts_info = &info->info; + dts_info->buffer_pos = dts_info->buffer; + dts_info->buffer_end = dts_info->buffer; + dts_info->bits = lsmash_bits_adhoc_create(); + if( !dts_info->bits ) + { + free( info ); + return NULL; + } + dts_info->au_buffers = lsmash_create_multiple_buffers( 2, DTS_MAX_EXTENSION_SIZE ); + if( !dts_info->au_buffers ) + { + lsmash_bits_adhoc_cleanup( dts_info->bits ); + free( info ); + return NULL; + } + dts_info->au = lsmash_withdraw_buffer( dts_info->au_buffers, 1 ); + dts_info->incomplete_au = lsmash_withdraw_buffer( dts_info->au_buffers, 2 ); + return info; +} + +static void mp4sys_dts_cleanup( mp4sys_importer_t *importer ) +{ + debug_if( importer && importer->info ) + mp4sys_remove_dts_info( importer->info ); +} + +static int dts_get_next_accessunit_internal( mp4sys_importer_t *importer ) +{ + int complete_au = 0; + mp4sys_dts_info_t *importer_info = (mp4sys_dts_info_t *)importer->info; + dts_info_t *info 
= &importer_info->info;
+    while( !complete_au )
+    {
+        /* Read data from the stream if needed.
+         * Unparsed bytes are moved to the head of the buffer before refilling. */
+        uint32_t remainder_length = info->buffer_end - info->buffer_pos;
+        if( !info->no_more_read && remainder_length < DTS_MAX_EXTENSION_SIZE )
+        {
+            if( remainder_length )
+                memmove( info->buffer, info->buffer_pos, remainder_length );
+            uint32_t read_size = fread( info->buffer + remainder_length, 1, DTS_MAX_EXTENSION_SIZE, importer->stream );
+            remainder_length += read_size;
+            info->buffer_pos = info->buffer;
+            info->buffer_end = info->buffer + remainder_length;
+            info->no_more_read = read_size == 0 ? feof( importer->stream ) : 0;
+        }
+        /* Check the remainder length of the buffer.
+         * If there is enough length, then parse the frame in it.
+         * The length 10 is the required byte length to get frame size. */
+        if( remainder_length < 10 )
+        {
+            /* Reached the end of stream.
+             * Whatever has been accumulated so far becomes the last access unit. */
+            importer_info->status = MP4SYS_IMPORTER_EOF;
+            complete_au = !!info->incomplete_au_length;
+            if( !complete_au )
+                return remainder_length ? -1 : 0; /* No more access units in the stream. Leftover bytes are an error. */
+            if( !info->ddts_param_initialized )
+                dts_update_specific_param( info );
+        }
+        else
+        {
+            /* Parse substream frame. */
+            dts_substream_type prev_substream_type = info->substream_type;
+            info->substream_type = dts_get_substream_type( info );
+            int (*dts_parse_frame)( dts_info_t *, uint8_t *, uint32_t ) = NULL;
+            switch( info->substream_type )
+            {
+                /* Decide substream frame parser and check if this frame and the previous frame belong to the same AU. */
+                case DTS_SUBSTREAM_TYPE_CORE :
+                    if( prev_substream_type != DTS_SUBSTREAM_TYPE_NONE ) /* a core frame after any frame starts a new AU */
+                        complete_au = 1;
+                    dts_parse_frame = dts_parse_core_substream;
+                    break;
+                case DTS_SUBSTREAM_TYPE_EXTENSION :
+                {
+                    uint8_t prev_extension_index = info->extension_index;
+                    if( dts_get_extension_index( info, &info->extension_index ) )
+                        return -1;
+                    if( prev_substream_type == DTS_SUBSTREAM_TYPE_EXTENSION && info->extension_index <= prev_extension_index ) /* index did not increase: new AU */
+                        complete_au = 1;
+                    dts_parse_frame = dts_parse_extension_substream;
+                    break;
+                }
+                default :
+                    return -1;
+            }
+            if( !info->ddts_param_initialized && complete_au )
+                dts_update_specific_param( info );
+            info->frame_size = 0;
+            if( dts_parse_frame( info, info->buffer_pos, LSMASH_MIN( remainder_length, DTS_MAX_EXTENSION_SIZE ) ) )
+                return -1; /* Failed to parse. */
+        }
+        if( complete_au )
+        {
+            /* Publish the accumulated frames as the current access unit. */
+            memcpy( info->au, info->incomplete_au, info->incomplete_au_length );
+            info->au_length = info->incomplete_au_length;
+            info->incomplete_au_length = 0;
+            info->extension_substream_count = (info->substream_type == DTS_SUBSTREAM_TYPE_EXTENSION);
+            if( importer_info->status == MP4SYS_IMPORTER_EOF )
+                break;
+        }
+        /* Increase buffer size to store AU if short. */
+        if( info->incomplete_au_length + info->frame_size > info->au_buffers->buffer_size )
+        {
+            lsmash_multiple_buffers_t *temp = lsmash_resize_multiple_buffers( info->au_buffers, info->au_buffers->buffer_size + DTS_MAX_EXTENSION_SIZE );
+            if( !temp )
+                return -1;
+            info->au_buffers = temp;
+            info->au = lsmash_withdraw_buffer( info->au_buffers, 1 ); /* re-fetch both pointers after the resize */
+            info->incomplete_au = lsmash_withdraw_buffer( info->au_buffers, 2 );
+        }
+        /* Append frame data.
+         * The frame just parsed always goes to the AU under construction. */
+        memcpy( info->incomplete_au + info->incomplete_au_length, info->buffer_pos, info->frame_size );
+        info->incomplete_au_length += info->frame_size;
+        info->buffer_pos += info->frame_size;
+    }
+    return info->bits->bs->error ?
-1 : 0;
+}
+/* Deliver the access unit currently held in info->au into buffered_sample and
+ * read the next one from the stream.
+ * Only track_number 1 is accepted.
+ * Fails with -1 if the importer is in the error state or if buffered_sample->length
+ * is smaller than the pending access unit.
+ * At EOF with nothing pending, buffered_sample->length is set to 0 and 0 is returned;
+ * otherwise the importer status as of this access unit is returned. */
+static int mp4sys_dts_get_accessunit( mp4sys_importer_t *importer, uint32_t track_number, lsmash_sample_t *buffered_sample )
+{
+    debug_if( !importer || !importer->info || !buffered_sample->data || !buffered_sample->length )
+        return -1;
+    if( !importer->info || track_number != 1 )
+        return -1;
+    lsmash_audio_summary_t *summary = (lsmash_audio_summary_t *)lsmash_get_entry_data( importer->summaries, track_number );
+    if( !summary )
+        return -1;
+    mp4sys_dts_info_t *importer_info = (mp4sys_dts_info_t *)importer->info;
+    dts_info_t *info = &importer_info->info;
+    mp4sys_importer_status current_status = importer_info->status;
+    if( current_status == MP4SYS_IMPORTER_ERROR || buffered_sample->length < info->au_length )
+        return -1;
+    if( current_status == MP4SYS_IMPORTER_EOF && info->au_length == 0 )
+    {
+        buffered_sample->length = 0;
+        return 0;
+    }
+    if( current_status == MP4SYS_IMPORTER_CHANGE )
+        summary->max_au_length = 0;
+    memcpy( buffered_sample->data, info->au, info->au_length );
+    buffered_sample->length = info->au_length;
+    buffered_sample->dts = info->au_number++ * summary->samples_in_frame; /* constant duration per access unit */
+    buffered_sample->cts = buffered_sample->dts;
+    buffered_sample->prop.ra_flags = ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC;
+    buffered_sample->prop.pre_roll.distance = !!(info->flags & DTS_EXT_SUBSTREAM_LBR_FLAG); /* MDCT */
+    if( importer_info->status == MP4SYS_IMPORTER_EOF )
+    {
+        info->au_length = 0; /* the last access unit has just been delivered */
+        return 0;
+    }
+    if( dts_get_next_accessunit_internal( importer ) )
+        importer_info->status = MP4SYS_IMPORTER_ERROR; /* reported on the next call */
+    return current_status;
+}
+/* Create an audio summary for a DTS stream from the parameters gathered in info.
+ * Return NULL on failure. */
+static lsmash_audio_summary_t *dts_create_summary( dts_info_t *info )
+{
+    lsmash_audio_summary_t *summary = (lsmash_audio_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_AUDIO );
+    if( !summary )
+        return NULL;
+    lsmash_dts_specific_parameters_t *param = &info->ddts_param;
+    lsmash_codec_specific_t *specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_AUDIO_DTS,
+
LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED );
+    if( !specific )
+    {
+        /* Allocation failed: 'specific' must not be dereferenced below. */
+        lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+        return NULL;
+    }
+    specific->data.unstructured = lsmash_create_dts_specific_info( param, &specific->size );
+    if( !specific->data.unstructured
+     || lsmash_add_entry( &summary->opaque->list, specific ) )
+    {
+        lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+        lsmash_destroy_codec_specific_data( specific );
+        return NULL;
+    }
+    summary->aot = MP4A_AUDIO_OBJECT_TYPE_NULL; /* no effect */
+    summary->sbr_mode = MP4A_AAC_SBR_NOT_SPECIFIED; /* no effect */
+    summary->sample_type = lsmash_dts_get_codingname( param );
+    /* Map the coded sampling frequency onto the base frequency of its family. */
+    switch( param->DTSSamplingFrequency )
+    {
+        case 12000 : /* Invalid? (No reference in the spec) */
+        case 24000 :
+        case 48000 :
+        case 96000 :
+        case 192000 :
+        case 384000 : /* Invalid? (No reference in the spec) */
+            summary->frequency = 48000;
+            break;
+        case 22050 :
+        case 44100 :
+        case 88200 :
+        case 176400 :
+        case 352800 : /* Invalid? (No reference in the spec) */
+            summary->frequency = 44100;
+            break;
+        case 8000 : /* Invalid? (No reference in the spec) */
+        case 16000 :
+        case 32000 :
+        case 64000 :
+        case 128000 :
+            summary->frequency = 32000;
+            break;
+        default :
+            summary->frequency = 0;
+            break;
+    }
+    /* An unknown (zero) sampling frequency must not be used as a divisor. */
+    summary->samples_in_frame = param->DTSSamplingFrequency
+                              ? (summary->frequency * info->frame_duration) / param->DTSSamplingFrequency
+                              : 0;
+    summary->max_au_length = DTS_MAX_CORE_SIZE + info->extension_substream_count * DTS_MAX_EXTENSION_SIZE;
+    summary->sample_size = param->pcmSampleDepth;
+    /* The channel count is the largest one found among all substream components. */
+    int core_channel_count = dts_get_channel_count_from_channel_layout( info->core.channel_layout );
+    summary->channels = core_channel_count;
+    summary->channels = LSMASH_MAX( summary->channels, dts_get_channel_count_from_channel_layout( info->extension.channel_layout ) );
+    summary->channels = LSMASH_MAX( summary->channels, dts_get_channel_count_from_channel_layout( info->lbr.channel_layout ) );
+    summary->channels = LSMASH_MAX( summary->channels, dts_get_channel_count_from_channel_layout( info->lossless.channel_layout ) );
+    summary->channels +=
core_channel_count == summary->channels + ? lsmash_count_bits( info->core.xxch_lower_planes ) + : lsmash_count_bits( info->extension.xxch_lower_planes ); + return summary; +} + +static int mp4sys_dts_probe( mp4sys_importer_t* importer ) +{ + mp4sys_dts_info_t *info = mp4sys_create_dts_info(); + if( !info ) + return -1; + importer->info = info; + if( dts_get_next_accessunit_internal( importer ) ) + { + mp4sys_remove_dts_info( importer->info ); + importer->info = NULL; + return -1; + } + lsmash_audio_summary_t *summary = dts_create_summary( &info->info ); + if( !summary ) + { + mp4sys_remove_dts_info( importer->info ); + importer->info = NULL; + return -1; + } + if( info->status != MP4SYS_IMPORTER_EOF ) + info->status = MP4SYS_IMPORTER_OK; + info->info.au_number = 0; + if( lsmash_add_entry( importer->summaries, summary ) ) + { + lsmash_cleanup_summary( (lsmash_summary_t *)summary ); + mp4sys_remove_dts_info( importer->info ); + importer->info = NULL; + return -1; + } + return 0; +} + +static uint32_t mp4sys_dts_get_last_delta( mp4sys_importer_t* importer, uint32_t track_number ) +{ + debug_if( !importer || !importer->info ) + return 0; + mp4sys_dts_info_t *info = (mp4sys_dts_info_t *)importer->info; + if( !info || track_number != 1 || info->status != MP4SYS_IMPORTER_EOF || info->info.au_length ) + return 0; + lsmash_audio_summary_t *summary = (lsmash_audio_summary_t *)lsmash_get_entry_data( importer->summaries, track_number ); + if( !summary ) + return 0; + return (summary->frequency * info->info.frame_duration) / info->info.ddts_param.DTSSamplingFrequency; +} + +const static mp4sys_importer_functions mp4sys_dts_importer = +{ + "DTS Coherent Acoustics", + 1, + mp4sys_dts_probe, + mp4sys_dts_get_accessunit, + mp4sys_dts_get_last_delta, + mp4sys_dts_cleanup +}; + +/*************************************************************************** + H.264 importer + ITU-T Recommendation H.264 (03/10) + ISO/IEC 14496-15:2010 
+***************************************************************************/
+#include "h264.h"
+
+typedef struct
+{
+    mp4sys_importer_status status;
+    h264_info_t info;
+    h264_sps_t first_sps;
+    lsmash_media_ts_list_t ts_list;
+    uint32_t max_au_length;
+    uint32_t num_undecodable;
+    uint64_t last_intra_cts;
+    uint8_t composition_reordering_present;
+    uint8_t field_pic_present;
+} mp4sys_h264_info_t;
+
+/* Free an H.264 importer info together with its parser and timestamp list. NULL is accepted. */
+static void mp4sys_remove_h264_info( mp4sys_h264_info_t *info )
+{
+    if( !info )
+        return;
+    h264_cleanup_parser( &info->info );
+    free( info->ts_list.timestamp );    /* free( NULL ) is a no-op */
+    free( info );
+}
+
+static void mp4sys_h264_cleanup( mp4sys_importer_t *importer )
+{
+    debug_if( importer && importer->info )
+        mp4sys_remove_h264_info( importer->info );
+}
+
+/* Refill the stream buffer from the FILE stream passed as src when no more than
+ * anticipation_bytes unread bytes are left in it.
+ * Return the number of unread bytes available in the buffer afterwards. */
+static uint32_t h264_update_buffer_from_stream( h264_info_t *info, void *src, uint32_t anticipation_bytes )
+{
+    h264_stream_buffer_t *buffer = &info->buffer;
+    assert( anticipation_bytes < buffer->bank->buffer_size );
+    uint32_t remainder_bytes = buffer->end - buffer->pos;
+    if( info->no_more_read )
+        return remainder_bytes;
+    if( remainder_bytes <= anticipation_bytes )
+    {
+        /* Move unused data to the head of buffer. The regions may overlap. */
+        memmove( buffer->start, buffer->pos, remainder_bytes );
+        /* Read and store the next data into the buffer.
+         * Move the position of buffer on the head. */
+        FILE *stream = (FILE *)src;
+        uint32_t read_size = fread( buffer->start + remainder_bytes, 1, buffer->bank->buffer_size - remainder_bytes, stream );
+        remainder_bytes += read_size;
+        buffer->pos = buffer->start;
+        buffer->end = buffer->start + remainder_bytes;
+        info->no_more_read = read_size == 0 ?
feof( stream ) : 0;
+    }
+    return remainder_bytes;
+}
+/* Allocate a zeroed H.264 importer info and set up its parser so that it refills
+ * its buffer with h264_update_buffer_from_stream(). Return NULL on failure. */
+static mp4sys_h264_info_t *mp4sys_create_h264_info( void )
+{
+    mp4sys_h264_info_t *info = lsmash_malloc_zero( sizeof(mp4sys_h264_info_t) );
+    if( !info )
+        return NULL;
+    if( h264_setup_parser( &info->info, 0, h264_update_buffer_from_stream ) )
+    {
+        mp4sys_remove_h264_info( info );
+        return NULL;
+    }
+    return info;
+}
+/* Handle the parameter set NALU located at buffer->pos.
+ * When probing, the whole NALU is handed to h264_try_to_append_parameter_set();
+ * otherwise SPS and PPS are parsed, SPS extension is ignored and any other type is an error. */
+static int h264_process_parameter_set( h264_info_t *info, lsmash_h264_parameter_set_type ps_type,
+                                       uint16_t nalu_header_length, uint64_t ebsp_length, int probe )
+{
+    h264_stream_buffer_t *buffer = &info->buffer;
+    if( probe )
+        return h264_try_to_append_parameter_set( info, ps_type, buffer->pos, nalu_header_length + ebsp_length );
+    switch( ps_type )
+    {
+        case H264_PARAMETER_SET_TYPE_SPS :
+            return h264_parse_sps( info, buffer->rbsp, buffer->pos + nalu_header_length, ebsp_length );
+        case H264_PARAMETER_SET_TYPE_PPS :
+            return h264_parse_pps( info, buffer->rbsp, buffer->pos + nalu_header_length, ebsp_length );
+        case H264_PARAMETER_SET_TYPE_SPSEXT :
+            return 0;
+        default :
+            return -1;
+    }
+}
+/* Turn the access unit under construction into the current access unit.
+ * Return 1 if an access unit was completed, 0 if there is nothing to complete
+ * (no primary coded picture yet, or no data). When probing, only the lengths are updated. */
+static inline int h264_complete_au( h264_picture_info_t *picture, int probe )
+{
+    if( !picture->incomplete_au_has_primary || picture->incomplete_au_length == 0 )
+        return 0;
+    if( !probe )
+        memcpy( picture->au, picture->incomplete_au, picture->incomplete_au_length );
+    picture->au_length = picture->incomplete_au_length;
+    picture->incomplete_au_length = 0;
+    picture->incomplete_au_has_primary = 0;
+    return 1;
+}
+/* Append a NALU, prefixed with its big-endian length field of H264_DEFAULT_NALU_LENGTH_SIZE bytes,
+ * to the access unit under construction. When probing, only the length is accounted for. */
+static void h264_append_nalu_to_au( h264_picture_info_t *picture, uint8_t *src_nalu, uint32_t nalu_length, int probe )
+{
+    if( !probe )
+    {
+        uint8_t *dst_nalu = picture->incomplete_au + picture->incomplete_au_length + H264_DEFAULT_NALU_LENGTH_SIZE;
+        for( int i = H264_DEFAULT_NALU_LENGTH_SIZE; i; i-- ) /* most significant byte first */
+            *(dst_nalu - i) = (nalu_length >> ((i - 1) * 8)) & 0xff;
+        memcpy( dst_nalu, src_nalu, nalu_length );
+    }
+    /* Note: picture->incomplete_au_length shall be 0 immediately after AU
has completed.
+     * Therefore, possible_au_length in h264_get_access_unit_internal() can't be used here
+     * to avoid increasing AU length monotonously through the entire stream. */
+    picture->incomplete_au_length += H264_DEFAULT_NALU_LENGTH_SIZE + nalu_length;
+}
+/* Common epilogue of h264_get_access_unit_internal():
+ * the status becomes EOF only when the stream and the buffer are exhausted and no access unit
+ * is under construction, otherwise OK; the given NALU header is saved in info->info. */
+static inline void h264_get_au_internal_end( mp4sys_h264_info_t *info, h264_picture_info_t *picture, h264_nalu_header_t *nalu_header, int no_more_buf )
+{
+    info->status = info->info.no_more_read && no_more_buf && (picture->incomplete_au_length == 0)
+                 ? MP4SYS_IMPORTER_EOF
+                 : MP4SYS_IMPORTER_OK;
+    info->info.nalu_header = *nalu_header;
+}
+/* Success epilogue: update the status, count the access unit and return 0. */
+static int h264_get_au_internal_succeeded( mp4sys_h264_info_t *info, h264_picture_info_t *picture, h264_nalu_header_t *nalu_header, int no_more_buf )
+{
+    h264_get_au_internal_end( info, picture, nalu_header, no_more_buf );
+    picture->au_number += 1;
+    return 0;
+}
+/* Failure epilogue: update the status, count the access unit only if it had been completed, and return -1. */
+static int h264_get_au_internal_failed( mp4sys_h264_info_t *info, h264_picture_info_t *picture, h264_nalu_header_t *nalu_header, int no_more_buf, int complete_au )
+{
+    h264_get_au_internal_end( info, picture, nalu_header, no_more_buf );
+    if( complete_au )
+        picture->au_number += 1;
+    return -1;
+}
+
+/* If probe is non-zero, don't get the actual data (EBSP) of an access unit and only parse NALU.
+ * Currently, you can get AU of AVC video elemental stream only, not AVC parameter set elemental stream defined in 14496-15.
*/ +static int h264_get_access_unit_internal( mp4sys_importer_t *importer, int probe ) +{ + mp4sys_h264_info_t *importer_info = (mp4sys_h264_info_t *)importer->info; + h264_info_t *info = &importer_info->info; + h264_slice_info_t *slice = &info->slice; + h264_picture_info_t *picture = &info->picture; + h264_stream_buffer_t *buffer = &info->buffer; + h264_nalu_header_t nalu_header = info->nalu_header; + uint64_t consecutive_zero_byte_count = 0; + uint64_t ebsp_length = 0; + int no_more_buf = 0; + int complete_au = 0; + picture->au_length = 0; + picture->type = H264_PICTURE_TYPE_NONE; + picture->random_accessible = 0; + picture->recovery_frame_cnt = 0; + picture->has_mmco5 = 0; + picture->has_redundancy = 0; + while( 1 ) + { + buffer->update( info, importer->stream, 2 ); + no_more_buf = buffer->pos >= buffer->end; + int no_more = info->no_more_read && no_more_buf; + if( !h264_check_next_short_start_code( buffer->pos, buffer->end ) && !no_more ) + { + if( *(buffer->pos ++) ) + consecutive_zero_byte_count = 0; + else + ++consecutive_zero_byte_count; + ++ebsp_length; + continue; + } + if( no_more && ebsp_length == 0 ) + { + /* For the last NALU. + * This NALU already has been appended into the latest access unit and parsed. */ + h264_update_picture_info( picture, slice, &info->sei ); + h264_complete_au( picture, probe ); + return h264_get_au_internal_succeeded( importer->info, picture, &nalu_header, no_more_buf ); + } + uint64_t next_nalu_head_pos = info->ebsp_head_pos + ebsp_length + !no_more * H264_SHORT_START_CODE_LENGTH; + uint8_t *next_short_start_code_pos = buffer->pos; /* Memorize position of short start code of the next NALU in buffer. + * This is used when backward reading of stream doesn't occur. 
*/ + uint8_t nalu_type = nalu_header.nal_unit_type; + int read_back = 0; +#if 0 + if( probe ) + { + fprintf( stderr, "NALU type: %"PRIu8"\n", nalu_type ); + fprintf( stderr, " NALU header position: %"PRIx64"\n", info->ebsp_head_pos - nalu_header.length ); + fprintf( stderr, " EBSP position: %"PRIx64"\n", info->ebsp_head_pos ); + fprintf( stderr, " EBSP length: %"PRIx64" (%"PRIu64")\n", ebsp_length - consecutive_zero_byte_count, + ebsp_length - consecutive_zero_byte_count ); + fprintf( stderr, " consecutive_zero_byte_count: %"PRIx64"\n", consecutive_zero_byte_count ); + fprintf( stderr, " Next NALU header position: %"PRIx64"\n", next_nalu_head_pos ); + } +#endif + if( nalu_type == 12 ) + { + /* We don't support streams with both filler and HRD yet. + * Otherwise, just skip filler because elemental streams defined in 14496-15 are forbidden to use filler. */ + if( info->sps.hrd_present ) + return h264_get_au_internal_failed( importer->info, picture, &nalu_header, no_more_buf, complete_au ); + } + else if( (nalu_type >= 1 && nalu_type <= 13) || nalu_type == 19 ) + { + /* Get the EBSP of the current NALU here. + * AVC elemental stream defined in 14496-15 can recognizes from 0 to 13, and 19 of nal_unit_type. + * We don't support SVC and MVC elemental stream defined in 14496-15 yet. */ + ebsp_length -= consecutive_zero_byte_count; /* Any EBSP doesn't have zero bytes at the end. */ + uint64_t nalu_length = nalu_header.length + ebsp_length; + uint64_t possible_au_length = picture->incomplete_au_length + H264_DEFAULT_NALU_LENGTH_SIZE + nalu_length; + if( buffer->bank->buffer_size < possible_au_length ) + { + if( h264_supplement_buffer( buffer, picture, 2 * possible_au_length ) ) + return h264_get_au_internal_failed( importer->info, picture, &nalu_header, no_more_buf, complete_au ); + next_short_start_code_pos = buffer->pos; + } + /* Move to the first byte of the current NALU. 
*/ + read_back = (buffer->pos - buffer->start) < (nalu_length + consecutive_zero_byte_count); + if( read_back ) + { + lsmash_fseek( importer->stream, info->ebsp_head_pos - nalu_header.length, SEEK_SET ); + int read_fail = fread( buffer->start, 1, nalu_length, importer->stream ) != nalu_length; + buffer->pos = buffer->start; + buffer->end = buffer->start + nalu_length; + if( read_fail ) + return h264_get_au_internal_failed( importer->info, picture, &nalu_header, no_more_buf, complete_au ); +#if 0 + if( probe ) + fprintf( stderr, " ----Read Back\n" ); +#endif + } + else + buffer->pos -= nalu_length + consecutive_zero_byte_count; + if( nalu_type >= 1 && nalu_type <= 5 ) + { + /* VCL NALU (slice) */ + h264_slice_info_t prev_slice = *slice; + if( h264_parse_slice( info, &nalu_header, buffer->rbsp, + buffer->pos + nalu_header.length, ebsp_length ) ) + return h264_get_au_internal_failed( importer->info, picture, &nalu_header, no_more_buf, complete_au ); + if( prev_slice.present ) + { + /* Check whether the AU that contains the previous VCL NALU completed or not. */ + if( h264_find_au_delimit_by_slice_info( slice, &prev_slice ) ) + { + /* The current NALU is the first VCL NALU of the primary coded picture of an new AU. + * Therefore, the previous slice belongs to the AU you want at this time. */ + h264_update_picture_info( picture, &prev_slice, &info->sei ); + complete_au = h264_complete_au( picture, probe ); + } + else + h264_update_picture_info_for_slice( picture, &prev_slice ); + } + h264_append_nalu_to_au( picture, buffer->pos, nalu_length, probe ); + slice->present = 1; + } + else + { + if( h264_find_au_delimit_by_nalu_type( nalu_type, info->prev_nalu_type ) ) + { + /* The last slice belongs to the AU you want at this time. 
*/ + h264_update_picture_info( picture, slice, &info->sei ); + complete_au = h264_complete_au( picture, probe ); + } + else if( no_more ) + complete_au = h264_complete_au( picture, probe ); + switch( nalu_type ) + { + case 6 : /* Supplemental Enhancement Information */ + if( h264_parse_sei( info->bits, &info->sei, buffer->rbsp, buffer->pos + nalu_header.length, ebsp_length ) ) + return h264_get_au_internal_failed( importer->info, picture, &nalu_header, no_more_buf, complete_au ); + h264_append_nalu_to_au( picture, buffer->pos, nalu_length, probe ); + break; + case 7 : /* Sequence Parameter Set */ + if( h264_process_parameter_set( info, H264_PARAMETER_SET_TYPE_SPS, nalu_header.length, ebsp_length, probe ) ) + return h264_get_au_internal_failed( importer->info, picture, &nalu_header, no_more_buf, complete_au ); + if( probe && !importer_info->first_sps.present ) + importer_info->first_sps = info->sps; + break; + case 8 : /* Picture Parameter Set */ + if( h264_process_parameter_set( info, H264_PARAMETER_SET_TYPE_PPS, nalu_header.length, ebsp_length, probe ) ) + return h264_get_au_internal_failed( importer->info, picture, &nalu_header, no_more_buf, complete_au ); + break; + case 9 : /* We drop access unit delimiters. */ + break; + case 13 : /* Sequence Parameter Set Extension */ + if( h264_process_parameter_set( info, H264_PARAMETER_SET_TYPE_SPSEXT, nalu_header.length, ebsp_length, probe ) ) + return h264_get_au_internal_failed( importer->info, picture, &nalu_header, no_more_buf, complete_au ); + break; + default : + h264_append_nalu_to_au( picture, buffer->pos, nalu_length, probe ); + break; + } + } + } + /* Move to the first byte of the next NALU. 
*/ + if( read_back ) + { + lsmash_fseek( importer->stream, next_nalu_head_pos, SEEK_SET ); + buffer->pos = buffer->start; + buffer->end = buffer->start + fread( buffer->start, 1, buffer->bank->buffer_size, importer->stream ); + } + else + buffer->pos = next_short_start_code_pos + H264_SHORT_START_CODE_LENGTH; + info->prev_nalu_type = nalu_type; + buffer->update( info, importer->stream, 0 ); + no_more_buf = buffer->pos >= buffer->end; + ebsp_length = 0; + no_more = info->no_more_read && no_more_buf; + if( !no_more ) + { + /* Check the next NALU header. */ + if( h264_check_nalu_header( &nalu_header, &buffer->pos, !!consecutive_zero_byte_count ) ) + return h264_get_au_internal_failed( importer->info, picture, &nalu_header, no_more_buf, complete_au ); + info->ebsp_head_pos = next_nalu_head_pos + nalu_header.length; + } + /* If there is no more data in the stream, and flushed chunk of NALUs, flush it as complete AU here. */ + else if( picture->incomplete_au_length && picture->au_length == 0 ) + { + h264_update_picture_info( picture, slice, &info->sei ); + h264_complete_au( picture, probe ); + return h264_get_au_internal_succeeded( importer->info, picture, &nalu_header, no_more_buf ); + } + if( complete_au ) + return h264_get_au_internal_succeeded( importer->info, picture, &nalu_header, no_more_buf ); + consecutive_zero_byte_count = 0; + } +} + +static int mp4sys_h264_get_accessunit( mp4sys_importer_t *importer, uint32_t track_number, lsmash_sample_t *buffered_sample ) +{ + debug_if( !importer || !importer->info || !buffered_sample->data || !buffered_sample->length ) + return -1; + if( !importer->info || track_number != 1 ) + return -1; + mp4sys_h264_info_t *importer_info = (mp4sys_h264_info_t *)importer->info; + h264_info_t *info = &importer_info->info; + mp4sys_importer_status current_status = importer_info->status; + if( current_status == MP4SYS_IMPORTER_ERROR || buffered_sample->length < importer_info->max_au_length ) + return -1; + if( current_status == 
MP4SYS_IMPORTER_EOF ) + { + buffered_sample->length = 0; + return 0; + } + if( h264_get_access_unit_internal( importer, 0 ) ) + { + importer_info->status = MP4SYS_IMPORTER_ERROR; + return -1; + } + h264_sps_t *sps = &info->sps; + h264_picture_info_t *picture = &info->picture; + buffered_sample->dts = importer_info->ts_list.timestamp[picture->au_number - 1].dts; + buffered_sample->cts = importer_info->ts_list.timestamp[picture->au_number - 1].cts; + if( picture->au_number < importer_info->num_undecodable ) + buffered_sample->prop.leading = ISOM_SAMPLE_IS_UNDECODABLE_LEADING; + else + buffered_sample->prop.leading = picture->independent || buffered_sample->cts >= importer_info->last_intra_cts + ? ISOM_SAMPLE_IS_NOT_LEADING : ISOM_SAMPLE_IS_UNDECODABLE_LEADING; + if( picture->independent ) + importer_info->last_intra_cts = buffered_sample->cts; + if( importer_info->composition_reordering_present && !picture->disposable && !picture->idr ) + buffered_sample->prop.allow_earlier = QT_SAMPLE_EARLIER_PTS_ALLOWED; + buffered_sample->prop.independent = picture->independent ? ISOM_SAMPLE_IS_INDEPENDENT : ISOM_SAMPLE_IS_NOT_INDEPENDENT; + buffered_sample->prop.disposable = picture->disposable ? ISOM_SAMPLE_IS_DISPOSABLE : ISOM_SAMPLE_IS_NOT_DISPOSABLE; + buffered_sample->prop.redundant = picture->has_redundancy ? 
ISOM_SAMPLE_HAS_REDUNDANCY : ISOM_SAMPLE_HAS_NO_REDUNDANCY;
+    buffered_sample->prop.post_roll.identifier = picture->frame_num;
+    if( picture->random_accessible )
+    {
+        if( picture->idr )
+            buffered_sample->prop.ra_flags = ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC;
+        else if( picture->recovery_frame_cnt )
+        {
+            buffered_sample->prop.ra_flags = ISOM_SAMPLE_RANDOM_ACCESS_FLAG_POST_ROLL_START;
+            buffered_sample->prop.post_roll.complete = (picture->frame_num + picture->recovery_frame_cnt) % sps->MaxFrameNum;
+        }
+        else
+            buffered_sample->prop.ra_flags = ISOM_SAMPLE_RANDOM_ACCESS_FLAG_RAP | QT_SAMPLE_RANDOM_ACCESS_FLAG_PARTIAL_SYNC;
+    }
+    buffered_sample->length = picture->au_length;
+    memcpy( buffered_sample->data, picture->au, picture->au_length );
+    return current_status;
+}
+
+/* Build the video summary that describes the probed H.264 stream.
+ *   info              parser state; an SPS and a PPS must already be present (info->sps.present,
+ *                     info->pps.present). Its avcC_param is the input for the codec specific data.
+ *   sps               SPS whose VUI fields and cropped dimensions are copied into the summary.
+ *   field_pic_present stored as summary->sample_per_field.
+ *   max_au_length     stored as summary->max_au_length.
+ * Returns the new summary, or NULL when a parameter set is missing or when any allocation or
+ * list insertion fails. Everything allocated here is released before returning NULL. */
+static lsmash_video_summary_t *h264_create_summary( h264_info_t *info, h264_sps_t *sps, uint8_t field_pic_present, uint32_t max_au_length )
+{
+    lsmash_h264_specific_parameters_t *param = &info->avcC_param;
+    if( !info->sps.present || !info->pps.present )
+        return NULL;
+    lsmash_video_summary_t *summary = (lsmash_video_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_VIDEO );
+    if( !summary )
+        return NULL;
+    /* Update summary here.
+     * max_au_length is supplied by the caller. */
+    lsmash_codec_specific_t *specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264,
+                                                                           LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED );
+    if( !specific )
+    {
+        /* NOTE(review): assumes lsmash_create_codec_specific_data() reports failure by returning NULL,
+         * like lsmash_create_summary() above. Nothing but the summary exists yet, so only it is released. */
+        lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+        return NULL;
+    }
+    specific->data.unstructured = lsmash_create_h264_specific_info( param, &specific->size );
+    if( !specific->data.unstructured
+     || lsmash_add_entry( &summary->opaque->list, specific ) )
+    {
+        /* 'specific' was not handed over to the summary's list, so it is destroyed separately. */
+        lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+        lsmash_destroy_codec_specific_data( specific );
+        return NULL;
+    }
+    summary->sample_type            = ISOM_CODEC_TYPE_AVC1_VIDEO;
+    summary->max_au_length          = max_au_length;
+    summary->timescale              = sps->vui.time_scale;
+    summary->timebase               = sps->vui.num_units_in_tick;
+    summary->vfr                    = !sps->vui.fixed_frame_rate_flag;
+    summary->sample_per_field       = field_pic_present;
+    summary->width                  = sps->cropped_width;
+    summary->height                 = sps->cropped_height;
+    summary->par_h                  = sps->vui.sar_width;
+    summary->par_v                  = sps->vui.sar_height;
+    summary->color.primaries_index  = sps->vui.colour_primaries;
+    summary->color.transfer_index   = sps->vui.transfer_characteristics;
+    summary->color.matrix_index     = sps->vui.matrix_coefficients;
+    summary->color.full_range       = sps->vui.video_full_range_flag;
+    return summary;
+}
+
+static int mp4sys_h264_probe( mp4sys_importer_t *importer )
+{
+#define H264_MAX_NUM_REORDER_FRAMES 16
+#define H264_LONG_START_CODE_LENGTH 4
+#define H264_CHECK_NEXT_LONG_START_CODE( x ) (!(x)[0] && !(x)[1] && !(x)[2] && ((x)[3] == 0x01))
+    /* Find the first start code. */
+    mp4sys_h264_info_t *importer_info = mp4sys_create_h264_info();
+    if( !importer_info )
+        return -1;
+    h264_info_t *info = &importer_info->info;
+    h264_stream_buffer_t *buffer = &info->buffer;
+    buffer->pos = buffer->start;
+    buffer->end = buffer->start + fread( buffer->start, 1, buffer->bank->buffer_size, importer->stream );
+    info->no_more_read = buffer->start >= buffer->end ? feof( importer->stream ) : 0;
+    while( 1 )
+    {
+        /* Invalid if encountered any value of non-zero before the first start code.
*/
+        /* Equally invalid if fewer than H264_LONG_START_CODE_LENGTH bytes remain: only this first
+         * buffer fill is searched, so no long start code can follow any more.
+         * The length test comes first so that streams shorter than a start code (including empty ones)
+         * fail here instead of scanning bytes that were never read from the stream. */
+        if( buffer->end - buffer->pos < H264_LONG_START_CODE_LENGTH
+         || *buffer->pos )
+            goto fail;
+        /* The first NALU of an AU in decoding order shall have long start code (0x00000001). */
+        if( H264_CHECK_NEXT_LONG_START_CODE( buffer->pos ) )
+            break;
+        /* If the first trial of finding long start code failed, we assume this stream is not byte stream format of H.264.
+         * Running out of bytes is detected by the length test at the top of the loop. */
+        ++ buffer->pos;
+    }
+    /* OK. It seems the stream has a long start code of H.264. */
+    importer->info = importer_info;
+    buffer->pos += H264_LONG_START_CODE_LENGTH;
+    buffer->update( info, importer->stream, 0 );
+    h264_nalu_header_t first_nalu_header;
+    if( h264_check_nalu_header( &first_nalu_header, &buffer->pos, 1 ) )
+        goto fail;
+    if( buffer->pos >= buffer->end )
+        goto fail;  /* It seems the stream ends at the first incomplete access unit. */
+    uint64_t first_ebsp_head_pos = buffer->pos - buffer->start;     /* EBSP doesn't include NALU header. */
+    importer_info->status = MP4SYS_IMPORTER_OK;
+    info->nalu_header     = first_nalu_header;
+    info->ebsp_head_pos   = first_ebsp_head_pos;
+    /* Parse all NALU in the stream for preparation of calculating timestamps.
*/ + uint32_t poc_alloc = (1 << 12) * sizeof(uint64_t); + int64_t *poc = malloc( poc_alloc ); + if( !poc ) + goto fail; + uint32_t num_access_units = 0; + fprintf( stderr, "Analyzing stream as H.264\r" ); + while( importer_info->status != MP4SYS_IMPORTER_EOF ) + { +#if 0 + fprintf( stderr, "Analyzing stream as H.264: %"PRIu32"\n", num_access_units + 1 ); +#endif + h264_picture_info_t prev_picture = info->picture; + if( h264_get_access_unit_internal( importer, 1 ) + || h264_calculate_poc( info, &info->picture, &prev_picture ) ) + { + free( poc ); + goto fail; + } + if( poc_alloc <= num_access_units * sizeof(int64_t) ) + { + uint32_t alloc = 2 * num_access_units * sizeof(int64_t); + int64_t *temp = realloc( poc, alloc ); + if( !temp ) + { + free( poc ); + goto fail; + } + poc = temp; + poc_alloc = alloc; + } + importer_info->field_pic_present |= info->picture.field_pic_flag; + poc[num_access_units++] = info->picture.PicOrderCnt; + importer_info->max_au_length = LSMASH_MAX( info->picture.au_length, importer_info->max_au_length ); + } + fprintf( stderr, " \r" ); + lsmash_video_summary_t *summary = h264_create_summary( info, &importer_info->first_sps, importer_info->field_pic_present, importer_info->max_au_length ); + if( !summary || lsmash_add_entry( importer->summaries, summary ) ) + { + free( poc ); + goto fail; + } + lsmash_media_ts_t *timestamp = malloc( num_access_units * sizeof(lsmash_media_ts_t) ); + if( !timestamp ) + { + free( poc ); + goto fail; + } + /* Count leading samples that are undecodable. */ + for( uint32_t i = 0; i < num_access_units; i++ ) + { + if( poc[i] == 0 ) + break; + ++ importer_info->num_undecodable; + } + /* Deduplicate POCs. 
*/ + int64_t poc_offset = 0; + int64_t poc_min = 0; + int64_t invalid_poc_min = 0; + uint32_t last_idr = 0; + uint32_t invalid_poc_start = 0; + uint32_t max_composition_delay = 0; + int invalid_poc_present = 0; + for( uint32_t i = 0; ; i++ ) + { + if( i < num_access_units && poc[i] != 0 ) + { + /* poc_offset is not added to each POC here. + * It is done when we encounter the next coded video sequence. */ + if( poc[i] < 0 ) + { + /* Pictures with negative POC shall precede IDR-picture in composition order. + * The minimum POC is added to poc_offset when we encounter the next coded video sequence. */ + if( i > last_idr + H264_MAX_NUM_REORDER_FRAMES ) + { + if( !invalid_poc_present ) + { + invalid_poc_present = 1; + invalid_poc_start = i; + } + if( invalid_poc_min > poc[i] ) + invalid_poc_min = poc[i]; + } + else if( poc_min > poc[i] ) + { + poc_min = poc[i]; + max_composition_delay = LSMASH_MAX( max_composition_delay, i - last_idr ); + } + } + continue; + } + /* Encountered a new coded video sequence or no more POCs. + * Add poc_offset to each POC of the previous coded video sequence. */ + poc_offset -= poc_min; + int64_t poc_max = 0; + for( uint32_t j = last_idr; j < i; j++ ) + if( poc[j] >= 0 || (j <= last_idr + H264_MAX_NUM_REORDER_FRAMES) ) + { + poc[j] += poc_offset; + if( poc_max < poc[j] ) + poc_max = poc[j]; + } + poc_offset = poc_max + 1; + if( invalid_poc_present ) + { + /* Pictures with invalid negative POC is probably supposed to be composited + * both before the next coded video sequence and after the current one. 
*/ + poc_offset -= invalid_poc_min; + for( uint32_t j = invalid_poc_start; j < i; j++ ) + if( poc[j] < 0 ) + { + poc[j] += poc_offset; + if( poc_max < poc[j] ) + poc_max = poc[j]; + } + invalid_poc_present = 0; + invalid_poc_start = 0; + invalid_poc_min = 0; + poc_offset = poc_max + 1; + } + if( i < num_access_units ) + { + poc_min = 0; + last_idr = i; + } + else + break; /* no more POCs */ + } + /* Check if composition delay derived from reordering is present. */ + if( max_composition_delay == 0 ) + { + for( uint32_t i = 1; i < num_access_units; i++ ) + if( poc[i] < poc[i - 1] ) + { + importer_info->composition_reordering_present = 1; + break; + } + } + else + importer_info->composition_reordering_present = 1; + /* Generate timestamps. */ + if( importer_info->composition_reordering_present ) + { + /* Generate DTSs. + * Here, CTSs are temporary values for sort. */ + for( uint32_t i = 0; i < num_access_units; i++ ) + { + timestamp[i].cts = (uint64_t)poc[i]; + timestamp[i].dts = (uint64_t)i; + } + qsort( timestamp, num_access_units, sizeof(lsmash_media_ts_t), (int(*)( const void *, const void * ))lsmash_compare_cts ); + /* Get the maximum composition delay derived from reordering. */ + for( uint32_t i = 0; i < num_access_units; i++ ) + if( i < timestamp[i].dts ) + { + uint32_t composition_delay = timestamp[i].dts - i; + max_composition_delay = LSMASH_MAX( max_composition_delay, composition_delay ); + } + /* Generate CTSs. 
*/ + for( uint32_t i = 0; i < num_access_units; i++ ) + timestamp[i].cts = i + max_composition_delay; + qsort( timestamp, num_access_units, sizeof(lsmash_media_ts_t), (int(*)( const void *, const void * ))lsmash_compare_dts ); + } + else + for( uint32_t i = 0; i < num_access_units; i++ ) + timestamp[i].cts = timestamp[i].dts = i; +#if 0 + for( uint32_t i = 0; i < num_access_units; i++ ) + fprintf( stderr, "Timestamp[%"PRIu32"]: POC=%"PRId64", DTS=%"PRIu64", CTS=%"PRIu64"\n", + i, poc[i], timestamp[i].dts, timestamp[i].cts ); +#endif + free( poc ); + importer_info->ts_list.sample_count = num_access_units; + importer_info->ts_list.timestamp = timestamp; + /* Go back to EBSP of the first NALU. */ + lsmash_fseek( importer->stream, first_ebsp_head_pos, SEEK_SET ); + importer_info->status = MP4SYS_IMPORTER_OK; + info->nalu_header = first_nalu_header; + info->prev_nalu_type = 0; + info->no_more_read = 0; + buffer->pos = buffer->start; + buffer->end = buffer->start + fread( buffer->start, 1, buffer->bank->buffer_size, importer->stream ); + info->ebsp_head_pos = first_ebsp_head_pos; + uint8_t *temp_au = info->picture.au; + uint8_t *temp_incomplete_au = info->picture.incomplete_au; + memset( &info->picture, 0, sizeof(h264_picture_info_t) ); + info->picture.au = temp_au; + info->picture.incomplete_au = temp_incomplete_au; + memset( &info->slice, 0, sizeof(h264_slice_info_t) ); + memset( &info->sps, 0, sizeof(h264_sps_t) ); + memset( &info->pps, 0, sizeof(h264_pps_t) ); + lsmash_remove_entries( info->avcC_param.parameter_sets->sps_list, isom_remove_avcC_ps ); + lsmash_remove_entries( info->avcC_param.parameter_sets->pps_list, isom_remove_avcC_ps ); + lsmash_remove_entries( info->avcC_param.parameter_sets->spsext_list, isom_remove_avcC_ps ); + return 0; +fail: + mp4sys_remove_h264_info( importer_info ); + importer->info = NULL; + lsmash_remove_entries( importer->summaries, lsmash_cleanup_summary ); + return -1; +#undef H264_MAX_NUM_REORDER_FRAMES +#undef 
H264_LONG_START_CODE_LENGTH
+#undef H264_CHECK_NEXT_LONG_START_CODE
+}
+
+/* Duration to assign to the last sample of track 1.
+ * Returns 0 unless the importer has reached MP4SYS_IMPORTER_EOF and track_number is 1.
+ * Otherwise returns 1 when more than one sample was listed, or UINT32_MAX (an arbitrary
+ * value) when there was at most one. */
+static uint32_t mp4sys_h264_get_last_delta( mp4sys_importer_t* importer, uint32_t track_number )
+{
+    debug_if( !importer || !importer->info )
+        return 0;
+    mp4sys_h264_info_t *info = (mp4sys_h264_info_t *)importer->info;
+    if( !info || track_number != 1 || info->status != MP4SYS_IMPORTER_EOF )
+        return 0;
+    return info->ts_list.sample_count > 1
+         ? 1
+         : UINT32_MAX;    /* arbitrary */
+}
+
+/* Importer entry for H.264: name, the value 1, then the probe, get-access-unit,
+ * get-last-delta and cleanup callbacks in that order. */
+static const mp4sys_importer_functions mp4sys_h264_importer =
+{
+    "H.264",
+    1,
+    mp4sys_h264_probe,
+    mp4sys_h264_get_accessunit,
+    mp4sys_h264_get_last_delta,
+    mp4sys_h264_cleanup
+};
+
+/***************************************************************************
+    SMPTE VC-1 importer (only for Advanced Profile)
+    SMPTE 421M-2006
+    SMPTE RP 2025-2007
+***************************************************************************/
+#include "vc1.h"
+
+/* Per-importer state of the VC-1 importer. */
+typedef struct
+{
+    mp4sys_importer_status status;
+    vc1_info_t             info;
+    vc1_sequence_header_t  first_sequence;     /* copy of the first sequence header seen while probing */
+    lsmash_media_ts_list_t ts_list;            /* ts_list.timestamp is owned here and freed on removal */
+    uint8_t  composition_reordering_present;
+    uint32_t max_au_length;
+    uint32_t num_undecodable;
+    uint64_t last_ref_intra_cts;
+} mp4sys_vc1_info_t;
+
+/* Release a mp4sys_vc1_info_t together with its parser state and timestamp list.
+ * A NULL argument is accepted and ignored. */
+static void mp4sys_remove_vc1_info( mp4sys_vc1_info_t *info )
+{
+    if( !info )
+        return;
+    vc1_cleanup_parser( &info->info );
+    free( info->ts_list.timestamp );    /* free( NULL ) is a no-op, so no guard is needed */
+    free( info );
+}
+
+/* Cleanup callback of the VC-1 importer.
+ * NOTE(review): debug_if() is defined outside this part of the file. If it expands to a
+ * constant-false condition in non-debug builds, the info is never released here - confirm. */
+static void mp4sys_vc1_cleanup( mp4sys_importer_t *importer )
+{
+    debug_if( importer && importer->info )
+        mp4sys_remove_vc1_info( importer->info );
+}
+
+/* Buffer refill routine for FILE-backed input; it is handed to vc1_setup_parser() below
+ * (presumably to become buffer->update - confirm in vc1.c).
+ * Returns the number of unread bytes in the buffer. While no_more_read is clear and that number
+ * does not exceed anticipation_bytes, the unread bytes are moved to the head of the buffer and
+ * the remaining space is filled from src, which must be a FILE *.
+ * info->no_more_read takes the value of feof() once a refill reads nothing. */
+static uint32_t vc1_update_buffer_from_stream( vc1_info_t *info, void *src, uint32_t anticipation_bytes )
+{
+    vc1_stream_buffer_t *buffer = &info->buffer;
+    assert( anticipation_bytes < buffer->bank->buffer_size );
+    uint32_t remainder_bytes = buffer->end - buffer->pos;
+    if( info->no_more_read || remainder_bytes > anticipation_bytes )
+        return remainder_bytes;
+    /* Move unused data to the head of buffer.
+     * Source and destination may overlap, hence memmove(). */
+    memmove( buffer->start, buffer->pos, remainder_bytes );
+    /* Read and store the next data into the buffer.
+     * Move the position of buffer on the head. */
+    FILE *stream = (FILE *)src;
+    uint32_t read_size = fread( buffer->start + remainder_bytes, 1, buffer->bank->buffer_size - remainder_bytes, stream );
+    remainder_bytes += read_size;
+    buffer->pos = buffer->start;
+    buffer->end = buffer->start + remainder_bytes;
+    info->no_more_read = read_size == 0 ? feof( stream ) : 0;
+    return remainder_bytes;
+}
+
+/* Allocate a zero-filled mp4sys_vc1_info_t and set up its parser with the FILE-backed
+ * refill routine. Returns NULL on allocation or parser setup failure. */
+static mp4sys_vc1_info_t *mp4sys_create_vc1_info( void )
+{
+    mp4sys_vc1_info_t *info = lsmash_malloc_zero( sizeof(mp4sys_vc1_info_t) );
+    if( !info )
+        return NULL;
+    if( vc1_setup_parser( &info->info, 0, vc1_update_buffer_from_stream ) )
+    {
+        mp4sys_remove_vc1_info( info );
+        return NULL;
+    }
+    return info;
+}
+
+/* Turn the pending EBDUs into a finished access unit.
+ * Returns 0 and changes nothing when no picture is present; otherwise returns 1 after moving the
+ * pending length to data_length, resetting the pending length and updating the AU properties.
+ * The bytes themselves are copied only when probe is 0. */
+static inline int vc1_complete_au( vc1_access_unit_t *access_unit, vc1_picture_info_t *picture, int probe )
+{
+    if( !picture->present )
+        return 0;
+    if( !probe )
+        memcpy( access_unit->data, access_unit->incomplete_data, access_unit->incomplete_data_length );
+    access_unit->data_length            = access_unit->incomplete_data_length;
+    access_unit->incomplete_data_length = 0;
+    vc1_update_au_property( access_unit, picture );
+    return 1;
+}
+
+static inline void vc1_append_ebdu_to_au( vc1_access_unit_t *access_unit, uint8_t *ebdu, uint32_t ebdu_length, int probe )
+{
+    if( !probe )
+        memcpy( access_unit->incomplete_data + access_unit->incomplete_data_length, ebdu, ebdu_length );
+    /* Note: access_unit->incomplete_data_length shall be 0 immediately after AU has completed.
+     * Therefore, possible_au_length in vc1_get_access_unit_internal() can't be used here
+     * to avoid increasing AU length monotonously through the entire stream.
*/
+    access_unit->incomplete_data_length += ebdu_length;
+}
+
+/* Common epilogue of vc1_get_access_unit_internal().
+ * Publishes the importer status and stores the BDU type that the next call starts from.
+ * The status becomes MP4SYS_IMPORTER_EOF only when no_more_read is set, the caller reports the
+ * buffer as consumed (no_more_buf) and no bytes of an unfinished AU are pending;
+ * otherwise it is MP4SYS_IMPORTER_OK. */
+static inline void vc1_get_au_internal_end( mp4sys_vc1_info_t *info, vc1_access_unit_t *access_unit, uint8_t bdu_type, int no_more_buf )
+{
+    int nothing_left = info->info.no_more_read
+                    && no_more_buf
+                    && (access_unit->incomplete_data_length == 0);
+    if( nothing_left )
+        info->status = MP4SYS_IMPORTER_EOF;
+    else
+        info->status = MP4SYS_IMPORTER_OK;
+    info->info.bdu_type = bdu_type;
+}
+
+/* Successful exit: run the common epilogue, count the delivered AU and return 0. */
+static int vc1_get_au_internal_succeeded( mp4sys_vc1_info_t *info, vc1_access_unit_t *access_unit, uint8_t bdu_type, int no_more_buf )
+{
+    vc1_get_au_internal_end( info, access_unit, bdu_type, no_more_buf );
+    ++access_unit->number;
+    return 0;
+}
+
+/* Failure exit: run the common epilogue and return -1.
+ * The AU counter still advances when an AU had already been completed before the error (complete_au). */
+static int vc1_get_au_internal_failed( mp4sys_vc1_info_t *info, vc1_access_unit_t *access_unit, uint8_t bdu_type, int no_more_buf, int complete_au )
+{
+    vc1_get_au_internal_end( info, access_unit, bdu_type, no_more_buf );
+    if( complete_au )
+        ++access_unit->number;
+    return -1;
+}
+
+/* Advance through the stream until one access unit is complete.
+ * Returns 0 through vc1_get_au_internal_succeeded() and -1 through vc1_get_au_internal_failed().
+ * probe is forwarded to vc1_complete_au() and vc1_append_ebdu_to_au(), which copy EBDU bytes
+ * only when it is 0. */
+static int vc1_get_access_unit_internal( mp4sys_importer_t *importer, int probe )
+{
+    mp4sys_vc1_info_t   *importer_info = (mp4sys_vc1_info_t *)importer->info;
+    vc1_info_t          *info          = &importer_info->info;
+    vc1_stream_buffer_t *buffer        = &info->buffer;
+    vc1_access_unit_t   *access_unit   = &info->access_unit;
+    uint8_t  bdu_type = info->bdu_type;
+    uint64_t consecutive_zero_byte_count = 0;
+    uint64_t ebdu_length = 0;
+    int      no_more_buf = 0;
+    int      complete_au = 0;
+    access_unit->data_length = 0;
+    while( 1 )
+    {
+        buffer->update( info, importer->stream, 2 );
+        no_more_buf = buffer->pos >= buffer->end;
+        int no_more = info->no_more_read && no_more_buf;
+        if( !vc1_check_next_start_code_prefix( buffer->pos, buffer->end ) && !no_more )
+        {
+            if( *(buffer->pos ++) )
+                consecutive_zero_byte_count = 0;
+            else
+                ++consecutive_zero_byte_count;
+            ++ebdu_length;
+            continue;
+        }
+        if( no_more && ebdu_length == 0 )
+        {
+            /* For the last EBDU.
+             * This EBDU already has been appended into the latest access unit and parsed.
*/ + vc1_complete_au( access_unit, &info->picture, probe ); + return vc1_get_au_internal_succeeded( importer->info, access_unit, bdu_type, no_more_buf ); + } + ebdu_length += VC1_START_CODE_LENGTH; + uint64_t next_scs_file_offset = info->ebdu_head_pos + ebdu_length + !no_more * VC1_START_CODE_PREFIX_LENGTH; + uint8_t *next_ebdu_pos = buffer->pos; /* Memorize position of beginning of the next EBDU in buffer. + * This is used when backward reading of stream doesn't occur. */ + int read_back = 0; +#if 0 + if( probe ) + { + fprintf( stderr, "BDU type: %"PRIu8" \n", bdu_type ); + fprintf( stderr, " EBDU position: %"PRIx64" \n", info->ebdu_head_pos ); + fprintf( stderr, " EBDU length: %"PRIx64" (%"PRIu64") \n", ebdu_length - consecutive_zero_byte_count, + ebdu_length - consecutive_zero_byte_count ); + fprintf( stderr, " consecutive_zero_byte_count: %"PRIx64" \n", consecutive_zero_byte_count ); + fprintf( stderr, " Next start code suffix position: %"PRIx64"\n", next_scs_file_offset ); + } +#endif + if( bdu_type >= 0x0A && bdu_type <= 0x0F ) + { + /* Get the current EBDU here. */ + ebdu_length -= consecutive_zero_byte_count; /* Any EBDU doesn't have zero bytes at the end. */ + uint64_t possible_au_length = access_unit->incomplete_data_length + ebdu_length; + if( buffer->bank->buffer_size < possible_au_length ) + { + if( vc1_supplement_buffer( buffer, access_unit, 2 * possible_au_length ) ) + return vc1_get_au_internal_failed( importer->info, access_unit, bdu_type, no_more_buf, complete_au ); + next_ebdu_pos = buffer->pos; + } + /* Move to the first byte of the current EBDU. 
*/ + read_back = (buffer->pos - buffer->start) < (ebdu_length + consecutive_zero_byte_count); + if( read_back ) + { + lsmash_fseek( importer->stream, info->ebdu_head_pos, SEEK_SET ); + int read_fail = fread( buffer->start, 1, ebdu_length, importer->stream ) != ebdu_length; + buffer->pos = buffer->start; + buffer->end = buffer->start + ebdu_length; + if( read_fail ) + return vc1_get_au_internal_failed( importer->info, access_unit, bdu_type, no_more_buf, complete_au ); +#if 0 + if( probe ) + fprintf( stderr, " ----Read Back\n" ); +#endif + } + else + buffer->pos -= ebdu_length + consecutive_zero_byte_count; + /* Complete the current access unit if encountered delimiter of current access unit. */ + if( vc1_find_au_delimit_by_bdu_type( bdu_type, info->prev_bdu_type ) ) + /* The last video coded EBDU belongs to the access unit you want at this time. */ + complete_au = vc1_complete_au( access_unit, &info->picture, probe ); + /* Process EBDU by its BDU type and append it to access unit. */ + switch( bdu_type ) + { + /* FRM_SC: Frame start code + * FLD_SC: Field start code + * SLC_SC: Slice start code + * SEQ_SC: Sequence header start code + * EP_SC: Entry-point start code + * PIC_L: Picture layer + * SLC_L: Slice layer + * SEQ_L: Sequence layer + * EP_L: Entry-point layer */ + case 0x0D : /* Frame + * For the Progressive or Frame Interlace mode, shall signal the beginning of a new video frame. + * For the Field Interlace mode, shall signal the beginning of a sequence of two independently coded video fields. + * [FRM_SC][PIC_L][[FLD_SC][PIC_L] (optional)][[SLC_SC][SLC_L] (optional)] ... */ + if( vc1_parse_advanced_picture( info->bits, &info->sequence, &info->picture, buffer->rbdu, + buffer->pos, ebdu_length ) ) + return vc1_get_au_internal_failed( importer->info, access_unit, bdu_type, no_more_buf, complete_au ); + case 0x0C : /* Field + * Shall only be used for Field Interlaced frames + * and shall only be used to signal the beginning of the second field of the frame. 
+ * [FRM_SC][PIC_L][FLD_SC][PIC_L][[SLC_SC][SLC_L] (optional)] ... + * Field start code is followed by INTERLACE_FIELD_PICTURE_FIELD2() which doesn't have info of its field picture type.*/ + break; + case 0x0B : /* Slice + * Shall not be used for start code of the first slice of a frame. + * Shall not be used for start code of the first slice of an interlace field coded picture. + * [FRM_SC][PIC_L][[FLD_SC][PIC_L] (optional)][SLC_SC][SLC_L][[SLC_SC][SLC_L] (optional)] ... + * Slice layer may repeat frame header. We just ignore it. */ + info->dvc1_param.slice_present = 1; + break; + case 0x0E : /* Entry-point header + * Entry-point indicates the direct followed frame is a start of group of frames. + * Entry-point doesn't indicates the frame is a random access point when multiple sequence headers are present, + * since it is necessary to decode sequence header which subsequent frames belong to for decoding them. + * Entry point shall be followed by + * 1. I-picture - progressive or frame interlace + * 2. I/I-picture, I/P-picture, or P/I-picture - field interlace + * [[SEQ_SC][SEQ_L] (optional)][EP_SC][EP_L][FRM_SC][PIC_L] ... */ + if( vc1_parse_entry_point_header( info, buffer->pos, ebdu_length, probe ) ) + return vc1_get_au_internal_failed( importer->info, access_unit, bdu_type, no_more_buf, complete_au ); + /* Signal random access type of the frame that follows this entry-point header. */ + info->picture.closed_gop = info->entry_point.closed_entry_point; + info->picture.random_accessible = info->dvc1_param.multiple_sequence ? info->picture.start_of_sequence : 1; + break; + case 0x0F : /* Sequence header + * [SEQ_SC][SEQ_L][EP_SC][EP_L][FRM_SC][PIC_L] ... */ + if( vc1_parse_sequence_header( info, buffer->pos, ebdu_length, probe ) ) + return vc1_get_au_internal_failed( importer->info, access_unit, bdu_type, no_more_buf, complete_au ); + /* The frame that is the first frame after this sequence header shall be a random accessible point. 
*/ + info->picture.start_of_sequence = 1; + if( probe && !importer_info->first_sequence.present ) + importer_info->first_sequence = info->sequence; + break; + default : /* End-of-sequence (0x0A) */ + break; + } + vc1_append_ebdu_to_au( access_unit, buffer->pos, ebdu_length, probe ); + } + else /* We don't support other BDU types such as user data yet. */ + return vc1_get_au_internal_failed( importer->info, access_unit, bdu_type, no_more_buf, complete_au ); + /* Move to the first byte of the next start code suffix. */ + if( read_back ) + { + lsmash_fseek( importer->stream, next_scs_file_offset, SEEK_SET ); + buffer->pos = buffer->start; + buffer->end = buffer->start + fread( buffer->start, 1, buffer->bank->buffer_size, importer->stream ); + } + else + buffer->pos = next_ebdu_pos + VC1_START_CODE_PREFIX_LENGTH; + info->prev_bdu_type = bdu_type; + buffer->update( info, importer->stream, 0 ); + no_more_buf = buffer->pos >= buffer->end; + ebdu_length = 0; + no_more = info->no_more_read && no_more_buf; + if( !no_more ) + { + /* Check the next BDU type. */ + if( vc1_check_next_start_code_suffix( &bdu_type, &buffer->pos ) ) + return vc1_get_au_internal_failed( importer->info, access_unit, bdu_type, no_more_buf, complete_au ); + info->ebdu_head_pos = next_scs_file_offset - VC1_START_CODE_PREFIX_LENGTH; + } + /* If there is no more data in the stream, and flushed chunk of EBDUs, flush it as complete AU here. 
*/
+        else if( access_unit->incomplete_data_length && access_unit->data_length == 0 )
+        {
+            vc1_complete_au( access_unit, &info->picture, probe );
+            return vc1_get_au_internal_succeeded( importer->info, access_unit, bdu_type, no_more_buf );
+        }
+        if( complete_au )
+            return vc1_get_au_internal_succeeded( importer->info, access_unit, bdu_type, no_more_buf );
+        consecutive_zero_byte_count = 0;
+    }
+}
+
+/* Copy the next VC-1 access unit into buffered_sample and fill in its timestamps and sample properties.
+ * Only track number 1 is accepted, and buffered_sample->length must be at least max_au_length.
+ * Returns a negative value on failure. At the end of the stream buffered_sample->length is set to 0 and 0 is returned.
+ * Otherwise the importer status sampled on entry is returned. */
+static int mp4sys_vc1_get_accessunit( mp4sys_importer_t *importer, uint32_t track_number, lsmash_sample_t *buffered_sample )
+{
+    debug_if( !importer || !importer->info || !buffered_sample->data || !buffered_sample->length )
+        return -1;
+    if( !importer->info || track_number != 1 )
+        return -1;
+    mp4sys_vc1_info_t *importer_info = (mp4sys_vc1_info_t *)importer->info;
+    vc1_info_t *info = &importer_info->info;
+    mp4sys_importer_status current_status = importer_info->status;
+    if( current_status == MP4SYS_IMPORTER_ERROR || buffered_sample->length < importer_info->max_au_length )
+        return -1;
+    if( current_status == MP4SYS_IMPORTER_EOF )
+    {
+        buffered_sample->length = 0;
+        return 0;
+    }
+    if( vc1_get_access_unit_internal( importer, 0 ) )
+    {
+        importer_info->status = MP4SYS_IMPORTER_ERROR;
+        return -1;
+    }
+    vc1_access_unit_t *access_unit = &info->access_unit;
+    /* The timestamp list holds ts_list.sample_count entries and is indexed by number - 1 below,
+     * so refuse an access unit number that would fall outside of it. */
+    if( access_unit->number == 0 || access_unit->number > importer_info->ts_list.sample_count )
+    {
+        importer_info->status = MP4SYS_IMPORTER_ERROR;
+        return -1;
+    }
+    buffered_sample->dts = importer_info->ts_list.timestamp[access_unit->number - 1].dts;
+    buffered_sample->cts = importer_info->ts_list.timestamp[access_unit->number - 1].cts;
+    buffered_sample->prop.leading = access_unit->independent || access_unit->non_bipredictive || buffered_sample->cts >= importer_info->last_ref_intra_cts
+                                  ? ISOM_SAMPLE_IS_NOT_LEADING : ISOM_SAMPLE_IS_UNDECODABLE_LEADING;
+    /* Remember the CTS of the latest independent, non-disposable picture; it is compared against above. */
+    if( access_unit->independent && !access_unit->disposable )
+        importer_info->last_ref_intra_cts = buffered_sample->cts;
+    if( importer_info->composition_reordering_present && !access_unit->disposable && !access_unit->closed_gop )
+        buffered_sample->prop.allow_earlier = QT_SAMPLE_EARLIER_PTS_ALLOWED;
+    buffered_sample->prop.independent = access_unit->independent ? ISOM_SAMPLE_IS_INDEPENDENT : ISOM_SAMPLE_IS_NOT_INDEPENDENT;
+    buffered_sample->prop.disposable  = access_unit->disposable  ? ISOM_SAMPLE_IS_DISPOSABLE  : ISOM_SAMPLE_IS_NOT_DISPOSABLE;
+    buffered_sample->prop.redundant   = ISOM_SAMPLE_HAS_NO_REDUNDANCY;
+    if( access_unit->random_accessible )
+        /* All random access point is a sync sample even if it's an open RAP. */
+        buffered_sample->prop.ra_flags = ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC;
+    buffered_sample->length = access_unit->data_length;
+    memcpy( buffered_sample->data, access_unit->data, access_unit->data_length );
+    return current_status;
+}
+
+/* Build a video summary for a VC-1 stream from the parsed sequence header.
+ * Returns NULL if no sequence header or entry-point header has been seen yet, or on allocation failure. */
+static lsmash_video_summary_t *vc1_create_summary( vc1_info_t *info, vc1_sequence_header_t *sequence, uint32_t max_au_length )
+{
+    if( !info->sequence.present || !info->entry_point.present )
+        return NULL;
+    lsmash_video_summary_t *summary = (lsmash_video_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_VIDEO );
+    if( !summary )
+        return NULL;
+    lsmash_codec_specific_t *specific = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_VC_1,
+                                                                           LSMASH_CODEC_SPECIFIC_FORMAT_UNSTRUCTURED );
+    /* 'specific' is dereferenced right below, so bail out first if it could not be created. */
+    if( !specific )
+    {
+        lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+        return NULL;
+    }
+    specific->data.unstructured = lsmash_create_vc1_specific_info( &info->dvc1_param, &specific->size );
+    if( !specific->data.unstructured
+     || lsmash_add_entry( &summary->opaque->list, specific ) )
+    {
+        lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+        lsmash_destroy_codec_specific_data( specific );
+        return NULL;
+    }
+    summary->sample_type            = ISOM_CODEC_TYPE_VC_1_VIDEO;
+    summary->max_au_length          = max_au_length;
+    summary->timescale
= sequence->framerate_numerator;
+    summary->timebase               = sequence->framerate_denominator;
+    summary->vfr                    = !sequence->framerate_flag;
+    summary->sample_per_field       = 0;
+    summary->width                  = sequence->disp_horiz_size;
+    summary->height                 = sequence->disp_vert_size;
+    summary->par_h                  = sequence->aspect_width;
+    summary->par_v                  = sequence->aspect_height;
+    summary->color.primaries_index  = sequence->color_prim;
+    summary->color.transfer_index   = sequence->transfer_char;
+    summary->color.matrix_index     = sequence->matrix_coef;
+    return summary;
+}
+
+/* Probe the stream as a VC-1 byte stream.
+ * The whole stream is parsed once to count access units, record the largest one and derive DTS/CTS,
+ * then the stream and the parser state are rewound to the first EBDU so that reading can start over.
+ * Returns 0 on success. On failure returns -1 after releasing the importer info and any registered summaries. */
+static int mp4sys_vc1_probe( mp4sys_importer_t *importer )
+{
+#define VC1_CHECK_FIRST_START_CODE( x ) (!(x)[0] && !(x)[1] && ((x)[2] == 0x01))
+    /* Find the first start code. */
+    mp4sys_vc1_info_t *importer_info = mp4sys_create_vc1_info();
+    if( !importer_info )
+        return -1;
+    vc1_info_t *info = &importer_info->info;
+    vc1_stream_buffer_t *buffer = &info->buffer;
+    buffer->pos = buffer->start;
+    buffer->end = buffer->start + fread( buffer->start, 1, buffer->bank->buffer_size, importer->stream );
+    info->no_more_read = buffer->start >= buffer->end ? feof( importer->stream ) : 0;
+    while( 1 )
+    {
+        /* If the first trial of finding start code of sequence header failed, we assume this stream is not byte stream format of VC-1.
+         * Checking the remaining length first also keeps the scan inside the bytes actually read,
+         * including when the stream is shorter than one start code. */
+        if( buffer->end - buffer->pos < VC1_START_CODE_LENGTH )
+            goto fail;
+        /* Invalid if encountered any value of non-zero before the first start code. */
+        if( *buffer->pos )
+            goto fail;
+        /* The first EBDU in decoding order of the stream shall have start code (0x000001). */
+        if( VC1_CHECK_FIRST_START_CODE( buffer->pos ) )
+            break;
+        ++ buffer->pos;
+    }
+    /* OK. It seems the stream has a sequence header of VC-1. */
+    importer->info = importer_info;
+    uint64_t first_ebdu_head_pos = buffer->pos - buffer->start;
+    buffer->pos += VC1_START_CODE_PREFIX_LENGTH;
+    buffer->update( info, importer->stream, 0 );
+    uint8_t first_bdu_type = *(buffer->pos ++);
+    if( buffer->pos >= buffer->end )
+        goto fail;      /* It seems the stream ends at the first incomplete access unit. */
+    importer_info->status = MP4SYS_IMPORTER_OK;
+    info->bdu_type        = first_bdu_type;
+    info->ebdu_head_pos   = first_ebdu_head_pos;
+    /* Parse all EBDU in the stream for preparation of calculating timestamps. */
+    size_t cts_alloc = (1 << 12) * sizeof(uint64_t);
+    uint64_t *cts = malloc( cts_alloc );
+    if( !cts )
+        goto fail;
+    uint32_t num_access_units  = 0;
+    uint32_t num_consecutive_b = 0;
+    fprintf( stderr, "Analyzing stream as VC-1\r" );
+    while( importer_info->status != MP4SYS_IMPORTER_EOF )
+    {
+#if 0
+        fprintf( stderr, "Analyzing stream as VC-1: %"PRIu32"\n", num_access_units + 1 );
+#endif
+        if( vc1_get_access_unit_internal( importer, 1 ) )
+        {
+            free( cts );
+            goto fail;
+        }
+        /* Grow the array BEFORE cts[num_access_units] may be stored below.
+         * Growing after the store would write one element past the allocation
+         * every time the count reaches the current capacity. */
+        if( cts_alloc <= num_access_units * sizeof(uint64_t) )
+        {
+            size_t alloc = 2 * cts_alloc;
+            uint64_t *temp = realloc( cts, alloc );
+            if( !temp )
+            {
+                free( cts );
+                goto fail;
+            }
+            cts = temp;
+            cts_alloc = alloc;
+        }
+        /* In the case where B-pictures exist
+         *   Decode order
+         *     I[0]P[1]P[2]B[3]B[4]P[5]...
+         *   DTS
+         *       0   1   2   3   4   5 ...
+         *   Composition order
+         *     I[0]P[1]B[3]B[4]P[2]P[5]...
+         *   CTS
+         *       1   2   3   4   5   6 ...
+         * We assume B or BI-pictures are always present in the stream here. */
+        if( !info->access_unit.disposable )
+        {
+            /* Apply CTS of the last B-picture plus 1 to the last non-B-picture. */
+            if( num_access_units > num_consecutive_b )
+                cts[ num_access_units - num_consecutive_b - 1 ] = num_access_units;
+            num_consecutive_b = 0;
+        }
+        else    /* B or BI-picture */
+        {
+            /* B and BI-pictures shall be output or displayed in the same order as they are encoded. */
+            cts[ num_access_units ] = num_access_units;
+            ++num_consecutive_b;
+            info->dvc1_param.bframe_present = 1;
+        }
+        importer_info->max_au_length = LSMASH_MAX( info->access_unit.data_length, importer_info->max_au_length );
+        ++num_access_units;
+    }
+    /* Give the last non-B-picture its CTS. A stream without any such picture is rejected. */
+    if( num_access_units > num_consecutive_b )
+        cts[ num_access_units - num_consecutive_b - 1 ] = num_access_units;
+    else
+    {
+        free( cts );
+        goto fail;
+    }
+    fprintf( stderr, " \r" );
+    /* Construct timestamps. */
+    lsmash_media_ts_t *timestamp = malloc( num_access_units * sizeof(lsmash_media_ts_t) );
+    if( !timestamp )
+    {
+        free( cts );
+        goto fail;
+    }
+    for( uint32_t i = 1; i < num_access_units; i++ )
+        if( cts[i] < cts[i - 1] )
+        {
+            importer_info->composition_reordering_present = 1;
+            break;
+        }
+    if( importer_info->composition_reordering_present )
+        for( uint32_t i = 0; i < num_access_units; i++ )
+        {
+            timestamp[i].cts = cts[i];
+            timestamp[i].dts = i;
+        }
+    else
+        for( uint32_t i = 0; i < num_access_units; i++ )
+            timestamp[i].cts = timestamp[i].dts = i;
+    free( cts );
+#if 0
+    for( uint32_t i = 0; i < num_access_units; i++ )
+        fprintf( stderr, "Timestamp[%"PRIu32"]: DTS=%"PRIu64", CTS=%"PRIu64"\n", i, timestamp[i].dts, timestamp[i].cts );
+#endif
+    lsmash_video_summary_t *summary = vc1_create_summary( info, &importer_info->first_sequence, importer_info->max_au_length );
+    if( !summary || lsmash_add_entry( importer->summaries, summary ) )
+    {
+        /* A summary that never made it onto the list is not released by the cleanup at 'fail'. */
+        if( summary )
+            lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+        free( timestamp );
+        goto fail;
+    }
+    importer_info->ts_list.sample_count = num_access_units;
+    importer_info->ts_list.timestamp    = timestamp;
+    /* Go back to layer of the first EBDU. */
+    lsmash_fseek( importer->stream, first_ebdu_head_pos, SEEK_SET );
+    importer_info->status = MP4SYS_IMPORTER_OK;
+    info->bdu_type        = first_bdu_type;
+    info->prev_bdu_type   = 0;
+    info->no_more_read    = 0;
+    buffer->pos           = buffer->start + VC1_START_CODE_LENGTH;
+    buffer->end           = buffer->start + fread( buffer->start, 1, buffer->bank->buffer_size, importer->stream );
+    info->ebdu_head_pos   = first_ebdu_head_pos;
+    /* Reset the access unit state but keep its two data buffers. */
+    uint8_t *temp_access_unit            = info->access_unit.data;
+    uint8_t *temp_incomplete_access_unit = info->access_unit.incomplete_data;
+    memset( &info->access_unit, 0, sizeof(vc1_access_unit_t) );
+    info->access_unit.data            = temp_access_unit;
+    info->access_unit.incomplete_data = temp_incomplete_access_unit;
+    memset( &info->picture, 0, sizeof(vc1_picture_info_t) );
+    return 0;
+fail:
+    mp4sys_remove_vc1_info( importer_info );
+    importer->info = NULL;
+    lsmash_remove_entries( importer->summaries, lsmash_cleanup_summary );
+    return -1;
+#undef VC1_CHECK_FIRST_START_CODE
+}
+
+static uint32_t mp4sys_vc1_get_last_delta( mp4sys_importer_t* importer, uint32_t track_number )
+{
+    debug_if( !importer || !importer->info )
+        return 0;
+    mp4sys_vc1_info_t *info = (mp4sys_vc1_info_t *)importer->info;
+    if( !info || track_number != 1 || info->status != MP4SYS_IMPORTER_EOF )
+        return 0;
+    return info->ts_list.sample_count > 1
+         ?
1
+         : UINT32_MAX;    /* arbitrary */
+}
+
+const static mp4sys_importer_functions mp4sys_vc1_importer =
+{
+    "VC-1",
+    1,
+    mp4sys_vc1_probe,
+    mp4sys_vc1_get_accessunit,
+    mp4sys_vc1_get_last_delta,
+    mp4sys_vc1_cleanup
+};
+
+/***************************************************************************
+    importer public interfaces
+***************************************************************************/
+
+
+/******** importer listing table ********/
+/* NULL-terminated; auto detection tries the importers in this order. */
+const static mp4sys_importer_functions* mp4sys_importer_tbl[] = {
+    &mp4sys_adts_importer,
+    &mp4sys_mp3_importer,
+    &mp4sys_amr_importer,
+    &mp4sys_ac3_importer,
+    &mp4sys_eac3_importer,
+    &mp4sys_als_importer,
+    &mp4sys_dts_importer,
+    &mp4sys_h264_importer,
+    &mp4sys_vc1_importer,
+    NULL,
+};
+
+/******** importer public functions ********/
+
+/* Release an importer: close its stream unless it is stdin, run the format-specific cleanup if one
+ * is installed, and free the summary list and the importer itself. A NULL importer is ignored. */
+void mp4sys_importer_close( mp4sys_importer_t* importer )
+{
+    if( !importer )
+        return;
+    if( !importer->is_stdin && importer->stream )
+        fclose( importer->stream );
+    if( importer->funcs.cleanup )
+        importer->funcs.cleanup( importer );
+    lsmash_remove_list( importer->summaries, lsmash_cleanup_summary );
+    free( importer );
+}
+
+/* Open 'identifier' ("-" means stdin) and attach the importer that accepts it.
+ * 'format' selects an importer by name; NULL or "auto" probes every detectable importer in table order.
+ * Auto detection is not available for stdin. Returns NULL on any failure. */
+mp4sys_importer_t *mp4sys_importer_open( const char *identifier, const char *format )
+{
+    if( identifier == NULL )
+        return NULL;
+
+    int auto_detect = ( format == NULL || !strcmp( format, "auto" ) );
+    mp4sys_importer_t *importer = (mp4sys_importer_t *)lsmash_malloc_zero( sizeof(mp4sys_importer_t) );
+    if( !importer )
+        return NULL;
+
+    if( !strcmp( identifier, "-" ) )
+    {
+        /* special treatment for stdin */
+        if( auto_detect )
+        {
+            free( importer );
+            return NULL;
+        }
+        importer->stream = stdin;
+        importer->is_stdin = 1;
+    }
+    else if( (importer->stream = fopen( identifier, "rb" )) == NULL )
+    {
+        mp4sys_importer_close( importer );
+        return NULL;
+    }
+    importer->summaries = lsmash_create_entry_list();
+    if( !importer->summaries )
+    {
+        mp4sys_importer_close( importer );
+        return NULL;
+    }
+    /* find importer */
+    const mp4sys_importer_functions *funcs;
+    if( auto_detect )
+    {
+        /* just rely on detector. */
+        for( int i = 0; (funcs = mp4sys_importer_tbl[i]) != NULL; i++ )
+        {
+            if( !funcs->detectable )
+                continue;
+            if( !funcs->probe( importer ) )
+                break;
+            /* The probe rejected the stream. Rewind for the next candidate; if that is impossible,
+             * give up instead of installing the importer that has just failed. */
+            if( lsmash_fseek( importer->stream, 0, SEEK_SET ) )
+            {
+                funcs = NULL;
+                break;
+            }
+        }
+    }
+    else
+    {
+        /* needs name matching. */
+        for( int i = 0; (funcs = mp4sys_importer_tbl[i]) != NULL; i++ )
+        {
+            if( strcmp( funcs->name, format ) )
+                continue;
+            if( funcs->probe( importer ) )
+                funcs = NULL;
+            break;
+        }
+    }
+    if( !funcs )
+    {
+        mp4sys_importer_close( importer );
+        return NULL;
+    }
+    importer->funcs = *funcs;
+    return importer;
+}
+
+/* 0 if success, positive if changed, negative if failed */
+int mp4sys_importer_get_access_unit( mp4sys_importer_t* importer, uint32_t track_number, lsmash_sample_t *buffered_sample )
+{
+    /* buffered_sample is dereferenced here, so it has to be checked like the other arguments. */
+    if( !importer || !importer->funcs.get_accessunit || !buffered_sample || !buffered_sample->data || buffered_sample->length == 0 )
+        return -1;
+    return importer->funcs.get_accessunit( importer, track_number, buffered_sample );
+}
+
+/* Return 0 if failed, otherwise succeeded.
*/
+uint32_t mp4sys_importer_get_last_delta( mp4sys_importer_t *importer, uint32_t track_number )
+{
+    /* The return type is unsigned, so -1 would come out as UINT32_MAX and could not be told apart
+     * from a delta reported by an importer. Failure is reported as 0. */
+    if( !importer || !importer->funcs.get_last_delta )
+        return 0;
+    return importer->funcs.get_last_delta( importer, track_number );
+}
+
+/* Number of summaries held by the importer; 0 if the importer or its summary list is missing. */
+uint32_t mp4sys_importer_get_track_count( mp4sys_importer_t *importer )
+{
+    if( !importer || !importer->summaries )
+        return 0;
+    return importer->summaries->entry_count;
+}
+
+/* Create an independent copy of the summary of 'track_number', including its codec specific data.
+ * Only video and audio summaries are supported. Returns NULL on failure. */
+lsmash_summary_t *mp4sys_duplicate_summary( mp4sys_importer_t *importer, uint32_t track_number )
+{
+    if( !importer )
+        return NULL;
+    lsmash_summary_t *src_summary = lsmash_get_entry_data( importer->summaries, track_number );
+    if( !src_summary )
+        return NULL;
+    lsmash_summary_t *summary = lsmash_create_summary( src_summary->summary_type );
+    if( !summary )
+        return NULL;
+    /* The struct copy below overwrites 'opaque' with the source's pointer; keep ours and restore it afterwards. */
+    lsmash_codec_specific_list_t *opaque = summary->opaque;
+    switch( src_summary->summary_type )
+    {
+        case LSMASH_SUMMARY_TYPE_VIDEO :
+            *(lsmash_video_summary_t *)summary = *(lsmash_video_summary_t *)src_summary;
+            break;
+        case LSMASH_SUMMARY_TYPE_AUDIO :
+            *(lsmash_audio_summary_t *)summary = *(lsmash_audio_summary_t *)src_summary;
+            break;
+        default :
+            lsmash_cleanup_summary( summary );
+            return NULL;
+    }
+    summary->opaque = opaque;
+    for( lsmash_entry_t *entry = src_summary->opaque->list.head; entry; entry = entry->next )
+    {
+        lsmash_codec_specific_t *src_specific = (lsmash_codec_specific_t *)entry->data;
+        if( !src_specific )
+            continue;
+        lsmash_codec_specific_t *dup_data = isom_duplicate_codec_specific_data( src_specific );
+        /* A failed duplication must not be stored as an entry without data. */
+        if( !dup_data )
+        {
+            lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+            return NULL;
+        }
+        if( lsmash_add_entry( &summary->opaque->list, dup_data ) )
+        {
+            lsmash_cleanup_summary( (lsmash_summary_t *)summary );
+            lsmash_destroy_codec_specific_data( dup_data );
+            return NULL;
+        }
+    }
+    return summary;
+}
diff --git a/output/mp4/importer.h b/output/mp4/importer.h
new file mode 100644
index 0000000..9abe247
--- /dev/null
+++ b/output/mp4/importer.h
@@ -0,0 +1,47 @@
+/*****************************************************************************
+
* importer.h: + ***************************************************************************** + * Copyright (C) 2010-2012 L-SMASH project + * + * Authors: Takashi Hirata + * Contributors: Yusuke Nakamura + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *****************************************************************************/ + +/* This file is available under an ISC license. 
*/ + +#ifndef LSMASH_IMPORTER_H +#define LSMASH_IMPORTER_H + +/*************************************************************************** + importer +***************************************************************************/ + +#ifndef LSMASH_IMPORTER_INTERNAL + +typedef void mp4sys_importer_t; + +/* importing functions */ +mp4sys_importer_t *mp4sys_importer_open( const char *identifier, const char *format ); +void mp4sys_importer_close( mp4sys_importer_t *importer ); +int mp4sys_importer_get_access_unit( mp4sys_importer_t *importer, uint32_t track_number, lsmash_sample_t *buffered_sample ); +uint32_t mp4sys_importer_get_last_delta( mp4sys_importer_t *importer, uint32_t track_number ); +uint32_t mp4sys_importer_get_track_count( mp4sys_importer_t *importer ); +lsmash_summary_t *mp4sys_duplicate_summary( mp4sys_importer_t *importer, uint32_t track_number ); + +int mp4sys_amr_create_damr( lsmash_audio_summary_t *summary ); + +#endif /* #ifndef LSMASH_IMPORTER_INTERNAL */ + +#endif /* #ifndef LSMASH_IMPORTER_H */ diff --git a/output/mp4/internal.h b/output/mp4/internal.h new file mode 100644 index 0000000..9864ca8 --- /dev/null +++ b/output/mp4/internal.h @@ -0,0 +1,37 @@ +/***************************************************************************** + * internal.h: + ***************************************************************************** + * Copyright (C) 2010-2012 L-SMASH project + * + * Authors: Takashi Hirata + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *****************************************************************************/ + +/* This file is available under an ISC license. */ + +#ifndef INTERNAL_H +#define INTERNAL_H + +#include "common/osdep.h" /* must be placed before stdio.h */ +#include +#include + +#ifndef lsmash_fseek +#define lsmash_fseek fseek +#define lsmash_ftell ftell +#endif + +#include "lsmash.h" + +#endif diff --git a/output/mp4/isom.c b/output/mp4/isom.c new file mode 100644 index 0000000..4ea1e43 --- /dev/null +++ b/output/mp4/isom.c @@ -0,0 +1,7912 @@ +/***************************************************************************** + * isom.c: + ***************************************************************************** + * Copyright (C) 2010-2012 L-SMASH project + * + * Authors: Yusuke Nakamura + * Contributors: Takashi Hirata + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ *****************************************************************************/
+
+/* This file is available under an ISC license. */
+
+#include "internal.h" /* must be placed first */
+
+#include
+#include
+#include
+
+#include "box.h"
+#include "isom.h"
+#include "mp4a.h"
+#include "mp4sys.h"
+#include "write.h"
+#include "description.h"
+#ifdef LSMASH_DEMUXER_ENABLED
+#include "read.h"
+#include "print.h"
+#include "timeline.h"
+#endif
+
+
+/*---- ----*/
+/* Render a four character code as a NUL-terminated string, most significant byte first.
+ * The result lives in a single static buffer that is overwritten by every call. */
+char *isom_4cc2str( uint32_t fourcc )
+{
+    static char str[5];
+    str[0] = (fourcc >> 24) & 0xff;
+    str[1] = (fourcc >> 16) & 0xff;
+    str[2] = (fourcc >>  8) & 0xff;
+    str[3] =  fourcc        & 0xff;
+    str[4] = 0;
+    return str;
+}
+
+/* Find the track whose tkhd carries 'track_ID'.
+ * Returns NULL if there is none, or as soon as an entry without data or without tkhd is met. */
+isom_trak_entry_t *isom_get_trak( lsmash_root_t *root, uint32_t track_ID )
+{
+    if( !track_ID || !root || !root->moov || !root->moov->trak_list )
+        return NULL;
+    for( lsmash_entry_t *entry = root->moov->trak_list->head; entry; entry = entry->next )
+    {
+        isom_trak_entry_t *trak = (isom_trak_entry_t *)entry->data;
+        if( !trak || !trak->tkhd )
+            return NULL;
+        if( trak->tkhd->track_ID == track_ID )
+            return trak;
+    }
+    return NULL;
+}
+
+/* Find the trex entry for 'track_ID'; NULL if there is none or an entry without data is met. */
+isom_trex_entry_t *isom_get_trex( isom_mvex_t *mvex, uint32_t track_ID )
+{
+    if( !track_ID || !mvex || !mvex->trex_list )
+        return NULL;
+    for( lsmash_entry_t *entry = mvex->trex_list->head; entry; entry = entry->next )
+    {
+        isom_trex_entry_t *trex = (isom_trex_entry_t *)entry->data;
+        if( !trex )
+            return NULL;
+        if( trex->track_ID == track_ID )
+            return trex;
+    }
+    return NULL;
+}
+
+/* Find the traf whose tfhd carries 'track_ID'; NULL if there is none or a malformed entry is met. */
+static isom_traf_entry_t *isom_get_traf( isom_moof_entry_t *moof, uint32_t track_ID )
+{
+    if( !track_ID || !moof || !moof->traf_list )
+        return NULL;
+    for( lsmash_entry_t *entry = moof->traf_list->head; entry; entry = entry->next )
+    {
+        isom_traf_entry_t *traf = (isom_traf_entry_t *)entry->data;
+        if( !traf || !traf->tfhd )
+            return NULL;
+        if( traf->tfhd->track_ID == track_ID )
+            return traf;
+    }
+    return NULL;
+}
+
+/* Find the tfra entry for 'track_ID'; NULL if there is none or an entry without data is met. */
+isom_tfra_entry_t *isom_get_tfra( isom_mfra_t *mfra, uint32_t track_ID )
+{
+    if( !track_ID || !mfra || !mfra->tfra_list )
+        return NULL;
+    for( lsmash_entry_t *entry = mfra->tfra_list->head; entry; entry = entry->next )
+    {
+        isom_tfra_entry_t *tfra = (isom_tfra_entry_t *)entry->data;
+        if( !tfra )
+            return NULL;
+        if( tfra->track_ID == track_ID )
+            return tfra;
+    }
+    return NULL;
+}
+
+/* Append one edit to 'elst'. The box version is raised to 1 when a value does not fit in 32 bits.
+ * Returns 0 on success, -1 on failure. */
+static int isom_add_elst_entry( isom_elst_t *elst, uint64_t segment_duration, int64_t media_time, int32_t media_rate )
+{
+    /* Validate the container like the sibling isom_add_*_entry() helpers do. */
+    if( !elst || !elst->list )
+        return -1;
+    isom_elst_entry_t *data = malloc( sizeof(isom_elst_entry_t) );
+    if( !data )
+        return -1;
+    data->segment_duration = segment_duration;
+    data->media_time = media_time;
+    data->media_rate = media_rate;
+    if( lsmash_add_entry( elst->list, data ) )
+    {
+        free( data );
+        return -1;
+    }
+    if( data->segment_duration > UINT32_MAX || data->media_time > INT32_MAX || data->media_time < INT32_MIN )
+        elst->version = 1;
+    return 0;
+}
+
+/* Append a track reference box of 'type' to 'tref'.
+ * The 'track_ID' pointer is stored as is, not copied. Returns the new box, or NULL on failure. */
+isom_tref_type_t *isom_add_track_reference_type( isom_tref_t *tref, isom_track_reference_type type, uint32_t ref_count, uint32_t *track_ID )
+{
+    if( !tref || !tref->ref_list )
+        return NULL;
+    isom_tref_type_t *ref = lsmash_malloc_zero( sizeof(isom_tref_type_t) );
+    if( !ref )
+        return NULL;
+    /* Initialize common fields. */
+    ref->root   = tref->root;
+    ref->parent = (isom_box_t *)tref;
+    ref->size   = 0;
+    ref->type   = lsmash_form_iso_box_type( type );
+    /* */
+    ref->ref_count = ref_count;
+    ref->track_ID  = track_ID;
+    if( lsmash_add_entry( tref->ref_list, ref ) )
+    {
+        free( ref );
+        return NULL;
+    }
+    return ref;
+}
+
+/* Append a data reference entry to 'dref': a URN box when 'name' is given, a URL box otherwise.
+ * 'name' and 'location' are duplicated. Returns 0 on success, -1 on failure. */
+static int isom_add_dref_entry( isom_dref_t *dref, uint32_t flags, char *name, char *location )
+{
+    if( !dref || !dref->list )
+        return -1;
+    isom_dref_entry_t *data = lsmash_malloc_zero( sizeof(isom_dref_entry_t) );
+    if( !data )
+        return -1;
+    isom_init_box_common( data, dref, name ? ISOM_BOX_TYPE_URN : ISOM_BOX_TYPE_URL );
+    data->flags = flags;
+    if( location )
+    {
+        data->location_length = strlen( location ) + 1;
+        data->location = lsmash_memdup( location, data->location_length );
+        if( !data->location )
+        {
+            free( data );
+            return -1;
+        }
+    }
+    if( name )
+    {
+        data->name_length = strlen( name ) + 1;
+        data->name = lsmash_memdup( name, data->name_length );
+        if( !data->name )
+        {
+            if( data->location )
+                free( data->location );
+            free( data );
+            return -1;
+        }
+    }
+    if( lsmash_add_entry( dref->list, data ) )
+    {
+        if( data->location )
+            free( data->location );
+        if( data->name )
+            free( data->name );
+        free( data );
+        return -1;
+    }
+    return 0;
+}
+
+/* Allocate a parameter set entry holding a private copy of the 'ps_size' bytes at 'ps'. NULL on failure. */
+isom_avcC_ps_entry_t *isom_create_ps_entry( uint8_t *ps, uint32_t ps_size )
+{
+    isom_avcC_ps_entry_t *entry = malloc( sizeof(isom_avcC_ps_entry_t) );
+    if( !entry )
+        return NULL;
+    entry->parameterSetNALUnit = lsmash_memdup( ps, ps_size );
+    if( !entry->parameterSetNALUnit )
+    {
+        free( entry );
+        return NULL;
+    }
+    entry->parameterSetLength = ps_size;
+    return entry;
+}
+
+/* Free a parameter set entry together with its NAL unit copy. A NULL entry is ignored. */
+void isom_remove_avcC_ps( isom_avcC_ps_entry_t *ps )
+{
+    if( !ps )
+        return;
+    if( ps->parameterSetNALUnit )
+        free( ps->parameterSetNALUnit );
+    free( ps );
+}
+
+#if 0
+static int isom_add_mp4s_entry( isom_stsd_t *stsd )
+{
+    if( !stsd || !stsd->list )
+        return -1;
+    isom_mp4s_entry_t *mp4s = lsmash_malloc_zero( sizeof(isom_mp4s_entry_t) );
+    if( !mp4s )
+        return -1;
+    isom_init_box_common( mp4s, stsd, ISOM_CODEC_TYPE_MP4S_SYSTEM );
+    mp4s->data_reference_index = 1;
+    if( lsmash_add_entry( stsd->list, mp4s ) )
+    {
+        free( mp4s );
+        return -1;
+    }
+    return 0;
+}
+#endif
+
+/* The four helpers below each attach one child box to 'wave' and fail with -1
+ * if 'wave' is NULL or already has that child. */
+int isom_add_frma( isom_wave_t *wave )
+{
+    if( !wave || wave->frma )
+        return -1;
+    isom_create_box( frma, wave, QT_BOX_TYPE_FRMA );
+    wave->frma = frma;
+    return 0;
+}
+
+int isom_add_enda( isom_wave_t *wave )
+{
+    if( !wave || wave->enda )
+        return -1;
+    isom_create_box( enda, wave, QT_BOX_TYPE_ENDA );
+    wave->enda = enda;
+    return 0;
+}
+
+int isom_add_mp4a( isom_wave_t *wave )
+{
+    if( !wave || wave->mp4a )
+        return -1;
+    isom_create_box( mp4a, wave, QT_BOX_TYPE_MP4A );
+    wave->mp4a = mp4a;
+    return 0;
+}
+
+int isom_add_terminator( isom_wave_t *wave )
+{
+    if( !wave || wave->terminator )
+        return -1;
+    isom_create_box( terminator, wave, QT_BOX_TYPE_TERMINATOR );
+    wave->terminator = terminator;
+    return 0;
+}
+
+/* Append a QuickTime text sample entry to 'stsd'. Returns 0 on success, -1 on failure. */
+static int isom_add_text_entry( isom_stsd_t *stsd )
+{
+    if( !stsd || !stsd->list )
+        return -1;
+    isom_text_entry_t *text = lsmash_malloc_zero( sizeof(isom_text_entry_t) );
+    if( !text )
+        return -1;
+    isom_init_box_common( text, stsd, QT_CODEC_TYPE_TEXT_TEXT );
+    text->data_reference_index = 1;
+    if( lsmash_add_entry( stsd->list, text ) )
+    {
+        free( text );
+        return -1;
+    }
+    return 0;
+}
+
+/* Create an ftab box with an empty entry list and attach it to 'tx3g'. Returns 0 on success, -1 on failure. */
+int isom_add_ftab( isom_tx3g_entry_t *tx3g )
+{
+    if( !tx3g )
+        return -1;
+    isom_ftab_t *ftab = lsmash_malloc_zero( sizeof(isom_ftab_t) );
+    if( !ftab )
+        return -1;
+    isom_init_box_common( ftab, tx3g, ISOM_BOX_TYPE_FTAB );
+    ftab->list = lsmash_create_entry_list();
+    if( !ftab->list )
+    {
+        free( ftab );
+        return -1;
+    }
+    tx3g->ftab = ftab;
+    return 0;
+}
+
+/* Append a tx3g sample entry, with its ftab child, to 'stsd'. Returns 0 on success, -1 on failure. */
+static int isom_add_tx3g_entry( isom_stsd_t *stsd )
+{
+    if( !stsd || !stsd->list )
+        return -1;
+    isom_tx3g_entry_t *tx3g = lsmash_malloc_zero( sizeof(isom_tx3g_entry_t) );
+    if( !tx3g )
+        return -1;
+    isom_init_box_common( tx3g, stsd, ISOM_CODEC_TYPE_TX3G_TEXT );
+    tx3g->data_reference_index = 1;
+    if( isom_add_ftab( tx3g ) )
+    {
+        free( tx3g );
+        return -1;
+    }
+    if( lsmash_add_entry( stsd->list, tx3g ) )
+    {
+        /* isom_add_ftab() succeeded, so the ftab box and its (still empty) list must be released too. */
+        lsmash_remove_list( tx3g->ftab->list, NULL );
+        free( tx3g->ftab );
+        free( tx3g );
+        return -1;
+    }
+    return 0;
+}
+
+/* This function returns 0 if failed, sample_entry_number if succeeded.
*/ +int lsmash_add_sample_entry( lsmash_root_t *root, uint32_t track_ID, void *summary ) +{ + if( !summary ) + return 0; + isom_trak_entry_t *trak = isom_get_trak( root, track_ID ); + if( !trak || !trak->root || !trak->root->ftyp || !trak->mdia || !trak->mdia->minf + || !trak->mdia->minf->stbl || !trak->mdia->minf->stbl->stsd || !trak->mdia->minf->stbl->stsd->list ) + return 0; + isom_stsd_t *stsd = trak->mdia->minf->stbl->stsd; + lsmash_entry_list_t *list = stsd->list; + int ret = -1; + lsmash_codec_type_t sample_type = ((lsmash_summary_t *)summary)->sample_type; + if( lsmash_check_codec_type_identical( sample_type, LSMASH_CODEC_TYPE_RAW ) ) + { + if( trak->mdia->minf->vmhd ) + ret = isom_setup_visual_description( stsd, sample_type, (lsmash_video_summary_t *)summary ); + else if( trak->mdia->minf->smhd ) + ret = isom_setup_audio_description( stsd, sample_type, (lsmash_audio_summary_t *)summary ); + return ret ? 0 : list->entry_count; + } + static struct description_setup_table_tag + { + lsmash_codec_type_t type; + void *func; + } description_setup_table[128] = { { LSMASH_CODEC_TYPE_INITIALIZER, NULL } }; + if( !description_setup_table[0].func ) + { + int i = 0; +#define ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( type, func ) \ + description_setup_table[i++] = (struct description_setup_table_tag){ type, func } + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_AVC1_VIDEO, isom_setup_visual_description ); +#if 0 + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_AVC2_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_AVCP_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_SVC1_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_MVC1_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_MVC2_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_MP4V_VIDEO, 
isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_DRAC_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_ENCV_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_MJP2_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_S263_VIDEO, isom_setup_visual_description ); +#endif + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_VC_1_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_APCH_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_APCN_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_APCS_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_APCO_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_AP4H_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_DVC_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_DVCP_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_DVPP_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_DV5N_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_DV5P_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_DVH2_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_DVH3_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_DVH5_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_DVH6_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_DVHP_VIDEO, 
isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_DVHQ_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_ULRA_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_ULRG_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_ULY2_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_ULY0_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_V210_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_V216_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_V308_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_V408_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_V410_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_YUV2_VIDEO, isom_setup_visual_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_MP4A_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_AC_3_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_ALAC_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_EC_3_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_SAMR_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_SAWB_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_DTSC_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_DTSE_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_DTSH_AUDIO, isom_setup_audio_description 
); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_DTSL_AUDIO, isom_setup_audio_description ); +#if 0 + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_DRA1_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_ENCA_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_G719_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_G726_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_M4AE_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_MLPA_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_RAW_AUDIO , isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_SAWP_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_SEVC_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_SQCP_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_SSMV_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_TWOS_AUDIO, isom_setup_audio_description ); +#endif + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_MP4A_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_23NI_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_NONE_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_LPCM_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_SOWT_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_TWOS_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_FL32_AUDIO, isom_setup_audio_description ); + 
ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_FL64_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_IN24_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_IN32_AUDIO, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_NOT_SPECIFIED, isom_setup_audio_description ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_TX3G_TEXT, isom_add_tx3g_entry ); + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( QT_CODEC_TYPE_TEXT_TEXT, isom_add_text_entry ); +#if 0 + ADD_DESCRIPTION_SETUP_TABLE_ELEMENT( ISOM_CODEC_TYPE_MP4S_SYSTEM, isom_add_mp4s_entry ); +#endif + } + for( int i = 0; description_setup_table[i].func; i++ ) + if( lsmash_check_codec_type_identical( sample_type, description_setup_table[i].type ) ) + { + if( isom_setup_visual_description == description_setup_table[i].func ) + ret = isom_setup_visual_description( stsd, sample_type, (lsmash_video_summary_t *)summary ); + else if( isom_setup_audio_description == description_setup_table[i].func ) + ret = isom_setup_audio_description( stsd, sample_type, (lsmash_audio_summary_t *)summary ); + else if( isom_add_tx3g_entry == description_setup_table[i].func ) + ret = isom_add_tx3g_entry( stsd ); + else if( isom_add_text_entry == description_setup_table[i].func ) + ret = isom_add_text_entry( stsd ); + break; + } + return ret ? 
0 : list->entry_count; +} + +static int isom_add_stts_entry( isom_stbl_t *stbl, uint32_t sample_delta ) +{ + if( !stbl || !stbl->stts || !stbl->stts->list ) + return -1; + isom_stts_entry_t *data = malloc( sizeof(isom_stts_entry_t) ); + if( !data ) + return -1; + data->sample_count = 1; + data->sample_delta = sample_delta; + if( lsmash_add_entry( stbl->stts->list, data ) ) + { + free( data ); + return -1; + } + return 0; +} + +static int isom_add_ctts_entry( isom_stbl_t *stbl, uint32_t sample_offset ) +{ + if( !stbl || !stbl->ctts || !stbl->ctts->list ) + return -1; + isom_ctts_entry_t *data = malloc( sizeof(isom_ctts_entry_t) ); + if( !data ) + return -1; + data->sample_count = 1; + data->sample_offset = sample_offset; + if( lsmash_add_entry( stbl->ctts->list, data ) ) + { + free( data ); + return -1; + } + return 0; +} + +static int isom_add_stsc_entry( isom_stbl_t *stbl, uint32_t first_chunk, uint32_t samples_per_chunk, uint32_t sample_description_index ) +{ + if( !stbl || !stbl->stsc || !stbl->stsc->list ) + return -1; + isom_stsc_entry_t *data = malloc( sizeof(isom_stsc_entry_t) ); + if( !data ) + return -1; + data->first_chunk = first_chunk; + data->samples_per_chunk = samples_per_chunk; + data->sample_description_index = sample_description_index; + if( lsmash_add_entry( stbl->stsc->list, data ) ) + { + free( data ); + return -1; + } + return 0; +} + +static int isom_add_stsz_entry( isom_stbl_t *stbl, uint32_t entry_size ) +{ + if( !stbl || !stbl->stsz ) + return -1; + isom_stsz_t *stsz = stbl->stsz; + /* retrieve initial sample_size */ + if( !stsz->sample_count ) + stsz->sample_size = entry_size; + /* if it seems constant access_unit size at present, update sample_count only */ + if( !stsz->list && stsz->sample_size == entry_size ) + { + ++ stsz->sample_count; + return 0; + } + /* found sample_size varies, create sample_size list */ + if( !stsz->list ) + { + stsz->list = lsmash_create_entry_list(); + if( !stsz->list ) + return -1; + for( uint32_t i = 0; i < 
stsz->sample_count; i++ ) + { + isom_stsz_entry_t *data = malloc( sizeof(isom_stsz_entry_t) ); + if( !data ) + return -1; + data->entry_size = stsz->sample_size; + if( lsmash_add_entry( stsz->list, data ) ) + { + free( data ); + return -1; + } + } + stsz->sample_size = 0; + } + isom_stsz_entry_t *data = malloc( sizeof(isom_stsz_entry_t) ); + if( !data ) + return -1; + data->entry_size = entry_size; + if( lsmash_add_entry( stsz->list, data ) ) + { + free( data ); + return -1; + } + ++ stsz->sample_count; + return 0; +} + +static int isom_add_stss_entry( isom_stbl_t *stbl, uint32_t sample_number ) +{ + if( !stbl || !stbl->stss || !stbl->stss->list ) + return -1; + isom_stss_entry_t *data = malloc( sizeof(isom_stss_entry_t) ); + if( !data ) + return -1; + data->sample_number = sample_number; + if( lsmash_add_entry( stbl->stss->list, data ) ) + { + free( data ); + return -1; + } + return 0; +} + +static int isom_add_stps_entry( isom_stbl_t *stbl, uint32_t sample_number ) +{ + if( !stbl || !stbl->stps || !stbl->stps->list ) + return -1; + isom_stps_entry_t *data = malloc( sizeof(isom_stps_entry_t) ); + if( !data ) + return -1; + data->sample_number = sample_number; + if( lsmash_add_entry( stbl->stps->list, data ) ) + { + free( data ); + return -1; + } + return 0; +} + +static int isom_add_sdtp_entry( isom_box_t *parent, lsmash_sample_property_t *prop, uint8_t avc_extensions ) +{ + if( !prop || !parent ) + return -1; + isom_sdtp_t *sdtp = NULL; + if( lsmash_check_box_type_identical( parent->type, ISOM_BOX_TYPE_STBL ) ) + sdtp = ((isom_stbl_t *)parent)->sdtp; + else if( lsmash_check_box_type_identical( parent->type, ISOM_BOX_TYPE_TRAF ) ) + sdtp = ((isom_traf_entry_t *)parent)->sdtp; + else + assert( 0 ); + if( !sdtp || !sdtp->list ) + return -1; + isom_sdtp_entry_t *data = malloc( sizeof(isom_sdtp_entry_t) ); + if( !data ) + return -1; + /* isom_sdtp_entry_t is smaller than lsmash_sample_property_t. */ + data->is_leading = (avc_extensions ? 
prop->leading : prop->allow_earlier) & 0x03; + data->sample_depends_on = prop->independent & 0x03; + data->sample_is_depended_on = prop->disposable & 0x03; + data->sample_has_redundancy = prop->redundant & 0x03; + if( lsmash_add_entry( sdtp->list, data ) ) + { + free( data ); + return -1; + } + return 0; +} + +static int isom_add_co64( isom_stbl_t *stbl ) +{ + if( !stbl || stbl->stco ) + return -1; + isom_create_list_box( stco, stbl, ISOM_BOX_TYPE_CO64 ); + stco->large_presentation = 1; + stbl->stco = stco; + return 0; +} + +static int isom_add_stco( isom_stbl_t *stbl ) +{ + if( !stbl || stbl->stco ) + return -1; + isom_create_list_box( stco, stbl, ISOM_BOX_TYPE_STCO ); + stco->large_presentation = 0; + stbl->stco = stco; + return 0; +} + +static int isom_add_co64_entry( isom_stbl_t *stbl, uint64_t chunk_offset ) +{ + if( !stbl || !stbl->stco || !stbl->stco->list ) + return -1; + isom_co64_entry_t *data = malloc( sizeof(isom_co64_entry_t) ); + if( !data ) + return -1; + data->chunk_offset = chunk_offset; + if( lsmash_add_entry( stbl->stco->list, data ) ) + { + free( data ); + return -1; + } + return 0; +} + +static int isom_convert_stco_to_co64( isom_stbl_t* stbl ) +{ + /* backup stco */ + isom_stco_t *stco = stbl->stco; + stbl->stco = NULL; + if( isom_add_co64( stbl ) ) + return -1; + /* move chunk_offset to co64 from stco */ + for( lsmash_entry_t *entry = stco->list->head; entry; entry = entry->next ) + { + isom_stco_entry_t *data = (isom_stco_entry_t*)entry->data; + if( isom_add_co64_entry( stbl, data->chunk_offset ) ) + return -1; + } + lsmash_remove_list( stco->list, NULL ); + free( stco ); + return 0; +} + +static int isom_add_stco_entry( isom_stbl_t *stbl, uint64_t chunk_offset ) +{ + if( !stbl || !stbl->stco || !stbl->stco->list ) + return -1; + if( stbl->stco->large_presentation ) + return isom_add_co64_entry( stbl, chunk_offset ); + if( chunk_offset > UINT32_MAX ) + { + if( isom_convert_stco_to_co64( stbl ) ) + return -1; + return isom_add_co64_entry( 
stbl, chunk_offset ); + } + isom_stco_entry_t *data = malloc( sizeof(isom_stco_entry_t) ); + if( !data ) + return -1; + data->chunk_offset = (uint32_t)chunk_offset; + if( lsmash_add_entry( stbl->stco->list, data ) ) + { + free( data ); + return -1; + } + return 0; +} + +isom_sgpd_entry_t *isom_get_sample_group_description( isom_stbl_t *stbl, uint32_t grouping_type ) +{ + if( !stbl->sgpd_list ) + return NULL; + for( lsmash_entry_t *entry = stbl->sgpd_list->head; entry; entry = entry->next ) + { + isom_sgpd_entry_t *sgpd = (isom_sgpd_entry_t *)entry->data; + if( !sgpd || !sgpd->list ) + return NULL; + if( sgpd->grouping_type == grouping_type ) + return sgpd; + } + return NULL; +} + +isom_sbgp_entry_t *isom_get_sample_to_group( isom_stbl_t *stbl, uint32_t grouping_type ) +{ + if( !stbl->sbgp_list ) + return NULL; + for( lsmash_entry_t *entry = stbl->sbgp_list->head; entry; entry = entry->next ) + { + isom_sbgp_entry_t *sbgp = (isom_sbgp_entry_t *)entry->data; + if( !sbgp || !sbgp->list ) + return NULL; + if( sbgp->grouping_type == grouping_type ) + return sbgp; + } + return NULL; +} + +static isom_rap_entry_t *isom_add_rap_group_entry( isom_sgpd_entry_t *sgpd ) +{ + if( !sgpd ) + return NULL; + isom_rap_entry_t *data = malloc( sizeof(isom_rap_entry_t) ); + if( !data ) + return NULL; + data->description_length = 0; + data->num_leading_samples_known = 0; + data->num_leading_samples = 0; + if( lsmash_add_entry( sgpd->list, data ) ) + { + free( data ); + return NULL; + } + return data; +} + +static isom_roll_entry_t *isom_add_roll_group_entry( isom_sgpd_entry_t *sgpd, int16_t roll_distance ) +{ + if( !sgpd ) + return NULL; + isom_roll_entry_t *data = malloc( sizeof(isom_roll_entry_t) ); + if( !data ) + return NULL; + data->description_length = 0; + data->roll_distance = roll_distance; + if( lsmash_add_entry( sgpd->list, data ) ) + { + free( data ); + return NULL; + } + return data; +} + +static isom_group_assignment_entry_t *isom_add_group_assignment_entry( 
isom_sbgp_entry_t *sbgp, uint32_t sample_count, uint32_t group_description_index ) +{ + if( !sbgp ) + return NULL; + isom_group_assignment_entry_t *data = malloc( sizeof(isom_group_assignment_entry_t) ); + if( !data ) + return NULL; + data->sample_count = sample_count; + data->group_description_index = group_description_index; + if( lsmash_add_entry( sbgp->list, data ) ) + { + free( data ); + return NULL; + } + return data; +} + +int isom_add_chpl_entry( isom_chpl_t *chpl, isom_chapter_entry_t *chap_data ) +{ + if( !chap_data->chapter_name || !chpl || !chpl->list ) + return -1; + isom_chpl_entry_t *data = malloc( sizeof(isom_chpl_entry_t) ); + if( !data ) + return -1; + data->start_time = chap_data->start_time; + data->chapter_name_length = strlen( chap_data->chapter_name ); + data->chapter_name = (char *)malloc( data->chapter_name_length + 1 ); + if( !data->chapter_name ) + { + free( data ); + return -1; + } + memcpy( data->chapter_name, chap_data->chapter_name, data->chapter_name_length ); + data->chapter_name[data->chapter_name_length] = '\0'; + if( lsmash_add_entry( chpl->list, data ) ) + { + free( data->chapter_name ); + free( data ); + return -1; + } + return 0; +} + +static isom_trex_entry_t *isom_add_trex( isom_mvex_t *mvex ) +{ + if( !mvex ) + return NULL; + if( !mvex->trex_list ) + { + mvex->trex_list = lsmash_create_entry_list(); + if( !mvex->trex_list ) + return NULL; + } + isom_trex_entry_t *trex = lsmash_malloc_zero( sizeof(isom_trex_entry_t) ); + if( !trex ) + return NULL; + isom_init_box_common( trex, mvex, ISOM_BOX_TYPE_TREX ); + if( lsmash_add_entry( mvex->trex_list, trex ) ) + { + free( trex ); + return NULL; + } + return trex; +} + +static isom_trun_entry_t *isom_add_trun( isom_traf_entry_t *traf ) +{ + if( !traf ) + return NULL; + if( !traf->trun_list ) + { + traf->trun_list = lsmash_create_entry_list(); + if( !traf->trun_list ) + return NULL; + } + isom_trun_entry_t *trun = lsmash_malloc_zero( sizeof(isom_trun_entry_t) ); + if( !trun ) + 
return NULL; + isom_init_box_common( trun, traf, ISOM_BOX_TYPE_TRUN ); + if( lsmash_add_entry( traf->trun_list, trun ) ) + { + free( trun ); + return NULL; + } + return trun; +} + +static isom_traf_entry_t *isom_add_traf( lsmash_root_t *root, isom_moof_entry_t *moof ) +{ + if( !root || !root->moof_list || !moof ) + return NULL; + if( !moof->traf_list ) + { + moof->traf_list = lsmash_create_entry_list(); + if( !moof->traf_list ) + return NULL; + } + isom_traf_entry_t *traf = lsmash_malloc_zero( sizeof(isom_traf_entry_t) ); + if( !traf ) + return NULL; + isom_init_box_common( traf, moof, ISOM_BOX_TYPE_TRAF ); + isom_cache_t *cache = malloc( sizeof(isom_cache_t) ); + if( !cache ) + { + free( traf ); + return NULL; + } + memset( cache, 0, sizeof(isom_cache_t) ); + if( lsmash_add_entry( moof->traf_list, traf ) ) + { + free( cache ); + free( traf ); + return NULL; + } + traf->cache = cache; + return traf; +} + +static isom_moof_entry_t *isom_add_moof( lsmash_root_t *root ) +{ + if( !root ) + return NULL; + if( !root->moof_list ) + { + root->moof_list = lsmash_create_entry_list(); + if( !root->moof_list ) + return NULL; + } + isom_moof_entry_t *moof = lsmash_malloc_zero( sizeof(isom_moof_entry_t) ); + if( !moof ) + return NULL; + isom_init_box_common( moof, root, ISOM_BOX_TYPE_MOOF ); + if( lsmash_add_entry( root->moof_list, moof ) ) + { + free( moof ); + return NULL; + } + return moof; +} + +static isom_tfra_entry_t *isom_add_tfra( isom_mfra_t *mfra ) +{ + if( !mfra ) + return NULL; + if( !mfra->tfra_list ) + { + mfra->tfra_list = lsmash_create_entry_list(); + if( !mfra->tfra_list ) + return NULL; + } + isom_tfra_entry_t *tfra = lsmash_malloc_zero( sizeof(isom_tfra_entry_t) ); + if( !tfra ) + return NULL; + isom_init_box_common( tfra, mfra, ISOM_BOX_TYPE_TFRA ); + if( lsmash_add_entry( mfra->tfra_list, tfra ) ) + { + free( tfra ); + return NULL; + } + return tfra; +} + +static int isom_add_ftyp( lsmash_root_t *root ) +{ + if( root->ftyp ) + return -1; + isom_create_box( 
ftyp, root, ISOM_BOX_TYPE_FTYP ); + ftyp->size = ISOM_BASEBOX_COMMON_SIZE + 8; + root->ftyp = ftyp; + return 0; +} + +static int isom_add_moov( lsmash_root_t *root ) +{ + if( root->moov ) + return -1; + isom_create_box( moov, root, ISOM_BOX_TYPE_MOOV ); + root->moov = moov; + return 0; +} + +static int isom_add_mvhd( isom_moov_t *moov ) +{ + if( !moov || moov->mvhd ) + return -1; + isom_create_box( mvhd, moov, ISOM_BOX_TYPE_MVHD ); + mvhd->rate = 0x00010000; + mvhd->volume = 0x0100; + mvhd->matrix[0] = 0x00010000; + mvhd->matrix[4] = 0x00010000; + mvhd->matrix[8] = 0x40000000; + mvhd->next_track_ID = 1; + moov->mvhd = mvhd; + return 0; +} + +static int isom_scan_trak_profileLevelIndication( isom_trak_entry_t *trak, mp4a_audioProfileLevelIndication *audio_pli, mp4sys_visualProfileLevelIndication *visual_pli ) +{ + if( !trak || !trak->mdia || !trak->mdia->minf || !trak->mdia->minf->stbl ) + return -1; + isom_stsd_t *stsd = trak->mdia->minf->stbl->stsd; + if( !stsd || !stsd->list || !stsd->list->head ) + return -1; + for( lsmash_entry_t *entry = stsd->list->head; entry; entry = entry->next ) + { + isom_sample_entry_t *sample_entry = (isom_sample_entry_t *)entry->data; + if( !sample_entry ) + return -1; + lsmash_codec_type_t sample_type = (lsmash_codec_type_t)sample_entry->type; + if( trak->mdia->minf->vmhd ) + { + if( lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_AVC1_VIDEO ) + || lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_AVC2_VIDEO ) + || lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_AVCP_VIDEO ) + || lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_SVC1_VIDEO ) + || lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_MVC1_VIDEO ) + || lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_MVC2_VIDEO ) ) + { + /* FIXME: Do we have to arbitrate like audio? 
*/ + if( *visual_pli == MP4SYS_VISUAL_PLI_NONE_REQUIRED ) + *visual_pli = MP4SYS_VISUAL_PLI_H264_AVC; + } + else + *visual_pli = MP4SYS_VISUAL_PLI_NOT_SPECIFIED; + } + else if( trak->mdia->minf->smhd ) + { + if( lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_MP4A_AUDIO ) ) + { + isom_audio_entry_t *audio = (isom_audio_entry_t *)sample_entry; +#ifdef LSMASH_DEMUXER_ENABLED + isom_esds_t *esds = (isom_esds_t *)isom_get_extension_box( &audio->extensions, ISOM_BOX_TYPE_ESDS ); + if( !esds || !esds->ES ) + return -1; + if( !lsmash_check_codec_type_identical( audio->summary.sample_type, ISOM_CODEC_TYPE_MP4A_AUDIO ) ) + /* This is needed when copying descriptions. */ + mp4sys_setup_summary_from_DecoderSpecificInfo( &audio->summary, esds->ES ); +#endif + *audio_pli = mp4a_max_audioProfileLevelIndication( *audio_pli, mp4a_get_audioProfileLevelIndication( &audio->summary ) ); + } + else + /* NOTE: Audio CODECs other than 'mp4a' does not have appropriate pli. */ + *audio_pli = MP4A_AUDIO_PLI_NOT_SPECIFIED; + } + else + ; /* FIXME: Do we have to set OD_profileLevelIndication? */ + } + return 0; +} + +static int isom_add_iods( isom_moov_t *moov ) +{ + if( !moov || !moov->trak_list || moov->iods ) + return -1; + isom_create_box( iods, moov, ISOM_BOX_TYPE_IODS ); + iods->OD = mp4sys_create_ObjectDescriptor( 1 ); /* NOTE: Use 1 for ObjectDescriptorID of IOD. 
*/ + if( !iods->OD ) + { + free( iods ); + return -1; + } + mp4a_audioProfileLevelIndication audio_pli = MP4A_AUDIO_PLI_NONE_REQUIRED; + mp4sys_visualProfileLevelIndication visual_pli = MP4SYS_VISUAL_PLI_NONE_REQUIRED; + for( lsmash_entry_t *entry = moov->trak_list->head; entry; entry = entry->next ) + { + isom_trak_entry_t* trak = (isom_trak_entry_t*)entry->data; + if( !trak || !trak->tkhd ) + { + free( iods ); + return -1; + } + if( isom_scan_trak_profileLevelIndication( trak, &audio_pli, &visual_pli ) ) + { + free( iods ); + return -1; + } + if( mp4sys_add_ES_ID_Inc( iods->OD, trak->tkhd->track_ID ) ) + { + free( iods ); + return -1; + } + } + if( mp4sys_to_InitialObjectDescriptor( iods->OD, + 0, /* FIXME: I'm not quite sure what the spec says. */ + MP4SYS_OD_PLI_NONE_REQUIRED, MP4SYS_SCENE_PLI_NONE_REQUIRED, + audio_pli, visual_pli, + MP4SYS_GRAPHICS_PLI_NONE_REQUIRED ) ) + { + free( iods ); + return -1; + } + moov->iods = iods; + return 0; +} + +static int isom_add_tkhd( isom_trak_entry_t *trak, uint32_t handler_type ) +{ + if( !trak || !trak->root || !trak->root->moov || !trak->root->moov->mvhd || !trak->root->moov->trak_list ) + return -1; + if( !trak->tkhd ) + { + isom_create_box( tkhd, trak, ISOM_BOX_TYPE_TKHD ); + if( handler_type == ISOM_MEDIA_HANDLER_TYPE_AUDIO_TRACK ) + tkhd->volume = 0x0100; + tkhd->matrix[0] = 0x00010000; + tkhd->matrix[4] = 0x00010000; + tkhd->matrix[8] = 0x40000000; + tkhd->duration = 0xffffffff; + tkhd->track_ID = trak->root->moov->mvhd->next_track_ID; + ++ trak->root->moov->mvhd->next_track_ID; + trak->tkhd = tkhd; + } + return 0; +} + +static int isom_add_clef( isom_tapt_t *tapt ) +{ + if( tapt->clef ) + return 0; + isom_create_box( clef, tapt, QT_BOX_TYPE_CLEF ); + tapt->clef = clef; + return 0; +} + +static int isom_add_prof( isom_tapt_t *tapt ) +{ + if( tapt->prof ) + return 0; + isom_create_box( prof, tapt, QT_BOX_TYPE_PROF ); + tapt->prof = prof; + return 0; +} + +static int isom_add_enof( isom_tapt_t *tapt ) +{ + if( 
tapt->enof ) + return 0; + isom_create_box( enof, tapt, QT_BOX_TYPE_ENOF ); + tapt->enof = enof; + return 0; +} + +static int isom_add_tapt( isom_trak_entry_t *trak ) +{ + if( trak->tapt ) + return 0; + isom_create_box( tapt, trak, QT_BOX_TYPE_TAPT ); + trak->tapt = tapt; + return 0; +} + +int isom_add_elst( isom_edts_t *edts ) +{ + if( edts->elst ) + return 0; + isom_create_list_box( elst, edts, ISOM_BOX_TYPE_ELST ); + edts->elst = elst; + return 0; +} + +int isom_add_edts( isom_trak_entry_t *trak ) +{ + if( trak->edts ) + return 0; + isom_create_box( edts, trak, ISOM_BOX_TYPE_EDTS ); + trak->edts = edts; + return 0; +} + +int isom_add_tref( isom_trak_entry_t *trak ) +{ + if( trak->tref ) + return 0; + isom_create_box( tref, trak, ISOM_BOX_TYPE_TREF ); + tref->ref_list = lsmash_create_entry_list(); + if( !tref->ref_list ) + { + free( tref ); + return -1; + } + trak->tref = tref; + return 0; +} + +static int isom_add_mdhd( isom_mdia_t *mdia, uint16_t default_language ) +{ + if( !mdia || mdia->mdhd ) + return -1; + isom_create_box( mdhd, mdia, ISOM_BOX_TYPE_MDHD ); + mdhd->language = default_language; + mdia->mdhd = mdhd; + return 0; +} + +static int isom_add_mdia( isom_trak_entry_t *trak ) +{ + if( !trak || trak->mdia ) + return -1; + isom_create_box( mdia, trak, ISOM_BOX_TYPE_MDIA ); + trak->mdia = mdia; + return 0; +} + +int isom_add_hdlr( isom_mdia_t *mdia, isom_meta_t *meta, isom_minf_t *minf, uint32_t media_type ) +{ + if( (!mdia && !meta && !minf) || (mdia && meta) || (meta && minf) || (minf && mdia) ) + return -1; /* Either one must be given. */ + if( (mdia && mdia->hdlr) || (meta && meta->hdlr) || (minf && minf->hdlr) ) + return -1; /* Selected one must not have hdlr yet. */ + isom_box_t *parent = mdia ? (isom_box_t *)mdia : meta ? (isom_box_t *)meta : (isom_box_t *)minf; + isom_create_box( hdlr, parent, ISOM_BOX_TYPE_HDLR ); + lsmash_root_t *root = hdlr->root; + uint32_t type = mdia ? (root->qt_compatible ? QT_HANDLER_TYPE_MEDIA : 0) : (meta ? 
0 : QT_HANDLER_TYPE_DATA); + uint32_t subtype = media_type; + hdlr->componentType = type; + hdlr->componentSubtype = subtype; + char *type_name = NULL; + char *subtype_name = NULL; + uint8_t type_name_length = 0; + uint8_t subtype_name_length = 0; + if( mdia ) + type_name = "Media "; + else if( meta ) + type_name = "Metadata "; + else /* if( minf ) */ + type_name = "Data "; + type_name_length = strlen( type_name ); + struct + { + uint32_t subtype; + char *subtype_name; + uint8_t subtype_name_length; + } subtype_table[] = + { + { ISOM_MEDIA_HANDLER_TYPE_AUDIO_TRACK, "Sound ", 6 }, + { ISOM_MEDIA_HANDLER_TYPE_VIDEO_TRACK, "Video", 6 }, + { ISOM_MEDIA_HANDLER_TYPE_HINT_TRACK, "Hint ", 5 }, + { ISOM_MEDIA_HANDLER_TYPE_TIMED_METADATA_TRACK, "Metadata ", 9 }, + { ISOM_MEDIA_HANDLER_TYPE_TEXT_TRACK, "Text ", 5 }, + { ISOM_META_HANDLER_TYPE_ITUNES_METADATA, "iTunes ", 7 }, + { QT_REFERENCE_HANDLER_TYPE_ALIAS, "Alias ", 6 }, + { QT_REFERENCE_HANDLER_TYPE_RESOURCE, "Resource ", 9 }, + { QT_REFERENCE_HANDLER_TYPE_URL, "URL ", 4 }, + { subtype, "Unknown ", 8 } + }; + for( int i = 0; subtype_table[i].subtype; i++ ) + if( subtype == subtype_table[i].subtype ) + { + subtype_name = subtype_table[i].subtype_name; + subtype_name_length = subtype_table[i].subtype_name_length; + break; + } + uint32_t name_length = 15 + subtype_name_length + type_name_length + root->isom_compatible + root->qt_compatible; + uint8_t *name = malloc( name_length ); + if( !name ) + { + free( hdlr ); + return -1; + } + if( root->qt_compatible ) + name[0] = name_length & 0xff; + memcpy( name + root->qt_compatible, "L-SMASH ", 8 ); + memcpy( name + root->qt_compatible + 8, subtype_name, subtype_name_length ); + memcpy( name + root->qt_compatible + 8 + subtype_name_length, type_name, type_name_length ); + memcpy( name + root->qt_compatible + 8 + subtype_name_length + type_name_length, "Handler", 7 ); + if( root->isom_compatible ) + name[name_length - 1] = 0; + hdlr->componentName = name; + 
hdlr->componentName_length = name_length; + if( mdia ) + mdia->hdlr = hdlr; + else if( meta ) + meta->hdlr = hdlr; + else + minf->hdlr = hdlr; + return 0; +} + +static int isom_add_minf( isom_mdia_t *mdia ) +{ + if( !mdia || mdia->minf ) + return -1; + isom_create_box( minf, mdia, ISOM_BOX_TYPE_MINF ); + mdia->minf = minf; + return 0; +} + +static int isom_add_vmhd( isom_minf_t *minf ) +{ + if( !minf || minf->vmhd ) + return -1; + isom_create_box( vmhd, minf, ISOM_BOX_TYPE_VMHD ); + vmhd->flags = 0x000001; + minf->vmhd = vmhd; + return 0; +} + +static int isom_add_smhd( isom_minf_t *minf ) +{ + if( !minf || minf->smhd ) + return -1; + isom_create_box( smhd, minf, ISOM_BOX_TYPE_SMHD ); + minf->smhd = smhd; + return 0; +} + +static int isom_add_hmhd( isom_minf_t *minf ) +{ + if( !minf || minf->hmhd ) + return -1; + isom_create_box( hmhd, minf, ISOM_BOX_TYPE_HMHD ); + minf->hmhd = hmhd; + return 0; +} + +static int isom_add_nmhd( isom_minf_t *minf ) +{ + if( !minf || minf->nmhd ) + return -1; + isom_create_box( nmhd, minf, ISOM_BOX_TYPE_NMHD ); + minf->nmhd = nmhd; + return 0; +} + +static int isom_add_gmin( isom_gmhd_t *gmhd ) +{ + if( !gmhd || gmhd->gmin ) + return -1; + isom_create_box( gmin, gmhd, QT_BOX_TYPE_GMIN ); + gmhd->gmin = gmin; + return 0; +} + +static int isom_add_text( isom_gmhd_t *gmhd ) +{ + if( !gmhd || gmhd->text ) + return -1; + isom_create_box( text, gmhd, QT_BOX_TYPE_TEXT ); + text->matrix[0] = 0x00010000; + text->matrix[4] = 0x00010000; + text->matrix[8] = 0x40000000; + gmhd->text = text; + return 0; +} + +static int isom_add_gmhd( isom_minf_t *minf ) +{ + if( !minf || minf->gmhd ) + return -1; + isom_create_box( gmhd, minf, QT_BOX_TYPE_GMHD ); + minf->gmhd = gmhd; + return 0; +} + +static int isom_add_dinf( isom_minf_t *minf ) +{ + if( !minf || minf->dinf ) + return -1; + isom_create_box( dinf, minf, ISOM_BOX_TYPE_DINF ); + minf->dinf = dinf; + return 0; +} + +static int isom_add_dref( isom_dinf_t *dinf ) +{ + if( !dinf || dinf->dref ) + 
return -1; + isom_create_list_box( dref, dinf, ISOM_BOX_TYPE_DREF ); + dinf->dref = dref; + if( isom_add_dref_entry( dref, 0x000001, NULL, NULL ) ) + return -1; + return 0; +} + +static int isom_add_stsd( isom_stbl_t *stbl ) +{ + if( !stbl || stbl->stsd ) + return -1; + isom_create_list_box( stsd, stbl, ISOM_BOX_TYPE_STSD ); + stbl->stsd = stsd; + return 0; +} + +static int isom_add_stts( isom_stbl_t *stbl ) +{ + if( !stbl || stbl->stts ) + return -1; + isom_create_list_box( stts, stbl, ISOM_BOX_TYPE_STTS ); + stbl->stts = stts; + return 0; +} + +static int isom_add_ctts( isom_stbl_t *stbl ) +{ + if( !stbl || stbl->ctts ) + return -1; + isom_create_list_box( ctts, stbl, ISOM_BOX_TYPE_CTTS ); + stbl->ctts = ctts; + return 0; +} + +static int isom_add_cslg( isom_stbl_t *stbl ) +{ + if( !stbl || stbl->cslg ) + return -1; + isom_create_box( cslg, stbl, ISOM_BOX_TYPE_CSLG ); + stbl->cslg = cslg; + return 0; +} + +static int isom_add_stsc( isom_stbl_t *stbl ) +{ + if( !stbl || stbl->stsc ) + return -1; + isom_create_list_box( stsc, stbl, ISOM_BOX_TYPE_STSC ); + stbl->stsc = stsc; + return 0; +} + +static int isom_add_stsz( isom_stbl_t *stbl ) +{ + if( !stbl || stbl->stsz ) + return -1; + isom_create_box( stsz, stbl, ISOM_BOX_TYPE_STSZ ); /* We don't create a list here. 
*/ + stbl->stsz = stsz; + return 0; +} + +static int isom_add_stss( isom_stbl_t *stbl ) +{ + if( !stbl || stbl->stss ) + return -1; + isom_create_list_box( stss, stbl, ISOM_BOX_TYPE_STSS ); + stbl->stss = stss; + return 0; +} + +static int isom_add_stps( isom_stbl_t *stbl ) +{ + if( !stbl || stbl->stps ) + return -1; + isom_create_list_box( stps, stbl, QT_BOX_TYPE_STPS ); + stbl->stps = stps; + return 0; +} + +static int isom_add_sdtp( isom_box_t *parent ) +{ + if( !parent ) + return -1; + if( lsmash_check_box_type_identical( parent->type, ISOM_BOX_TYPE_STBL ) ) + { + isom_stbl_t *stbl = (isom_stbl_t *)parent; + if( stbl->sdtp ) + return -1; + isom_create_list_box( sdtp, stbl, ISOM_BOX_TYPE_SDTP ); + stbl->sdtp = sdtp; + } + else if( lsmash_check_box_type_identical( parent->type, ISOM_BOX_TYPE_TRAF ) ) + { + isom_traf_entry_t *traf = (isom_traf_entry_t *)parent; + if( traf->sdtp ) + return -1; + isom_create_list_box( sdtp, traf, ISOM_BOX_TYPE_SDTP ); + traf->sdtp = sdtp; + } + else + assert( 0 ); + return 0; +} + +static isom_sgpd_entry_t *isom_add_sgpd( isom_stbl_t *stbl, uint32_t grouping_type ) +{ + if( !stbl ) + return NULL; + if( !stbl->sgpd_list ) + { + stbl->sgpd_list = lsmash_create_entry_list(); + if( !stbl->sgpd_list ) + return NULL; + } + isom_sgpd_entry_t *sgpd = lsmash_malloc_zero( sizeof(isom_sgpd_entry_t) ); + if( !sgpd ) + return NULL; + isom_init_box_common( sgpd, stbl, ISOM_BOX_TYPE_SGPD ); + sgpd->list = lsmash_create_entry_list(); + if( !sgpd->list || lsmash_add_entry( stbl->sgpd_list, sgpd ) ) + { + free( sgpd ); + return NULL; + } + sgpd->grouping_type = grouping_type; + sgpd->version = 1; /* We use version 1 because it is recommended in the spec. */ + switch( grouping_type ) + { + case ISOM_GROUP_TYPE_RAP : + sgpd->default_length = 1; + break; + case ISOM_GROUP_TYPE_ROLL : + sgpd->default_length = 2; + break; + default : + /* We don't consider other grouping types currently. 
*/
+            break;
+    }
+    return sgpd;
+}
+
+/* Append a new 'sbgp' entry with the given grouping_type to stbl->sbgp_list,
+ * creating that list on first use.
+ * Returns the new entry, or NULL if stbl is NULL or an allocation/append fails. */
+static isom_sbgp_entry_t *isom_add_sbgp( isom_stbl_t *stbl, uint32_t grouping_type )
+{
+    if( !stbl )
+        return NULL;
+    if( !stbl->sbgp_list )
+    {
+        stbl->sbgp_list = lsmash_create_entry_list();
+        if( !stbl->sbgp_list )
+            return NULL;
+    }
+    isom_sbgp_entry_t *sbgp = lsmash_malloc_zero( sizeof(isom_sbgp_entry_t) );
+    if( !sbgp )
+        return NULL;
+    isom_init_box_common( sbgp, stbl, ISOM_BOX_TYPE_SBGP );
+    sbgp->list = lsmash_create_entry_list();
+    if( !sbgp->list || lsmash_add_entry( stbl->sbgp_list, sbgp ) )
+    {
+        /* The inner list exists when only the append failed; release it as well
+         * so that it is not leaked together with the entry. */
+        if( sbgp->list )
+            lsmash_remove_list( sbgp->list, NULL );
+        free( sbgp );
+        return NULL;
+    }
+    sbgp->grouping_type = grouping_type;
+    return sbgp;
+}
+
+/* The isom_add_*() helpers below attach one new child box to its parent.
+ * Unless noted otherwise they return 0 on success and -1 when the parent is
+ * missing or already owns such a child.
+ * NOTE(review): isom_create_box / isom_create_list_box are macros defined outside
+ * this view; presumably they also return -1 on allocation failure -- confirm. */
+static int isom_add_stbl( isom_minf_t *minf )
+{
+    if( !minf || minf->stbl )
+        return -1;
+    isom_create_box( stbl, minf, ISOM_BOX_TYPE_STBL );
+    minf->stbl = stbl;
+    return 0;
+}
+
+/* Add a 'chpl' box under moov->udta; requires that udta already exists. */
+int isom_add_chpl( isom_moov_t *moov )
+{
+    if( !moov || !moov->udta || moov->udta->chpl )
+        return -1;
+    isom_create_list_box( chpl, moov->udta, ISOM_BOX_TYPE_CHPL );
+    chpl->version = 1;      /* version = 1 is popular. */
+    moov->udta->chpl = chpl;
+    return 0;
+}
+
+/* Create a metadata item box whose type is derived from 'item' and append it to ilst->item_list. */
+int isom_add_metaitem( isom_ilst_t *ilst, lsmash_itunes_metadata_item item )
+{
+    if( !ilst || !ilst->item_list )
+        return -1;
+    lsmash_box_type_t type = lsmash_form_iso_box_type( item );
+    isom_create_box( metaitem, ilst, type );
+    if( lsmash_add_entry( ilst->item_list, metaitem ) )
+    {
+        free( metaitem );
+        return -1;
+    }
+    return 0;
+}
+
+int isom_add_mean( isom_metaitem_t *metaitem )
+{
+    if( !metaitem || metaitem->mean )
+        return -1;
+    isom_create_box( mean, metaitem, ISOM_BOX_TYPE_MEAN );
+    metaitem->mean = mean;
+    return 0;
+}
+
+int isom_add_name( isom_metaitem_t *metaitem )
+{
+    if( !metaitem || metaitem->name )
+        return -1;
+    isom_create_box( name, metaitem, ISOM_BOX_TYPE_NAME );
+    metaitem->name = name;
+    return 0;
+}
+
+int isom_add_data( isom_metaitem_t *metaitem )
+{
+    if( !metaitem || metaitem->data )
+        return -1;
+    isom_create_box( data, metaitem, ISOM_BOX_TYPE_DATA );
+    metaitem->data = data;
+    return 0;
+}
+
+/* Add an 'ilst' box (with an empty item list) under moov->udta->meta. */
+int isom_add_ilst( isom_moov_t *moov )
+{
+    if( !moov || !moov->udta || !moov->udta->meta || moov->udta->meta->ilst )
+        return -1;
+    isom_create_box( ilst, moov->udta->meta, ISOM_BOX_TYPE_ILST );
+    ilst->item_list = lsmash_create_entry_list();
+    if( !ilst->item_list )
+    {
+        free( ilst );
+        return -1;
+    }
+    moov->udta->meta->ilst = ilst;
+    return 0;
+}
+
+/* Add a 'meta' box to parent. The parent's box type selects how it is interpreted:
+ * unspecified type -> root, 'moov', 'trak', and anything else is treated as 'udta'.
+ * Returns -1 if parent is NULL or already has a meta box. */
+int isom_add_meta( isom_box_t *parent )
+{
+    if( !parent )
+        return -1;
+    isom_create_box( meta, parent, ISOM_BOX_TYPE_META );
+    if( lsmash_check_box_type_identical( parent->type, LSMASH_BOX_TYPE_UNSPECIFIED ) )
+    {
+        lsmash_root_t *root = (lsmash_root_t *)parent;
+        if( root->meta )
+        {
+            free( meta );
+            return -1;
+        }
+        root->meta = meta;
+    }
+    else if( lsmash_check_box_type_identical( parent->type, ISOM_BOX_TYPE_MOOV ) )
+    {
+        isom_moov_t *moov = (isom_moov_t *)parent;
+        if( moov->meta )
+        {
+            free( meta );
+            return -1;
+        }
+        moov->meta = meta;
+    }
+    else if( lsmash_check_box_type_identical( parent->type, ISOM_BOX_TYPE_TRAK ) )
+    {
+        isom_trak_entry_t *trak = (isom_trak_entry_t *)parent;
+        if( trak->meta )
+        {
+            free( meta );
+            return -1;
+        }
+        trak->meta = meta;
+    }
+    else
+    {
+        isom_udta_t *udta = (isom_udta_t *)parent;
+        if( udta->meta )
+        {
+            free( meta );
+            return -1;
+        }
+        udta->meta = meta;
+    }
+    return 0;
+}
+
+/* Append a new 'cprt' box to udta->cprt_list, creating the list on first use. */
+static int isom_add_cprt( isom_udta_t *udta )
+{
+    if( !udta )
+        return -1;
+    if( !udta->cprt_list )
+    {
+        udta->cprt_list = lsmash_create_entry_list();
+        if( !udta->cprt_list )
+            return -1;
+    }
+    isom_create_box( cprt, udta, ISOM_BOX_TYPE_CPRT );
+    if( lsmash_add_entry( udta->cprt_list, cprt ) )
+    {
+        free( cprt );
+        return -1;
+    }
+    return 0;
+}
+
+/* Ensure a 'udta' box exists under moov (track_ID == 0) or under the given track.
+ * Unlike the other helpers, an already existing udta is not an error: 0 is returned. */
+int isom_add_udta( lsmash_root_t *root, uint32_t track_ID )
+{
+    /* track_ID == 0 means the direct addition to moov box */
+    if( !track_ID )
+    {
+        if( !root || !root->moov )
+            return -1;
+        if( root->moov->udta )
+            return 0;
+        isom_create_box( udta, root->moov, ISOM_BOX_TYPE_UDTA );
+        root->moov->udta = udta;
+        return 0;
+    }
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak )
+        return -1;
+    if( trak->udta )
+        return 0;
+    isom_create_box( udta, trak, ISOM_BOX_TYPE_UDTA );
+    trak->udta = udta;
+    return 0;
+}
+
+/* Allocate a 'trak' entry together with its cache (and a fragment record when
+ * root->fragment is set) and append it to moov->trak_list.
+ * Returns the new track, or NULL on failure; nothing is left allocated on failure. */
+static isom_trak_entry_t *isom_add_trak( lsmash_root_t *root )
+{
+    if( !root || !root->moov )
+        return NULL;
+    isom_moov_t *moov = root->moov;
+    if( !moov->trak_list )
+    {
+        moov->trak_list = lsmash_create_entry_list();
+        if( !moov->trak_list )
+            return NULL;
+    }
+    isom_trak_entry_t *trak = lsmash_malloc_zero( sizeof(isom_trak_entry_t) );
+    if( !trak )
+        return NULL;
+    isom_init_box_common( trak, moov, ISOM_BOX_TYPE_TRAK );
+    isom_cache_t *cache = lsmash_malloc_zero( sizeof(isom_cache_t) );
+    if( !cache )
+    {
+        free( trak );
+        return NULL;
+    }
+    isom_fragment_t *fragment = NULL;
+    if( root->fragment )
+    {
+        fragment = lsmash_malloc_zero( sizeof(isom_fragment_t) );
+        if( !fragment )
+        {
+            free( cache );
+            free( trak );
+            return NULL;
+        }
+        cache->fragment = fragment;
+    }
+    if( lsmash_add_entry( moov->trak_list, trak ) )
+    {
+        if( fragment )
+            free( fragment );
+        free( cache );
+        free( trak );
+        return NULL;
+    }
+    trak->cache = cache;
+    return trak;
+}
+
+static int isom_add_mvex( isom_moov_t *moov )
+{
+    if( !moov || moov->mvex )
+        return -1;
+    isom_create_box( mvex, moov, ISOM_BOX_TYPE_MVEX );
+    moov->mvex = mvex;
+    return 0;
+}
+
+static int isom_add_mehd( isom_mvex_t *mvex )
+{
+    if( !mvex || mvex->mehd )
+        return -1;
+    isom_create_box( mehd, mvex, ISOM_BOX_TYPE_MEHD );
+    mvex->mehd = mehd;
+    return 0;
+}
+
+static int isom_add_tfhd( isom_traf_entry_t *traf )
+{
+    if( !traf || traf->tfhd )
+        return -1;
+    isom_create_box( tfhd, traf, ISOM_BOX_TYPE_TFHD );
+    traf->tfhd = tfhd;
+    return 0;
+}
+
+static int isom_add_tfdt( isom_traf_entry_t *traf )
+{
+    if( !traf || traf->tfdt )
+        return -1;
+    isom_create_box( tfdt, traf, ISOM_BOX_TYPE_TFDT );
+    traf->tfdt = tfdt;
+    return 0;
+}
+
+static int isom_add_mfhd( isom_moof_entry_t *moof )
+{
+    if( !moof || moof->mfhd )
+        return -1;
+    isom_create_box( mfhd, moof, ISOM_BOX_TYPE_MFHD );
+    moof->mfhd = mfhd;
+    return 0;
+}
+
+static int isom_add_mfra( lsmash_root_t *root )
+{
+    if( !root || root->mfra )
+        return -1;
+    isom_create_box( mfra, root, ISOM_BOX_TYPE_MFRA );
+    root->mfra = mfra;
+    return 0;
+}
+
+static int isom_add_mfro( isom_mfra_t *mfra )
+{
+    if( !mfra || mfra->mfro )
+        return -1;
+    isom_create_box( mfro, mfra, ISOM_BOX_TYPE_MFRO );
+    mfra->mfro = mfro;
+    return 0;
+}
+
+/* Free box_name and, if it has a parent, reset the parent's member of the same
+ * name to NULL. box_name must therefore be a variable whose identifier equals the
+ * member name inside parent_type. The box's own members are not released here. */
+#define isom_remove_box( box_name, parent_type ) \
+    do \
+    { \
+        parent_type *parent = (parent_type *)box_name->parent; \
+        free( box_name ); \
+        if( parent ) \
+            parent->box_name = NULL; \
+    } while( 0 )
+
+/* The isom_remove_*() functions below accept NULL, release whatever the box owns
+ * and then free the box itself. */
+void isom_remove_unknown_box( isom_unknown_box_t *unknown_box )
+{
+    if( !unknown_box )
+        return;
+    if( unknown_box->unknown_field )
+        free( unknown_box->unknown_field );
+    free( unknown_box );
+}
+
+static void isom_remove_ftyp( isom_ftyp_t *ftyp )
+{
+    if( !ftyp )
+        return;
+    if( ftyp->compatible_brands )
+        free( ftyp->compatible_brands );
+    isom_remove_box( ftyp, lsmash_root_t );
+}
+
+static void isom_remove_tkhd( isom_tkhd_t *tkhd )
+{
+    if( !tkhd )
+        return;
+    isom_remove_box( tkhd, isom_trak_entry_t );
+}
+
+static void isom_remove_clef( isom_clef_t *clef )
+{
+    if( !clef )
+        return;
+    isom_remove_box( clef, isom_tapt_t );
+}
+
+static void isom_remove_prof( isom_prof_t *prof )
+{
+    if( !prof )
+        return;
+    isom_remove_box( prof, isom_tapt_t );
+}
+
+static void isom_remove_enof( isom_enof_t *enof )
+{
+    if( !enof )
+        return;
+    isom_remove_box( enof, isom_tapt_t );
+}
+
+void isom_remove_tapt( isom_tapt_t *tapt )
+{
+    if( !tapt )
+        return;
+    isom_remove_clef( tapt->clef );
+    isom_remove_prof( tapt->prof );
+    isom_remove_enof( tapt->enof );
+    isom_remove_box( tapt, isom_trak_entry_t );
+}
+
+static void isom_remove_elst( isom_elst_t *elst )
+{
+    if( !elst )
+        return;
+    lsmash_remove_list( elst->list, NULL );
+    isom_remove_box( elst, isom_edts_t );
+}
+
+static void isom_remove_edts( isom_edts_t *edts )
+{
+    if( !edts )
+        return;
+    isom_remove_elst( edts->elst );
+    isom_remove_box( edts, isom_trak_entry_t );
+}
+
+/* List-entry destructor for tref->ref_list. */
+void isom_remove_track_reference_type( isom_tref_type_t *ref )
+{
+    if( !ref )
+        return;
+    if( ref->track_ID )
+        free( ref->track_ID );
+    free( ref );
+}
+
+void isom_remove_tref( isom_tref_t *tref )
+{
+    if( !tref )
+        return;
+    lsmash_remove_list( tref->ref_list, isom_remove_track_reference_type );
+    isom_remove_box( tref, isom_trak_entry_t );
+}
+
+static void isom_remove_mdhd( isom_mdhd_t *mdhd )
+{
+    if( !mdhd )
+        return;
+    isom_remove_box( mdhd, isom_mdia_t );
+}
+
+static void isom_remove_vmhd( isom_vmhd_t *vmhd )
+{
+    if( !vmhd )
+        return;
+    isom_remove_box( vmhd, isom_minf_t );
+}
+
+static void isom_remove_smhd( isom_smhd_t *smhd )
+{
+    if( !smhd )
+        return;
+    isom_remove_box( smhd, isom_minf_t );
+}
+
+static void isom_remove_hmhd( isom_hmhd_t *hmhd )
+{
+    if( !hmhd )
+        return;
+    isom_remove_box( hmhd, isom_minf_t );
+}
+
+static void isom_remove_nmhd( isom_nmhd_t *nmhd )
+{
+    if( !nmhd )
+        return;
+    isom_remove_box( nmhd, isom_minf_t );
+}
+
+static void isom_remove_gmin( isom_gmin_t *gmin )
+{
+    if( !gmin )
+        return;
+    isom_remove_box( gmin, isom_gmhd_t );
+}
+
+static void isom_remove_text( isom_text_t *text )
+{
+    if( !text )
+        return;
+    isom_remove_box( text, isom_gmhd_t );
+}
+
+static void isom_remove_gmhd( isom_gmhd_t *gmhd )
+{
+    if( !gmhd )
+        return;
+    isom_remove_gmin( gmhd->gmin );
+    isom_remove_text( gmhd->text );
+    isom_remove_box( gmhd, isom_minf_t );
+}
+
+/* A 'hdlr' box may live under mdia, meta (ISO or QuickTime flavour) or minf;
+ * the parent's type decides which parent member is reset.
+ * NOTE(review): when the parent type is none of these and assert() is compiled
+ * out (NDEBUG), the function returns without freeing hdlr -- confirm whether that
+ * case can occur. */
+static void isom_remove_hdlr( isom_hdlr_t *hdlr )
+{
+    if( !hdlr )
+        return;
+    if( hdlr->componentName )
+        free( hdlr->componentName );
+    if( hdlr->parent )
+    {
+        if( lsmash_check_box_type_identical( hdlr->parent->type, ISOM_BOX_TYPE_MDIA ) )
+            isom_remove_box( hdlr, isom_mdia_t );
+        else if( lsmash_check_box_type_identical( hdlr->parent->type, ISOM_BOX_TYPE_META )
+              || lsmash_check_box_type_identical( hdlr->parent->type, QT_BOX_TYPE_META ) )
+            isom_remove_box( hdlr, isom_meta_t );
+        else if( lsmash_check_box_type_identical( hdlr->parent->type, ISOM_BOX_TYPE_MINF ) )
+            isom_remove_box( hdlr, isom_minf_t );
+        else
+            assert( 0 );
+        return;
+    }
+    free( hdlr );
+}
+
+/* The following removers only free the box (and any buffer it owns);
+ * they do not touch a parent pointer. */
+void isom_remove_clap( isom_clap_t *clap )
+{
+    if( !clap )
+        return;
+    free( clap );
+}
+
+void isom_remove_pasp( isom_pasp_t *pasp )
+{
+    if( !pasp )
+        return;
+    free( pasp );
+}
+
+void isom_remove_glbl( isom_glbl_t *glbl )
+{
+    if( !glbl )
+        return;
+    if( glbl->header_data )
+        free( glbl->header_data );
+    free( glbl );
+}
+
+void isom_remove_colr( isom_colr_t *colr )
+{
+    if( !colr )
+        return;
+    free( colr );
+}
+
+void isom_remove_gama( isom_gama_t *gama )
+{
+    if( !gama )
+        return;
+    free( gama );
+}
+
+void isom_remove_fiel( isom_fiel_t *fiel )
+{
+    if( !fiel )
+        return;
+    free( fiel );
+}
+
+void isom_remove_cspc( isom_cspc_t *cspc )
+{
+    if( !cspc )
+        return;
+    free( cspc );
+}
+
+void isom_remove_sgbt( isom_sgbt_t *sgbt )
+{
+    if( !sgbt )
+        return;
+    free( sgbt );
+}
+
+void isom_remove_stsl( isom_stsl_t *stsl )
+{
+    if( !stsl )
+        return;
+    free( stsl );
+}
+
+void isom_remove_esds( isom_esds_t *esds )
+{
+    if( !esds )
+        return;
+    mp4sys_remove_ES_Descriptor( esds->ES );
+    free( esds );
+}
+
+void isom_remove_avcC( isom_avcC_t *avcC )
+{
+    if( !avcC )
+        return;
+    lsmash_remove_list( avcC->sequenceParameterSets,   isom_remove_avcC_ps );
+    lsmash_remove_list( avcC->pictureParameterSets,    isom_remove_avcC_ps );
+    lsmash_remove_list( avcC->sequenceParameterSetExt, isom_remove_avcC_ps );
+    free( avcC );
+}
+
+void isom_remove_btrt( isom_btrt_t *btrt )
+{
+    if( !btrt )
+        return;
+    free( btrt );
+}
+
+/* List-entry destructor for ftab->list. */
+static void isom_remove_font_record( isom_font_record_t *font_record )
+{
+    if( !font_record )
+        return;
+    if( font_record->font_name )
+        free( font_record->font_name );
+    free( font_record );
+}
+
+void isom_remove_ftab( isom_ftab_t *ftab )
+{
+    if( !ftab )
+        return;
+    lsmash_remove_list( ftab->list, isom_remove_font_record );
+    isom_remove_box( ftab, isom_tx3g_entry_t );
+}
+
+void isom_remove_frma( isom_frma_t *frma )
+{
+    if( !frma )
+        return;
+    isom_remove_box( frma, isom_wave_t );
+}
+
+void isom_remove_enda( isom_enda_t *enda )
+{
+    if( !enda )
+        return;
+    isom_remove_box( enda, isom_wave_t );
+}
+
+void isom_remove_mp4a( isom_mp4a_t *mp4a )
+{
+    if( !mp4a )
+        return;
+    isom_remove_box( mp4a, isom_wave_t );
+}
+
+void isom_remove_terminator( isom_terminator_t *terminator )
+{
+    if( !terminator )
+        return;
+    isom_remove_box( terminator, isom_wave_t );
+}
+
+void isom_remove_wave( isom_wave_t *wave )
+{
+    if( !wave )
+        return;
+    isom_remove_frma( wave->frma );
+    isom_remove_enda( wave->enda );
+    isom_remove_mp4a( wave->mp4a );
+    isom_remove_terminator( wave->terminator );
+    free( wave );
+}
+
+void isom_remove_chan( isom_chan_t *chan )
+{
+    if( !chan )
+        return;
+    if( chan->channelDescriptions )
+        free( chan->channelDescriptions );
+    free( chan );
+}
+
+/* Per-kind sample description destructors. Each one casts the generic entry to
+ * its concrete type, removes the extensions and frees kind-specific buffers.
+ * They do not check for NULL; isom_remove_sample_description() does that. */
+static void isom_remove_visual_description( isom_sample_entry_t *description )
+{
+    isom_visual_entry_t *visual = (isom_visual_entry_t *)description;
+    isom_remove_sample_description_extensions( &visual->extensions );
+    if( visual->color_table.array )
+        free( visual->color_table.array );
+    free( visual );
+}
+
+static void isom_remove_audio_description( isom_sample_entry_t *description )
+{
+    isom_audio_entry_t *audio = (isom_audio_entry_t *)description;
+    isom_remove_sample_description_extensions( &audio->extensions );
+    free( audio );
+}
+
+static void isom_remove_hint_description( isom_sample_entry_t *description )
+{
+    isom_hint_entry_t *hint = (isom_hint_entry_t *)description;
+    isom_remove_sample_description_extensions( &hint->extensions );
+    if( hint->data )
+        free( hint->data );
+    free( hint );
+}
+
+static void isom_remove_metadata_description( isom_sample_entry_t *description )
+{
+    isom_metadata_entry_t *metadata = (isom_metadata_entry_t *)description;
+    isom_remove_sample_description_extensions( &metadata->extensions );
+    free( metadata );
+}
+
+static void isom_remove_tx3g_description( isom_sample_entry_t *description )
+{
+    isom_tx3g_entry_t *tx3g = (isom_tx3g_entry_t *)description;
+    isom_remove_sample_description_extensions( &tx3g->extensions );
+    if( tx3g->ftab )
+        isom_remove_ftab( tx3g->ftab );
+    free( tx3g );
+}
+
+static void isom_remove_qt_text_description( isom_sample_entry_t *description )
+{
+    isom_text_entry_t *text = (isom_text_entry_t *)description;
+    isom_remove_sample_description_extensions( &text->extensions );
+    if( text->font_name )
+        free( text->font_name );
+    free( text );
+}
+
+static void isom_remove_mp4s_description( isom_sample_entry_t *description )
+{
+    isom_mp4s_entry_t *mp4s = (isom_mp4s_entry_t *)description;
+    isom_remove_sample_description_extensions( &mp4s->extensions );
+    free( mp4s );
+}
+
+/* Destroy a sample description by dispatching on its codec type.
+ * 'raw ' is shared by video and audio, so it is resolved through sample->manager
+ * first; every other type is looked up in a table that is filled on first call.
+ * The table holds at most 128 elements including the terminating entry whose
+ * func is NULL; keep that in mind when adding codec types.
+ * NOTE(review): a type that matches no table entry is left untouched (not freed),
+ * and the lazy table initialisation is not guarded against concurrent first calls
+ * -- confirm both are acceptable for the callers. */
+void isom_remove_sample_description( isom_sample_entry_t *sample )
+{
+    if( !sample )
+        return;
+    lsmash_codec_type_t sample_type = sample->type;
+    if( lsmash_check_box_type_identical( sample_type, LSMASH_CODEC_TYPE_RAW ) )
+    {
+        if( sample->manager & LSMASH_VIDEO_DESCRIPTION )
+        {
+            isom_remove_visual_description( sample );
+            return;
+        }
+        else if( sample->manager & LSMASH_AUDIO_DESCRIPTION )
+        {
+            isom_remove_audio_description( sample );
+            return;
+        }
+    }
+    static struct description_remover_table_tag
+    {
+        lsmash_codec_type_t type;
+        void (*func)( isom_sample_entry_t * );
+    } description_remover_table[128] = { { LSMASH_CODEC_TYPE_INITIALIZER, NULL } };
+    if( !description_remover_table[0].func )
+    {
+        /* Initialize the table. */
+        int i = 0;
+#define ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( type, func ) \
+    description_remover_table[i++] = (struct description_remover_table_tag){ type, func }
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_AVC1_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_AVC2_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_AVCP_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_SVC1_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_MVC1_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_MVC2_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_MP4V_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_DRAC_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_ENCV_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_MJP2_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_S263_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_VC_1_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_CFHD_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DV10_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DVOO_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DVOR_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DVTV_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DVVT_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_HD10_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_M105_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_PNTG_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_SVQ1_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_SVQ3_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_SHR0_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_SHR1_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_SHR2_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_SHR3_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_SHR4_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_WRLE_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_APCH_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_APCN_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_APCS_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_APCO_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_AP4H_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_CIVD_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DRAC_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DVC_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DVCP_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DVPP_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DV5N_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DV5P_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DVH2_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DVH3_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DVH5_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DVH6_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DVHP_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_DVHQ_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_FLIC_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_GIF_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_H261_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_H263_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_JPEG_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_MJPA_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_MJPB_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_PNG_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_RLE_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_RPZA_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_TGA_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_TIFF_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_ULRA_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_ULRG_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_ULY2_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_ULY0_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_V210_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_V216_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_V308_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_V408_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_V410_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_YUV2_VIDEO, isom_remove_visual_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_MP4A_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_AC_3_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_ALAC_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_DTSC_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_DTSE_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_DTSH_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_DTSL_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_EC_3_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_SAMR_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_SAWB_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_23NI_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_NONE_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_LPCM_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_SOWT_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_TWOS_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_FL32_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_FL64_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_IN24_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_IN32_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_NOT_SPECIFIED, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_DRA1_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_ENCA_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_G719_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_G726_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_M4AE_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_MLPA_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_SAWP_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_SEVC_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_SQCP_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_SSMV_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_TWOS_AUDIO, isom_remove_audio_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_FDP_HINT, isom_remove_hint_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_M2TS_HINT, isom_remove_hint_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_PM2T_HINT, isom_remove_hint_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_PRTP_HINT, isom_remove_hint_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_RM2T_HINT, isom_remove_hint_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_RRTP_HINT, isom_remove_hint_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_RSRP_HINT, isom_remove_hint_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_RTP_HINT , isom_remove_hint_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_SM2T_HINT, isom_remove_hint_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_SRTP_HINT, isom_remove_hint_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_IXSE_META, isom_remove_metadata_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_METT_META, isom_remove_metadata_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_METX_META, isom_remove_metadata_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_MLIX_META, isom_remove_metadata_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_OKSD_META, isom_remove_metadata_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_SVCM_META, isom_remove_metadata_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_TEXT_META, isom_remove_metadata_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_URIM_META, isom_remove_metadata_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_XML_META, isom_remove_metadata_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_TX3G_TEXT, isom_remove_tx3g_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( QT_CODEC_TYPE_TEXT_TEXT, isom_remove_qt_text_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( ISOM_CODEC_TYPE_MP4S_SYSTEM, isom_remove_mp4s_description );
+        ADD_DESCRIPTION_REMOVER_TABLE_ELEMENT( LSMASH_CODEC_TYPE_UNSPECIFIED, NULL );
+    }
+    /* The scan stops at the first entry without a remover (the terminator). */
+    for( int i = 0; description_remover_table[i].func; i++ )
+        if( lsmash_check_codec_type_identical( sample_type, description_remover_table[i].type ) )
+        {
+            description_remover_table[i].func( sample );
+            return;
+        }
+}
+
+static void isom_remove_stsd( isom_stsd_t *stsd )
+{
+    if( !stsd )
+        return;
+    lsmash_remove_list( stsd->list, isom_remove_sample_description );
+    isom_remove_box( stsd, isom_stbl_t );
+}
+
+static void isom_remove_stts( isom_stts_t *stts )
+{
+    if( !stts )
+        return;
+    lsmash_remove_list( stts->list, NULL );
+    isom_remove_box( stts, isom_stbl_t );
+}
+
+static void isom_remove_ctts( isom_ctts_t *ctts )
+{
+    if( !ctts )
+        return;
+    lsmash_remove_list( ctts->list, NULL );
+    isom_remove_box( ctts, isom_stbl_t );
+}
+
+static void isom_remove_cslg( isom_cslg_t *cslg ) +{ + if( !cslg ) + return; + isom_remove_box( cslg, isom_stbl_t ); +} + +static void isom_remove_stsc( isom_stsc_t *stsc ) +{ + if( !stsc ) + return; + lsmash_remove_list( stsc->list, NULL ); + isom_remove_box( stsc, isom_stbl_t ); +} + +static void isom_remove_stsz( isom_stsz_t *stsz ) +{ + if( !stsz ) + return; + lsmash_remove_list( stsz->list, NULL ); + isom_remove_box( stsz, isom_stbl_t ); +} + +static void isom_remove_stss( isom_stss_t *stss ) +{ + if( !stss ) + return; + lsmash_remove_list( stss->list, NULL ); + isom_remove_box( stss, isom_stbl_t ); +} + +static void isom_remove_stps( isom_stps_t *stps ) +{ + if( !stps ) + return; + lsmash_remove_list( stps->list, NULL ); + isom_remove_box( stps, isom_stbl_t ); +} + +static void isom_remove_sdtp( isom_sdtp_t *sdtp ) +{ + if( !sdtp ) + return; + lsmash_remove_list( sdtp->list, NULL ); + if( sdtp->parent ) + { + if( lsmash_check_box_type_identical( sdtp->parent->type, ISOM_BOX_TYPE_STBL ) ) + isom_remove_box( sdtp, isom_stbl_t ); + else if( lsmash_check_box_type_identical( sdtp->parent->type, ISOM_BOX_TYPE_TRAF ) ) + isom_remove_box( sdtp, isom_traf_entry_t ); + else + assert( 0 ); + return; + } + free( sdtp ); +} + +static void isom_remove_stco( isom_stco_t *stco ) +{ + if( !stco ) + return; + lsmash_remove_list( stco->list, NULL ); + isom_remove_box( stco, isom_stbl_t ); +} + +static void isom_remove_sgpd( isom_sgpd_entry_t *sgpd ) +{ + if( !sgpd ) + return; + lsmash_remove_list( sgpd->list, NULL ); + free( sgpd ); +} + +static void isom_remove_sbgp( isom_sbgp_entry_t *sbgp ) +{ + if( !sbgp ) + return; + lsmash_remove_list( sbgp->list, NULL ); + free( sbgp ); +} + +static void isom_remove_stbl( isom_stbl_t *stbl ) +{ + if( !stbl ) + return; + isom_remove_stsd( stbl->stsd ); + isom_remove_stts( stbl->stts ); + isom_remove_ctts( stbl->ctts ); + isom_remove_cslg( stbl->cslg ); + isom_remove_stsc( stbl->stsc ); + isom_remove_stsz( stbl->stsz ); + 
isom_remove_stss( stbl->stss ); + isom_remove_stps( stbl->stps ); + isom_remove_sdtp( stbl->sdtp ); + isom_remove_stco( stbl->stco ); + lsmash_remove_list( stbl->sgpd_list, isom_remove_sgpd ); + lsmash_remove_list( stbl->sbgp_list, isom_remove_sbgp ); + isom_remove_box( stbl, isom_minf_t ); +} + +static void isom_remove_dref( isom_dref_t *dref ) +{ + if( !dref ) + return; + if( !dref->list ) + { + free( dref ); + return; + } + for( lsmash_entry_t *entry = dref->list->head; entry; ) + { + isom_dref_entry_t *data = (isom_dref_entry_t *)entry->data; + if( data ) + { + if( data->name ) + free( data->name ); + if( data->location ) + free( data->location ); + free( data ); + } + lsmash_entry_t *next = entry->next; + free( entry ); + entry = next; + } + free( dref->list ); + isom_remove_box( dref, isom_dinf_t ); +} + +static void isom_remove_dinf( isom_dinf_t *dinf ) +{ + if( !dinf ) + return; + isom_remove_dref( dinf->dref ); + isom_remove_box( dinf, isom_minf_t ); +} + +static void isom_remove_minf( isom_minf_t *minf ) +{ + if( !minf ) + return; + isom_remove_vmhd( minf->vmhd ); + isom_remove_smhd( minf->smhd ); + isom_remove_hmhd( minf->hmhd ); + isom_remove_nmhd( minf->nmhd ); + isom_remove_gmhd( minf->gmhd ); + isom_remove_hdlr( minf->hdlr ); + isom_remove_dinf( minf->dinf ); + isom_remove_stbl( minf->stbl ); + isom_remove_box( minf, isom_mdia_t ); +} + +static void isom_remove_mdia( isom_mdia_t *mdia ) +{ + if( !mdia ) + return; + isom_remove_mdhd( mdia->mdhd ); + isom_remove_minf( mdia->minf ); + isom_remove_hdlr( mdia->hdlr ); + isom_remove_box( mdia, isom_trak_entry_t ); +} + +static void isom_remove_chpl( isom_chpl_t *chpl ) +{ + if( !chpl ) + return; + if( !chpl->list ) + { + free( chpl ); + return; + } + for( lsmash_entry_t *entry = chpl->list->head; entry; ) + { + isom_chpl_entry_t *data = (isom_chpl_entry_t *)entry->data; + if( data ) + { + if( data->chapter_name ) + free( data->chapter_name ); + free( data ); + } + lsmash_entry_t *next = entry->next; + 
free( entry ); + entry = next; + } + free( chpl->list ); + isom_remove_box( chpl, isom_udta_t ); +} + +static void isom_remove_keys_entry( isom_keys_entry_t *data ) +{ + if( !data ) + return; + if( data->key_value ) + free( data->key_value ); + free( data ); +} + +static void isom_remove_keys( isom_keys_t *keys ) +{ + if( !keys ) + return; + lsmash_remove_list( keys->list, isom_remove_keys_entry ); + isom_remove_box( keys, isom_meta_t ); +} + +void isom_remove_mean( isom_mean_t *mean ) +{ + if( !mean ) + return; + if( mean->meaning_string ) + free( mean->meaning_string ); + isom_remove_box( mean, isom_metaitem_t ); +} + +void isom_remove_name( isom_name_t *name ) +{ + if( !name ) + return; + if( name->name ) + free( name->name ); + isom_remove_box( name, isom_metaitem_t ); +} + +void isom_remove_data( isom_data_t *data ) +{ + if( !data ) + return; + if( data->value ) + free( data->value ); + isom_remove_box( data, isom_metaitem_t ); +} + +void isom_remove_metaitem( isom_metaitem_t *metaitem ) +{ + if( !metaitem ) + return; + isom_remove_mean( metaitem->mean ); + isom_remove_name( metaitem->name ); + isom_remove_data( metaitem->data ); + free( metaitem ); +} + +void isom_remove_ilst( isom_ilst_t *ilst ) +{ + if( !ilst ) + return; + lsmash_remove_list( ilst->item_list, isom_remove_metaitem ); + isom_remove_box( ilst, isom_meta_t ); +} + +static void isom_remove_meta( isom_meta_t *meta ) +{ + if( !meta ) + return; + isom_remove_hdlr( meta->hdlr ); + isom_remove_dinf( meta->dinf ); + isom_remove_keys( meta->keys ); + isom_remove_ilst( meta->ilst ); + if( meta->parent ) + { + if( lsmash_check_box_type_identical( meta->parent->type, LSMASH_BOX_TYPE_UNSPECIFIED ) ) + isom_remove_box( meta, lsmash_root_t ); + else if( lsmash_check_box_type_identical( meta->parent->type, ISOM_BOX_TYPE_MOOV ) ) + isom_remove_box( meta, isom_moov_t ); + else if( lsmash_check_box_type_identical( meta->parent->type, ISOM_BOX_TYPE_TRAK ) ) + isom_remove_box( meta, isom_trak_entry_t ); + else if( 
lsmash_check_box_type_identical( meta->parent->type, ISOM_BOX_TYPE_UDTA ) ) + isom_remove_box( meta, isom_udta_t ); + else + assert( 0 ); + return; + } + free( meta ); +} + +static void isom_remove_cprt( isom_cprt_t *cprt ) +{ + if( !cprt ) + return; + if( cprt->notice ) + free( cprt->notice ); + free( cprt ); +} + +static void isom_remove_udta( isom_udta_t *udta ) +{ + if( !udta ) + return; + isom_remove_chpl( udta->chpl ); + isom_remove_meta( udta->meta ); + free( udta->WLOC ); + free( udta->LOOP ); + free( udta->SelO ); + free( udta->AllF ); + lsmash_remove_list( udta->cprt_list, isom_remove_cprt ); + if( udta->parent ) + { + if( lsmash_check_box_type_identical( udta->parent->type, ISOM_BOX_TYPE_MOOV ) ) + isom_remove_box( udta, isom_moov_t ); + else if( lsmash_check_box_type_identical( udta->parent->type, ISOM_BOX_TYPE_TRAK ) ) + isom_remove_box( udta, isom_trak_entry_t ); + else + assert( 0 ); + return; + } + free( udta ); +} + +static void isom_remove_sample_pool( isom_sample_pool_t *pool ); + +void isom_remove_trak( isom_trak_entry_t *trak ) +{ + if( !trak ) + return; + isom_remove_tkhd( trak->tkhd ); + isom_remove_tapt( trak->tapt ); + isom_remove_edts( trak->edts ); + isom_remove_tref( trak->tref ); + isom_remove_mdia( trak->mdia ); + isom_remove_udta( trak->udta ); + isom_remove_meta( trak->meta ); + if( trak->cache ) + { + isom_remove_sample_pool( trak->cache->chunk.pool ); + lsmash_remove_list( trak->cache->roll.pool, NULL ); + if( trak->cache->rap ) + free( trak->cache->rap ); + free( trak->cache ); + } + free( trak ); /* Note: the list that contains this trak still has the address of the entry. 
*/ +} + +static void isom_remove_iods( isom_iods_t *iods ) +{ + if( !iods ) + return; + mp4sys_remove_ObjectDescriptor( iods->OD ); + isom_remove_box( iods, isom_moov_t ); +} + +void isom_remove_ctab( isom_ctab_t *ctab ) +{ + if( !ctab ) + return; + if( ctab->color_table.array ) + free( ctab->color_table.array ); + if( ctab->parent && lsmash_check_box_type_identical( ctab->parent->type, ISOM_BOX_TYPE_MOOV ) ) + isom_remove_box( ctab, isom_moov_t ); + else + free( ctab ); +} + +static void isom_remove_mehd( isom_mehd_t *mehd ) +{ + if( !mehd ) + return; + isom_remove_box( mehd, isom_mvex_t ); +} + +static void isom_remove_mvex( isom_mvex_t *mvex ) +{ + if( !mvex ) + return; + isom_remove_mehd( mvex->mehd ); + lsmash_remove_list( mvex->trex_list, NULL ); + isom_remove_box( mvex, isom_moov_t ); +} + +static void isom_remove_moov( lsmash_root_t *root ) +{ + if( !root || !root->moov ) + return; + isom_moov_t *moov = root->moov; + if( moov->mvhd ) + free( moov->mvhd ); + isom_remove_iods( moov->iods ); + lsmash_remove_list( moov->trak_list, isom_remove_trak ); + isom_remove_udta( moov->udta ); + isom_remove_ctab( moov->ctab ); + isom_remove_meta( moov->meta ); + isom_remove_mvex( moov->mvex ); + free( moov ); + root->moov = NULL; +} + +static void isom_remove_mfhd( isom_mfhd_t *mfhd ) +{ + if( !mfhd ) + return; + isom_remove_box( mfhd, isom_moof_entry_t ); +} + +static void isom_remove_tfhd( isom_tfhd_t *tfhd ) +{ + if( !tfhd ) + return; + isom_remove_box( tfhd, isom_traf_entry_t ); +} + +static void isom_remove_tfdt( isom_tfdt_t *tfdt ) +{ + if( !tfdt ) + return; + isom_remove_box( tfdt, isom_traf_entry_t ); +} + +static void isom_remove_trun( isom_trun_entry_t *trun ) +{ + if( !trun ) + return; + lsmash_remove_list( trun->optional, NULL ); + free( trun ); /* Note: the list that contains this trun still has the address of the entry. 
*/ +} + +static void isom_remove_traf( isom_traf_entry_t *traf ) +{ + if( !traf ) + return; + isom_remove_tfhd( traf->tfhd ); + isom_remove_tfdt( traf->tfdt ); + lsmash_remove_list( traf->trun_list, isom_remove_trun ); + isom_remove_sdtp( traf->sdtp ); + free( traf ); /* Note: the list that contains this traf still has the address of the entry. */ +} + +static void isom_remove_moof( isom_moof_entry_t *moof ) +{ + if( !moof ) + return; + isom_remove_mfhd( moof->mfhd ); + lsmash_remove_list( moof->traf_list, isom_remove_traf ); + free( moof ); +} + +static void isom_remove_mdat( isom_mdat_t *mdat ) +{ + if( !mdat ) + return; + isom_remove_box( mdat, lsmash_root_t ); +} + +/* Release an isom_free_t box together with its data buffer. + * The parent pointer is interpreted as the owning lsmash_root_t and its 'free' member is reset to NULL + * so that it does not keep pointing at released memory. A box without a parent is simply released. */ +static void isom_remove_free( isom_free_t *skip ) +{ + if( !skip ) + return; + free( skip->data ); /* free( NULL ) is a no-op, so no guard is needed. */ + lsmash_root_t *root = (lsmash_root_t *)skip->parent; + free( skip ); + if( root ) + root->free = NULL; +} + +static void isom_remove_tfra( isom_tfra_entry_t *tfra ) +{ + if( !tfra ) + return; + lsmash_remove_list( tfra->list, NULL ); + free( tfra ); +} + +static void isom_remove_mfro( isom_mfro_t *mfro ) +{ + if( !mfro ) + return; + isom_remove_box( mfro, isom_mfra_t ); +} + +static void isom_remove_mfra( isom_mfra_t *mfra ) +{ + if( !mfra ) + return; + lsmash_remove_list( mfra->tfra_list, isom_remove_tfra ); + isom_remove_mfro( mfra->mfro ); + isom_remove_box( mfra, lsmash_root_t ); +} + +/* We put a placeholder for 64-bit media data if the media_size of the argument is set to 0. + * If a Media Data Box already exists and we don't pick movie fragments structure, + * write the actual size of the current one and start a new one. */ +static int isom_new_mdat( lsmash_root_t *root, uint64_t media_size ) +{ + if( !root ) + return 0; + if( root->mdat ) + { + /* Write the actual size of the current Media Data Box. 
*/ + if( !root->fragment && isom_write_mdat_size( root ) ) + return -1; + } + else + { + isom_create_box( mdat, root, ISOM_BOX_TYPE_MDAT ); + root->mdat = mdat; + } + /* Start a new Media Data Box. */ + return isom_write_mdat_header( root, media_size ); +} + +int isom_check_compatibility( lsmash_root_t *root ) +{ + if( !root ) + return -1; + root->qt_compatible = 0; + /* Check brand to decide mandatory boxes. */ + if( !root->ftyp || !root->ftyp->brand_count ) + { + /* No brand declaration means this file is a MP4 version 1 or QuickTime file format. */ + if( root->moov && root->moov->iods ) + { + root->mp4_version1 = 1; + root->isom_compatible = 1; + } + else + root->qt_compatible = 1; + return 0; + } + for( uint32_t i = 0; i <= root->ftyp->brand_count; i++ ) + { + uint32_t brand = (i == root->ftyp->brand_count ? root->ftyp->major_brand : root->ftyp->compatible_brands[i]); + switch( brand ) + { + case ISOM_BRAND_TYPE_QT : + root->qt_compatible = 1; + break; + case ISOM_BRAND_TYPE_MP41 : + root->mp4_version1 = 1; + break; + case ISOM_BRAND_TYPE_MP42 : + root->mp4_version2 = 1; + break; + case ISOM_BRAND_TYPE_AVC1 : + case ISOM_BRAND_TYPE_ISOM : + root->max_isom_version = LSMASH_MAX( root->max_isom_version, 1 ); + break; + case ISOM_BRAND_TYPE_ISO2 : + root->max_isom_version = LSMASH_MAX( root->max_isom_version, 2 ); + break; + case ISOM_BRAND_TYPE_ISO3 : + root->max_isom_version = LSMASH_MAX( root->max_isom_version, 3 ); + break; + case ISOM_BRAND_TYPE_ISO4 : + root->max_isom_version = LSMASH_MAX( root->max_isom_version, 4 ); + break; + case ISOM_BRAND_TYPE_ISO5 : + root->max_isom_version = LSMASH_MAX( root->max_isom_version, 5 ); + break; + case ISOM_BRAND_TYPE_ISO6 : + root->max_isom_version = LSMASH_MAX( root->max_isom_version, 6 ); + break; + case ISOM_BRAND_TYPE_M4A : + case ISOM_BRAND_TYPE_M4B : + case ISOM_BRAND_TYPE_M4P : + case ISOM_BRAND_TYPE_M4V : + root->itunes_movie = 1; + break; + case ISOM_BRAND_TYPE_3GP4 : + root->max_3gpp_version = LSMASH_MAX( 
root->max_3gpp_version, 4 ); + break; + case ISOM_BRAND_TYPE_3GP5 : + root->max_3gpp_version = LSMASH_MAX( root->max_3gpp_version, 5 ); + break; + case ISOM_BRAND_TYPE_3GE6 : + case ISOM_BRAND_TYPE_3GG6 : + case ISOM_BRAND_TYPE_3GP6 : + case ISOM_BRAND_TYPE_3GR6 : + case ISOM_BRAND_TYPE_3GS6 : + root->max_3gpp_version = LSMASH_MAX( root->max_3gpp_version, 6 ); + break; + default : + break; + } + switch( brand ) + { + case ISOM_BRAND_TYPE_AVC1 : + case ISOM_BRAND_TYPE_ISO2 : + case ISOM_BRAND_TYPE_ISO3 : + case ISOM_BRAND_TYPE_ISO4 : + case ISOM_BRAND_TYPE_ISO5 : + case ISOM_BRAND_TYPE_ISO6 : + root->avc_extensions = 1; + break; + default : + break; + } + } + root->isom_compatible = !root->qt_compatible || root->mp4_version1 || root->mp4_version2 || root->itunes_movie || root->max_3gpp_version; + return 0; +} + +static uint32_t isom_get_sample_count( isom_trak_entry_t *trak ) +{ + if( !trak || !trak->mdia || !trak->mdia->minf || !trak->mdia->minf->stbl || !trak->mdia->minf->stbl->stsz ) + return 0; + return trak->mdia->minf->stbl->stsz->sample_count; +} + +static uint64_t isom_get_dts( isom_stts_t *stts, uint32_t sample_number ) +{ + if( !stts || !stts->list ) + return 0; + uint64_t dts = 0; + uint32_t i = 1; + lsmash_entry_t *entry; + isom_stts_entry_t *data; + for( entry = stts->list->head; entry; entry = entry->next ) + { + data = (isom_stts_entry_t *)entry->data; + if( !data ) + return 0; + if( i + data->sample_count > sample_number ) + break; + dts += (uint64_t)data->sample_delta * data->sample_count; + i += data->sample_count; + } + if( !entry ) + return 0; + dts += (uint64_t)data->sample_delta * (sample_number - i); + return dts; +} + +#if 0 +static uint64_t isom_get_cts( isom_stts_t *stts, isom_ctts_t *ctts, uint32_t sample_number ) +{ + if( !stts || !stts->list ) + return 0; + if( !ctts ) + return isom_get_dts( stts, sample_number ); + uint32_t i = 1; /* This can be 0 (and then condition below shall be changed) but I dare use same algorithm with 
isom_get_dts. */ + lsmash_entry_t *entry; + isom_ctts_entry_t *data; + if( sample_number == 0 ) + return 0; + for( entry = ctts->list->head; entry; entry = entry->next ) + { + data = (isom_ctts_entry_t *)entry->data; + if( !data ) + return 0; + if( i + data->sample_count > sample_number ) + break; + i += data->sample_count; + } + if( !entry ) + return 0; + return isom_get_dts( stts, sample_number ) + data->sample_offset; +} +#endif + +static int isom_replace_last_sample_delta( isom_stbl_t *stbl, uint32_t sample_delta ) +{ + if( !stbl || !stbl->stts || !stbl->stts->list || !stbl->stts->list->tail || !stbl->stts->list->tail->data ) + return -1; + isom_stts_entry_t *last_stts_data = (isom_stts_entry_t *)stbl->stts->list->tail->data; + if( sample_delta != last_stts_data->sample_delta ) + { + if( last_stts_data->sample_count > 1 ) + { + last_stts_data->sample_count -= 1; + if( isom_add_stts_entry( stbl, sample_delta ) ) + return -1; + } + else + last_stts_data->sample_delta = sample_delta; + } + return 0; +} + +static int isom_update_mdhd_duration( isom_trak_entry_t *trak, uint32_t last_sample_delta ) +{ + if( !trak || !trak->root || !trak->cache || !trak->mdia || !trak->mdia->mdhd || !trak->mdia->minf + || !trak->mdia->minf->stbl || !trak->mdia->minf->stbl->stts || !trak->mdia->minf->stbl->stts->list ) + return -1; + lsmash_root_t *root = trak->root; + isom_mdhd_t *mdhd = trak->mdia->mdhd; + isom_stbl_t *stbl = trak->mdia->minf->stbl; + isom_stts_t *stts = stbl->stts; + isom_ctts_t *ctts = stbl->ctts; + isom_cslg_t *cslg = stbl->cslg; + mdhd->duration = 0; + uint32_t sample_count = isom_get_sample_count( trak ); + if( !sample_count ) + { + /* Return error if non-fragmented movie has no samples. */ + if( !root->fragment && !stts->list->entry_count ) + return -1; + return 0; + } + /* Now we have at least 1 sample, so do stts_entry. 
*/ + lsmash_entry_t *last_stts = stts->list->tail; + isom_stts_entry_t *last_stts_data = (isom_stts_entry_t *)last_stts->data; + if( sample_count == 1 ) + mdhd->duration = last_stts_data->sample_delta; + /* Now we have at least 2 samples, + * but we don't know whether there is 1 stts_entry holding 2 samples or 2 stts_entries holding 1 sample each. */ + else if( !ctts ) + { + /* Without a ctts, use DTS instead of CTS. */ + mdhd->duration = isom_get_dts( stts, sample_count ); + if( last_sample_delta ) + { + mdhd->duration += last_sample_delta; + if( isom_replace_last_sample_delta( stbl, last_sample_delta ) ) + return -1; + } + else if( last_stts_data->sample_count > 1 ) + mdhd->duration += last_stts_data->sample_delta; /* no need to update last_stts_data->sample_delta */ + else + { + /* Remove the last entry. */ + if( lsmash_remove_entry( stts->list, stts->list->entry_count, NULL ) ) + return -1; + /* Count the last sample in the previous entry instead, so that it takes over that entry's sample_delta. */ + ++ ((isom_stts_entry_t *)stts->list->tail->data)->sample_count; + mdhd->duration += ((isom_stts_entry_t *)stts->list->tail->data)->sample_delta; + } + } + else + { + if( !ctts->list || ctts->list->entry_count == 0 ) + return -1; + uint64_t dts = 0; + uint64_t max_cts = 0, max2_cts = 0, min_cts = UINT64_MAX; + uint32_t max_offset = 0, min_offset = UINT32_MAX; + int32_t ctd_shift = trak->cache->timestamp.ctd_shift; + uint32_t j, k; + lsmash_entry_t *stts_entry = stts->list->head; + lsmash_entry_t *ctts_entry = ctts->list->head; + j = k = 0; + for( uint32_t i = 0; i < sample_count; i++ ) + { + if( !ctts_entry || !stts_entry ) + return -1; + isom_stts_entry_t *stts_data = (isom_stts_entry_t *)stts_entry->data; + isom_ctts_entry_t *ctts_data = (isom_ctts_entry_t *)ctts_entry->data; + if( !stts_data || !ctts_data ) + return -1; + uint64_t cts; + if( ctd_shift ) + { + /* Anyway, add composition to decode timeline shift for calculating maximum and minimum CTS correctly. 
*/ + int32_t sample_offset = (int32_t)ctts_data->sample_offset; + cts = dts + sample_offset + ctd_shift; + max_offset = LSMASH_MAX( (int32_t)max_offset, sample_offset ); + min_offset = LSMASH_MIN( (int32_t)min_offset, sample_offset ); + } + else + { + cts = dts + ctts_data->sample_offset; + max_offset = LSMASH_MAX( max_offset, ctts_data->sample_offset ); + min_offset = LSMASH_MIN( min_offset, ctts_data->sample_offset ); + } + min_cts = LSMASH_MIN( min_cts, cts ); + if( max_cts < cts ) + { + max2_cts = max_cts; + max_cts = cts; + } + else if( max2_cts < cts ) + max2_cts = cts; + dts += stts_data->sample_delta; + /* If finished sample_count of current entry, move to next. */ + if( ++j == ctts_data->sample_count ) + { + ctts_entry = ctts_entry->next; + j = 0; + } + if( ++k == stts_data->sample_count ) + { + stts_entry = stts_entry->next; + k = 0; + } + } + dts -= last_stts_data->sample_delta; + if( root->fragment ) + /* Overall presentation is extended exceeding this initial movie. + * So, any players shall display the movie exceeding the durations + * indicated in Movie Header Box, Track Header Boxes and Media Header Boxes. + * Samples up to the duration indicated in Movie Extends Header Box shall be displayed. + * In the absence of Movie Extends Header Box, all samples shall be displayed. */ + mdhd->duration += dts + last_sample_delta; + else + { + if( !last_sample_delta ) + { + /* The spec allows an arbitrary value for the duration of the last sample. So, we pick last-1 sample's. */ + last_sample_delta = max_cts - max2_cts; + } + mdhd->duration = max_cts - min_cts + last_sample_delta; + /* To match dts and media duration, update stts and mdhd relatively. */ + if( mdhd->duration > dts ) + last_sample_delta = mdhd->duration - dts; + else + mdhd->duration = dts + last_sample_delta; /* media duration must not less than last dts. 
*/ + } + if( isom_replace_last_sample_delta( stbl, last_sample_delta ) ) + return -1; + /* Explicit composition information and timeline shifting */ + if( cslg || root->qt_compatible || root->max_isom_version >= 4 ) + { + if( ctd_shift ) + { + /* Remove composition to decode timeline shift. */ + max_cts -= ctd_shift; + max2_cts -= ctd_shift; + min_cts -= ctd_shift; + } + int64_t composition_end_time = max_cts + (max_cts - max2_cts); + if( !root->fragment + && ((int32_t)min_offset <= INT32_MAX) && ((int32_t)max_offset <= INT32_MAX) + && ((int64_t)min_cts <= INT32_MAX) && (composition_end_time <= INT32_MAX) ) + { + if( !cslg ) + { + if( isom_add_cslg( trak->mdia->minf->stbl ) ) + return -1; + cslg = stbl->cslg; + } + cslg->compositionToDTSShift = ctd_shift; + cslg->leastDecodeToDisplayDelta = min_offset; + cslg->greatestDecodeToDisplayDelta = max_offset; + cslg->compositionStartTime = min_cts; + cslg->compositionEndTime = composition_end_time; + } + else + { + if( cslg ) + free( cslg ); + stbl->cslg = NULL; + } + } + } + if( mdhd->duration > UINT32_MAX ) + mdhd->version = 1; + return 0; +} + +/* Recompute the movie header duration as the maximum tkhd duration over the tracks in moov->trak_list, + * and switch the movie header to version 1 when that duration does not fit in 32 bits. + * Returns 0 on success, -1 if the movie header or the track list is missing or a track has no tkhd. */ +static int isom_update_mvhd_duration( isom_moov_t *moov ) +{ + /* The track list is walked below, so it must exist as well. */ + if( !moov || !moov->mvhd || !moov->trak_list ) + return -1; + isom_mvhd_t *mvhd = moov->mvhd; + mvhd->duration = 0; + for( lsmash_entry_t *entry = moov->trak_list->head; entry; entry = entry->next ) + { + /* We pick maximum track duration as movie duration. */ + isom_trak_entry_t *data = (isom_trak_entry_t *)entry->data; + if( !data || !data->tkhd ) + return -1; + mvhd->duration = entry != moov->trak_list->head ? 
LSMASH_MAX( mvhd->duration, data->tkhd->duration ) : data->tkhd->duration; + } + if( mvhd->duration > UINT32_MAX ) + mvhd->version = 1; + return 0; +} + +/* Recompute the track header duration and then refresh the movie duration via isom_update_mvhd_duration. + * For a fragmented movie or a track without an edit list, the duration is the media duration rescaled to the movie timescale; + * otherwise it is the sum of the segment_durations of the edit list. + * A non-fragmented track whose duration is still 0 afterwards gets the all-ones value for its header version. + * Returns 0 on success, -1 if a required box is missing or an update fails. */ +static int isom_update_tkhd_duration( isom_trak_entry_t *trak ) +{ + if( !trak || !trak->tkhd || !trak->root || !trak->root->moov ) + return -1; + lsmash_root_t *root = trak->root; + isom_tkhd_t *tkhd = trak->tkhd; + tkhd->duration = 0; + if( root->fragment || !trak->edts || !trak->edts->elst ) + { + /* If this presentation might be extended or this track doesn't have edit list, calculate track duration from media duration. */ + if( !trak->mdia || !trak->mdia->mdhd || !root->moov->mvhd || !trak->mdia->mdhd->timescale ) + return -1; + if( !trak->mdia->mdhd->duration && isom_update_mdhd_duration( trak, 0 ) ) + return -1; + tkhd->duration = trak->mdia->mdhd->duration * ((double)root->moov->mvhd->timescale / trak->mdia->mdhd->timescale); + } + else + { + /* If the presentation won't be extended and this track has any edit, then track duration is just the sum of the segment_durations. */ + if( !trak->edts->elst->list ) + return -1; /* An edit list box without its entry list cannot be summed. */ + for( lsmash_entry_t *entry = trak->edts->elst->list->head; entry; entry = entry->next ) + { + isom_elst_entry_t *data = (isom_elst_entry_t *)entry->data; + if( !data ) + return -1; + tkhd->duration += data->segment_duration; + } + } + if( tkhd->duration > UINT32_MAX ) + tkhd->version = 1; + if( !root->fragment && !tkhd->duration ) + tkhd->duration = tkhd->version == 1 ? 0xffffffffffffffff : 0xffffffff; + return isom_update_mvhd_duration( root->moov ); +} + +int lsmash_update_track_duration( lsmash_root_t *root, uint32_t track_ID, uint32_t last_sample_delta ) +{ + isom_trak_entry_t *trak = isom_get_trak( root, track_ID ); + if( !trak ) + return -1; + if( isom_update_mdhd_duration( trak, last_sample_delta ) ) + return -1; + /* If the presentation won't be extended and this track has any edit, we don't change or update duration in tkhd. */ + return (!root->fragment && trak->edts && trak->edts->elst) + ? 
isom_update_mvhd_duration( root->moov ) /* Only update movie duration. */ + : isom_update_tkhd_duration( trak ); /* Also update movie duration internally. */ +} + +static inline int isom_increment_sample_number_in_entry( uint32_t *sample_number_in_entry, uint32_t sample_count_in_entry, lsmash_entry_t **entry ) +{ + if( *sample_number_in_entry != sample_count_in_entry ) + { + *sample_number_in_entry += 1; + return 0; + } + /* Precede the next entry. */ + *sample_number_in_entry = 1; + if( *entry ) + { + *entry = (*entry)->next; + if( *entry && !(*entry)->data ) + return -1; + } + return 0; +} + +static int isom_calculate_bitrate_description( isom_mdia_t *mdia, uint32_t *bufferSizeDB, uint32_t *maxBitrate, uint32_t *avgBitrate, uint32_t sample_description_index ) +{ + isom_stsz_t *stsz = mdia->minf->stbl->stsz; + lsmash_entry_t *stsz_entry = stsz->list ? stsz->list->head : NULL; + lsmash_entry_t *stts_entry = mdia->minf->stbl->stts->list->head; + lsmash_entry_t *stsc_entry = NULL; + lsmash_entry_t *next_stsc_entry = mdia->minf->stbl->stsc->list->head; + isom_stts_entry_t *stts_data = NULL; + isom_stsc_entry_t *stsc_data = NULL; + if( next_stsc_entry && !next_stsc_entry->data ) + return -1; + uint32_t rate = 0; + uint64_t dts = 0; + uint32_t time_wnd = 0; + uint32_t timescale = mdia->mdhd->timescale; + uint32_t chunk_number = 0; + uint32_t sample_number_in_stts = 1; + uint32_t sample_number_in_chunk = 1; + *bufferSizeDB = 0; + *maxBitrate = 0; + *avgBitrate = 0; + while( stts_entry ) + { + if( !stsc_data || sample_number_in_chunk == stsc_data->samples_per_chunk ) + { + /* Move the next chunk. */ + sample_number_in_chunk = 1; + ++chunk_number; + /* Check if the next entry is broken. */ + while( next_stsc_entry && ((isom_stsc_entry_t *)next_stsc_entry->data)->first_chunk < chunk_number ) + { + /* Just skip broken next entry. 
*/ + next_stsc_entry = next_stsc_entry->next; + if( next_stsc_entry && !next_stsc_entry->data ) + return -1; + } + /* Check if the next chunk belongs to the next sequence of chunks. */ + if( next_stsc_entry && ((isom_stsc_entry_t *)next_stsc_entry->data)->first_chunk == chunk_number ) + { + stsc_entry = next_stsc_entry; + next_stsc_entry = next_stsc_entry->next; + if( next_stsc_entry && !next_stsc_entry->data ) + return -1; + stsc_data = (isom_stsc_entry_t *)stsc_entry->data; + /* Check if the next contiguous chunks belong to given sample description. */ + if( stsc_data->sample_description_index != sample_description_index ) + { + /* Skip chunks which don't belong to given sample description. */ + uint32_t number_of_skips = 0; + uint32_t first_chunk = stsc_data->first_chunk; + uint32_t samples_per_chunk = stsc_data->samples_per_chunk; + while( next_stsc_entry ) + { + if( ((isom_stsc_entry_t *)next_stsc_entry->data)->sample_description_index != sample_description_index ) + { + stsc_data = (isom_stsc_entry_t *)next_stsc_entry->data; + number_of_skips += (stsc_data->first_chunk - first_chunk) * samples_per_chunk; + first_chunk = stsc_data->first_chunk; + samples_per_chunk = stsc_data->samples_per_chunk; + } + else if( ((isom_stsc_entry_t *)next_stsc_entry->data)->first_chunk <= first_chunk ) + ; /* broken entry */ + else + break; + /* Just skip the next entry. */ + next_stsc_entry = next_stsc_entry->next; + if( next_stsc_entry && !next_stsc_entry->data ) + return -1; + } + if( !next_stsc_entry ) + break; /* There is no more chunks which don't belong to given sample description. 
*/ + number_of_skips += (((isom_stsc_entry_t *)next_stsc_entry->data)->first_chunk - first_chunk) * samples_per_chunk; + for( uint32_t i = 0; i < number_of_skips; i++ ) + { + if( stsz->list ) + { + if( !stsz_entry ) + break; + stsz_entry = stsz_entry->next; + } + if( !stts_entry ) + break; + if( isom_increment_sample_number_in_entry( &sample_number_in_stts, ((isom_stts_entry_t *)stts_entry->data)->sample_count, &stts_entry ) ) + return -1; + } + if( (stsz->list && !stsz_entry) || !stts_entry ) + break; + chunk_number = stsc_data->first_chunk; + } + } + } + else + ++sample_number_in_chunk; + /* Get current sample's size. */ + uint32_t size; + if( stsz->list ) + { + if( !stsz_entry ) + break; + isom_stsz_entry_t *stsz_data = (isom_stsz_entry_t *)stsz_entry->data; + if( !stsz_data ) + return -1; + size = stsz_data->entry_size; + stsz_entry = stsz_entry->next; + } + else + size = stsz->sample_size; + /* Get current sample's DTS. */ + if( stts_data ) + dts += stts_data->sample_delta; + stts_data = (isom_stts_entry_t *)stts_entry->data; + if( !stts_data ) + return -1; + isom_increment_sample_number_in_entry( &sample_number_in_stts, stts_data->sample_count, &stts_entry ); + /* Calculate bitrate description. */ + if( *bufferSizeDB < size ) + *bufferSizeDB = size; + *avgBitrate += size; + rate += size; + if( dts > time_wnd + timescale ) + { + if( rate > *maxBitrate ) + *maxBitrate = rate; + time_wnd = dts; + rate = 0; + } + } + double duration = (double)mdia->mdhd->duration / timescale; + *avgBitrate = (uint32_t)(*avgBitrate / duration); + if( !*maxBitrate ) + *maxBitrate = *avgBitrate; + /* Convert to bits per second. 
*/ + *maxBitrate *= 8; + *avgBitrate *= 8; + return 0; +} + +static int isom_update_bitrate_description( isom_mdia_t *mdia ) +{ + if( !mdia || !mdia->mdhd || !mdia->minf || !mdia->minf->stbl ) + return -1; + isom_stbl_t *stbl = mdia->minf->stbl; + if( !stbl->stsd || !stbl->stsd->list + || !stbl->stsz + || !stbl->stsc || !stbl->stsc->list + || !stbl->stts || !stbl->stts->list ) + return -1; + uint32_t sample_description_index = 0; + for( lsmash_entry_t *entry = stbl->stsd->list->head; entry; entry = entry->next ) + { + isom_sample_entry_t *sample_entry = (isom_sample_entry_t *)entry->data; + if( !sample_entry ) + return -1; + ++sample_description_index; + uint32_t bufferSizeDB; + uint32_t maxBitrate; + uint32_t avgBitrate; + /* set bitrate info */ + lsmash_codec_type_t sample_type = (lsmash_codec_type_t)sample_entry->type; + if( lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_AVC1_VIDEO ) + || lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_AVC2_VIDEO ) + || lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_AVCP_VIDEO ) ) + { + isom_visual_entry_t *stsd_data = (isom_visual_entry_t *)sample_entry; + if( !stsd_data ) + return -1; + isom_btrt_t *btrt = (isom_btrt_t *)isom_get_extension_box( &stsd_data->extensions, ISOM_BOX_TYPE_BTRT ); + if( btrt ) + { + if( isom_calculate_bitrate_description( mdia, &bufferSizeDB, &maxBitrate, &avgBitrate, sample_description_index ) ) + return -1; + btrt->bufferSizeDB = bufferSizeDB; + btrt->maxBitrate = maxBitrate; + btrt->avgBitrate = avgBitrate; + } + } + else if( lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_MP4V_VIDEO ) ) + { + isom_visual_entry_t *stsd_data = (isom_visual_entry_t *)sample_entry; + if( !stsd_data ) + return -1; + isom_esds_t *esds = (isom_esds_t *)isom_get_extension_box( &stsd_data->extensions, ISOM_BOX_TYPE_ESDS ); + if( !esds || !esds->ES ) + return -1; + if( isom_calculate_bitrate_description( mdia, &bufferSizeDB, &maxBitrate, &avgBitrate, 
sample_description_index ) ) + return -1; + /* FIXME: avgBitrate is 0 only if VBR in proper. */ + if( mp4sys_update_DecoderConfigDescriptor( esds->ES, bufferSizeDB, maxBitrate, 0 ) ) + return -1; + } + else if( lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_MP4A_AUDIO ) ) + { + isom_audio_entry_t *stsd_data = (isom_audio_entry_t *)sample_entry; + if( !stsd_data ) + return -1; + isom_esds_t *esds = NULL; + if( ((isom_audio_entry_t *)sample_entry)->version ) + { + /* MPEG-4 Audio in QTFF */ + isom_wave_t *wave = (isom_wave_t *)isom_get_extension_box( &stsd_data->extensions, QT_BOX_TYPE_WAVE ); + if( !wave ) + return -1; + esds = (isom_esds_t *)isom_get_extension_box( &wave->extensions, ISOM_BOX_TYPE_ESDS ); + } + else + esds = (isom_esds_t *)isom_get_extension_box( &stsd_data->extensions, ISOM_BOX_TYPE_ESDS ); + if( !esds || !esds->ES ) + return -1; + if( isom_calculate_bitrate_description( mdia, &bufferSizeDB, &maxBitrate, &avgBitrate, sample_description_index ) ) + return -1; + /* FIXME: avgBitrate is 0 only if VBR in proper. */ + if( mp4sys_update_DecoderConfigDescriptor( esds->ES, bufferSizeDB, maxBitrate, 0 ) ) + return -1; + } + else if( lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_ALAC_AUDIO ) + || lsmash_check_codec_type_identical( sample_type, QT_CODEC_TYPE_ALAC_AUDIO ) ) + { + isom_audio_entry_t *alac = (isom_audio_entry_t *)sample_entry; + if( !alac ) + return -1; + uint8_t *exdata = NULL; + uint32_t exdata_size = 0; + isom_extension_box_t *alac_ext = isom_get_sample_description_extension( &alac->extensions, QT_BOX_TYPE_WAVE ); + if( alac_ext ) + { + /* Apple Lossless Audio inside QuickTime file format + * Though average bitrate field we found is always set to 0 apparently, + * we set up maxFrameBytes and avgBitRate fields. 
*/ + if( alac_ext->format == EXTENSION_FORMAT_BINARY ) + exdata = isom_get_child_box_position( alac_ext->form.binary, alac_ext->size, QT_BOX_TYPE_ALAC, &exdata_size ); + else + { + isom_wave_t *wave = (isom_wave_t *)alac_ext->form.box; + isom_extension_box_t *wave_ext = isom_get_sample_description_extension( &wave->extensions, QT_BOX_TYPE_ALAC ); + if( !wave_ext || wave_ext->format != EXTENSION_FORMAT_BINARY ) + return -1; + exdata = wave_ext->form.binary; + exdata_size = wave_ext->size; + } + } + else + { + /* Apple Lossless Audio inside ISO Base Media file format */ + isom_extension_box_t *ext = isom_get_sample_description_extension( &alac->extensions, ISOM_BOX_TYPE_ALAC ); + if( !ext || ext->format != EXTENSION_FORMAT_BINARY ) + return -1; + exdata = ext->form.binary; + exdata_size = ext->size; + } + if( !exdata || exdata_size < 36 ) + return -1; + if( isom_calculate_bitrate_description( mdia, &bufferSizeDB, &maxBitrate, &avgBitrate, sample_description_index ) ) + return -1; + exdata += 24; + /* maxFrameBytes */ + exdata[0] = (bufferSizeDB >> 24) & 0xff; + exdata[1] = (bufferSizeDB >> 16) & 0xff; + exdata[2] = (bufferSizeDB >> 8) & 0xff; + exdata[3] = bufferSizeDB & 0xff; + /* avgBitRate */ + exdata[4] = (avgBitrate >> 24) & 0xff; + exdata[5] = (avgBitrate >> 16) & 0xff; + exdata[6] = (avgBitrate >> 8) & 0xff; + exdata[7] = avgBitrate & 0xff; + } + else if( lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_DTSC_AUDIO ) + || lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_DTSE_AUDIO ) + || lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_DTSH_AUDIO ) + || lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_DTSL_AUDIO ) ) + { + isom_audio_entry_t *dts_audio = (isom_audio_entry_t *)sample_entry; + if( !dts_audio ) + return -1; + isom_extension_box_t *ext = isom_get_sample_description_extension( &dts_audio->extensions, ISOM_BOX_TYPE_DDTS ); + if( !(ext && ext->format == EXTENSION_FORMAT_BINARY && 
ext->form.binary && ext->size >= 28) ) + return -1; + if( isom_calculate_bitrate_description( mdia, &bufferSizeDB, &maxBitrate, &avgBitrate, sample_description_index ) ) + return -1; + if( !stbl->stsz->list ) + maxBitrate = avgBitrate; + uint8_t *exdata = ext->form.binary + 12; + exdata[0] = (maxBitrate >> 24) & 0xff; + exdata[1] = (maxBitrate >> 16) & 0xff; + exdata[2] = (maxBitrate >> 8) & 0xff; + exdata[3] = maxBitrate & 0xff; + exdata[4] = (avgBitrate >> 24) & 0xff; + exdata[5] = (avgBitrate >> 16) & 0xff; + exdata[6] = (avgBitrate >> 8) & 0xff; + exdata[7] = avgBitrate & 0xff; + } + else if( lsmash_check_codec_type_identical( sample_type, ISOM_CODEC_TYPE_EC_3_AUDIO ) ) + { + isom_audio_entry_t *eac3 = (isom_audio_entry_t *)sample_entry; + if( !eac3 ) + return -1; + isom_extension_box_t *ext = isom_get_sample_description_extension( &eac3->extensions, ISOM_BOX_TYPE_DEC3 ); + if( !(ext && ext->format == EXTENSION_FORMAT_BINARY && ext->form.binary && ext->size >= 10) ) + return -1; + uint16_t bitrate; + if( stbl->stsz->list ) + { + if( isom_calculate_bitrate_description( mdia, &bufferSizeDB, &maxBitrate, &avgBitrate, sample_description_index ) ) + return -1; + bitrate = maxBitrate / 1000; /* Use maximum bitrate if VBR. */ + } + else + bitrate = stbl->stsz->sample_size * (eac3->samplerate >> 16) / 192000; /* 192000 == 1536 * 1000 / 8 */ + uint8_t *exdata = ext->form.binary + 8; + exdata[0] = (bitrate >> 5) & 0xff; + exdata[1] = (bitrate & 0x1f) << 3; + } + } + return sample_description_index ? 0 : -1; +} + +static int isom_check_mandatory_boxes( lsmash_root_t *root ) +{ + if( !root ) + return -1; + if( !root->moov || !root->moov->mvhd ) + return -1; + if( !root->moov->trak_list ) + return -1; + /* A movie requires at least one track. 
*/ + if( !root->moov->trak_list->head ) + return -1; + for( lsmash_entry_t *entry = root->moov->trak_list->head; entry; entry = entry->next ) + { + isom_trak_entry_t *trak = (isom_trak_entry_t *)entry->data; + if( !trak + || !trak->tkhd + || !trak->mdia + || !trak->mdia->mdhd + || !trak->mdia->hdlr + || !trak->mdia->minf + || !trak->mdia->minf->dinf + || !trak->mdia->minf->dinf->dref + || !trak->mdia->minf->stbl + || !trak->mdia->minf->stbl->stsd + || !trak->mdia->minf->stbl->stsz + || !trak->mdia->minf->stbl->stts + || !trak->mdia->minf->stbl->stsc + || !trak->mdia->minf->stbl->stco ) + return -1; + if( root->qt_compatible && !trak->mdia->minf->hdlr ) + return -1; + isom_stbl_t *stbl = trak->mdia->minf->stbl; + if( !stbl->stsd->list || !stbl->stsd->list->head ) + return -1; + if( !root->fragment + && (!stbl->stsd->list || !stbl->stsd->list->head + || !stbl->stts->list || !stbl->stts->list->head + || !stbl->stsc->list || !stbl->stsc->list->head + || !stbl->stco->list || !stbl->stco->list->head) ) + return -1; + } + if( !root->fragment ) + return 0; + if( !root->moov->mvex || !root->moov->mvex->trex_list ) + return -1; + for( lsmash_entry_t *entry = root->moov->mvex->trex_list->head; entry; entry = entry->next ) + if( !entry->data ) /* trex */ + return -1; + return 0; +} + +static inline uint64_t isom_get_current_mp4time( void ) +{ + return (uint64_t)time( NULL ) + ISOM_MAC_EPOCH_OFFSET; +} + +static int isom_set_media_creation_time( isom_trak_entry_t *trak, uint64_t current_mp4time ) +{ + if( !trak->mdia || !trak->mdia->mdhd ) + return -1; + isom_mdhd_t *mdhd = trak->mdia->mdhd; + if( !mdhd->creation_time ) + mdhd->creation_time = mdhd->modification_time = current_mp4time; + return 0; +} + +static int isom_set_track_creation_time( isom_trak_entry_t *trak, uint64_t current_mp4time ) +{ + if( !trak || !trak->tkhd ) + return -1; + isom_tkhd_t *tkhd = trak->tkhd; + if( !tkhd->creation_time ) + tkhd->creation_time = tkhd->modification_time = current_mp4time; + if( 
isom_set_media_creation_time( trak, current_mp4time ) ) + return -1; + return 0; +} + +static int isom_set_movie_creation_time( lsmash_root_t *root ) +{ + if( !root || !root->moov || !root->moov->mvhd || !root->moov->trak_list ) + return -1; + uint64_t current_mp4time = isom_get_current_mp4time(); + for( uint32_t i = 1; i <= root->moov->trak_list->entry_count; i++ ) + if( isom_set_track_creation_time( isom_get_trak( root, i ), current_mp4time ) ) + return -1; + isom_mvhd_t *mvhd = root->moov->mvhd; + if( !mvhd->creation_time ) + mvhd->creation_time = mvhd->modification_time = current_mp4time; + return 0; +} + +#define CHECK_LARGESIZE( x ) \ + (x->size) += isom_update_extension_boxes( x ); \ + if( (x->size) > UINT32_MAX ) (x->size) += 8 + +static uint64_t isom_update_extension_boxes( void *box ); + +static uint64_t isom_update_unknown_box_size( isom_unknown_box_t *unknown_box ) +{ + if( !unknown_box ) + return 0; + unknown_box->size = ISOM_BASEBOX_COMMON_SIZE + unknown_box->unknown_size; + CHECK_LARGESIZE( unknown_box ); + return unknown_box->size; +} + +static uint64_t isom_update_mvhd_size( isom_mvhd_t *mvhd ) +{ + if( !mvhd ) + return 0; + mvhd->version = 0; + if( mvhd->creation_time > UINT32_MAX || mvhd->modification_time > UINT32_MAX || mvhd->duration > UINT32_MAX ) + mvhd->version = 1; + mvhd->size = ISOM_FULLBOX_COMMON_SIZE + 96 + (uint64_t)mvhd->version * 12; + CHECK_LARGESIZE( mvhd ); + return mvhd->size; +} + +static uint64_t isom_update_iods_size( isom_iods_t *iods ) +{ + if( !iods || !iods->OD ) + return 0; + iods->size = ISOM_FULLBOX_COMMON_SIZE + mp4sys_update_ObjectDescriptor_size( iods->OD ); + CHECK_LARGESIZE( iods ); + return iods->size; +} + +static uint64_t isom_update_ctab_size( isom_ctab_t *ctab ) +{ + if( !ctab ) + return 0; + ctab->size = ISOM_BASEBOX_COMMON_SIZE + (uint64_t)(1 + ctab->color_table.size + !!ctab->color_table.array) * 8; + CHECK_LARGESIZE( ctab ); + return ctab->size; +} + +static uint64_t isom_update_tkhd_size( isom_tkhd_t 
*tkhd ) +{ + if( !tkhd ) + return 0; + tkhd->version = 0; + /* Version 1 (12 extra bytes) is required once any time field exceeds 32 bits. */ if( tkhd->creation_time > UINT32_MAX || tkhd->modification_time > UINT32_MAX || tkhd->duration > UINT32_MAX ) + tkhd->version = 1; + tkhd->size = ISOM_FULLBOX_COMMON_SIZE + 80 + (uint64_t)tkhd->version * 12; + CHECK_LARGESIZE( tkhd ); + return tkhd->size; +} + +/* Update clef->size (full-box header + 8 bytes) and return it; 0 for NULL. */ static uint64_t isom_update_clef_size( isom_clef_t *clef ) +{ + if( !clef ) + return 0; + clef->size = ISOM_FULLBOX_COMMON_SIZE + 8; + CHECK_LARGESIZE( clef ); + return clef->size; +} + +/* Update prof->size (full-box header + 8 bytes) and return it; 0 for NULL. */ static uint64_t isom_update_prof_size( isom_prof_t *prof ) +{ + if( !prof ) + return 0; + prof->size = ISOM_FULLBOX_COMMON_SIZE + 8; + CHECK_LARGESIZE( prof ); + return prof->size; +} + +/* Update enof->size (full-box header + 8 bytes) and return it; 0 for NULL. */ static uint64_t isom_update_enof_size( isom_enof_t *enof ) +{ + if( !enof ) + return 0; + enof->size = ISOM_FULLBOX_COMMON_SIZE + 8; + CHECK_LARGESIZE( enof ); + return enof->size; +} + +/* Update tapt->size: box header plus the clef, prof and enof children; a missing child contributes 0. */ static uint64_t isom_update_tapt_size( isom_tapt_t *tapt ) +{ + if( !tapt ) + return 0; + tapt->size = ISOM_BASEBOX_COMMON_SIZE + + isom_update_clef_size( tapt->clef ) + + isom_update_prof_size( tapt->prof ) + + isom_update_enof_size( tapt->enof ); + CHECK_LARGESIZE( tapt ); + return tapt->size; +} + +/* Update elst->size. The loop counts the entries and switches the box to version 1 when any segment_duration or media_time does not fit in 32 bits. */ static uint64_t isom_update_elst_size( isom_elst_t *elst ) +{ + if( !elst || !elst->list ) + return 0; + uint32_t i = 0; + elst->version = 0; + for( lsmash_entry_t *entry = elst->list->head; entry; entry = entry->next, i++ ) + { + isom_elst_entry_t *data = (isom_elst_entry_t *)entry->data; + if( data->segment_duration > UINT32_MAX || data->media_time > INT32_MAX || data->media_time < INT32_MIN ) + elst->version = 1; + } + elst->size = ISOM_LIST_FULLBOX_COMMON_SIZE + (uint64_t)i * ( elst->version ? 
20 : 12 ); + CHECK_LARGESIZE( elst ); + return elst->size; +} + +/* Update edts->size: box header plus the contained 'elst' (0 when absent). */ static uint64_t isom_update_edts_size( isom_edts_t *edts ) +{ + if( !edts ) + return 0; + edts->size = ISOM_BASEBOX_COMMON_SIZE + isom_update_elst_size( edts->elst ); + CHECK_LARGESIZE( edts ); + return edts->size; +} + +/* Update tref->size and the size of every reference entry in ref_list; each entry takes a box header plus 4 bytes per ref_count. */ static uint64_t isom_update_tref_size( isom_tref_t *tref ) +{ + if( !tref ) + return 0; + tref->size = ISOM_BASEBOX_COMMON_SIZE; + if( tref->ref_list ) + for( lsmash_entry_t *entry = tref->ref_list->head; entry; entry = entry->next ) + { + isom_tref_type_t *ref = (isom_tref_type_t *)entry->data; + ref->size = ISOM_BASEBOX_COMMON_SIZE + (uint64_t)ref->ref_count * 4; + CHECK_LARGESIZE( ref ); + tref->size += ref->size; + } + CHECK_LARGESIZE( tref ); + return tref->size; +} + +/* Update mdhd->size. Version 1 (12 extra bytes) is selected when any time field exceeds UINT32_MAX. */ static uint64_t isom_update_mdhd_size( isom_mdhd_t *mdhd ) +{ + if( !mdhd ) + return 0; + mdhd->version = 0; + if( mdhd->creation_time > UINT32_MAX || mdhd->modification_time > UINT32_MAX || mdhd->duration > UINT32_MAX ) + mdhd->version = 1; + mdhd->size = ISOM_FULLBOX_COMMON_SIZE + 20 + (uint64_t)mdhd->version * 12; + CHECK_LARGESIZE( mdhd ); + return mdhd->size; +} + +/* Update hdlr->size: full-box header, 20 fixed bytes and the component name. */ static uint64_t isom_update_hdlr_size( isom_hdlr_t *hdlr ) +{ + if( !hdlr ) + return 0; + hdlr->size = ISOM_FULLBOX_COMMON_SIZE + 20 + (uint64_t)hdlr->componentName_length; + CHECK_LARGESIZE( hdlr ); + return hdlr->size; +} + +/* Update the size of one data reference entry: full-box header plus name and location lengths. */ static uint64_t isom_update_dref_entry_size( isom_dref_entry_t *urln ) +{ + if( !urln ) + return 0; + urln->size = ISOM_FULLBOX_COMMON_SIZE + (uint64_t)urln->name_length + urln->location_length; + CHECK_LARGESIZE( urln ); + return urln->size; +} + +/* Update dref->size: list full-box header plus every entry. The inner list test repeats the guard above. */ static uint64_t isom_update_dref_size( isom_dref_t *dref ) +{ + if( !dref || !dref->list ) + return 0; + dref->size = ISOM_LIST_FULLBOX_COMMON_SIZE; + if( dref->list ) + for( lsmash_entry_t *entry = dref->list->head; entry; entry = entry->next ) + { + isom_dref_entry_t *data = (isom_dref_entry_t *)entry->data; + dref->size += isom_update_dref_entry_size( data ); + } + 
CHECK_LARGESIZE( dref ); + return dref->size; +} + +/* Update dinf->size: box header plus the contained 'dref'. */ static uint64_t isom_update_dinf_size( isom_dinf_t *dinf ) +{ + if( !dinf ) + return 0; + dinf->size = ISOM_BASEBOX_COMMON_SIZE + isom_update_dref_size( dinf->dref ); + CHECK_LARGESIZE( dinf ); + return dinf->size; +} + +/* Update vmhd->size (full-box header + 8 bytes); 0 for NULL. */ static uint64_t isom_update_vmhd_size( isom_vmhd_t *vmhd ) +{ + if( !vmhd ) + return 0; + vmhd->size = ISOM_FULLBOX_COMMON_SIZE + 8; + CHECK_LARGESIZE( vmhd ); + return vmhd->size; +} + +/* Update smhd->size (full-box header + 4 bytes); 0 for NULL. */ static uint64_t isom_update_smhd_size( isom_smhd_t *smhd ) +{ + if( !smhd ) + return 0; + smhd->size = ISOM_FULLBOX_COMMON_SIZE + 4; + CHECK_LARGESIZE( smhd ); + return smhd->size; +} + +/* Update hmhd->size (full-box header + 16 bytes); 0 for NULL. */ static uint64_t isom_update_hmhd_size( isom_hmhd_t *hmhd ) +{ + if( !hmhd ) + return 0; + hmhd->size = ISOM_FULLBOX_COMMON_SIZE + 16; + CHECK_LARGESIZE( hmhd ); + return hmhd->size; +} + +/* Update nmhd->size (full-box header only); 0 for NULL. */ static uint64_t isom_update_nmhd_size( isom_nmhd_t *nmhd ) +{ + if( !nmhd ) + return 0; + nmhd->size = ISOM_FULLBOX_COMMON_SIZE; + CHECK_LARGESIZE( nmhd ); + return nmhd->size; +} + +/* Update gmin->size (full-box header + 12 bytes); 0 for NULL. */ static uint64_t isom_update_gmin_size( isom_gmin_t *gmin ) +{ + if( !gmin ) + return 0; + gmin->size = ISOM_FULLBOX_COMMON_SIZE + 12; + CHECK_LARGESIZE( gmin ); + return gmin->size; +} + +/* Update text->size (box header + 36 bytes); 0 for NULL. */ static uint64_t isom_update_text_size( isom_text_t *text ) +{ + if( !text ) + return 0; + text->size = ISOM_BASEBOX_COMMON_SIZE + 36; + CHECK_LARGESIZE( text ); + return text->size; +} + +/* Update gmhd->size: box header plus the gmin and text children; a missing child contributes 0. */ static uint64_t isom_update_gmhd_size( isom_gmhd_t *gmhd ) +{ + if( !gmhd ) + return 0; + gmhd->size = ISOM_BASEBOX_COMMON_SIZE + + isom_update_gmin_size( gmhd->gmin ) + + isom_update_text_size( gmhd->text ); + CHECK_LARGESIZE( gmhd); + return gmhd->size; +} + +/* Update pasp->size (box header + 8 bytes); 0 for NULL. */ static uint64_t isom_update_pasp_size( isom_pasp_t *pasp ) +{ + if( !pasp ) + return 0; + pasp->size = ISOM_BASEBOX_COMMON_SIZE + 8; + CHECK_LARGESIZE( pasp ); + return pasp->size; +} + +/* Update clap->size (box header + 32 bytes); 0 for NULL. */ static uint64_t isom_update_clap_size( isom_clap_t *clap ) +{ + if( !clap ) + return 0; + clap->size = ISOM_BASEBOX_COMMON_SIZE + 32; + CHECK_LARGESIZE( 
clap ); + return clap->size; +} + +/* Update glbl->size: box header plus header_size bytes of payload. */ static uint64_t isom_update_glbl_size( isom_glbl_t *glbl ) +{ + if( !glbl ) + return 0; + glbl->size = ISOM_BASEBOX_COMMON_SIZE + (uint64_t)glbl->header_size; + CHECK_LARGESIZE( glbl ); + return glbl->size; +} + +/* Update colr->size. Only the NCLX and NCLC parameter types are sized (anything else returns 0); NCLX takes one byte more than NCLC. */ static uint64_t isom_update_colr_size( isom_colr_t *colr ) +{ + if( !colr + || (colr->color_parameter_type != ISOM_COLOR_PARAMETER_TYPE_NCLX + && colr->color_parameter_type != QT_COLOR_PARAMETER_TYPE_NCLC) ) + return 0; + colr->size = ISOM_BASEBOX_COMMON_SIZE + 10 + (colr->color_parameter_type == ISOM_COLOR_PARAMETER_TYPE_NCLX); + CHECK_LARGESIZE( colr ); + return colr->size; +} + +/* Update gama->size (box header + 4 bytes). Returns 0 without touching the size when the parent's extensions already hold a 'colr' box; the parent is accessed as a visual sample entry. */ static uint64_t isom_update_gama_size( isom_gama_t *gama ) +{ + if( !gama || !gama->parent ) + return 0; + /* Note: 'gama' box is superseded by 'colr' box. + * Therefore, writers of QTFF should never write both 'colr' and 'gama' box into an Image Description. */ + if( isom_get_extension_box( &((isom_visual_entry_t *)gama->parent)->extensions, QT_BOX_TYPE_COLR ) ) + return 0; + gama->size = ISOM_BASEBOX_COMMON_SIZE + 4; + CHECK_LARGESIZE( gama ); + return gama->size; +} + +/* Update fiel->size (box header + 2 bytes); 0 for NULL. */ static uint64_t isom_update_fiel_size( isom_fiel_t *fiel ) +{ + if( !fiel ) + return 0; + fiel->size = ISOM_BASEBOX_COMMON_SIZE + 2; + CHECK_LARGESIZE( fiel ); + return fiel->size; +} + +/* Update cspc->size (box header + 4 bytes); 0 for NULL. */ static uint64_t isom_update_cspc_size( isom_cspc_t *cspc ) +{ + if( !cspc ) + return 0; + cspc->size = ISOM_BASEBOX_COMMON_SIZE + 4; + CHECK_LARGESIZE( cspc ); + return cspc->size; +} + +/* Update sgbt->size (box header + 1 byte); 0 for NULL. */ static uint64_t isom_update_sgbt_size( isom_sgbt_t *sgbt ) +{ + if( !sgbt ) + return 0; + sgbt->size = ISOM_BASEBOX_COMMON_SIZE + 1; + CHECK_LARGESIZE( sgbt ); + return sgbt->size; +} + +/* Update stsl->size (full-box header + 6 bytes); 0 for NULL. */ static uint64_t isom_update_stsl_size( isom_stsl_t *stsl ) +{ + if( !stsl ) + return 0; + stsl->size = ISOM_FULLBOX_COMMON_SIZE + 6; + CHECK_LARGESIZE( stsl ); + return stsl->size; +} + +/* Update esds->size: full-box header plus the ES descriptor. */ static uint64_t isom_update_esds_size( isom_esds_t *esds ) +{ + if( !esds ) + return 0; + esds->size = ISOM_FULLBOX_COMMON_SIZE + 
mp4sys_update_ES_Descriptor_size( esds->ES ); + CHECK_LARGESIZE( esds ); + return esds->size; +} + +/* Update btrt->size (box header + 12 bytes); 0 for NULL. */ static uint64_t isom_update_btrt_size( isom_btrt_t *btrt ) +{ + if( !btrt ) + return 0; + btrt->size = ISOM_BASEBOX_COMMON_SIZE + 12; + CHECK_LARGESIZE( btrt ); + return btrt->size; +} + +/* Update the size of a visual sample entry: box header plus 78 bytes, plus an inline color table (8 bytes per unit) when color_table_ID is 0. */ static uint64_t isom_update_visual_entry_size( isom_sample_entry_t *description ) +{ + if( !description ) + return 0; + isom_visual_entry_t *visual = (isom_visual_entry_t *)description; + visual->size = ISOM_BASEBOX_COMMON_SIZE + 78; + if( visual->color_table_ID == 0 ) + visual->size += (uint64_t)(1 + visual->color_table.size + !!visual->color_table.array) * 8; + CHECK_LARGESIZE( visual ); + return visual->size; +} + +#if 0 +static uint64_t isom_update_mp4s_entry_size( isom_sample_entry_t *description ) +{ + if( !description || !lsmash_check_box_type_identical( description->type, ISOM_CODEC_TYPE_MP4S_SYSTEM ) ) + return 0; + isom_mp4s_entry_t *mp4s = (isom_mp4s_entry_t *)description; + mp4s->size = ISOM_BASEBOX_COMMON_SIZE + 8 + isom_update_esds_size( mp4s->esds ); + CHECK_LARGESIZE( mp4s ); + return mp4s->size; +} +#endif + +/* Update frma->size (box header + 4 bytes); 0 for NULL. */ static uint64_t isom_update_frma_size( isom_frma_t *frma ) +{ + if( !frma ) + return 0; + frma->size = ISOM_BASEBOX_COMMON_SIZE + 4; + CHECK_LARGESIZE( frma ); + return frma->size; +} + +/* Update enda->size (box header + 2 bytes); 0 for NULL. */ static uint64_t isom_update_enda_size( isom_enda_t *enda ) +{ + if( !enda ) + return 0; + enda->size = ISOM_BASEBOX_COMMON_SIZE + 2; + CHECK_LARGESIZE( enda ); + return enda->size; +} + +/* Update mp4a->size (box header + 4 bytes); 0 for NULL. */ static uint64_t isom_update_mp4a_size( isom_mp4a_t *mp4a ) +{ + if( !mp4a ) + return 0; + mp4a->size = ISOM_BASEBOX_COMMON_SIZE + 4; + CHECK_LARGESIZE( mp4a ); + return mp4a->size; +} + +/* Update terminator->size (box header only); 0 for NULL. */ static uint64_t isom_update_terminator_size( isom_terminator_t *terminator ) +{ + if( !terminator ) + return 0; + terminator->size = ISOM_BASEBOX_COMMON_SIZE; + CHECK_LARGESIZE( terminator ); + return terminator->size; +} + +/* Update wave->size: box header plus the frma, enda, mp4a and terminator children. */ static uint64_t isom_update_wave_size( isom_wave_t *wave ) +{ + if( !wave ) + return 0; 
+ wave->size = ISOM_BASEBOX_COMMON_SIZE + + isom_update_frma_size( wave->frma ) + + isom_update_enda_size( wave->enda ) + + isom_update_mp4a_size( wave->mp4a ) + + isom_update_terminator_size( wave->terminator ); + CHECK_LARGESIZE( wave ); + return wave->size; +} + +/* Update chan->size: full-box header, 12 fixed bytes and 20 bytes per channel description. */ static uint64_t isom_update_chan_size( isom_chan_t *chan ) +{ + if( !chan ) + return 0; + chan->size = ISOM_FULLBOX_COMMON_SIZE + 12 + 20 * (uint64_t)chan->numberChannelDescriptions; + CHECK_LARGESIZE( chan ); + return chan->size; +} + +/* Update the size of an audio sample entry: box header plus 28 bytes, with 16 more for version 1 or 36 more for version 2. */ static uint64_t isom_update_audio_entry_size( isom_sample_entry_t *description ) +{ + if( !description ) + return 0; + isom_audio_entry_t *audio = (isom_audio_entry_t *)description; + audio->size = ISOM_BASEBOX_COMMON_SIZE + 28; + if( audio->version == 1 ) + audio->size += 16; + else if( audio->version == 2 ) + audio->size += 36; + CHECK_LARGESIZE( audio ); + return audio->size; +} + +/* Update the size of a text sample entry: box header, 51 fixed bytes and the font name. */ static uint64_t isom_update_text_entry_size( isom_sample_entry_t *description ) +{ + if( !description ) + return 0; + isom_text_entry_t *text = (isom_text_entry_t *)description; + text->size = ISOM_BASEBOX_COMMON_SIZE + 51 + (uint64_t)text->font_name_length; + CHECK_LARGESIZE( text ); + return text->size; +} + +/* Update ftab->size: box header, 2 fixed bytes, then 3 bytes plus the font name for each font record. */ static uint64_t isom_update_ftab_size( isom_ftab_t *ftab ) +{ + if( !ftab || !ftab->list ) + return 0; + ftab->size = ISOM_BASEBOX_COMMON_SIZE + 2; + for( lsmash_entry_t *entry = ftab->list->head; entry; entry = entry->next ) + { + isom_font_record_t *data = (isom_font_record_t *)entry->data; + ftab->size += 3 + data->font_name_length; + } + CHECK_LARGESIZE( ftab ); + return ftab->size; +} + +/* Update the size of a tx3g sample entry: box header, 38 fixed bytes and the font table. */ static uint64_t isom_update_tx3g_entry_size( isom_sample_entry_t *description ) +{ + if( !description ) + return 0; + isom_tx3g_entry_t *tx3g = (isom_tx3g_entry_t *)description; + tx3g->size = ISOM_BASEBOX_COMMON_SIZE + 38 + isom_update_ftab_size( tx3g->ftab ); + CHECK_LARGESIZE( tx3g ); + return tx3g->size; +} + +/* Update stsd->size by summing the size of every sample description in its list; returns 0 when the box or its list is missing. */ static uint64_t isom_update_stsd_size( isom_stsd_t *stsd ) +{ + if( !stsd || 
!stsd->list ) + return 0; + /* Running total: list full-box header plus every sample entry handled below. */ uint64_t size = ISOM_LIST_FULLBOX_COMMON_SIZE; + for( lsmash_entry_t *entry = stsd->list->head; entry; entry = entry->next ) + { + isom_sample_entry_t *data = (isom_sample_entry_t *)entry->data; + lsmash_codec_type_t sample_type = (lsmash_codec_type_t)data->type; + /* The RAW codec type is sized as video or audio depending on the entry's manager flags. */ if( lsmash_check_codec_type_identical( sample_type, LSMASH_CODEC_TYPE_RAW ) ) + { + if( data->manager & LSMASH_VIDEO_DESCRIPTION ) + size += isom_update_visual_entry_size( data ); + else if( data->manager & LSMASH_AUDIO_DESCRIPTION ) + size += isom_update_audio_entry_size( data ); + continue; + } + /* Codec type -> size updater table. It is static and filled on the first pass (while slot 0 has no func); the first slot with a NULL func ends the lookup below. */ static struct description_update_size_table_tag + { + lsmash_codec_type_t type; + uint64_t (*func)( isom_sample_entry_t * ); + } description_update_size_table[128] = { { LSMASH_CODEC_TYPE_INITIALIZER, NULL } }; + if( !description_update_size_table[0].func ) + { + /* Initialize the table. */ + int i = 0; +#define ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( type, func ) \ + description_update_size_table[i++] = (struct description_update_size_table_tag){ type, func } + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_AVC1_VIDEO, isom_update_visual_entry_size ); +#ifdef LSMASH_DEMUXER_ENABLED + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_MP4V_VIDEO, isom_update_visual_entry_size ); +#endif +#if 0 + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_AVC2_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_AVCP_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_SVC1_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_MVC1_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_MVC2_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_DRAC_VIDEO, isom_update_visual_entry_size ); + 
ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_ENCV_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_MJP2_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_S263_VIDEO, isom_update_visual_entry_size ); +#endif + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_VC_1_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_APCH_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_APCN_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_APCS_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_APCO_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_AP4H_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_DVC_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_DVCP_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_DVPP_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_DV5N_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_DV5P_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_DVH2_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_DVH3_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_DVH5_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_DVH6_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_DVHP_VIDEO, isom_update_visual_entry_size ); + 
ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_DVHQ_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_ULRA_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_ULRG_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_ULY2_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_ULY0_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_V210_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_V216_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_V308_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_V408_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_V410_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_YUV2_VIDEO, isom_update_visual_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_MP4A_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_AC_3_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_ALAC_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_DTSC_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_DTSE_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_DTSH_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_DTSL_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_EC_3_AUDIO, isom_update_audio_entry_size ); + 
ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_SAMR_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_SAWB_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_ALAC_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_MP4A_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_23NI_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_NONE_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_LPCM_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_SOWT_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_TWOS_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_FL32_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_FL64_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_IN24_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_IN32_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_NOT_SPECIFIED, isom_update_audio_entry_size ); +#if 0 + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_DRA1_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_ENCA_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_G719_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_G726_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_M4AE_AUDIO, isom_update_audio_entry_size ); + 
ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_MLPA_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_RAW_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_SAWP_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_SEVC_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_SQCP_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_SSMV_AUDIO, isom_update_audio_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_TWOS_AUDIO, isom_update_audio_entry_size ); +#endif + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_TX3G_TEXT, isom_update_tx3g_entry_size ); + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( QT_CODEC_TYPE_TEXT_TEXT, isom_update_text_entry_size ); +#if 0 + ADD_DESCRIPTION_UPDATE_SIZE_TABLE_ELEMENT( ISOM_CODEC_TYPE_MP4S_SYSTEM, isom_update_mp4s_entry_size ); +#endif + } + /* Apply the first table entry whose type matches; a codec type that is not in the table adds nothing to the total. */ for( int i = 0; description_update_size_table[i].func; i++ ) + if( lsmash_check_codec_type_identical( sample_type, description_update_size_table[i].type ) ) + { + size += description_update_size_table[i].func( data ); + break; + } + } + stsd->size = size; + CHECK_LARGESIZE( stsd ); + return stsd->size; +} + +/* Update stts->size: list full-box header plus 8 bytes per entry; 0 when the box or its list is missing. */ static uint64_t isom_update_stts_size( isom_stts_t *stts ) +{ + if( !stts || !stts->list ) + return 0; + stts->size = ISOM_LIST_FULLBOX_COMMON_SIZE + (uint64_t)stts->list->entry_count * 8; + CHECK_LARGESIZE( stts ); + return stts->size; +} + +/* Update ctts->size: list full-box header plus 8 bytes per entry; 0 when the box or its list is missing. */ static uint64_t isom_update_ctts_size( isom_ctts_t *ctts ) +{ + if( !ctts || !ctts->list ) + return 0; + ctts->size = ISOM_LIST_FULLBOX_COMMON_SIZE + (uint64_t)ctts->list->entry_count * 8; + CHECK_LARGESIZE( ctts ); + return ctts->size; +} + +/* Update cslg->size (full-box header + 20 bytes); 0 for NULL. */ static uint64_t isom_update_cslg_size( isom_cslg_t *cslg ) +{ + if( !cslg ) + return 0; + cslg->size = ISOM_FULLBOX_COMMON_SIZE + 20; 
+ CHECK_LARGESIZE( cslg ); + return cslg->size; +} + +/* Update stsz->size: full-box header, 8 fixed bytes and, when a per-sample list exists, 4 bytes per entry. */ static uint64_t isom_update_stsz_size( isom_stsz_t *stsz ) +{ + if( !stsz ) + return 0; + stsz->size = ISOM_FULLBOX_COMMON_SIZE + 8 + ( stsz->list ? (uint64_t)stsz->list->entry_count * 4 : 0 ); + CHECK_LARGESIZE( stsz ); + return stsz->size; +} + +/* Update stss->size: list full-box header plus 4 bytes per entry; 0 when the box or its list is missing. */ static uint64_t isom_update_stss_size( isom_stss_t *stss ) +{ + if( !stss || !stss->list ) + return 0; + stss->size = ISOM_LIST_FULLBOX_COMMON_SIZE + (uint64_t)stss->list->entry_count * 4; + CHECK_LARGESIZE( stss ); + return stss->size; +} + +/* Update stps->size: list full-box header plus 4 bytes per entry; 0 when the box or its list is missing. */ static uint64_t isom_update_stps_size( isom_stps_t *stps ) +{ + if( !stps || !stps->list ) + return 0; + stps->size = ISOM_LIST_FULLBOX_COMMON_SIZE + (uint64_t)stps->list->entry_count * 4; + CHECK_LARGESIZE( stps ); + return stps->size; +} + +/* Update sdtp->size: full-box header plus 1 byte per entry; 0 when the box or its list is missing. */ static uint64_t isom_update_sdtp_size( isom_sdtp_t *sdtp ) +{ + if( !sdtp || !sdtp->list ) + return 0; + sdtp->size = ISOM_FULLBOX_COMMON_SIZE + (uint64_t)sdtp->list->entry_count; + CHECK_LARGESIZE( sdtp ); + return sdtp->size; +} + +/* Update stsc->size: list full-box header plus 12 bytes per entry; 0 when the box or its list is missing. */ static uint64_t isom_update_stsc_size( isom_stsc_t *stsc ) +{ + if( !stsc || !stsc->list ) + return 0; + stsc->size = ISOM_LIST_FULLBOX_COMMON_SIZE + (uint64_t)stsc->list->entry_count * 12; + CHECK_LARGESIZE( stsc ); + return stsc->size; +} + +/* Update stco->size: list full-box header plus 8 bytes per entry when large_presentation is set, 4 bytes otherwise. */ static uint64_t isom_update_stco_size( isom_stco_t *stco ) +{ + if( !stco || !stco->list ) + return 0; + stco->size = ISOM_LIST_FULLBOX_COMMON_SIZE + (uint64_t)stco->list->entry_count * (stco->large_presentation ? 
8 : 4); + CHECK_LARGESIZE( stco ); + return stco->size; +} + +/* Update sbgp->size: list full-box header, 4 fixed bytes and 8 bytes per entry; 0 when the box or its list is missing. */ static uint64_t isom_update_sbgp_size( isom_sbgp_entry_t *sbgp ) +{ + if( !sbgp || !sbgp->list ) + return 0; + sbgp->size = ISOM_LIST_FULLBOX_COMMON_SIZE + 4 + (uint64_t)sbgp->list->entry_count * 8; + CHECK_LARGESIZE( sbgp ); + return sbgp->size; +} + +/* Update sgpd->size. Version 1 adds one more 4-byte field and, when default_length is 0, a 4-byte length per entry. The per-entry payload is 1 byte for the RAP grouping type and 2 bytes for ROLL; other grouping types add no payload. */ static uint64_t isom_update_sgpd_size( isom_sgpd_entry_t *sgpd ) +{ + if( !sgpd || !sgpd->list ) + return 0; + uint64_t size = ISOM_LIST_FULLBOX_COMMON_SIZE + (1 + (sgpd->version == 1)) * 4; + size += (uint64_t)sgpd->list->entry_count * ((sgpd->version == 1) && !sgpd->default_length) * 4; + switch( sgpd->grouping_type ) + { + case ISOM_GROUP_TYPE_RAP : + size += sgpd->list->entry_count; + break; + case ISOM_GROUP_TYPE_ROLL : + size += (uint64_t)sgpd->list->entry_count * 2; + break; + default : + /* We don't consider other grouping types currently. */ + break; + } + sgpd->size = size; + CHECK_LARGESIZE( sgpd ); + return sgpd->size; +} + +/* Update stbl->size: box header, every sample table child (a missing child contributes 0) and all sgpd/sbgp list entries. */ static uint64_t isom_update_stbl_size( isom_stbl_t *stbl ) +{ + if( !stbl ) + return 0; + stbl->size = ISOM_BASEBOX_COMMON_SIZE + + isom_update_stsd_size( stbl->stsd ) + + isom_update_stts_size( stbl->stts ) + + isom_update_ctts_size( stbl->ctts ) + + isom_update_cslg_size( stbl->cslg ) + + isom_update_stsz_size( stbl->stsz ) + + isom_update_stss_size( stbl->stss ) + + isom_update_stps_size( stbl->stps ) + + isom_update_sdtp_size( stbl->sdtp ) + + isom_update_stsc_size( stbl->stsc ) + + isom_update_stco_size( stbl->stco ); + if( stbl->sgpd_list ) + for( lsmash_entry_t *entry = stbl->sgpd_list->head; entry; entry = entry->next ) + stbl->size += isom_update_sgpd_size( (isom_sgpd_entry_t *)entry->data ); + if( stbl->sbgp_list ) + for( lsmash_entry_t *entry = stbl->sbgp_list->head; entry; entry = entry->next ) + stbl->size += isom_update_sbgp_size( (isom_sbgp_entry_t *)entry->data ); + CHECK_LARGESIZE( stbl ); + return stbl->size; +} + +/* Update minf->size: box header plus the media header, handler, dinf and stbl children; a missing child contributes 0. */ static uint64_t isom_update_minf_size( isom_minf_t *minf ) +{ + if( !minf ) + return 0; + 
minf->size = ISOM_BASEBOX_COMMON_SIZE + + isom_update_vmhd_size( minf->vmhd ) + + isom_update_smhd_size( minf->smhd ) + + isom_update_hmhd_size( minf->hmhd ) + + isom_update_nmhd_size( minf->nmhd ) + + isom_update_gmhd_size( minf->gmhd ) + + isom_update_hdlr_size( minf->hdlr ) + + isom_update_dinf_size( minf->dinf ) + + isom_update_stbl_size( minf->stbl ); + CHECK_LARGESIZE( minf ); + return minf->size; +} + +/* Update mdia->size: box header plus the mdhd, hdlr and minf children; a missing child contributes 0. */ +static uint64_t isom_update_mdia_size( isom_mdia_t *mdia ) +{ + if( !mdia ) + return 0; + mdia->size = ISOM_BASEBOX_COMMON_SIZE + + isom_update_mdhd_size( mdia->mdhd ) + + isom_update_hdlr_size( mdia->hdlr ) + + isom_update_minf_size( mdia->minf ); + CHECK_LARGESIZE( mdia ); + return mdia->size; +} + +/* Update chpl->size: full-box header, 4 extra bytes for version 1, 1 fixed byte, + * then 9 bytes plus the chapter name for every entry. + * A box without an entry list is sized as having no chapters instead of dereferencing NULL. */ +static uint64_t isom_update_chpl_size( isom_chpl_t *chpl ) +{ + if( !chpl ) + return 0; + chpl->size = ISOM_FULLBOX_COMMON_SIZE + 4 * (chpl->version == 1) + 1; + if( chpl->list ) + for( lsmash_entry_t *entry = chpl->list->head; entry; entry = entry->next ) + { + isom_chpl_entry_t *data = (isom_chpl_entry_t *)entry->data; + chpl->size += 9 + data->chapter_name_length; + } + CHECK_LARGESIZE( chpl ); + return chpl->size; +} + +/* Update mean->size: full-box header plus the meaning string. */ +static uint64_t isom_update_mean_size( isom_mean_t *mean ) +{ + if( !mean ) + return 0; + mean->size = ISOM_FULLBOX_COMMON_SIZE + mean->meaning_string_length; + CHECK_LARGESIZE( mean ); + return mean->size; +} + +/* Update name->size: full-box header plus the name. */ +static uint64_t isom_update_name_size( isom_name_t *name ) +{ + if( !name ) + return 0; + name->size = ISOM_FULLBOX_COMMON_SIZE + name->name_length; + CHECK_LARGESIZE( name ); + return name->size; +} + +/* Update data->size: box header, 8 fixed bytes and the value. */ +static uint64_t isom_update_data_size( isom_data_t *data ) +{ + if( !data ) + return 0; + data->size = ISOM_BASEBOX_COMMON_SIZE + 8 + data->value_length; + CHECK_LARGESIZE( data ); + return data->size; +} + +/* Update metaitem->size: box header plus the mean, name and data children; a missing child contributes 0. */ +static uint64_t isom_update_metaitem_size( isom_metaitem_t *metaitem ) +{ + if( !metaitem ) + return 0; + metaitem->size = ISOM_BASEBOX_COMMON_SIZE + + isom_update_mean_size( metaitem->mean ) + + isom_update_name_size( metaitem->name ) + + 
isom_update_data_size( metaitem->data ); + CHECK_LARGESIZE( metaitem ); + return metaitem->size; +} + +/* Update ilst->size: box header plus every metadata item. + * A box without an item list is sized as empty instead of dereferencing NULL. */ +static uint64_t isom_update_ilst_size( isom_ilst_t *ilst ) +{ + if( !ilst ) + return 0; + ilst->size = ISOM_BASEBOX_COMMON_SIZE; + if( ilst->item_list ) + for( lsmash_entry_t *entry = ilst->item_list->head; entry; entry = entry->next ) + ilst->size += isom_update_metaitem_size( (isom_metaitem_t *)entry->data ); + CHECK_LARGESIZE( ilst ); + return ilst->size; +} + +/* Update meta->size: full-box header plus the hdlr, dinf and ilst children; a missing child contributes 0. */ +static uint64_t isom_update_meta_size( isom_meta_t *meta ) +{ + if( !meta ) + return 0; + meta->size = ISOM_FULLBOX_COMMON_SIZE + + isom_update_hdlr_size( meta->hdlr ) + + isom_update_dinf_size( meta->dinf ) + + isom_update_ilst_size( meta->ilst ); + CHECK_LARGESIZE( meta ); + return meta->size; +} + +/* Update cprt->size: full-box header, 2 fixed bytes and the notice. */ +static uint64_t isom_update_cprt_size( isom_cprt_t *cprt ) +{ + if( !cprt ) + return 0; + cprt->size = ISOM_FULLBOX_COMMON_SIZE + 2 + cprt->notice_length; + CHECK_LARGESIZE( cprt ); + return cprt->size; +} + +/* Update the size of a 'udta' box. The track-level box is used when udta_trak is given, + * otherwise the movie-level one; 'chpl' is counted only when udta_moov is non-NULL. */ +static uint64_t isom_update_udta_size( isom_udta_t *udta_moov, isom_udta_t *udta_trak ) +{ + isom_udta_t *udta = udta_trak ? udta_trak : udta_moov ? udta_moov : NULL; + if( !udta ) + return 0; + udta->size = ISOM_BASEBOX_COMMON_SIZE + + (udta_moov ? 
isom_update_chpl_size( udta->chpl ) : 0) + + isom_update_meta_size( udta->meta ); + if( udta->cprt_list ) + for( lsmash_entry_t *entry = udta->cprt_list->head; entry; entry = entry->next ) + udta->size += isom_update_cprt_size( (isom_cprt_t *)entry->data ); + CHECK_LARGESIZE( udta ); + return udta->size; +} + +/* Update trak->size: box header plus the tkhd, tapt, edts, tref, mdia, track-level udta and meta children; a missing child contributes 0. */ static uint64_t isom_update_trak_entry_size( isom_trak_entry_t *trak ) +{ + if( !trak ) + return 0; + trak->size = ISOM_BASEBOX_COMMON_SIZE + + isom_update_tkhd_size( trak->tkhd ) + + isom_update_tapt_size( trak->tapt ) + + isom_update_edts_size( trak->edts ) + + isom_update_tref_size( trak->tref ) + + isom_update_mdia_size( trak->mdia ) + + isom_update_udta_size( NULL, trak->udta ) + + isom_update_meta_size( trak->meta ); + CHECK_LARGESIZE( trak ); + return trak->size; +} + +/* Update mehd->size. The version is raised to 1 when fragment_duration exceeds UINT32_MAX but is never reset to 0 here, so a version 1 set by the caller is kept. */ static uint64_t isom_update_mehd_size( isom_mehd_t *mehd ) +{ + if( !mehd ) + return 0; + if( mehd->fragment_duration > UINT32_MAX ) + mehd->version = 1; + mehd->size = ISOM_FULLBOX_COMMON_SIZE + 4 * (1 + (mehd->version == 1)); + CHECK_LARGESIZE( mehd ); + return mehd->size; +} + +/* Update trex->size (full-box header + 20 bytes); 0 for NULL. */ static uint64_t isom_update_trex_entry_size( isom_trex_entry_t *trex ) +{ + if( !trex ) + return 0; + trex->size = ISOM_FULLBOX_COMMON_SIZE + 20; + CHECK_LARGESIZE( trex ); + return trex->size; +} + +/* Update mvex->size: box header plus every trex entry. Unless the output stream is stdout, the 'mehd' size is added too, or 20 bytes when no 'mehd' exists yet. Dereferences mvex->root->bs without a NULL check. */ static uint64_t isom_update_mvex_size( isom_mvex_t *mvex ) +{ + if( !mvex ) + return 0; + mvex->size = ISOM_BASEBOX_COMMON_SIZE; + if( mvex->trex_list ) + for( lsmash_entry_t *entry = mvex->trex_list->head; entry; entry = entry->next ) + { + isom_trex_entry_t *trex = (isom_trex_entry_t *)entry->data; + mvex->size += isom_update_trex_entry_size( trex ); + } + if( mvex->root->bs->stream != stdout ) + mvex->size += mvex->mehd ? isom_update_mehd_size( mvex->mehd ) : 20; /* 20 bytes is the size of the placeholder. 
*/ + CHECK_LARGESIZE( mvex ); + return mvex->size; +} + +/* Update moov->size: box header plus the mvhd, iods, movie-level udta, ctab, meta and mvex children and every track. Unlike the other updaters this returns a status: -1 for NULL, 0 otherwise; the size is left in moov->size. */ static int isom_update_moov_size( isom_moov_t *moov ) +{ + if( !moov ) + return -1; + moov->size = ISOM_BASEBOX_COMMON_SIZE + + isom_update_mvhd_size( moov->mvhd ) + + isom_update_iods_size( moov->iods ) + + isom_update_udta_size( moov->udta, NULL ) + + isom_update_ctab_size( moov->ctab ) + + isom_update_meta_size( moov->meta ) + + isom_update_mvex_size( moov->mvex ); + if( moov->trak_list ) + for( lsmash_entry_t *entry = moov->trak_list->head; entry; entry = entry->next ) + { + isom_trak_entry_t *trak = (isom_trak_entry_t *)entry->data; + moov->size += isom_update_trak_entry_size( trak ); + } + CHECK_LARGESIZE( moov ); + return 0; +} + +/* Update mfhd->size (full-box header + 4 bytes); 0 for NULL. */ static uint64_t isom_update_mfhd_size( isom_mfhd_t *mfhd ) +{ + if( !mfhd ) + return 0; + mfhd->size = ISOM_FULLBOX_COMMON_SIZE + 4; + CHECK_LARGESIZE( mfhd ); + return mfhd->size; +} + +/* Update tfhd->size: full-box header, 4 fixed bytes and one optional field per flag set in tfhd->flags (8 bytes for the base data offset, 4 bytes for each of the others). */ static uint64_t isom_update_tfhd_size( isom_tfhd_t *tfhd ) +{ + if( !tfhd ) + return 0; + tfhd->size = ISOM_FULLBOX_COMMON_SIZE + + 4 + + 8 * !!( tfhd->flags & ISOM_TF_FLAGS_BASE_DATA_OFFSET_PRESENT ) + + 4 * !!( tfhd->flags & ISOM_TF_FLAGS_SAMPLE_DESCRIPTION_INDEX_PRESENT ) + + 4 * !!( tfhd->flags & ISOM_TF_FLAGS_DEFAULT_SAMPLE_DURATION_PRESENT ) + + 4 * !!( tfhd->flags & ISOM_TF_FLAGS_DEFAULT_SAMPLE_SIZE_PRESENT ) + + 4 * !!( tfhd->flags & ISOM_TF_FLAGS_DEFAULT_SAMPLE_FLAGS_PRESENT ); + CHECK_LARGESIZE( tfhd ); + return tfhd->size; +} + +/* Update tfdt->size: full-box header plus 4 bytes for version 0 or 8 bytes for version 1. */ static uint64_t isom_update_tfdt_size( isom_tfdt_t *tfdt ) +{ + if( !tfdt ) + return 0; + tfdt->size = ISOM_FULLBOX_COMMON_SIZE + 4 * (1 + (tfdt->version == 1)); + CHECK_LARGESIZE( tfdt ); + return tfdt->size; +} + +/* Update trun->size: full-box header, 4 fixed bytes, the optional header fields selected by trun->flags, then one row per sample whose width also depends on the flags. */ static uint64_t isom_update_trun_entry_size( isom_trun_entry_t *trun ) +{ + if( !trun ) + return 0; + trun->size = ISOM_FULLBOX_COMMON_SIZE + + 4 + + 4 * !!( trun->flags & ISOM_TR_FLAGS_DATA_OFFSET_PRESENT ) + + 4 * !!( trun->flags & ISOM_TR_FLAGS_FIRST_SAMPLE_FLAGS_PRESENT ); + uint64_t row_size = 4 * !!( trun->flags & 
ISOM_TR_FLAGS_SAMPLE_DURATION_PRESENT )
+        + 4 * !!( trun->flags & ISOM_TR_FLAGS_SAMPLE_SIZE_PRESENT )
+        + 4 * !!( trun->flags & ISOM_TR_FLAGS_SAMPLE_FLAGS_PRESENT )
+        + 4 * !!( trun->flags & ISOM_TR_FLAGS_SAMPLE_COMPOSITION_TIME_OFFSET_PRESENT );
+    trun->size += row_size * trun->sample_count;
+    CHECK_LARGESIZE( trun );
+    return trun->size;
+}
+
+/* Recompute traf->size: basic box header plus tfhd, tfdt, sdtp and every trun entry.
+ * Returns the new size, or 0 when traf is NULL. */
+static uint64_t isom_update_traf_entry_size( isom_traf_entry_t *traf )
+{
+    if( !traf )
+        return 0;
+    traf->size = ISOM_BASEBOX_COMMON_SIZE
+        + isom_update_tfhd_size( traf->tfhd )
+        + isom_update_tfdt_size( traf->tfdt )
+        + isom_update_sdtp_size( traf->sdtp );
+    if( traf->trun_list )
+        for( lsmash_entry_t *entry = traf->trun_list->head; entry; entry = entry->next )
+        {
+            isom_trun_entry_t *trun = (isom_trun_entry_t *)entry->data;
+            traf->size += isom_update_trun_entry_size( trun );
+        }
+    CHECK_LARGESIZE( traf );
+    return traf->size;
+}
+
+/* Recompute moof->size: basic box header plus mfhd and every traf entry.
+ * Returns a status, not the size: 0 on success, -1 when moof is NULL. */
+static int isom_update_moof_entry_size( isom_moof_entry_t *moof )
+{
+    if( !moof )
+        return -1;
+    moof->size = ISOM_BASEBOX_COMMON_SIZE + isom_update_mfhd_size( moof->mfhd );
+    if( moof->traf_list )
+        for( lsmash_entry_t *entry = moof->traf_list->head; entry; entry = entry->next )
+        {
+            isom_traf_entry_t *traf = (isom_traf_entry_t *)entry->data;
+            moof->size += isom_update_traf_entry_size( traf );
+        }
+    CHECK_LARGESIZE( moof );
+    return 0;
+}
+
+/* Recompute tfra->size: full box header, 12 fixed bytes, then number_of_entry rows.
+ * Each row holds two 4-byte (version 0) or 8-byte (version 1) fields plus three
+ * variable-width numbers of (length_size_of_* + 1) bytes each.
+ * Returns the new size, or 0 when tfra is NULL. */
+static uint64_t isom_update_tfra_entry_size( isom_tfra_entry_t *tfra )
+{
+    if( !tfra )
+        return 0;
+    tfra->size = ISOM_FULLBOX_COMMON_SIZE + 12;
+    /* Keep the row size in 64 bits so that the multiplication below cannot wrap
+     * in 32-bit arithmetic; this matches row_size in isom_update_trun_entry_size(). */
+    uint64_t entry_size = 8 * (1 + (tfra->version == 1))
+        + tfra->length_size_of_traf_num + 1
+        + tfra->length_size_of_trun_num + 1
+        + tfra->length_size_of_sample_num + 1;
+    tfra->size += entry_size * tfra->number_of_entry;
+    CHECK_LARGESIZE( tfra );
+    return tfra->size;
+}
+
+/* Recompute mfro->size: full box header plus a 4-byte payload.
+ * Returns the new size, or 0 when mfro is NULL. */
+static uint64_t isom_update_mfro_size( isom_mfro_t *mfro )
+{
+    if( !mfro )
+        return 0;
+    mfro->size = ISOM_FULLBOX_COMMON_SIZE + 4;
+    CHECK_LARGESIZE( mfro );
+    return mfro->size;
+}
+
+static int isom_update_mfra_size( isom_mfra_t
*mfra )
+{
+    if( !mfra )
+        return -1;
+    mfra->size = ISOM_BASEBOX_COMMON_SIZE;
+    if( mfra->tfra_list )
+        for( lsmash_entry_t *entry = mfra->tfra_list->head; entry; entry = entry->next )
+        {
+            isom_tfra_entry_t *tfra = (isom_tfra_entry_t *)entry->data;
+            mfra->size += isom_update_tfra_entry_size( tfra );
+        }
+    CHECK_LARGESIZE( mfra );    /* NOTE(review): runs before the mfro size is added below -- confirm this order is intended. */
+    if( mfra->mfro )
+    {
+        mfra->size += isom_update_mfro_size( mfra->mfro );
+        mfra->mfro->length = mfra->size;    /* mfro records the total size of the enclosing mfra. */
+    }
+    return 0;
+}
+/* Sum the sizes of all extension boxes attached to box (which is read through isom_box_t).
+ * Binary extensions contribute their stored size; structured ones are dispatched by box type
+ * through a lazily initialised static table, falling back to isom_update_unknown_box_size.
+ * box must not be NULL (asserted). Returns the summed size.
+ * NOTE(review): the table is a function-local static filled on first use without any
+ * locking visible here -- confirm callers are single-threaded. */
+static uint64_t isom_update_extension_boxes( void *box )
+{
+    assert( box );
+    uint64_t size = 0;
+    lsmash_entry_list_t *extensions = &((isom_box_t *)box)->extensions;
+    for( lsmash_entry_t *entry = extensions->head; entry; entry = entry->next )
+    {
+        isom_extension_box_t *ext = (isom_extension_box_t *)entry->data;
+        if( !ext )
+            continue;
+        if( ext->format == EXTENSION_FORMAT_BINARY )
+        {
+            size += ext->size;
+            continue;
+        }
+        static struct update_size_table_tag
+        {
+            lsmash_box_type_t type;
+            uint64_t (*func)( void * );
+        } update_size_table[32] = { { LSMASH_BOX_TYPE_INITIALIZER, NULL } };
+        if( !update_size_table[0].func )    /* First slot still empty: the table has not been filled yet. */
+        {
+            /* Initialize the table.
*/
+            int i = 0;  /* Next free slot; advanced by every ADD_UPDATE_SIZE_TABLE_ELEMENT use. */
+#define ADD_UPDATE_SIZE_TABLE_ELEMENT( type, func ) update_size_table[i++] = (struct update_size_table_tag){ type, (uint64_t (*)( void * ))func }
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( ISOM_BOX_TYPE_ESDS, isom_update_esds_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( ISOM_BOX_TYPE_BTRT, isom_update_btrt_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( ISOM_BOX_TYPE_CLAP, isom_update_clap_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( ISOM_BOX_TYPE_PASP, isom_update_pasp_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( ISOM_BOX_TYPE_STSL, isom_update_stsl_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( ISOM_BOX_TYPE_COLR, isom_update_colr_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( QT_BOX_TYPE_CHAN, isom_update_chan_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( QT_BOX_TYPE_COLR, isom_update_colr_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( QT_BOX_TYPE_CSPC, isom_update_cspc_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( QT_BOX_TYPE_ENDA, isom_update_enda_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( QT_BOX_TYPE_ESDS, isom_update_esds_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( QT_BOX_TYPE_FIEL, isom_update_fiel_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( QT_BOX_TYPE_FRMA, isom_update_frma_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( QT_BOX_TYPE_GAMA, isom_update_gama_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( QT_BOX_TYPE_GLBL, isom_update_glbl_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( QT_BOX_TYPE_SGBT, isom_update_sgbt_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( QT_BOX_TYPE_WAVE, isom_update_wave_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( QT_BOX_TYPE_TERMINATOR, isom_update_terminator_size );
+            ADD_UPDATE_SIZE_TABLE_ELEMENT( LSMASH_BOX_TYPE_UNSPECIFIED, NULL );    /* Sentinel: a NULL func ends the lookup loop below. */
+#undef ADD_UPDATE_SIZE_TABLE_ELEMENT
+        }
+        /* Default handler for box types that are not listed in the table. */
+        uint64_t (*update_size_func)( void * ) = (uint64_t (*)( void * ))isom_update_unknown_box_size;
+        for( int i = 0; update_size_table[i].func; i++ )
+            if( lsmash_check_box_type_identical( ext->type, update_size_table[i].type ) )
+            {
+                update_size_func = update_size_table[i].func;
+                break;
+            }
+        size += update_size_func(
ext->form.box );
+    }
+    return size;
+}
+
+/*******************************
+    public interfaces
+*******************************/
+
+/*---- track manipulators ----*/
+/* Remove the track whose tkhd track_ID equals track_ID from the movie.
+ * Does nothing when root, moov or the track list is missing, or when no track matches.
+ * The search also stops silently at the first entry that has no data or no tkhd. */
+void lsmash_delete_track( lsmash_root_t *root, uint32_t track_ID )
+{
+    if( !root || !root->moov || !root->moov->trak_list )
+        return;
+    for( lsmash_entry_t *entry = root->moov->trak_list->head; entry; entry = entry->next )
+    {
+        isom_trak_entry_t *trak = (isom_trak_entry_t *)entry->data;
+        if( !trak || !trak->tkhd )
+            return;
+        if( trak->tkhd->track_ID == track_ID )
+        {
+            lsmash_remove_entry_direct( root->moov->trak_list, entry, isom_remove_trak );
+            return;
+        }
+    }
+}
+/* Create a new track of the given media type together with its mandatory child boxes
+ * and a media information header chosen by media_type.
+ * Returns the track_ID of the new track, or 0 on any failure.
+ * NOTE(review): root is dereferenced after isom_add_trak( root ) succeeds; presumably
+ * isom_add_trak rejects a NULL root -- confirm.
+ * NOTE(review): the failure paths return without removing the partially built track
+ * here -- confirm whether cleanup happens elsewhere. */
+uint32_t lsmash_create_track( lsmash_root_t *root, lsmash_media_type media_type )
+{
+    isom_trak_entry_t *trak = isom_add_trak( root );
+    if( !trak )
+        return 0;
+    if( isom_add_tkhd( trak, media_type )
+     || isom_add_mdia( trak )
+     || isom_add_mdhd( trak->mdia, root->qt_compatible ? 0 : ISOM_LANGUAGE_CODE_UNDEFINED )
+     || isom_add_minf( trak->mdia )
+     || isom_add_stbl( trak->mdia->minf )
+     || isom_add_dinf( trak->mdia->minf )
+     || isom_add_dref( trak->mdia->minf->dinf )
+     || isom_add_stsd( trak->mdia->minf->stbl )
+     || isom_add_stts( trak->mdia->minf->stbl )
+     || isom_add_stsc( trak->mdia->minf->stbl )
+     || isom_add_stco( trak->mdia->minf->stbl )
+     || isom_add_stsz( trak->mdia->minf->stbl ) )
+        return 0;
+    if( isom_add_hdlr( trak->mdia, NULL, NULL, media_type ) )
+        return 0;
+    /* QuickTime compatible files additionally get a handler box under minf. */
+    if( root->qt_compatible && isom_add_hdlr( NULL, NULL, trak->mdia->minf, QT_REFERENCE_HANDLER_TYPE_URL ) )
+        return 0;
+    switch( media_type )
+    {
+        case ISOM_MEDIA_HANDLER_TYPE_VIDEO_TRACK :
+            if( isom_add_vmhd( trak->mdia->minf ) )
+                return 0;
+            break;
+        case ISOM_MEDIA_HANDLER_TYPE_AUDIO_TRACK :
+            if( isom_add_smhd( trak->mdia->minf ) )
+                return 0;
+            break;
+        case ISOM_MEDIA_HANDLER_TYPE_HINT_TRACK :
+            if( isom_add_hmhd( trak->mdia->minf ) )
+                return 0;
+            break;
+        case ISOM_MEDIA_HANDLER_TYPE_TEXT_TRACK :
+            if( root->qt_compatible || root->itunes_movie )
+            {
+                if( isom_add_gmhd( trak->mdia->minf )
+                 || isom_add_gmin( trak->mdia->minf->gmhd )
+                 || isom_add_text( trak->mdia->minf->gmhd ) )
+                    return 0;
+            }
+            else
+                return 0;   /* We support only reference text media track for chapter yet. */
+            break;
+        default :
+            if( isom_add_nmhd( trak->mdia->minf ) )
+                return 0;
+            break;
+    }
+    return trak->tkhd->track_ID;
+}
+/* Return the track_ID of the entry that lsmash_get_entry_data yields for track_number,
+ * or 0 when root, moov, the entry or its tkhd is missing.
+ * Presumably track_number is 1-origin -- TODO confirm against lsmash_get_entry_data. */
+uint32_t lsmash_get_track_ID( lsmash_root_t *root, uint32_t track_number )
+{
+    if( !root || !root->moov )
+        return 0;
+    isom_trak_entry_t *trak = (isom_trak_entry_t *)lsmash_get_entry_data( root->moov->trak_list, track_number );
+    if( !trak || !trak->tkhd )
+        return 0;
+    return trak->tkhd->track_ID;
+}
+/* Zero *param, then set audio_volume to 0x0100 and the three diagonal matrix entries
+ * to the same values that lsmash_set_track_parameters writes for non-QuickTime files.
+ * param must not be NULL (it is passed to memset unchecked). */
+void lsmash_initialize_track_parameters( lsmash_track_parameters_t *param )
+{
+    memset( param, 0, sizeof(lsmash_track_parameters_t) );
+    param->audio_volume = 0x0100;
+    param->matrix[0] = 0x00010000;
+    param->matrix[4] = 0x00010000;
+    param->matrix[8] = 0x40000000;
+}
+/* Apply *param to the track header of track_ID and create or remove the Track Aperture
+ * Modes boxes as requested. Returns 0 on success, -1 on failure.
+ * NOTE(review): root->moov is read in the guard below without its own NULL check;
+ * presumably a non-NULL result from isom_get_trak implies it exists -- confirm.
+ * NOTE(review): trak->tkhd is used further down without a NULL check -- confirm. */
+int lsmash_set_track_parameters( lsmash_root_t *root, uint32_t track_ID, lsmash_track_parameters_t *param )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->mdia || !trak->mdia->hdlr || !root->moov->mvhd )
+        return -1;
+    /* Prepare Track Aperture Modes if required. */
+    if( root->qt_compatible && param->aperture_modes )
+    {
+        if( !trak->tapt && isom_add_tapt( trak ) )
+            return -1;
+        isom_tapt_t *tapt = trak->tapt;
+        if( (!tapt->clef && isom_add_clef( tapt ))
+         || (!tapt->prof && isom_add_prof( tapt ))
+         || (!tapt->enof && isom_add_enof( tapt )) )
+            return -1;
+    }
+    else
+        isom_remove_tapt( trak->tapt );
+    /* Set up Track Header. */
+    uint32_t media_type = trak->mdia->hdlr->componentSubtype;
+    isom_tkhd_t *tkhd = trak->tkhd;
+    tkhd->flags = param->mode;
+    tkhd->track_ID = param->track_ID ? param->track_ID : tkhd->track_ID;   /* A zero track_ID in param keeps the current one. */
+    tkhd->duration = !trak->edts || !trak->edts->elst ?
param->duration : tkhd->duration;  /* The caller-supplied duration is taken only when the track has no edit list. */
+    /* Template fields
+     *   alternate_group, layer, volume and matrix
+     * According to 14496-14, these values are all set to default values in 14496-12.
+     * And when a file is read as an MPEG-4 file, these values shall be ignored.
+     * If a file complies with other specifications, then those fields may have non-default values
+     * as required by those other specifications. */
+    if( param->alternate_group )
+    {
+        if( root->qt_compatible || root->itunes_movie || root->max_3gpp_version >= 4 )
+            tkhd->alternate_group = param->alternate_group;
+        else
+        {
+            tkhd->alternate_group = 0;
+            lsmash_log( LSMASH_LOG_WARNING, "alternate_group is specified but not compatible with any of the brands. It won't be set.\n" );
+        }
+    }
+    else
+        tkhd->alternate_group = 0;
+    if( root->qt_compatible || root->itunes_movie )
+    {
+        /* QuickTime and iTunes files take layer, volume and matrix from the caller,
+         * but only for the media type each field applies to. */
+        tkhd->layer = media_type == ISOM_MEDIA_HANDLER_TYPE_VIDEO_TRACK ? param->video_layer : 0;
+        tkhd->volume = media_type == ISOM_MEDIA_HANDLER_TYPE_AUDIO_TRACK ? param->audio_volume : 0;
+        if( media_type == ISOM_MEDIA_HANDLER_TYPE_VIDEO_TRACK )
+            for( int i = 0; i < 9; i++ )
+                tkhd->matrix[i] = param->matrix[i];
+        else
+            for( int i = 0; i < 9; i++ )
+                tkhd->matrix[i] = 0;
+    }
+    else
+    {
+        /* Other files get fixed values regardless of what the caller passed. */
+        tkhd->layer = 0;
+        tkhd->volume = media_type == ISOM_MEDIA_HANDLER_TYPE_AUDIO_TRACK ? 0x0100 : 0;
+        tkhd->matrix[0] = 0x00010000;
+        tkhd->matrix[1] = tkhd->matrix[2] = tkhd->matrix[3] = 0;
+        tkhd->matrix[4] = 0x00010000;
+        tkhd->matrix[5] = tkhd->matrix[6] = tkhd->matrix[7] = 0;
+        tkhd->matrix[8] = 0x40000000;
+    }
+    /* visual presentation size */
+    tkhd->width = media_type == ISOM_MEDIA_HANDLER_TYPE_VIDEO_TRACK ? param->display_width : 0;
+    tkhd->height = media_type == ISOM_MEDIA_HANDLER_TYPE_VIDEO_TRACK ? param->display_height : 0;
+    /* Update next_track_ID if needed.
*/
+    if( root->moov->mvhd->next_track_ID <= tkhd->track_ID )
+        root->moov->mvhd->next_track_ID = tkhd->track_ID + 1;
+    return 0;
+}
+
+/* Copy the track header fields of track_ID into *param.
+ * aperture_modes reports whether a tapt box is present.
+ * Returns 0 on success, -1 when the track or its track header is missing. */
+int lsmash_get_track_parameters( lsmash_root_t *root, uint32_t track_ID, lsmash_track_parameters_t *param )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    /* tkhd is dereferenced below, so reject a track without one (as lsmash_get_track_ID does). */
+    if( !trak || !trak->tkhd )
+        return -1;
+    isom_tkhd_t *tkhd = trak->tkhd;
+    param->mode = tkhd->flags;
+    param->track_ID = tkhd->track_ID;
+    param->duration = tkhd->duration;
+    param->video_layer = tkhd->layer;
+    param->alternate_group = tkhd->alternate_group;
+    param->audio_volume = tkhd->volume;
+    for( int i = 0; i < 9; i++ )
+        param->matrix[i] = tkhd->matrix[i];
+    param->display_width = tkhd->width;
+    param->display_height = tkhd->height;
+    param->aperture_modes = !!trak->tapt;
+    return 0;
+}
+
+/* Store handler_name as the componentName of the media handler box of track_ID.
+ * The stored buffer is: one leading length byte when root->qt_compatible is set,
+ * the name bytes, and one trailing NUL when root->isom_compatible is set.
+ * In QuickTime mode the whole buffer is limited to 255 bytes, so longer names are truncated.
+ * Both flags are used as addends and offsets, so they are assumed to be 0 or 1 -- confirm.
+ * Returns 0 on success, -1 when the handler box is missing or allocation fails. */
+static int isom_set_media_handler_name( lsmash_root_t *root, uint32_t track_ID, char *handler_name )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->mdia || !trak->mdia->hdlr )
+        return -1;
+    isom_hdlr_t *hdlr = trak->mdia->hdlr;
+    uint8_t *name = NULL;
+    uint32_t name_length = strlen( handler_name ) + root->isom_compatible + root->qt_compatible;
+    if( root->qt_compatible )
+        name_length = LSMASH_MIN( name_length, 255 );
+    /* Number of name bytes that actually fit once the length byte and the terminator are
+     * accounted for. Copying strlen( handler_name ) bytes unconditionally would overrun
+     * the buffer whenever name_length has been clamped above. */
+    uint32_t copy_length = name_length - root->isom_compatible - root->qt_compatible;
+    if( name_length > hdlr->componentName_length && hdlr->componentName )
+        name = realloc( hdlr->componentName, name_length );
+    else if( !hdlr->componentName )
+        name = malloc( name_length );
+    else
+        name = hdlr->componentName;
+    if( !name )
+        return -1;
+    if( root->qt_compatible )
+        name[0] = name_length & 0xff;
+    memcpy( name + root->qt_compatible, handler_name, copy_length );
+    if( root->isom_compatible )
+        name[name_length - 1] = 0;
+    hdlr->componentName = name;
+    hdlr->componentName_length = name_length;
+    return 0;
+}
+
+static int isom_set_data_handler_name( lsmash_root_t *root, uint32_t track_ID, char *handler_name )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->mdia ||
!trak->mdia->minf || !trak->mdia->minf->hdlr )
+        return -1;
+    isom_hdlr_t *hdlr = trak->mdia->minf->hdlr;
+    uint8_t *name = NULL;
+    uint32_t name_length = strlen( handler_name ) + root->isom_compatible + root->qt_compatible;
+    if( root->qt_compatible )
+        name_length = LSMASH_MIN( name_length, 255 );
+    /* Number of name bytes that actually fit once the length byte and the terminator are
+     * accounted for. Copying strlen( handler_name ) bytes unconditionally would overrun
+     * the buffer whenever name_length has been clamped above. */
+    uint32_t copy_length = name_length - root->isom_compatible - root->qt_compatible;
+    if( name_length > hdlr->componentName_length && hdlr->componentName )
+        name = realloc( hdlr->componentName, name_length );
+    else if( !hdlr->componentName )
+        name = malloc( name_length );
+    else
+        name = hdlr->componentName;
+    if( !name )
+        return -1;
+    if( root->qt_compatible )
+        name[0] = name_length & 0xff;
+    memcpy( name + root->qt_compatible, handler_name, copy_length );
+    if( root->isom_compatible )
+        name[name_length - 1] = 0;
+    hdlr->componentName = name;
+    hdlr->componentName_length = name_length;
+    return 0;
+}
+
+/* Return the timescale stored in the media header of track_ID, or 0 when the track
+ * or its media header is missing. */
+uint32_t lsmash_get_media_timescale( lsmash_root_t *root, uint32_t track_ID )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->mdia || !trak->mdia->mdhd )
+        return 0;
+    return trak->mdia->mdhd->timescale;
+}
+
+/* Return the duration stored in the media header of track_ID, or 0 when the track
+ * or its media header is missing. */
+uint64_t lsmash_get_media_duration( lsmash_root_t *root, uint32_t track_ID )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->mdia || !trak->mdia->mdhd )
+        return 0;
+    return trak->mdia->mdhd->duration;
+}
+
+/* Return the duration stored in the track header of track_ID, or 0 when the track
+ * or its track header is missing. */
+uint64_t lsmash_get_track_duration( lsmash_root_t *root, uint32_t track_ID )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->tkhd )
+        return 0;
+    return trak->tkhd->duration;
+}
+
+/* Return the sample_delta of the last stts entry of track_ID, or 0 when any box
+ * on the way to that entry is missing. */
+uint32_t lsmash_get_last_sample_delta( lsmash_root_t *root, uint32_t track_ID )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->mdia || !trak->mdia->minf || !trak->mdia->minf->stbl
+     || !trak->mdia->minf->stbl->stts || !trak->mdia->minf->stbl->stts->list
+     || !trak->mdia->minf->stbl->stts->list->tail || !trak->mdia->minf->stbl->stts->list->tail->data )
+        return 0;
+    return ((isom_stts_entry_t
*)trak->mdia->minf->stbl->stts->list->tail->data)->sample_delta;
+}
+/* Return the sample_offset of the first ctts entry of track_ID, or 0 when any box
+ * on the way to that entry is missing. */
+uint32_t lsmash_get_start_time_offset( lsmash_root_t *root, uint32_t track_ID )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->mdia || !trak->mdia->minf || !trak->mdia->minf->stbl
+     || !trak->mdia->minf->stbl->ctts || !trak->mdia->minf->stbl->ctts->list
+     || !trak->mdia->minf->stbl->ctts->list->head || !trak->mdia->minf->stbl->ctts->list->head->data )
+        return 0;
+    return ((isom_ctts_entry_t *)trak->mdia->minf->stbl->ctts->list->head->data)->sample_offset;
+}
+/* Walk the stts and ctts tables of track_ID in parallel and return the largest amount
+ * by which a decode time exceeds the matching composition time.
+ * Returns 0 when a required box is missing, when the track has no samples, or when the
+ * file is neither QuickTime compatible nor (ISO version 4 or later with a version 1 ctts). */
+uint32_t lsmash_get_composition_to_decode_shift( lsmash_root_t *root, uint32_t track_ID )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->mdia || !trak->mdia->minf || !trak->mdia->minf->stbl )
+        return 0;
+    uint32_t sample_count = isom_get_sample_count( trak );
+    if( sample_count == 0 )
+        return 0;
+    isom_stbl_t *stbl = trak->mdia->minf->stbl;
+    if( !stbl->stts || !stbl->stts->list || !stbl->ctts || !stbl->ctts->list )
+        return 0;
+    if( !(root->max_isom_version >= 4 && stbl->ctts->version == 1) && !root->qt_compatible )
+        return 0;   /* This movie shall not have composition to decode timeline shift.
*/ + lsmash_entry_t *stts_entry = stbl->stts->list->head; + lsmash_entry_t *ctts_entry = stbl->ctts->list->head; + if( !stts_entry || !ctts_entry ) + return 0; + uint64_t dts = 0; + uint64_t cts = 0; + uint32_t ctd_shift = 0; + uint32_t i = 0; + uint32_t j = 0; + for( uint32_t k = 0; k < sample_count; i++ ) + { + isom_stts_entry_t *stts_data = (isom_stts_entry_t *)stts_entry->data; + isom_ctts_entry_t *ctts_data = (isom_ctts_entry_t *)ctts_entry->data; + if( !stts_data || !ctts_data ) + return 0; + cts = dts + (int32_t)ctts_data->sample_offset; + if( dts > cts + ctd_shift ) + ctd_shift = dts - cts; + dts += stts_data->sample_delta; + if( ++i == stts_data->sample_count ) + { + stts_entry = stts_entry->next; + if( !stts_entry ) + return 0; + i = 0; + } + if( ++j == ctts_data->sample_count ) + { + ctts_entry = ctts_entry->next; + if( !ctts_entry ) + return 0; + j = 0; + } + } + return ctd_shift; +} + +uint16_t lsmash_pack_iso_language( char *iso_language ) +{ + if( !iso_language || strlen( iso_language ) != 3 ) + return 0; + return (uint16_t)LSMASH_PACK_ISO_LANGUAGE( iso_language[0], iso_language[1], iso_language[2] ); +} + +static int isom_iso2mac_language( uint16_t ISO_language, uint16_t *MAC_language ) +{ + if( !MAC_language ) + return -1; + int i = 0; + for( ; isom_languages[i].iso_name; i++ ) + if( ISO_language == isom_languages[i].iso_name ) + break; + if( !isom_languages[i].iso_name ) + return -1; + *MAC_language = isom_languages[i].mac_value; + return 0; +} + +static int isom_mac2iso_language( uint16_t MAC_language, uint16_t *ISO_language ) +{ + if( !ISO_language ) + return -1; + int i = 0; + for( ; isom_languages[i].iso_name; i++ ) + if( MAC_language == isom_languages[i].mac_value ) + break; + *ISO_language = isom_languages[i].iso_name ? 
isom_languages[i].iso_name : ISOM_LANGUAGE_CODE_UNDEFINED;
+    return 0;
+}
+/* Store a language code in the media header of track_ID.
+ * ISO compatible files store an ISO code: ISO_language if non-zero, else MAC_language
+ * converted, else ISOM_LANGUAGE_CODE_UNDEFINED.
+ * Files that are only QuickTime compatible store a Macintosh code: ISO_language converted
+ * if non-zero, else MAC_language as given.
+ * Returns 0 on success, -1 when the media header is missing, a conversion fails, or the
+ * file is neither ISO nor QuickTime compatible. */
+static int isom_set_media_language( lsmash_root_t *root, uint32_t track_ID, uint16_t ISO_language, uint16_t MAC_language )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->mdia || !trak->mdia->mdhd )
+        return -1;
+    uint16_t language = 0;
+    if( root->isom_compatible )
+    {
+        if( ISO_language )
+            language = ISO_language;
+        else if( MAC_language )
+        {
+            if( isom_mac2iso_language( MAC_language, &language ) )
+                return -1;
+        }
+        else
+            language = ISOM_LANGUAGE_CODE_UNDEFINED;
+    }
+    else if( root->qt_compatible )
+    {
+        if( ISO_language )
+        {
+            if( isom_iso2mac_language( ISO_language, &language ) )
+                return -1;
+        }
+        else
+            language = MAC_language;
+    }
+    else
+        return -1;
+    trak->mdia->mdhd->language = language;
+    return 0;
+}
+/* Add a sample group description box and a sample to group box of grouping_type to the
+ * sample table of trak. Returns 0 on success, -1 when either add call yields a false value.
+ * The asserts document which file flavours each grouping type is expected with.
+ * NOTE(review): trak->mdia->minf->stbl is dereferenced unchecked; the one caller visible
+ * in this file validates it first -- confirm for any other caller. */
+static int isom_create_grouping( isom_trak_entry_t *trak, isom_grouping_type grouping_type )
+{
+    lsmash_root_t *root = trak->root;
+    switch( grouping_type )
+    {
+        case ISOM_GROUP_TYPE_RAP :
+            assert( root->max_isom_version >= 6 );
+            break;
+        case ISOM_GROUP_TYPE_ROLL :
+            assert( root->avc_extensions || root->qt_compatible );
+            break;
+        default :
+            assert( 0 );
+            break;
+    }
+    if( !isom_add_sgpd( trak->mdia->minf->stbl, grouping_type )
+     || !isom_add_sbgp( trak->mdia->minf->stbl, grouping_type ) )
+        return -1;
+    return 0;
+}
+/* Zero *param and set its timescale to 1.
+ * param must not be NULL (it is passed to memset unchecked). */
+void lsmash_initialize_media_parameters( lsmash_media_parameters_t *param )
+{
+    memset( param, 0, sizeof(lsmash_media_parameters_t) );
+    param->timescale = 1;
+}
+/* Apply *param to the media of track_ID: timescale, language, handler names and the
+ * optional roll and rap sample groupings. Returns 0 on success, -1 on failure.
+ * Note that the timescale is written before the later steps can fail. */
+int lsmash_set_media_parameters( lsmash_root_t *root, uint32_t track_ID, lsmash_media_parameters_t *param )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->mdia || !trak->mdia->mdhd || !trak->mdia->minf || !trak->mdia->minf->stbl )
+        return -1;
+    trak->mdia->mdhd->timescale = param->timescale;
+    if( isom_set_media_language( root, track_ID, param->ISO_language, param->MAC_language ) )
+        return -1;
+    if(
param->media_handler_name
+     && isom_set_media_handler_name( root, track_ID, param->media_handler_name ) )
+        return -1;
+    /* The data handler name is only written for QuickTime compatible files. */
+    if( root->qt_compatible && param->data_handler_name
+     && isom_set_data_handler_name( root, track_ID, param->data_handler_name ) )
+        return -1;
+    /* The groupings are created only when the file flavour matches the conditions
+     * asserted in isom_create_grouping. */
+    if( (root->avc_extensions || root->qt_compatible) && param->roll_grouping
+     && isom_create_grouping( trak, ISOM_GROUP_TYPE_ROLL ) )
+        return -1;
+    if( (root->max_isom_version >= 6) && param->rap_grouping
+     && isom_create_grouping( trak, ISOM_GROUP_TYPE_RAP ) )
+        return -1;
+    return 0;
+}
+/* Fill *param from the media header, handler boxes and sample table of track_ID.
+ * The handler names are copied into the shadow buffers inside *param and the name
+ * pointers are set to those buffers, or to NULL when a name is absent.
+ * Returns 0 on success, -1 when a required box is missing. */
+int lsmash_get_media_parameters( lsmash_root_t *root, uint32_t track_ID, lsmash_media_parameters_t *param )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->mdia || !trak->mdia->mdhd || !trak->mdia->hdlr
+     || !trak->mdia->minf || !trak->mdia->minf->stbl )
+        return -1;
+    isom_mdhd_t *mdhd = trak->mdia->mdhd;
+    isom_stbl_t *stbl = trak->mdia->minf->stbl;
+    isom_sbgp_entry_t *sbgp;
+    isom_sgpd_entry_t *sgpd;
+    param->timescale = mdhd->timescale;
+    param->handler_type = trak->mdia->hdlr->componentSubtype;
+    param->duration = mdhd->duration;
+    /* Whether sample grouping present. */
+    sbgp = isom_get_sample_to_group( stbl, ISOM_GROUP_TYPE_ROLL );
+    sgpd = isom_get_sample_group_description( stbl, ISOM_GROUP_TYPE_ROLL );
+    param->roll_grouping = sbgp && sgpd;
+    sbgp = isom_get_sample_to_group( stbl, ISOM_GROUP_TYPE_RAP );
+    sgpd = isom_get_sample_group_description( stbl, ISOM_GROUP_TYPE_RAP );
+    param->rap_grouping = sbgp && sgpd;
+    /* Get media language. */
+    if( mdhd->language >= 0x800 )   /* Values from 0x800 up are reported as ISO codes, smaller ones as Macintosh codes. */
+    {
+        param->MAC_language = 0;
+        param->ISO_language = mdhd->language;
+    }
+    else
+    {
+        param->MAC_language = mdhd->language;
+        param->ISO_language = 0;
+    }
+    /* Get handler name(s).
*/ + isom_hdlr_t *hdlr = trak->mdia->hdlr; + int length = LSMASH_MIN( 255, hdlr->componentName_length ); + if( length ) + { + memcpy( param->media_handler_name_shadow, hdlr->componentName + root->qt_compatible, length ); + param->media_handler_name_shadow[length - 2 + root->isom_compatible + root->qt_compatible] = '\0'; + param->media_handler_name = param->media_handler_name_shadow; + } + else + { + param->media_handler_name = NULL; + memset( param->media_handler_name_shadow, 0, sizeof(param->media_handler_name_shadow) ); + } + if( trak->mdia->minf->hdlr ) + { + hdlr = trak->mdia->minf->hdlr; + length = LSMASH_MIN( 255, hdlr->componentName_length ); + if( length ) + { + memcpy( param->data_handler_name_shadow, hdlr->componentName + root->qt_compatible, length ); + param->data_handler_name_shadow[length - 2 + root->isom_compatible + root->qt_compatible] = '\0'; + param->data_handler_name = param->data_handler_name_shadow; + } + else + { + param->data_handler_name = NULL; + memset( param->data_handler_name_shadow, 0, sizeof(param->data_handler_name_shadow) ); + } + } + else + { + param->data_handler_name = NULL; + memset( param->data_handler_name_shadow, 0, sizeof(param->data_handler_name_shadow) ); + } + return 0; +} + +/*---- movie manipulators ----*/ + +lsmash_root_t *lsmash_open_movie( const char *filename, lsmash_file_mode mode ) +{ + if( !filename ) + return NULL; + char open_mode[4] = { 0 }; + if( mode & LSMASH_FILE_MODE_WRITE ) + memcpy( open_mode, "w+b", 4 ); +#ifdef LSMASH_DEMUXER_ENABLED + else if( mode & LSMASH_FILE_MODE_READ ) + memcpy( open_mode, "rb", 3 ); +#endif + if( !open_mode[0] ) + return NULL; + lsmash_root_t *root = lsmash_malloc_zero( sizeof(lsmash_root_t) ); + if( !root ) + return NULL; + root->root = root; + root->bs = lsmash_malloc_zero( sizeof(lsmash_bs_t) ); + if( !root->bs ) + goto fail; + if( !strcmp( filename, "-" ) ) + { + if( mode & LSMASH_FILE_MODE_READ ) + root->bs->stream = stdin; + else if( (mode & LSMASH_FILE_MODE_WRITE) && 
(mode & LSMASH_FILE_MODE_FRAGMENTED) )
+            root->bs->stream = stdout;
+    }
+    else
+        root->bs->stream = fopen( filename, open_mode );
+    if( !root->bs->stream )     /* Also taken when - was requested with an unsupported mode combination. */
+        goto fail;
+    root->flags = mode;
+    if( mode & LSMASH_FILE_MODE_WRITE )
+    {
+        if( isom_add_moov( root ) || isom_add_mvhd( root->moov ) )
+            goto fail;
+        root->qt_compatible = 1;    /* QTFF is default file format. */
+    }
+#ifdef LSMASH_DEMUXER_ENABLED
+    if( (mode & (LSMASH_FILE_MODE_READ | LSMASH_FILE_MODE_DUMP)) )
+    {
+        if( isom_read_root( root ) )
+            goto fail;
+        root->max_read_size = 4 * 1024 * 1024;
+    }
+#endif
+    if( mode & LSMASH_FILE_MODE_FRAGMENTED )
+    {
+        root->fragment = lsmash_malloc_zero( sizeof(isom_fragment_manager_t) );
+        if( !root->fragment )
+            goto fail;
+        root->fragment->pool = lsmash_create_entry_list();
+        if( !root->fragment->pool )
+            goto fail;
+    }
+    return root;
+fail:
+    /* NOTE(review): presumably lsmash_destroy_root also releases root->bs and the stream;
+     * confirm that it does not close stdin or stdout. */
+    lsmash_destroy_root( root );
+    return NULL;
+}
+
+static int isom_finish_fragment_movie( lsmash_root_t *root );
+
+/* A movie fragment cannot switch a sample description to another.
+ * So you must call this function before switching sample descriptions. */
+int lsmash_create_fragment_movie( lsmash_root_t *root )
+{
+    if( !root || !root->bs || !root->fragment || !root->moov || !root->moov->trak_list )
+        return -1;
+    /* Finish the previous movie fragment before starting a new one. */
+    if( isom_finish_fragment_movie( root ) )
+        return -1;
+    /* We always hold only one movie fragment except for the initial movie (a pair of moov and mdat). */
+    if( root->fragment->movie && root->moof_list->entry_count != 1 )
+        return -1;
+    isom_moof_entry_t *moof = isom_add_moof( root );
+    if( isom_add_mfhd( moof ) )     /* NOTE(review): moof is not checked here; presumably isom_add_mfhd rejects NULL -- confirm. */
+        return -1;
+    root->fragment->movie = moof;
+    moof->mfhd->sequence_number = ++ root->fragment->fragment_count;
+    if( root->moof_list->entry_count == 1 )
+        return 0;
+    /* Remove the previous movie fragment.
*/
+    return lsmash_remove_entry( root->moof_list, 1, isom_remove_moof );
+}
+/* Set the major brand, minor version and compatible brands of the File Type Box.
+ * A brand_count of 0 removes the File Type Box; more than 50 brands is rejected.
+ * Returns -1 on failure, otherwise 0 or the result of isom_check_compatibility.
+ * NOTE(review): ftyp->size grows by 4 per brand on every call and is not reset here;
+ * confirm that it is recomputed elsewhere before the box is written.
+ * NOTE(review): brands is read unchecked when brand_count is non-zero. */
+static int isom_set_brands( lsmash_root_t *root, lsmash_brand_type major_brand, uint32_t minor_version, lsmash_brand_type *brands, uint32_t brand_count )
+{
+    if( brand_count > 50 )
+        return -1;      /* We support setting brands up to 50. */
+    if( !brand_count )
+    {
+        /* Absence of File Type Box means this file is a QuickTime or MP4 version 1 format file. */
+        if( root->ftyp )
+        {
+            if( root->ftyp->compatible_brands )
+                free( root->ftyp->compatible_brands );
+            free( root->ftyp );
+            root->ftyp = NULL;
+        }
+        return 0;
+    }
+    if( !root->ftyp && isom_add_ftyp( root ) )
+        return -1;
+    isom_ftyp_t *ftyp = root->ftyp;
+    ftyp->major_brand = major_brand;
+    ftyp->minor_version = minor_version;
+    lsmash_brand_type *compatible_brands;
+    if( !ftyp->compatible_brands )
+        compatible_brands = malloc( brand_count * sizeof(uint32_t) );
+    else
+        compatible_brands = realloc( ftyp->compatible_brands, brand_count * sizeof(uint32_t) );
+    if( !compatible_brands )    /* On realloc failure the old array stays owned by ftyp. */
+        return -1;
+    ftyp->compatible_brands = compatible_brands;
+    for( uint32_t i = 0; i < brand_count; i++ )
+    {
+        ftyp->compatible_brands[i] = brands[i];
+        ftyp->size += 4;
+    }
+    ftyp->brand_count = brand_count;
+    return isom_check_compatibility( root );
+}
+/* Zero *param, then set the default chunking limits, timescale, playback rate and volume.
+ * param must not be NULL (it is passed to memset unchecked). */
+void lsmash_initialize_movie_parameters( lsmash_movie_parameters_t *param )
+{
+    memset( param, 0, sizeof(lsmash_movie_parameters_t) );
+    param->max_chunk_duration = 0.5;
+    param->max_async_tolerance = 2.0;
+    param->max_chunk_size = 4 * 1024 * 1024;
+    param->max_read_size = 4 * 1024 * 1024;
+    param->timescale = 600;
+    param->playback_rate = 0x00010000;
+    param->playback_volume = 0x0100;
+}
+/* Apply *param to the movie: brands first, then the chunking limits and the movie header.
+ * Returns 0 on success, -1 when root, moov or mvhd is missing or setting the brands fails.
+ * NOTE(review): param is dereferenced without a NULL check. */
+int lsmash_set_movie_parameters( lsmash_root_t *root, lsmash_movie_parameters_t *param )
+{
+    if( !root || !root->moov || !root->moov->mvhd
+     || isom_set_brands( root, param->major_brand, param->minor_version, param->brands, param->number_of_brands ) )
+        return -1;
+    isom_mvhd_t *mvhd = root->moov->mvhd;
+
root->max_chunk_duration = param->max_chunk_duration;
+    /* The tolerance is never allowed to be smaller than twice the chunk duration. */
+    root->max_async_tolerance = LSMASH_MAX( param->max_async_tolerance, 2 * param->max_chunk_duration );
+    root->max_chunk_size = param->max_chunk_size;
+    root->max_read_size = param->max_read_size;
+    mvhd->timescale = param->timescale;
+    if( root->qt_compatible || root->itunes_movie )
+    {
+        /* QuickTime and iTunes files take the playback and preview fields from the caller. */
+        mvhd->rate = param->playback_rate;
+        mvhd->volume = param->playback_volume;
+        mvhd->previewTime = param->preview_time;
+        mvhd->previewDuration = param->preview_duration;
+        mvhd->posterTime = param->poster_time;
+    }
+    else
+    {
+        /* Other files get fixed values regardless of what the caller passed. */
+        mvhd->rate = 0x00010000;
+        mvhd->volume = 0x0100;
+        mvhd->previewTime = 0;
+        mvhd->previewDuration = 0;
+        mvhd->posterTime = 0;
+    }
+    return 0;
+}
+/* Fill *param from the File Type Box (if any), the root settings and the movie header.
+ * At most 50 compatible brands are copied into the shadow array inside *param.
+ * Returns 0 on success, -1 when root, moov or mvhd is missing.
+ * Note: when there is no File Type Box the brand fields of *param are left untouched. */
+int lsmash_get_movie_parameters( lsmash_root_t *root, lsmash_movie_parameters_t *param )
+{
+    if( !root || !root->moov || !root->moov->mvhd )
+        return -1;
+    isom_mvhd_t *mvhd = root->moov->mvhd;
+    if( root->ftyp )
+    {
+        isom_ftyp_t *ftyp = root->ftyp;
+        uint32_t brand_count = LSMASH_MIN( ftyp->brand_count, 50 ); /* brands up to 50 */
+        for( uint32_t i = 0; i < brand_count; i++ )
+            param->brands_shadow[i] = ftyp->compatible_brands[i];
+        param->major_brand = ftyp->major_brand;
+        param->brands = param->brands_shadow;
+        param->number_of_brands = brand_count;
+        param->minor_version = ftyp->minor_version;
+    }
+    param->max_chunk_duration = root->max_chunk_duration;
+    param->max_async_tolerance = root->max_async_tolerance;
+    param->max_chunk_size = root->max_chunk_size;
+    param->max_read_size = root->max_read_size;
+    param->timescale = mvhd->timescale;
+    param->duration = mvhd->duration;
+    param->playback_rate = mvhd->rate;
+    param->playback_volume = mvhd->volume;
+    param->preview_time = mvhd->previewTime;
+    param->preview_duration = mvhd->previewDuration;
+    param->poster_time = mvhd->posterTime;
+    param->number_of_tracks = root->moov->trak_list ?
root->moov->trak_list->entry_count : 0; + return 0; +} + +uint32_t lsmash_get_movie_timescale( lsmash_root_t *root ) +{ + if( !root || !root->moov || !root->moov->mvhd ) + return 0; + return root->moov->mvhd->timescale; +} + +int lsmash_set_free( lsmash_root_t *root, uint8_t *data, uint64_t data_length ) +{ + if( !root || !root->free || !data || !data_length ) + return -1; + isom_free_t *skip = root->free; + uint8_t *tmp = NULL; + if( !skip->data ) + tmp = malloc( data_length ); + else if( skip->length < data_length ) + tmp = realloc( skip->data, data_length ); + if( !tmp ) + return -1; + memcpy( tmp, data, data_length ); + skip->data = tmp; + skip->length = data_length; + return 0; +} + +int lsmash_add_free( lsmash_root_t *root, uint8_t *data, uint64_t data_length ) +{ + if( !root ) + return -1; + if( !root->free ) + { + isom_create_box( skip, root, ISOM_BOX_TYPE_FREE ); + root->free = skip; + } + if( data && data_length ) + return lsmash_set_free( root, data, data_length ); + return 0; +} + +int lsmash_create_object_descriptor( lsmash_root_t *root ) +{ + if( !root ) + return -1; + /* Return error if this file is not compatible with MP4 file format. */ + if( !root->mp4_version1 && !root->mp4_version2 ) + return -1; + return isom_add_iods( root->moov ); +} + +/*---- finishing functions ----*/ + +static int isom_set_fragment_overall_duration( lsmash_root_t *root ) +{ + isom_mvex_t *mvex = root->moov->mvex; + if( isom_add_mehd( mvex ) ) + return -1; + /* Get the longest duration of the tracks. 
*/
+    uint64_t longest_duration = 0;
+    for( lsmash_entry_t *entry = root->moov->trak_list->head; entry; entry = entry->next )
+    {
+        isom_trak_entry_t *trak = (isom_trak_entry_t *)entry->data;
+        if( !trak || !trak->cache || !trak->cache->fragment || !trak->mdia || !trak->mdia->mdhd || !trak->mdia->mdhd->timescale )
+            return -1;
+        uint64_t duration;
+        if( !trak->edts || !trak->edts->elst || !trak->edts->elst->list )
+        {
+            /* No edit list: take the end of the last sample and rescale it from the
+             * media timescale to the movie timescale. */
+            duration = trak->cache->fragment->largest_cts + trak->cache->fragment->last_duration;
+            duration = (uint64_t)(((double)duration / trak->mdia->mdhd->timescale) * root->moov->mvhd->timescale);
+        }
+        else
+        {
+            /* With an edit list: the sum of all segment durations, used as is. */
+            duration = 0;
+            for( lsmash_entry_t *elst_entry = trak->edts->elst->list->head; elst_entry; elst_entry = elst_entry->next )
+            {
+                isom_elst_entry_t *data = (isom_elst_entry_t *)elst_entry->data;
+                if( !data )
+                    return -1;
+                duration += data->segment_duration;
+            }
+        }
+        longest_duration = LSMASH_MAX( duration, longest_duration );
+    }
+    mvex->mehd->fragment_duration = longest_duration;
+    mvex->mehd->version = 1;
+    isom_update_mehd_size( mvex->mehd );
+    /* Write Movie Extends Header Box here. */
+    lsmash_bs_t *bs = root->bs;
+    FILE *stream = bs->stream;
+    uint64_t current_pos = lsmash_ftell( stream );
+    lsmash_fseek( stream, mvex->placeholder_pos, SEEK_SET );   /* NOTE(review): the seek results are not checked. */
+    int ret = isom_write_mehd( bs, mvex->mehd );
+    if( !ret )
+        ret = lsmash_bs_write_data( bs );
+    lsmash_fseek( stream, current_pos, SEEK_SET );     /* Return to where writing left off. */
+    return ret;
+}
+/* Rewrite the time field of every tfra entry so that it reflects the edit list of its
+ * track, then size and write the Movie Fragment Random Access Box.
+ * Returns 0 when there is no mvex or trex list; otherwise -1 on failure or the result
+ * of isom_write_mfra. */
+static int isom_write_fragment_random_access_info( lsmash_root_t *root )
+{
+    if( !root->moov->mvex || !root->moov->mvex->trex_list )
+        return 0;
+    /* Reconstruct the Movie Fragment Random Access Box.
+     * All 'time' field in the Track Fragment Random Access Boxes shall reflect edit list. */
+    uint32_t movie_timescale = lsmash_get_movie_timescale( root );
+    if( movie_timescale == 0 )
+        return -1;  /* Division by zero will occur.
*/ + for( lsmash_entry_t *trex_entry = root->moov->mvex->trex_list->head; trex_entry; trex_entry = trex_entry->next ) + { + isom_trex_entry_t *trex = (isom_trex_entry_t *)trex_entry->data; + if( !trex ) + return -1; + /* Get the edit list of the track associated with the trex->track_ID. + * If failed or absent, implicit timeline mapping edit is used, and skip this operation for the track. */ + isom_trak_entry_t *trak = isom_get_trak( root, trex->track_ID ); + if( !trak ) + return -1; + if( !trak->edts || !trak->edts->elst || !trak->edts->elst->list + || !trak->edts->elst->list->head || !trak->edts->elst->list->head->data ) + continue; + isom_elst_t *elst = trak->edts->elst; + /* Get the Track Fragment Random Access Boxes of the track associated with the trex->track_ID. + * If failed or absent, skip reconstructing the Track Fragment Random Access Box of the track. */ + isom_tfra_entry_t *tfra = isom_get_tfra( root->mfra, trex->track_ID ); + if( !tfra ) + continue; + /* Reconstruct the Track Fragment Random Access Box. */ + lsmash_entry_t *edit_entry = elst->list->head; + isom_elst_entry_t *edit = edit_entry->data; + uint64_t edit_offset = 0; /* units in media timescale */ + uint32_t media_timescale = lsmash_get_media_timescale( root, trex->track_ID ); + for( lsmash_entry_t *rap_entry = tfra->list->head; rap_entry; ) + { + isom_tfra_location_time_entry_t *rap = (isom_tfra_location_time_entry_t *)rap_entry->data; + if( !rap ) + { + /* Irregular case. Drop this entry. */ + lsmash_entry_t *next = rap_entry->next; + lsmash_remove_entry_direct( tfra->list, rap_entry, NULL ); + rap_entry = next; + continue; + } + uint64_t composition_time = rap->time; + /* Skip edits that doesn't need the current sync sample indicated in the Track Fragment Random Access Box. 
*/ + while( edit ) + { + uint64_t segment_duration = ((edit->segment_duration - 1) / movie_timescale + 1) * media_timescale; + if( edit->media_time != ISOM_EDIT_MODE_EMPTY + && composition_time < edit->media_time + segment_duration ) + break; /* This Timeline Mapping Edit might require the current sync sample. + * Note: this condition doesn't cover all cases. + * For instance, matching the both following conditions + * 1. A sync sample isn't in the presentation. + * 2. The other samples, which precede it in the composition timeline, is in the presentation. */ + edit_offset += segment_duration; + edit_entry = edit_entry->next; + if( !edit_entry ) + { + /* No more presentation. */ + edit = NULL; + break; + } + edit = edit_entry->data; + } + if( !edit ) + { + /* No more presentation. + * Drop the rest of sync samples since they are generally absent in the whole presentation. + * Though the exceptions are sync samples with earlier composition time, we ignore them. (SAP type 2: TEPT = TDEC = TSAP < TPTF) + * To support this exception, we need sorting entries of the list by composition times. */ + for( ; rap_entry; rap_entry = rap_entry->next ) + lsmash_remove_entry_direct( tfra->list, rap_entry, NULL ); + break; + } + /* If the sync sample isn't in the presentation, + * we pick the earliest presentation time of the current edit as its presentation time. */ + rap->time = edit_offset; + if( composition_time >= edit->media_time ) + rap->time += composition_time - edit->media_time; + rap_entry = rap_entry->next; + } + } + /* Decide the size of the Movie Fragment Random Access Box. */ + if( isom_update_mfra_size( root->mfra ) ) + return -1; + /* Write the Movie Fragment Random Access Box. */ + return isom_write_mfra( root->bs, root->mfra ); +} + +int lsmash_finish_movie( lsmash_root_t *root, lsmash_adhoc_remux_t* remux ) +{ + if( !root || !root->bs || !root->moov || !root->moov->trak_list ) + return -1; + if( root->fragment ) + { + /* Output the final movie fragment. 
*/ + if( isom_finish_fragment_movie( root ) ) + return -1; + if( root->bs->stream == stdout ) + return 0; + /* Write the overall random access information at the tail of the movie. */ + if( isom_write_fragment_random_access_info( root ) ) + return -1; + /* Set overall duration of the movie. */ + return isom_set_fragment_overall_duration( root ); + } + isom_moov_t *moov = root->moov; + for( lsmash_entry_t *entry = moov->trak_list->head; entry; entry = entry->next ) + { + isom_trak_entry_t *trak = (isom_trak_entry_t *)entry->data; + if( !trak || !trak->cache || !trak->tkhd || !trak->mdia || !trak->mdia->minf || !trak->mdia->minf->stbl ) + return -1; + uint32_t track_ID = trak->tkhd->track_ID; + uint32_t related_track_ID = trak->related_track_ID; + /* Disable the track if the track is a track reference chapter. */ + if( trak->is_chapter ) + trak->tkhd->flags &= ~ISOM_TRACK_ENABLED; + if( trak->is_chapter && related_track_ID ) + { + /* In order that the track duration of the chapter track doesn't exceed that of the related track. */ + lsmash_edit_t edit; + edit.duration = LSMASH_MIN( trak->tkhd->duration, lsmash_get_track_duration( root, related_track_ID ) ); + edit.start_time = 0; + edit.rate = ISOM_EDIT_MODE_NORMAL; + if( lsmash_create_explicit_timeline_map( root, track_ID, edit ) ) + return -1; + } + /* Add stss box if any samples aren't sync sample. */ + isom_stbl_t *stbl = trak->mdia->minf->stbl; + if( !trak->cache->all_sync && !stbl->stss && isom_add_stss( stbl ) ) + return -1; + if( isom_update_bitrate_description( trak->mdia ) ) + return -1; + } + if( root->mp4_version1 == 1 && isom_add_iods( moov ) ) + return -1; + if( isom_check_mandatory_boxes( root ) + || isom_set_movie_creation_time( root ) + || isom_update_moov_size( moov ) + || isom_write_mdat_size( root ) ) + return -1; + + lsmash_bs_t *bs = root->bs; + uint64_t meta_size = root->meta ? 
root->meta->size : 0; + if( !remux ) + { + if( isom_write_moov( root ) + || isom_write_meta( bs, root->meta ) ) + return -1; + root->size += moov->size + meta_size; + return 0; + } + + /* stco->co64 conversion, depending on last chunk's offset */ + for( lsmash_entry_t* entry = moov->trak_list->head; entry; ) + { + isom_trak_entry_t* trak = (isom_trak_entry_t*)entry->data; + isom_stco_t* stco = trak->mdia->minf->stbl->stco; + if( !stco->list->tail ) + return -1; + if( stco->large_presentation + || (((isom_stco_entry_t*)stco->list->tail->data)->chunk_offset + moov->size + meta_size) <= UINT32_MAX ) + { + entry = entry->next; + continue; /* no need to convert stco into co64 */ + } + /* stco->co64 conversion */ + if( isom_convert_stco_to_co64( trak->mdia->minf->stbl ) + || isom_update_moov_size( moov ) ) + return -1; + entry = moov->trak_list->head; /* whenever any conversion, re-check all traks */ + } + + /* now the amount of offset is fixed. */ + uint64_t mtf_size = moov->size + meta_size; /* sum of size of boxes moved to front */ + + /* buffer size must be at least mtf_size * 2 */ + remux->buffer_size = LSMASH_MAX( remux->buffer_size, mtf_size * 2 ); + + uint8_t* buf[2]; + if( (buf[0] = (uint8_t*)malloc( remux->buffer_size )) == NULL ) + return -1; /* NOTE: i think we still can fallback to "return isom_write_moov( root );" here. */ + uint64_t size = remux->buffer_size / 2; + buf[1] = buf[0] + size; /* split to 2 buffers */ + + /* now the amount of offset is fixed. 
apply that to stco/co64 */ + for( lsmash_entry_t* entry = moov->trak_list->head; entry; entry = entry->next ) + { + isom_stco_t* stco = ((isom_trak_entry_t*)entry->data)->mdia->minf->stbl->stco; + if( stco->large_presentation ) + for( lsmash_entry_t* co64_entry = stco->list->head ; co64_entry ; co64_entry = co64_entry->next ) + ((isom_co64_entry_t*)co64_entry->data)->chunk_offset += mtf_size; + else + for( lsmash_entry_t* stco_entry = stco->list->head ; stco_entry ; stco_entry = stco_entry->next ) + ((isom_stco_entry_t*)stco_entry->data)->chunk_offset += mtf_size; + } + + FILE *stream = bs->stream; + isom_mdat_t *mdat = root->mdat; + uint64_t total = root->size + mtf_size; + uint64_t readnum; + /* backup starting area of mdat and write moov + meta there instead */ + if( lsmash_fseek( stream, mdat->placeholder_pos, SEEK_SET ) ) + goto fail; + readnum = fread( buf[0], 1, size, stream ); + uint64_t read_pos = lsmash_ftell( stream ); + + /* write moov + meta there instead */ + if( lsmash_fseek( stream, mdat->placeholder_pos, SEEK_SET ) + || isom_write_moov( root ) + || isom_write_meta( bs, root->meta ) ) + goto fail; + uint64_t write_pos = lsmash_ftell( stream ); + + mdat->placeholder_pos += mtf_size; /* update placeholder */ + + /* copy-pastan */ + int buf_switch = 1; + while( readnum == size ) + { + if( lsmash_fseek( stream, read_pos, SEEK_SET ) ) + goto fail; + readnum = fread( buf[buf_switch], 1, size, stream ); + read_pos = lsmash_ftell( stream ); + + buf_switch ^= 0x1; + + if( lsmash_fseek( stream, write_pos, SEEK_SET ) + || fwrite( buf[buf_switch], 1, size, stream ) != size ) + goto fail; + write_pos = lsmash_ftell( stream ); + if( remux->func ) remux->func( remux->param, write_pos, total ); // FIXME: + } + if( fwrite( buf[buf_switch^0x1], 1, readnum, stream ) != readnum ) + goto fail; + if( remux->func ) remux->func( remux->param, total, total ); // FIXME: + + root->size += mtf_size; + free( buf[0] ); + return 0; + +fail: + free( buf[0] ); + return -1; +} + 
+#define GET_MOST_USED( box_name, index, flag_name ) \ + if( most_used[index] < stats.flag_name[i] ) \ + { \ + most_used[index] = stats.flag_name[i]; \ + box_name->default_sample_flags.flag_name = i; \ + } + +static int isom_create_fragment_overall_default_settings( lsmash_root_t *root ) +{ + if( isom_add_mvex( root->moov ) ) + return -1; + for( lsmash_entry_t *trak_entry = root->moov->trak_list->head; trak_entry; trak_entry = trak_entry->next ) + { + isom_trak_entry_t *trak = (isom_trak_entry_t *)trak_entry->data; + if( !trak || !trak->cache || !trak->tkhd || !trak->mdia || !trak->mdia->minf || !trak->mdia->minf->stbl ) + return -1; + isom_stbl_t *stbl = trak->mdia->minf->stbl; + if( !stbl->stts || !stbl->stts->list || !stbl->stsz + || (stbl->stts->list->tail && !stbl->stts->list->tail->data) + || (stbl->stsz->list && stbl->stsz->list->head && !stbl->stsz->list->head->data) ) + return -1; + isom_trex_entry_t *trex = isom_add_trex( root->moov->mvex ); + if( !trex ) + return -1; + trex->track_ID = trak->tkhd->track_ID; + /* Set up defaults. */ + trex->default_sample_description_index = trak->cache->chunk.sample_description_index ? trak->cache->chunk.sample_description_index : 1; + trex->default_sample_duration = stbl->stts->list->tail ? ((isom_stts_entry_t *)stbl->stts->list->tail->data)->sample_delta : 1; + trex->default_sample_size = !stbl->stsz->list + ? stbl->stsz->sample_size : stbl->stsz->list->head + ? 
((isom_stsz_entry_t *)stbl->stsz->list->head->data)->entry_size : 0; + if( stbl->sdtp && stbl->sdtp->list ) + { + struct sample_flags_stats_t + { + uint32_t is_leading [4]; + uint32_t sample_depends_on [4]; + uint32_t sample_is_depended_on[4]; + uint32_t sample_has_redundancy[4]; + } stats = { { 0 }, { 0 }, { 0 }, { 0 } }; + for( lsmash_entry_t *sdtp_entry = stbl->sdtp->list->head; sdtp_entry; sdtp_entry = sdtp_entry->next ) + { + isom_sdtp_entry_t *data = (isom_sdtp_entry_t *)sdtp_entry->data; + if( !data ) + return -1; + ++ stats.is_leading [ data->is_leading ]; + ++ stats.sample_depends_on [ data->sample_depends_on ]; + ++ stats.sample_is_depended_on[ data->sample_is_depended_on ]; + ++ stats.sample_has_redundancy[ data->sample_has_redundancy ]; + } + uint32_t most_used[4] = { 0, 0, 0, 0 }; + for( int i = 0; i < 4; i++ ) + { + GET_MOST_USED( trex, 0, is_leading ); + GET_MOST_USED( trex, 1, sample_depends_on ); + GET_MOST_USED( trex, 2, sample_is_depended_on ); + GET_MOST_USED( trex, 3, sample_has_redundancy ); + } + } + trex->default_sample_flags.sample_is_non_sync_sample = !trak->cache->all_sync; + } + return 0; +} + +static int isom_prepare_random_access_info( lsmash_root_t *root ) +{ + if( root->bs->stream == stdout ) + return 0; + if( isom_add_mfra( root ) + || isom_add_mfro( root->mfra ) ) + return -1; + return 0; +} + +static int isom_output_fragment_media_data( lsmash_root_t *root ) +{ + isom_fragment_manager_t *fragment = root->fragment; + if( !fragment->pool->entry_count ) + { + /* no need to write media data */ + lsmash_remove_entries( fragment->pool, lsmash_delete_sample ); + fragment->pool_size = 0; + return 0; + } + /* If there is no available Media Data Box to write samples, add and write a new one. */ + if( isom_new_mdat( root, fragment->pool_size ) ) + return -1; + /* Write samples in the current movie fragment. 
*/ + for( lsmash_entry_t* entry = fragment->pool->head; entry; entry = entry->next ) + { + isom_sample_pool_t *pool = (isom_sample_pool_t *)entry->data; + if( !pool ) + return -1; + lsmash_bs_put_bytes( root->bs, pool->size, pool->data ); + } + if( lsmash_bs_write_data( root->bs ) ) + return -1; + root->size += root->mdat->size; + lsmash_remove_entries( fragment->pool, isom_remove_sample_pool ); + fragment->pool_size = 0; + return 0; +} + +static int isom_finish_fragment_initial_movie( lsmash_root_t *root ) +{ + if( !root->moov || !root->moov->trak_list ) + return -1; + isom_moov_t *moov = root->moov; + for( lsmash_entry_t *entry = moov->trak_list->head; entry; entry = entry->next ) + { + isom_trak_entry_t *trak = (isom_trak_entry_t *)entry->data; + if( !trak || !trak->cache || !trak->tkhd || !trak->mdia || !trak->mdia->mdhd || !trak->mdia->minf || !trak->mdia->minf->stbl ) + return -1; + if( isom_get_sample_count( trak ) ) + { + /* Add stss box if any samples aren't sync sample. */ + isom_stbl_t *stbl = trak->mdia->minf->stbl; + if( !trak->cache->all_sync && !stbl->stss && isom_add_stss( stbl ) ) + return -1; + } + else + trak->tkhd->duration = 0; + if( isom_update_bitrate_description( trak->mdia ) ) + return -1; + } + if( root->mp4_version1 == 1 && isom_add_iods( moov ) ) + return -1; + if( isom_create_fragment_overall_default_settings( root ) + || isom_prepare_random_access_info( root ) + || isom_check_mandatory_boxes( root ) + || isom_set_movie_creation_time( root ) + || isom_update_moov_size( moov ) ) + return -1; + /* stco->co64 conversion, depending on last chunk's offset */ + uint64_t meta_size = root->meta ? 
root->meta->size : 0; + for( lsmash_entry_t* entry = moov->trak_list->head; entry; ) + { + isom_trak_entry_t* trak = (isom_trak_entry_t*)entry->data; + isom_stco_t* stco = trak->mdia->minf->stbl->stco; + if( !stco->list->tail /* no samples */ + || stco->large_presentation + || (((isom_stco_entry_t*)stco->list->tail->data)->chunk_offset + moov->size + meta_size) <= UINT32_MAX ) + { + entry = entry->next; + continue; /* no need to convert stco into co64 */ + } + /* stco->co64 conversion */ + if( isom_convert_stco_to_co64( trak->mdia->minf->stbl ) + || isom_update_moov_size( moov ) ) + return -1; + entry = moov->trak_list->head; /* whenever any conversion, re-check all traks */ + } + /* Now, the amount of offset is fixed. Apply that to stco/co64. */ + uint64_t preceding_size = moov->size + meta_size; + for( lsmash_entry_t* entry = moov->trak_list->head; entry; entry = entry->next ) + { + isom_stco_t* stco = ((isom_trak_entry_t*)entry->data)->mdia->minf->stbl->stco; + if( stco->large_presentation ) + for( lsmash_entry_t* co64_entry = stco->list->head ; co64_entry ; co64_entry = co64_entry->next ) + ((isom_co64_entry_t*)co64_entry->data)->chunk_offset += preceding_size; + else + for( lsmash_entry_t* stco_entry = stco->list->head ; stco_entry ; stco_entry = stco_entry->next ) + ((isom_stco_entry_t*)stco_entry->data)->chunk_offset += preceding_size; + } + /* Write File Type Box here if it was not written yet. */ + if( !root->file_type_written && isom_write_ftyp( root ) ) + return -1; + /* Write Movie Box. */ + if( isom_write_moov( root ) + || isom_write_meta( root->bs, root->meta ) ) + return -1; + root->size += preceding_size; + /* Output samples. */ + return isom_output_fragment_media_data( root ); +} + +/* Return 1 if there is diffrence, otherwise return 0. 
*/ +static int isom_compare_sample_flags( isom_sample_flags_t *a, isom_sample_flags_t *b ) +{ + return (a->reserved != b->reserved) + || (a->is_leading != b->is_leading) + || (a->sample_depends_on != b->sample_depends_on) + || (a->sample_is_depended_on != b->sample_is_depended_on) + || (a->sample_has_redundancy != b->sample_has_redundancy) + || (a->sample_padding_value != b->sample_padding_value) + || (a->sample_is_non_sync_sample != b->sample_is_non_sync_sample) + || (a->sample_degradation_priority != b->sample_degradation_priority); +} + +static int isom_finish_fragment_movie( lsmash_root_t *root ) +{ + if( !root->moov || !root->moov->trak_list || !root->fragment || !root->fragment->pool ) + return -1; + isom_moof_entry_t *moof = root->fragment->movie; + if( !moof ) + return isom_finish_fragment_initial_movie( root ); + /* Calculate appropriate default_sample_flags of each Track Fragment Header Box. + * And check whether that default_sample_flags is useful or not. */ + for( lsmash_entry_t *entry = moof->traf_list->head; entry; entry = entry->next ) + { + isom_traf_entry_t *traf = (isom_traf_entry_t *)entry->data; + if( !traf || !traf->tfhd || !traf->root || !traf->root->moov || !traf->root->moov->mvex ) + return -1; + isom_tfhd_t *tfhd = traf->tfhd; + isom_trex_entry_t *trex = isom_get_trex( root->moov->mvex, tfhd->track_ID ); + if( !trex ) + return -1; + struct sample_flags_stats_t + { + uint32_t is_leading [4]; + uint32_t sample_depends_on [4]; + uint32_t sample_is_depended_on [4]; + uint32_t sample_has_redundancy [4]; + uint32_t sample_is_non_sync_sample[2]; + } stats = { { 0 }, { 0 }, { 0 }, { 0 }, { 0 } }; + for( lsmash_entry_t *trun_entry = traf->trun_list->head; trun_entry; trun_entry = trun_entry->next ) + { + isom_trun_entry_t *trun = (isom_trun_entry_t *)trun_entry->data; + if( !trun || !trun->sample_count ) + return -1; + isom_sample_flags_t *sample_flags; + if( trun->flags & ISOM_TR_FLAGS_SAMPLE_FLAGS_PRESENT ) + { + if( !trun->optional ) + return -1; 
+ for( lsmash_entry_t *optional_entry = trun->optional->head; optional_entry; optional_entry = optional_entry->next ) + { + isom_trun_optional_row_t *row = (isom_trun_optional_row_t *)optional_entry->data; + if( !row ) + return -1; + sample_flags = &row->sample_flags; + ++ stats.is_leading [ sample_flags->is_leading ]; + ++ stats.sample_depends_on [ sample_flags->sample_depends_on ]; + ++ stats.sample_is_depended_on [ sample_flags->sample_is_depended_on ]; + ++ stats.sample_has_redundancy [ sample_flags->sample_has_redundancy ]; + ++ stats.sample_is_non_sync_sample[ sample_flags->sample_is_non_sync_sample ]; + } + } + else + { + sample_flags = &tfhd->default_sample_flags; + stats.is_leading [ sample_flags->is_leading ] += trun->sample_count; + stats.sample_depends_on [ sample_flags->sample_depends_on ] += trun->sample_count; + stats.sample_is_depended_on [ sample_flags->sample_is_depended_on ] += trun->sample_count; + stats.sample_has_redundancy [ sample_flags->sample_has_redundancy ] += trun->sample_count; + stats.sample_is_non_sync_sample[ sample_flags->sample_is_non_sync_sample ] += trun->sample_count; + } + } + uint32_t most_used[5] = { 0, 0, 0, 0, 0 }; + for( int i = 0; i < 4; i++ ) + { + GET_MOST_USED( tfhd, 0, is_leading ); + GET_MOST_USED( tfhd, 1, sample_depends_on ); + GET_MOST_USED( tfhd, 2, sample_is_depended_on ); + GET_MOST_USED( tfhd, 3, sample_has_redundancy ); + if( i < 2 ) + GET_MOST_USED( tfhd, 4, sample_is_non_sync_sample ); + } + int useful_default_sample_duration = 0; + int useful_default_sample_size = 0; + for( lsmash_entry_t *trun_entry = traf->trun_list->head; trun_entry; trun_entry = trun_entry->next ) + { + isom_trun_entry_t *trun = (isom_trun_entry_t *)trun_entry->data; + if( !(trun->flags & ISOM_TR_FLAGS_SAMPLE_DURATION_PRESENT) ) + useful_default_sample_duration = 1; + if( !(trun->flags & ISOM_TR_FLAGS_SAMPLE_SIZE_PRESENT) ) + useful_default_sample_size = 1; + int useful_first_sample_flags = 1; + int useful_default_sample_flags = 1; + 
if( trun->sample_count == 1 ) + { + /* It is enough to check only if first_sample_flags equals default_sample_flags or not. + * If it is equal, just use default_sample_flags. + * If not, just use first_sample_flags of this run. */ + if( !isom_compare_sample_flags( &trun->first_sample_flags, &tfhd->default_sample_flags ) ) + useful_first_sample_flags = 0; + } + else if( trun->optional && trun->optional->head ) + { + lsmash_entry_t *optional_entry = trun->optional->head->next; + isom_trun_optional_row_t *row = (isom_trun_optional_row_t *)optional_entry->data; + isom_sample_flags_t representative_sample_flags = row->sample_flags; + if( isom_compare_sample_flags( &tfhd->default_sample_flags, &representative_sample_flags ) ) + useful_default_sample_flags = 0; + if( !isom_compare_sample_flags( &trun->first_sample_flags, &representative_sample_flags ) ) + useful_first_sample_flags = 0; + if( useful_default_sample_flags ) + for( optional_entry = optional_entry->next; optional_entry; optional_entry = optional_entry->next ) + { + row = (isom_trun_optional_row_t *)optional_entry->data; + if( isom_compare_sample_flags( &representative_sample_flags, &row->sample_flags ) ) + { + useful_default_sample_flags = 0; + break; + } + } + } + if( useful_default_sample_flags ) + { + tfhd->flags |= ISOM_TF_FLAGS_DEFAULT_SAMPLE_FLAGS_PRESENT; + trun->flags &= ~ISOM_TR_FLAGS_SAMPLE_FLAGS_PRESENT; + } + else + { + useful_first_sample_flags = 0; + trun->flags |= ISOM_TR_FLAGS_SAMPLE_FLAGS_PRESENT; + } + if( useful_first_sample_flags ) + trun->flags |= ISOM_TR_FLAGS_FIRST_SAMPLE_FLAGS_PRESENT; + } + if( useful_default_sample_duration && tfhd->default_sample_duration != trex->default_sample_duration ) + tfhd->flags |= ISOM_TF_FLAGS_DEFAULT_SAMPLE_DURATION_PRESENT; + else + tfhd->default_sample_duration = trex->default_sample_duration; /* This might be redundant, but is to be more natural. 
*/ + if( useful_default_sample_size && tfhd->default_sample_size != trex->default_sample_size ) + tfhd->flags |= ISOM_TF_FLAGS_DEFAULT_SAMPLE_SIZE_PRESENT; + else + tfhd->default_sample_size = trex->default_sample_size; /* This might be redundant, but is to be more natural. */ + if( !(tfhd->flags & ISOM_TF_FLAGS_DEFAULT_SAMPLE_FLAGS_PRESENT) ) + tfhd->default_sample_flags = trex->default_sample_flags; /* This might be redundant, but is to be more natural. */ + else if( !isom_compare_sample_flags( &tfhd->default_sample_flags, &trex->default_sample_flags ) ) + tfhd->flags &= ~ISOM_TF_FLAGS_DEFAULT_SAMPLE_FLAGS_PRESENT; + } + /* When using for live streaming, setting explicit base_data_offset is not preferable. + * However, it's OK because we haven't supported this yet. + * Implicit base_data_offsets that originate in the first byte of each Movie Fragment Box will be implemented + * by the feature of ISO Base Media File Format version 5 or later. + * Media Data Box starts immediately after Movie Fragment Box. */ + for( lsmash_entry_t *entry = moof->traf_list->head; entry; entry = entry->next ) + { + isom_traf_entry_t *traf = (isom_traf_entry_t *)entry->data; + traf->tfhd->flags |= ISOM_TF_FLAGS_BASE_DATA_OFFSET_PRESENT; + } + /* Consider the update of tf_flags here. */ + if( isom_update_moof_entry_size( moof ) ) + return -1; + /* Now, we can calculate offsets in the current movie fragment, so do it. */ + for( lsmash_entry_t *entry = moof->traf_list->head; entry; entry = entry->next ) + { + isom_traf_entry_t *traf = (isom_traf_entry_t *)entry->data; + traf->tfhd->base_data_offset = root->size + moof->size + ISOM_BASEBOX_COMMON_SIZE; + } + if( isom_write_moof( root->bs, moof ) ) + return -1; + root->size += moof->size; + /* Output samples. 
*/ + return isom_output_fragment_media_data( root ); +} + +#undef GET_MOST_USED + +static isom_trun_optional_row_t *isom_request_trun_optional_row( isom_trun_entry_t *trun, isom_tfhd_t *tfhd, uint32_t sample_number ) +{ + isom_trun_optional_row_t *row = NULL; + if( !trun->optional ) + { + trun->optional = lsmash_create_entry_list(); + if( !trun->optional ) + return NULL; + } + if( trun->optional->entry_count < sample_number ) + { + while( trun->optional->entry_count < sample_number ) + { + row = malloc( sizeof(isom_trun_optional_row_t) ); + if( !row ) + return NULL; + /* Copy from default. */ + row->sample_duration = tfhd->default_sample_duration; + row->sample_size = tfhd->default_sample_size; + row->sample_flags = tfhd->default_sample_flags; + row->sample_composition_time_offset = 0; + if( lsmash_add_entry( trun->optional, row ) ) + { + free( row ); + return NULL; + } + } + return row; + } + uint32_t i = 0; + for( lsmash_entry_t *entry = trun->optional->head; entry; entry = entry->next ) + { + row = (isom_trun_optional_row_t *)entry->data; + if( !row ) + return NULL; + if( ++i == sample_number ) + return row; + } + return NULL; +} + +int lsmash_create_fragment_empty_duration( lsmash_root_t *root, uint32_t track_ID, uint32_t duration ) +{ + if( !root || !root->fragment || !root->fragment->movie || !root->moov ) + return -1; + isom_trak_entry_t *trak = isom_get_trak( root, track_ID ); + if( !trak || !trak->tkhd ) + return -1; + isom_trex_entry_t *trex = isom_get_trex( root->moov->mvex, track_ID ); + if( !trex ) + return -1; + isom_moof_entry_t *moof = root->fragment->movie; + isom_traf_entry_t *traf = isom_get_traf( moof, track_ID ); + if( traf ) + return -1; + traf = isom_add_traf( root, moof ); + if( isom_add_tfhd( traf ) ) + return -1; + isom_tfhd_t *tfhd = traf->tfhd; + tfhd->flags = ISOM_TF_FLAGS_DURATION_IS_EMPTY; /* no samples for this track fragment yet */ + tfhd->track_ID = trak->tkhd->track_ID; + tfhd->default_sample_duration = duration; + if( duration != 
trex->default_sample_duration ) + tfhd->flags |= ISOM_TF_FLAGS_DEFAULT_SAMPLE_DURATION_PRESENT; + traf->cache = trak->cache; + traf->cache->fragment->traf_number = moof->traf_list->entry_count; + traf->cache->fragment->last_duration += duration; /* The duration of the last sample includes this empty-duration. */ + return 0; +} + +static int isom_set_fragment_last_duration( isom_traf_entry_t *traf, uint32_t last_duration ) +{ + isom_tfhd_t *tfhd = traf->tfhd; + if( !traf->trun_list || !traf->trun_list->tail || !traf->trun_list->tail->data ) + { + /* There are no track runs in this track fragment, so it is a empty-duration. */ + isom_trex_entry_t *trex = isom_get_trex( traf->root->moov->mvex, tfhd->track_ID ); + if( !trex ) + return -1; + tfhd->flags |= ISOM_TF_FLAGS_DURATION_IS_EMPTY; + if( last_duration != trex->default_sample_duration ) + tfhd->flags |= ISOM_TF_FLAGS_DEFAULT_SAMPLE_DURATION_PRESENT; + tfhd->default_sample_duration = last_duration; + traf->cache->fragment->last_duration = last_duration; + return 0; + } + /* Update the last sample_duration if needed. 
*/ + isom_trun_entry_t *trun = (isom_trun_entry_t *)traf->trun_list->tail->data; + if( trun->sample_count == 1 && traf->trun_list->entry_count == 1 ) + { + isom_trex_entry_t *trex = isom_get_trex( traf->root->moov->mvex, tfhd->track_ID ); + if( !trex ) + return -1; + if( last_duration != trex->default_sample_duration ) + tfhd->flags |= ISOM_TF_FLAGS_DEFAULT_SAMPLE_DURATION_PRESENT; + tfhd->default_sample_duration = last_duration; + } + else if( last_duration != tfhd->default_sample_duration ) + trun->flags |= ISOM_TR_FLAGS_SAMPLE_DURATION_PRESENT; + if( trun->flags ) + { + isom_trun_optional_row_t *row = isom_request_trun_optional_row( trun, tfhd, trun->sample_count ); + if( !row ) + return -1; + row->sample_duration = last_duration; + } + traf->cache->fragment->last_duration = last_duration; + return 0; +} + +int lsmash_set_last_sample_delta( lsmash_root_t *root, uint32_t track_ID, uint32_t sample_delta ) +{ + if( !root || !track_ID ) + return -1; + if( root->fragment && root->fragment->movie ) + { + isom_traf_entry_t *traf = isom_get_traf( root->fragment->movie, track_ID ); + if( !traf || !traf->cache || !traf->tfhd || !traf->trun_list ) + return -1; + return isom_set_fragment_last_duration( traf, sample_delta ); + } + isom_trak_entry_t *trak = isom_get_trak( root, track_ID ); + if( !trak || !trak->mdia || !trak->mdia->mdhd || !trak->mdia->minf || !trak->mdia->minf->stbl + || !trak->mdia->minf->stbl->stsz || !trak->mdia->minf->stbl->stts || !trak->mdia->minf->stbl->stts->list ) + return -1; + isom_stbl_t *stbl = trak->mdia->minf->stbl; + isom_stts_t *stts = stbl->stts; + uint32_t sample_count = isom_get_sample_count( trak ); + if( !stts->list->tail ) + { + if( !sample_count ) + return 0; /* no samples */ + if( sample_count > 1 ) + return -1; /* irregular sample_count */ + if( isom_add_stts_entry( stbl, sample_delta ) ) + return -1; + return lsmash_update_track_duration( root, track_ID, 0 ); + } + uint32_t i = 0; + for( lsmash_entry_t *entry = stts->list->head; 
entry; entry = entry->next ) + i += ((isom_stts_entry_t *)entry->data)->sample_count; + if( sample_count < i ) + return -1; + isom_stts_entry_t *last_stts_data = (isom_stts_entry_t *)stts->list->tail->data; + if( !last_stts_data ) + return -1; + if( sample_count > i ) + { + if( sample_count - i > 1 ) + return -1; + /* Add a sample_delta. */ + if( sample_delta == last_stts_data->sample_delta ) + ++ last_stts_data->sample_count; + else if( isom_add_stts_entry( stbl, sample_delta ) ) + return -1; + } + else if( sample_count == i && isom_replace_last_sample_delta( stbl, sample_delta ) ) + return -1; + return lsmash_update_track_duration( root, track_ID, sample_delta ); +} + +void lsmash_discard_boxes( lsmash_root_t *root ) +{ + if( !root ) + return; + isom_remove_ftyp( root->ftyp ); + isom_remove_moov( root ); + lsmash_remove_list( root->moof_list, isom_remove_moof ); + isom_remove_mdat( root->mdat ); + isom_remove_free( root->free ); + isom_remove_meta( root->meta ); + isom_remove_mfra( root->mfra ); + root->ftyp = NULL; + root->moov = NULL; + root->moof_list = NULL; + root->mdat = NULL; + root->free = NULL; + root->mfra = NULL; +} + +void lsmash_destroy_root( lsmash_root_t *root ) +{ + if( !root ) + return; +#ifdef LSMASH_DEMUXER_ENABLED + isom_remove_print_funcs( root ); + isom_remove_timelines( root ); +#endif + lsmash_discard_boxes( root ); + if( root->bs ) + { + if( root->bs->stream ) + fclose( root->bs->stream ); + if( root->bs->data ) + free( root->bs->data ); + free( root->bs ); + } + if( root->fragment ) + { + lsmash_remove_list( root->fragment->pool, lsmash_delete_sample ); + free( root->fragment ); + } + free( root ); +} + +/*---- timeline manipulator ----*/ + +int lsmash_modify_explicit_timeline_map( lsmash_root_t *root, uint32_t track_ID, uint32_t edit_number, lsmash_edit_t edit ) +{ + if( !edit.duration || edit.start_time < -1 ) + return -1; + isom_trak_entry_t *trak = isom_get_trak( root, track_ID ); + if( !trak || !trak->edts || !trak->edts->elst || 
!trak->edts->elst->list )
+        return -1;
+    isom_elst_t *elst = trak->edts->elst;
+    isom_elst_entry_t *data = (isom_elst_entry_t *)lsmash_get_entry_data( elst->list, edit_number );
+    if( !data )
+        return -1;
+    data->segment_duration = edit.duration;
+    data->media_time = edit.start_time;
+    data->media_rate = edit.rate;
+    if( !elst->pos || !root->fragment || root->bs->stream == stdout ) /* In these cases the in-place rewrite below is skipped. */
+        return isom_update_tkhd_duration( trak );
+    /* Rewrite the specified entry.
+     * Note: we don't update the version of the Edit List Box. */
+    lsmash_bs_t *bs = root->bs;
+    FILE *stream = bs->stream;
+    uint64_t current_pos = lsmash_ftell( stream );
+    uint64_t entry_pos = elst->pos + ISOM_LIST_FULLBOX_COMMON_SIZE + ((uint64_t)edit_number - 1) * (elst->version == 1 ? 20 : 12); /* 20 bytes per entry for version 1, 12 bytes otherwise */
+    lsmash_fseek( stream, entry_pos, SEEK_SET ); /* NOTE(review): the result of this seek is not checked before writing. */
+    if( elst->version )
+    {
+        lsmash_bs_put_be64( bs, data->segment_duration );
+        lsmash_bs_put_be64( bs, data->media_time );
+    }
+    else
+    {
+        lsmash_bs_put_be32( bs, (uint32_t)LSMASH_MIN( data->segment_duration, UINT32_MAX ) ); /* clamp to the 32-bit field */
+        lsmash_bs_put_be32( bs, (uint32_t)data->media_time );
+    }
+    lsmash_bs_put_be32( bs, data->media_rate );
+    int ret = lsmash_bs_write_data( bs );
+    lsmash_fseek( stream, current_pos, SEEK_SET ); /* go back to where the stream was before the rewrite */
+    return ret;
+}
+/* Append one edit to the edit list of the track, adding the edts and elst
+ * boxes through isom_add_edts() / isom_add_elst().
+ * When edit.duration is 0 and the movie is not fragmented, the duration of the
+ * Track Header Box is used instead (recomputed first if it is still 0).
+ * Returns 0 on success, -1 on failure. */
+int lsmash_create_explicit_timeline_map( lsmash_root_t *root, uint32_t track_ID, lsmash_edit_t edit )
+{
+    if( edit.start_time < -1 )
+        return -1;
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->tkhd )
+        return -1;
+    edit.duration = (edit.duration || root->fragment) ? edit.duration
+                  : trak->tkhd->duration ? trak->tkhd->duration
+                  : isom_update_tkhd_duration( trak ) ? 0
+                  : trak->tkhd->duration;
+    if( isom_add_edts( trak )
+     || isom_add_elst( trak->edts )
+     || isom_add_elst_entry( trak->edts->elst, edit.duration, edit.start_time, edit.rate ) )
+        return -1;
+    return isom_update_tkhd_duration( trak );
+}
+/* Copy the edit_number-th edit of the track into *edit.
+ * A track without an edit list yields an all-zero edit and returns 0.
+ * Returns -1 if edit is NULL, the track is not found or the entry does not exist. */
+int lsmash_get_explicit_timeline_map( lsmash_root_t *root, uint32_t track_ID, uint32_t edit_number, lsmash_edit_t *edit )
+{
+    if( !edit )
+        return -1;
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak )
+        return -1;
+    if( !trak->edts || !trak->edts->elst )
+    {
+        /* no edits */
+        edit->duration = 0;
+        edit->start_time = 0;
+        edit->rate = 0;
+        return 0;
+    }
+    isom_elst_entry_t *elst = (isom_elst_entry_t *)lsmash_get_entry_data( trak->edts->elst->list, edit_number );
+    if( !elst )
+        return -1;
+    edit->duration = elst->segment_duration;
+    edit->start_time = elst->media_time;
+    edit->rate = elst->media_rate;
+    return 0;
+}
+/* Return the number of entries in the edit list of the track,
+ * or 0 when the track or its edit list is missing. */
+uint32_t lsmash_count_explicit_timeline_map( lsmash_root_t *root, uint32_t track_ID )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->edts || !trak->edts->elst || !trak->edts->elst->list )
+        return 0;
+    return trak->edts->elst->list->entry_count;
+}
+
+/*---- create / modification time fields manipulators ----*/
+/* Set modification_time of the Media Header Box to the current time.
+ * Returns 0 on success, -1 if the track or its mdhd is missing. */
+int lsmash_update_media_modification_time( lsmash_root_t *root, uint32_t track_ID )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->mdia || !trak->mdia->mdhd )
+        return -1;
+    isom_mdhd_t *mdhd = trak->mdia->mdhd;
+    mdhd->modification_time = isom_get_current_mp4time();
+    /* A creation_time later than the modification_time is overwritten. */
+    if( mdhd->creation_time > mdhd->modification_time )
+        mdhd->creation_time = mdhd->modification_time;
+    return 0;
+}
+/* Set modification_time of the Track Header Box to the current time.
+ * Returns 0 on success, -1 if the track or its tkhd is missing. */
+int lsmash_update_track_modification_time( lsmash_root_t *root, uint32_t track_ID )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->tkhd )
+        return -1;
+    isom_tkhd_t *tkhd = trak->tkhd;
+    tkhd->modification_time = isom_get_current_mp4time();
+
/* A creation_time later than the modification_time is overwritten. */
+    if( tkhd->creation_time > tkhd->modification_time )
+        tkhd->creation_time = tkhd->modification_time;
+    return 0;
+}
+/* Set modification_time of the Movie Header Box to the current time.
+ * Returns 0 on success, -1 if root, moov or mvhd is missing. */
+int lsmash_update_movie_modification_time( lsmash_root_t *root )
+{
+    if( !root || !root->moov || !root->moov->mvhd )
+        return -1;
+    isom_mvhd_t *mvhd = root->moov->mvhd;
+    mvhd->modification_time = isom_get_current_mp4time();
+    /* A creation_time later than the modification_time is overwritten. */
+    if( mvhd->creation_time > mvhd->modification_time )
+        mvhd->creation_time = mvhd->modification_time;
+    return 0;
+}
+
+/*---- sample manipulators ----*/
+lsmash_sample_t *lsmash_create_sample( uint32_t size ) /* Allocate a zeroed sample with a data buffer of size bytes; NULL on allocation failure. */
+{
+    lsmash_sample_t *sample = lsmash_malloc_zero( sizeof(lsmash_sample_t) );
+    if( !sample )
+        return NULL;
+    if( !size )
+        return sample; /* size 0: the sample is returned without a data buffer */
+    sample->data = malloc( size );
+    if( !sample->data )
+    {
+        free( sample );
+        return NULL;
+    }
+    sample->length = size;
+    return sample;
+}
+/* Resize the data buffer of sample to size bytes.
+ * size 0 releases the buffer. On allocation failure the previous buffer and
+ * length are left untouched and -1 is returned. Returns 0 on success. */
+int lsmash_sample_alloc( lsmash_sample_t *sample, uint32_t size )
+{
+    if( !sample )
+        return -1;
+    if( !size )
+    {
+        if( sample->data )
+            free( sample->data );
+        sample->data = NULL;
+        sample->length = 0;
+        return 0;
+    }
+    if( size == sample->length )
+        return 0;
+    uint8_t *data;
+    if( !sample->data )
+        data = malloc( size );
+    else
+        data = realloc( sample->data, size );
+    if( !data )
+        return -1;
+    sample->data = data;
+    sample->length = size;
+    return 0;
+}
+/* Free a sample together with its data buffer. NULL is ignored. */
+void lsmash_delete_sample( lsmash_sample_t *sample )
+{
+    if( !sample )
+        return;
+    if( sample->data )
+        free( sample->data );
+    free( sample );
+}
+/* Allocate a zeroed sample pool with a data buffer of size bytes.
+ * size 0 returns a pool without a buffer. Returns NULL on allocation failure. */
+isom_sample_pool_t *isom_create_sample_pool( uint64_t size )
+{
+    isom_sample_pool_t *pool = lsmash_malloc_zero( sizeof(isom_sample_pool_t) );
+    if( !pool )
+        return NULL;
+    if( size == 0 )
+        return pool;
+    pool->data = malloc( size );
+    if( !pool->data )
+    {
+        free( pool );
+        return NULL;
+    }
+    pool->alloc = size;
+    return pool;
+}
+
+static void isom_remove_sample_pool( isom_sample_pool_t *pool )
+{
+    if( !pool )
+        return;
+    if( pool->data )
+        free(
pool->data );
+    free( pool );
+}
+/* Add a sample size entry and return the resulting sample count of the track.
+ * Returns 0 on failure. */
+static uint32_t isom_add_size( isom_trak_entry_t *trak, uint32_t sample_size )
+{
+    if( isom_add_stsz_entry( trak->mdia->minf->stbl, sample_size ) )
+        return 0;
+    return isom_get_sample_count( trak );
+}
+/* Record the decoding time delta between the cached DTS and dts in stts.
+ * Returns the delta that was added, or 0 on failure (including a dts that
+ * is not greater than the cached one). cache->dts is updated on success. */
+static uint32_t isom_add_dts( isom_stbl_t *stbl, isom_timestamp_t *cache, uint64_t dts )
+{
+    isom_stts_t *stts = stbl->stts;
+    if( !stts->list->entry_count )
+    {
+        /* First stts entry: dts itself is used as the delta. */
+        if( isom_add_stts_entry( stbl, dts ) )
+            return 0;
+        cache->dts = dts;
+        return dts;
+    }
+    if( dts <= cache->dts )
+        return 0;
+    uint32_t sample_delta = dts - cache->dts; /* NOTE(review): the 64-bit difference is narrowed to 32 bits without a range check. */
+    isom_stts_entry_t *data = (isom_stts_entry_t *)stts->list->tail->data;
+    if( data->sample_delta == sample_delta )
+        ++ data->sample_count; /* same delta as the last run: extend it */
+    else if( isom_add_stts_entry( stbl, sample_delta ) )
+        return 0;
+    cache->dts = dts;
+    return sample_delta;
+}
+/* Record the composition offset (cts - cache->dts) in ctts.
+ * The ctts box is created lazily, on the first sample whose cts differs from
+ * the cached DTS. Returns 0 on success, -1 on failure. */
+static int isom_add_cts( isom_stbl_t *stbl, isom_timestamp_t *cache, uint64_t cts )
+{
+    isom_ctts_t *ctts = stbl->ctts;
+    if( !ctts )
+    {
+        if( cts == cache->dts )
+        {
+            cache->cts = cts;
+            return 0;
+        }
+        /* Add ctts box and the first ctts entry. */
+        if( isom_add_ctts( stbl ) || isom_add_ctts_entry( stbl, 0 ) )
+            return -1;
+        ctts = stbl->ctts;
+        isom_ctts_entry_t *data = (isom_ctts_entry_t *)ctts->list->head->data;
+        uint32_t sample_count = stbl->stsz->sample_count;
+        if( sample_count != 1 )
+        {
+            /* All earlier samples share the zero offset of the first entry. */
+            data->sample_count = sample_count - 1;
+            if( isom_add_ctts_entry( stbl, cts - cache->dts ) )
+                return -1;
+        }
+        else
+            data->sample_offset = cts;
+        cache->cts = cts;
+        return 0;
+    }
+    if( !ctts->list )
+        return -1;
+    isom_ctts_entry_t *data = (isom_ctts_entry_t *)ctts->list->tail->data;
+    uint32_t sample_offset = cts - cache->dts;
+    if( data->sample_offset == sample_offset )
+        ++ data->sample_count; /* same offset as the last run: extend it */
+    else if( isom_add_ctts_entry( stbl, sample_offset ) )
+        return -1;
+    cache->cts = cts;
+    return 0;
+}
+/* Register the DTS and CTS of the sample that has just been counted.
+ * stts is only updated from the second sample on. Tracks the composition to
+ * decode shift when cts precedes dts and updates the fragment cache if any.
+ * Returns 0 on success, -1 on failure. */
+static int isom_add_timestamp( isom_trak_entry_t *trak, uint64_t dts, uint64_t cts )
+{
+    if( !trak->cache || !trak->mdia->minf->stbl->stts || !trak->mdia->minf->stbl->stts->list )
+        return -1;
+    lsmash_root_t *root = trak->root;
+    if( root->isom_compatible && root->qt_compatible && (cts - dts) > INT32_MAX )
+        return -1; /* sample_offset is not compatible. */
+    isom_stbl_t *stbl = trak->mdia->minf->stbl;
+    isom_timestamp_t *ts_cache = &trak->cache->timestamp;
+    uint32_t sample_count = isom_get_sample_count( trak );
+    uint32_t sample_delta = sample_count > 1 ? isom_add_dts( stbl, ts_cache, dts ) : 0;
+    if( sample_count > 1 && !sample_delta )
+        return -1;
+    if( isom_add_cts( stbl, ts_cache, cts ) )
+        return -1;
+    if( (cts + ts_cache->ctd_shift) < dts )
+    {
+        if( (root->max_isom_version < 4 && !root->qt_compatible) /* Negative sample offset is not supported. */
+         || (root->max_isom_version >= 4 && trak->root->qt_compatible) /* ctts version 1 is not defined in QTFF. */
+         || ((dts - cts) > INT32_MAX) ) /* Overflow */
+            return -1;
+        ts_cache->ctd_shift = dts - cts;
+        if( stbl->ctts->version == 0 && !trak->root->qt_compatible ) /* NOTE(review): stbl->ctts is dereferenced without a NULL check here. */
+            stbl->ctts->version = 1;
+    }
+    if( trak->cache->fragment )
+    {
+        isom_fragment_t *fragment_cache = trak->cache->fragment;
+        fragment_cache->last_duration = sample_delta;
+        fragment_cache->largest_cts = LSMASH_MAX( ts_cache->cts, fragment_cache->largest_cts );
+    }
+    return 0;
+}
+/* Maintain the sync sample table (stss) for sample_number.
+ * While every sample so far has been a sync sample, no stss box is kept and
+ * cache->all_sync is set instead. Returns 0 on success, -1 on failure. */
+static int isom_add_sync_point( isom_trak_entry_t *trak, uint32_t sample_number, lsmash_sample_property_t *prop )
+{
+    isom_stbl_t *stbl = trak->mdia->minf->stbl;
+    isom_cache_t *cache = trak->cache;
+    if( !(prop->ra_flags & ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC) ) /* no null check for prop */
+    {
+        if( !cache->all_sync )
+            return 0;
+        if( !stbl->stss && isom_add_stss( stbl ) )
+            return -1;
+        if( isom_add_stss_entry( stbl, 1 ) ) /* Declare here the first sample is a sync sample. */
+            return -1;
+        cache->all_sync = 0;
+        return 0;
+    }
+    if( cache->all_sync ) /* We don't need stss box if all samples are sync sample. */
+        return 0;
+    if( !stbl->stss )
+    {
+        if( isom_get_sample_count( trak ) == 1 )
+        {
+            cache->all_sync = 1; /* Also the first sample is a sync sample. */
+            return 0;
+        }
+        if( isom_add_stss( stbl ) )
+            return -1;
+    }
+    return isom_add_stss_entry( stbl, sample_number );
+}
+/* Add sample_number to the partial sync sample table (stps) when the movie is
+ * QuickTime compatible and the sample qualifies as a partial sync sample. */
+static int isom_add_partial_sync( isom_trak_entry_t *trak, uint32_t sample_number, lsmash_sample_property_t *prop )
+{
+    if( !trak->root->qt_compatible )
+        return 0;
+    if( !(prop->ra_flags & QT_SAMPLE_RANDOM_ACCESS_FLAG_PARTIAL_SYNC)
+     && !(LSMASH_IS_POST_ROLL_START( prop->ra_flags ) && prop->post_roll.identifier == prop->post_roll.complete) )
+        return 0;
+    /* This sample is a partial sync sample.
*/
+    isom_stbl_t *stbl = trak->mdia->minf->stbl;
+    if( !stbl->stps && isom_add_stps( stbl ) )
+        return -1;
+    return isom_add_stps_entry( stbl, sample_number );
+}
+/* Add the dependency flags of the current sample to sdtp.
+ * Nothing is done unless the movie is QuickTime compatible or uses AVC extensions.
+ * The sdtp box is created on the first sample that has any flag set, and the
+ * samples before it are filled with zeroed properties.
+ * Returns 0 on success, -1 on failure. */
+static int isom_add_dependency_type( isom_trak_entry_t *trak, lsmash_sample_property_t *prop )
+{
+    if( !trak->root->qt_compatible && !trak->root->avc_extensions )
+        return 0;
+    uint8_t avc_extensions = trak->root->avc_extensions;
+    isom_stbl_t *stbl = trak->mdia->minf->stbl;
+    if( stbl->sdtp )
+        return isom_add_sdtp_entry( (isom_box_t *)stbl, prop, avc_extensions );
+    if( !prop->allow_earlier && !prop->leading && !prop->independent && !prop->disposable && !prop->redundant ) /* no null check for prop */
+        return 0;
+    if( isom_add_sdtp( (isom_box_t *)stbl ) )
+        return -1;
+    uint32_t count = isom_get_sample_count( trak );
+    /* fill past samples with ISOM_SAMPLE_*_UNKNOWN */
+    lsmash_sample_property_t null_prop = { 0 };
+    for( uint32_t i = 1; i < count; i++ ) /* count already includes the current sample, hence i starts at 1 */
+        if( isom_add_sdtp_entry( (isom_box_t *)stbl, &null_prop, avc_extensions ) )
+            return -1;
+    return isom_add_sdtp_entry( (isom_box_t *)stbl, prop, avc_extensions );
+}
+/* Finalize the pending random access description of group.
+ * Stores num_leading_samples_known, then drops the description again if an
+ * identical one already exists in sgpd and redirects the assignment to it.
+ * group->random_access is NULL afterwards. Returns 0 on success, -1 on failure. */
+static int isom_rap_grouping_established( isom_rap_group_t *group, int num_leading_samples_known, isom_sgpd_entry_t *sgpd )
+{
+    isom_rap_entry_t *rap = group->random_access;
+    if( !rap )
+        return 0;
+    assert( rap == (isom_rap_entry_t *)sgpd->list->tail->data );
+    rap->num_leading_samples_known = num_leading_samples_known;
+    /* Avoid duplication of sample group descriptions. */
+    uint32_t group_description_index = 1;
+    for( lsmash_entry_t *entry = sgpd->list->head; entry != sgpd->list->tail; entry = entry->next )
+    {
+        isom_rap_entry_t *data = (isom_rap_entry_t *)entry->data;
+        if( !data )
+            return -1;
+        if( rap->num_leading_samples_known == data->num_leading_samples_known
+         && rap->num_leading_samples == data->num_leading_samples )
+        {
+            /* The same description already exists.
+             * Remove the latest random access entry.
*/ + lsmash_remove_entry_direct( sgpd->list, sgpd->list->tail, NULL ); + /* Replace assigned group_description_index with the one corresponding the same description. */ + if( group->assignment->group_description_index == 0 ) + { + if( group->prev_assignment ) + group->prev_assignment->group_description_index = group_description_index; + } + else + group->assignment->group_description_index = group_description_index; + break; + } + ++group_description_index; + } + group->random_access = NULL; + return 0; +} + +static int isom_group_random_access( isom_trak_entry_t *trak, lsmash_sample_property_t *prop ) +{ + if( trak->root->max_isom_version < 6 ) + return 0; + isom_stbl_t *stbl = trak->mdia->minf->stbl; + isom_sbgp_entry_t *sbgp = isom_get_sample_to_group( stbl, ISOM_GROUP_TYPE_RAP ); + isom_sgpd_entry_t *sgpd = isom_get_sample_group_description( stbl, ISOM_GROUP_TYPE_RAP ); + if( !sbgp || !sgpd ) + return 0; + uint8_t is_rap = (prop->ra_flags & ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC) + || (prop->ra_flags & QT_SAMPLE_RANDOM_ACCESS_FLAG_PARTIAL_SYNC) + || (prop->ra_flags & ISOM_SAMPLE_RANDOM_ACCESS_FLAG_RAP) + || (LSMASH_IS_POST_ROLL_START( prop->ra_flags ) && prop->post_roll.identifier == prop->post_roll.complete); + isom_rap_group_t *group = trak->cache->rap; + if( !group ) + { + /* This sample is the first sample, create a grouping cache. */ + assert( isom_get_sample_count( trak ) == 1 ); + group = malloc( sizeof(isom_rap_group_t) ); + if( !group ) + return -1; + if( is_rap ) + { + group->random_access = isom_add_rap_group_entry( sgpd ); + group->assignment = isom_add_group_assignment_entry( sbgp, 1, sgpd->list->entry_count ); + } + else + { + /* The first sample is not always random access point. 
*/ + group->random_access = NULL; + group->assignment = isom_add_group_assignment_entry( sbgp, 1, 0 ); + } + if( !group->assignment ) + { + free( group ); + return -1; + } + group->prev_assignment = NULL; + group->is_prev_rap = is_rap; + trak->cache->rap = group; + return 0; + } + if( group->is_prev_rap ) + { + /* OK. here, the previous sample is a menber of 'rap '. */ + if( !is_rap ) + { + /* This sample isn't a member of 'rap ' and the previous sample is. + * So we create a new group and set 0 on its group_description_index. */ + group->prev_assignment = group->assignment; + group->assignment = isom_add_group_assignment_entry( sbgp, 1, 0 ); + if( !group->assignment ) + { + free( group ); + return -1; + } + } + else if( !LSMASH_IS_CLOSED_RAP( prop->ra_flags ) ) + { + /* Create a new group since there is the possibility the next sample is a leading sample. + * This sample is a member of 'rap ', so we set appropriate value on its group_description_index. */ + if( isom_rap_grouping_established( group, 1, sgpd ) ) + return -1; + group->random_access = isom_add_rap_group_entry( sgpd ); + group->prev_assignment = group->assignment; + group->assignment = isom_add_group_assignment_entry( sbgp, 1, sgpd->list->entry_count ); + if( !group->assignment ) + { + free( group ); + return -1; + } + } + else /* The previous and current sample are a member of 'rap ', and the next sample must not be a leading sample. */ + ++ group->assignment->sample_count; + } + else if( is_rap ) + { + /* This sample is a member of 'rap ' and the previous sample isn't. + * So we create a new group and set appropriate value on its group_description_index. 
*/ + if( isom_rap_grouping_established( group, 1, sgpd ) ) + return -1; + group->random_access = isom_add_rap_group_entry( sgpd ); + group->prev_assignment = group->assignment; + group->assignment = isom_add_group_assignment_entry( sbgp, 1, sgpd->list->entry_count ); + if( !group->assignment ) + { + free( group ); + return -1; + } + } + else /* The previous and current sample aren't a member of 'rap '. */ + ++ group->assignment->sample_count; + /* Obtain the property of the latest random access point group. */ + if( !is_rap && group->random_access ) + { + if( prop->leading == ISOM_SAMPLE_LEADING_UNKNOWN ) + { + /* We can no longer know num_leading_samples in this group. */ + if( isom_rap_grouping_established( group, 0, sgpd ) ) + return -1; + } + else + { + if( prop->leading == ISOM_SAMPLE_IS_UNDECODABLE_LEADING || prop->leading == ISOM_SAMPLE_IS_DECODABLE_LEADING ) + ++ group->random_access->num_leading_samples; + /* no more consecutive leading samples in this group */ + else if( isom_rap_grouping_established( group, 1, sgpd ) ) + return -1; + } + } + group->is_prev_rap = is_rap; + return 0; +} + +static int isom_roll_grouping_established( isom_roll_group_t *group, int16_t roll_distance, isom_sgpd_entry_t *sgpd ) +{ + /* Avoid duplication of sample group descriptions. */ + uint32_t group_description_index = 1; + for( lsmash_entry_t *entry = sgpd->list->head; entry; entry = entry->next ) + { + isom_roll_entry_t *data = (isom_roll_entry_t *)entry->data; + if( !data ) + return -1; + if( roll_distance == data->roll_distance ) + { + /* The same description already exists. + * Set the group_description_index corresponding the same description. */ + group->assignment->group_description_index = group_description_index; + group->described = 1; + return 0; + } + ++group_description_index; + } + /* Add a new roll recovery description. 
*/
+    if( !isom_add_roll_group_entry( sgpd, roll_distance ) )
+        return -1;
+    group->assignment->group_description_index = sgpd->list->entry_count;
+    group->described = 1;
+    return 0;
+}
+/* Merge consecutive group assignments in sbgp that ended up with the same
+ * group_description_index. Only pooled groups that are both delimited and
+ * described are processed; the walk stops at the first one that is not.
+ * Returns 0 on success, -1 on failure. */
+static int isom_deduplicate_roll_group( isom_sbgp_entry_t *sbgp, lsmash_entry_list_t *pool )
+{
+    /* Deduplication */
+    uint32_t current_group_number = sbgp->list->entry_count - pool->entry_count + 1; /* number, in sbgp, of the first pooled group */
+    isom_group_assignment_entry_t *prev_assignment = (isom_group_assignment_entry_t *)lsmash_get_entry_data( sbgp->list, current_group_number - 1 );
+    for( lsmash_entry_t *entry = pool->head; entry; )
+    {
+        isom_roll_group_t *group = (isom_roll_group_t *)entry->data;
+        if( !group || !group->assignment )
+            return -1;
+        if( !group->delimited || !group->described )
+            return 0;
+        if( prev_assignment && prev_assignment->group_description_index == group->assignment->group_description_index )
+        {
+            /* Merge the current group with the previous. */
+            lsmash_entry_t *next_entry = entry->next; /* saved because entry is removed below */
+            prev_assignment->sample_count += group->assignment->sample_count;
+            if( lsmash_remove_entry( sbgp->list, current_group_number, NULL )
+             || lsmash_remove_entry_direct( pool, entry, NULL ) )
+                return -1;
+            entry = next_entry;
+        }
+        else
+        {
+            entry = entry->next;
+            prev_assignment = group->assignment;
+            ++current_group_number;
+        }
+    }
+    return 0;
+}
+
+/* Remove pooled caches that have become unnecessary:
+ * entries are dropped from the head of the pool for as long as they are both
+ * delimited and described. */
+static int isom_clean_roll_pool( isom_sbgp_entry_t *sbgp, lsmash_entry_list_t *pool )
+{
+    for( lsmash_entry_t *entry = pool->head; entry; entry = pool->head )
+    {
+        isom_roll_group_t *group = (isom_roll_group_t *)entry->data;
+        if( !group )
+            return -1;
+        if( !group->delimited || !group->described )
+            return 0;
+        if( lsmash_remove_entry_direct( pool, entry, NULL ) )
+            return -1;
+    }
+    return 0;
+}
+/* Deduplicate the finished groups, then drop them from the pool. */
+static int isom_flush_roll_pool( isom_sbgp_entry_t *sbgp, lsmash_entry_list_t *pool )
+{
+    if( isom_deduplicate_roll_group( sbgp, pool ) )
+        return -1;
+    return isom_clean_roll_pool( sbgp, pool );
+}
+/* Mark every pooled group as described and flush the pool. */
+static int isom_all_recovery_described( isom_sbgp_entry_t *sbgp, lsmash_entry_list_t *pool )
+{
+    for( lsmash_entry_t *entry = pool->head; entry; entry = entry->next )
+    {
+        isom_roll_group_t *group = (isom_roll_group_t *)entry->data;
+        if( !group )
+            return -1;
+        group->described = 1;
+    }
+    return isom_flush_roll_pool( sbgp, pool );
+}
+/* Assign the current sample to a 'roll' sample group.
+ * Nothing is done unless AVC extensions or QuickTime compatibility is enabled
+ * and the track has both 'roll' grouping boxes. Pending groups are kept in
+ * trak->cache->roll.pool, which is created on first use.
+ * Returns 0 on success, -1 on failure. */
+static int isom_group_roll_recovery( isom_trak_entry_t *trak, lsmash_sample_property_t *prop )
+{
+    if( !trak->root->avc_extensions && !trak->root->qt_compatible )
+        return 0;
+    isom_stbl_t *stbl = trak->mdia->minf->stbl;
+    isom_sbgp_entry_t *sbgp = isom_get_sample_to_group( stbl, ISOM_GROUP_TYPE_ROLL );
+    isom_sgpd_entry_t *sgpd = isom_get_sample_group_description( stbl, ISOM_GROUP_TYPE_ROLL );
+    if( !sbgp || !sgpd )
+        return 0;
+    lsmash_entry_list_t *pool = trak->cache->roll.pool;
+    if( !pool )
+    {
+        pool = lsmash_create_entry_list();
+        if( !pool )
+            return -1;
+        trak->cache->roll.pool = pool;
+    }
+    isom_roll_group_t *group = (isom_roll_group_t *)lsmash_get_entry_data( pool, pool->entry_count ); /* the most recently pooled group, if any */
+    uint32_t sample_count = isom_get_sample_count( trak );
+    int is_recovery_start = LSMASH_IS_POST_ROLL_START( prop->ra_flags );
+    int valid_pre_roll = !is_recovery_start && (prop->ra_flags != ISOM_SAMPLE_RANDOM_ACCESS_FLAG_NONE)
+                      && (prop->pre_roll.distance > 0) && (prop->pre_roll.distance <= -INT16_MIN);
+    int new_group = !group ||
is_recovery_start || (group->prev_is_recovery_start != is_recovery_start);
+    if( !new_group )
+    {
+        /* Check pre-roll distance. */
+        if( !group->assignment )
+            return -1;
+        isom_roll_entry_t *prev_roll = (isom_roll_entry_t *)lsmash_get_entry_data( sgpd->list, group->assignment->group_description_index );
+        if( !prev_roll )
+            new_group = valid_pre_roll;
+        else if( !valid_pre_roll || (prop->pre_roll.distance != -prev_roll->roll_distance) )
+            /* Pre-roll distance is different from the previous. */
+            new_group = 1;
+    }
+    if( new_group )
+    {
+        if( group )
+            group->delimited = 1; /* the previous group ends at the previous sample */
+        else
+            assert( sample_count == 1 );
+        /* Create a new group. */
+        group = lsmash_malloc_zero( sizeof(isom_roll_group_t) );
+        if( !group )
+            return -1;
+        group->prev_is_recovery_start = is_recovery_start;
+        group->assignment = isom_add_group_assignment_entry( sbgp, 1, 0 );
+        if( !group->assignment || lsmash_add_entry( pool, group ) )
+        {
+            free( group );
+            return -1;
+        }
+        if( is_recovery_start )
+        {
+            /* a member of non-roll or post-roll group */
+            group->first_sample = sample_count;
+            group->recovery_point = prop->post_roll.complete;
+        }
+        else
+        {
+            if( valid_pre_roll )
+            {
+                /* a member of pre-roll group */
+                if( isom_roll_grouping_established( group, -prop->pre_roll.distance, sgpd ) )
+                    return -1;
+            }
+            else
+                /* a member of non-roll group */
+                group->described = 1;
+        }
+    }
+    else
+    {
+        group->prev_is_recovery_start = is_recovery_start;
+        ++ group->assignment->sample_count;
+    }
+    /* If encountered a sync sample, all recovery is completed here. */
+    if( prop->ra_flags & ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC )
+        return isom_all_recovery_described( sbgp, pool );
+    /* Check whether this sample is a random access recovery point or not. */
+    for( lsmash_entry_t *entry = pool->head; entry; entry = entry->next )
+    {
+        group = (isom_roll_group_t *)entry->data;
+        if( !group )
+            return -1;
+        if( group->described )
+            continue;
+        if( prop->post_roll.identifier == group->recovery_point )
+        {
+            int16_t distance = sample_count - group->first_sample; /* NOTE(review): the difference is narrowed to 16 bits without a range check. */
+            /* Add a roll recovery entry only when roll_distance isn't zero since roll_distance = 0 must not be used. */
+            if( distance )
+            {
+                /* Now, this group is a 'roll'. */
+                if( isom_roll_grouping_established( group, distance, sgpd ) )
+                    return -1;
+                /* All groups before the current group are described. */
+                lsmash_entry_t *current = entry;
+                for( entry = pool->head; entry != current; entry = entry->next )
+                {
+                    group = (isom_roll_group_t *)entry->data;
+                    if( !group )
+                        return -1;
+                    group->described = 1;
+                }
+            }
+            else
+                group->described = 1;
+            break; /* Avoid evaluating groups, in the pool, having the same identifier for recovery point again. */
+        }
+    }
+    return isom_flush_roll_pool( sbgp, pool );
+}
+
+/* returns 1 if pooled samples must be flushed, 0 if not, -1 on failure. */
+/* FIXME: I wonder if this function should have a extra argument which indicates force_to_flush_cached_chunk.
+   see lsmash_append_sample for detail. */
+static int isom_add_chunk( isom_trak_entry_t *trak, lsmash_sample_t *sample )
+{
+    if( !trak->root || !trak->cache || !trak->mdia->mdhd || !trak->mdia->mdhd->timescale
+     || !trak->mdia->minf->stbl->stsc || !trak->mdia->minf->stbl->stsc->list )
+        return -1;
+    lsmash_root_t *root = trak->root;
+    isom_chunk_t *current = &trak->cache->chunk;
+    if( !current->pool )
+    {
+        /* Very initial settings, just once per track */
+        current->pool = isom_create_sample_pool( 0 );
+        if( !current->pool )
+            return -1;
+    }
+    if( !current->pool->sample_count )
+    {
+        /* Cannot decide whether we should flush the current sample or not here yet.
*/
+        ++ current->chunk_number;
+        current->sample_description_index = sample->index;
+        current->first_dts = sample->dts;
+        return 0;
+    }
+    if( sample->dts < current->first_dts )
+        return -1; /* easy error check. */
+    if( (root->max_chunk_duration >= ((double)(sample->dts - current->first_dts) / trak->mdia->mdhd->timescale))
+     && (root->max_chunk_size >= current->pool->size + sample->length)
+     && (current->sample_description_index == sample->index) )
+        return 0; /* No need to flush current cached chunk, the current sample must be put into that. */
+    /* NOTE: chunk relative stuff must be pushed into root after a chunk is fully determined with its contents. */
+    /* now current cached chunk is fixed, actually add chunk relative properties to root accordingly. */
+    isom_stbl_t *stbl = trak->mdia->minf->stbl;
+    isom_stsc_entry_t *last_stsc_data = stbl->stsc->list->tail ? (isom_stsc_entry_t *)stbl->stsc->list->tail->data : NULL;
+    /* Create a new chunk sequence in this track if needed. */
+    if( (!last_stsc_data
+      || current->pool->sample_count != last_stsc_data->samples_per_chunk
+      || current->sample_description_index != last_stsc_data->sample_description_index)
+     && isom_add_stsc_entry( stbl, current->chunk_number, current->pool->sample_count, current->sample_description_index ) )
+        return -1;
+    /* Add a new chunk offset in this track. */
+    uint64_t offset = root->size;
+    if( root->fragment )
+        offset += ISOM_BASEBOX_COMMON_SIZE + root->fragment->pool_size;
+    if( isom_add_stco_entry( stbl, offset ) )
+        return -1;
+    /* update cache information */
+    ++ current->chunk_number;
+    /* re-initialize cache, using the current sample */
+    current->sample_description_index = sample->index;
+    current->first_dts = sample->dts;
+    /* current->pool must be flushed in isom_append_sample_internal() */
+    return 1;
+}
+/* Write the pooled sample data through root->bs and empty the pool.
+ * The sizes of the Media Data Box and of root grow by the number of bytes
+ * written. Returns 0 on success, -1 on failure. */
+static int isom_write_pooled_samples( lsmash_root_t *root, isom_sample_pool_t *pool )
+{
+    if( !root || !root->mdat || !root->bs || !root->bs->stream )
+        return -1;
+    lsmash_bs_put_bytes( root->bs, pool->size, pool->data );
+    if( lsmash_bs_write_data( root->bs ) )
+        return -1;
+    root->mdat->size += pool->size;
+    root->size += pool->size;
+    pool->sample_count = 0; /* the data buffer itself is kept for reuse */
+    pool->size = 0;
+    return 0;
+}
+/* Update every sample table of the track for one new sample.
+ * Returns the result of isom_add_chunk() (1 means the pooled samples must be
+ * flushed), or -1 as soon as any step fails. */
+static int isom_update_sample_tables( isom_trak_entry_t *trak, lsmash_sample_t *sample )
+{
+    /* Add a sample_size and increment sample_count. */
+    uint32_t sample_count = isom_add_size( trak, sample->length );
+    if( !sample_count )
+        return -1;
+    /* Add a decoding timestamp and a composition timestamp. */
+    if( isom_add_timestamp( trak, sample->dts, sample->cts ) )
+        return -1;
+    /* Add a sync point if needed. */
+    if( isom_add_sync_point( trak, sample_count, &sample->prop ) )
+        return -1;
+    /* Add a partial sync point if needed. */
+    if( isom_add_partial_sync( trak, sample_count, &sample->prop ) )
+        return -1;
+    /* Add leading, independent, disposable and redundant information if needed. */
+    if( isom_add_dependency_type( trak, &sample->prop ) )
+        return -1;
+    /* Group samples into random access point type if needed. */
+    if( isom_group_random_access( trak, &sample->prop ) )
+        return -1;
+    /* Group samples into random access recovery point type if needed. */
+    if( isom_group_roll_recovery( trak, &sample->prop ) )
+        return -1;
+    /* Add a chunk if needed.
*/
+    return isom_add_chunk( trak, sample );
+}
+/* Hand the sample pool of a track over to the pool list of the current movie
+ * fragment and give the track a fresh pool of the same size.
+ * An absent or empty pool is a no-op. Returns 0 on success, -1 on failure. */
+static int isom_append_fragment_track_run( lsmash_root_t *root, isom_chunk_t *chunk )
+{
+    if( !chunk->pool || !chunk->pool->size )
+        return 0;
+    isom_fragment_manager_t *fragment = root->fragment;
+    /* Move data in the pool of the current track fragment to the pool of the current movie fragment.
+     * Empty the pool of current track. We don't delete data of samples here. */
+    if( lsmash_add_entry( fragment->pool, chunk->pool ) )
+        return -1;
+    fragment->pool->entry_count += chunk->pool->sample_count; /* NOTE(review): entry_count is advanced by the sample count on top of the entry just added - confirm this is intended. */
+    fragment->pool_size += chunk->pool->size;
+    chunk->pool = isom_create_sample_pool( chunk->pool->size );
+    return chunk->pool ? 0 : -1;
+}
+/* Finalize the cached chunk of the track: add its stsc entry if needed and its
+ * chunk offset, then either move the pooled samples to the movie fragment
+ * (fragmented output) or write them out. Returns 0 on success, -1 on failure. */
+static int isom_output_cached_chunk( isom_trak_entry_t *trak )
+{
+    lsmash_root_t *root = trak->root;
+    isom_chunk_t *chunk = &trak->cache->chunk;
+    isom_stbl_t *stbl = trak->mdia->minf->stbl;
+    isom_stsc_entry_t *last_stsc_data = stbl->stsc->list->tail ? (isom_stsc_entry_t *)stbl->stsc->list->tail->data : NULL;
+    /* Create a new chunk sequence in this track if needed. */
+    if( (!last_stsc_data
+      || chunk->pool->sample_count != last_stsc_data->samples_per_chunk
+      || chunk->sample_description_index != last_stsc_data->sample_description_index)
+     && isom_add_stsc_entry( stbl, chunk->chunk_number, chunk->pool->sample_count, chunk->sample_description_index ) )
+        return -1;
+    if( root->fragment )
+    {
+        /* Add a new chunk offset in this track. */
+        if( isom_add_stco_entry( stbl, root->size + ISOM_BASEBOX_COMMON_SIZE + root->fragment->pool_size ) )
+            return -1;
+        return isom_append_fragment_track_run( root, chunk );
+    }
+    /* Add a new chunk offset in this track. */
+    if( isom_add_stco_entry( stbl, root->size ) )
+        return -1;
+    /* Output pooled samples in this track.
*/
+    return isom_write_pooled_samples( root, chunk->pool );
+}
+/* Copy the data of sample to the end of pool, growing the pool buffer when
+ * needed, and delete the sample. On failure (-1) the sample is not deleted. */
+static int isom_pool_sample( isom_sample_pool_t *pool, lsmash_sample_t *sample )
+{
+    uint64_t pool_size = pool->size + sample->length;
+    if( pool->alloc < pool_size )
+    {
+        uint8_t *data;
+        uint64_t alloc = pool_size + (1<<16); /* 64 KiB of headroom on every growth */
+        if( !pool->data )
+            data = malloc( alloc );
+        else
+            data = realloc( pool->data, alloc );
+        if( !data )
+            return -1;
+        pool->data = data;
+        pool->alloc = alloc;
+    }
+    memcpy( pool->data + pool->size, sample->data, sample->length );
+    pool->size = pool_size;
+    pool->sample_count += 1;
+    lsmash_delete_sample( sample );
+    return 0;
+}
+/* Update the sample tables for sample, flush the pooled samples of this track
+ * when required, flush the cached chunks of other tracks that lag behind by
+ * more than the tolerance, and finally pool the sample.
+ * Returns 0 on success, -1 on failure. */
+static int isom_append_sample_internal( isom_trak_entry_t *trak, lsmash_sample_t *sample )
+{
+    int flush = isom_update_sample_tables( trak, sample );
+    if( flush < 0 )
+        return -1;
+    /* flush == 1 means pooled samples must be flushed. */
+    lsmash_root_t *root = trak->root;
+    isom_sample_pool_t *current_pool = trak->cache->chunk.pool;
+    if( flush == 1 && isom_write_pooled_samples( root, current_pool ) )
+        return -1;
+    /* Arbitration system between tracks with extremely scattering dts.
+     * Here, we check whether asynchronization between the tracks exceeds the tolerance.
+     * If a track has too old "first DTS" in its cached chunk than current sample's DTS, then its pooled samples must be flushed.
+     * We don't consider presentation of media since any edit can pick an arbitrary portion of media in track.
+     * Note: you needn't read this loop until you grasp the basic handling of chunks. */
+    double tolerance = root->max_async_tolerance;
+    for( lsmash_entry_t *entry = root->moov->trak_list->head; entry; entry = entry->next )
+    {
+        isom_trak_entry_t *other = (isom_trak_entry_t *)entry->data;
+        if( trak == other )
+            continue;
+        if( !other || !other->cache || !other->mdia || !other->mdia->mdhd || !other->mdia->mdhd->timescale
+         || !other->mdia->minf || !other->mdia->minf->stbl || !other->mdia->minf->stbl->stsc || !other->mdia->minf->stbl->stsc->list )
+            return -1;
+        isom_chunk_t *chunk = &other->cache->chunk;
+        if( !chunk->pool || !chunk->pool->sample_count )
+            continue;
+        double diff = ((double)sample->dts / trak->mdia->mdhd->timescale)
+                    - ((double)chunk->first_dts / other->mdia->mdhd->timescale); /* both terms are DTS divided by the media timescale of their own track */
+        if( diff > tolerance && isom_output_cached_chunk( other ) )
+            return -1;
+        /* Note: we don't flush the cached chunk in the current track and the current sample here
+         * even if the conditional expression of '-diff > tolerance' meets.
+         * That's useless because appending a sample to another track would be a good equivalent.
+         * It's even harmful because it causes excess chunk division by calling
+         * isom_output_cached_chunk() which always generates a new chunk.
+         * Anyway some excess chunk division will be there, but rather less without it.
+         * To completely avoid this, we need to observe at least whether the current sample will be placed
+         * right next to the previous chunk of the same track or not. */
+    }
+    /* anyway the current sample must be pooled.
*/
+    return isom_pool_sample( current_pool, sample );
+}
+
+/* Append sample to the track identified by track_ID.
+ * A Media Data Box is created first if root has none yet.
+ * For LPCM audio, a sample holding several frames is split into one sample per
+ * frame of constBytesPerAudioPacket bytes, with dts/cts incremented by 1 per
+ * frame; such a sample must hold a whole number of frames.
+ * On success the sample is consumed (deleted); on failure (-1) it is not
+ * deleted by this function. */
+static int isom_append_sample( lsmash_root_t *root, uint32_t track_ID, lsmash_sample_t *sample )
+{
+    isom_trak_entry_t *trak = isom_get_trak( root, track_ID );
+    if( !trak || !trak->root || !trak->cache || !trak->mdia
+     || !trak->mdia->mdhd || !trak->mdia->mdhd->timescale
+     || !trak->mdia->minf || !trak->mdia->minf->stbl
+     || !trak->mdia->minf->stbl->stsd || !trak->mdia->minf->stbl->stsd->list
+     || !trak->mdia->minf->stbl->stsc || !trak->mdia->minf->stbl->stsc->list )
+        return -1;
+    /* If there is no available Media Data Box to write samples, add and write a new one before any chunk offset is decided. */
+    if( !root->mdat )
+    {
+        if( isom_new_mdat( root, 0 ) )
+            return -1;
+        /* Add the size of the Media Data Box and the placeholder. */
+        root->size += 2 * ISOM_BASEBOX_COMMON_SIZE;
+    }
+    isom_sample_entry_t *sample_entry = (isom_sample_entry_t *)lsmash_get_entry_data( trak->mdia->minf->stbl->stsd->list, sample->index );
+    if( !sample_entry )
+        return -1;
+    if( isom_is_lpcm_audio( sample_entry ) )
+    {
+        uint32_t frame_size = ((isom_audio_entry_t *)sample_entry)->constBytesPerAudioPacket;
+        if( sample->length == frame_size )
+            return isom_append_sample_internal( trak, sample );
+        else if( sample->length < frame_size )
+            return -1;
+        /* A zero frame size would never advance the loop below, and a length that
+         * isn't a multiple of the frame size would make the last copy read past
+         * the end of the sample data. */
+        if( !frame_size || sample->length % frame_size )
+            return -1;
+        /* Append samples split into each LPCMFrame. */
+        uint64_t dts = sample->dts;
+        uint64_t cts = sample->cts;
+        for( uint32_t offset = 0; offset < sample->length; offset += frame_size )
+        {
+            lsmash_sample_t *lpcm_sample = lsmash_create_sample( frame_size );
+            if( !lpcm_sample )
+                return -1;
+            memcpy( lpcm_sample->data, sample->data + offset, frame_size );
+            lpcm_sample->dts = dts++;
+            lpcm_sample->cts = cts++;
+            lpcm_sample->prop = sample->prop;
+            lpcm_sample->index = sample->index;
+            if( isom_append_sample_internal( trak, lpcm_sample ) )
+            {
+                lsmash_delete_sample( lpcm_sample );
+                return -1;
+            }
+        }
+        lsmash_delete_sample( sample );
+        return 0;
+    }
+    return isom_append_sample_internal( trak, sample );
+}
+
+static int isom_output_cache( isom_trak_entry_t *trak )
+{
+    if( trak->cache->chunk.pool && trak->cache->chunk.pool->sample_count
+     && isom_output_cached_chunk( trak ) )
+        return -1;
+    isom_stbl_t *stbl = trak->mdia->minf->stbl;
+    if( !stbl->sgpd_list )
+        return 0;
+    for( lsmash_entry_t *entry = stbl->sgpd_list->head; entry; entry = entry->next )
+    {
+        isom_sgpd_entry_t *sgpd = (isom_sgpd_entry_t *)entry->data;
+        if( !sgpd )
+            return -1;
+        switch( sgpd->grouping_type )
+        {
+            case ISOM_GROUP_TYPE_RAP :
+            {
+                isom_rap_group_t *group = trak->cache->rap;
+                if( !group )
+                {
+                    if( trak->root->fragment )
+                        continue;
+                    else
+                        return -1;
+                }
+                if( !group->random_access )
+                    continue;
+                group->random_access->num_leading_samples_known = 1;
+                break;
+            }
+            case ISOM_GROUP_TYPE_ROLL :
+                if( !trak->cache->roll.pool )
+                {
+                    if( trak->root->fragment )
+                        continue;
+                    else
+                        return -1;
+                }
+                for( lsmash_entry_t *roll_entry = trak->cache->roll.pool->head; roll_entry; roll_entry = roll_entry->next )
+                {
+                    isom_roll_group_t *group = (isom_roll_group_t *)roll_entry->data;
+                    if( !group )
+                        return -1;
+                    group->described = 1;
+                    group->delimited = 1;
+                }
+                isom_sbgp_entry_t *sbgp = isom_get_sample_to_group( stbl, ISOM_GROUP_TYPE_ROLL );
+                /* The flush dereferences sbgp unconditionally. */
+                if( !sbgp )
+                    return -1;
+                if( isom_flush_roll_pool( sbgp, trak->cache->roll.pool ) )
+                    return -1;
+                break;
+            default :
+
break; + } + } + return 0; +} + +/* Flush the pooled samples of track_ID in the current movie fragment. + * Returns 0 when the movie fragment has no track fragment for track_ID, -1 on failure, + * and otherwise the result of isom_set_fragment_last_duration(). */ +static int isom_flush_fragment_pooled_samples( lsmash_root_t *root, uint32_t track_ID, uint32_t last_sample_duration ) +{ + isom_traf_entry_t *traf = isom_get_traf( root->fragment->movie, track_ID ); + if( !traf ) + return 0; /* no samples */ + if( !traf->cache || !traf->cache->fragment ) + return -1; + if( traf->trun_list && traf->trun_list->entry_count && traf->trun_list->tail && traf->trun_list->tail->data ) + { + /* Media Data Box preceded by Movie Fragment Box could change base_data_offsets in each track fragments later. + * We can't consider this here because the length of Movie Fragment Box is unknown at this step yet. */ + isom_trun_entry_t *trun = (isom_trun_entry_t *)traf->trun_list->tail->data; + if( root->fragment->pool_size ) + trun->flags |= ISOM_TR_FLAGS_DATA_OFFSET_PRESENT; + trun->data_offset = root->fragment->pool_size; + } + if( isom_append_fragment_track_run( root, &traf->cache->chunk ) ) + return -1; + return isom_set_fragment_last_duration( traf, last_sample_duration ); +} + +/* Public entry point: flush the samples still pooled for track_ID and set the duration of the last sample. + * Takes the fragment path when a movie fragment is active; otherwise outputs the track's cache + * and calls lsmash_set_last_sample_delta(). Returns -1 on failure. */ +int lsmash_flush_pooled_samples( lsmash_root_t *root, uint32_t track_ID, uint32_t last_sample_delta ) +{ + if( !root ) + return -1; + if( root->fragment && root->fragment->movie ) + return isom_flush_fragment_pooled_samples( root, track_ID, last_sample_delta ); + isom_trak_entry_t *trak = isom_get_trak( root, track_ID ); + if( !trak || !trak->cache || !trak->mdia || !trak->mdia->minf || !trak->mdia->minf->stbl + || !trak->mdia->minf->stbl->stsc || !trak->mdia->minf->stbl->stsc->list ) + return -1; + if( isom_output_cache( trak ) ) + return -1; + return lsmash_set_last_sample_delta( root, track_ID, last_sample_delta ); +} + +/* This function doesn't update sample_duration of the last sample in the previous movie fragment. + * Instead of this, isom_finish_movie_fragment undertakes this task. 
*/ +static int isom_update_fragment_previous_sample_duration( isom_traf_entry_t *traf, isom_trex_entry_t *trex, uint32_t duration ) +{ + isom_tfhd_t *tfhd = traf->tfhd; + isom_trun_entry_t *trun = (isom_trun_entry_t *)traf->trun_list->tail->data; + int previous_run_has_previous_sample = 0; + if( trun->sample_count == 1 ) + { + if( traf->trun_list->entry_count == 1 ) + return 0; /* The previous track run belongs to the previous movie fragment if it exists. */ + if( !traf->trun_list->tail->prev || !traf->trun_list->tail->prev->data ) + return -1; + /* OK. The previous sample exists in the previous track run in the same track fragment. */ + trun = (isom_trun_entry_t *)traf->trun_list->tail->prev->data; + previous_run_has_previous_sample = 1; + } + /* Update default_sample_duration of the Track Fragment Header Box + * if this duration is what the first sample in the current track fragment owns. */ + if( (trun->sample_count == 2 && traf->trun_list->entry_count == 1) + || (trun->sample_count == 1 && traf->trun_list->entry_count == 2) ) + { + if( duration != trex->default_sample_duration ) + tfhd->flags |= ISOM_TF_FLAGS_DEFAULT_SAMPLE_DURATION_PRESENT; + tfhd->default_sample_duration = duration; + } + /* Update the previous sample_duration if needed. 
*/ + if( duration != tfhd->default_sample_duration ) + trun->flags |= ISOM_TR_FLAGS_SAMPLE_DURATION_PRESENT; + if( trun->flags ) + { + /* When the previous sample lives in the previous track run, sample_count of that run is its number; + * otherwise the previous sample is the one before the sample just counted in this run. */ + uint32_t sample_number = trun->sample_count - !previous_run_has_previous_sample; + isom_trun_optional_row_t *row = isom_request_trun_optional_row( trun, tfhd, sample_number ); + if( !row ) + return -1; + row->sample_duration = duration; + } + traf->cache->fragment->last_duration = duration; + return 0; +} + +/* Build the sample flags of a fragment sample from its properties. + * The four dependency fields are masked to 2 bits each; the padding value, degradation priority + * and reserved field are always 0; the non-sync bit is set unless the sample carries the SYNC random access flag. */ +static isom_sample_flags_t isom_generate_fragment_sample_flags( lsmash_sample_t *sample ) +{ + isom_sample_flags_t flags; + flags.reserved = 0; + flags.is_leading = sample->prop.leading & 0x3; + flags.sample_depends_on = sample->prop.independent & 0x3; + flags.sample_is_depended_on = sample->prop.disposable & 0x3; + flags.sample_has_redundancy = sample->prop.redundant & 0x3; + flags.sample_padding_value = 0; + flags.sample_is_non_sync_sample = !(sample->prop.ra_flags & ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC); + flags.sample_degradation_priority = 0; + return flags; +} + +/* Update the tables of a track fragment (track run, header defaults, optional rows, random access entry) for one sample. + * Returns -1 on failure, 1 when a new track run was started because the chunk duration or size limit + * was exceeded, and 0 otherwise. */ +static int isom_update_fragment_sample_tables( isom_traf_entry_t *traf, lsmash_sample_t *sample ) +{ + isom_tfhd_t *tfhd = traf->tfhd; + isom_trex_entry_t *trex = isom_get_trex( traf->root->moov->mvex, tfhd->track_ID ); + if( !trex ) + return -1; + lsmash_root_t *root = traf->root; + isom_cache_t *cache = traf->cache; + isom_chunk_t *current = &cache->chunk; + if( !current->pool ) + { + /* Very initial settings, just once per track */ + current->pool = isom_create_sample_pool( 0 ); + if( !current->pool ) + return -1; + } + /* Create a new track run if the duration exceeds max_chunk_duration. + * Old one will be appended to the pool of this movie fragment. 
*/ + int delimit = (root->max_chunk_duration < ((double)(sample->dts - current->first_dts) / lsmash_get_media_timescale( root, tfhd->track_ID ))) + || (root->max_chunk_size < (current->pool->size + sample->length)); + isom_trun_entry_t *trun = NULL; + if( !traf->trun_list || !traf->trun_list->entry_count || delimit ) + { + if( delimit && traf->trun_list && traf->trun_list->entry_count && traf->trun_list->tail && traf->trun_list->tail->data ) + { + /* Media Data Box preceded by Movie Fragment Box could change base data offsets in each track fragments later. + * We can't consider this here because the length of Movie Fragment Box is unknown at this step yet. */ + trun = (isom_trun_entry_t *)traf->trun_list->tail->data; + if( root->fragment->pool_size ) + trun->flags |= ISOM_TR_FLAGS_DATA_OFFSET_PRESENT; + trun->data_offset = root->fragment->pool_size; + } + trun = isom_add_trun( traf ); + if( !trun ) + return -1; + } + else + { + if( !traf->trun_list->tail || !traf->trun_list->tail->data ) + return -1; + trun = (isom_trun_entry_t *)traf->trun_list->tail->data; + } + isom_sample_flags_t sample_flags = isom_generate_fragment_sample_flags( sample ); + if( ++trun->sample_count == 1 ) + { + if( traf->trun_list->entry_count == 1 ) + { + /* This track fragment isn't empty-duration-fragment any more. */ + tfhd->flags &= ~ISOM_TF_FLAGS_DURATION_IS_EMPTY; + /* Set up sample_description_index in this track fragment. */ + if( sample->index != trex->default_sample_description_index ) + tfhd->flags |= ISOM_TF_FLAGS_SAMPLE_DESCRIPTION_INDEX_PRESENT; + tfhd->sample_description_index = current->sample_description_index = sample->index; + /* Set up default_sample_size used in this track fragment. */ + tfhd->default_sample_size = sample->length; + /* Set up default_sample_flags used in this track fragment. + * Note: we decide an appropriate default value at the end of this movie fragment. 
*/ + tfhd->default_sample_flags = sample_flags; + /* Set up random access information if this sample is a sync sample. + * We inform only the first sample in each movie fragment. */ + if( root->bs->stream != stdout && (sample->prop.ra_flags & ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC) ) + { + isom_tfra_entry_t *tfra = isom_get_tfra( root->mfra, tfhd->track_ID ); + if( !tfra ) + { + tfra = isom_add_tfra( root->mfra ); + if( !tfra ) + return -1; + tfra->track_ID = tfhd->track_ID; + } + if( !tfra->list ) + { + tfra->list = lsmash_create_entry_list(); + if( !tfra->list ) + return -1; + } + isom_tfra_location_time_entry_t *rap = malloc( sizeof(isom_tfra_location_time_entry_t) ); + if( !rap ) + return -1; + rap->time = sample->cts; /* Set composition timestamp temporarily. + * At the end of the whole movie, this will be reset as presentation time. */ + rap->moof_offset = root->size; /* We place Movie Fragment Box in the head of each movie fragment. */ + rap->traf_number = cache->fragment->traf_number; + rap->trun_number = traf->trun_list->entry_count; + rap->sample_number = trun->sample_count; + if( lsmash_add_entry( tfra->list, rap ) ) + { + free( rap ); + return -1; + } + tfra->number_of_entry = tfra->list->entry_count; + /* Count the bytes each number needs and keep the largest (byte count - 1) seen so far. + * Each count is derived from its own field: trun_number for length_size_of_trun_num, not traf_number. + * The count is capped at 4 so the shift amount stays below 32. + * NOTE(review): the cap assumes the three numbers are 32-bit fields - confirm against the struct. */ + int length; + for( length = 1; length < 4 && (rap->traf_number >> (length * 8)); length++ ); + tfra->length_size_of_traf_num = LSMASH_MAX( length - 1, tfra->length_size_of_traf_num ); + for( length = 1; length < 4 && (rap->trun_number >> (length * 8)); length++ ); + tfra->length_size_of_trun_num = LSMASH_MAX( length - 1, tfra->length_size_of_trun_num ); + for( length = 1; length < 4 && (rap->sample_number >> (length * 8)); length++ ); + tfra->length_size_of_sample_num = LSMASH_MAX( length - 1, tfra->length_size_of_sample_num ); + } + /* Set up the base media decode time of this track fragment. + * This feature is available under ISO Base Media version 6 or later. 
*/ + if( root->max_isom_version >= 6 ) + { + assert( !traf->tfdt ); + if( isom_add_tfdt( traf ) ) + return -1; + if( sample->dts > UINT32_MAX ) + traf->tfdt->version = 1; + traf->tfdt->baseMediaDecodeTime = sample->dts; + } + } + trun->first_sample_flags = sample_flags; + current->first_dts = sample->dts; + } + /* Update the optional rows in the current track run except for sample_duration if needed. */ + if( sample->length != tfhd->default_sample_size ) + trun->flags |= ISOM_TR_FLAGS_SAMPLE_SIZE_PRESENT; + if( isom_compare_sample_flags( &sample_flags, &tfhd->default_sample_flags ) ) + trun->flags |= ISOM_TR_FLAGS_SAMPLE_FLAGS_PRESENT; + uint32_t sample_composition_time_offset = sample->cts - sample->dts; + if( sample_composition_time_offset ) + { + trun->flags |= ISOM_TR_FLAGS_SAMPLE_COMPOSITION_TIME_OFFSET_PRESENT; + /* Check if negative composition time offset is present. */ + isom_timestamp_t *ts_cache = &cache->timestamp; + if( (sample->cts + ts_cache->ctd_shift) < sample->dts ) + { + if( root->max_isom_version < 6 ) + return -1; /* Negative composition time offset is not supported. */ + if( (sample->dts - sample->cts) > INT32_MAX ) + return -1; /* Overflow */ + ts_cache->ctd_shift = sample->dts - sample->cts; + if( trun->version == 0 && root->max_isom_version >= 6 ) + trun->version = 1; + } + } + if( trun->flags ) + { + isom_trun_optional_row_t *row = isom_request_trun_optional_row( trun, tfhd, trun->sample_count ); + if( !row ) + return -1; + row->sample_size = sample->length; + row->sample_flags = sample_flags; + row->sample_composition_time_offset = sample_composition_time_offset; + } + /* Set up the previous sample_duration if this sample is not the first sample in the overall movie. */ + if( cache->fragment->has_samples ) + { + /* Note: when using for live streaming, it is not good idea to return error (-1) by sample->dts < prev_dts + * since that's trivial for such semi-permanent presentation. 
*/ + uint64_t prev_dts = cache->timestamp.dts; + if( sample->dts <= prev_dts || sample->dts > prev_dts + UINT32_MAX ) + return -1; + uint32_t sample_duration = sample->dts - prev_dts; + if( isom_update_fragment_previous_sample_duration( traf, trex, sample_duration ) ) + return -1; + } + cache->timestamp.dts = sample->dts; + cache->fragment->largest_cts = LSMASH_MAX( sample->cts, cache->fragment->largest_cts ); + return delimit; +} + +/* Append a sample to a track while no movie fragment has been created yet. + * Returns 0 on success and -1 on failure. */ +static int isom_append_fragment_sample_internal_initial( isom_trak_entry_t *trak, lsmash_sample_t *sample ) +{ + int delimit = 0; + /* Update the sample tables of this track fragment. + * If a new chunk was created, append the previous one to the pool of this movie fragment. */ + delimit = isom_update_sample_tables( trak, sample ); + if( delimit < 0 ) + return -1; + else if( delimit == 1 && isom_append_fragment_track_run( trak->root, &trak->cache->chunk ) ) + return -1; /* Don't pool more samples on top of a chunk that could not be appended. */ + /* Add a new sample into the pool of this track fragment. */ + if( isom_pool_sample( trak->cache->chunk.pool, sample ) ) + return -1; + trak->cache->fragment->has_samples = 1; + return 0; +} + +/* Append a sample to a track fragment of the current movie fragment. + * Returns 0 on success and -1 on failure. */ +static int isom_append_fragment_sample_internal( isom_traf_entry_t *traf, lsmash_sample_t *sample ) +{ + int delimit = 0; + /* Update the sample tables of this track fragment. + * If a new track run was created, append the previous one to the pool of this movie fragment. */ + delimit = isom_update_fragment_sample_tables( traf, sample ); + if( delimit < 0 ) + return -1; + else if( delimit == 1 && isom_append_fragment_track_run( traf->root, &traf->cache->chunk ) ) + return -1; /* Don't pool more samples on top of a track run that could not be appended. */ + /* Add a new sample into the pool of this track fragment. 
*/ + if( isom_pool_sample( traf->cache->chunk.pool, sample ) ) + return -1; + traf->cache->fragment->has_samples = 1; + return 0; +} + +static int isom_append_fragment_sample( lsmash_root_t *root, uint32_t track_ID, lsmash_sample_t *sample ) +{ + isom_fragment_manager_t *fragment = root->fragment; + if( !fragment || !fragment->pool ) + return -1; + isom_trak_entry_t *trak = isom_get_trak( root, track_ID ); + if( !trak || !trak->root || !trak->cache || !trak->cache->fragment || !trak->tkhd || !trak->mdia + || !trak->mdia->mdhd || !trak->mdia->mdhd->timescale + || !trak->mdia->minf || !trak->mdia->minf->stbl + || !trak->mdia->minf->stbl->stsd || !trak->mdia->minf->stbl->stsd->list + || !trak->mdia->minf->stbl->stsc || !trak->mdia->minf->stbl->stsc->list ) + return -1; + int (*append_sample_func)( void *, lsmash_sample_t * ) = NULL; + void *track_fragment = NULL; + if( !fragment->movie ) + { + append_sample_func = (int (*)( void *, lsmash_sample_t * ))isom_append_fragment_sample_internal_initial; + track_fragment = trak; + } + else + { + isom_traf_entry_t *traf = isom_get_traf( fragment->movie, track_ID ); + if( !traf ) + { + traf = isom_add_traf( root, fragment->movie ); + if( isom_add_tfhd( traf ) ) + return -1; + traf->tfhd->flags = ISOM_TF_FLAGS_DURATION_IS_EMPTY; /* no samples for this track fragment yet */ + traf->tfhd->track_ID = trak->tkhd->track_ID; + traf->cache = trak->cache; + traf->cache->fragment->traf_number = fragment->movie->traf_list->entry_count; + } + else if( !traf->root || !traf->root->moov || !traf->root->moov->mvex || !traf->cache || !traf->tfhd ) + return -1; + append_sample_func = (int (*)( void *, lsmash_sample_t * ))isom_append_fragment_sample_internal; + track_fragment = traf; + } + isom_sample_entry_t *sample_entry = (isom_sample_entry_t *)lsmash_get_entry_data( trak->mdia->minf->stbl->stsd->list, sample->index ); + if( !sample_entry ) + return -1; + if( isom_is_lpcm_audio( sample_entry ) ) + { + uint32_t frame_size = ((isom_audio_entry_t 
*)sample_entry)->constBytesPerAudioPacket; + if( sample->length == frame_size ) + return append_sample_func( track_fragment, sample ); + else if( !frame_size || sample->length < frame_size || sample->length % frame_size ) + return -1; /* A zero frame size would never advance the split loop, and a payload that is not a whole number of LPCMFrames would make its last memcpy read past sample->data. */ + /* Append samples split into each LPCMFrame. */ + uint64_t dts = sample->dts; + uint64_t cts = sample->cts; + for( uint32_t offset = 0; offset < sample->length; offset += frame_size ) + { + lsmash_sample_t *lpcm_sample = lsmash_create_sample( frame_size ); + if( !lpcm_sample ) + return -1; + memcpy( lpcm_sample->data, sample->data + offset, frame_size ); + lpcm_sample->dts = dts++; + lpcm_sample->cts = cts++; + lpcm_sample->prop = sample->prop; + lpcm_sample->index = sample->index; + if( append_sample_func( track_fragment, lpcm_sample ) ) + { + lsmash_delete_sample( lpcm_sample ); + return -1; + } + } + /* Every frame has been copied into its own sample, so the original sample is released here. */ + lsmash_delete_sample( sample ); + return 0; + } + return append_sample_func( track_fragment, sample ); +} + +int lsmash_append_sample( lsmash_root_t *root, uint32_t track_ID, lsmash_sample_t *sample ) +{ + /* We think max_chunk_duration == 0, which means all samples will be cached on memory, should be prevented. + * This means removal of a feature that we used to have, but anyway very alone chunk does not make sense. */ + if( !root || !root->bs || !sample || !sample->data || !track_ID + || root->max_chunk_duration == 0 || root->max_async_tolerance == 0 ) + return -1; + /* Write File Type Box here if it was not written yet. 
*/ + if( !root->file_type_written && isom_write_ftyp( root ) ) + return -1; + if( root->fragment && root->fragment->pool ) + return isom_append_fragment_sample( root, track_ID, sample ); + return isom_append_sample( root, track_ID, sample ); +} + +/*---- misc functions ----*/ + +int lsmash_delete_explicit_timeline_map( lsmash_root_t *root, uint32_t track_ID ) +{ + isom_trak_entry_t *trak = isom_get_trak( root, track_ID ); + if( !trak ) + return -1; + isom_remove_edts( trak->edts ); + trak->edts = NULL; + return isom_update_tkhd_duration( trak ); +} + +void lsmash_delete_tyrant_chapter( lsmash_root_t *root ) +{ + if( !root || !root->moov || !root->moov->udta ) + return; + isom_remove_chpl( root->moov->udta->chpl ); + root->moov->udta->chpl = NULL; +} + +/* Add a copyright notice to the movie (track_ID == 0) or to the track given by track_ID. + * ISO_language must be 0 or at least 0x800, and only one notice per language is accepted. + * The notice string is duplicated, so the caller keeps ownership of 'notice'. + * Returns 0 on success and -1 on failure. */ +int lsmash_set_copyright( lsmash_root_t *root, uint32_t track_ID, uint16_t ISO_language, char *notice ) +{ + if( !root || !root->moov || !root->isom_compatible || (ISO_language && ISO_language < 0x800) || !notice ) + return -1; + isom_udta_t *udta; + if( track_ID ) + { + isom_trak_entry_t *trak = isom_get_trak( root, track_ID ); + if( !trak || (!trak->udta && isom_add_udta( root, track_ID )) ) + return -1; + udta = trak->udta; + } + else + { + if( !root->moov->udta && isom_add_udta( root, 0 ) ) + return -1; + udta = root->moov->udta; + } + assert( udta ); + if( udta->cprt_list ) + for( lsmash_entry_t *entry = udta->cprt_list->head; entry; entry = entry->next ) + { + isom_cprt_t *cprt = (isom_cprt_t *)entry->data; + if( !cprt || cprt->language == ISO_language ) + return -1; + } + /* Duplicate the notice before touching the list so that an allocation failure + * can't leave a Copyright Box without its notice behind. */ + size_t notice_length = strlen( notice ) + 1; + char *notice_copy = lsmash_memdup( notice, notice_length ); + if( !notice_copy ) + return -1; + if( isom_add_cprt( udta ) ) + { + free( notice_copy ); /* NOTE(review): assumes lsmash_memdup() memory is released with free() - confirm. */ + return -1; + } + isom_cprt_t *cprt = (isom_cprt_t *)udta->cprt_list->tail->data; + cprt->language = ISO_language; + cprt->notice_length = notice_length; + cprt->notice = notice_copy; + return 0; +} diff --git a/output/mp4/isom.h b/output/mp4/isom.h new file mode 100644 index 0000000..8e68d28 --- /dev/null +++ b/output/mp4/isom.h @@ -0,0 +1,34 @@ 
+/***************************************************************************** + * isom.h: + ***************************************************************************** + * Copyright (C) 2011-2012 L-SMASH project + * + * Authors: Hiroki Taniura + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *****************************************************************************/ + +/* This file is available under an ISC license. 
*/ + +#ifndef LSMASH_ISOM_H +#define LSMASH_ISOM_H + +isom_tref_type_t *isom_add_track_reference_type( isom_tref_t *tref, isom_track_reference_type type, uint32_t ref_count, uint32_t *track_ID ); +int isom_add_chpl_entry( isom_chpl_t *chpl, isom_chapter_entry_t *chap_data ); +int isom_add_tref( isom_trak_entry_t *trak ); +int isom_add_chpl( isom_moov_t *moov ); +void isom_remove_track_reference_type( isom_tref_type_t *ref ); +void isom_remove_tref( isom_tref_t *tref ); +void isom_remove_trak( isom_trak_entry_t *trak ); + +#endif diff --git a/output/mp4/lsmash.h b/output/mp4/lsmash.h new file mode 100644 index 0000000..c25823a --- /dev/null +++ b/output/mp4/lsmash.h @@ -0,0 +1,2294 @@ +/***************************************************************************** + * lsmash.h: + ***************************************************************************** + * Copyright (C) 2010-2012 L-SMASH project + * + * Authors: Yusuke Nakamura + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *****************************************************************************/ + +/* This file is available under an ISC license. */ + +#ifndef LSMASH_H +#define LSMASH_H + +#include + +#define PRIVATE /* If this declaration is placed at a variable, any user shouldn't use it. 
*/ + +#define LSMASH_4CC( a, b, c, d ) (((a)<<24) | ((b)<<16) | ((c)<<8) | (d)) +#define LSMASH_PACK_ISO_LANGUAGE( a, b, c ) ((((a-0x60)&0x1f)<<10) | (((b-0x60)&0x1f)<<5) | ((c-0x60)&0x1f)) + +/**************************************************************************** + * ROOT + * This is the top level abstract layer for file handling. + ****************************************************************************/ +typedef struct lsmash_root_tag lsmash_root_t; + +typedef enum +{ + LSMASH_FILE_MODE_WRITE = 1, + LSMASH_FILE_MODE_READ = 1<<1, + LSMASH_FILE_MODE_FRAGMENTED = 1<<2, + LSMASH_FILE_MODE_DUMP = 1<<3, + LSMASH_FILE_MODE_WRITE_FRAGMENTED = LSMASH_FILE_MODE_WRITE | LSMASH_FILE_MODE_FRAGMENTED, + //LSMASH_FILE_MODE_READ_FRAGMENTED = LSMASH_FILE_MODE_READ | LSMASH_FILE_MODE_FRAGMENTED, +} lsmash_file_mode; + +typedef int (*lsmash_adhoc_remux_callback)( void* param, uint64_t done, uint64_t total ); +typedef struct { + uint64_t buffer_size; + lsmash_adhoc_remux_callback func; + void* param; +} lsmash_adhoc_remux_t; + +/**************************************************************************** + * Basic Types + ****************************************************************************/ +/* rational types */ +typedef struct +{ + uint32_t n; /* numerator */ + uint32_t d; /* denominator */ +} lsmash_rational_u32_t; + +typedef struct +{ + int32_t n; /* numerator */ + uint32_t d; /* denominator */ +} lsmash_rational_s32_t; + +typedef enum +{ + LSMASH_BOOLEAN_FALSE = 0, + LSMASH_BOOLEAN_TRUE = 1 +} lsmash_boolean_t; + +/**************************************************************************** + * Box + ****************************************************************************/ +typedef uint32_t lsmash_compact_box_type_t; + +/* An UUID structure for extended box type */ +typedef struct +{ + uint32_t fourcc; /* four characters codes that identify extended box type partially + * If the box is not a UUID box, this field shall be the same as the box type. 
+ * Note: characters in this field aren't always printable. */ + uint8_t id[12]; /* If the box is not a UUID box, this field shall be set to 12-byte ISO reserved value + * { 0x00, 0x11, 0x00, 0x10, 0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71 } + * and shall not be written into the stream together with above-defined four characters codes. + * As an exception, we could set the value + * { 0x0F, 0x11, 0x4D, 0xA5, 0xBF, 0x4E, 0xF2, 0xC4, 0x8C, 0x6A, 0xA1, 0x1E } + * to indicate the box is derived from QuickTime file format. */ +} lsmash_extended_box_type_t; + +typedef struct +{ + lsmash_compact_box_type_t fourcc; /* four characters codes that identify box type + * Note: characters in this field aren't always printable. */ + lsmash_extended_box_type_t user; /* Universal Unique IDentifier, i.e. UUID */ + /* If 'fourcc' doesn't equal 'uuid', ignore this field. */ +} lsmash_box_type_t; + +#define LSMASH_BOX_TYPE_INITIALIZER { 0x00000000, { 0x00000000, { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } } +#define LSMASH_BOX_TYPE_UNSPECIFIED static_lsmash_box_type_unspecified +static const lsmash_box_type_t static_lsmash_box_type_unspecified = LSMASH_BOX_TYPE_INITIALIZER; + +/* Return extended box type that consists of combination of given FourCC and 12-byte ID. */ +lsmash_extended_box_type_t lsmash_form_extended_box_type( uint32_t fourcc, const uint8_t id[12] ); + +/* Return box type that consists of combination of given compact and extended box type. 
*/ +lsmash_box_type_t lsmash_form_box_type( lsmash_compact_box_type_t type, lsmash_extended_box_type_t user ); + +#define LSMASH_ISO_BOX_TYPE_INITIALIZER( x ) { x, { x, { 0x00, 0x11, 0x00, 0x10, 0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71 } } } +#define LSMASH_QTFF_BOX_TYPE_INITIALIZER( x ) { x, { x, { 0x0F, 0x11, 0x4D, 0xA5, 0xBF, 0x4E, 0xF2, 0xC4, 0x8C, 0x6A, 0xA1, 0x1E } } } +lsmash_box_type_t lsmash_form_iso_box_type( lsmash_compact_box_type_t type ); +lsmash_box_type_t lsmash_form_qtff_box_type( lsmash_compact_box_type_t type ); + +/* Return 1 if the both box types are identical. Otherwise return 0. */ +int lsmash_check_box_type_identical( lsmash_box_type_t a, lsmash_box_type_t b ); + +/* Return 1 if the box type is specified. Otherwise, i.e. LSMASH_BOX_TYPE_UNSPECIFIED, return 0. */ +int lsmash_check_box_type_specified( lsmash_box_type_t *box_type ); + +/**************************************************************************** + * Summary of Stream Configuration + * This is L-SMASH's original structure. + ****************************************************************************/ +typedef enum +{ + LSMASH_SUMMARY_TYPE_UNKOWN = 0, + LSMASH_SUMMARY_TYPE_VIDEO, + LSMASH_SUMMARY_TYPE_AUDIO, +} lsmash_summary_type; + +typedef lsmash_box_type_t lsmash_codec_type_t; + +#define LSMASH_CODEC_TYPE_INITIALIZER LSMASH_BOX_TYPE_INITIALIZER +#define LSMASH_CODEC_TYPE_UNSPECIFIED ((lsmash_codec_type_t)static_lsmash_box_type_unspecified) + +/* Return 1 if the both CODEC identifiers are identical. Otherwise return 0. 
*/ +int lsmash_check_codec_type_identical( lsmash_codec_type_t a, lsmash_codec_type_t b ); + +#define DEFINE_ISOM_CODEC_TYPE( BOX_TYPE_NAME, BOX_TYPE_FOURCC ) \ + static const lsmash_codec_type_t BOX_TYPE_NAME = LSMASH_ISO_BOX_TYPE_INITIALIZER( BOX_TYPE_FOURCC ) +#define DEFINE_QTFF_CODEC_TYPE( BOX_TYPE_NAME, BOX_TYPE_FOURCC ) \ + static const lsmash_codec_type_t BOX_TYPE_NAME = LSMASH_QTFF_BOX_TYPE_INITIALIZER( BOX_TYPE_FOURCC ) + +/* Audio CODEC identifiers */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_AC_3_AUDIO, LSMASH_4CC( 'a', 'c', '-', '3' ) ); /* AC-3 audio */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_ALAC_AUDIO, LSMASH_4CC( 'a', 'l', 'a', 'c' ) ); /* Apple lossless audio codec */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_DRA1_AUDIO, LSMASH_4CC( 'd', 'r', 'a', '1' ) ); /* DRA Audio */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_DTSC_AUDIO, LSMASH_4CC( 'd', 't', 's', 'c' ) ); /* DTS Coherent Acoustics audio */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_DTSH_AUDIO, LSMASH_4CC( 'd', 't', 's', 'h' ) ); /* DTS-HD High Resolution Audio */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_DTSL_AUDIO, LSMASH_4CC( 'd', 't', 's', 'l' ) ); /* DTS-HD Master Audio */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_DTSE_AUDIO, LSMASH_4CC( 'd', 't', 's', 'e' ) ); /* DTS Express low bit rate audio, also known as DTS LBR */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_EC_3_AUDIO, LSMASH_4CC( 'e', 'c', '-', '3' ) ); /* Enhanced AC-3 audio */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_ENCA_AUDIO, LSMASH_4CC( 'e', 'n', 'c', 'a' ) ); /* Encrypted/Protected audio */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_G719_AUDIO, LSMASH_4CC( 'g', '7', '1', '9' ) ); /* ITU-T Recommendation G.719 (2008) ); */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_G726_AUDIO, LSMASH_4CC( 'g', '7', '2', '6' ) ); /* ITU-T Recommendation G.726 (1990) ); */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_M4AE_AUDIO, LSMASH_4CC( 'm', '4', 'a', 'e' ) ); /* MPEG-4 Audio Enhancement */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_MLPA_AUDIO, LSMASH_4CC( 
'm', 'l', 'p', 'a' ) ); /* MLP Audio */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_MP4A_AUDIO, LSMASH_4CC( 'm', 'p', '4', 'a' ) ); /* MPEG-4 Audio */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_RAW_AUDIO, LSMASH_4CC( 'r', 'a', 'w', ' ' ) ); /* Uncompressed audio */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_SAMR_AUDIO, LSMASH_4CC( 's', 'a', 'm', 'r' ) ); /* Narrowband AMR voice */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_SAWB_AUDIO, LSMASH_4CC( 's', 'a', 'w', 'b' ) ); /* Wideband AMR voice */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_SAWP_AUDIO, LSMASH_4CC( 's', 'a', 'w', 'p' ) ); /* Extended AMR-WB (AMR-WB+) ); */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_SEVC_AUDIO, LSMASH_4CC( 's', 'e', 'v', 'c' ) ); /* EVRC Voice */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_SQCP_AUDIO, LSMASH_4CC( 's', 'q', 'c', 'p' ) ); /* 13K Voice */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_SSMV_AUDIO, LSMASH_4CC( 's', 's', 'm', 'v' ) ); /* SMV Voice */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_TWOS_AUDIO, LSMASH_4CC( 't', 'w', 'o', 's' ) ); /* Uncompressed 16-bit audio */ + +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_23NI_AUDIO, LSMASH_4CC( '2', '3', 'n', 'i' ) ); /* 32-bit little endian integer uncompressed */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_MAC3_AUDIO, LSMASH_4CC( 'M', 'A', 'C', '3' ) ); /* MACE 3:1 */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_MAC6_AUDIO, LSMASH_4CC( 'M', 'A', 'C', '6' ) ); /* MACE 6:1 */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_NONE_AUDIO, LSMASH_4CC( 'N', 'O', 'N', 'E' ) ); /* either 'raw ' or 'twos' */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_QDM2_AUDIO, LSMASH_4CC( 'Q', 'D', 'M', '2' ) ); /* Qdesign music 2 */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_QDMC_AUDIO, LSMASH_4CC( 'Q', 'D', 'M', 'C' ) ); /* Qdesign music 1 */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_QCLP_AUDIO, LSMASH_4CC( 'Q', 'c', 'l', 'p' ) ); /* Qualcomm PureVoice */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_AC_3_AUDIO, LSMASH_4CC( 'a', 'c', '-', '3' ) ); /* Digital Audio Compression Standard (AC-3, Enhanced AC-3) */ 
+DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_AGSM_AUDIO, LSMASH_4CC( 'a', 'g', 's', 'm' ) ); /* GSM */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_ALAC_AUDIO, LSMASH_4CC( 'a', 'l', 'a', 'c' ) ); /* Apple lossless audio codec */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_ALAW_AUDIO, LSMASH_4CC( 'a', 'l', 'a', 'w' ) ); /* a-Law 2:1 */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_CDX2_AUDIO, LSMASH_4CC( 'c', 'd', 'x', '2' ) ); /* CD/XA 2:1 */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_CDX4_AUDIO, LSMASH_4CC( 'c', 'd', 'x', '4' ) ); /* CD/XA 4:1 */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_DVCA_AUDIO, LSMASH_4CC( 'd', 'v', 'c', 'a' ) ); /* DV Audio */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_DVI_AUDIO, LSMASH_4CC( 'd', 'v', 'i', ' ' ) ); /* DVI (as used in RTP, 4:1 compression) */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_FL32_AUDIO, LSMASH_4CC( 'f', 'l', '3', '2' ) ); /* 32-bit float */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_FL64_AUDIO, LSMASH_4CC( 'f', 'l', '6', '4' ) ); /* 64-bit float */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_IMA4_AUDIO, LSMASH_4CC( 'i', 'm', 'a', '4' ) ); /* IMA (International Multimedia Assocation, defunct, 4:1) */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_IN24_AUDIO, LSMASH_4CC( 'i', 'n', '2', '4' ) ); /* 24-bit integer uncompressed */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_IN32_AUDIO, LSMASH_4CC( 'i', 'n', '3', '2' ) ); /* 32-bit integer uncompressed */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_LPCM_AUDIO, LSMASH_4CC( 'l', 'p', 'c', 'm' ) ); /* Uncompressed audio (various integer and float formats) */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_MP4A_AUDIO, LSMASH_4CC( 'm', 'p', '4', 'a' ) ); /* MPEG-4 Audio */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_RAW_AUDIO, LSMASH_4CC( 'r', 'a', 'w', ' ' ) ); /* 8-bit offset-binary uncompressed */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_SOWT_AUDIO, LSMASH_4CC( 's', 'o', 'w', 't' ) ); /* 16-bit little endian uncompressed */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_TWOS_AUDIO, LSMASH_4CC( 't', 'w', 'o', 's' ) ); /* 8-bit or 16-bit big endian uncompressed */ 
+DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_ULAW_AUDIO, LSMASH_4CC( 'u', 'l', 'a', 'w' ) ); /* uLaw 2:1 */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_VDVA_AUDIO, LSMASH_4CC( 'v', 'd', 'v', 'a' ) ); /* DV audio (variable duration per video frame) */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_FULLMP3_AUDIO, LSMASH_4CC( '.', 'm', 'p', '3' ) ); /* MPEG-1 layer 3, CBR & VBR (QT4.1 and later) */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_MP3_AUDIO, 0x6D730055 ); /* MPEG-1 layer 3, CBR only (pre-QT4.1) */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_ADPCM2_AUDIO, 0x6D730002 ); /* Microsoft ADPCM - ACM code 2 */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_ADPCM17_AUDIO, 0x6D730011 ); /* DVI/Intel IMA ADPCM - ACM code 17 */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_GSM49_AUDIO, 0x6D730031 ); /* Microsoft GSM 6.10 - ACM code 49 */ +DEFINE_QTFF_CODEC_TYPE( QT_CODEC_TYPE_NOT_SPECIFIED, 0x00000000 ); /* either 'raw ' or 'twos' */ + +/* Video CODEC identifiers */ +DEFINE_ISOM_CODEC_TYPE( ISOM_CODEC_TYPE_AVC