diff --git a/Makefile b/Makefile
index 0b43a3e..287923a 100644
--- a/Makefile
+++ b/Makefile
@@ -11,6 +11,7 @@ SRCS = common/mc.c common/predict.c common/pixel.c common/macroblock.c \
        common/mvpred.c \
        encoder/analyse.c encoder/me.c encoder/ratecontrol.c \
        encoder/set.c encoder/macroblock.c encoder/cabac.c \
+       encoder/speed.c \
        encoder/cavlc.c encoder/encoder.c encoder/lookahead.c
 
 SRCCLI = x264.c input/timecode.c \
diff --git a/common/common.c b/common/common.c
index 62bef99..d326faa 100644
--- a/common/common.c
+++ b/common/common.c
@@ -112,6 +112,11 @@ void x264_param_default( x264_param_t *param )
     param->rc.i_zones = 0;
     param->rc.b_mb_tree = 1;
 
+    // speedcontrol
+    param->sc.f_speed = 0;
+    param->sc.i_buffer_size = 30;
+    param->sc.f_buffer_init = 0.75;
+
     /* Log */
     param->pf_log = x264_log_default;
     param->p_log_private = NULL;
@@ -913,6 +918,14 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
         p->rc.f_complexity_blur = atof(value);
     OPT("zones")
         p->rc.psz_zones = strdup(value);
+    OPT("speed")
+        p->sc.f_speed = atof(value);
+    OPT("speed-bufsize")
+        p->sc.i_buffer_size = atoi(value);
+    OPT("speed-init")
+        p->sc.f_buffer_init = atof(value);
+    OPT("speed-alt-timer")
+        p->sc.b_alt_timer = atobool(value);
     OPT("psnr")
         p->analyse.b_psnr = atobool(value);
     OPT("ssim")
@@ -1193,6 +1206,8 @@ char *x264_param2string( x264_param_t *p, int b_res )
         s += sprintf( s, "timebase=%u/%u ", p->i_timebase_num, p->i_timebase_den );
     }
 
+    // FIXME speedcontrol stuff
+
     s += sprintf( s, "cabac=%d", p->b_cabac );
     s += sprintf( s, " ref=%d", p->i_frame_reference );
     s += sprintf( s, " deblock=%d:%d:%d", p->b_deblocking_filter,
diff --git a/common/common.h b/common/common.h
index c564768..e60ec5a 100644
--- a/common/common.h
+++ b/common/common.h
@@ -359,6 +359,7 @@ typedef struct x264_lookahead_t
 } x264_lookahead_t;
 
 typedef struct x264_ratecontrol_t   x264_ratecontrol_t;
+typedef struct x264_speedcontrol_t  x264_speedcontrol_t;
 
 struct x264_t
 {
@@ -713,6 +714,7 @@ struct x264_t
 
     /* rate control encoding only */
     x264_ratecontrol_t *rc;
+    x264_speedcontrol_t *sc;
 
     /* stats */
     struct
diff --git a/encoder/encoder.c b/encoder/encoder.c
index de06251..2ba0bc6 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -745,6 +745,8 @@ static int x264_validate_parameters( x264_t *h )
         h->param.rc.f_qblur = 0;
     if( h->param.rc.f_complexity_blur < 0 )
         h->param.rc.f_complexity_blur = 0;
+    if( h->param.sc.i_buffer_size < 0 || h->param.sc.f_speed <= 0 )
+        h->param.sc.i_buffer_size = 0;
 
     h->param.i_sps_id &= 31;
 
@@ -990,6 +992,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
 
     mbcmp_init( h );
 
+    if( h->param.sc.i_buffer_size )
+        x264_speedcontrol_new( h );
+
+
     p = buf + sprintf( buf, "using cpu capabilities:" );
     for( int i = 0; x264_cpu_names[i].flags; i++ )
     {
@@ -2471,6 +2477,8 @@ int x264_encoder_encode( x264_t *h,
 
     /* Init the rate control */
     /* FIXME: Include slice header bit cost. */
+    if( h->param.sc.i_buffer_size )
+        x264_speedcontrol_frame( h );
     x264_ratecontrol_start( h, h->fenc->i_qpplus1, overhead*8 );
     i_global_qp = x264_ratecontrol_qp( h );
 
@@ -2639,6 +2647,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
 
     h->out.i_nal = 0;
     x264_noise_reduction_update( h );
+
+    if( h->param.sc.i_buffer_size )
+        x264_speedcontrol_frame_end( h );
 
     /* ---------------------- Compute/Print statistics --------------------- */
     x264_thread_sync_stat( h, h->thread[0] );
@@ -3071,6 +3082,7 @@ void x264_encoder_close ( x264_t *h )
 
     /* rc */
     x264_ratecontrol_delete( h );
+    x264_speedcontrol_delete( h );
 
     /* param */
     if( h->param.rc.psz_stat_out )
diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
index e052b2a..773bc89 100644
--- a/encoder/ratecontrol.h
+++ b/encoder/ratecontrol.h
@@ -48,5 +48,10 @@ int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t
 void x264_threads_distribute_ratecontrol( x264_t *h );
 void x264_threads_merge_ratecontrol( x264_t *h );
 int x264_hrd_fullness( x264_t *h );
+void x264_speedcontrol_new( x264_t *h );
+void x264_speedcontrol_delete( x264_t *h );
+void x264_speedcontrol_frame( x264_t *h );
+void x264_speedcontrol_frame_end( x264_t *h );
+
 
 #endif
diff --git a/encoder/speed.c b/encoder/speed.c
new file mode 100644
index 0000000..2f88887
--- /dev/null
+++ b/encoder/speed.c
@@ -0,0 +1,282 @@
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include "common/common.h"
+#include "common/cpu.h"
+
+struct x264_speedcontrol_t
+{
+    // all times are in usec
+    int64_t timestamp;   // when was speedcontrol last invoked
+    int64_t cpu_time;    // time spent encoding the previous frame
+    int64_t buffer_size; // assumed application-side buffer of frames to be streamed,
+    int64_t buffer_fill; //   where full = we don't have to hurry
+    int64_t compensation_period; // how quickly we try to return to the target buffer fullness
+    float fps, spf;
+    int preset;          // which setting was used in the previous frame
+    int prev_frame;
+    float cplx_num;      // rolling average of estimated spf for preset #0
+    float cplx_den;
+    float cplx_decay;
+    float dither;
+    x264_param_t user_param;
+
+    struct {
+        int64_t min_buffer, max_buffer;
+        double avg_preset;
+        int den;
+    } stat;
+};
+
+/* Allocate and initialise the speedcontrol state from h->param.sc.
+ * If the allocation fails, speedcontrol is disabled (i_buffer_size = 0). */
+void x264_speedcontrol_new( x264_t *h )
+{
+    x264_speedcontrol_t *sc = h->sc = x264_malloc( sizeof(x264_speedcontrol_t) );
+    if( !sc )
+    {
+        // the encoder hooks are gated on i_buffer_size, so zeroing it turns the feature off
+        h->param.sc.i_buffer_size = 0;
+        return;
+    }
+    x264_emms();
+    memset( sc, 0, sizeof(x264_speedcontrol_t) );
+
+    if( h->param.sc.f_speed <= 0 )
+        h->param.sc.f_speed = 1;
+    // float division: integer division would turn e.g. 30000/1001 into 29
+    sc->fps = (float)h->param.i_fps_num / h->param.i_fps_den;
+    sc->spf = 1e6 / sc->fps;
+    h->param.sc.i_buffer_size = X264_MAX( 3, h->param.sc.i_buffer_size );
+    sc->buffer_size = h->param.sc.i_buffer_size * 1e6 / sc->fps;
+    sc->buffer_fill = sc->buffer_size * h->param.sc.f_buffer_init;
+    sc->buffer_fill = x264_clip3( sc->buffer_fill, sc->spf, sc->buffer_size );
+    sc->compensation_period = sc->buffer_size/4;
+    sc->timestamp = x264_mdate();
+    sc->preset = -1;
+    sc->prev_frame = 0;
+    sc->cplx_num = 3e3; //FIXME estimate initial complexity
+    sc->cplx_den = .1;
+    sc->cplx_decay = 1 - 1./h->param.sc.i_buffer_size;
+    sc->stat.min_buffer = sc->buffer_size;
+    sc->stat.max_buffer = 0;
+    sc->user_param = h->param;
+}
+
+/* Log summary statistics and free the speedcontrol state (no-op if unset). */
+void x264_speedcontrol_delete( x264_t *h )
+{
+    x264_speedcontrol_t *sc = h->sc;
+    if( !sc )
+        return;
+    x264_log( h, X264_LOG_INFO, "speedcontrol: avg preset=%.3f buffer min=%.3f max=%.3f\n",
+              sc->stat.den ? sc->stat.avg_preset / sc->stat.den : 0.,
+              (float)sc->stat.min_buffer / sc->buffer_size,
+              (float)sc->stat.max_buffer / sc->buffer_size );
+//  x264_log( h, X264_LOG_INFO, "speedcontrol: avg cplx=%.5f\n", sc->cplx_num / sc->cplx_den );
+    x264_free( sc );
+    h->sc = NULL; // don't leave a dangling pointer behind
+}
+
+/* Round f down to an integer, accumulating the discarded fraction in
+ * sc->dither and rounding up once a whole unit has accumulated. */
+static int dither( x264_speedcontrol_t *sc, float f )
+{
+    int i = f;
+    if( f < 0 )
+        i--;
+    sc->dither += f - i;
+    if( sc->dither >= 1. )
+    {
+        sc->dither--;
+        i++;
+    }
+    return i;
+}
+
+typedef struct
+{
+    float time; // relative encoding time, compared to the other presets
+    int subme;
+    int me;
+    int refs;
+    int mix;
+    int trellis;
+    int partitions;
+    int chromame;
+    float psy_rd;
+    float psy_trellis;
+} sc_preset_t;
+
+#define PRESETS 13
+static const sc_preset_t presets[PRESETS] =
+{
+#define I4 X264_ANALYSE_I4x4
+#define I8 X264_ANALYSE_I8x8
+#define P8 X264_ANALYSE_PSUB16x16
+#define B8 X264_ANALYSE_BSUB16x16
+/*0*/  { .time=1.060, .subme=1, .me=X264_ME_DIA, .refs=1, .mix=0, .chromame=0, .trellis=0, .partitions=0, .psy_rd=0 },
+/*1*/  { .time=1.120, .subme=1, .me=X264_ME_DIA, .refs=1, .mix=0, .chromame=0, .trellis=0, .partitions=I8|I4, .psy_rd=0 },
+/*2*/  { .time=1.440, .subme=3, .me=X264_ME_HEX, .refs=1, .mix=0, .chromame=0, .trellis=0, .partitions=I8|I4, .psy_rd=0 },
+/*3*/  { .time=1.620, .subme=5, .me=X264_ME_HEX, .refs=1, .mix=0, .chromame=0, .trellis=0, .partitions=I8|I4, .psy_rd=1.0 },
+/*4*/  { .time=2.660, .subme=6, .me=X264_ME_HEX, .refs=1, .mix=0, .chromame=0, .trellis=0, .partitions=I8|I4, .psy_rd=1.0 },
+/*5*/  { .time=3.560, .subme=6, .me=X264_ME_HEX, .refs=1, .mix=0, .chromame=0, .trellis=1, .partitions=I8|I4, .psy_rd=1.0 },
+/*6*/  { .time=4.640, .subme=6, .me=X264_ME_HEX, .refs=2, .mix=0, .chromame=0, .trellis=1, .partitions=I8|I4, .psy_rd=1.0 },
+/*7*/  { .time=5.190, .subme=7, .me=X264_ME_HEX, .refs=2, .mix=0, .chromame=0, .trellis=1, .partitions=I8|I4, .psy_rd=1.0 },
+/*8*/  { .time=6.190, .subme=7, .me=X264_ME_HEX, .refs=2, .mix=0, .chromame=0, .trellis=1, .partitions=I8|I4|P8|B8, .psy_rd=1.0 },
+/*9*/  { .time=6.920, .subme=7, .me=X264_ME_HEX, .refs=3, .mix=0, .chromame=0, .trellis=1, .partitions=I8|I4|P8|B8, .psy_rd=1.0 },
+/*10*/ { .time=7.070, .subme=8, .me=X264_ME_HEX, .refs=3, .mix=0, .chromame=0, .trellis=1, .partitions=I8|I4|P8|B8, .psy_rd=1.0 },
+/*11*/ { .time=12.800, .subme=9, .me=X264_ME_HEX, .refs=3, .mix=1, .chromame=1, .trellis=1, .partitions=I8|I4|P8|B8, .psy_rd=1.0 },
+/*12*/ { .time=18.570, .subme=10, .me=X264_ME_HEX, .refs=3, .mix=1, .chromame=1, .trellis=1, .partitions=I8|I4|P8|B8, .psy_rd=1.0 }
+};
+
+/* Reconfigure the encoder with the user's parameters overridden by the
+ * given preset (clamped to the valid range), and remember it in sc->preset. */
+static void apply_preset( x264_t *h, int preset )
+{
+    x264_speedcontrol_t *sc = h->sc;
+    preset = x264_clip3( preset, 0, PRESETS-1 );
+    //if( preset != sc->preset )
+    {
+        const sc_preset_t *s = &presets[preset];
+        x264_param_t p = sc->user_param;
+
+        p.i_frame_reference = s->refs;
+        p.analyse.inter = s->partitions;
+        p.analyse.i_subpel_refine = s->subme;
+        p.analyse.i_me_method = s->me;
+        p.analyse.i_trellis = s->trellis;
+        p.analyse.b_mixed_references = s->mix;
+        p.analyse.b_chroma_me = s->chromame;
+        p.analyse.f_psy_rd = s->psy_rd;
+        p.analyse.f_psy_trellis = s->psy_trellis;
+        x264_encoder_reconfig( h, &p );
+        sc->preset = preset;
+        x264_log( h, X264_LOG_DEBUG, "Applying speedcontrol preset %d.\n", preset );
+    }
+}
+
+/* With the alternate timer enabled, record the time elapsed since
+ * sc->timestamp was last updated as the cost of the frame just encoded. */
+void x264_speedcontrol_frame_end( x264_t *h )
+{
+    x264_speedcontrol_t *sc = h->sc;
+    if( h->param.sc.b_alt_timer )
+        sc->cpu_time = x264_mdate() - sc->timestamp;
+}
+
+/* Called before each frame is encoded: update the buffer model and the
+ * complexity estimate, then choose and apply the preset for this frame. */
+void x264_speedcontrol_frame( x264_t *h )
+{
+    x264_speedcontrol_t *sc = h->sc;
+    int64_t t, delta_t, delta_buffer;
+    int delta_f;
+
+    x264_emms();
+
+    // update buffer state after encoding and outputting the previous frame(s)
+    t = x264_mdate();
+    delta_f = h->i_frame - sc->prev_frame;
+    delta_t = t - sc->timestamp;
+    delta_buffer = delta_f * sc->spf / h->param.sc.f_speed - delta_t;
+    sc->buffer_fill += delta_buffer;
+    sc->prev_frame = h->i_frame;
+    sc->timestamp = t;
+
+    // update the time predictor
+    // (skipped until a preset has been applied: sc->preset starts at -1,
+    // which must not be used to index presets[])
+    if( delta_f && sc->preset >= 0 )
+    {
+        // int64_t: a long stall measured in usec can exceed the range of int
+        int64_t cpu_time = h->param.sc.b_alt_timer ? sc->cpu_time : delta_t;
+        float decay = powf( sc->cplx_decay, delta_f );
+        sc->cplx_num *= decay;
+        sc->cplx_den *= decay;
+        sc->cplx_num += cpu_time / presets[sc->preset].time;
+        sc->cplx_den += delta_f;
+
+        sc->stat.avg_preset += sc->preset * delta_f;
+        sc->stat.den += delta_f;
+    }
+    sc->stat.min_buffer = X264_MIN( sc->buffer_fill, sc->stat.min_buffer );
+    sc->stat.max_buffer = X264_MAX( sc->buffer_fill, sc->stat.max_buffer );
+
+    if( sc->buffer_fill > sc->buffer_size ) // oops, cpu was idle
+    {
+        // not really an error, but we'll warn for debugging purposes
+        static int64_t idle_t = 0, print_interval = 0;
+        idle_t += sc->buffer_fill - sc->buffer_size;
+        if( t - print_interval > 1e6 )
+        {
+            x264_log( h, X264_LOG_WARNING, "speedcontrol idle (%.6f sec)\n", idle_t/1e6 );
+            print_interval = t;
+            idle_t = 0;
+        }
+        sc->buffer_fill = sc->buffer_size;
+    }
+    else if( sc->buffer_fill < 0 && delta_buffer < 0 ) // oops, we're late
+    {
+        // don't clip fullness to 0; we'll hope the real buffer was bigger than
+        // specified, and maybe we can catch up. if the application had to drop
+        // frames, then it should override the buffer fullness (FIXME implement this).
+        x264_log( h, X264_LOG_WARNING, "speedcontrol underflow (%.6f sec)\n", sc->buffer_fill/1e6 );
+    }
+
+    {
+        // pick the preset that should return the buffer to 3/4-full within a time
+        // specified by compensation_period
+        float target = sc->spf / h->param.sc.f_speed
+                     * (sc->buffer_fill + sc->compensation_period)
+                     / (sc->buffer_size*3/4 + sc->compensation_period);
+        float cplx = sc->cplx_num / sc->cplx_den;
+        float set, t0, t1;
+        float filled = (float) sc->buffer_fill / sc->buffer_size;
+        int i;
+        t0 = presets[0].time * cplx;
+        for( i=1;; i++ )
+        {
+            t1 = presets[i].time * cplx;
+            if( t1 >= target || i == PRESETS-1 )
+                break;
+            t0 = t1;
+        }
+        // linear interpolation between states
+        set = i-1 + (target - t0) / (t1 - t0);
+        // Even if our time estimations in the PRESETS array are off
+        // this will push us towards our target fullness
+        set += (20 * (filled-0.75));
+        set = x264_clip3f(set,0,PRESETS-1);
+        apply_preset( h, dither( sc, set ) );
+
+        // FIXME
+        if (h->param.i_log_level >= X264_LOG_DEBUG)
+        {
+            static float cpu, wall, tgt, den;
+            float decay = 1-1/100.;
+            cpu = cpu*decay + sc->cpu_time;
+            wall = wall*decay + delta_t;
+            tgt = tgt*decay + target;
+            den = den*decay + 1;
+            fprintf( stderr, "speed: %.2f %d[%.5f] (t/c/w: %6.0f/%6.0f/%6.0f = %.4f) fps=%.2f\r",
+                     set, sc->preset, (float)sc->buffer_fill / sc->buffer_size,
+                     tgt/den, cpu/den, wall/den, cpu/wall, 1e6*den/wall );
+        }
+    }
+
+}
+
+/* Override the modelled buffer state with values supplied by the caller.
+ * f_buffer_fill is a fraction of the buffer size; a non-zero i_buffer_size
+ * (in frames, raised to at least 3) also replaces the configured buffer size. */
+void x264_speedcontrol_sync( x264_t *h, float f_buffer_fill, int i_buffer_size )
+{
+    x264_speedcontrol_t *sc = h->sc;
+    if( !sc || !h->param.sc.i_buffer_size )
+        return;
+    if( i_buffer_size )
+        h->param.sc.i_buffer_size = X264_MAX( 3, i_buffer_size );
+    sc->buffer_size = h->param.sc.i_buffer_size * 1e6 / sc->fps;
+    sc->buffer_fill = sc->buffer_size * f_buffer_fill;
+}
diff --git a/x264.c b/x264.c
index c4a7400..79d83e9 100644
--- a/x264.c
+++ b/x264.c
@@ -444,6 +444,12 @@ static void Help( x264_param_t *defaults, int longhelp )
         "                              QP of -1 lets x264 choose. Frametypes: I,i,P,B,b.\n"
         "                              QPs are restricted by qpmin/qpmax.\n" );
     H1( "\n" );
+    H1( "Speedcontrol:\n" );
+    H1( "\n" );
+    H1( "      --speed <float>         Automatically adjust other options to achieve\n" );
+    H1( "                              this fraction of realtime.\n" );
+    H1( "      --speed-bufsize <int>   Averaging period for speed. (in frames) [%d]\n", defaults->sc.i_buffer_size );
+    H1( "\n" );
     H1( "Analysis:\n" );
     H1( "\n" );
     H1( "  -A, --partitions <string>   Partitions to consider [\"p8x8,b8x8,i8x8,i4x4\"]\n"
@@ -706,6 +712,8 @@ static struct option long_options[] =
     { "zones",       required_argument, NULL, 0 },
     { "qpfile",      required_argument, NULL, OPT_QPFILE },
     { "threads",     required_argument, NULL, 0 },
+    { "speed",       required_argument, NULL, 0 },
+    { "speed-bufsize", required_argument, NULL, 0 },
     { "sliced-threads",    no_argument, NULL, 0 },
     { "no-sliced-threads", no_argument, NULL, 0 },
     { "slice-max-size",    required_argument, NULL, 0 },
diff --git a/x264.h b/x264.h
index b11acf8..b082df7 100644
--- a/x264.h
+++ b/x264.h
@@ -320,6 +320,15 @@ typedef struct x264_param_t
         char        *psz_zones;     /* alternate method of specifying zones */
     } rc;
 
+    /* Speed control parameters */
+    struct
+    {
+        float       f_speed;        /* ratio from realtime */
+        int         i_buffer_size;  /* number of frames */
+        float       f_buffer_init;  /* fraction of size */
+        int         b_alt_timer;    /* use a different method of measuring encode time FIXME */
+    } sc;
+
     /* Muxing parameters */
     int b_aud;                  /* generate access unit delimiters */
     int b_repeat_headers;       /* put SPS/PPS before each keyframe */
@@ -656,4 +665,9 @@ int     x264_encoder_delayed_frames( x264_t * );
  *      only occur when calling x264_encoder_intra_refresh. */
 void    x264_encoder_intra_refresh( x264_t * );
 
+/* x264_speedcontrol_sync:
+ *      override speedcontrol's internal clock */
+void    x264_speedcontrol_sync( x264_t *, float f_buffer_fill, int i_buffer_size );
+
+
 #endif