diff --git a/Makefile b/Makefile index a4cc170..0064fc1 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ SRCS = common/mc.c common/predict.c common/pixel.c common/macroblock.c \ common/frame.c common/dct.c common/cpu.c common/cabac.c \ common/common.c common/mdate.c common/set.c \ common/quant.c common/vlc.c \ - encoder/analyse.c encoder/me.c encoder/ratecontrol.c \ + encoder/analyse.c encoder/me.c encoder/ratecontrol.c encoder/lookahead.c\ encoder/set.c encoder/macroblock.c encoder/cabac.c \ encoder/cavlc.c encoder/encoder.c diff --git a/common/common.c b/common/common.c index 9260c64..30e4ee4 100644 --- a/common/common.c +++ b/common/common.c @@ -104,6 +104,7 @@ void x264_param_default( x264_param_t *param ) param->rc.f_qblur = 0.5; param->rc.f_complexity_blur = 20; param->rc.i_zones = 0; + param->rc.i_lookahead = 0; /* Log */ param->pf_log = x264_log_default; @@ -270,6 +271,13 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value ) else p->i_threads = atoi(value); } + OPT("rc-lookahead") + { + if( !strcmp(value, "auto") ) + p->rc.i_lookahead = X264_LOOKAHEAD_AUTO; + else + p->rc.i_lookahead = atoi(value); + } OPT2("deterministic", "n-deterministic") p->b_deterministic = atobool(value); OPT2("level", "level-idc") @@ -779,9 +787,9 @@ void x264_reduce_fraction( int *n, int *d ) c = a % b; while(c) { - a = b; - b = c; - c = a % b; + a = b; + b = c; + c = a % b; } *n /= b; *d /= b; @@ -886,6 +894,8 @@ char *x264_param2string( x264_param_t *p, int b_res ) if( p->rc.i_vbv_buffer_size ) s += sprintf( s, " vbv_maxrate=%d vbv_bufsize=%d", p->rc.i_vbv_max_bitrate, p->rc.i_vbv_buffer_size ); + if( p->rc.i_lookahead ) + s += sprintf( s, " lookahead=%d", p->rc.i_lookahead ); } else if( p->rc.i_rc_method == X264_RC_CQP ) s += sprintf( s, " qp=%d", p->rc.i_qp_constant ); diff --git a/common/common.h b/common/common.h index 1e46ae8..400cc7a 100644 --- a/common/common.h +++ b/common/common.h @@ -218,6 +218,24 @@ typedef struct } x264_slice_header_t; 
+typedef struct x264_lookahead_t +{ + volatile int b_thread_active; + volatile int b_exit_thread; + volatile int i_last_idr_planned; + x264_pthread_t thread_handle; + x264_frame_t *last_nonb_planned; + /* Next buffer (next) which holds frames for next slicetype decision */ + synch_frame_list_t next; + /* Output buffer (ofbuf) for frames whose decisions have been made + * but not yet transferred to encoding h->frames.current list */ + synch_frame_list_t ofbuf; + synch_frame_list_t ifbuf; + /* Unused buffer (unused) for re-use of frames which were previously malloc'ed */ + synch_frame_list_t unused; + int i_frames; +} x264_lookahead_t; + /* From ffmpeg */ #define X264_SCAN8_SIZE (6*8) @@ -327,11 +345,7 @@ struct x264_t struct { /* Frames to be encoded (whose types have been decided) */ - x264_frame_t *current[X264_BFRAME_MAX*4+3]; - /* Temporary buffer (frames types not yet decided) */ - x264_frame_t *next[X264_BFRAME_MAX*4+3]; - /* Unused frames */ - x264_frame_t *unused[X264_BFRAME_MAX*4 + X264_THREAD_MAX*2 + 16+4]; + x264_frame_t **current; /* For adaptive B decision */ x264_frame_t *last_nonb; @@ -638,6 +652,7 @@ struct x264_t #if VISUALIZE struct visualize_t *visualize; #endif + x264_lookahead_t *lookahead; }; // included at the end because it needs x264_t diff --git a/common/frame.c b/common/frame.c index 23e6824..8234431 100644 --- a/common/frame.c +++ b/common/frame.c @@ -937,15 +937,14 @@ void x264_frame_push_unused( x264_t *h, x264_frame_t *frame ) assert( frame->i_reference_count > 0 ); frame->i_reference_count--; if( frame->i_reference_count == 0 ) - x264_frame_push( h->frames.unused, frame ); - assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL ); + x264_synch_frame_list_put( &h->lookahead->unused, frame ); } x264_frame_t *x264_frame_pop_unused( x264_t *h ) { x264_frame_t *frame; - if( h->frames.unused[0] ) - frame = x264_frame_pop( h->frames.unused ); + if( h->lookahead->unused.list[0] ) + frame = 
x264_synch_frame_list_get( &h->lookahead->unused ); else frame = x264_frame_new( h ); assert( frame->i_reference_count == 0 ); @@ -973,3 +972,104 @@ void x264_frame_sort( x264_frame_t **list, int b_dts ) } } while( !b_ok ); } + +void x264_frame_delete_list( x264_frame_t **frame_list ) +{ + int i; + if( !frame_list ) + return; + for( i = 0; frame_list[i]; i++ ) + x264_frame_delete( frame_list[i] ); + x264_free( frame_list ); +} + +int x264_synch_frame_list_init( synch_frame_list_t *slist, int max_size ) +{ + slist->i_max_size = max_size; + slist->i_size = 0; + assert( slist->i_max_size ); + slist->list = x264_malloc( (max_size + 3) * sizeof(x264_frame_t*) ); + if( !slist->list ) + return -1; + memset( slist->list, 0, (max_size + 3) * sizeof(x264_frame_t*) ); + x264_pthread_mutex_init( &slist->mutex, NULL ); + x264_pthread_cond_init( &slist->cv_full, NULL ); + x264_pthread_cond_init( &slist->cv_empty, NULL ); + return 0; +} + +void x264_synch_frame_list_invalidate( synch_frame_list_t *slist ) +{ + int i; + slist->i_max_size = -1; //safe -- not changed since initialization + for( i = 0; slist->list[i]; i++ ) + x264_frame_delete( slist->list[i] ); + x264_pthread_cond_broadcast( &slist->cv_full ); + x264_pthread_cond_broadcast( &slist->cv_empty ); +} + +int x264_synch_frame_list_destroy( synch_frame_list_t *slist ) +{ + if( slist->i_max_size > 0 ) + return -1; + x264_pthread_mutex_destroy( &slist->mutex ); + x264_pthread_cond_destroy( &slist->cv_full ); + x264_pthread_cond_destroy( &slist->cv_empty ); + x264_free( slist->list ); + return 0; +} + +void x264_synch_frame_list_put( synch_frame_list_t *slist, x264_frame_t *frame ) +{ + if( slist->i_max_size < 1 ) + return; + x264_pthread_mutex_lock( &slist->mutex ); + + if( ( slist->i_max_size > 0 ) && (slist->list[slist->i_max_size - 1]) ) + x264_pthread_cond_wait( &slist->cv_empty, &slist->mutex ); + + x264_frame_push( slist->list, frame ); + slist->i_size++; + x264_pthread_mutex_unlock( &slist->mutex ); +} + +x264_frame_t 
* x264_synch_frame_list_get( synch_frame_list_t *slist ) +{ + x264_frame_t *frame = NULL; + if( slist->i_max_size < 1 ) + return NULL; + x264_pthread_mutex_lock( &slist->mutex ); + if( ( slist->i_max_size > 0 ) && !slist->list[0] ) + x264_pthread_cond_wait( &slist->cv_full, &slist->mutex ); + + frame = x264_frame_shift( slist->list ); + slist->i_size--; + x264_pthread_mutex_unlock( &slist->mutex ); + x264_pthread_cond_broadcast( &slist->cv_empty ); + return frame; +} + +int x264_synch_frame_list_get_size( synch_frame_list_t *slist ) +{ + int fno = 0; + /* Tried to do this without mutex's but got segfaults */ + x264_pthread_mutex_lock( &slist->mutex ); + fno = slist->i_size; + x264_pthread_mutex_unlock( &slist->mutex ); + return fno; +} + +void x264_synch_frame_list_sort_pts( synch_frame_list_t *slist ) +{ + x264_pthread_mutex_lock( &slist->mutex ); + x264_frame_sort_pts( slist->list ); + x264_pthread_mutex_unlock( &slist->mutex ); +} + +void x264_synch_frame_list_unshift( synch_frame_list_t *slist, x264_frame_t *frame ) +{ + x264_pthread_mutex_lock( &slist->mutex ); + x264_frame_unshift( slist->list, frame ); + slist->i_size++; + x264_pthread_mutex_unlock( &slist->mutex ); +} diff --git a/common/frame.h b/common/frame.h index aad77f5..52cf551 100644 --- a/common/frame.h +++ b/common/frame.h @@ -93,6 +93,17 @@ typedef struct } x264_frame_t; +// synchronized frame list +typedef struct +{ + x264_frame_t **list; + int i_max_size; + int i_size; + x264_pthread_mutex_t mutex; + x264_pthread_cond_t cv_full; + x264_pthread_cond_t cv_empty; +} synch_frame_list_t; + typedef void (*x264_deblock_inter_t)( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ); typedef void (*x264_deblock_intra_t)( uint8_t *pix, int stride, int alpha, int beta ); typedef struct @@ -135,6 +146,18 @@ x264_frame_t *x264_frame_shift( x264_frame_t **list ); void x264_frame_push_unused( x264_t *h, x264_frame_t *frame ); x264_frame_t *x264_frame_pop_unused( x264_t *h ); void x264_frame_sort( 
x264_frame_t **list, int b_dts ); +void x264_frame_delete_list( x264_frame_t **frame_list ); + +int x264_synch_frame_list_init( synch_frame_list_t *slist, int nelem ); +void x264_synch_frame_list_invalidate( synch_frame_list_t *slist ); +int x264_synch_frame_list_destroy( synch_frame_list_t *slist ); +void x264_synch_frame_list_put( synch_frame_list_t *slist, x264_frame_t *frame ); +x264_frame_t *x264_synch_frame_list_get( synch_frame_list_t *slist ); + +int x264_synch_frame_list_get_size( synch_frame_list_t *slist ); +void x264_synch_frame_list_sort_pts( synch_frame_list_t *slist ); +void x264_synch_frame_list_unshift( synch_frame_list_t *slist, x264_frame_t *frame ); + #define x264_frame_sort_dts(list) x264_frame_sort(list, 1) #define x264_frame_sort_pts(list) x264_frame_sort(list, 0) diff --git a/encoder/analyse.h b/encoder/analyse.h index b8c828f..e37f915 100644 --- a/encoder/analyse.h +++ b/encoder/analyse.h @@ -26,5 +26,12 @@ void x264_macroblock_analyse( x264_t *h ); void x264_slicetype_decide( x264_t *h ); +int x264_lookahead_init( x264_t *h ); +int x264_lookahead_empty( x264_t *h ); +void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame ); +void x264_lookahead_get_frames( x264_t *h ); +int x264_lookahead_done_frames( x264_t *h ); +int x264_lookahead_try_frames( x264_t *h ); +int x264_lookahead_destroy( x264_t *h ); #endif diff --git a/encoder/encoder.c b/encoder/encoder.c index fdcc957..d264b23 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -358,7 +358,7 @@ static int x264_validate_parameters( x264_t *h ) return -1; } - if( h->param.i_threads == 0 ) + if( h->param.i_threads == X264_THREADS_AUTO ) h->param.i_threads = x264_cpu_num_processors() * 3/2; h->param.i_threads = x264_clip3( h->param.i_threads, 1, X264_THREAD_MAX ); if( h->param.i_threads > 1 ) @@ -477,6 +477,14 @@ static int x264_validate_parameters( x264_t *h ) && h->param.i_bframe && ( h->param.rc.b_stat_write || !h->param.rc.b_stat_read ); +#ifdef HAVE_PTHREAD + if( 
h->param.rc.i_lookahead == X264_LOOKAHEAD_AUTO ) + h->param.rc.i_lookahead = 2*(h->param.i_bframe + h->param.i_threads); + h->param.rc.i_lookahead = x264_clip3( h->param.rc.i_lookahead, 0, 2 * (X264_BFRAME_MAX + X264_THREAD_MAX) + 1 ); +#else + h->param.rc.i_lookahead = 0; +#endif + h->param.i_deblocking_filter_alphac0 = x264_clip3( h->param.i_deblocking_filter_alphac0, -6, 6 ); h->param.i_deblocking_filter_beta = x264_clip3( h->param.i_deblocking_filter_beta, -6, 6 ); h->param.analyse.i_luma_deadzone[0] = x264_clip3( h->param.analyse.i_luma_deadzone[0], 0, 32 ); @@ -509,7 +517,7 @@ static int x264_validate_parameters( x264_t *h ) h->param.analyse.inter &= ~X264_ANALYSE_I8x8; h->param.analyse.intra &= ~X264_ANALYSE_I8x8; } - h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12); + h->param.analyse.i_chroma_qp_offset = x264_clip3( h->param.analyse.i_chroma_qp_offset, -12, 12 ); if( !h->param.b_cabac ) h->param.analyse.i_trellis = 0; h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 ); @@ -598,6 +606,7 @@ static int x264_validate_parameters( x264_t *h ) h->param.i_sps_id &= 31; + h->param.i_log_level = x264_clip3( h->param.i_log_level, X264_LOG_NONE, X264_LOG_DEBUG ); if( h->param.i_log_level < X264_LOG_INFO ) { h->param.analyse.b_psnr = 0; @@ -718,9 +727,9 @@ x264_t *x264_encoder_open ( x264_param_t *param ) /* Init frames. 
*/ if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS ) - h->frames.i_delay = X264_MAX(h->param.i_bframe,3)*4 + h->param.i_threads - 1; + h->frames.i_delay = h->param.rc.i_lookahead + X264_MAX(h->param.i_bframe,3)*4 + h->param.i_threads - 1; else - h->frames.i_delay = h->param.i_bframe + h->param.i_threads - 1; + h->frames.i_delay = h->param.rc.i_lookahead + h->param.i_bframe + h->param.i_threads - 1; h->frames.i_max_ref0 = h->param.i_frame_reference; h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames; h->frames.i_max_dpb = h->sps->vui.i_max_dec_frame_buffering; @@ -736,6 +745,9 @@ x264_t *x264_encoder_open ( x264_param_t *param ) h->frames.i_input = 0; h->frames.last_nonb = NULL; + h->frames.current = x264_malloc( (h->param.rc.i_lookahead + h->param.i_bframe + 3) * sizeof(x264_frame_t*) ); + memset( h->frames.current, 0, (h->param.rc.i_lookahead + h->param.i_bframe + 3) * sizeof(x264_frame_t*) ); + h->i_ref0 = 0; h->i_ref1 = 0; @@ -787,10 +799,13 @@ x264_t *x264_encoder_open ( x264_param_t *param ) h->thread[0] = h; h->i_thread_num = 0; - for( i = 1; i < h->param.i_threads; i++ ) + for( i = 1; i < h->param.i_threads + !!h->param.rc.i_lookahead; i++ ) h->thread[i] = x264_malloc( sizeof(x264_t) ); - for( i = 0; i < h->param.i_threads; i++ ) + if( x264_lookahead_init( h ) ) + return NULL; + + for( i = 0; i < h->param.i_threads + !!h->param.rc.i_lookahead; i++ ) { if( i > 0 ) *h->thread[i] = *h; @@ -1097,8 +1112,8 @@ static inline void x264_reference_update( x264_t *h ) } /* adaptive B decision needs a pointer, since it can't use the ref lists */ - if( h->sh.i_type != SLICE_TYPE_B ) - h->frames.last_nonb = h->fdec; + if( !h->param.rc.i_lookahead && (h->sh.i_type != SLICE_TYPE_B) ) + h->lookahead->last_nonb_planned = h->fdec; /* move frame in the buffer */ x264_frame_push( h->frames.reference, h->fdec ); @@ -1434,57 +1449,49 @@ int x264_encoder_encode( x264_t *h, h->param.i_height != 16 * h->sps->i_mb_height ) x264_frame_expand_border_mod16( h, fenc ); - 
fenc->i_frame = h->frames.i_input++; - - x264_frame_push( h->frames.next, fenc ); - if( h->frames.b_have_lowres ) x264_frame_init_lowres( h, fenc ); if( h->param.rc.i_aq_mode ) x264_adaptive_quant_frame( h, fenc ); - if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads ) - { - /* Nothing yet to encode */ - /* waiting for filling bframe buffer */ - pic_out->i_type = X264_TYPE_AUTO; - return 0; - } + fenc->i_frame = h->frames.i_input++; + + /* 2: Place the frame into the queue for its slice type decision */ + x264_lookahead_put_frame( h, fenc ); + } + else + { + h->lookahead->b_exit_thread = 1; + /* Make sure the lookahead thread is not waiting for new frames, there aren't any! */ + x264_pthread_cond_broadcast( &h->lookahead->ifbuf.cv_full ); } - if( h->frames.current[0] == NULL ) + if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads ) { - int bframes = 0; - /* 2: Select frame types */ - if( h->frames.next[0] == NULL ) - { - x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ); - return 0; - } + /* Nothing yet to encode */ + /* waiting for filling bframe buffer */ + pic_out->i_type = X264_TYPE_AUTO; + return 0; + } - x264_stack_align( x264_slicetype_decide, h ); + /* 3: The picture is analyzed in the lookahead (or when x264_lookahead_get_frames is called if lookahead==0 */ + if( !h->frames.current[0] ) + x264_lookahead_get_frames( h ); - /* 3: move some B-frames and 1 non-B to encode queue */ - while( IS_X264_TYPE_B( h->frames.next[bframes]->i_type ) ) - bframes++; - x264_frame_push( h->frames.current, x264_frame_shift( &h->frames.next[bframes] ) ); - /* FIXME: when max B-frames > 3, BREF may no longer be centered after GOP closing */ - if( h->param.b_bframe_pyramid && bframes > 1 ) - { - x264_frame_t *mid = x264_frame_shift( &h->frames.next[bframes/2] ); - mid->i_type = X264_TYPE_BREF; - x264_frame_push( h->frames.current, mid ); - bframes--; - } - while( bframes-- ) - x264_frame_push( h->frames.current, 
x264_frame_shift( h->frames.next ) ); + if( !h->frames.current[0] && x264_lookahead_empty( h ) ) + { + x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ); + return 0; } + if( !h->frames.current[0] ) + return 0; + /* ------------------- Get frame to be encoded ------------------------- */ /* 4: get picture to encode */ h->fenc = x264_frame_shift( h->frames.current ); - if( h->fenc == NULL ) + if( !h->fenc ) { /* Nothing yet to encode (ex: waiting for I/P with B frames) */ /* waiting for filling bframe buffer */ @@ -1495,6 +1502,7 @@ int x264_encoder_encode( x264_t *h, if( h->fenc->i_type == X264_TYPE_IDR ) { h->frames.i_last_idr = h->fenc->i_frame; + h->i_frame_num = 0; } /* ------------------- Setup frame context ----------------------------- */ @@ -1808,6 +1816,8 @@ void x264_encoder_close ( x264_t *h ) || h->stat.i_mb_count[SLICE_TYPE_P][I_PCM] || h->stat.i_mb_count[SLICE_TYPE_B][I_PCM]; + x264_lookahead_destroy( h ); + for( i=0; i<h->param.i_threads; i++ ) { // don't strictly have to wait for the other threads, but it's simpler than canceling them @@ -2032,17 +2042,8 @@ void x264_encoder_close ( x264_t *h ) assert( h->frames.current[i]->i_reference_count == 1 ); x264_frame_delete( h->frames.current[i] ); } - for( i = 0; h->frames.next[i]; i++ ) - { - assert( h->frames.next[i]->i_reference_count == 1 ); - x264_frame_delete( h->frames.next[i] ); - } - for( i = 0; h->frames.unused[i]; i++ ) - { - assert( h->frames.unused[i]->i_reference_count == 0 ); - x264_frame_delete( h->frames.unused[i] ); - } + x264_free( h->frames.current ); h = h->thread[0]; for( i = h->param.i_threads - 1; i >= 0; i-- ) diff --git a/encoder/lookahead.c b/encoder/lookahead.c new file mode 100644 index 0000000..833c337 --- /dev/null +++ b/encoder/lookahead.c @@ -0,0 +1,285 @@ +/***************************************************************************** + * lookahead.c: Lookahead slicetype decisions for x264 + 
***************************************************************************** + * Lookahead.c and associated modifications: + * Copyright (C) 2008 Avail Media + * + * Authors: Michael Kazmier + * Alex Giladi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + *****************************************************************************/ + +/* LOOKAHEAD (threaded and non-threaded mode) + * + * Lookahead types: + * [1] Slice type / scene cut; + * + * In non-threaded mode, we run the existing slicetype decision code as it was + * In threaded mode, we run in a separate thread, that lives between the calls + * to x264_encoder_open() and x264_encoder_close(), and performs lookahead for + * the number of frames specified in rc_lookahead. Recommended setting is + * 2*(# of bframes + # of threads). 
+ */ +#include "common/common.h" +#include "common/cpu.h" +#include "analyse.h" + +/* the following methods are only used when using threads */ +#ifdef HAVE_PTHREAD +static void x264_lookahead_slicetype_decide( x264_t *h ) +{ + x264_stack_align( x264_slicetype_decide, h ); + + int bframes=0; + if( h->lookahead->next.list[bframes] ) + { + while( IS_X264_TYPE_B( h->lookahead->next.list[bframes]->i_type ) ) + bframes++; + h->lookahead->last_nonb_planned = h->lookahead->next.list[bframes]; // Set in x264_reference_update when not threading + + x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex ); + while( h->lookahead->ofbuf.i_max_size && h->lookahead->ofbuf.list[h->lookahead->ofbuf.i_max_size - bframes - 1] ) + { + x264_pthread_cond_broadcast( &h->lookahead->ofbuf.cv_full ); + x264_pthread_cond_wait( &h->lookahead->ofbuf.cv_empty, &h->lookahead->ofbuf.mutex ); + } + while( bframes-- ) + { + x264_frame_push( h->lookahead->ofbuf.list, x264_frame_shift( h->lookahead->next.list ) ); // move all b frames + h->lookahead->ofbuf.i_size++; + h->lookahead->next.i_size--; + } + + x264_frame_push( h->lookahead->ofbuf.list, x264_frame_shift( h->lookahead->next.list ) ); // and the last non-bframe + h->lookahead->ofbuf.i_size++; + h->lookahead->next.i_size--; + x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex ); + } + x264_pthread_cond_broadcast( &h->lookahead->ofbuf.cv_full ); /* unblock the encoder thread trying to take frames out */ + x264_pthread_cond_broadcast( &h->lookahead->next.cv_empty ); /* unblock the encoder thread trying to put frames in */ +} + +static void x264_lookahead_thread( x264_t *h ) +{ +#ifdef HAVE_MMX + /* Misalign mask has to be set separately for each thread. 
*/ + if( h->param.cpu&X264_CPU_SSE_MISALIGN ) + x264_cpu_mask_misalign_sse(); +#endif + h->lookahead->b_thread_active = 1; + while( !h->lookahead->b_exit_thread && h->lookahead->b_thread_active ) + { + x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex ); + while( h->lookahead->ifbuf.list[0] && h->lookahead->next.i_size < h->lookahead->next.i_max_size ) + { + x264_frame_push( h->lookahead->next.list, x264_frame_shift( h->lookahead->ifbuf.list) ); + h->lookahead->next.i_size++; + h->lookahead->i_frames++; + h->lookahead->ifbuf.i_size--; + } + if( h->lookahead->next.i_size <= h->frames.i_delay + 1 - h->param.i_threads - h->param.rc.i_lookahead ) + { + /* We don't have enough frames in the Next queue for decision, so look for frames in the if buf */ + while( !h->lookahead->ifbuf.i_size && !h->lookahead->b_exit_thread ) + { + if( x264_synch_frame_list_get_size( &h->lookahead->ofbuf ) > 0 ) + x264_pthread_cond_broadcast( &h->lookahead->ofbuf.cv_full ); /* unblock the encoder thread before we go to sleep */ + x264_pthread_cond_wait( &h->lookahead->ifbuf.cv_full, &h->lookahead->ifbuf.mutex ); /* We need to wait for the queue to fill */ + } + x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex ); + continue; + } + else + { + x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex ); + + /* Make the decision and push decided frames to the h->frames.current queue for encoding */ + x264_lookahead_slicetype_decide( h ); + } + } + x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex ); + while( h->lookahead->ifbuf.list[0] && h->lookahead->next.i_size < h->lookahead->next.i_max_size ) + { + x264_frame_push( h->lookahead->next.list, x264_frame_shift( h->lookahead->ifbuf.list ) ); + h->lookahead->next.i_size++; + h->lookahead->i_frames++; + h->lookahead->ifbuf.i_size--; + } + x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex ); + while( x264_synch_frame_list_get_size( &h->lookahead->next ) ) + { + x264_lookahead_slicetype_decide( h ); + x264_pthread_cond_broadcast( 
&h->lookahead->ofbuf.cv_full ); + } + h->lookahead->b_thread_active = 0; +} +#endif + +static void x264_lookahead_shift( x264_t *h, x264_frame_t **dst , x264_frame_t **src ) +{ + int bframes = 0; + int i_frames = 0; + + while( src[i_frames] ) + { + while( ( src[i_frames] == h->lookahead->last_nonb_planned ) + && x264_synch_frame_list_get_size( &h->lookahead->next ) + && x264_synch_frame_list_get_size( &h->lookahead->ifbuf ) ) + x264_pthread_cond_wait( &h->lookahead->ofbuf.cv_full, &h->lookahead->ofbuf.mutex ); + if( IS_X264_TYPE_B( src[i_frames]->i_type ) ) + bframes++; + else + break; + i_frames++; + } + if( src[i_frames] ) + { + x264_frame_push( dst, x264_frame_shift( &src[bframes] ) ); + h->lookahead->ofbuf.i_size--; + /* FIXME: when max B-frames > 3, BREF may no longer be centered after GOP closing */ + if( h->param.b_bframe_pyramid && bframes > 1 ) + { + x264_frame_t *mid = x264_frame_shift( &src[bframes/2] ); + h->lookahead->ofbuf.i_size--; + mid->i_type = X264_TYPE_BREF; + x264_frame_push( dst, mid ); + bframes--; + } + while( bframes-- ) + { + x264_frame_push( dst, x264_frame_shift( src ) ); + h->lookahead->ofbuf.i_size--; + } + } +} + +int x264_lookahead_init( x264_t *h ) +{ + x264_lookahead_t *h_lookahead = x264_malloc( sizeof( x264_lookahead_t ) ); + if( !h_lookahead ) + return -1; + memset( h_lookahead, 0, sizeof( x264_lookahead_t ) ); + int i; + for( i=0; i < h->param.i_threads + !!h->param.rc.i_lookahead; i++ ) + h->thread[i]->lookahead = h_lookahead; + + h_lookahead->b_thread_active = 0; + h_lookahead->b_exit_thread = 1; + h_lookahead->i_last_idr_planned = - h->param.i_keyint_max; + h_lookahead->last_nonb_planned = NULL; + h_lookahead->ofbuf.i_max_size = -1; + h_lookahead->i_frames = 0; + + if( x264_synch_frame_list_init( &h_lookahead->ofbuf, h->param.rc.i_lookahead + h->param.i_threads*2 + h->param.i_bframe + 4 ) + || x264_synch_frame_list_init( &h_lookahead->next, h->frames.i_delay + 4 ) + || x264_synch_frame_list_init( &h_lookahead->ifbuf, 
h->param.rc.i_lookahead + h->param.i_threads*2 + h->param.i_bframe + 4 ) + || x264_synch_frame_list_init( &h_lookahead->unused, h_lookahead->next.i_max_size*2 ) ) + { + x264_log( h, X264_LOG_ERROR, "failed to initialize synchronized lists\n" ); + return -1; + } + + if( !h->param.rc.i_lookahead ) + return 0; + + h_lookahead->b_exit_thread = 0; + x264_pthread_create( &h->lookahead->thread_handle, NULL, (void *)x264_lookahead_thread, h->thread[h->param.i_threads] ); + return 0; +} + +void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame ) +{ + if( h->param.rc.i_lookahead ) + { + x264_synch_frame_list_put( &h->lookahead->ifbuf, frame ); + x264_pthread_cond_broadcast( &h->lookahead->ifbuf.cv_full ); + } + else + { + x264_synch_frame_list_put( &h->lookahead->next, frame ); + h->lookahead->i_frames++; + } +} + +int x264_lookahead_empty( x264_t *h ) +{ + return ( !x264_synch_frame_list_get_size( &h->lookahead->ofbuf ) + && !x264_synch_frame_list_get_size( &h->lookahead->next ) ); +} + +void x264_lookahead_get_frames( x264_t *h ) +{ + /* We have a lookahead thread, so get frames from there */ + if( h->param.rc.i_lookahead ) + { + x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex ); + while( !h->lookahead->ofbuf.i_size + && ( x264_synch_frame_list_get_size( &h->lookahead->ifbuf ) + || x264_synch_frame_list_get_size( &h->lookahead->next ) ) ) + { + if( x264_synch_frame_list_get_size( &h->lookahead->ifbuf ) ) + x264_pthread_cond_broadcast( &h->lookahead->ifbuf.cv_full ); /* unblock the lookahead thread before we go to sleep */ + x264_pthread_cond_wait( &h->lookahead->ofbuf.cv_full, &h->lookahead->ofbuf.mutex ); + } + x264_lookahead_shift( h, h->frames.current, h->lookahead->ofbuf.list ); + x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex ); + x264_pthread_cond_broadcast( &h->lookahead->ofbuf.cv_empty ); + } + /* We are not running a lookahead thread, so perform all the slicetype decide on the fly */ + else + { + if( h->frames.current[0] || 
!h->lookahead->next.list[0] ) + return; + /* Make the decision and push decided frames to the h->frames.current queue for encoding */ + x264_stack_align( x264_slicetype_decide, h ); + + int bframes=0; + while( IS_X264_TYPE_B( h->lookahead->next.list[bframes]->i_type ) ) + bframes++; + + h->lookahead->last_nonb_planned = h->lookahead->next.list[bframes]; + + while( bframes-- ) + { + x264_frame_push( h->lookahead->ofbuf.list, x264_frame_shift( h->lookahead->next.list ) ); // move all b frames + h->lookahead->ofbuf.i_size++; + h->lookahead->next.i_size--; + } + x264_frame_push( h->lookahead->ofbuf.list, x264_frame_shift( h->lookahead->next.list ) ); // and the last non-bframe + h->lookahead->ofbuf.i_size++; + h->lookahead->next.i_size--; + + x264_lookahead_shift( h, h->frames.current, h->lookahead->ofbuf.list ); // Now make all b and the last non-bframe available to the current list + } +} + +int x264_lookahead_destroy( x264_t *h ) +{ + while( h->lookahead->b_thread_active ) + h->lookahead->b_exit_thread = 1; + + x264_synch_frame_list_invalidate( &h->lookahead->ofbuf ); + x264_synch_frame_list_invalidate( &h->lookahead->ifbuf ); + x264_synch_frame_list_invalidate( &h->lookahead->next ); + x264_synch_frame_list_invalidate( &h->lookahead->unused ); + + x264_synch_frame_list_destroy( &h->lookahead->ofbuf ); + x264_synch_frame_list_destroy( &h->lookahead->ifbuf ); + x264_synch_frame_list_destroy( &h->lookahead->next ); + x264_synch_frame_list_destroy( &h->lookahead->unused ); + + return 0; +} diff --git a/encoder/me.c b/encoder/me.c index f13e84b..5b901ce 100644 --- a/encoder/me.c +++ b/encoder/me.c @@ -731,7 +731,7 @@ void x264_me_refine_qpel( x264_t *h, x264_me_t *m ) if( m->i_pixel <= PIXEL_8x8 && h->sh.i_type == SLICE_TYPE_P ) m->cost -= m->i_ref_cost; - + refine_subpel( h, m, hpel, qpel, NULL, 1 ); } diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c index 2dd34d0..2e5c381 100644 --- a/encoder/ratecontrol.c +++ b/encoder/ratecontrol.c @@ -271,8 +271,8 @@ int 
x264_ratecontrol_new( x264_t *h ) x264_emms(); - rc = h->rc = x264_malloc( h->param.i_threads * sizeof(x264_ratecontrol_t) ); - memset( rc, 0, h->param.i_threads * sizeof(x264_ratecontrol_t) ); + rc = h->rc = x264_malloc( ( h->param.i_threads + !!h->param.rc.i_lookahead ) * sizeof(x264_ratecontrol_t) ); + memset( h->rc, 0, ( h->param.i_threads + !!h->param.rc.i_lookahead ) * sizeof(x264_ratecontrol_t) ); rc->b_abr = h->param.rc.i_rc_method != X264_RC_CQP && !h->param.rc.b_stat_read; rc->b_2pass = h->param.rc.i_rc_method == X264_RC_ABR && h->param.rc.b_stat_read; @@ -480,7 +480,7 @@ int x264_ratecontrol_new( x264_t *h ) } rc->entry = (ratecontrol_entry_t*) x264_malloc(rc->num_entries * sizeof(ratecontrol_entry_t)); - memset(rc->entry, 0, rc->num_entries * sizeof(ratecontrol_entry_t)); + memset( rc->entry, 0, rc->num_entries * sizeof( ratecontrol_entry_t ) ); /* init all to skipped p frames */ for(i=0; i<rc->num_entries; i++) @@ -573,7 +573,7 @@ int x264_ratecontrol_new( x264_t *h ) x264_free( p ); } - for( i=0; i<h->param.i_threads; i++ ) + for( i=0; i < h->param.i_threads + !!h->param.rc.i_lookahead; i++ ) { h->thread[i]->rc = rc+i; if( i ) diff --git a/encoder/slicetype.c b/encoder/slicetype.c index 2c16429..04bc2ce 100644 --- a/encoder/slicetype.c +++ b/encoder/slicetype.c @@ -443,7 +443,7 @@ static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, in int icost = frame->i_cost_est[0][0]; int pcost = frame->i_cost_est[p1-p0][0]; float f_bias; - int i_gop_size = frame->i_frame - h->frames.i_last_idr; + int i_gop_size = frame->i_frame - h->lookahead->i_last_idr_planned; float f_thresh_max = h->param.i_scenecut_threshold / 100.0; /* magic numbers pulled out of thin air */ float f_thresh_min = f_thresh_max * h->param.i_keyint_min @@ -487,21 +487,25 @@ static void x264_slicetype_analyse( x264_t *h ) int i_mb_count = NUM_MBS; int cost1p0, cost2p0, cost1b1, cost2p1; int idr_frame_type; + int max_search = X264_MIN( h->lookahead->next.i_size, MAX_LENGTH ); + 
x264_frame_t *last_nonb = h->lookahead->last_nonb_planned; assert( h->frames.b_have_lowres ); - if( !h->frames.last_nonb ) + if( !last_nonb ) return; - frames[0] = h->frames.last_nonb; - for( j = 0; h->frames.next[j] && h->frames.next[j]->i_type == X264_TYPE_AUTO; j++ ) - frames[j+1] = h->frames.next[j]; - keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->frames.i_last_idr - 1; + + frames[0] = last_nonb; + for( j = 0; j < max_search && h->lookahead->next.list[j]->i_type == X264_TYPE_AUTO; j++ ) + frames[j+1] = h->lookahead->next.list[j]; + + keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_idr_planned - 1; num_frames = X264_MIN( j, keyint_limit ); if( num_frames == 0 ) return; x264_lowres_context_init( h, &a ); - idr_frame_type = frames[1]->i_frame - h->frames.i_last_idr >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I; + idr_frame_type = frames[1]->i_frame - h->lookahead->i_last_idr_planned >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I; if( num_frames == 1 ) { @@ -595,15 +599,15 @@ void x264_slicetype_decide( x264_t *h ) int bframes; int i; - if( h->frames.next[0] == NULL ) + if( !h->lookahead->next.list[0] ) return; if( h->param.rc.b_stat_read ) { /* Use the frame types from the first pass */ - for( i = 0; h->frames.next[i] != NULL; i++ ) - h->frames.next[i]->i_type = - x264_ratecontrol_slice_type( h, h->frames.next[i]->i_frame ); + for( i = 0; h->lookahead->next.list[i]; i++ ) + h->lookahead->next.list[i]->i_type = + x264_ratecontrol_slice_type( h, h->lookahead->next.list[i]->i_frame ); } else if( (h->param.i_bframe && h->param.i_bframe_adaptive) || h->param.i_scenecut_threshold ) @@ -611,10 +615,9 @@ void x264_slicetype_decide( x264_t *h ) for( bframes = 0;; bframes++ ) { - frm = h->frames.next[bframes]; - + frm = h->lookahead->next.list[bframes]; /* Limit GOP size */ - if( frm->i_frame - h->frames.i_last_idr >= h->param.i_keyint_max ) + if( frm->i_frame - h->lookahead->i_last_idr_planned >= 
h->param.i_keyint_max ) { if( frm->i_type == X264_TYPE_AUTO ) frm->i_type = X264_TYPE_IDR; @@ -624,19 +627,16 @@ void x264_slicetype_decide( x264_t *h ) if( frm->i_type == X264_TYPE_IDR ) { /* Close GOP */ + h->lookahead->i_last_idr_planned = frm->i_frame; if( bframes > 0 ) { bframes--; - h->frames.next[bframes]->i_type = X264_TYPE_P; - } - else - { - h->i_frame_num = 0; + h->lookahead->next.list[bframes]->i_type = X264_TYPE_P; } } if( bframes == h->param.i_bframe - || h->frames.next[bframes+1] == NULL ) + || !h->lookahead->next.list[bframes+1] ) { if( IS_X264_TYPE_B( frm->i_type ) ) x264_log( h, X264_LOG_WARNING, "specified frame type is not compatible with max B-frames\n" ); diff --git a/x264.c b/x264.c index 6e04edd..dd847b5 100644 --- a/x264.c +++ b/x264.c @@ -212,6 +212,7 @@ static void Help( x264_param_t *defaults, int b_longhelp ) H0( " --ratetol Allowed variance of average bitrate [%.1f]\n", defaults->rc.f_rate_tolerance ); H0( " --ipratio QP factor between I and P [%.2f]\n", defaults->rc.f_ip_factor ); H0( " --pbratio QP factor between P and B [%.2f]\n", defaults->rc.f_pb_factor ); + H0( " --rc-lookahead Buffer within which to run VBV planning [%d]\n", defaults->rc.i_lookahead ); H1( " --chroma-qp-offset QP difference between chroma and luma [%d]\n", defaults->analyse.i_chroma_qp_offset ); H1( " --aq-mode AQ method [%d]\n" " - 0: Disabled\n" @@ -439,6 +440,7 @@ static struct option long_options[] = { "vbv-init", required_argument, NULL, 0 }, { "ipratio", required_argument, NULL, 0 }, { "pbratio", required_argument, NULL, 0 }, + { "rc-lookahead", required_argument, NULL, 0 }, { "chroma-qp-offset", required_argument, NULL, 0 }, { "pass", required_argument, NULL, 'p' }, { "stats", required_argument, NULL, 0 }, @@ -938,7 +940,7 @@ generic_option: #ifdef HAVE_PTHREAD if( b_thread_input || param->i_threads > 1 - || (param->i_threads == 0 && x264_cpu_num_processors() > 1) ) + || (param->i_threads == X264_THREADS_AUTO && x264_cpu_num_processors() > 1) ) { if( 
open_file_thread( NULL, &opt->hin, param ) ) { @@ -986,13 +988,13 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame ) { file_pos = ftell( opt->qpfile ); ret = fscanf( opt->qpfile, "%d %c %d\n", &num, &type, &qp ); - if( num > i_frame || ret == EOF ) - { - pic->i_type = X264_TYPE_AUTO; - pic->i_qpplus1 = 0; - fseek( opt->qpfile , file_pos , SEEK_SET ); - break; - } + if( num > i_frame || ret == EOF ) + { + pic->i_type = X264_TYPE_AUTO; + pic->i_qpplus1 = 0; + fseek( opt->qpfile, file_pos, SEEK_SET ); + break; + } if( num < i_frame ) continue; pic->i_qpplus1 = qp+1; diff --git a/x264.h b/x264.h index e61040e..693b411 100644 --- a/x264.h +++ b/x264.h @@ -35,7 +35,7 @@ #include <stdarg.h> -#define X264_BUILD 68 +#define X264_BUILD 69 /* x264_t: * opaque handler for encoder */ @@ -135,6 +135,7 @@ static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", "" /* Threading */ #define X264_THREADS_AUTO 0 /* Automatically select optimal number of threads */ +#define X264_LOOKAHEAD_AUTO -1 /* Automatically select optimal lookahead buffer size */ /* Zones: override ratecontrol or other options for specific sections of the video. * See x264_encoder_reconfig() for which options can be changed. @@ -267,6 +268,7 @@ typedef struct x264_param_t float f_vbv_buffer_init; /* <=1: fraction of buffer_size. >1: kbit */ float f_ip_factor; float f_pb_factor; + int i_lookahead; int i_aq_mode; /* psy adaptive QP. (X264_AQ_*) */ float f_aq_strength;