x264 adaptive quantisation (AQ) patch: hybrid mode, selectable metric, sensitivity.

What the hunks below do:
  * x264.h: adds X264_AQ_HYBRID (3) and the rc parameters i_aq_metric and
    f_aq_sensitivity.
  * common/common.c: defaults become aq-mode HYBRID, aq-metric 4 and
    aq-sensitivity 10; "aq-metric" and "aq-sensitivity" are parsed;
    x264_param2string() prints the metric and, in GLOBAL mode only, the
    sensitivity.
  * encoder/encoder.c: clips aq-mode to 0..3 and aq-metric to 0..12, floors
    aq-sensitivity at 0, and removes the qcomp compensation that was applied
    in GLOBAL mode.
  * encoder/ratecontrol.c: replaces ac_energy_mb() with thirteen aqm_*()
    metric functions and two windowed-variance helpers.
      - LOCAL mode reads the per-macroblock energies stored by
        x264_autosense_aq().
      - GLOBAL mode uses aq-sensitivity as a fixed threshold.
      - HYBRID mode updates a running SATD-weighted mean of log(energy+1) on
        every macroblock and reports it as "final AQ sensitivity".
  * x264.c: adds --aq-metric and --aq-sensitivity and documents aq-mode 3.

Review fixes folded into this copy of the patch:
  * windowed_variance() and fast_windowed_variance() return 0 instead of
    dividing by zero when no window position fits. For
    fast_windowed_variance() that happens when the frame is one macroblock
    wide or tall (start 4+4 equals end 12-4).
  * The loop bounds in x264_autosense_aq() had lost the text "<h->"; they are
    restored to "mb_y<h->sps->i_mb_height" and "mb_x<h->sps->i_mb_width".

NOTE(review): the line structure was rebuilt from the hunk headers, which all
add up. Indentation, the position of blank context lines and the padding
inside string literals are reconstructed, not original, so the patch may need
"patch -l" or regenerating against the real tree. The --aq-* help strings may
also have lost their <integer>/<float> placeholders; confirm against the
original patch.

diff -uNrp c/common/common.c b/common/common.c
--- c/common/common.c	2008-07-30 18:48:09 +0300
+++ b/common/common.c	2008-07-30 18:49:03 +0300
@@ -93,8 +93,10 @@ void x264_param_default( x264_param_t
     param->rc.i_qp_step = 4;
     param->rc.f_ip_factor = 1.4;
     param->rc.f_pb_factor = 1.3;
-    param->rc.i_aq_mode = X264_AQ_GLOBAL;
+    param->rc.i_aq_mode = X264_AQ_HYBRID;
+    param->rc.i_aq_metric = 4;
     param->rc.f_aq_strength = 1.0;
+    param->rc.f_aq_sensitivity = 10;
 
     param->rc.b_stat_write = 0;
     param->rc.psz_stat_out = "x264_2pass.log";
@@ -522,8 +524,12 @@ int x264_param_parse( x264_param_t *p, c
         p->rc.f_pb_factor = atof(value);
     OPT("aq-mode")
         p->rc.i_aq_mode = atoi(value);
+    OPT("aq-metric")
+        p->rc.i_aq_metric = atoi(value);
     OPT("aq-strength")
         p->rc.f_aq_strength = atof(value);
+    OPT("aq-sensitivity")
+        p->rc.f_aq_sensitivity = atof(value);
     OPT("pass")
     {
         int i = x264_clip3( atoi(value), 0, 3 );
@@ -916,7 +922,9 @@ char *x264_param2string( x264_param_t *p
     s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
     s += sprintf( s, " aq=%d", p->rc.i_aq_mode );
     if( p->rc.i_aq_mode )
-        s += sprintf( s, ":%.2f", p->rc.f_aq_strength );
+        s += sprintf( s, ":%d:%.2f", p->rc.i_aq_metric, p->rc.f_aq_strength );
+    if( p->rc.i_aq_mode == X264_AQ_GLOBAL )
+        s += sprintf( s, ":%.2f", p->rc.f_aq_sensitivity );
     if( p->rc.psz_zones )
         s += sprintf( s, " zones=%s", p->rc.psz_zones );
     else if( p->rc.i_zones )
diff -uNrp c/encoder/encoder.c b/encoder/encoder.c
--- c/encoder/encoder.c	2008-07-30 18:48:09 +0300
+++ b/encoder/encoder.c	2008-07-30 18:50:44 +0300
@@ -497,12 +497,12 @@ static int x264_validate_parameters( x26
     }
     if( h->param.analyse.f_psy_rd )
         h->param.analyse.i_psy_rd = FIX8( h->param.analyse.f_psy_rd );
-    h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 2 );
+    h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 3 );
     if( h->param.rc.f_aq_strength <= 0 )
         h->param.rc.i_aq_mode = 0;
-    /* VAQ effectively replaces qcomp, so qcomp is raised towards 1 to compensate. */
-    if( h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
-        h->param.rc.f_qcompress = x264_clip3f(h->param.rc.f_qcompress + h->param.rc.f_aq_strength / 0.7, 0, 1);
+    if( h->param.rc.f_aq_sensitivity < 0 )
+        h->param.rc.f_aq_sensitivity = 0;
+    h->param.rc.i_aq_metric = x264_clip3( h->param.rc.i_aq_metric, 0, 12 );
     h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
 
     {
diff -uNrp c/encoder/ratecontrol.c b/encoder/ratecontrol.c
--- c/encoder/ratecontrol.c	2008-07-30 18:48:00 +0300
+++ b/encoder/ratecontrol.c	2008-07-30 18:49:03 +0300
@@ -134,6 +134,7 @@ struct x264_ratecontrol_t
 
     /* AQ stuff */
     float aq_threshold;
+    float aq_threshold_num;
     int *ac_energy;
 
     int i_zones;
@@ -177,39 +178,369 @@ static inline double qscale2bits(ratecon
            + rce->misc_bits;
 }
 
-// Find the total AC energy of the block in all planes.
-static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, int *satd )
+static const int window_weights[7][7] =
+{{41,68,94,104,94,68,41},
+{68,115,155,171,155,115,68},
+{94,155,209,230,209,155,94},
+{104,171,230,256,230,171,104},
+{94,155,209,230,209,155,94},
+{68,115,155,171,155,115,68},
+{41,68,94,104,94,68,41}};
+
+static inline int windowed_variance( x264_t *h, uint8_t *plane, int stride, int window_x, int window_y, int blocksize, int mb_x, int mb_y, int step )
+{
+    int x,y,locx,locy,n=0;
+    uint64_t total = 0;
+    int shiftx = (window_x - 1) / 2; int shifty = (window_y - 1) / 2;
+    int startx = shiftx; int starty = shifty;
+    int endx = blocksize - shiftx; int endy = blocksize - shifty;
+    plane -= (shiftx + shifty * stride);
+    if(mb_x == 0) startx += shiftx;
+    if(mb_y == 0) starty += shifty;
+    if(mb_x == h->sps->i_mb_width - 1) endx -= shiftx;
+    if(mb_y == h->sps->i_mb_height - 1) endy -= shifty;
+    plane += starty * stride;
+    for(locy = starty; locy < endy; locy+=step)
+    {
+        for(locx = startx; locx < endx; locx+=step)
+        {
+            int sum = 0;
+            int ssd = 0;
+            for(y = 0; y < window_y; y++)
+                for(x = 0; x < window_x; x++)
+                    sum += window_weights[y][x] * plane[x+y*stride+locx];
+            sum = (sum + 64) >> 7;
+            sum = (sum + (window_x*window_y+1)/2)/(window_x*window_y);
+            for(y = 0; y < window_y; y++)
+                for(x = 0; x < window_x; x++)
+                {
+                    int val = plane[x+y*stride+locx] - sum;
+                    ssd += (window_weights[y][x] * val * val + 64) >> 7;
+                }
+            total += ssd;
+            n++;
+        }
+        plane += stride*step;
+    }
+    return n ? (total * 256) / n : 0; /* n == 0: no window position fits; avoid dividing by zero */
+}
+
+static inline int fast_windowed_variance( x264_t *h, uint8_t *plane, int stride, int blocksize, int mb_x, int mb_y, int step )
 {
-    /* This function contains annoying hacks because GCC has a habit of reordering emms
-     * and putting it after floating point ops. As a result, we put the emms at the end of the
-     * function and make sure that its always called before the float math. Noinline makes
-     * sure no reordering goes on. */
-    /* FIXME: This array is larger than necessary because a bug in GCC causes an all-zero
-     * array to be placed in .bss despite .bss not being correctly aligned on some platforms (win32?) */
-    DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
-    unsigned int var=0, sad, ssd, i;
-    if( satd || h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
+    DECLARE_ALIGNED_16( static uint8_t zero[9] ) = {0,0,0,0,0,0,0,0,1};
+    int locx,locy,n=0;
+    uint64_t total = 0;
+    int shiftx = 4; int shifty = 4;
+    int startx = shiftx; int starty = shifty;
+    int endx = blocksize - shiftx; int endy = blocksize - shifty;
+    plane -= (shiftx + shifty * stride);
+    if(mb_x == 0) startx += shiftx;
+    if(mb_y == 0) starty += shifty;
+    if(mb_x == h->sps->i_mb_width - 1) endx -= shiftx;
+    if(mb_y == h->sps->i_mb_height - 1) endy -= shifty;
+    plane += starty * stride;
+    for(locy = starty; locy < endy; locy+=step)
     {
-        for( i=0; i<3; i++ )
+        for(locx = startx; locx < endx; locx+=step)
         {
-            int w = i ? 8 : 16;
-            int stride = h->fenc->i_stride[i];
-            int offset = h->mb.b_interlaced
-                ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
-                : w * (mb_x + mb_y * stride);
-            int pix = i ? PIXEL_8x8 : PIXEL_16x16;
-            stride <<= h->mb.b_interlaced;
-            sad = h->pixf.sad[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
-            ssd = h->pixf.ssd[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
-            var += ssd - (sad * sad >> (i?6:8));
-            // SATD to represent the block's overall complexity (bit cost) for intra encoding.
-            // exclude the DC coef, because nothing short of an actual intra prediction will estimate DC cost.
-            if( var && satd )
-                *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride ) - sad/2;
+            int sad = h->pixf.sad[PIXEL_8x8](plane+locx,stride,zero,0);
+            int ssd = h->pixf.ssd[PIXEL_8x8](plane+locx,stride,zero,0);
+            total += ssd - ((sad * sad) >> 6);
+            n++;
         }
-        var = X264_MAX(var,1);
+        plane += stride*step;
+    }
+    return n ? (total * 196) / n : 0; /* n == 0 when the frame is one MB wide or tall; avoid dividing by zero */
+}
+
+static NOINLINE int aqm_GIT( x264_t *h, int mb_x, int mb_y, int *satd )
+{ /* Metric: G0 */
+    DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+    unsigned int var=0, sad, ssd, i;
+    for( i=0; i<3; i++ )
+    {
+        int w = i ? 8 : 16;
+        int stride = h->fenc->i_stride[i];
+        int offset = h->mb.b_interlaced
+            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
+            : w * (mb_x + mb_y * stride);
+        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
+        stride <<= h->mb.b_interlaced;
+        sad = h->pixf.sad[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+        ssd = h->pixf.ssd[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+        var += ssd - (sad * sad >> (i?6:8));
+        if( satd )
+            *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride ) - (sad>>1);
+    }
+    var = X264_MAX(var,1) - 1;
+    x264_emms();
+    return var;
+}
+
+static NOINLINE int aqm_wv1( x264_t *h, int mb_x, int mb_y, int *satd )
+{ /* Metric: wv1 */
+    DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+    unsigned int var=0, i;
+    for( i=0; i<1; i++ )
+    {
+        int w = i ? 8 : 16;
+        int stride = h->fenc->i_stride[i];
+        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
+        int offset = h->mb.b_interlaced
+            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
+            : w * (mb_x + mb_y * stride);
+        stride <<= h->mb.b_interlaced;
+        var += windowed_variance( h, h->fenc->plane[i]+offset, stride, 7, 7, w, mb_x, mb_y, 1 );
+        if( satd )
+            *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+    }
+    x264_emms();
+    return (var+16) >> 5;
+}
+
+static NOINLINE int aqm_wv1c( x264_t *h, int mb_x, int mb_y, int *satd )
+{ /* Metric: wv1c */
+    DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+    unsigned int var=0, i;
+    for( i=0; i<1; i++ )
+    {
+        int w = i ? 8 : 16;
+        int stride = h->fenc->i_stride[i];
+        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
+        int offset = h->mb.b_interlaced
+            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
+            : w * (mb_x + mb_y * stride);
+        stride <<= h->mb.b_interlaced;
+        var += windowed_variance( h, h->fenc->plane[i]+offset, stride, 7, 7, w, mb_x, mb_y, 1 );
+        if( satd )
+            *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride ) - (h->pixf.sad[pix]( zero, 0, h->fenc->plane[i]+offset, stride )>>1);
+    }
+    x264_emms();
+    return (var+16) >> 5;
+}
+
+static NOINLINE int aqm_wv2( x264_t *h, int mb_x, int mb_y, int *satd )
+{ /* Metric: wv2 */
+    DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+    unsigned int var=0, i;
+    for( i=0; i<1; i++ )
+    {
+        int w = i ? 8 : 16;
+        int stride = h->fenc->i_stride[i];
+        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
+        int offset = h->mb.b_interlaced
+            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
+            : w * (mb_x + mb_y * stride);
+        stride <<= h->mb.b_interlaced;
+        var += windowed_variance( h, h->fenc->plane[i]+offset, stride, 7, 7, w, mb_x, mb_y, 2 );
+        if( satd )
+            *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+    }
+    x264_emms();
+    return (var+16) >> 5;
+}
+
+static NOINLINE int aqm_wv2c( x264_t *h, int mb_x, int mb_y, int *satd )
+{ /* Metric: wv2c */
+    DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+    unsigned int var=0, i;
+    for( i=0; i<1; i++ )
+    {
+        int w = i ? 8 : 16;
+        int stride = h->fenc->i_stride[i];
+        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
+        int offset = h->mb.b_interlaced
+            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
+            : w * (mb_x + mb_y * stride);
+        stride <<= h->mb.b_interlaced;
+        var += windowed_variance( h, h->fenc->plane[i]+offset, stride, 7, 7, w, mb_x, mb_y, 2 );
+        if( satd )
+            *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride ) - (h->pixf.sad[pix]( zero, 0, h->fenc->plane[i]+offset, stride )>>1);
+    }
+    x264_emms();
+    return (var+16) >> 5;
+}
+
+static NOINLINE int aqm_fwv( x264_t *h, int mb_x, int mb_y, int *satd )
+{ /* Metric: fwv */
+    DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+    unsigned int var=0, i;
+    for( i=0; i<1; i++ )
+    {
+        int w = i ? 8 : 16;
+        int stride = h->fenc->i_stride[i];
+        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
+        int offset = h->mb.b_interlaced
+            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
+            : w * (mb_x + mb_y * stride);
+        stride <<= h->mb.b_interlaced;
+        var += fast_windowed_variance( h, h->fenc->plane[i]+offset, stride, w, mb_x, mb_y, 2 );
+        if( satd )
+            *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+    }
+    x264_emms();
+    return (var+16) >> 5;
+}
+
+static NOINLINE int aqm_fwvc( x264_t *h, int mb_x, int mb_y, int *satd )
+{ /* Metric: fwvc */
+    DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+    unsigned int var=0, i;
+    for( i=0; i<1; i++ )
+    {
+        int w = i ? 8 : 16;
+        int stride = h->fenc->i_stride[i];
+        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
+        int offset = h->mb.b_interlaced
+            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
+            : w * (mb_x + mb_y * stride);
+        stride <<= h->mb.b_interlaced;
+        var += fast_windowed_variance( h, h->fenc->plane[i]+offset, stride, w, mb_x, mb_y, 2 );
+        if( satd )
+            *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride ) - (h->pixf.sad[pix]( zero, 0, h->fenc->plane[i]+offset, stride )>>1);
+    }
+    x264_emms();
+    return (var+16) >> 5;
+}
+
+static NOINLINE int aqm_4ver0( x264_t *h, int mb_x, int mb_y, int *satd )
+{ /* Metric: 4ver0 */
+    DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+    DECLARE_ALIGNED_16( uint8_t avg_flat[16] );
+    unsigned int var=0, sad, i;
+    for( i=0; i<3; i++ )
+    {
+        int w = i ? 8 : 16;
+        int stride = h->fenc->i_stride[i];
+        int offset = h->mb.b_interlaced
+            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
+            : w * (mb_x + mb_y * stride);
+        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
+        stride <<= h->mb.b_interlaced;
+        sad = h->pixf.sad[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+        memset( avg_flat, sad >> (i?6:8), sizeof(avg_flat) );
+        var += h->pixf.ssd[pix]( avg_flat, 0, h->fenc->plane[i]+offset, stride );
+        if( satd )
+            *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+    }
+    x264_emms();
+    return var;
+}
+
+static NOINLINE int aqm_4ver0c( x264_t *h, int mb_x, int mb_y, int *satd )
+{ /* Metric: 4ver0c */
+    DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+    DECLARE_ALIGNED_16( uint8_t avg_flat[16] );
+    unsigned int var=0, sad, i;
+    for( i=0; i<3; i++ )
+    {
+        int w = i ? 8 : 16;
+        int stride = h->fenc->i_stride[i];
+        int offset = h->mb.b_interlaced
+            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
+            : w * (mb_x + mb_y * stride);
+        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
+        stride <<= h->mb.b_interlaced;
+        sad = h->pixf.sad[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+        memset( avg_flat, sad >> (i?6:8), sizeof(avg_flat) );
+        var += h->pixf.ssd[pix]( avg_flat, 0, h->fenc->plane[i]+offset, stride );
+        if( satd )
+            *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride ) - (sad>>1);
+    }
+    x264_emms();
+    return var;
+}
+
+static NOINLINE int aqm_4ver0r( x264_t *h, int mb_x, int mb_y, int *satd )
+{ /* Metric: 4ver0r */
+    DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+    DECLARE_ALIGNED_16( uint8_t avg_flat[16] );
+    unsigned int var=0, sad, i;
+    for( i=0; i<3; i++ )
+    {
+        int w = i ? 8 : 16;
+        int stride = h->fenc->i_stride[i];
+        int offset = h->mb.b_interlaced
+            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
+            : w * (mb_x + mb_y * stride);
+        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
+        stride <<= h->mb.b_interlaced;
+        sad = h->pixf.sad[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+        memset( avg_flat, ((sad >> (i?5:7)) + 1) >> 1, sizeof(avg_flat) );
+        var += h->pixf.ssd[pix]( avg_flat, 0, h->fenc->plane[i]+offset, stride );
+        if( satd )
+            *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+    }
+    x264_emms();
+    return var;
+}
+
+static NOINLINE int aqm_4ver0rc( x264_t *h, int mb_x, int mb_y, int *satd )
+{ /* Metric: 4ver0rc */
+    DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+    DECLARE_ALIGNED_16( uint8_t avg_flat[16] );
+    unsigned int var=0, sad, i;
+    for( i=0; i<3; i++ )
+    {
+        int w = i ? 8 : 16;
+        int stride = h->fenc->i_stride[i];
+        int offset = h->mb.b_interlaced
+            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
+            : w * (mb_x + mb_y * stride);
+        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
+        stride <<= h->mb.b_interlaced;
+        sad = h->pixf.sad[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+        memset( avg_flat, ((sad >> (i?5:7)) + 1) >> 1, sizeof(avg_flat) );
+        var += h->pixf.ssd[pix]( avg_flat, 0, h->fenc->plane[i]+offset, stride );
+        if( satd )
+            *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride ) - (sad>>1);
+    }
+    x264_emms();
+    return var;
+}
+
+static NOINLINE int aqm_4ver1( x264_t *h, int mb_x, int mb_y, int *satd )
+{ /* Metric: 4ver1 */
+    DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+    unsigned int var=0, sad, ssd, i;
+    for( i=0; i<3; i++ )
+    {
+        int w = i ? 8 : 16;
+        int stride = h->fenc->i_stride[i];
+        int offset = h->mb.b_interlaced
+            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
+            : w * (mb_x + mb_y * stride);
+        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
+        stride <<= h->mb.b_interlaced;
+        sad = h->pixf.sad[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+        ssd = h->pixf.ssd[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+        var += ssd - (sad * sad >> (i?6:8));
+        if( satd )
+            *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+    }
+    x264_emms();
+    return var;
+}
+
+static NOINLINE int aqm_4ver1c( x264_t *h, int mb_x, int mb_y, int *satd )
+{ /* Metric: 4ver1c */
+    DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+    unsigned int var=0, sad, ssd, i;
+    for( i=0; i<3; i++ )
+    {
+        int w = i ? 8 : 16;
+        int stride = h->fenc->i_stride[i];
+        int offset = h->mb.b_interlaced
+            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
+            : w * (mb_x + mb_y * stride);
+        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
+        stride <<= h->mb.b_interlaced;
+        sad = h->pixf.sad[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+        ssd = h->pixf.ssd[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
+        var += ssd - (sad * sad >> (i?6:8));
+        if( satd )
+            *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride ) - (sad>>1);
     }
-    else var = h->rc->ac_energy[h->mb.i_mb_xy];
     x264_emms();
     return var;
 }
@@ -219,48 +550,143 @@ void x264_autosense_aq( x264_t *h )
     double total = 0;
     double n = 0;
     int mb_x, mb_y;
-    // FIXME: Some of the SATDs might be already calculated elsewhere (ratecontrol?). Can we reuse them?
-    // FIXME: Is chroma SATD necessary?
     for( mb_y=0; mb_y<h->sps->i_mb_height; mb_y++ )
         for( mb_x=0; mb_x<h->sps->i_mb_width; mb_x++ )
         {
-            int satd=0;
-            int energy = ac_energy_mb( h, mb_x, mb_y, &satd );
+            int energy, satd = 1;
+            if(h->param.rc.i_aq_metric == 0)
+                energy = aqm_GIT( h, mb_x, mb_y, &satd );
+            else if(h->param.rc.i_aq_metric == 1)
+                energy = aqm_fwv( h, mb_x, mb_y, &satd );
+            else if(h->param.rc.i_aq_metric == 2)
+                energy = aqm_fwvc( h, mb_x, mb_y, &satd );
+            else if(h->param.rc.i_aq_metric == 3)
+                energy = aqm_wv1( h, mb_x, mb_y, &satd );
+            else if(h->param.rc.i_aq_metric == 4)
+                energy = aqm_wv1c( h, mb_x, mb_y, &satd );
+            else if(h->param.rc.i_aq_metric == 5)
+                energy = aqm_wv2( h, mb_x, mb_y, &satd );
+            else if(h->param.rc.i_aq_metric == 6)
+                energy = aqm_wv2c( h, mb_x, mb_y, &satd );
+            else if(h->param.rc.i_aq_metric == 7)
+                energy = aqm_4ver0( h, mb_x, mb_y, &satd );
+            else if(h->param.rc.i_aq_metric == 8)
+                energy = aqm_4ver0c( h, mb_x, mb_y, &satd );
+            else if(h->param.rc.i_aq_metric == 9)
+                energy = aqm_4ver0r( h, mb_x, mb_y, &satd );
+            else if(h->param.rc.i_aq_metric == 10)
+                energy = aqm_4ver0rc( h, mb_x, mb_y, &satd );
+            else if(h->param.rc.i_aq_metric == 11)
+                energy = aqm_4ver1( h, mb_x, mb_y, &satd );
+            else if(h->param.rc.i_aq_metric == 12)
+                energy = aqm_4ver1c( h, mb_x, mb_y, &satd );
+            else
+                energy = aqm_wv1c( h, mb_x, mb_y, &satd );
             h->rc->ac_energy[mb_x + mb_y * h->sps->i_mb_width] = energy;
             /* Weight the energy value by the SATD value of the MB.
              * This represents the fact that the more complex blocks in a frame should
              * be weighted more when calculating the optimal threshold. This also helps
              * diminish the negative effect of large numbers of simple blocks in a frame,
              * such as in the case of a letterboxed film. */
-            total += logf(energy) * satd;
+            total += logf(energy + 1) * satd;
             n += satd;
         }
-    x264_emms();
     /* Calculate and store the threshold. */
-    h->rc->aq_threshold = n ? total/n : 15;
+    h->rc->aq_threshold = n ? total/n : 10;
 }
 
 /*****************************************************************************
-* x264_adaptive_quant:
+ * x264_adaptive_quant:
  * adjust macroblock QP based on variance (AC energy) of the MB.
  * high variance = higher QP
  * low variance = lower QP
  * This generally increases SSIM and lowers PSNR.
-*****************************************************************************/
+ *****************************************************************************/
 void x264_adaptive_quant( x264_t *h )
 {
-    int energy = ac_energy_mb( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
-    /* Adjust the QP based on the AC energy of the macroblock. */
-    float qp = h->rc->f_qpm;
-    float qp_adj = 1.5 * (logf(energy) - h->rc->aq_threshold);
+    int energy, satd = 1;
+    float f_energy, qp_adj;
+
     if( h->param.rc.i_aq_mode == X264_AQ_LOCAL )
-        qp_adj = x264_clip3f( qp_adj, -5, 5 );
-    h->mb.i_qp = x264_clip3( qp + qp_adj * h->param.rc.f_aq_strength + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
+    {
+        energy = h->rc->ac_energy[h->mb.i_mb_xy];
+        x264_emms();
+    }
+    else if( h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
+    {
+        if( h->param.rc.i_aq_metric == 0 )
+            energy = aqm_GIT( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+        else if( h->param.rc.i_aq_metric == 1 )
+            energy = aqm_fwv( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+        else if( h->param.rc.i_aq_metric == 2 )
+            energy = aqm_fwvc( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+        else if( h->param.rc.i_aq_metric == 3 )
+            energy = aqm_wv1( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+        else if( h->param.rc.i_aq_metric == 4 )
+            energy = aqm_wv1c( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+        else if( h->param.rc.i_aq_metric == 5 )
+            energy = aqm_wv2( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+        else if( h->param.rc.i_aq_metric == 6 )
+            energy = aqm_wv2c( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+        else if( h->param.rc.i_aq_metric == 7 )
+            energy = aqm_4ver0( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+        else if( h->param.rc.i_aq_metric == 8 )
+            energy = aqm_4ver0c( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+        else if( h->param.rc.i_aq_metric == 9 )
+            energy = aqm_4ver0r( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+        else if( h->param.rc.i_aq_metric == 10 )
+            energy = aqm_4ver0rc( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+        else if( h->param.rc.i_aq_metric == 11 )
+            energy = aqm_4ver1( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+        else if( h->param.rc.i_aq_metric == 12 )
+            energy = aqm_4ver1c( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+        else
+            energy = aqm_wv1c( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+    }
+    else
+    {
+        if( h->param.rc.i_aq_metric == 0 )
+            energy = aqm_GIT( h, h->mb.i_mb_x, h->mb.i_mb_y, &satd );
+        else if( h->param.rc.i_aq_metric == 1 )
+            energy = aqm_fwv( h, h->mb.i_mb_x, h->mb.i_mb_y, &satd );
+        else if( h->param.rc.i_aq_metric == 2 )
+            energy = aqm_fwvc( h, h->mb.i_mb_x, h->mb.i_mb_y, &satd );
+        else if( h->param.rc.i_aq_metric == 3 )
+            energy = aqm_wv1( h, h->mb.i_mb_x, h->mb.i_mb_y, &satd );
+        else if( h->param.rc.i_aq_metric == 4 )
+            energy = aqm_wv1c( h, h->mb.i_mb_x, h->mb.i_mb_y, &satd );
+        else if( h->param.rc.i_aq_metric == 5 )
+            energy = aqm_wv2( h, h->mb.i_mb_x, h->mb.i_mb_y, &satd );
+        else if( h->param.rc.i_aq_metric == 6 )
+            energy = aqm_wv2c( h, h->mb.i_mb_x, h->mb.i_mb_y, &satd );
+        else if( h->param.rc.i_aq_metric == 7 )
+            energy = aqm_4ver0( h, h->mb.i_mb_x, h->mb.i_mb_y, &satd );
+        else if( h->param.rc.i_aq_metric == 8 )
+            energy = aqm_4ver0c( h, h->mb.i_mb_x, h->mb.i_mb_y, &satd );
+        else if( h->param.rc.i_aq_metric == 9 )
+            energy = aqm_4ver0r( h, h->mb.i_mb_x, h->mb.i_mb_y, &satd );
+        else if( h->param.rc.i_aq_metric == 10 )
+            energy = aqm_4ver0rc( h, h->mb.i_mb_x, h->mb.i_mb_y, &satd );
+        else if( h->param.rc.i_aq_metric == 11 )
+            energy = aqm_4ver1( h, h->mb.i_mb_x, h->mb.i_mb_y, &satd );
+        else if( h->param.rc.i_aq_metric == 12 )
+            energy = aqm_4ver1c( h, h->mb.i_mb_x, h->mb.i_mb_y, &satd );
+        else
+            energy = aqm_wv1c( h, h->mb.i_mb_x, h->mb.i_mb_y, &satd );
+    }
+    f_energy = logf( energy + 1 );
+    if( h->param.rc.i_aq_mode == X264_AQ_HYBRID )
+    {
+        h->rc->aq_threshold_num += satd;
+        h->rc->aq_threshold += (f_energy - h->rc->aq_threshold) * satd / h->rc->aq_threshold_num;
+    }
+    /* Adjust the QP based on the AC energy of the macroblock. */
+    qp_adj = 1.5 * (f_energy - h->rc->aq_threshold);
+    h->mb.i_qp = x264_clip3( h->rc->f_qpm + qp_adj * h->param.rc.f_aq_strength + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
     /* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB,
      * to lower the bit cost of the qp_delta. */
     if( abs(h->mb.i_qp - h->mb.i_last_qp) == 1 )
         h->mb.i_qp = h->mb.i_last_qp;
-    h->mb.i_chroma_qp = i_chroma_qp_table[x264_clip3( h->mb.i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
 }
 
 int x264_ratecontrol_new( x264_t *h )
@@ -287,6 +713,8 @@ int x264_ratecontrol_new( x264_t *h )
     rc->nmb = h->mb.i_mb_count;
     rc->last_non_b_pict_type = -1;
     rc->cbr_decay = 1.0;
+    if( h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
+        rc->aq_threshold = h->param.rc.f_aq_sensitivity;
 
     if( h->param.rc.i_rc_method == X264_RC_CRF && h->param.rc.b_stat_read )
     {
@@ -695,6 +1123,8 @@ void x264_ratecontrol_summary( x264_t *h
                   qscale2qp( pow( base_cplx, 1 - h->param.rc.f_qcompress )
                              * rc->cplxr_sum / rc->wanted_bits_window ) );
     }
+    if( h->param.rc.i_aq_mode == X264_AQ_HYBRID )
+        x264_log( h, X264_LOG_INFO, "final AQ sensitivity: %.4f\n", rc->aq_threshold );
 }
 
 void x264_ratecontrol_delete( x264_t *h )
@@ -852,12 +1282,8 @@ void x264_ratecontrol_start( x264_t *h,
         rc->last_non_b_pict_type = h->sh.i_type;
 
     /* Adaptive AQ thresholding algorithm. */
-    if( h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
-        /* Arbitrary value for "center" of the AQ curve.
-         * Chosen so that any given value of CRF has on average similar bitrate with and without AQ. */
-        h->rc->aq_threshold = logf(5000);
-    else if( h->param.rc.i_aq_mode == X264_AQ_LOCAL )
-        x264_autosense_aq(h);
+    if( h->param.rc.i_aq_mode == X264_AQ_LOCAL )
+        x264_autosense_aq( h );
 }
 
 double predict_row_size( x264_t *h, int y, int qp )
diff -uNrp c/x264.c b/x264.c
--- c/x264.c	2008-07-30 18:48:09 +0300
+++ b/x264.c	2008-07-30 18:49:03 +0300
@@ -191,11 +191,30 @@ static void Help( x264_param_t *defaults
     H0( " --aq-mode How AQ distributes bits [%d]\n"
         " - 0: Disabled\n"
         " - 1: Avoid moving bits between frames\n"
-        " - 2: Move bits between frames\n", defaults->rc.i_aq_mode );
+        " - 2: Move bits between frames\n"
+        " - 3: Hybrid mode of moving bits\n", defaults->rc.i_aq_mode );
+    H0( " --aq-metric The metric used for AQ [%d]\n"
+        " - 0: Original GIT metric (aqm_GIT)\n"
+        " - 1: Fast Windowed Variance (aqm_fwv)\n"
+        " - 2: Fast Windowed Variance with correction (aqm_fwvc)\n"
+        " - 3: Windowed Variance step 1 (aqm_wv1)\n"
+        " - 4: Windowed Variance step 1 with correction (aqm_wv1c)\n"
+        " - 5: Windowed Variance step 2 (aqm_wv2)\n"
+        " - 6: Windowed Variance step 2 with correction (aqm_wv2c)\n"
+        " - 7: BM ver.0 (aqm_4ver0)\n"
+        " - 8: BM ver.0 with correction (aqm_4ver0c)\n"
+        " - 9: BM ver.0 with round (aqm_4ver0r)\n"
+        " - 10: BM ver.0 with round and correction (aqm_4ver0rc)\n"
+        " - 11: BM ver.1 (aqm_4ver1)\n"
+        " - 12: BM ver.1 with correction (aqm_4ver1c)\n", defaults->rc.i_aq_metric );
     H0( " --aq-strength Reduces blocking and blurring in flat and\n"
         " textured areas. [%.1f]\n"
         " - 0.5: weak AQ\n"
         " - 1.5: strong AQ\n", defaults->rc.f_aq_strength );
+    H0( " --aq-sensitivity \"Center\" of AQ curve. [%.1f]\n"
+        " - 5: most QPs are raised\n"
+        " - 10: good general-use sensitivity\n"
+        " - 15: most QPs are lowered\n", defaults->rc.f_aq_sensitivity );
     H0( "\n" );
     H0( " -p, --pass <1|2|3> Enable multipass ratecontrol\n"
         " - 1: First pass, creates stats file\n"
@@ -424,7 +443,9 @@ static int Parse( int argc, char **argv
             { "no-fast-pskip", no_argument, NULL, 0 },
             { "no-dct-decimate", no_argument, NULL, 0 },
             { "aq-strength", required_argument, NULL, 0 },
+            { "aq-sensitivity", required_argument, NULL, 0 },
             { "aq-mode", required_argument, NULL, 0 },
+            { "aq-metric", required_argument, NULL, 0 },
             { "deadzone-inter", required_argument, NULL, '0' },
             { "deadzone-intra", required_argument, NULL, '0' },
             { "level", required_argument, NULL, 0 },
diff -uNrp c/x264.h b/x264.h
--- c/x264.h	2008-07-30 18:48:09 +0300
+++ b/x264.h	2008-07-30 18:49:03 +0300
@@ -87,6 +87,7 @@ typedef struct x264_t x264_t;
 #define X264_AQ_NONE 0
 #define X264_AQ_LOCAL 1
 #define X264_AQ_GLOBAL 2
+#define X264_AQ_HYBRID 3
 
 static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
 static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
@@ -269,7 +270,9 @@ typedef struct x264_param_t
         float f_pb_factor;
 
         int i_aq_mode; /* psy adaptive QP. (X264_AQ_*) */
+        int i_aq_metric; /* 0 = macroblock variance [0..12] */
         float f_aq_strength;
+        float f_aq_sensitivity;
 
         /* 2pass */
         int b_stat_write; /* Enable stat writing in psz_stat_out */