diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c index 2dd34d0..3e15132 100644 --- a/encoder/ratecontrol.c +++ b/encoder/ratecontrol.c @@ -168,15 +168,18 @@ static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale) } // Find the total AC energy of the block in all planes. -static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame ) +static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, uint32_t *cost ) { /* This function contains annoying hacks because GCC has a habit of reordering emms * and putting it after floating point ops. As a result, we put the emms at the end of the * function and make sure that its always called before the float math. Noinline makes * sure no reordering goes on. */ - unsigned int var = 0, i; + uint32_t var = 0, i; + assert( cost ); + *cost = 0; for( i = 0; i < 3; i++ ) { + uint64_t acs; int w = i ? 8 : 16; int stride = frame->i_stride[i]; int offset = h->mb.b_interlaced @@ -185,8 +188,9 @@ static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *f int pix = i ? PIXEL_8x8 : PIXEL_16x16; stride <<= h->mb.b_interlaced; var += h->pixf.var[pix]( frame->plane[i]+offset, stride ); + acs = h->pixf.hadamard_ac[pix]( frame->plane[i]+offset, stride ); + *cost += ((int32_t)acs + (int32_t)(acs>>32)) >> 1; } - var = X264_MAX(var,1); x264_emms(); return var; } @@ -217,7 +221,13 @@ static const uint8_t exp2_lut[64] = { 177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253, }; -static int x264_exp2fix8( float x ) +static ALWAYS_INLINE float x264_log2( uint32_t x ) +{ + int lz = x264_clz( x ); + return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz); +} + +static ALWAYS_INLINE int x264_exp2fix8( float x ) { int i, f; x += 8; @@ -230,16 +240,25 @@ static int x264_exp2fix8( float x ) void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame ) { - /* constants chosen to result in approximately the same overall bitrate as without AQ. 
- * FIXME: while they're written in 5 significant digits, they're only tuned to 2. */ - float strength = h->param.rc.f_aq_strength * 1.0397; int mb_x, mb_y; + float strength; + float avg_adj = 0.f; + for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ ) + for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ ) + { + uint32_t cost; + uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame, &cost ); + float qp_adj = x264_log2( energy + 2 ) * x264_log2( cost + 2 ); + frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj; + avg_adj += qp_adj; + } + avg_adj /= h->mb.i_mb_count; + strength = h->param.rc.f_aq_strength * avg_adj * (1.f / 2500.f); for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ ) for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ ) { - uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame ); - int lz = x264_clz( energy ); - float qp_adj = strength * (log2_lut[(energy<<lz>>24)&0x7f] - lz + 16.573f); + float qp_adj = frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride]; + qp_adj = strength * (qp_adj - avg_adj); frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj; if( h->frames.b_have_lowres ) frame->i_inv_qscale_factor[mb_x + mb_y*h->mb.i_mb_stride] = x264_exp2fix8(qp_adj*(-1.f/6.f));