diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
index 2dd34d0..3e15132 100644
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -168,15 +168,18 @@ static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
 }
 
 // Find the total AC energy of the block in all planes.
-static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame )
+static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, uint32_t *cost )
 {
     /* This function contains annoying hacks because GCC has a habit of reordering emms
      * and putting it after floating point ops.  As a result, we put the emms at the end of the
      * function and make sure that its always called before the float math.  Noinline makes
      * sure no reordering goes on. */
-    unsigned int var = 0, i;
+    uint32_t var = 0, i;
+    assert( cost );
+    *cost = 0;
     for( i = 0; i < 3; i++ )
     {
+        uint64_t acs;
         int w = i ? 8 : 16;
         int stride = frame->i_stride[i];
         int offset = h->mb.b_interlaced
@@ -185,8 +188,9 @@ static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *f
         int pix = i ? PIXEL_8x8 : PIXEL_16x16;
         stride <<= h->mb.b_interlaced;
         var += h->pixf.var[pix]( frame->plane[i]+offset, stride );
+        acs = h->pixf.hadamard_ac[pix]( frame->plane[i]+offset, stride );
+        *cost += ((int32_t)acs + (int32_t)(acs>>32)) >> 1;
     }
-    var = X264_MAX(var,1);
     x264_emms();
     return var;
 }
@@ -217,7 +221,13 @@ static const uint8_t exp2_lut[64] = {
     177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
 };
 
-static int x264_exp2fix8( float x )
+static ALWAYS_INLINE float x264_log2( uint32_t x ) /* Table-assisted log2 estimate of x; the callers added in this patch pass value+2, so x is never 0 there. */
+{
+    int lz = x264_clz( x ); /* presumably the number of leading zero bits in x (behaviour for x == 0 not visible here) -- TODO confirm */
+    return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz); /* LUT index = bits 24..30 of x after shifting left by lz; (31 - lz) would be floor(log2(x)) if lz is the leading-zero count */
+}
+
+static ALWAYS_INLINE int x264_exp2fix8( float x )
 {
     int i, f;
     x += 8;
@@ -230,16 +240,25 @@ static int x264_exp2fix8( float x )
 
 void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
 {
-    /* constants chosen to result in approximately the same overall bitrate as without AQ.
-     * FIXME: while they're written in 5 significant digits, they're only tuned to 2. */
-    float strength = h->param.rc.f_aq_strength * 1.0397;
     int mb_x, mb_y;
+    float strength;
+    float avg_adj = 0.f;
+    for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
+        for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
+        {
+            uint32_t cost;
+            uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame, &cost );
+            float qp_adj = x264_log2( energy + 2 ) * x264_log2( cost + 2 );
+            frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
+            avg_adj += qp_adj;
+        }
+    avg_adj /= h->mb.i_mb_count;
+    strength = h->param.rc.f_aq_strength * avg_adj * (1.f / 2500.f);
     for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
         for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
         {
-            uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
-            int lz = x264_clz( energy );
-            float qp_adj = strength * (log2_lut[(energy<<lz>>24)&0x7f] - lz + 16.573f);
+            float qp_adj = frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride];
+            qp_adj = strength * (qp_adj - avg_adj);
             frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
             if( h->frames.b_have_lowres )
                 frame->i_inv_qscale_factor[mb_x + mb_y*h->mb.i_mb_stride] = x264_exp2fix8(qp_adj*(-1.f/6.f));