摘自 Go 语言 image/jpeg 包
// idct performs a 2-D Inverse Discrete Cosine Transformation.//// The input coefficients should already have been multiplied by the// appropriate quantization table. We use fixed-point computation, with the// number of bits for the fractional component varying over the intermediate// stages.//// For more on the actual algorithm, see Z. Wang, "Fast algorithms for the// discrete W transform and for the discrete Fourier transform", IEEE Trans. on// ASSP, Vol. ASSP- 32, pp. 803-816, Aug. 1984.func idct(src *block) { // Horizontal 1-D IDCT. for y := 0; y < 8; y++ { y8 := y * 8 // If all the AC components are zero, then the IDCT is trivial. if src[y8+1] == 0 && src[y8+2] == 0 && src[y8+3] == 0 && src[y8+4] == 0 && src[y8+5] == 0 && src[y8+6] == 0 && src[y8+7] == 0 { dc := src[y8+0] << 3 src[y8+0] = dc src[y8+1] = dc src[y8+2] = dc src[y8+3] = dc src[y8+4] = dc src[y8+5] = dc src[y8+6] = dc src[y8+7] = dc continue } // Prescale. x0 := (src[y8+0] << 11) + 128 x1 := src[y8+4] << 11 x2 := src[y8+6] x3 := src[y8+2] x4 := src[y8+1] x5 := src[y8+7] x6 := src[y8+5] x7 := src[y8+3] // Stage 1. x8 := w7 * (x4 + x5) x4 = x8 + w1mw7*x4 x5 = x8 - w1pw7*x5 x8 = w3 * (x6 + x7) x6 = x8 - w3mw5*x6 x7 = x8 - w3pw5*x7 // Stage 2. x8 = x0 + x1 x0 -= x1 x1 = w6 * (x3 + x2) x2 = x1 - w2pw6*x2 x3 = x1 + w2mw6*x3 x1 = x4 + x6 x4 -= x6 x6 = x5 + x7 x5 -= x7 // Stage 3. x7 = x8 + x3 x8 -= x3 x3 = x0 + x2 x0 -= x2 x2 = (r2*(x4+x5) + 128) >> 8 x4 = (r2*(x4-x5) + 128) >> 8 // Stage 4. src[y8+0] = (x7 + x1) >> 8 src[y8+1] = (x3 + x2) >> 8 src[y8+2] = (x0 + x4) >> 8 src[y8+3] = (x8 + x6) >> 8 src[y8+4] = (x8 - x6) >> 8 src[y8+5] = (x0 - x4) >> 8 src[y8+6] = (x3 - x2) >> 8 src[y8+7] = (x7 - x1) >> 8 } // Vertical 1-D IDCT. for x := 0; x < 8; x++ { // Similar to the horizontal 1-D IDCT case, if all the AC components are zero, then the IDCT is trivial. // However, after performing the horizontal 1-D IDCT, there are typically non-zero AC components, so // we do not bother to check for the all-zero case. 
// Prescale. y0 := (src[8*0+x] << 8) + 8192 y1 := src[8*4+x] << 8 y2 := src[8*6+x] y3 := src[8*2+x] y4 := src[8*1+x] y5 := src[8*7+x] y6 := src[8*5+x] y7 := src[8*3+x] // Stage 1. y8 := w7*(y4+y5) + 4 y4 = (y8 + w1mw7*y4) >> 3 y5 = (y8 - w1pw7*y5) >> 3 y8 = w3*(y6+y7) + 4 y6 = (y8 - w3mw5*y6) >> 3 y7 = (y8 - w3pw5*y7) >> 3 // Stage 2. y8 = y0 + y1 y0 -= y1 y1 = w6*(y3+y2) + 4 y2 = (y1 - w2pw6*y2) >> 3 y3 = (y1 + w2mw6*y3) >> 3 y1 = y4 + y6 y4 -= y6 y6 = y5 + y7 y5 -= y7 // Stage 3. y7 = y8 + y3 y8 -= y3 y3 = y0 + y2 y0 -= y2 y2 = (r2*(y4+y5) + 128) >> 8 y4 = (r2*(y4-y5) + 128) >> 8 // Stage 4. src[8*0+x] = (y7 + y1) >> 14 src[8*1+x] = (y3 + y2) >> 14 src[8*2+x] = (y0 + y4) >> 14 src[8*3+x] = (y8 + y6) >> 14 src[8*4+x] = (y8 - y6) >> 14 src[8*5+x] = (y0 - y4) >> 14 src[8*6+x] = (y3 - y2) >> 14 src[8*7+x] = (y7 - y1) >> 14 }}
// fdct performs a forward DCT on an 8x8 block of coefficients, including a// level shift.func fdct(b *block) { // Pass 1: process rows. for y := 0; y < 8; y++ { x0 := b[y*8+0] x1 := b[y*8+1] x2 := b[y*8+2] x3 := b[y*8+3] x4 := b[y*8+4] x5 := b[y*8+5] x6 := b[y*8+6] x7 := b[y*8+7] tmp0 := x0 + x7 tmp1 := x1 + x6 tmp2 := x2 + x5 tmp3 := x3 + x4 tmp10 := tmp0 + tmp3 tmp12 := tmp0 - tmp3 tmp11 := tmp1 + tmp2 tmp13 := tmp1 - tmp2 tmp0 = x0 - x7 tmp1 = x1 - x6 tmp2 = x2 - x5 tmp3 = x3 - x4 b[y*8+0] = (tmp10 + tmp11 - 8*centerJSample) << pass1Bits b[y*8+4] = (tmp10 - tmp11) << pass1Bits z1 := (tmp12 + tmp13) * fix_0_541196100 z1 += 1 << (constBits - pass1Bits - 1) b[y*8+2] = (z1 + tmp12*fix_0_765366865) >> (constBits - pass1Bits) b[y*8+6] = (z1 - tmp13*fix_1_847759065) >> (constBits - pass1Bits) tmp10 = tmp0 + tmp3 tmp11 = tmp1 + tmp2 tmp12 = tmp0 + tmp2 tmp13 = tmp1 + tmp3 z1 = (tmp12 + tmp13) * fix_1_175875602 z1 += 1 << (constBits - pass1Bits - 1) tmp0 = tmp0 * fix_1_501321110 tmp1 = tmp1 * fix_3_072711026 tmp2 = tmp2 * fix_2_053119869 tmp3 = tmp3 * fix_0_298631336 tmp10 = tmp10 * -fix_0_899976223 tmp11 = tmp11 * -fix_2_562915447 tmp12 = tmp12 * -fix_0_390180644 tmp13 = tmp13 * -fix_1_961570560 tmp12 += z1 tmp13 += z1 b[y*8+1] = (tmp0 + tmp10 + tmp12) >> (constBits - pass1Bits) b[y*8+3] = (tmp1 + tmp11 + tmp13) >> (constBits - pass1Bits) b[y*8+5] = (tmp2 + tmp11 + tmp12) >> (constBits - pass1Bits) b[y*8+7] = (tmp3 + tmp10 + tmp13) >> (constBits - pass1Bits) } // Pass 2: process columns. // We remove pass1Bits scaling, but leave results scaled up by an overall factor of 8. 
for x := 0; x < 8; x++ { tmp0 := b[0*8+x] + b[7*8+x] tmp1 := b[1*8+x] + b[6*8+x] tmp2 := b[2*8+x] + b[5*8+x] tmp3 := b[3*8+x] + b[4*8+x] tmp10 := tmp0 + tmp3 + 1<<(pass1Bits-1) tmp12 := tmp0 - tmp3 tmp11 := tmp1 + tmp2 tmp13 := tmp1 - tmp2 tmp0 = b[0*8+x] - b[7*8+x] tmp1 = b[1*8+x] - b[6*8+x] tmp2 = b[2*8+x] - b[5*8+x] tmp3 = b[3*8+x] - b[4*8+x] b[0*8+x] = (tmp10 + tmp11) >> pass1Bits b[4*8+x] = (tmp10 - tmp11) >> pass1Bits z1 := (tmp12 + tmp13) * fix_0_541196100 z1 += 1 << (constBits + pass1Bits - 1) b[2*8+x] = (z1 + tmp12*fix_0_765366865) >> (constBits + pass1Bits) b[6*8+x] = (z1 - tmp13*fix_1_847759065) >> (constBits + pass1Bits) tmp10 = tmp0 + tmp3 tmp11 = tmp1 + tmp2 tmp12 = tmp0 + tmp2 tmp13 = tmp1 + tmp3 z1 = (tmp12 + tmp13) * fix_1_175875602 z1 += 1 << (constBits + pass1Bits - 1) tmp0 = tmp0 * fix_1_501321110 tmp1 = tmp1 * fix_3_072711026 tmp2 = tmp2 * fix_2_053119869 tmp3 = tmp3 * fix_0_298631336 tmp10 = tmp10 * -fix_0_899976223 tmp11 = tmp11 * -fix_2_562915447 tmp12 = tmp12 * -fix_0_390180644 tmp13 = tmp13 * -fix_1_961570560 tmp12 += z1 tmp13 += z1 b[1*8+x] = (tmp0 + tmp10 + tmp12) >> (constBits + pass1Bits) b[3*8+x] = (tmp1 + tmp11 + tmp13) >> (constBits + pass1Bits) b[5*8+x] = (tmp2 + tmp11 + tmp12) >> (constBits + pass1Bits) b[7*8+x] = (tmp3 + tmp10 + tmp13) >> (constBits + pass1Bits) }}