// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package vp8

// This file implements decoding DCT/WHT residual coefficients and
// reconstructing YCbCr data equal to predicted values plus residuals.
//
// There are 1*16*16 + 2*8*8 + 1*4*4 coefficients per macroblock:
//   - 1*16*16 luma DCT coefficients,
//   - 2*8*8 chroma DCT coefficients, and
//   - 1*4*4 luma WHT coefficients.
// Coefficients are read in lots of 16, and the later coefficients in each lot
// are often zero.
//
// The YCbCr data consists of 1*16*16 luma values and 2*8*8 chroma values,
// plus previously decoded values along the top and left borders. The combined
// values are laid out as a [1+16+1+8][32]uint8 so that vertically adjacent
// samples are 32 bytes apart. In detail, the layout is:
//
//     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
//     . . . . . . . a b b b b b b b b b b b b b b b b c c c c . . . .   0
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . .   1
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . .   2
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . .   3
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y c c c c . . . .   4
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . .   5
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . .   6
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . .   7
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y c c c c . . . .   8
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . .   9
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . .  10
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . .  11
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y c c c c . . . .  12
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . .  13
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . .  14
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . .  15
//     . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . .  16
//     . . . . . . . e f f f f f f f f . . . . . . . g h h h h h h h h  17
//     . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R  18
//     . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R  19
//     . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R  20
//     . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R  21
//     . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R  22
//     . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R  23
//     . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R  24
//     . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R  25
//
// Y, B and R are the reconstructed luma (Y) and chroma (B, R) values.
// The Y values are predicted (either as one 16x16 region or 16 4x4 regions)
// based on the row above's Y values (some combination of {abc} or {dYc}) and
// the column left's Y values (either {ad} or {bY}). Similarly, B and R values
// are predicted on the row above and column left of their respective 8x8
// region: {efi} for B, {ghj} for R.
//
// For uppermost macroblocks (i.e. those with mby == 0), the {abcefgh} values
// are initialized to 0x7f. Otherwise, they are copied from the bottom row of
// the macroblock above. The {c} values are then duplicated from row 0 to rows
// 4, 8 and 12 of the ybr workspace.
// Similarly, for leftmost macroblocks (i.e. those with mbx == 0), the {adeigj}
// values are initialized to 0x81. Otherwise, they are copied from the right
// column of the macroblock to the left.
// The corner values {aeg} belong to both groups. The top-row rule is applied
// last, so they are 0x7f for every uppermost macroblock (including the
// top-left one, with mby == 0 && mbx == 0); for all other macroblocks they
// follow the left-column rule.
//
// When moving from one macroblock to the next horizontally, the {adeigj}
// values can simply be copied from the workspace to itself, shifted by 8 or
// 16 columns.
When moving from one macroblock to the next vertically, // filtering can occur and hence the row values have to be copied from the // post-filtered image instead of the pre-filtered workspace. const ( bCoeffBase = 1*16*16 + 0*8*8 rCoeffBase = 1*16*16 + 1*8*8 whtCoeffBase = 1*16*16 + 2*8*8 ) const ( ybrYX = 8 ybrYY = 1 ybrBX = 8 ybrBY = 18 ybrRX = 24 ybrRY = 18 ) // prepareYBR prepares the {abcdefghij} elements of ybr. func (d *Decoder) prepareYBR(mbx, mby int) { if mbx == 0 { for y := 0; y < 17; y++ { d.ybr[y][7] = 0x81 } for y := 17; y < 26; y++ { d.ybr[y][7] = 0x81 d.ybr[y][23] = 0x81 } } else { for y := 0; y < 17; y++ { d.ybr[y][7] = d.ybr[y][7+16] } for y := 17; y < 26; y++ { d.ybr[y][7] = d.ybr[y][15] d.ybr[y][23] = d.ybr[y][31] } } if mby == 0 { for x := 7; x < 28; x++ { d.ybr[0][x] = 0x7f } for x := 7; x < 16; x++ { d.ybr[17][x] = 0x7f } for x := 23; x < 32; x++ { d.ybr[17][x] = 0x7f } } else { for i := 0; i < 16; i++ { d.ybr[0][8+i] = d.img.Y[(16*mby-1)*d.img.YStride+16*mbx+i] } for i := 0; i < 8; i++ { d.ybr[17][8+i] = d.img.Cb[(8*mby-1)*d.img.CStride+8*mbx+i] } for i := 0; i < 8; i++ { d.ybr[17][24+i] = d.img.Cr[(8*mby-1)*d.img.CStride+8*mbx+i] } if mbx == d.mbw-1 { for i := 16; i < 20; i++ { d.ybr[0][8+i] = d.img.Y[(16*mby-1)*d.img.YStride+16*mbx+15] } } else { for i := 16; i < 20; i++ { d.ybr[0][8+i] = d.img.Y[(16*mby-1)*d.img.YStride+16*mbx+i] } } } for y := 4; y < 16; y += 4 { d.ybr[y][24] = d.ybr[0][24] d.ybr[y][25] = d.ybr[0][25] d.ybr[y][26] = d.ybr[0][26] d.ybr[y][27] = d.ybr[0][27] } } // btou converts a bool to a 0/1 value. func btou(b bool) uint8 { if b { return 1 } return 0 } // pack packs four 0/1 values into four bits of a uint32. func pack(x [4]uint8, shift int) uint32 { u := uint32(x[0])<<0 | uint32(x[1])<<1 | uint32(x[2])<<2 | uint32(x[3])<<3 return u << uint(shift) } // unpack unpacks four 0/1 values from a four-bit value. 
// Entry u lists bits 0 through 3 of u, least significant bit first, so
// unpack[u][i] == (u>>i)&1.
var unpack = [16][4]uint8{
	{0, 0, 0, 0},
	{1, 0, 0, 0},
	{0, 1, 0, 0},
	{1, 1, 0, 0},
	{0, 0, 1, 0},
	{1, 0, 1, 0},
	{0, 1, 1, 0},
	{1, 1, 1, 0},
	{0, 0, 0, 1},
	{1, 0, 0, 1},
	{0, 1, 0, 1},
	{1, 1, 0, 1},
	{0, 0, 1, 1},
	{1, 0, 1, 1},
	{0, 1, 1, 1},
	{1, 1, 1, 1},
}

var (
	// The mapping from 4x4 region position to band is specified in section 13.3.
	// The table has 17 entries rather than 16 because parseResiduals4 looks up
	// bands[n] after incrementing n, which can reach 16.
	bands = [17]uint8{0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 0}
	// Category probabilities are specified in section 13.2.
	// Decoding categories 1 and 2 are done inline.
	// Each row holds the probabilities of one category's extra bits, in the
	// order they are read, and is terminated by a zero entry.
	cat3456 = [4][12]uint8{
		{173, 148, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0},
		{176, 155, 140, 135, 0, 0, 0, 0, 0, 0, 0, 0},
		{180, 157, 141, 134, 130, 0, 0, 0, 0, 0, 0, 0},
		{254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0},
	}
	// The zigzag order is:
	//	0  1  5  6
	//	2  4  7 12
	//	3  8 11 13
	//	9 10 14 15
	// zigzag[i] is the row-major position within the 4x4 region of the i'th
	// coefficient in decoding order.
	zigzag = [16]uint8{0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15}
)

// parseResiduals4 parses a 4x4 region of residual coefficients, as specified
// in section 13.3, and returns a 0/1 value indicating whether there was at
// least one non-zero coefficient.
// r is the partition to read bits from.
// plane and context describe which token probability table to use. context is
// either 0, 1 or 2, and equals how many of the macroblock left and macroblock
// above have non-zero coefficients.
// quant are the DC/AC quantization factors.
// skipFirstCoeff is whether the DC coefficient has already been parsed.
// coeffBase is the base index of d.coeff to write to.
func (d *Decoder) parseResiduals4(r *partition, plane int, context uint8, quant [2]uint16, skipFirstCoeff bool, coeffBase int) uint8 {
	// n counts the coefficient positions consumed so far, in zigzag order.
	// Starting at 1 skips the first (DC) position.
	prob, n := &d.tokenProb[plane], 0
	if skipFirstCoeff {
		n = 1
	}
	p := prob[bands[n]][context]
	// A clear first bit means that this region carries no coefficients at all.
	// This is the only path that returns 0.
	if !r.readBit(p[0]) {
		return 0
	}
	for n != 16 {
		n++
		// A clear p[1] bit is a zero coefficient: nothing is written to d.coeff
		// (reconstruct zeroes it beforehand) and the next token is decoded with
		// context 0.
		if !r.readBit(p[1]) {
			p = prob[bands[n]][0]
			continue
		}
		// v is the magnitude of the coefficient, before dequantization.
		// NOTE(review): the arithmetic below assumes that readUint(_, 1) yields
		// a single bit, i.e. 0 or 1 - confirm against the partition type.
		var v uint32
		if !r.readBit(p[2]) {
			// Magnitude 1; the next token is decoded with context 1.
			v = 1
			p = prob[bands[n]][1]
		} else {
			// Magnitude 2 or more; the next token is decoded with context 2,
			// which is selected at the end of this branch.
			if !r.readBit(p[3]) {
				if !r.readBit(p[4]) {
					v = 2
				} else {
					// Magnitude 3 or 4.
					v = 3 + r.readUint(p[5], 1)
				}
			} else if !r.readBit(p[6]) {
				if !r.readBit(p[7]) {
					// Category 1.
					// One extra bit: magnitude 5 or 6.
					v = 5 + r.readUint(159, 1)
				} else {
					// Category 2.
					// Two extra bits: magnitude 7 through 10.
					v = 7 + 2*r.readUint(165, 1) + r.readUint(145, 1)
				}
			} else {
				// Categories 3, 4, 5 or 6.
				// Two bits select the cat3456 row (0 through 3). The extra bits
				// are then read most significant first, one per non-zero
				// probability in that row, and added to the category's base
				// value 3 + (8 << cat), i.e. 11, 19, 35 or 67.
				b1 := r.readUint(p[8], 1)
				b0 := r.readUint(p[9+b1], 1)
				cat := 2*b1 + b0
				tab := &cat3456[cat]
				v = 0
				for i := 0; tab[i] != 0; i++ {
					v *= 2
					v += r.readUint(tab[i], 1)
				}
				v += 3 + (8 << cat)
			}
			p = prob[bands[n]][2]
		}
		// z is the row-major position of this coefficient in the 4x4 region.
		// Position 0 is scaled by quant[0] (DC), every other one by quant[1] (AC).
		z := zigzag[n-1]
		c := int32(v) * int32(quant[btou(z > 0)])
		// The sign is read as one more bit, using uniformProb.
		if r.readBit(uniformProb) {
			c = -c
		}
		d.coeff[coeffBase+int(z)] = int16(c)
		// Stop after the 16th position, or when the p[0] bit of the next token
		// is clear, meaning that no further coefficients follow.
		if n == 16 || !r.readBit(p[0]) {
			return 1
		}
	}
	// Reached when the region ends on a zero coefficient at position 16.
	return 1
}

// parseResiduals parses the residuals.
// It reads the coefficients of the macroblock in column mbx and row mby into
// d.coeff, and records in d.nzDCMask and d.nzACMask which 4x4 regions need an
// inverse transform. Bits 0-15 are the luma regions in row-major order, bits
// 16-19 the first chroma plane and bits 20-23 the second. It also updates the
// non-zero context stored in d.leftMB and d.upMB[mbx], which is used when
// parsing the macroblocks to the right and below.
func (d *Decoder) parseResiduals(mbx, mby int) {
	// The partition is chosen from the macroblock row.
	// NOTE(review): the mask acts as a modulo only if d.nOP is a power of two -
	// presumably guaranteed where d.nOP is set; confirm.
	partition := &d.op[mby&(d.nOP-1)]
	plane := planeY1SansY2
	quant := &d.quant[d.segment]

	// Parse the DC coefficient of each 4x4 luma region.
	// With 16x16 prediction these are coded together as one extra 4x4 region,
	// written at whtCoeffBase and then passed through inverseWHT16.
	if d.usePredY16 {
		nz := d.parseResiduals4(partition, planeY2, d.leftMB.nzY16+d.upMB[mbx].nzY16, quant.y2, false, whtCoeffBase)
		d.leftMB.nzY16 = nz
		d.upMB[mbx].nzY16 = nz
		d.inverseWHT16()
		plane = planeY1WithY2
	}

	var (
		nzDC, nzAC         [4]uint8
		nzDCMask, nzACMask uint32
		coeffBase          int
	)

	// Parse the luma coefficients.
	// lnz[y] and unz[x] are the 0/1 results of the region to the left of row y
	// and above column x. They are overwritten as regions are parsed, so that
	// after the loops they describe this macroblock's right column and bottom
	// row.
	lnz := unpack[d.leftMB.nzMask&0x0f]
	unz := unpack[d.upMB[mbx].nzMask&0x0f]
	for y := 0; y < 4; y++ {
		nz := lnz[y]
		for x := 0; x < 4; x++ {
			// When d.usePredY16 is set, the DC position is skipped because it
			// has already been handled above.
			nz = d.parseResiduals4(partition, plane, nz+unz[x], quant.y1, d.usePredY16, coeffBase)
			unz[x] = nz
			// nzAC is the result of parseResiduals4, while nzDC only looks at
			// the region's first coefficient.
			nzAC[x] = nz
			nzDC[x] = btou(d.coeff[coeffBase] != 0)
			coeffBase += 16
		}
		lnz[y] = nz
		nzDCMask |= pack(nzDC, y*4)
		nzACMask |= pack(nzAC, y*4)
	}
	lnzMask := pack(lnz, 0)
	unzMask := pack(unz, 0)

	// Parse the chroma coefficients.
	// The upper four bits of nzMask hold the chroma context: entries 0-1 for
	// the first plane (c == 0) and entries 2-3 for the second (c == 2). Each
	// plane has 2x2 regions; coeffBase simply continues after the luma
	// coefficients.
	lnz = unpack[d.leftMB.nzMask>>4]
	unz = unpack[d.upMB[mbx].nzMask>>4]
	for c := 0; c < 4; c += 2 {
		for y := 0; y < 2; y++ {
			nz := lnz[y+c]
			for x := 0; x < 2; x++ {
				nz = d.parseResiduals4(partition, planeUV, nz+unz[x+c], quant.uv, false, coeffBase)
				unz[x+c] = nz
				nzAC[y*2+x] = nz
				nzDC[y*2+x] = btou(d.coeff[coeffBase] != 0)
				coeffBase += 16
			}
			lnz[y+c] = nz
		}
		nzDCMask |= pack(nzDC, 16+c*2)
		nzACMask |= pack(nzAC, 16+c*2)
	}
	lnzMask |= pack(lnz, 4)
	unzMask |= pack(unz, 4)

	// Save decoder state.
	d.leftMB.nzMask = uint8(lnzMask)
	d.upMB[mbx].nzMask = uint8(unzMask)
	d.nzDCMask = nzDCMask
	d.nzACMask = nzACMask
}

// reconstructMacroblock applies the predictor functions and adds the inverse-
// DCT transformed residuals to recover the YCbCr data.
func (d *Decoder) reconstructMacroblock(mbx, mby int) { if d.usePredY16 { p := checkTopLeftPred(mbx, mby, d.predY16) predFunc16[p](d, 1, 8) for j := 0; j < 4; j++ { for i := 0; i < 4; i++ { n := 4*j + i y := 4*j + 1 x := 4*i + 8 mask := uint32(1) << uint(n) if d.nzACMask&mask != 0 { d.inverseDCT4(y, x, 16*n) } else if d.nzDCMask&mask != 0 { d.inverseDCT4DCOnly(y, x, 16*n) } } } } else { for j := 0; j < 4; j++ { for i := 0; i < 4; i++ { n := 4*j + i y := 4*j + 1 x := 4*i + 8 predFunc4[d.predY4[j][i]](d, y, x) mask := uint32(1) << uint(n) if d.nzACMask&mask != 0 { d.inverseDCT4(y, x, 16*n) } else if d.nzDCMask&mask != 0 { d.inverseDCT4DCOnly(y, x, 16*n) } } } } p := checkTopLeftPred(mbx, mby, d.predC8) predFunc8[p](d, ybrBY, ybrBX) if d.nzACMask&0x0f0000 != 0 { d.inverseDCT8(ybrBY, ybrBX, bCoeffBase) } else if d.nzDCMask&0x0f0000 != 0 { d.inverseDCT8DCOnly(ybrBY, ybrBX, bCoeffBase) } predFunc8[p](d, ybrRY, ybrRX) if d.nzACMask&0xf00000 != 0 { d.inverseDCT8(ybrRY, ybrRX, rCoeffBase) } else if d.nzDCMask&0xf00000 != 0 { d.inverseDCT8DCOnly(ybrRY, ybrRX, rCoeffBase) } } // reconstruct reconstructs one macroblock. func (d *Decoder) reconstruct(mbx, mby int) { if d.segmentHeader.updateMap { if !d.fp.readBit(d.segmentHeader.prob[0]) { d.segment = int(d.fp.readUint(d.segmentHeader.prob[1], 1)) } else { d.segment = int(d.fp.readUint(d.segmentHeader.prob[2], 1)) + 2 } } skip := false if d.useSkipProb { skip = d.fp.readBit(d.skipProb) } // Prepare the workspace. for i := range d.coeff { d.coeff[i] = 0 } d.prepareYBR(mbx, mby) // Parse the predictor modes. d.usePredY16 = d.fp.readBit(145) if d.usePredY16 { d.parsePredModeY16(mbx) } else { d.parsePredModeY4(mbx) } d.parsePredModeC8() // Parse the residuals. if !skip { d.parseResiduals(mbx, mby) } else { if d.usePredY16 { d.leftMB.nzY16 = 0 d.upMB[mbx].nzY16 = 0 } d.leftMB.nzMask = 0 d.upMB[mbx].nzMask = 0 d.nzDCMask = 0 d.nzACMask = 0 } // Reconstruct the YCbCr data and copy it to the image. 
d.reconstructMacroblock(mbx, mby) for i, y := (mby*d.img.YStride+mbx)*16, 0; y < 16; i, y = i+d.img.YStride, y+1 { copy(d.img.Y[i:i+16], d.ybr[ybrYY+y][ybrYX:ybrYX+16]) } for i, y := (mby*d.img.CStride+mbx)*8, 0; y < 8; i, y = i+d.img.CStride, y+1 { copy(d.img.Cb[i:i+8], d.ybr[ybrBY+y][ybrBX:ybrBX+8]) copy(d.img.Cr[i:i+8], d.ybr[ybrRY+y][ybrRX:ybrRX+8]) } }