#!/usr/bin/env python

# Copyright (c) 2011 Bastian Venthur
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

"""
Video decoding for the AR.Drone.

This library uses psyco to speed-up the decoding process. It is however written
in a way that it works also without psyco installed. On the author's
development machine the speed up is from 2FPS w/o psyco to > 20 FPS w/ psyco.
"""

import array
import cProfile
import datetime
import struct
import sys

# psyco is an optional accelerator: when it is missing the name `psyco` simply
# stays undefined and the decoder runs unaccelerated.
try:
    import psyco
except ImportError:
    # Single-argument call form prints the same text on Python 2 and 3.
    print("Please install psyco for better video decoding performance.")
# from zig-zag back to normal
ZIG_ZAG_POSITIONS = array.array('B', (
    0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63))

# Inverse quantization
IQUANT_TAB = array.array('B', (
    3,  5,  7,  9, 11, 13, 15, 17,
    5,  7,  9, 11, 13, 15, 17, 19,
    7,  9, 11, 13, 15, 17, 19, 21,
    9, 11, 13, 15, 17, 19, 21, 23,
    11, 13, 15, 17, 19, 21, 23, 25,
    13, 15, 17, 19, 21, 23, 25, 27,
    15, 17, 19, 21, 23, 25, 27, 29,
    17, 19, 21, 23, 25, 27, 29, 31))

# Used for upscaling the 8x8 b- and r-blocks to 16x16
SCALE_TAB = array.array('B', (
    0,  0,  1,  1,  2,  2,  3,  3,  0,  0,  1,  1,  2,  2,  3,  3,
    8,  8,  9,  9, 10, 10, 11, 11,  8,  8,  9,  9, 10, 10, 11, 11,
    16, 16, 17, 17, 18, 18, 19, 19, 16, 16, 17, 17, 18, 18, 19, 19,
    24, 24, 25, 25, 26, 26, 27, 27, 24, 24, 25, 25, 26, 26, 27, 27,
    4,  4,  5,  5,  6,  6,  7,  7,  4,  4,  5,  5,  6,  6,  7,  7,
    12, 12, 13, 13, 14, 14, 15, 15, 12, 12, 13, 13, 14, 14, 15, 15,
    20, 20, 21, 21, 22, 22, 23, 23, 20, 20, 21, 21, 22, 22, 23, 23,
    28, 28, 29, 29, 30, 30, 31, 31, 28, 28, 29, 29, 30, 30, 31, 31,
    32, 32, 33, 33, 34, 34, 35, 35, 32, 32, 33, 33, 34, 34, 35, 35,
    40, 40, 41, 41, 42, 42, 43, 43, 40, 40, 41, 41, 42, 42, 43, 43,
    48, 48, 49, 49, 50, 50, 51, 51, 48, 48, 49, 49, 50, 50, 51, 51,
    56, 56, 57, 57, 58, 58, 59, 59, 56, 56, 57, 57, 58, 58, 59, 59,
    36, 36, 37, 37, 38, 38, 39, 39, 36, 36, 37, 37, 38, 38, 39, 39,
    44, 44, 45, 45, 46, 46, 47, 47, 44, 44, 45, 45, 46, 46, 47, 47,
    52, 52, 53, 53, 54, 54, 55, 55, 52, 52, 53, 53, 54, 54, 55, 55,
    60, 60, 61, 61, 62, 62, 63, 63, 60, 60, 61, 61, 62, 62, 63, 63))

# Count leading zeros look up table
CLZLUT = array.array('B', (
    8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

# Map pixels from four 8x8 blocks to one 16x16
MB_TO_GOB_MAP = array.array('B', [
    0,   1,   2,   3,   4,   5,   6,   7,
    16,  17,  18,  19,  20,  21,  22,  23,
    32,  33,  34,  35,  36,  37,  38,  39,
    48,  49,  50,  51,  52,  53,  54,  55,
    64,  65,  66,  67,  68,  69,  70,  71,
    80,  81,  82,  83,  84,  85,  86,  87,
    96,  97,  98,  99, 100, 101, 102, 103,
    112, 113, 114, 115, 116, 117, 118, 119,
    8,   9,  10,  11,  12,  13,  14,  15,
    24,  25,  26,  27,  28,  29,  30,  31,
    40,  41,  42,  43,  44,  45,  46,  47,
    56,  57,  58,  59,  60,  61,  62,  63,
    72,  73,  74,  75,  76,  77,  78,  79,
    88,  89,  90,  91,  92,  93,  94,  95,
    104, 105, 106, 107, 108, 109, 110, 111,
    120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135,
    144, 145, 146, 147, 148, 149, 150, 151,
    160, 161, 162, 163, 164, 165, 166, 167,
    176, 177, 178, 179, 180, 181, 182, 183,
    192, 193, 194, 195, 196, 197, 198, 199,
    208, 209, 210, 211, 212, 213, 214, 215,
    224, 225, 226, 227, 228, 229, 230, 231,
    240, 241, 242, 243, 244, 245, 246, 247,
    136, 137, 138, 139, 140, 141, 142, 143,
    152, 153, 154, 155, 156, 157, 158, 159,
    168, 169, 170, 171, 172, 173, 174, 175,
    184, 185, 186, 187, 188, 189, 190, 191,
    200, 201, 202, 203, 204, 205, 206, 207,
    216, 217, 218, 219, 220, 221, 222, 223,
    232, 233, 234, 235, 236, 237, 238, 239,
    248, 249, 250, 251, 252, 253, 254, 255])
# Row / column of every macroblock pixel inside the 16x16 block. Floor
# division is spelled out so the tables hold integers on Python 2 and 3.
MB_ROW_MAP = array.array('B', [i // 16 for i in MB_TO_GOB_MAP])
MB_COL_MAP = array.array('B', [i % 16 for i in MB_TO_GOB_MAP])

# An array of zeros. It is much faster to take the zeros from here than to
# generate a new list when needed.
ZEROS = array.array('i', [0 for i in range(256)])

# Constants needed for the inverse discrete cosine transform.
FIX_0_298631336 = 2446
FIX_0_390180644 = 3196
FIX_0_541196100 = 4433
FIX_0_765366865 = 6270
FIX_0_899976223 = 7373
FIX_1_175875602 = 9633
FIX_1_501321110 = 12299
FIX_1_847759065 = 15137
FIX_1_961570560 = 16069
FIX_2_053119869 = 16819
FIX_2_562915447 = 20995
FIX_3_072711026 = 25172

CONST_BITS = 13
PASS1_BITS = 1
F1 = CONST_BITS - PASS1_BITS - 1
F2 = CONST_BITS - PASS1_BITS
F3 = CONST_BITS + PASS1_BITS + 3

# tuning parameter for get_block: number of (run, value) pairs decoded per
# peeked window of 32*TRIES bits
TRIES = 16
MASK = 2**(TRIES*32)-1
SHIFT = 32*(TRIES-1)

# Packs one RGB pixel into a 3-byte string. On Python 2 the result equals
# ''.join((chr(r), chr(g), chr(b))); on Python 3 it yields bytes.
_PACK_RGB = struct.Struct('BBB').pack
# Placeholder for pixels that are never written by the decoder.
_BLACK_PIXEL = _PACK_RGB(0, 0, 0)


def _first_half(data):
    """Helper function used to precompute the zero values in a 12 bit datum.

    Returns ``[streamlen, run]``: the number of bits the code occupies and
    the number of zero coefficients it encodes.
    """
    # data has to be 12 bits wide
    # count the zeros
    zerocount = CLZLUT[data >> 4]
    data = (data << (zerocount + 1)) & 0b111111111111
    streamlen = zerocount + 1
    # get number of remaining bits to read
    toread = 0 if zerocount <= 1 else zerocount - 1
    additional = data >> (12 - toread)
    streamlen += toread
    # add as many zeros to out_list as indicated by additional bits
    # if zerocount is 0, tmp = 0 else the 1 merged with additional bits
    tmp = 0 if zerocount == 0 else (1 << toread) | additional
    return [streamlen, tmp]


def _second_half(data):
    """Helper function to precompute the nonzero values in a 15 bit datum.

    Returns ``[streamlen, value, eob]``. For the end-of-block code ``value``
    is None and ``eob`` is True.
    """
    # data has to be 15 bits wide
    zerocount = CLZLUT[data >> 7]
    data = (data << (zerocount + 1)) & 0b111111111111111
    streamlen = zerocount + 1
    # 01 == EOB
    if zerocount == 1:
        return [streamlen, None, True]
    # get number of remaining bits to read
    toread = 0 if zerocount == 0 else zerocount - 1
    additional = data >> (15 - toread)
    data = (data << toread) & 0b111111111111111
    streamlen += toread
    tmp = (1 << toread) | additional
    # get one more bit for the sign
    tmp = -tmp if data >> (15 - 1) else tmp
    tmp = int(tmp)
    streamlen += 1
    return [streamlen, tmp, False]


# Precompute all 12 and 15 bit values for the entropy decoding process
FH = [_first_half(i) for i in range(2**12)]
SH = [_second_half(i) for i in range(2**15)]


class BitReader(object):
    """Bitreader. Given a stream of data, it allows to read it bitwise."""

    def __init__(self, packet):
        self.packet = packet
        self.offset = 0      # byte offset of the next word to load
        self.bits_left = 0   # number of unread bits buffered in chunk
        self.chunk = 0       # buffered, not yet consumed bits
        self.read_bits = 0   # total number of bits consumed so far

    def read(self, nbits, consume=True):
        """Read nbits and return the integervalue of the read bits.

        If consume is False, it behaves like a 'peek' method (ie it reads the
        bits but does not consume them.
        """
        # Read enough bits into chunk so we have at least nbits available
        #
        # NOTE(review): in the reviewed source this loop was truncated
        # between "struct.unpack_from('" and ">> shift". The '<I' format
        # (32 bits are appended per load), the zero fill on struct.error and
        # the offset/bits_left bookkeeping below are reconstructed from the
        # surviving statements -- confirm against the upstream file.
        while nbits > self.bits_left:
            try:
                self.chunk = (self.chunk << 32) | struct.unpack_from(
                    '<I', self.packet, self.offset)[0]
            except struct.error:
                # Past the end of the packet: pad with zero bits.
                self.chunk <<= 32
            self.offset += 4
            self.bits_left += 32
        # Get the first nbits bits from chunk (and remove them from chunk)
        shift = self.bits_left - nbits
        res = self.chunk >> shift
        if consume:
            self.chunk -= res << shift
            self.bits_left -= nbits
            self.read_bits += nbits
        return res

    def align(self):
        """Byte align the data stream."""
        shift = (8 - self.read_bits) % 8
        self.read(shift)


def inverse_dct(block):
    """Inverse discrete cosine transform.

    Takes a sequence of 64 dequantized coefficients and returns a new
    ``array('i')`` of 64 values. The transform is applied to the columns
    first (into a workspace) and then to the rows, using the fixed point
    FIX_* constants.
    """
    workspace = ZEROS[0:64]
    data = ZEROS[0:64]
    # Pass 1: columns
    for pointer in range(8):
        if (block[pointer + 8] == 0 and block[pointer + 16] == 0 and
                block[pointer + 24] == 0 and block[pointer + 32] == 0 and
                block[pointer + 40] == 0 and block[pointer + 48] == 0 and
                block[pointer + 56] == 0):
            # Shortcut: only the first coefficient of this column is set.
            dcval = block[pointer] << PASS1_BITS
            for i in range(8):
                workspace[pointer + i*8] = dcval
            continue

        z2 = block[pointer + 16]
        z3 = block[pointer + 48]
        z1 = (z2 + z3) * FIX_0_541196100
        tmp2 = z1 + z3 * -FIX_1_847759065
        tmp3 = z1 + z2 * FIX_0_765366865
        z2 = block[pointer]
        z3 = block[pointer + 32]
        tmp0 = (z2 + z3) << CONST_BITS
        tmp1 = (z2 - z3) << CONST_BITS
        tmp10 = tmp0 + tmp3
        tmp13 = tmp0 - tmp3
        tmp11 = tmp1 + tmp2
        tmp12 = tmp1 - tmp2
        tmp0 = block[pointer + 56]
        tmp1 = block[pointer + 40]
        tmp2 = block[pointer + 24]
        tmp3 = block[pointer + 8]
        z1 = tmp0 + tmp3
        z2 = tmp1 + tmp2
        z3 = tmp0 + tmp2
        z4 = tmp1 + tmp3
        z5 = (z3 + z4) * FIX_1_175875602
        tmp0 *= FIX_0_298631336
        tmp1 *= FIX_2_053119869
        tmp2 *= FIX_3_072711026
        tmp3 *= FIX_1_501321110
        z1 *= -FIX_0_899976223
        z2 *= -FIX_2_562915447
        z3 *= -FIX_1_961570560
        z4 *= -FIX_0_390180644
        z3 += z5
        z4 += z5
        tmp0 += z1 + z3
        tmp1 += z2 + z4
        tmp2 += z2 + z3
        tmp3 += z1 + z4
        workspace[pointer + 0] = ((tmp10 + tmp3 + (1 << F1)) >> F2)
        workspace[pointer + 56] = ((tmp10 - tmp3 + (1 << F1)) >> F2)
        workspace[pointer + 8] = ((tmp11 + tmp2 + (1 << F1)) >> F2)
        workspace[pointer + 48] = ((tmp11 - tmp2 + (1 << F1)) >> F2)
        workspace[pointer + 16] = ((tmp12 + tmp1 + (1 << F1)) >> F2)
        workspace[pointer + 40] = ((tmp12 - tmp1 + (1 << F1)) >> F2)
        workspace[pointer + 24] = ((tmp13 + tmp0 + (1 << F1)) >> F2)
        workspace[pointer + 32] = ((tmp13 - tmp0 + (1 << F1)) >> F2)

    # Pass 2: rows
    for pointer in range(0, 64, 8):
        z2 = workspace[pointer + 2]
        z3 = workspace[pointer + 6]
        z1 = (z2 + z3) * FIX_0_541196100
        tmp2 = z1 + z3 * -FIX_1_847759065
        tmp3 = z1 + z2 * FIX_0_765366865
        tmp0 = (workspace[pointer] + workspace[pointer + 4]) << CONST_BITS
        tmp1 = (workspace[pointer] - workspace[pointer + 4]) << CONST_BITS
        tmp10 = tmp0 + tmp3
        tmp13 = tmp0 - tmp3
        tmp11 = tmp1 + tmp2
        tmp12 = tmp1 - tmp2
        tmp0 = workspace[pointer + 7]
        tmp1 = workspace[pointer + 5]
        tmp2 = workspace[pointer + 3]
        tmp3 = workspace[pointer + 1]
        z1 = tmp0 + tmp3
        z2 = tmp1 + tmp2
        z3 = tmp0 + tmp2
        z4 = tmp1 + tmp3
        z5 = (z3 + z4) * FIX_1_175875602
        tmp0 *= FIX_0_298631336
        tmp1 *= FIX_2_053119869
        tmp2 *= FIX_3_072711026
        tmp3 *= FIX_1_501321110
        z1 *= -FIX_0_899976223
        z2 *= -FIX_2_562915447
        z3 *= -FIX_1_961570560
        z4 *= -FIX_0_390180644
        z3 += z5
        z4 += z5
        tmp0 += z1 + z3
        tmp1 += z2 + z4
        tmp2 += z2 + z3
        tmp3 += z1 + z4
        data[pointer + 0] = (tmp10 + tmp3) >> F3
        data[pointer + 7] = (tmp10 - tmp3) >> F3
        data[pointer + 1] = (tmp11 + tmp2) >> F3
        data[pointer + 6] = (tmp11 - tmp2) >> F3
        data[pointer + 2] = (tmp12 + tmp1) >> F3
        data[pointer + 5] = (tmp12 - tmp1) >> F3
        data[pointer + 3] = (tmp13 + tmp0) >> F3
        data[pointer + 4] = (tmp13 - tmp0) >> F3
    return data


def get_pheader(bitreader):
    """Read the picture header.

    Returns the width and height of the image.

    Raises AssertionError if the picture start code, the format or the
    resolution field has an unexpected value.
    """
    bitreader.align()
    psc = bitreader.read(22)
    assert psc == 0b0000000000000000100000, "bad picture start code"
    pformat = bitreader.read(2)
    assert pformat != 0b00, "bad picture format"
    if pformat == 1:
        # CIF
        width, height = 88, 72
    else:
        # VGA
        width, height = 160, 120
    presolution = bitreader.read(3)
    assert presolution != 0b000, "bad picture resolution"
    # double resolution presolution-1 times
    width = width << (presolution - 1)
    height = height << (presolution - 1)
    # The remaining header fields are not used by the decoder but have to be
    # consumed to advance the stream.
    bitreader.read(3)    # ptype
    bitreader.read(5)    # pquant
    bitreader.read(32)   # pframe
    return width, height


def get_mb(bitreader, picture, width, offset):
    """Get macro block.

    This method does not return data but modifies the picture parameter in
    place: the 16x16 pixels of the macro block are written as 3-byte RGB
    strings starting at index ``offset``, using ``width`` as the row stride.
    If the leading mbc bit is not zero only a message is printed and the
    picture is left untouched.
    """
    mbc = bitreader.read(1)
    if mbc != 0:
        print("mbc was not zero")
        return
    mbdesc = bitreader.read(8)
    assert (mbdesc >> 7) & 1, "bit 7 of the macro block description not set"
    if (mbdesc >> 6) & 1:
        # mbdiff is not used, but its two bits have to be consumed
        bitreader.read(2)
    # Bits 0-3 of mbdesc flag coefficients for the four luma blocks,
    # bits 4 and 5 for the cb and cr block.
    y = get_block(bitreader, mbdesc & 1)
    y.extend(get_block(bitreader, (mbdesc >> 1) & 1))
    y.extend(get_block(bitreader, (mbdesc >> 2) & 1))
    y.extend(get_block(bitreader, (mbdesc >> 3) & 1))
    cb = get_block(bitreader, (mbdesc >> 4) & 1)
    cr = get_block(bitreader, (mbdesc >> 5) & 1)
    # ycbcr to rgb
    for i in range(256):
        j = SCALE_TAB[i]
        Y = y[i] - 16
        B = cb[j] - 128
        R = cr[j] - 128
        r = (298 * Y + 409 * R + 128) >> 8
        g = (298 * Y - 100 * B - 208 * R + 128) >> 8
        b = (298 * Y + 516 * B + 128) >> 8
        # clamp to 0..255
        r = 0 if r < 0 else r
        r = 255 if r > 255 else r
        g = 0 if g < 0 else g
        g = 255 if g > 255 else g
        b = 0 if b < 0 else b
        b = 255 if b > 255 else b
        # re-order the pixels
        row = MB_ROW_MAP[i]
        col = MB_COL_MAP[i]
        picture[offset + row*width + col] = _PACK_RGB(r, g, b)


def get_block(bitreader, has_coeff):
    """Read a 8x8 block from the data stream.

    This method takes care of the huffman-, RLE, zig-zag and idct and returns
    an array of 64 ints.
    """
    # the first coefficient is stored as a plain 10 bit value
    out_list = ZEROS[0:64]
    out_list[0] = int(bitreader.read(10)) * IQUANT_TAB[0]
    if not has_coeff:
        return inverse_dct(out_list)
    i = 1
    while 1:
        # Peek a large window once and decode up to TRIES (run, value) pairs
        # from it through the precomputed FH/SH tables; only the bits that
        # were actually used are consumed afterwards.
        window = bitreader.read(32*TRIES, False)
        streamlen = 0
        for _ in range(TRIES):
            # top 32 bits of the window, starting at bit streamlen
            data = (window << streamlen) & MASK
            data >>= SHIFT
            l, tmp = FH[data >> 20]
            streamlen += l
            data = (data << l) & 0xffffffff
            i += tmp
            l, tmp, eob = SH[data >> 17]
            streamlen += l
            if eob:
                bitreader.read(streamlen)
                return inverse_dct(out_list)
            pos = ZIG_ZAG_POSITIONS[i]
            out_list[pos] = tmp*IQUANT_TAB[pos]
            i += 1
        bitreader.read(streamlen)


def get_gob(bitreader, picture, slicenr, width):
    """Read a group of blocks.

    The method does not return data, the picture parameter is modified in
    place instead. If the start code of the group is not the expected one,
    a message is printed and False is returned without decoding the group.
    """
    # the first gob has a special header
    if slicenr > 0:
        bitreader.align()
        gobsc = bitreader.read(22)
        if gobsc == 0b0000000000000000111111:
            print("weeeee")
            return False
        elif (not (gobsc & 0b0000000000000000100000) or
              (gobsc & 0b1111111111111111000000)):
            print("Got wrong GOBSC, aborting. %s" % bin(gobsc))
            return False
        # five more header bits that are not used by the decoder
        bitreader.read(5)
    offset = slicenr*16*width
    for i in range(width // 16):
        get_mb(bitreader, picture, width, offset+16*i)


def read_picture(data):
    """Convert an AR.Drone image packet to rgb-string.

    Returns: width, height, image and time to decode the image

    The image is a byte string with three bytes (r, g, b) per pixel. Pixels
    which are not covered by a decoded macro block are black. The time is
    given in seconds.

    Raises AssertionError if the picture header is malformed or the end of
    sequence code is missing.
    """
    bitreader = BitReader(data)
    t = datetime.datetime.now()
    width, height = get_pheader(bitreader)
    slices = height // 16
    # Start from black pixels (not ints) so that joining the image also works
    # when some pixels are never written, e.g. when the height is not a
    # multiple of 16 or a group of blocks was skipped.
    image = [_BLACK_PIXEL] * (width*height)
    for i in range(0, slices):
        get_gob(bitreader, image, i, width)
    bitreader.align()
    eos = bitreader.read(22)
    assert eos == 0b0000000000000000111111, "missing end of sequence code"
    elapsed = datetime.datetime.now() - t
    # timedelta.microseconds alone only holds the sub-second part.
    seconds = (elapsed.days * 86400 + elapsed.seconds +
               elapsed.microseconds / 1000000.)
    return width, height, b''.join(image), seconds


# psyco is optional; if its import failed the name is undefined and the
# decoder runs without acceleration.
try:
    psyco.bind(BitReader)
    psyco.bind(get_block)
    psyco.bind(get_gob)
    psyco.bind(get_mb)
    psyco.bind(inverse_dct)
    psyco.bind(read_picture)
except NameError:
    print("Unable to bind video decoding methods with psyco. Proceeding anyways, but video decoding will be slow!")
def main():
    """Decode a sample frame several times and print the average timing.

    Reads the file 'framewireshark.raw' from the current directory, decodes
    it 20 times with read_picture and prints the average decoding time and
    the resulting frames per second. If 'image' is given on the command line
    the last decoded picture is shown with pygame until enter is pressed.
    """
    # The frame is raw binary data, so it has to be read in binary mode.
    with open('framewireshark.raw', 'rb') as fh:
        data = fh.read()
    runs = 20
    t = 0
    for _ in range(runs):
        # progress indicator, one dot per decoded frame
        sys.stdout.write('. ')
        sys.stdout.flush()
        width, height, image, ti = read_picture(data)
        t += ti
    print('')
    avg = t / runs
    print('avg time:\t %s sec' % avg)
    # Guard against a zero average on clocks with a coarse resolution.
    print('avg fps:\t %s fps' % (1 / avg if avg else float('inf')))
    if 'image' in sys.argv:
        import pygame
        pygame.init()
        W, H = 320, 240
        screen = pygame.display.set_mode((W, H))
        surface = pygame.image.fromstring(image, (width, height), 'RGB')
        screen.blit(surface, (0, 0))
        pygame.display.flip()
        # Keep the window open until enter is pressed; raw_input only exists
        # on Python 2.
        try:
            wait_for_enter = raw_input
        except NameError:
            wait_for_enter = input
        wait_for_enter()


if __name__ == '__main__':
    if 'profile' in sys.argv:
        cProfile.run('main()')
    else:
        main()