Blame | Last modification | View Log | RSS feed
/*
   Colour conversion routines (RGB <-> YUV) in x86 assembly

   (C) 2000 Nemosoft Unv.    nemosoft@smcc.demon.nl

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

/* The ccvt_* functions always start with width and height, so these
   parameters are in 8(%ebp) and 12(%ebp). The other parameters can be
   2 to 4 pointers, and one of these combinations:
     *src, *dst
     *srcy, *srcu, *srcv, *dst
     *src, *dsty, *dstu, *dstv
*/

#define __ASSEMBLY__
#include <linux/linkage.h>

#define Width    8(%ebp)
#define Height  12(%ebp)

/* 2 parameters, 1 in, 1 out */
#define Src2    16(%ebp)
#define Dst2    20(%ebp)

/* 4 parameters, 3 in, 1 out */
#define SrcY    16(%ebp)
#define SrcU    20(%ebp)
#define SrcV    24(%ebp)
#define Dst4    28(%ebp)

/* 4 parameters, 1 in, 3 out */
#define Src4    16(%ebp)
#define DstY    20(%ebp)
#define DstU    24(%ebp)
#define DstV    28(%ebp)

/* This buffer space used to be statically allocated, but this is going to
   give problems with multiple cams (though I have yet to see it).
   Therefore, we reserve at least 64 + 8 = 72 bytes on the stack with
   `enter'.
*/
#define PixelBuffer     -64(%ebp)
#define Uptr            -68(%ebp)
#define Vptr            -72(%ebp)

        .text

/* This function will load the src and destination pointers, including
   Uptr/Vptr when necessary, and test the width/height parameters.
   - %esi will be set to Src or SrcY
   - %edi will be set to Dst or DstY
   The carry flag will be set if any of these tests fail.
   It assumes %ebp has been set.
*/

/* 2 parameters, src & 
dst */
/*
 * test_param_2
 *   In:   %ebp = frame pointer of the calling ccvt_* function
 *   Out:  %esi = Src2, %edi = Dst2
 *         CF clear when both pointers are non-NULL and width/height pass
 *         test_width_height; CF set otherwise.
 */
test_param_2:
        mov     Src2, %esi
        mov     Dst2, %edi
        test    %esi, %esi              # NULL source?
        jz      param_fail
        test    %edi, %edi              # NULL destination?
        jz      param_fail
        jmp     test_width_height

/*
 * test_param_31 (3 inputs, 1 output)
 *   Out:  %esi = SrcY, %edi = Dst4, Uptr = SrcU, Vptr = SrcV
 *         CF as for test_param_2. Uptr/Vptr may already have been written
 *         when a later check fails.
 */
test_param_31:
        mov     Dst4, %edi
        test    %edi, %edi
        jz      param_fail
        mov     SrcV, %esi
        test    %esi, %esi
        jz      param_fail
        mov     %esi, Vptr
        mov     SrcU, %esi
        test    %esi, %esi
        jz      param_fail
        mov     %esi, Uptr
        mov     SrcY, %esi
        test    %esi, %esi
        jz      param_fail
        jmp     test_width_height

/*
 * test_param_13 (1 input, 3 outputs)
 *   Out:  %esi = Src4, %edi = DstY, Uptr = DstU, Vptr = DstV
 *         CF as for test_param_2.
 */
test_param_13:
        mov     Src4, %esi
        test    %esi, %esi
        jz      param_fail
        mov     DstV, %edi
        test    %edi, %edi
        jz      param_fail
        mov     %edi, Vptr
        mov     DstU, %edi
        test    %edi, %edi
        jz      param_fail
        mov     %edi, Uptr
        mov     DstY, %edi
        test    %edi, %edi
        jz      param_fail
        /* fall through */

/*
 * test_width_height
 *   Accepts only a positive width that is a multiple of 4 and a positive,
 *   even height. The comparisons are signed so that negative dimensions
 *   are rejected instead of being treated as huge unsigned counts.
 *   Out:  CF clear = parameters accepted, CF set = rejected.
 */
test_width_height:
        cmpl    $0, Width
        jle     param_fail              # zero or negative width
        testl   $3, Width               # multiple of 4?
        jnz     param_fail
        cmpl    $0, Height
        jle     param_fail              # zero or negative height
        testl   $1, Height              # odd number of lines?
        jnz     param_fail
        /* fall through */

/* exit points */
param_ok:
        clc                             # success: clear carry
        ret

param_fail:
        stc                             # failure: set carry
        ret

/*
 * expand_4_y
 *   Fills PixelBuffer with 4 grey pixels. Each pixel occupies 16 bytes:
 *   three 32-bit channel slots holding Y << 8, followed by a 4-byte slot
 *   that is skipped (not written here).
 *   In:       %eax = Y3Y2Y1Y0 (Y0 in the low byte)
 *             %ecx = pixel counter; the loop runs until the low two bits
 *                    of %ecx are zero, i.e. 4 times when %ecx enters as a
 *                    multiple of 4
 *   Out:      %ecx decremented once per pixel
 *   Clobbers: %eax, %edx, %edi, flags
 *   NOTE(review): uses stosl, so it presumably relies on DF being clear.
 */
expand_4_y:
        mov     %eax, %edx              # keep the Y bytes; eax is needed by stosl
        lea     PixelBuffer, %edi
.Lexpand_4_y_next:
        movzbl  %dl, %eax               # next Y, zero-extended
        shl     $8, %eax                # 8 bits of fraction
        stosl                           # same grey value in all
        stosl                           # three colour slots
        stosl
        add     $4, %edi                # skip the fourth (alpha) slot
        shr     $8, %edx                # advance to the next Y byte
        dec     %ecx
        test    $3, %ecx
        jnz     .Lexpand_4_y_next
        ret

/*
 * expand_4_uv
 *   Adds the chroma contribution to the grey values in PixelBuffer.
 *   V0/U0 are applied to pixels 0 and 1, V1/U1 to pixels 2 and 3.
 *   Slot 0 of a pixel receives +359*V, slot 1 receives -183*V - 88*U and
 *   slot 2 receives +454*U, where U and V are the byte values minus 128.
 *   In:       %ebx = U1U0V1V0 (V0 in the low byte)
 *   Clobbers: %eax, %ebx, %edx, %edi, flags
 */
expand_4_uv:
        lea     PixelBuffer, %edi

        /* V0 -> pixels 0 and 1 */
        sub     $128, %bl
        movsbl  %bl, %edx               # edx = V0 - 128, sign-extended
        imul    $359, %edx, %eax        # Vr
        add     %eax, 0x00(%edi)
        add     %eax, 0x10(%edi)
        imul    $183, %edx, %eax        # Vg
        sub     %eax, 0x04(%edi)
        sub     %eax, 0x14(%edi)

        /* V1 -> pixels 2 and 3 */
        sub     $128, %bh
        movsbl  %bh, %edx
        imul    $359, %edx, %eax        # Vr
        add     %eax, 0x20(%edi)
        add     %eax, 0x30(%edi)
        imul    $183, %edx, %eax        # Vg
        sub     %eax, 0x24(%edi)
        sub     %eax, 0x34(%edi)

        bswap   %ebx                    # now bh = U0, bl = U1

        /* U0 -> pixels 0 and 1 */
        sub     $128, %bh
        movsbl  %bh, %edx
        imul    $88, %edx, %eax         # Ug
        sub     %eax, 0x04(%edi)
        sub     %eax, 0x14(%edi)
        imul    $454, %edx, %eax        # Ub
        add     %eax, 0x08(%edi)
        add     %eax, 0x18(%edi)

        /* U1 -> pixels 2 and 3 */
        sub     $128, %bl
        movsbl  %bl, %edx
        imul    $88, %edx, %eax         # Ug
        sub     %eax, 0x24(%edi)
        sub     %eax, 0x34(%edi)
        imul    $454, %edx, %eax        # Ub
        add     %eax, 0x28(%edi)
        add     %eax, 0x38(%edi)
        ret

/*
 * do_four_yuvi
 *   Expands 4 pixels of 4:2:0 interlaced data into PixelBuffer.
 *   Each group is 4 Y bytes followed by 2 chroma bytes: U on lines where
 *   the remaining Height is even, V where it is odd. The other chroma pair
 *   is fetched from the neighbouring line, 1.5 * Width bytes away.
 *   In:       %esi = source, %ecx = pixel counter (see expand_4_y)
 *   Out:      %esi advanced by 6, %ecx decremented by 4, %edi preserved
 *   Clobbers: %eax, %ebx, %edx, flags
 */
do_four_yuvi:
        push    %edi
        lodsl                           # 4 Y bytes
        call    expand_4_y

        mov     Width, %ebx
        lea     (%ebx,%ebx,2), %ebx     # 3 * width
        shr     $1, %ebx                # 1.5 * width = bytes per line

        /* U is pushed first, then V, so the pop yields U1U0V1V0 */
        testl   $1, Height
        jz      .Ldo_four_yuvi_even

        neg     %ebx                    # odd line: U lives one line back
        movw    (%esi,%ebx), %ax        # U
        pushw   %ax
        lodsw                           # V
        pushw   %ax
        jmp     .Ldo_four_yuvi_expand

.Ldo_four_yuvi_even:
        lodsw                           # U
        pushw   %ax
        sub     $2, %ebx                # esi already moved past the U pair
        movw    (%esi,%ebx), %ax        # V from the next line
        pushw   %ax

.Ldo_four_yuvi_expand:
        pop     %ebx                    # ebx = U1U0V1V0
        call    expand_4_uv
        pop     %edi
        ret

/*
 * do_four_yuvp
 *   Expands 4 pixels of planar data into PixelBuffer: 4 Y bytes from
 *   %esi, 2 U bytes from Uptr and 2 V bytes from Vptr.
 *   Out:      %esi advanced by 4, Uptr and Vptr advanced by 2,
 *             %ecx decremented by 4, %edi preserved
 *   Clobbers: %eax, %ebx, %edx, flags
 */
do_four_yuvp:
        push    %edi
        lodsl                           # 4 Y bytes
        call    expand_4_y

        mov     Uptr, %ebx
        movw    (%ebx), %ax             # U1U0
        pushw   %ax
        addl    $2, Uptr

        mov     Vptr, %ebx
        movw    (%ebx), %ax             # V1V0
        pushw   %ax
        addl    $2, Vptr

        pop     %ebx                    # ebx = U1U0V1V0
        call    expand_4_uv
        pop     %edi
        ret

/*
 * do_four_yuyv
 *   Expands 4 pixels of YUYV data (8 source bytes) into PixelBuffer.
 *   The byte diagrams in the comments list the most significant byte first.
 *   Out:      %esi advanced by 8, %ecx decremented by 4, %edi preserved
 *   Clobbers: %eax, %ebx, %edx, flags
 */
do_four_yuyv:
        push    %edi
        lodsl                           # v0y1u0y0
        mov     %eax, %ebx
        bswap   %ebx                    # y0u0y1v0
        mov     %bh, %ah                # v0y1y1y0
        and     $0x00ff00ff, %ebx       # __u0__v0
        pushw   %ax                     # y1y0

        lodsl                           # v1y3u1y2
        mov     %eax, %edx
        rol     $16, %eax               # u1y2v1y3
        mov     %dl, %dh                # v1y3y2y2
        and     $0xff00ff00, %eax       # u1__v1__
        mov     $0, %dl                 # v1y3y2__
        or      %eax, %ebx              # u1u0v1v0
        shl     $8, %edx                # y3y2____
        popw    %dx                     # y3y2y1y0

        mov     %edx, %eax
        call    expand_4_y
        call    expand_4_uv
        pop     %edi
        ret

/*
 * limit_pixels
 *   Clamps all 16 dwords of PixelBuffer to the range 0 .. 0xff00
 *   (signed comparison).
 *   NOTE(review): this includes the four alpha slots, which expand_4_y
 *   skips without writing; their content comes from whatever was on the
 *   stack.
 *   Preserves: %ecx, %esi, %edi
 *   Clobbers:  %eax, flags
 */
limit_pixels:
        push    %edi
        push    %ecx
        lea     PixelBuffer, %edi
        mov     $16, %ecx
.Llimit_pixels_next:
        mov     (%edi), %eax
        test    %eax, %eax
        js      .Llimit_pixels_low      # negative -> 0
        cmp     $0xff00, %eax
        jle     .Llimit_pixels_step     # already in range
        movl    $0xff00, (%edi)         # too large -> maximum
        jmp     .Llimit_pixels_step
.Llimit_pixels_low:
        movl    $0, (%edi)
.Llimit_pixels_step:
        add     $4, %edi
        dec     %ecx
        jnz     .Llimit_pixels_next
        pop     %ecx
        pop     %edi
        ret

/* Copy RGB values from PixelBuffer into the destination buffer.
   Each channel slot holds value << 8, so the output byte is the second
   byte of the slot. */

/*
 * push_rgb24
 *   Writes 4 pixels (12 bytes): slot 0, slot 1, slot 2 of each pixel.
 *   In/Out:    %edi = destination, advanced by 12
 *   Preserves: %ecx, %esi
 *   Clobbers:  %eax, flags
 */
push_rgb24:
        push    %ecx
        push    %esi
        lea     PixelBuffer, %esi
        mov     $4, %ecx
.Lpush_rgb24_next:
        movb    0x1(%esi), %al
        movb    %al, (%edi)             # red
        movb    0x5(%esi), %al
        movb    %al, 1(%edi)            # green
        movb    0x9(%esi), %al
        movb    %al, 2(%edi)            # blue
        add     $16, %esi               # next pixel, alpha slot skipped
        add     $3, %edi
        dec     %ecx
        jnz     .Lpush_rgb24_next
        pop     %esi
        pop     %ecx
        ret

/*
 * push_bgr24
 *   Writes 4 pixels (12 bytes) with the channel order reversed:
 *   slot 2, slot 1, slot 0 of each pixel.
 *   In/Out:    %edi = destination, advanced by 12
 *   Preserves: %ecx, %esi
 *   Clobbers:  %eax, flags
 */
push_bgr24:
        push    %ecx
        push    %esi
        lea     PixelBuffer, %esi
        mov     $4, %ecx
.Lpush_bgr24_next:
        movb    0x1(%esi), %al
        movb    %al, 2(%edi)            # red
        movb    0x5(%esi), %al
        movb    %al, 1(%edi)            # green
        movb    0x9(%esi), %al
        movb    %al, (%edi)             # blue
        add     $16, %esi
        add     $3, %edi
        dec     %ecx
        jnz     .Lpush_bgr24_next
        pop     %esi
        pop     %ecx
        ret

/*
 * push_rgb32
 *   The simplest format: writes all 16 slots as one byte each (RGBa),
 *   including the alpha slot.
 *   In/Out:    %edi = destination, advanced by 16
 *   Preserves: %ecx, %esi
 *   Clobbers:  %eax, flags
 */
push_rgb32:
        push    %ecx
        push    %esi
        lea     PixelBuffer, %esi
        mov     $16, %ecx
.Lpush_rgb32_next:
        movb    0x1(%esi), %al          # slot value >> 8
        movb    %al, (%edi)
        add     $4, %esi
        inc     %edi
        dec     %ecx
        jnz     .Lpush_rgb32_next
        pop     %esi
        pop     %ecx
        ret

/*
 * push_bgr32
 *   Gosh. Would you believe it. They even made this format... (Qt 2.*)
 *   Writes 4 pixels as B, G, R; the fourth destination byte of every
 *   pixel is left untouched.
 *   In/Out:    %edi = destination, advanced by 16
 *   Preserves: %ecx, %esi
 *   Clobbers:  %eax, flags
 */
push_bgr32:
        push    %ecx
        push    %esi
        lea     PixelBuffer, %esi
        mov     $4, %ecx
.Lpush_bgr32_next:
        movb    0x1(%esi), %al
        movb    %al, 2(%edi)            # red
        movb    0x5(%esi), %al
        movb    %al, 1(%edi)            # green
        movb    0x9(%esi), %al
        movb    %al, (%edi)             # blue
        add     $16, %esi
        add     $4, %edi
        dec     %ecx
        jnz     .Lpush_bgr32_next
        pop     %esi
        pop     %ecx
        ret

/*************************************/
/* Functions to go from YUV interlaced formats to RGB */

/*
 * ccvt_420i_rgb24(width, height, src, dst)
 *   Go from 4:2:0 interlaced to RGB, red first, 3 bytes per pixel.
 *   Returns without converting anything when test_param_2 rejects the
 *   arguments. The Height argument slot is used as the line counter.
 */
ENTRY(ccvt_420i_rgb24)
        enter   $72, $0                 # PixelBuffer + Uptr + Vptr
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_2
        jc      .Lccvt_420i_rgb24_done
.Lccvt_420i_rgb24_line:
        mov     Width, %ecx             # pixels left on this line
.Lccvt_420i_rgb24_quad:
        call    do_four_yuvi
        call    limit_pixels
        call    push_rgb24
        test    %ecx, %ecx              # end of line?
        jnz     .Lccvt_420i_rgb24_quad
        decl    Height                  # yes; one line less to go
        jnz     .Lccvt_420i_rgb24_line
.Lccvt_420i_rgb24_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret

/*
 * ccvt_420i_bgr24(width, height, src, dst)
 *   Go from 4:2:0 interlaced to BGR, blue first, 3 bytes per pixel.
 */
ENTRY(ccvt_420i_bgr24)
        enter   $72, $0                 # PixelBuffer + Uptr + Vptr
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_2
        jc      .Lccvt_420i_bgr24_done
.Lccvt_420i_bgr24_line:
        mov     Width, %ecx             # pixels left on this line
.Lccvt_420i_bgr24_quad:
        call    do_four_yuvi
        call    limit_pixels
        call    push_bgr24
        test    %ecx, %ecx              # end of line?
        jnz     .Lccvt_420i_bgr24_quad
        decl    Height                  # yes; one line less to go
        jnz     .Lccvt_420i_bgr24_line
.Lccvt_420i_bgr24_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret

/*
 * ccvt_420i_rgb32(width, height, src, dst)
 *   Go from 4:2:0 interlaced to RGBa, 4 bytes per pixel.
 */
ENTRY(ccvt_420i_rgb32)
        enter   $72, $0                 # PixelBuffer + Uptr + Vptr
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_2
        jc      .Lccvt_420i_rgb32_done
.Lccvt_420i_rgb32_line:
        mov     Width, %ecx             # pixels left on this line
.Lccvt_420i_rgb32_quad:
        call    do_four_yuvi
        call    limit_pixels
        call    push_rgb32
        test    %ecx, %ecx              # end of line?
        jnz     .Lccvt_420i_rgb32_quad
        decl    Height                  # yes; one line less to go
        jnz     .Lccvt_420i_rgb32_line
.Lccvt_420i_rgb32_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret

/* Guess what? 
Go from interlaced to BGRa */
/*
 * ccvt_420i_bgr32(width, height, src, dst)
 *   Converts 4 pixels at a time through do_four_yuvi, limit_pixels and
 *   push_bgr32. Returns without converting anything when test_param_2
 *   sets the carry flag. The Height argument slot is the line counter.
 */
ENTRY(ccvt_420i_bgr32)
        enter   $72, $0                 # PixelBuffer + Uptr + Vptr
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_2
        jc      .Lccvt_420i_bgr32_done
.Lccvt_420i_bgr32_line:
        mov     Width, %ecx             # pixels left on this line
.Lccvt_420i_bgr32_quad:
        call    do_four_yuvi
        call    limit_pixels
        call    push_bgr32
        test    %ecx, %ecx              # end of line?
        jnz     .Lccvt_420i_bgr32_quad
        decl    Height                  # yes; one line less to go
        jnz     .Lccvt_420i_bgr32_line
.Lccvt_420i_bgr32_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret

/*
 * ccvt_yuyv_rgb32(width, height, src, dst)
 *   From YUYV to RGBa, 4 pixels per do_four_yuyv call.
 */
ENTRY(ccvt_yuyv_rgb32)
        enter   $72, $0                 # PixelBuffer + Uptr + Vptr
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_2
        jc      .Lccvt_yuyv_rgb32_done
.Lccvt_yuyv_rgb32_line:
        mov     Width, %ecx             # pixels left on this line
.Lccvt_yuyv_rgb32_quad:
        call    do_four_yuyv
        call    limit_pixels
        call    push_rgb32
        test    %ecx, %ecx              # end of line?
        jnz     .Lccvt_yuyv_rgb32_quad
        decl    Height                  # yes; one line less to go
        jnz     .Lccvt_yuyv_rgb32_line
.Lccvt_yuyv_rgb32_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret

/*
 * ccvt_yuyv_bgr32(width, height, src, dst)
 *   From YUYV to BGRa, 4 pixels per do_four_yuyv call.
 */
ENTRY(ccvt_yuyv_bgr32)
        enter   $72, $0                 # PixelBuffer + Uptr + Vptr
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_2
        jc      .Lccvt_yuyv_bgr32_done
.Lccvt_yuyv_bgr32_line:
        mov     Width, %ecx             # pixels left on this line
.Lccvt_yuyv_bgr32_quad:
        call    do_four_yuyv
        call    limit_pixels
        call    push_bgr32
        test    %ecx, %ecx              # end of line?
        jnz     .Lccvt_yuyv_bgr32_quad
        decl    Height                  # yes; one line less to go
        jnz     .Lccvt_yuyv_bgr32_line
.Lccvt_yuyv_bgr32_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret

/*
 * ccvt_420p_rgb32(width, height, srcy, srcu, srcv, dst)
 *   Planar to RGBa. Every U/V line serves two Y lines: after a line that
 *   started with an even remaining Height, Uptr and Vptr are moved back by
 *   Width / 2 so the following line reads the same chroma bytes again.
 */
ENTRY(ccvt_420p_rgb32)
        enter   $72, $0                 # PixelBuffer + Uptr + Vptr
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_31
        jc      .Lccvt_420p_rgb32_done
        mov     SrcU, %eax              # working copies of the U/V pointers
        mov     %eax, Uptr
        mov     SrcV, %eax
        mov     %eax, Vptr
.Lccvt_420p_rgb32_line:
        mov     Width, %ecx             # pixels left on this line
.Lccvt_420p_rgb32_quad:
        call    do_four_yuvp
        call    limit_pixels
        call    push_rgb32
        test    %ecx, %ecx              # end of line?
        jnz     .Lccvt_420p_rgb32_quad
        testl   $1, Height              # odd/even line
        jnz     .Lccvt_420p_rgb32_next
        mov     Width, %eax             # even: rewind U/V pointers
        shr     $1, %eax
        sub     %eax, Uptr
        sub     %eax, Vptr
.Lccvt_420p_rgb32_next:
        decl    Height                  # one line less to go
        jnz     .Lccvt_420p_rgb32_line
.Lccvt_420p_rgb32_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret

/* Okay... eventually, you end up with a very complete set of conversion
routines. I just wished things were a bit simpler. 
*/

/*
 * ccvt_420p_rgb24(width, height, srcy, srcu, srcv, dst)
 *   Planar to RGB, 3 bytes per pixel, red first. Every U/V line serves
 *   two Y lines: after a line that started with an even remaining Height,
 *   Uptr and Vptr are moved back by Width / 2 so the following line reads
 *   the same chroma bytes again. Returns without converting anything when
 *   test_param_31 sets the carry flag.
 */
ENTRY(ccvt_420p_rgb24)
        enter   $72, $0                 # PixelBuffer + Uptr + Vptr
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_31
        jc      .Lccvt_420p_rgb24_done
        mov     SrcU, %eax              # working copies of the U/V pointers
        mov     %eax, Uptr
        mov     SrcV, %eax
        mov     %eax, Vptr
.Lccvt_420p_rgb24_line:
        mov     Width, %ecx             # pixels left on this line
.Lccvt_420p_rgb24_quad:
        call    do_four_yuvp
        call    limit_pixels
        call    push_rgb24
        test    %ecx, %ecx              # end of line?
        jnz     .Lccvt_420p_rgb24_quad
        testl   $1, Height              # odd/even line
        jnz     .Lccvt_420p_rgb24_next
        mov     Width, %eax             # even: rewind U/V pointers
        shr     $1, %eax
        sub     %eax, Uptr
        sub     %eax, Vptr
.Lccvt_420p_rgb24_next:
        decl    Height                  # one line less to go
        jnz     .Lccvt_420p_rgb24_line
.Lccvt_420p_rgb24_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret

/*
 * ccvt_420p_bgr24(width, height, srcy, srcu, srcv, dst)
 *   Planar to BGR, 3 bytes per pixel, blue first. Same line/chroma
 *   handling as ccvt_420p_rgb24.
 */
ENTRY(ccvt_420p_bgr24)
        enter   $72, $0                 # PixelBuffer + Uptr + Vptr
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_31
        jc      .Lccvt_420p_bgr24_done
        mov     SrcU, %eax              # working copies of the U/V pointers
        mov     %eax, Uptr
        mov     SrcV, %eax
        mov     %eax, Vptr
.Lccvt_420p_bgr24_line:
        mov     Width, %ecx             # pixels left on this line
.Lccvt_420p_bgr24_quad:
        call    do_four_yuvp
        call    limit_pixels
        call    push_bgr24
        test    %ecx, %ecx              # end of line?
        jnz     .Lccvt_420p_bgr24_quad
        testl   $1, Height              # odd/even line
        jnz     .Lccvt_420p_bgr24_next
        mov     Width, %eax             # even: rewind U/V pointers
        shr     $1, %eax
        sub     %eax, Uptr
        sub     %eax, Vptr
.Lccvt_420p_bgr24_next:
        decl    Height                  # one line less to go
        jnz     .Lccvt_420p_bgr24_line
.Lccvt_420p_bgr24_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret

/* Okay... eventually, you end up with a very complete set of conversion
routines. I just wished things were a bit simpler. 
*/

/*
 * ccvt_420p_bgr32(width, height, srcy, srcu, srcv, dst)
 *   Planar to BGRa. Every U/V line serves two Y lines: after a line that
 *   started with an even remaining Height, Uptr and Vptr are moved back by
 *   Width / 2 so the following line reads the same chroma bytes again.
 */
ENTRY(ccvt_420p_bgr32)
        enter   $72, $0                 # PixelBuffer + Uptr + Vptr
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_31
        jc      .Lccvt_420p_bgr32_done
        mov     SrcU, %eax              # working copies of the U/V pointers
        mov     %eax, Uptr
        mov     SrcV, %eax
        mov     %eax, Vptr
.Lccvt_420p_bgr32_line:
        mov     Width, %ecx             # pixels left on this line
.Lccvt_420p_bgr32_quad:
        call    do_four_yuvp
        call    limit_pixels
        call    push_bgr32
        test    %ecx, %ecx              # end of line?
        jnz     .Lccvt_420p_bgr32_quad
        testl   $1, Height              # odd/even line
        jnz     .Lccvt_420p_bgr32_next
        mov     Width, %eax             # even: rewind U/V pointers
        shr     $1, %eax
        sub     %eax, Uptr
        sub     %eax, Vptr
.Lccvt_420p_bgr32_next:
        decl    Height                  # one line less to go
        jnz     .Lccvt_420p_bgr32_line
.Lccvt_420p_bgr32_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret

/* Go from RGB (red first) to 4:2:0 planar.
 * Note: this requires decimation of the U/V space by 2 in both directions
 * Also, a matrix multiply would be QUITE convenient...

   This is the matrix:
     (Y )   ( 77   150   29)   (R)
     (Cb) = (-43   -85  128) * (G)
     (Cr)   (128  -107  -21)   (B)
*/

/*
 * ccvt_rgb24_420p(width, height, src, dsty, dstu, dstv)
 *   Frame layout below the 72 bytes shared with the other converters:
 *     -76(%ebp)  line increment in bytes (3 * width)
 *     -80(%ebp)  line counter (copy of height, or height / 2)
 *     -84(%ebp)  column counter (width / 2)
 *     -88(%ebp)  factor for the 1st byte of a pixel (red)
 *     -92(%ebp)  factor for the 2nd byte of a pixel (green)
 *     -96(%ebp)  factor for the 3rd byte of a pixel (blue)
 *   Returns without converting anything when test_param_13 sets the
 *   carry flag.
 */
ENTRY(ccvt_rgb24_420p)
        enter   $96, $0                 # 72 shared + 24 bytes of locals
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_13
        jc      .Lccvt_rgb24_420p_done

        mov     Width, %eax
        lea     (%eax,%eax,2), %eax     # 3 * width = line increment
        mov     %eax, -76(%ebp)
        mov     Height, %eax
        mov     %eax, -80(%ebp)         # line counter for the Y pass

        /*
           This is a bit complicated... since U/V decimation is taking
           place both in horizontal and vertical direction, we have to
           process 2 lines in parallel. Also, 2 adjacent pixels are
           considered. We average the U/V values over these 4 pixels
           (of course, we could have just taken the U/V value of the first
           pixel and be done with it, but that's not how we do things
           around here)
        */

        /* 1st pass: Y values */
        movl    $77, -88(%ebp)          # 0.299
        movl    $150, -92(%ebp)         # 0.587
        movl    $29, -96(%ebp)          # 0.114
.Lccvt_rgb24_420p_y_line:
        mov     Width, %ecx
.Lccvt_rgb24_420p_y_pixel:
        xor     %ebx, %ebx
        call    rgb_multiply
        shr     $8, %ebx                # / 256; no limiter needed, 77 + 150 + 29 = 256
        mov     %bl, %al
        stosb                           # store into the Y buffer
        dec     %ecx                    # end of line?
        jnz     .Lccvt_rgb24_420p_y_pixel
        decl    -80(%ebp)               # end of image?
        jnz     .Lccvt_rgb24_420p_y_line

        /* The code from the pass label onwards runs twice: first with the
           U factors, then with the V factors. Note that %esi jumps around
           quite a bit. */
        movl    $-43, -88(%ebp)         # -0.1687
        movl    $-85, -92(%ebp)         # -0.3313
        movl    $128, -96(%ebp)         #  0.5
        mov     DstU, %edi
.Lccvt_rgb24_420p_pass:
        mov     Src4, %esi              # rewind source pointer
        mov     Height, %eax
        shr     $1, %eax
        mov     %eax, -80(%ebp)         # height / 2 output lines
.Lccvt_rgb24_420p_row:
        mov     Width, %eax
        shr     $1, %eax
        mov     %eax, -84(%ebp)         # width / 2 output columns
.Lccvt_rgb24_420p_cell:
        xor     %ebx, %ebx
        mov     $4, %ecx                # average over 4 pixels
.Lccvt_rgb24_420p_sample:
        call    rgb_multiply
        dec     %ecx
        jz      .Lccvt_rgb24_420p_store
        cmp     $2, %ecx                # two done: continue on the next line
        jne     .Lccvt_rgb24_420p_sample
        sub     $6, %esi                # back to where this cell started
        add     -76(%ebp), %esi         # one line down
        jmp     .Lccvt_rgb24_420p_sample
.Lccvt_rgb24_420p_store:
        sub     -76(%ebp), %esi         # back up to the first line
        add     $0x20000, %ebx          # + 128 after the shift below
        shr     $10, %ebx               # / (4 * 256)
        mov     %bl, %al
        stosb
        decl    -84(%ebp)               # end of line?
        jnz     .Lccvt_rgb24_420p_cell
        add     -76(%ebp), %esi         # skip the line already consumed
        decl    -80(%ebp)               # end of image?
        jnz     .Lccvt_rgb24_420p_row

        cmpl    $128, -88(%ebp)         # first factor is 128 only in the V pass
        je      .Lccvt_rgb24_420p_done

        movl    $128, -88(%ebp)         #  0.5
        movl    $-107, -92(%ebp)        # -0.4187
        movl    $-21, -96(%ebp)         # -0.0813
        mov     DstV, %edi
        jmp     .Lccvt_rgb24_420p_pass  # "Do it to me one more time..."

.Lccvt_rgb24_420p_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret

/*
 * ccvt_bgr24_420p(width, height, src, dsty, dstu, dstv)
 *   No surprise, this code looks just like ccvt_rgb24_420p, but with the
 *   factors swapped because blue is the first byte of each pixel:
 *     -76(%ebp)  line increment in bytes (3 * width)
 *     -80(%ebp)  line counter
 *     -84(%ebp)  column counter (width / 2)
 *     -88(%ebp)  factor for the 1st byte of a pixel (blue)
 *     -92(%ebp)  factor for the 2nd byte of a pixel (green)
 *     -96(%ebp)  factor for the 3rd byte of a pixel (red)
 */
ENTRY(ccvt_bgr24_420p)
        enter   $96, $0                 # 72 shared + 24 bytes of locals
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_13
        jc      .Lccvt_bgr24_420p_done

        mov     Width, %eax
        lea     (%eax,%eax,2), %eax     # 3 * width = line increment
        mov     %eax, -76(%ebp)
        mov     Height, %eax
        mov     %eax, -80(%ebp)         # line counter for the Y pass

        /* 1st pass: Y values */
        movl    $29, -88(%ebp)          # 0.114
        movl    $150, -92(%ebp)         # 0.587
        movl    $77, -96(%ebp)          # 0.299
.Lccvt_bgr24_420p_y_line:
        mov     Width, %ecx
.Lccvt_bgr24_420p_y_pixel:
        xor     %ebx, %ebx
        call    rgb_multiply
        shr     $8, %ebx                # / 256; no limiter needed, 77 + 150 + 29 = 256
        mov     %bl, %al
        stosb                           # store into the Y buffer
        dec     %ecx                    # end of line?
        jnz     .Lccvt_bgr24_420p_y_pixel
        decl    -80(%ebp)               # end of image?
        jnz     .Lccvt_bgr24_420p_y_line

        /* U pass; the blue factor is 128 (0.5), matching the matrix used
           by ccvt_rgb24_420p */
        movl    $128, -88(%ebp)         #  0.5
        movl    $-85, -92(%ebp)         # -0.3313
        movl    $-43, -96(%ebp)         # -0.1687
        mov     DstU, %edi
.Lccvt_bgr24_420p_pass:
        mov     Src4, %esi              # rewind source pointer
        mov     Height, %eax
        shr     $1, %eax
        mov     %eax, -80(%ebp)         # height / 2 output lines
.Lccvt_bgr24_420p_row:
        mov     Width, %eax
        shr     $1, %eax
        mov     %eax, -84(%ebp)         # width / 2 output columns
.Lccvt_bgr24_420p_cell:
        xor     %ebx, %ebx
        mov     $4, %ecx                # average over 4 pixels
.Lccvt_bgr24_420p_sample:
        call    rgb_multiply
        dec     %ecx
        jz      .Lccvt_bgr24_420p_store
        cmp     $2, %ecx                # two done: continue on the next line
        jne     .Lccvt_bgr24_420p_sample
        sub     $6, %esi                # back to where this cell started
        add     -76(%ebp), %esi         # one line down
        jmp     .Lccvt_bgr24_420p_sample
.Lccvt_bgr24_420p_store:
        sub     -76(%ebp), %esi         # back up to the first line
        add     $0x20000, %ebx          # + 128 after the shift below
        shr     $10, %ebx               # / (4 * 256)
        mov     %bl, %al
        stosb
        decl    -84(%ebp)               # end of line?
        jnz     .Lccvt_bgr24_420p_cell
        add     -76(%ebp), %esi         # skip the line already consumed
        decl    -80(%ebp)               # end of image?
        jnz     .Lccvt_bgr24_420p_row

        cmpl    $-21, -88(%ebp)         # first factor is -21 only in the V pass
        je      .Lccvt_bgr24_420p_done

        movl    $-21, -88(%ebp)         # -0.0813
        movl    $-107, -92(%ebp)        # -0.4187
        movl    $128, -96(%ebp)         #  0.5
        mov     DstV, %edi
        jmp     .Lccvt_bgr24_420p_pass  # "Do it to me one more time..."

.Lccvt_bgr24_420p_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret

/* RGB-to-YUV helper functions */

/*
 * rgb_multiply
 *   Does one vector multiplication for a 3-byte pixel: each byte at %esi
 *   is multiplied by its factor at -88/-92/-96(%ebp) and the three
 *   products are added to %ebx.
 *   In:       %esi = pixel, %ebx = running sum, factors set in the frame
 *   Out:      %ebx = sum, %esi advanced by 3
 *   Clobbers: %eax, flags
 */
rgb_multiply:
        movzbl  (%esi), %eax            # 1st byte
        imull   -88(%ebp), %eax
        add     %eax, %ebx
        movzbl  1(%esi), %eax           # 2nd byte
        imull   -92(%ebp), %eax
        add     %eax, %ebx
        movzbl  2(%esi), %eax           # 3rd byte
        imull   -96(%ebp), %eax
        add     %eax, %ebx
        add     $3, %esi
        ret

/**************************************************************************/

/*
 * ccvt_420i_420p(width, height, src, dsty, dstu, dstv)
 *   Go from 'interlaced' (YYYY UU / YYYY VV) format to planar.
 *   We first grab the Y values (4 bytes at a time), then rewind and do
 *   the U values, and repeat for V.
 *   Local: -76(%ebp) = width / 4 (groups per line)
 */
ENTRY(ccvt_420i_420p)
        enter   $76, $0                 # 72 shared + 4 bytes of locals
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_13
        jc      .Lccvt_420i_420p_done

        mov     Width, %eax
        shr     $2, %eax                # width / 4
        mov     %eax, -76(%ebp)

        /* Y */
        mov     Height, %edx            # line counter
.Lccvt_420i_420p_y_line:
        mov     -76(%ebp), %ecx
.Lccvt_420i_420p_y_group:
        lodsl                           # get 4 Y bytes...
        stosl                           # ...and store them
        add     $2, %esi                # skip U or V
        dec     %ecx
        jnz     .Lccvt_420i_420p_y_group
        dec     %edx
        jnz     .Lccvt_420i_420p_y_line

        /* U */
        mov     Src4, %esi              # rewind source pointer
        mov     DstU, %edi
        add     $4, %esi                # first U pair
        mov     Height, %edx
        shr     $1, %edx                # height / 2
        mov     Width, %ebx
        lea     (%ebx,%ebx,2), %ebx
        shr     $1, %ebx                # width * 1.5 = line offset
.Lccvt_420i_420p_u_line:
        mov     -76(%ebp), %ecx
.Lccvt_420i_420p_u_group:
        lodsw                           # 2 bytes at a time
        stosw
        add     $4, %esi                # skip Y
        dec     %ecx
        jnz     .Lccvt_420i_420p_u_group
        add     %ebx, %esi              # skip a line (U is on even lines)
        dec     %edx
        jnz     .Lccvt_420i_420p_u_line

        /* V */
        mov     Src4, %esi              # rewind, then go to the V pair
        add     $4, %esi                # of the first odd line
        add     %ebx, %esi              # ebx still holds the line offset
        mov     DstV, %edi
        mov     Height, %edx
        shr     $1, %edx                # height / 2
.Lccvt_420i_420p_v_line:
        mov     -76(%ebp), %ecx
.Lccvt_420i_420p_v_group:
        lodsw
        stosw
        add     $4, %esi                # skip Y
        dec     %ecx
        jnz     .Lccvt_420i_420p_v_group
        add     %ebx, %esi              # skip a line (V is on odd lines)
        dec     %edx
        jnz     .Lccvt_420i_420p_v_line

.Lccvt_420i_420p_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret

/*
 * ccvt_420i_yuyv(width, height, src, dst)
 *   Go from 4:2:0 interlaced to 'normal' YUYV. This requires a bit of
 *   byte shuffling... we go from
 *      YYYY UU
 *      YYYY VV
 *   to
 *      YUYV YUYV
 *      YUYV YUYV
 *   which indeed takes up more space.
 *   Locals: -76(%ebp) = width / 4 (loops per line)
 *           -80(%ebp) = width * 1.5 (source line offset)
 *   The byte diagrams in the comments list the most significant byte first.
 */
ENTRY(ccvt_420i_yuyv)
        enter   $80, $0                 # 72 shared + 8 bytes of locals
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_2
        jc      .Lccvt_420i_yuyv_done

        mov     Width, %ecx
        shr     $2, %ecx                # width / 4
        mov     %ecx, -76(%ebp)
        mov     Width, %ebx
        lea     (%ebx,%ebx,2), %ebx
        shr     $1, %ebx                # width * 1.5
        mov     %ebx, -80(%ebp)

.Lccvt_420i_yuyv_line:
        mov     -76(%ebp), %ecx
.Lccvt_420i_yuyv_group:
        lodsl                           # y3y2y1y0
        mov     -80(%ebp), %ebx         # line offset
        testl   $1, Height              # even or odd line?
        jnz     .Lccvt_420i_yuyv_odd

        /* even line: U follows the Y bytes, V is one line ahead */
        movw    (%ebx,%esi), %dx        # 16 bits V
        shl     $16, %edx               # store in high word
        movw    (%esi), %dx             # 16 bits U
        add     $2, %esi
        jmp     .Lccvt_420i_yuyv_pack

.Lccvt_420i_yuyv_odd:
        /* odd line: V follows the Y bytes, U is one line back */
        neg     %ebx
        movw    (%esi), %dx             # 16 bits V
        shl     $16, %edx               # store in high word
        movw    (%ebx,%esi), %dx        # 16 bits U
        add     $2, %esi

.Lccvt_420i_yuyv_pack:
        /* eax = y3y2y1y0, edx = v1v0u1u0, ebx is free */
        push    %eax
        movzbl  %al, %ebx               # ______y0
        and     $0xFF00, %eax           # ____y1__
        shl     $8, %eax                # __y1____
        or      %ebx, %eax              # __y1__y0
        mov     %edx, %ebx              # v1v0u1u0
        shl     $8, %ebx                # v0u1u0__
        and     $0xff00ff00, %ebx       # v0__u0__
        or      %ebx, %eax              # v0y1u0y0
        stosl
        pop     %eax                    # y3y2y1y0

        /* second half */
        shr     $8, %eax                # __y3y2y1
        shr     $8, %ax                 # __y3__y2
        and     $0xff00ff00, %edx       # v1__u1__
        or      %edx, %eax              # v1y3u1y2
        stosl

        dec     %ecx
        jnz     .Lccvt_420i_yuyv_group
        decl    Height                  # one line less to go
        jnz     .Lccvt_420i_yuyv_line

.Lccvt_420i_yuyv_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret