diff -urp libdv-0.104-old/libdv/dct_block_mmx.S libdv-0.104/libdv/dct_block_mmx.S --- libdv-0.104-old/libdv/dct_block_mmx.S 2005-10-23 19:40:58.000000000 +0200 +++ libdv-0.104/libdv/dct_block_mmx.S 2005-10-24 00:11:39.000000000 +0200 @@ -53,6 +53,17 @@ scratch2: .quad 0 scratch3: .quad 0 scratch4: .quad 0 +#ifdef __PIC__ +# undef __i686 /* gcc define gets in our way */ + .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits +.globl __i686.get_pc_thunk.bp + .hidden __i686.get_pc_thunk.bp + .type __i686.get_pc_thunk.bp,@function +__i686.get_pc_thunk.bp: + movl (%esp), %ebp # ebp = return address = PC just after the call + ret +#endif + .text .align 8 @@ -60,10 +71,14 @@ scratch4: .quad 0 _dv_dct_88_block_mmx: pushl %ebp - movl %esp, %ebp pushl %esi - movl 8(%ebp), %esi # source +#ifdef __PIC__ + call __i686.get_pc_thunk.bp # ebp = address of the next instruction + addl $_GLOBAL_OFFSET_TABLE_, %ebp # ebp = GOT base for the @GOTOFF operands +#endif + + movl 12(%esp), %esi # source: arg 1, above return address + 2 pushed regs (ebp is not a frame pointer) # column 0 movq 16*0(%esi), %mm0 # v0 @@ -86,22 +101,45 @@ _dv_dct_88_block_mmx: movq 16*3(%esi), %mm5 # v3 movq 16*4(%esi), %mm7 # v4 +#ifdef __PIC__ + movq %mm7, scratch1@GOTOFF(%ebp) # scratch1: v4 ; +#else movq %mm7, scratch1 # scratch1: v4 ; +#endif movq %mm5, %mm7 # duplicate v3 +#ifdef __PIC__ + paddw scratch1@GOTOFF(%ebp), %mm5 # v03: v3+v4 + psubw scratch1@GOTOFF(%ebp), %mm7 # v04: v3-v4 + movq %mm5, scratch2@GOTOFF(%ebp) # scratch2: v03 +#else paddw scratch1, %mm5 # v03: v3+v4 psubw scratch1, %mm7 # v04: v3-v4 movq %mm5, scratch2 # scratch2: v03 +#endif movq %mm0, %mm5 # mm5: v00 +#ifdef __PIC__ + paddw scratch2@GOTOFF(%ebp), %mm0 # v10: v00+v03 + psubw scratch2@GOTOFF(%ebp), %mm5 # v13: v00-v03 + movq %mm3, scratch3@GOTOFF(%ebp) # scratch3: v02 +#else paddw scratch2, %mm0 # v10: v00+v03 psubw scratch2, %mm5 # v13: v00-v03 movq %mm3, scratch3 # scratch3: v02 +#endif movq %mm1, %mm3 # duplicate v01 +#ifdef __PIC__ + paddw scratch3@GOTOFF(%ebp), %mm1 # v11: v01+v02 + psubw scratch3@GOTOFF(%ebp), %mm3 # v12: v01-v02 + + movq %mm6, scratch4@GOTOFF(%ebp) # scratch4: v05 +#else paddw scratch3, %mm1 # v11:
v01+v02 psubw scratch3, %mm3 # v12: v01-v02 movq %mm6, scratch4 # scratch4: v05 +#endif movq %mm0, %mm6 # duplicate v10 paddw %mm1, %mm0 # v10+v11 @@ -111,10 +149,18 @@ _dv_dct_88_block_mmx: movq %mm6, 16*4(%esi) # out4: v10-v11 movq %mm4, %mm0 # mm0: v06 +#ifdef __PIC__ + paddw scratch4@GOTOFF(%ebp), %mm4 # v15: v05+v06 +#else paddw scratch4, %mm4 # v15: v05+v06 +#endif paddw %mm2, %mm0 # v16: v07+v06 +#ifdef __PIC__ + pmulhw WA3@GOTOFF(%ebp), %mm4 # v35~: WA3*v15 +#else pmulhw WA3, %mm4 # v35~: WA3*v15 +#endif psllw $1, %mm4 # v35: compensate the coeefient scale movq %mm4, %mm6 # duplicate v35 @@ -123,7 +169,11 @@ _dv_dct_88_block_mmx: paddw %mm5, %mm3 # v22: v12+v13 +#ifdef __PIC__ + pmulhw WA1@GOTOFF(%ebp), %mm3 # v32~: WA1*v22 +#else pmulhw WA1, %mm3 # v32~: WA1*v22 +#endif psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale movq %mm5, %mm6 # duplicate v13 @@ -134,13 +184,23 @@ _dv_dct_88_block_mmx: movq %mm6, 16*6(%esi) # out6: v13-v32 +#ifdef __PIC__ + paddw scratch4@GOTOFF(%ebp), %mm7 # v14n: v04+v05 +#else paddw scratch4, %mm7 # v14n: v04+v05 +#endif movq %mm0, %mm5 # duplicate v16 psubw %mm7, %mm0 # va1: v16-v14n +#ifdef __PIC__ + pmulhw WA5@GOTOFF(%ebp), %mm0 # va0~: va1*WA5 + pmulhw WA4@GOTOFF(%ebp), %mm5 # v36~~: v16*WA4 + pmulhw WA2@GOTOFF(%ebp), %mm7 # v34~~: v14n*WA2 +#else pmulhw WA5, %mm0 # va0~: va1*WA5 pmulhw WA4, %mm5 # v36~~: v16*WA4 pmulhw WA2, %mm7 # v34~~: v14n*WA2 +#endif psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeefient scale psllw $16-NSHIFT, %mm7 # v34: compensate the coeefient scale @@ -188,22 +248,45 @@ _dv_dct_88_block_mmx: movq 16*3(%esi), %mm5 # v3 movq 16*4(%esi), %mm7 # v4 +#ifdef __PIC__ + movq %mm7, scratch1@GOTOFF(%ebp) # scratch1: v4 ; +#else movq %mm7, scratch1 # scratch1: v4 ; +#endif movq %mm5, %mm7 # duplicate v3 +#ifdef __PIC__ + paddw scratch1@GOTOFF(%ebp), %mm5 # v03: v3+v4 + psubw scratch1@GOTOFF(%ebp), %mm7 # v04: v3-v4 + movq %mm5, scratch2@GOTOFF(%ebp) # scratch2: v03 +#else paddw scratch1, %mm5 # 
v03: v3+v4 psubw scratch1, %mm7 # v04: v3-v4 movq %mm5, scratch2 # scratch2: v03 +#endif movq %mm0, %mm5 # mm5: v00 +#ifdef __PIC__ + paddw scratch2@GOTOFF(%ebp), %mm0 # v10: v00+v03 + psubw scratch2@GOTOFF(%ebp), %mm5 # v13: v00-v03 + movq %mm3, scratch3@GOTOFF(%ebp) # scratch3: v02 +#else paddw scratch2, %mm0 # v10: v00+v03 psubw scratch2, %mm5 # v13: v00-v03 movq %mm3, scratch3 # scratc3: v02 +#endif movq %mm1, %mm3 # duplicate v01 +#ifdef __PIC__ + paddw scratch3@GOTOFF(%ebp), %mm1 # v11: v01+v02 + psubw scratch3@GOTOFF(%ebp), %mm3 # v12: v01-v02 + + movq %mm6, scratch4@GOTOFF(%ebp) # scratch4: v05 +#else paddw scratch3, %mm1 # v11: v01+v02 psubw scratch3, %mm3 # v12: v01-v02 movq %mm6, scratch4 # scratc4: v05 +#endif movq %mm0, %mm6 # duplicate v10 paddw %mm1, %mm0 # v10+v11 @@ -213,10 +296,18 @@ _dv_dct_88_block_mmx: movq %mm6, 16*4(%esi) # out4: v10-v11 movq %mm4, %mm0 # mm0: v06 +#ifdef __PIC__ + paddw scratch4@GOTOFF(%ebp), %mm4 # v15: v05+v06 +#else paddw scratch4, %mm4 # v15: v05+v06 +#endif paddw %mm2, %mm0 # v16: v07+v06 +#ifdef __PIC__ + pmulhw WA3@GOTOFF(%ebp), %mm4 # v35~: WA3*v15 +#else pmulhw WA3, %mm4 # v35~: WA3*v15 +#endif psllw $16-NSHIFT, %mm4 # v35: compensate the coeefient scale movq %mm4, %mm6 # duplicate v35 @@ -225,7 +316,11 @@ _dv_dct_88_block_mmx: paddw %mm5, %mm3 # v22: v12+v13 +#ifdef __PIC__ + pmulhw WA1@GOTOFF(%ebp), %mm3 # v32~: WA1*v22 +#else pmulhw WA1, %mm3 # v32~: WA3*v15 +#endif psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale movq %mm5, %mm6 # duplicate v13 @@ -235,13 +330,23 @@ _dv_dct_88_block_mmx: movq %mm5, 16*2(%esi) # out2: v13+v32 movq %mm6, 16*6(%esi) # out6: v13-v32 +#ifdef __PIC__ + paddw scratch4@GOTOFF(%ebp), %mm7 # v14n: v04+v05 +#else paddw scratch4, %mm7 # v14n: v04+v05 +#endif movq %mm0, %mm5 # duplicate v16 psubw %mm7, %mm0 # va1: v16-v14n +#ifdef __PIC__ + pmulhw WA2@GOTOFF(%ebp), %mm7 # v34~~: v14n*WA2 + pmulhw WA5@GOTOFF(%ebp), %mm0 # va0~: va1*WA5 + pmulhw WA4@GOTOFF(%ebp), %mm5 # v36~~:
v16*WA4 +#else pmulhw WA2, %mm7 # v34~~: v14n*WA2 pmulhw WA5, %mm0 # va0~: va1*WA5 pmulhw WA4, %mm5 # v36~~: v16*WA4 +#endif psllw $16-NSHIFT, %mm7 psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeffient # scale note that WA4 is shifted 1 bit less than the others @@ -751,11 +856,15 @@ _dv_dct_block_mmx_postscale_88: _dv_dct_248_block_mmx: pushl %ebp - movl %esp, %ebp pushl %esi pushl %edi - movl 8(%ebp), %esi # source +#ifdef __PIC__ + call __i686.get_pc_thunk.bp # ebp = address of the next instruction + addl $_GLOBAL_OFFSET_TABLE_, %ebp # ebp = GOT base for the @GOTOFF operands +#endif + + movl 16(%esp), %esi # source: arg 1, above return address + 3 pushed regs (ebp is not a frame pointer) # column 0 @@ -779,7 +888,11 @@ _dv_dct_248_block_mmx: paddw %mm1, %mm0 # v20: v10+v11 psubw %mm1, %mm3 # v21: v10-v11 +#ifdef __PIC__ + pmulhw WA1@GOTOFF(%ebp), %mm5 # v32~: WA1*v22 +#else pmulhw WA1, %mm5 # v32~: WA1*v22 +#endif movq %mm4, %mm2 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale @@ -818,7 +931,11 @@ _dv_dct_248_block_mmx: paddw %mm1, %mm0 # v20: v10+v11 psubw %mm1, %mm3 # v21: v10-v11 +#ifdef __PIC__ + pmulhw WA1@GOTOFF(%ebp), %mm5 # v32~: WA1*v22 +#else pmulhw WA1, %mm5 # v32~: WA1*v22 +#endif movq %mm4, %mm2 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale @@ -855,7 +972,11 @@ _dv_dct_248_block_mmx: paddw %mm1, %mm0 # v20: v10+v11 psubw %mm1, %mm3 # v21: v10-v11 +#ifdef __PIC__ + pmulhw WA1@GOTOFF(%ebp), %mm5 # v32~: WA1*v22 +#else pmulhw WA1, %mm5 # v32~: WA1*v22 +#endif movq %mm4, %mm2 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale @@ -892,7 +1013,11 @@ _dv_dct_248_block_mmx: paddw %mm1, %mm0 # v20: v10+v11 psubw %mm1, %mm3 # v21: v10-v11 +#ifdef __PIC__ + pmulhw WA1@GOTOFF(%ebp), %mm5 # v32~: WA1*v22 +#else pmulhw WA1, %mm5 # v32~: WA1*v22 +#endif movq %mm4, %mm2 psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale diff -urp libdv-0.104-old/libdv/dv.c libdv-0.104/libdv/dv.c --- libdv-0.104-old/libdv/dv.c 2004-10-20 05:49:24.000000000 +0200 +++ libdv-0.104/libdv/dv.c 2005-10-24 00:59:57.000000000 +0200 @@ -205,6 +205,9 @@ dv_reconfigure(int clamp_luma, int clamp }
/* dv_reconfigure */ +extern uint8_t dv_quant_offset[4]; /* quant.c table, handed explicitly to _dv_quant_88_inverse_x86 */ +extern uint8_t dv_quant_shifts[22][4]; /* indexed by the asm as dv_quant_shifts[qno + dv_quant_offset[class]] */ + static inline void dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) { int i; @@ -218,7 +221,7 @@ dv_decode_macroblock(dv_decoder_t *dv, d dv_idct_248 (co248, mb->b[i].coeffs); } else { #if ARCH_X86 - _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no); + _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts); _dv_idct_88(mb->b[i].coeffs); #elif ARCH_X86_64 _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no); @@ -250,7 +253,7 @@ dv_decode_video_segment(dv_decoder_t *dv dv_idct_248 (co248, mb->b[b].coeffs); } else { #if ARCH_X86 - _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no); + _dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts); _dv_weight_88_inverse(bl->coeffs); _dv_idct_88(bl->coeffs); #elif ARCH_X86_64 diff -urp libdv-0.104-old/libdv/encode.c libdv-0.104/libdv/encode.c --- libdv-0.104-old/libdv/encode.c 2004-11-17 04:36:30.000000000 +0100 +++ libdv-0.104/libdv/encode.c 2005-10-24 01:17:41.000000000 +0200 @@ -521,7 +521,8 @@ static void reorder_block(dv_block_t *bl } extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs, - dv_vlc_entry_t ** out); + dv_vlc_entry_t ** out, + dv_vlc_entry_t * lookup); /* lookup: caller passes vlc_encode_lookup */ extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs, dv_vlc_entry_t ** out); @@ -558,7 +559,7 @@ static unsigned long vlc_encode_block(dv #elif ARCH_X86 int num_bits; - num_bits = _dv_vlc_encode_block_mmx(coeffs, &o); + num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup); emms(); #else int num_bits; @@ -574,7 +575,7 @@ static unsigned long vlc_encode_block(dv return num_bits; } -extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs); +extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup); /* lookup: caller passes vlc_num_bits_lookup */ extern unsigned long
_dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs); extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs) @@ -600,7 +601,7 @@ extern unsigned long _dv_vlc_num_bits_bl #elif ARCH_X86_64 return _dv_vlc_num_bits_block_x86_64(coeffs); #else - return _dv_vlc_num_bits_block_x86(coeffs); + return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup); #endif } diff -urp libdv-0.104-old/libdv/encode_x86.S libdv-0.104/libdv/encode_x86.S --- libdv-0.104-old/libdv/encode_x86.S 2005-10-23 19:40:58.000000000 +0200 +++ libdv-0.104/libdv/encode_x86.S 2005-10-24 01:18:32.000000000 +0200 @@ -23,10 +23,6 @@ * The libdv homepage is http://libdv.sourceforge.net/. */ -.data -ALLONE: .word 1,1,1,1 -VLCADDMASK: .byte 255,0,0,0,255,0,0,0 - .text .global _dv_vlc_encode_block_mmx @@ -45,11 +41,14 @@ _dv_vlc_encode_block_mmx: movl $63, %ecx - movl vlc_encode_lookup, %esi + movl 4+4*4+8(%esp), %esi # vlc_encode_lookup, now the 3rd argument (offset presumably = ret addr + 4 saved regs + 2 args; confirm against the pushes above) pxor %mm0, %mm0 pxor %mm2, %mm2 - movq VLCADDMASK, %mm1 + pushl $0x000000FF # build the former VLCADDMASK (.byte 255,0,0,0,255,0,0,0) on the stack + pushl $0x000000FF + movq (%esp), %mm1 # mm1 = mask + addl $8, %esp # drop the temporary xorl %ebp, %ebp subl $8, %edx vlc_encode_block_mmx_loop: @@ -121,7 +120,7 @@ _dv_vlc_num_bits_block_x86: addl $2, %edi movl $63, %ecx - movl vlc_num_bits_lookup, %esi + movl 4+4*4+4(%esp), %esi # vlc_num_bits_lookup, now the 2nd argument (offset presumably = ret addr + 4 saved regs + 1 arg; confirm against the pushes above) vlc_num_bits_block_x86_loop: movw (%edi), %ax @@ -579,8 +578,11 @@ _dv_need_dct_248_mmx_rows: paddw %mm5, %mm1 paddw %mm1, %mm0 - - pmaddwd ALLONE, %mm0 + + pushl $0x00010001 # build the former ALLONE (.word 1,1,1,1) on the stack + pushl $0x00010001 + pmaddwd (%esp), %mm0 + addl $8, %esp # drop the temporary movq %mm0, %mm1 psrlq $32, %mm1 paddd %mm1, %mm0 diff -urp libdv-0.104-old/libdv/idct_block_mmx.S libdv-0.104/libdv/idct_block_mmx.S --- libdv-0.104-old/libdv/idct_block_mmx.S 2005-10-23 19:40:58.000000000 +0200 +++ libdv-0.104/libdv/idct_block_mmx.S 2005-10-24 01:12:12.000000000 +0200 @@ -8,16 +8,37 @@ +#ifdef __PIC__ +# undef __i686 /* gcc define gets in our way */ + .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits +.globl __i686.get_pc_thunk.bp + .hidden __i686.get_pc_thunk.bp +
.type __i686.get_pc_thunk.bp,@function +__i686.get_pc_thunk.bp: + movl (%esp), %ebp /* ebp = return address = PC just after the call */ + ret +#endif + .text + .align 4 .globl _dv_idct_block_mmx .type _dv_idct_block_mmx,@function _dv_idct_block_mmx: pushl %ebp - movl %esp,%ebp pushl %esi + +#ifdef __PIC__ + call __i686.get_pc_thunk.bp /* ebp = address of the next instruction */ + addl $_GLOBAL_OFFSET_TABLE_, %ebp /* ebp = GOT base for the @GOTOFF operands */ +#endif + +#ifdef __PIC__ + leal preSC@GOTOFF(%ebp), %ecx +#else leal preSC, %ecx - movl 8(%ebp),%esi /* source matrix */ +#endif + movl 12(%esp),%esi /* source matrix: arg 1, above return address + 2 pushed regs */ /* * column 0: even part @@ -35,7 +56,11 @@ _dv_idct_block_mmx: movq %mm1, %mm2 /* added 11/1/96 */ pmulhw 8*8(%esi),%mm5 /* V8 */ psubsw %mm0, %mm1 /* V16 */ +#ifdef __PIC__ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm1 /* 23170 ->V18 */ +#else pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */ +#endif paddsw %mm0, %mm2 /* V17 */ movq %mm2, %mm0 /* duplicate V17 */ psraw $1, %mm2 /* t75=t82 */ @@ -76,7 +101,11 @@ _dv_idct_block_mmx: paddsw %mm0, %mm3 /* V29 ; free mm0 */ movq %mm7, %mm1 /* duplicate V26 */ psraw $1, %mm3 /* t91=t94 */ +#ifdef __PIC__ + pmulhw x539f539f539f539f@GOTOFF(%ebp),%mm7 /* V33 */ +#else pmulhw x539f539f539f539f,%mm7 /* V33 */ +#endif psraw $1, %mm1 /* t96 */ movq %mm5, %mm0 /* duplicate V2 */ psraw $2, %mm4 /* t85=t87 */ @@ -84,15 +113,27 @@ _dv_idct_block_mmx: psubsw %mm4, %mm0 /* V28 ; free mm4 */ movq %mm0, %mm2 /* duplicate V28 */ psraw $1, %mm5 /* t90=t93 */ +#ifdef __PIC__ + pmulhw x4546454645464546@GOTOFF(%ebp),%mm0 /* V35 */ +#else pmulhw x4546454645464546,%mm0 /* V35 */ +#endif psraw $1, %mm2 /* t97 */ movq %mm5, %mm4 /* duplicate t90=t93 */ psubsw %mm2, %mm1 /* V32 ; free mm2 */ +#ifdef __PIC__ + pmulhw x61f861f861f861f8@GOTOFF(%ebp),%mm1 /* V36 */ +#else pmulhw x61f861f861f861f8,%mm1 /* V36 */ +#endif psllw $1, %mm7 /* t107 */ paddsw %mm3, %mm5 /* V31 */ psubsw %mm3, %mm4 /* V30 ; free mm3 */ +#ifdef __PIC__ + pmulhw x5a825a825a825a82@GOTOFF(%ebp),%mm4 /* V34 */ +#else pmulhw x5a825a825a825a82,%mm4 /* V34 */ +#endif nop psubsw %mm1, %mm0 /* V38 */
psubsw %mm7, %mm1 /* V37 ; free mm7 */ @@ -159,7 +200,11 @@ _dv_idct_block_mmx: psubsw %mm7, %mm1 /* V50 */ pmulhw 8*9(%esi), %mm5 /* V9 */ paddsw %mm7, %mm2 /* V51 */ +#ifdef __PIC__ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm1 /* 23170 ->V52 */ +#else pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */ +#endif movq %mm2, %mm6 /* duplicate V51 */ psraw $1, %mm2 /* t138=t144 */ movq %mm3, %mm4 /* duplicate V1 */ @@ -200,11 +245,19 @@ _dv_idct_block_mmx: * even more by doing the correction step in a later stage when the number * is actually multiplied by 16 */ +#ifdef __PIC__ + paddw x0005000200010001@GOTOFF(%ebp), %mm4 +#else paddw x0005000200010001, %mm4 +#endif psubsw %mm6, %mm3 /* V60 ; free mm6 */ psraw $1, %mm0 /* t154=t156 */ movq %mm3, %mm1 /* duplicate V60 */ +#ifdef __PIC__ + pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm1 /* V67 */ +#else pmulhw x539f539f539f539f, %mm1 /* V67 */ +#endif movq %mm5, %mm6 /* duplicate V3 */ psraw $2, %mm4 /* t148=t150 */ paddsw %mm4, %mm5 /* V61 */ @@ -213,13 +266,25 @@ _dv_idct_block_mmx: psllw $1, %mm1 /* t169 */ paddsw %mm0, %mm5 /* V65 -> result */ psubsw %mm0, %mm4 /* V64 ; free mm0 */ +#ifdef __PIC__ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm4 /* V68 */ +#else pmulhw x5a825a825a825a82, %mm4 /* V68 */ +#endif psraw $1, %mm3 /* t158 */ psubsw %mm6, %mm3 /* V66 */ movq %mm5, %mm2 /* duplicate V65 */ +#ifdef __PIC__ + pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm3 /* V70 */ +#else pmulhw x61f861f861f861f8, %mm3 /* V70 */ +#endif psllw $1, %mm6 /* t165 */ +#ifdef __PIC__ + pmulhw x4546454645464546@GOTOFF(%ebp), %mm6 /* V69 */ +#else pmulhw x4546454645464546, %mm6 /* V69 */ +#endif psraw $1, %mm2 /* t172 */ /* moved from next block */ movq 8*5(%esi), %mm0 /* V56 */ @@ -344,7 +409,11 @@ _dv_idct_block_mmx: * movq 8*13(%esi), %mm4 tmt13 */ psubsw %mm4, %mm3 /* V134 */ +#ifdef __PIC__ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm3 /* 23170 ->V136 */ +#else pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */ +#endif movq 8*9(%esi), %mm6 
/* tmt9 */ paddsw %mm4, %mm5 /* V135 ; mm4 free */ movq %mm0, %mm4 /* duplicate tmt1 */ @@ -373,17 +442,33 @@ _dv_idct_block_mmx: psubsw %mm7, %mm0 /* V144 */ movq %mm0, %mm3 /* duplicate V144 */ paddsw %mm7, %mm2 /* V147 ; free mm7 */ +#ifdef __PIC__ + pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm0 /* 21407-> V151 */ +#else pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */ +#endif movq %mm1, %mm7 /* duplicate tmt3 */ paddsw %mm5, %mm7 /* V145 */ psubsw %mm5, %mm1 /* V146 ; free mm5 */ psubsw %mm1, %mm3 /* V150 */ movq %mm7, %mm5 /* duplicate V145 */ +#ifdef __PIC__ + pmulhw x4546454645464546@GOTOFF(%ebp), %mm1 /* 17734-> V153 */ +#else pmulhw x4546454645464546, %mm1 /* 17734-> V153 */ +#endif psubsw %mm2, %mm5 /* V148 */ +#ifdef __PIC__ + pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm3 /* 25080-> V154 */ +#else pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */ +#endif psllw $2, %mm0 /* t311 */ +#ifdef __PIC__ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm5 /* 23170-> V152 */ +#else pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */ +#endif paddsw %mm2, %mm7 /* V149 ; free mm2 */ psllw $1, %mm1 /* t313 */ nop /* without the nop - freeze here for one clock */ @@ -409,7 +494,11 @@ _dv_idct_block_mmx: paddsw %mm3, %mm6 /* V164 ; free mm3 */ movq %mm4, %mm3 /* duplicate V142 */ psubsw %mm5, %mm4 /* V165 ; free mm5 */ +#ifdef __PIC__ + movq %mm2, scratch7@GOTOFF(%ebp) /* out7 */ +#else movq %mm2, scratch7 /* out7 */ +#endif psraw $4, %mm6 psraw $4, %mm4 paddsw %mm5, %mm3 /* V162 */ @@ -420,11 +509,19 @@ _dv_idct_block_mmx: */ movq %mm6, 8*9(%esi) /* out9 */ paddsw %mm1, %mm0 /* V161 */ +#ifdef __PIC__ + movq %mm3, scratch5@GOTOFF(%ebp) /* out5 */ +#else movq %mm3, scratch5 /* out5 */ +#endif psubsw %mm1, %mm5 /* V166 ; free mm1 */ movq %mm4, 8*11(%esi) /* out11 */ psraw $4, %mm5 +#ifdef __PIC__ + movq %mm0, scratch3@GOTOFF(%ebp) /* out3 */ +#else movq %mm0, scratch3 /* out3 */ +#endif movq %mm2, %mm4 /* duplicate V140 */ movq %mm5, 8*13(%esi) /* out13 */ paddsw %mm7, %mm2 /* V160 
*/ @@ -434,7 +531,11 @@ _dv_idct_block_mmx: /* moved from the next block */ movq 8*3(%esi), %mm7 psraw $4, %mm4 +#ifdef __PIC__ + movq %mm2, scratch1@GOTOFF(%ebp) /* out1 */ +#else movq %mm2, scratch1 /* out1 */ +#endif /* moved from the next block */ movq %mm0, %mm1 movq %mm4, 8*15(%esi) /* out15 */ @@ -491,15 +592,31 @@ _dv_idct_block_mmx: paddsw %mm4, %mm3 /* V113 ; free mm4 */ movq %mm0, %mm4 /* duplicate V110 */ paddsw %mm1, %mm2 /* V111 */ +#ifdef __PIC__ + pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm0 /* 21407-> V117 */ +#else pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */ +#endif psubsw %mm1, %mm5 /* V112 ; free mm1 */ psubsw %mm5, %mm4 /* V116 */ movq %mm2, %mm1 /* duplicate V111 */ +#ifdef __PIC__ + pmulhw x4546454645464546@GOTOFF(%ebp), %mm5 /* 17734-> V119 */ +#else pmulhw x4546454645464546, %mm5 /* 17734-> V119 */ +#endif psubsw %mm3, %mm2 /* V114 */ +#ifdef __PIC__ + pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm4 /* 25080-> V120 */ +#else pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */ +#endif paddsw %mm3, %mm1 /* V115 ; free mm3 */ +#ifdef __PIC__ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm2 /* 23170-> V118 */ +#else pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */ +#endif psllw $2, %mm0 /* t266 */ movq %mm1, (%esi) /* save V115 */ psllw $1, %mm5 /* t268 */ @@ -517,7 +634,11 @@ _dv_idct_block_mmx: movq %mm6, %mm3 /* duplicate tmt4 */ psubsw %mm0, %mm6 /* V100 */ paddsw %mm0, %mm3 /* V101 ; free mm0 */ +#ifdef __PIC__ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm6 /* 23170 ->V102 */ +#else pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */ +#endif movq %mm7, %mm5 /* duplicate tmt0 */ movq 8*8(%esi), %mm1 /* tmt8 */ paddsw %mm1, %mm7 /* V103 */ @@ -551,10 +672,18 @@ _dv_idct_block_mmx: movq 8*2(%esi), %mm3 /* V123 */ paddsw %mm4, %mm7 /* out0 */ /* moved up from next block */ +#ifdef __PIC__ + movq scratch3@GOTOFF(%ebp), %mm0 +#else movq scratch3, %mm0 +#endif psraw $4, %mm7 /* moved up from next block */ +#ifdef __PIC__ + movq scratch5@GOTOFF(%ebp), 
%mm6 +#else movq scratch5, %mm6 +#endif psubsw %mm4, %mm1 /* out14 ; free mm4 */ paddsw %mm3, %mm5 /* out2 */ psraw $4, %mm1 @@ -565,7 +694,11 @@ _dv_idct_block_mmx: movq %mm5, 8*2(%esi) /* out2 ; free mm5 */ psraw $4, %mm2 /* moved up to the prev block */ +#ifdef __PIC__ + movq scratch7@GOTOFF(%ebp), %mm4 +#else movq scratch7, %mm4 +#endif /* moved up to the prev block */ psraw $4, %mm0 movq %mm2, 8*12(%esi) /* out12 ; free mm2 */ @@ -579,7 +712,11 @@ _dv_idct_block_mmx: * psraw $4, %mm0 * psraw $4, %mm6 */ +#ifdef __PIC__ + movq scratch1@GOTOFF(%ebp), %mm1 +#else movq scratch1, %mm1 +#endif psraw $4, %mm4 movq %mm0, 8*3(%esi) /* out3 */ psraw $4, %mm1 diff -urp libdv-0.104-old/libdv/quant.c libdv-0.104/libdv/quant.c --- libdv-0.104-old/libdv/quant.c 2004-10-20 05:49:24.000000000 +0200 +++ libdv-0.104/libdv/quant.c 2005-10-24 01:06:24.000000000 +0200 @@ -144,7 +144,7 @@ uint8_t dv_quant_offset[4] = { 6,3,0,1 uint32_t dv_quant_248_mul_tab [2] [22] [64]; uint32_t dv_quant_88_mul_tab [2] [22] [64]; -extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass); +extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t dv_quant_offset[],uint8_t dv_quant_shifts[][4]); /* tables are passed in so the asm needs no absolute data references; inner dimension must be spelled out in C */ extern void _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass); static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co); static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co); @@ -210,7 +210,7 @@ void _dv_quant(dv_coeff_t *block,int qno _dv_quant_x86_64(block, qno, klass); emms(); #else - _dv_quant_x86(block, qno, klass); + _dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts); emms(); #endif } diff -urp libdv-0.104-old/libdv/quant.h libdv-0.104/libdv/quant.h --- libdv-0.104-old/libdv/quant.h 2004-10-20 05:49:24.000000000 +0200 +++ libdv-0.104/libdv/quant.h 2005-10-24 00:57:43.000000000 +0200 @@ -27,7 +27,7 @@ extern void _dv_quant(dv_coeff_t *block, extern void _dv_quant_88_inverse(dv_coeff_t *block,int
qno,int klass); extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass, dv_248_coeff_t *co); -extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass); +extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t offset[], uint8_t shifts[][4]); /* offset/shifts: the dv_quant_offset and dv_quant_shifts tables */ extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass); extern void dv_quant_init (void); #ifdef __cplusplus diff -urp libdv-0.104-old/libdv/quant_x86.S libdv-0.104/libdv/quant_x86.S --- libdv-0.104-old/libdv/quant_x86.S 2005-10-23 19:40:58.000000000 +0200 +++ libdv-0.104/libdv/quant_x86.S 2005-10-24 01:10:21.000000000 +0200 @@ -71,10 +71,13 @@ _dv_quant_88_inverse_x86: /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */ movl ARGn(1),%eax /* qno */ + movl ARGn(3),%ebx /* dv_quant_offset */ + addl ARGn(2),%ebx /* + class: ebx = &dv_quant_offset[class] */ + movzbl (%ebx),%ecx /* ecx = dv_quant_offset[class] */ movl ARGn(2),%ebx /* class */ - movzbl dv_quant_offset(%ebx),%ecx addl %ecx,%eax - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */ + movl ARGn(4),%edx /* dv_quant_shifts */ + leal (%edx,%eax,4),%edx /* edx is pq */ /* extra = (class == 3); */ /* 0 1 2 3 */ @@ -212,11 +215,13 @@ _dv_quant_x86: /* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */ movl ARGn(1),%eax /* qno */ + movl ARGn(3),%ebx /* offset */ + addl ARGn(2),%ebx /* + class: ebx = &offset[class] */ + movzbl (%ebx),%ecx /* ecx = offset[class] */ movl ARGn(2),%ebx /* class */ - - movzbl dv_quant_offset(%ebx),%ecx + movl ARGn(4),%edx /* shifts */ addl %ecx,%eax - leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */ + leal (%edx,%eax,4),%edx /* edx is pq */ /* extra = (class == 3); */ /* 0 1 2 3 */ diff -urp libdv-0.104-old/libdv/rgbtoyuv.S libdv-0.104/libdv/rgbtoyuv.S --- libdv-0.104-old/libdv/rgbtoyuv.S 2005-10-23 19:40:58.000000000 +0200 +++ libdv-0.104/libdv/rgbtoyuv.S 2005-10-24 00:46:34.000000000 +0200 @@ -110,20 +110,30 @@ VR0GR: .long 0,0 VBG0B: .long 0,0 #endif - + +#ifdef __PIC__ +# undef __i686 /* gcc define gets in our way */ + .section
.gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits +.globl __i686.get_pc_thunk.bp + .hidden __i686.get_pc_thunk.bp + .type __i686.get_pc_thunk.bp,@function +__i686.get_pc_thunk.bp: + movl (%esp), %ebp + ret +#endif + .text -#define _inPtr 8 -#define _rows 12 -#define _columns 16 -#define _outyPtr 20 -#define _outuPtr 24 -#define _outvPtr 28 +#define _inPtr 24+8 +#define _rows 24+12 +#define _columns 24+16 +#define _outyPtr 24+20 +#define _outuPtr 24+24 +#define _outvPtr 24+28 _dv_rgbtoycb_mmx: pushl %ebp - movl %esp, %ebp pushl %eax pushl %ebx pushl %ecx @@ -131,46 +141,103 @@ _dv_rgbtoycb_mmx: pushl %esi pushl %edi +#ifdef __PIC__ + call __i686.get_pc_thunk.bp + addl $_GLOBAL_OFFSET_TABLE_, %ebp +#endif + +#ifdef __PIC__ + leal ZEROSX@GOTOFF(%ebp), %eax #This section gets around a bug +#else leal ZEROSX, %eax #This section gets around a bug +#endif movq (%eax), %mm0 #unlikely to persist +#ifdef __PIC__ + movq %mm0, ZEROS@GOTOFF(%ebp) + leal OFFSETDX@GOTOFF(%ebp), %eax +#else movq %mm0, ZEROS leal OFFSETDX, %eax +#endif movq (%eax), %mm0 +#ifdef __PIC__ + movq %mm0, OFFSETD@GOTOFF(%ebp) + leal OFFSETWX@GOTOFF(%ebp), %eax +#else movq %mm0, OFFSETD leal OFFSETWX, %eax +#endif movq (%eax), %mm0 +#ifdef __PIC__ + movq %mm0, OFFSETW@GOTOFF(%ebp) + leal OFFSETBX@GOTOFF(%ebp), %eax +#else movq %mm0, OFFSETW leal OFFSETBX, %eax +#endif movq (%eax), %mm0 +#ifdef __PIC__ + movq %mm0, OFFSETB@GOTOFF(%ebp) + leal YR0GRX@GOTOFF(%ebp), %eax +#else movq %mm0, OFFSETB leal YR0GRX, %eax +#endif movq (%eax), %mm0 +#ifdef __PIC__ + movq %mm0, YR0GR@GOTOFF(%ebp) + leal YBG0BX@GOTOFF(%ebp), %eax +#else movq %mm0, YR0GR leal YBG0BX, %eax +#endif movq (%eax), %mm0 +#ifdef __PIC__ + movq %mm0, YBG0B@GOTOFF(%ebp) + leal UR0GRX@GOTOFF(%ebp), %eax +#else movq %mm0, YBG0B leal UR0GRX, %eax +#endif movq (%eax), %mm0 +#ifdef __PIC__ + movq %mm0, UR0GR@GOTOFF(%ebp) + leal UBG0BX@GOTOFF(%ebp), %eax +#else movq %mm0, UR0GR leal UBG0BX, %eax +#endif movq (%eax), %mm0 +#ifdef __PIC__ + movq %mm0, 
UBG0B@GOTOFF(%ebp) + leal VR0GRX@GOTOFF(%ebp), %eax +#else movq %mm0, UBG0B leal VR0GRX, %eax +#endif movq (%eax), %mm0 +#ifdef __PIC__ + movq %mm0, VR0GR@GOTOFF(%ebp) + leal VBG0BX@GOTOFF(%ebp), %eax +#else movq %mm0, VR0GR leal VBG0BX, %eax +#endif movq (%eax), %mm0 +#ifdef __PIC__ + movq %mm0, VBG0B@GOTOFF(%ebp) +#else movq %mm0, VBG0B - - movl _rows(%ebp), %eax - movl _columns(%ebp), %ebx +#endif + movl _rows(%esp), %eax + movl _columns(%esp), %ebx mull %ebx #number pixels shrl $3, %eax #number of loops movl %eax, %edi #loop counter in edi - movl _inPtr(%ebp), %eax - movl _outyPtr(%ebp), %ebx - movl _outuPtr(%ebp), %ecx - movl _outvPtr(%ebp), %edx + movl _inPtr(%esp), %eax + movl _outyPtr(%esp), %ebx + movl _outuPtr(%esp), %ecx + movl _outvPtr(%esp), %edx rgbtoycb_mmx_loop: movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0 pxor %mm6, %mm6 #0 -> mm6 @@ -184,29 +251,57 @@ rgbtoycb_mmx_loop: punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1 movq %mm0, %mm2 #R1B0G0R0 -> mm2 +#ifdef __PIC__ + pmaddwd YR0GR@GOTOFF(%ebp), %mm0 #yrR1,ygG0+yrR0 -> mm0 +#else pmaddwd YR0GR, %mm0 #yrR1,ygG0+yrR0 -> mm0 +#endif movq %mm1, %mm3 #B1G1R1B0 -> mm3 +#ifdef __PIC__ + pmaddwd YBG0B@GOTOFF(%ebp), %mm1 #ybB1+ygG1,ybB0 -> mm1 +#else pmaddwd YBG0B, %mm1 #ybB1+ygG1,ybB0 -> mm1 +#endif movq %mm2, %mm4 #R1B0G0R0 -> mm4 +#ifdef __PIC__ + pmaddwd UR0GR@GOTOFF(%ebp), %mm2 #urR1,ugG0+urR0 -> mm2 +#else pmaddwd UR0GR, %mm2 #urR1,ugG0+urR0 -> mm2 +#endif movq %mm3, %mm5 #B1G1R1B0 -> mm5 +#ifdef __PIC__ + pmaddwd UBG0B@GOTOFF(%ebp), %mm3 #ubB1+ugG1,ubB0 -> mm3 +#else pmaddwd UBG0B, %mm3 #ubB1+ugG1,ubB0 -> mm3 +#endif punpckhbw %mm6, %mm7 # 00G2R2 -> mm7 +#ifdef __PIC__ + pmaddwd VR0GR@GOTOFF(%ebp), %mm4 #vrR1,vgG0+vrR0 -> mm4 +#else pmaddwd VR0GR, %mm4 #vrR1,vgG0+vrR0 -> mm4 +#endif paddd %mm1, %mm0 #Y1Y0 -> mm0 +#ifdef __PIC__ + pmaddwd VBG0B@GOTOFF(%ebp), %mm5 #vbB1+vgG1,vbB0 -> mm5 +#else pmaddwd VBG0B, %mm5 #vbB1+vgG1,vbB0 -> mm5 +#endif movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1 paddd %mm3, %mm2 #U1U0 -> mm2 
movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6 +#ifdef __PIC__ + punpcklbw ZEROS@GOTOFF(%ebp), %mm1 #B3G3R3B2 -> mm1 +#else punpcklbw ZEROS, %mm1 #B3G3R3B2 -> mm1 +#endif paddd %mm5, %mm4 #V1V0 -> mm4 movq %mm1, %mm5 #B3G3R3B2 -> mm5 @@ -214,29 +309,61 @@ rgbtoycb_mmx_loop: paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1 +#ifdef __PIC__ + punpckhbw ZEROS@GOTOFF(%ebp), %mm6 #R5B4G4R3 -> mm6 +#else punpckhbw ZEROS, %mm6 #R5B4G4R3 -> mm6 +#endif movq %mm1, %mm3 #R3B2G2R2 -> mm3 +#ifdef __PIC__ + pmaddwd YR0GR@GOTOFF(%ebp), %mm1 #yrR3,ygG2+yrR2 -> mm1 +#else pmaddwd YR0GR, %mm1 #yrR3,ygG2+yrR2 -> mm1 +#endif movq %mm5, %mm7 #B3G3R3B2 -> mm7 +#ifdef __PIC__ + pmaddwd YBG0B@GOTOFF(%ebp), %mm5 #ybB3+ygG3,ybB2 -> mm5 +#else pmaddwd YBG0B, %mm5 #ybB3+ygG3,ybB2 -> mm5 +#endif psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0 +#ifdef __PIC__ + movq %mm6, TEMP0@GOTOFF(%ebp) #R5B4G4R4 -> TEMP0 +#else movq %mm6, TEMP0 #R5B4G4R4 -> TEMP0 +#endif movq %mm3, %mm6 #R3B2G2R2 -> mm6 +#ifdef __PIC__ + pmaddwd UR0GR@GOTOFF(%ebp), %mm6 #urR3,ugG2+urR2 -> mm6 +#else pmaddwd UR0GR, %mm6 #urR3,ugG2+urR2 -> mm6 +#endif psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2 paddd %mm5, %mm1 #Y3Y2 -> mm1 movq %mm7, %mm5 #B3G3R3B2 -> mm5 +#ifdef __PIC__ + pmaddwd UBG0B@GOTOFF(%ebp), %mm7 #ubB3+ugG3,ubB2 +#else pmaddwd UBG0B, %mm7 #ubB3+ugG3,ubB2 +#endif psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1 +#ifdef __PIC__ + pmaddwd VR0GR@GOTOFF(%ebp), %mm3 #vrR3,vgG2+vgR2 +#else pmaddwd VR0GR, %mm3 #vrR3,vgG2+vgR2 +#endif packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0 +#ifdef __PIC__ + pmaddwd VBG0B@GOTOFF(%ebp), %mm5 #vbB3+vgG3,vbB2 -> mm5 +#else pmaddwd VBG0B, %mm5 #vbB3+vgG3,vbB2 -> mm5 +#endif psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4 movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm7 @@ -251,58 +378,114 @@ rgbtoycb_mmx_loop: movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5 psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3 +#ifdef __PIC__ + paddw OFFSETY@GOTOFF(%ebp), %mm0 +#else paddw OFFSETY, %mm0 +#endif movq %mm0, (%ebx) 
#store Y3Y2Y1Y0 packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2 +#ifdef __PIC__ + movq TEMP0@GOTOFF(%ebp), %mm0 #R5B4G4R4 -> mm0 +#else movq TEMP0, %mm0 #R5B4G4R4 -> mm0 +#endif addl $8, %ebx - + +#ifdef __PIC__ + punpcklbw ZEROS@GOTOFF(%ebp), %mm7 #B5G500 -> mm7 +#else punpcklbw ZEROS, %mm7 #B5G500 -> mm7 +#endif movq %mm0, %mm6 #R5B4G4R4 -> mm6 +#ifdef __PIC__ + movq %mm2, TEMPU@GOTOFF(%ebp) #32-bit scaled U3U2U1U0 -> TEMPU +#else movq %mm2, TEMPU #32-bit scaled U3U2U1U0 -> TEMPU +#endif psrlq $32, %mm0 #00R5B4 -> mm0 paddw %mm0, %mm7 #B5G5R5B4 -> mm7 movq %mm6, %mm2 #B5B4G4R4 -> mm2 +#ifdef __PIC__ + pmaddwd YR0GR@GOTOFF(%ebp), %mm2 #yrR5,ygG4+yrR4 -> mm2 +#else pmaddwd YR0GR, %mm2 #yrR5,ygG4+yrR4 -> mm2 +#endif movq %mm7, %mm0 #B5G5R5B4 -> mm0 +#ifdef __PIC__ + pmaddwd YBG0B@GOTOFF(%ebp), %mm7 #ybB5+ygG5,ybB4 -> mm7 +#else pmaddwd YBG0B, %mm7 #ybB5+ygG5,ybB4 -> mm7 +#endif packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4 addl $24, %eax #increment RGB count +#ifdef __PIC__ + movq %mm4, TEMPV@GOTOFF(%ebp) #(V3V2V1V0)/256 -> mm4 +#else movq %mm4, TEMPV #(V3V2V1V0)/256 -> mm4 +#endif movq %mm6, %mm4 #B5B4G4R4 -> mm4 +#ifdef __PIC__ + pmaddwd UR0GR@GOTOFF(%ebp), %mm6 #urR5,ugG4+urR4 +#else pmaddwd UR0GR, %mm6 #urR5,ugG4+urR4 +#endif movq %mm0, %mm3 #B5G5R5B4 -> mm0 +#ifdef __PIC__ + pmaddwd UBG0B@GOTOFF(%ebp), %mm0 #ubB5+ugG5,ubB4 +#else pmaddwd UBG0B, %mm0 #ubB5+ugG5,ubB4 +#endif paddd %mm7, %mm2 #Y5Y4 -> mm2 +#ifdef __PIC__ + pmaddwd VR0GR@GOTOFF(%ebp), %mm4 #vrR5,vgG4+vrR4 -> mm4 +#else pmaddwd VR0GR, %mm4 #vrR5,vgG4+vrR4 -> mm4 +#endif pxor %mm7, %mm7 #0 -> mm7 +#ifdef __PIC__ + pmaddwd VBG0B@GOTOFF(%ebp), %mm3 #vbB5+vgG5,vbB4 -> mm3 +#else pmaddwd VBG0B, %mm3 #vbB5+vgG5,vbB4 -> mm3 +#endif punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1 paddd %mm6, %mm0 #U5U4 -> mm0 movq %mm1, %mm6 #B7G7R7B6 -> mm6 +#ifdef __PIC__ + pmaddwd YBG0B@GOTOFF(%ebp), %mm6 #ybB7+ygG7,ybB6 -> mm6 +#else pmaddwd YBG0B, %mm6 #ybB7+ygG7,ybB6 -> mm6 +#endif punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5 
movq %mm5, %mm7 #R7B6G6R6 -> mm7 paddd %mm4, %mm3 #V5V4 -> mm3 +#ifdef __PIC__ + pmaddwd YR0GR@GOTOFF(%ebp), %mm5 #yrR7,ygG6+yrR6 -> mm5 +#else pmaddwd YR0GR, %mm5 #yrR7,ygG6+yrR6 -> mm5 +#endif movq %mm1, %mm4 #B7G7R7B6 -> mm4 +#ifdef __PIC__ + pmaddwd UBG0B@GOTOFF(%ebp), %mm4 #ubB7+ugG7,ubB6 -> mm4 +#else pmaddwd UBG0B, %mm4 #ubB7+ugG7,ubB6 -> mm4 +#endif psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0 psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2 @@ -310,25 +493,49 @@ rgbtoycb_mmx_loop: paddd %mm5, %mm6 #Y7Y6 -> mm6 movq %mm7, %mm5 #R7B6G6R6 -> mm5 +#ifdef __PIC__ + pmaddwd UR0GR@GOTOFF(%ebp), %mm7 #urR7,ugG6+ugR6 -> mm7 +#else pmaddwd UR0GR, %mm7 #urR7,ugG6+ugR6 -> mm7 +#endif psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3 +#ifdef __PIC__ + pmaddwd VBG0B@GOTOFF(%ebp), %mm1 #vbB7+vgG7,vbB6 -> mm1 +#else pmaddwd VBG0B, %mm1 #vbB7+vgG7,vbB6 -> mm1 +#endif psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6 packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2 +#ifdef __PIC__ + pmaddwd VR0GR@GOTOFF(%ebp), %mm5 #vrR7,vgG6+vrR6 -> mm5 +#else pmaddwd VR0GR, %mm5 #vrR7,vgG6+vrR6 -> mm5 +#endif paddd %mm4, %mm7 #U7U6 -> mm7 psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7 +#ifdef __PIC__ + paddw OFFSETY@GOTOFF(%ebp), %mm2 +#else paddw OFFSETY, %mm2 +#endif movq %mm2, (%ebx) #store Y7Y6Y5Y4 +#ifdef __PIC__ + movq ALLONE@GOTOFF(%ebp), %mm6 +#else movq ALLONE, %mm6 +#endif packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0 +#ifdef __PIC__ + movq TEMPU@GOTOFF(%ebp), %mm4 #32-bit scaled U3U2U1U0 -> mm4 +#else movq TEMPU, %mm4 #32-bit scaled U3U2U1U0 -> mm4 +#endif pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0 pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4 @@ -338,8 +545,12 @@ rgbtoycb_mmx_loop: psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1 psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4 - + +#ifdef __PIC__ + movq TEMPV@GOTOFF(%ebp), %mm5 #32-bit scaled V3V2V1V0 -> mm5 +#else movq TEMPV, %mm5 #32-bit scaled V3V2V1V0 -> mm5 +#endif movq 
%mm4, (%ecx) # store U @@ -425,14 +636,22 @@ _dv_ppm_copy_y_block_mmx: _dv_pgm_copy_y_block_mmx: pushl %ebp - movl %esp, %ebp pushl %esi pushl %edi - - movl 8(%ebp), %edi # dest - movl 12(%ebp), %esi # src +#ifdef __PIC__ + call __i686.get_pc_thunk.bp /* %ebp = return address of this call, i.e. the current PC */ + addl $_GLOBAL_OFFSET_TABLE_, %ebp /* %ebp becomes the base for the @GOTOFF operands below */ +#endif + + movl 16(%esp), %edi /* no frame pointer any more: return address + 3 pushes = 16 */ # dest + movl 20(%esp), %esi # src + +#ifdef __PIC__ + movq OFFSETY@GOTOFF(%ebp), %mm7 +#else movq OFFSETY, %mm7 +#endif pxor %mm6, %mm6 movq (%esi), %mm0 @@ -567,14 +786,22 @@ _dv_pgm_copy_y_block_mmx: _dv_video_copy_y_block_mmx: pushl %ebp - movl %esp, %ebp pushl %esi pushl %edi - - movl 8(%ebp), %edi # dest - movl 12(%ebp), %esi # src +#ifdef __PIC__ + call __i686.get_pc_thunk.bp + addl $_GLOBAL_OFFSET_TABLE_, %ebp +#endif + + movl 16(%esp), %edi /* return address + 3 pushes = 16 */ # dest + movl 20(%esp), %esi # src + +#ifdef __PIC__ + movq OFFSETBX@GOTOFF(%ebp), %mm7 +#else movq OFFSETBX, %mm7 +#endif pxor %mm6, %mm6 movq (%esi), %mm0 @@ -855,16 +1082,23 @@ _dv_ppm_copy_pal_c_block_mmx: _dv_pgm_copy_pal_c_block_mmx: pushl %ebp - movl %esp, %ebp pushl %esi pushl %edi pushl %ebx - - movl 8(%ebp), %edi # dest - movl 12(%ebp), %esi # src +#ifdef __PIC__ + call __i686.get_pc_thunk.bp + addl $_GLOBAL_OFFSET_TABLE_, %ebp +#endif + + movl 20(%esp), %edi /* return address + 4 pushes = 20 */ # dest + movl 24(%esp), %esi # src +#ifdef __PIC__ + movq OFFSETBX@GOTOFF(%ebp), %mm7 +#else movq OFFSETBX, %mm7 +#endif pxor %mm6, %mm6 @@ -1003,15 +1237,23 @@ _dv_pgm_copy_pal_c_block_mmx: _dv_video_copy_pal_c_block_mmx: pushl %ebp - movl %esp, %ebp pushl %esi pushl %edi pushl %ebx - - movl 8(%ebp), %edi # dest - movl 12(%ebp), %esi # src +#ifdef __PIC__ + call __i686.get_pc_thunk.bp + addl $_GLOBAL_OFFSET_TABLE_, %ebp +#endif + + movl 20(%esp), %edi /* return address + 4 pushes = 20 */ # dest + movl 24(%esp), %esi # src + +#ifdef __PIC__ + movq OFFSETBX@GOTOFF(%ebp), %mm7 +#else movq OFFSETBX, %mm7 +#endif paddw %mm7, %mm7 pxor %mm6, %mm6 @@ -1098,18 +1340,25 @@ video_copy_pal_c_block_mmx_loop: _dv_ppm_copy_ntsc_c_block_mmx: pushl %ebp - movl %esp, %ebp pushl %esi pushl %edi 
pushl %ebx - - movl 8(%ebp), %edi # dest - movl 12(%ebp), %esi # src + +#ifdef __PIC__ + call __i686.get_pc_thunk.bp /* %ebp = return address of this call, i.e. the current PC */ + addl $_GLOBAL_OFFSET_TABLE_, %ebp /* %ebp becomes the base for the @GOTOFF operands below */ +#endif + + movl 20(%esp), %edi /* no frame pointer any more: return address + 4 pushes = 20 */ # dest + movl 24(%esp), %esi # src movl $4, %ebx +#ifdef __PIC__ + movq ALLONE@GOTOFF(%ebp), %mm6 +#else movq ALLONE, %mm6 - +#endif ppm_copy_ntsc_c_block_mmx_loop: movq (%esi), %mm0 @@ -1171,14 +1420,22 @@ ppm_copy_ntsc_c_block_mmx_loop: _dv_pgm_copy_ntsc_c_block_mmx: pushl %ebp - movl %esp, %ebp pushl %esi pushl %edi - - movl 8(%ebp), %edi # dest - movl 12(%ebp), %esi # src +#ifdef __PIC__ + call __i686.get_pc_thunk.bp + addl $_GLOBAL_OFFSET_TABLE_, %ebp +#endif + + movl 16(%esp), %edi /* return address + 3 pushes = 16 */ # dest + movl 20(%esp), %esi # src + +#ifdef __PIC__ + movq OFFSETBX@GOTOFF(%ebp), %mm7 +#else movq OFFSETBX, %mm7 +#endif paddw %mm7, %mm7 pxor %mm6, %mm6 @@ -1328,15 +1585,23 @@ _dv_pgm_copy_ntsc_c_block_mmx: _dv_video_copy_ntsc_c_block_mmx: pushl %ebp - movl %esp, %ebp pushl %esi pushl %edi pushl %ebx - - movl 8(%ebp), %edi # dest - movl 12(%ebp), %esi # src +#ifdef __PIC__ + call __i686.get_pc_thunk.bp + addl $_GLOBAL_OFFSET_TABLE_, %ebp +#endif + + movl 20(%esp), %edi /* return address + 4 pushes = 20 */ # dest + movl 24(%esp), %esi # src + +#ifdef __PIC__ + movq OFFSETBX@GOTOFF(%ebp), %mm7 +#else movq OFFSETBX, %mm7 +#endif paddw %mm7, %mm7 pxor %mm6, %mm6 diff -urp libdv-0.104-old/libdv/vlc_x86.S libdv-0.104/libdv/vlc_x86.S --- libdv-0.104-old/libdv/vlc_x86.S 2005-10-23 19:40:58.000000000 +0200 +++ libdv-0.104/libdv/vlc_x86.S 2005-10-25 01:47:14.000000000 +0200 @@ -1,29 +1,76 @@ #include "asmoff.h" .text + +#ifdef __PIC__ +# undef __i686 /* gcc define gets in our way */ + .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits +.globl __i686.get_pc_thunk.bp + .hidden __i686.get_pc_thunk.bp + .type __i686.get_pc_thunk.bp,@function +__i686.get_pc_thunk.bp: + movl (%esp), %ebp /* copy the caller's return address (its PC) into %ebp */ + ret + + .section .gnu.linkonce.t.__i686.get_pc_thunk.si,"ax",@progbits +.globl __i686.get_pc_thunk.si + .hidden __i686.get_pc_thunk.si + .type 
__i686.get_pc_thunk.si,@function +__i686.get_pc_thunk.si: + movl (%esp), %esi /* copy the caller's return address (its PC) into %esi */ + ret +#endif + .text /* switch back to .text: otherwise all code below is assembled into the linkonce thunk section */ .align 4 .globl dv_decode_vlc .type dv_decode_vlc,@function dv_decode_vlc: pushl %ebx + pushl %ebp + +#ifdef __PIC__ + call __i686.get_pc_thunk.bp /* %ebp = current PC */ + addl $_GLOBAL_OFFSET_TABLE_, %ebp /* %ebp = base for the @GOTOFF table accesses below */ +#endif - /* Args are at 8(%esp). */ - movl 8(%esp),%eax /* %eax is bits */ - movl 12(%esp),%ebx /* %ebx is maxbits */ + /* Args are at 12(%esp). */ + movl 12(%esp),%eax /* %eax is bits */ + movl 16(%esp),%ebx /* %ebx is maxbits */ andl $0x3f,%ebx /* limit index range STL*/ +#ifdef __PIC__ /* must be the same guard that loads %ebp above; %ebp is not a GOT base otherwise */ + movl dv_vlc_class_index_mask@GOTOFF(%ebp,%ebx,4),%edx +#else movl dv_vlc_class_index_mask(,%ebx,4),%edx +#endif andl %eax,%edx +#ifdef __PIC__ + movl dv_vlc_class_index_rshift@GOTOFF(%ebp,%ebx,4),%ecx +#else movl dv_vlc_class_index_rshift(,%ebx,4),%ecx +#endif sarl %cl,%edx +#ifdef __PIC__ + movl dv_vlc_classes@GOTOFF(%ebp,%ebx,4),%ecx +#else movl dv_vlc_classes(,%ebx,4),%ecx +#endif movsbl (%ecx,%edx,1),%edx /* %edx is class */ - + +#ifdef __PIC__ + movl dv_vlc_index_mask@GOTOFF(%ebp,%edx,4),%ebx + movl dv_vlc_index_rshift@GOTOFF(%ebp,%edx,4),%ecx +#else movl dv_vlc_index_mask(,%edx,4),%ebx movl dv_vlc_index_rshift(,%edx,4),%ecx +#endif andl %eax,%ebx sarl %cl,%ebx +#ifdef __PIC__ + movl dv_vlc_lookups@GOTOFF(%ebp,%edx,4),%edx +#else movl dv_vlc_lookups(,%edx,4),%edx +#endif movl (%edx,%ebx,4),%edx /* Now %edx holds result, like this: @@ -42,7 +89,11 @@ dv_decode_vlc: movl %edx,%ecx sarl $8,%ecx andl $0xff,%ecx +#ifdef __PIC__ + movl sign_mask@GOTOFF(%ebp,%ecx,4),%ebx +#else movl sign_mask(,%ecx,4),%ebx +#endif andl %ebx,%eax negl %eax sarl $31,%eax @@ -63,14 +114,14 @@ dv_decode_vlc: *result = broken; Note that the 'broken' pattern is all ones (i.e. 
0xffffffff) */ - movl 12(%esp),%ebx /* %ebx is maxbits */ + movl 16(%esp),%ebx /* %ebx is maxbits */ subl %ecx,%ebx sbbl %ebx,%ebx orl %ebx,%edx - movl 16(%esp),%eax + movl 20(%esp),%eax /* third argument: pointer the result word is stored through */ movl %edx,(%eax) - + popl %ebp /* matches the pushl %ebp added to the prologue */ popl %ebx ret @@ -80,21 +131,38 @@ dv_decode_vlc: .type __dv_decode_vlc,@function __dv_decode_vlc: pushl %ebx + pushl %ebp + +#ifdef __PIC__ + call __i686.get_pc_thunk.bp /* %ebp = current PC */ + addl $_GLOBAL_OFFSET_TABLE_, %ebp /* %ebp = base for the @GOTOFF table accesses below */ +#endif - /* Args are at 8(%esp). */ - movl 8(%esp),%eax /* %eax is bits */ + /* Args are at 12(%esp). */ + movl 12(%esp),%eax /* %eax is bits */ movl %eax,%edx /* %edx is class */ andl $0xfe00,%edx sarl $9,%edx +#ifdef __PIC__ + movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx + + movl dv_vlc_index_mask@GOTOFF(%ebp,%edx,4),%ebx + movl dv_vlc_index_rshift@GOTOFF(%ebp,%edx,4),%ecx +#else movsbl dv_vlc_class_lookup5(%edx),%edx - + movl dv_vlc_index_mask(,%edx,4),%ebx movl dv_vlc_index_rshift(,%edx,4),%ecx +#endif andl %eax,%ebx sarl %cl,%ebx +#ifdef __PIC__ + movl dv_vlc_lookups@GOTOFF(%ebp,%edx,4),%edx +#else movl dv_vlc_lookups(,%edx,4),%edx +#endif movl (%edx,%ebx,4),%edx /* Now %edx holds result, like this: @@ -112,7 +180,11 @@ __dv_decode_vlc: movl %edx,%ecx sarl $8,%ecx andl $0xff,%ecx +#ifdef __PIC__ + movl sign_mask@GOTOFF(%ebp,%ecx,4),%ecx +#else movl sign_mask(,%ecx,4),%ecx +#endif andl %ecx,%eax negl %eax sarl $31,%eax @@ -127,9 +199,9 @@ __dv_decode_vlc: xorl %eax,%edx subl %eax,%edx - movl 12(%esp),%eax + movl 16(%esp),%eax /* second argument: pointer the result word is stored through */ movl %edx,(%eax) - + popl %ebp /* matches the pushl %ebp added to the prologue */ popl %ebx ret @@ -147,6 +219,11 @@ dv_parse_ac_coeffs_pass0: pushl %esi pushl %ebp +#ifdef __PIC__ + call __i686.get_pc_thunk.si /* %esi = current PC */ + addl $_GLOBAL_OFFSET_TABLE_, %esi /* %esi = base for the @GOTOFF accesses in this function */ +#endif + #define ARGn(N) (20+(4*(N)))(%esp) /* @@ -159,8 +236,10 @@ dv_parse_ac_coeffs_pass0: ebp bl */ movl ARGn(2),%ebp +#ifndef __PIC__ /* in PIC builds %esi holds the GOT base, so the buffer pointer is reloaded in readloop instead */ movl ARGn(0),%esi movl bitstream_t_buf(%esi),%esi +#endif movl dv_block_t_offset(%ebp),%edi movl dv_block_t_reorder(%ebp),%ebx @@ -170,7 +249,11 @@ dv_parse_ac_coeffs_pass0: movq 
dv_block_t_coeffs(%ebp),%mm1 pxor %mm0,%mm0 +#ifdef __PIC__ + pand const_f_0_0_0@GOTOFF(%esi),%mm1 +#else pand const_f_0_0_0,%mm1 +#endif movq %mm1,dv_block_t_coeffs(%ebp) movq %mm0,(dv_block_t_coeffs + 8)(%ebp) movq %mm0,(dv_block_t_coeffs + 16)(%ebp) @@ -191,9 +274,17 @@ dv_parse_ac_coeffs_pass0: readloop: movl %edi,%ecx shrl $3,%ecx +#ifdef __PIC__ + pushl %esi /* %esi is the GOT base; borrow it for the buffer pointer */ + movl ARGn(1),%esi /* ARGn(1) after the push above is the slot ARGn(0) names elsewhere */ + movl bitstream_t_buf(%esi),%esi +#endif movzbl (%esi,%ecx,1),%eax movzbl 1(%esi,%ecx,1),%edx movzbl 2(%esi,%ecx,1),%ecx +#ifdef __PIC__ + popl %esi /* %esi = GOT base again */ +#endif shll $16,%eax shll $8,%edx orl %ecx,%eax @@ -217,7 +308,11 @@ readloop: /* Attempt to use the shortcut first. If it hits, then this vlc term has been decoded. */ +#ifdef __PIC__ + movl dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx +#else movl dv_vlc_class1_shortcut(,%ecx,4),%edx +#endif test $0x80,%edx je done_decode @@ -228,12 +323,19 @@ readloop: movl %ebx,dv_block_t_reorder(%ebp) /* %eax is bits */ - +#ifdef __PIC__ + movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx + + movl dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx + movl dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx + movl dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx +#else movsbl dv_vlc_class_lookup5(%ecx),%ecx movl dv_vlc_index_mask(,%ecx,4),%ebx movl dv_vlc_lookups(,%ecx,4),%edx movl dv_vlc_index_rshift(,%ecx,4),%ecx +#endif andl %eax,%ebx sarl %cl,%ebx @@ -256,7 +358,11 @@ readloop: movl %edx,%ecx sarl $8,%ecx andl $0xff,%ecx +#ifdef __PIC__ + movl sign_mask@GOTOFF(%esi,%ecx,4),%ecx +#else movl sign_mask(,%ecx,4),%ecx +#endif andl %ecx,%eax negl %eax sarl $31,%eax @@ -326,10 +432,22 @@ alldone: slowpath: /* slow path: use dv_decode_vlc */; +#ifdef __PIC__ + pushl %esi + leal vlc@GOTOFF(%esi),%esi + xchgl %esi,(%esp) /* last parameter is &vlc */ +#else pushl $vlc /* last parameter is &vlc */ +#endif pushl %edx /* bits_left */ pushl %eax /* bits */ +#ifdef __PIC__ + xchgl %esi,%ebx /* a PIC PLT entry jumps through the GOT via %ebx, so put the GOT base there */ + call dv_decode_vlc@PLT + xchgl %esi,%ebx /* both are callee-saved: %esi = GOT base and %ebx = its old value again */ +#else call dv_decode_vlc +#endif addl $12,%esp test $0x80,%edx /* If (vlc.run < 0) 
break */ jne escape @@ -365,6 +481,11 @@ dv_parse_video_segment: pushl %esi pushl %ebp +#ifdef __PIC__ + call __i686.get_pc_thunk.si /* %esi = current PC */ + addl $_GLOBAL_OFFSET_TABLE_, %esi /* %esi = base for the @GOTOFF accesses in this function */ +#endif + #define ARGn(N) (20+(4*(N)))(%esp) movl ARGn(1),%eax /* quality */ @@ -373,7 +494,11 @@ dv_parse_video_segment: jz its_mono movl $6,%ebx its_mono: +#ifdef __PIC__ + movl %ebx,n_blocks@GOTOFF(%esi) +#else movl %ebx,n_blocks +#endif /* * ebx seg/b @@ -384,15 +509,22 @@ its_mono: * ebp bl */ movl ARGn(0),%ebx +#ifndef __PIC__ /* in PIC builds %esi holds the GOT base, so the buffer pointer is reloaded at each use instead */ movl dv_videosegment_t_bs(%ebx),%esi movl bitstream_t_buf(%esi),%esi +#endif leal dv_videosegment_t_mb(%ebx),%edi movl $0,%eax movl $0,%ecx macloop: +#ifdef __PIC__ + movl %eax,m@GOTOFF(%esi) + movl %ecx,mb_start@GOTOFF(%esi) +#else movl %eax,m movl %ecx,mb_start +#endif movl ARGn(0),%ebx @@ -400,7 +532,15 @@ macloop: /* mb->qno = bitstream_get(bs,4); */ movl %ecx,%edx shr $3,%edx +#ifdef __PIC__ + pushl %esi /* %esi is the GOT base; borrow it for the buffer pointer */ + movl dv_videosegment_t_bs(%ebx),%esi + movl bitstream_t_buf(%esi),%esi +#endif movzbl 3(%esi,%edx,1),%edx +#ifdef __PIC__ + popl %esi /* %esi = GOT base again */ +#endif andl $0xf,%edx movl %edx,dv_macroblock_t_qno(%edi) @@ -411,7 +551,11 @@ macloop: movl %edx,dv_macroblock_t_eob_count(%edi) /* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */ +#ifdef __PIC__ + movl dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx +#else movl dv_super_map_vertical(,%eax,4),%edx +#endif movl dv_videosegment_t_i(%ebx),%ecx addl %ecx,%edx @@ -422,11 +566,20 @@ skarly: andl $1,%ecx shll $5,%ecx /* ecx = (isPAL ? 
32 : 0) */ +#ifdef __PIC__ + leal mod_10@GOTOFF(%esi,%edx),%edx /* %edx = &mod_10[%edx]; a GOTOFF operand has no room for a second index */ + movzbl (%edx,%ecx,1),%edx /* uses mod_12 for PAL */ +#else movzbl mod_10(%edx,%ecx,1),%edx /* uses mod_12 for PAL */ +#endif movl %edx,dv_macroblock_t_i(%edi) /* mb->j = dv_super_map_horizontal[m]; */ +#ifdef __PIC__ + movl dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx +#else movl dv_super_map_horizontal(,%eax,4),%edx +#endif movl %edx,dv_macroblock_t_j(%edi) /* mb->k = seg->k; */ @@ -445,12 +598,29 @@ blkloop: +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ */ /* dc = bitstream_get(bs,9); */ +#ifdef __PIC__ + movl mb_start@GOTOFF(%esi),%ecx +#else movl mb_start,%ecx +#endif shr $3,%ecx +#ifdef __PIC__ + movzbl blk_start@GOTOFF(%esi,%ebx),%edx +#else movzbl blk_start(%ebx),%edx +#endif addl %ecx,%edx +#ifdef __PIC__ + pushl %esi /* %esi is the GOT base; borrow it for the buffer pointer */ + movl ARGn(1),%esi /* ARGn(1) after the push above is the slot ARGn(0) names elsewhere */ + movl dv_videosegment_t_bs(%esi),%esi + movl bitstream_t_buf(%esi),%esi +#endif movzbl (%esi,%edx,1),%eax /* hi byte */ movzbl 1(%esi,%edx,1),%ecx /* lo byte */ +#ifdef __PIC__ + popl %esi /* %esi = GOT base again */ +#endif shll $8,%eax orl %ecx,%eax @@ -477,7 +647,11 @@ blkloop: /* bl->reorder = &dv_reorder[bl->dct_mode][1]; */ shll $6,%eax +#ifdef __PIC__ + leal dv_reorder@GOTOFF+1(%esi,%eax),%eax /* form the address dv_reorder+1+%eax; addl with a memory operand would add the table's contents */ +#else addl $(dv_reorder+1),%eax +#endif movl %eax,dv_block_t_reorder(%ebp) /* bl->reorder_sentinel = bl->reorder + 63; */ @@ -485,13 +659,22 @@ blkloop: movl %eax,dv_block_t_reorder_sentinel(%ebp) /* bl->offset= mb_start + dv_parse_bit_start[b]; */ +#ifdef __PIC__ + movl mb_start@GOTOFF(%esi),%ecx + movl dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax +#else movl mb_start,%ecx movl dv_parse_bit_start(,%ebx,4),%eax +#endif addl %ecx,%eax movl %eax,dv_block_t_offset(%ebp) /* bl->end= mb_start + dv_parse_bit_end[b]; */ +#ifdef __PIC__ + movl dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax +#else movl dv_parse_bit_end(,%ebx,4),%eax +#endif addl %ecx,%eax movl %eax,dv_block_t_end(%ebp) @@ -503,7 +686,11 @@ blkloop: /* no AC pass. 
Just zero out the remaining coeffs */ movq dv_block_t_coeffs(%ebp),%mm1 pxor %mm0,%mm0 +#ifdef __PIC__ + pand const_f_0_0_0@GOTOFF(%esi),%mm1 +#else pand const_f_0_0_0,%mm1 +#endif movq %mm1,dv_block_t_coeffs(%ebp) movq %mm0,(dv_block_t_coeffs + 8)(%ebp) movq %mm0,(dv_block_t_coeffs + 16)(%ebp) @@ -528,18 +715,33 @@ do_ac_pass: pushl %ebp pushl %edi pushl %eax +#ifdef __PIC__ + xchgl %esi,%ebx /* a PIC PLT entry jumps through the GOT via %ebx, so put the GOT base there */ + call dv_parse_ac_coeffs_pass0@PLT + xchgl %esi,%ebx /* both are callee-saved: %esi = GOT base and %ebx = its old value again */ +#else call dv_parse_ac_coeffs_pass0 +#endif addl $12,%esp done_ac: +#ifdef __PIC__ + movl n_blocks@GOTOFF(%esi),%eax +#else movl n_blocks,%eax +#endif addl $dv_block_t_size,%ebp incl %ebx cmpl %eax,%ebx jnz blkloop +#ifdef __PIC__ + movl m@GOTOFF(%esi),%eax + movl mb_start@GOTOFF(%esi),%ecx +#else movl m,%eax movl mb_start,%ecx +#endif addl $(8 * 80),%ecx addl $dv_macroblock_t_size,%edi incl %eax @@ -557,7 +759,11 @@ done_ac: andl $DV_QUALITY_AC_MASK,%eax cmpl $DV_QUALITY_AC_2,%eax +#ifdef __PIC__ + jz dv_parse_ac_coeffs@PLT /* NOTE(review): tail jump through the PLT; a PIC PLT entry expects the GOT base in %ebx - confirm what %ebx holds at this point */ +#else jz dv_parse_ac_coeffs +#endif movl $0,%eax ret