diff -urp libmpeg3-1.5.2-old/audio/mpeg3audio.c libmpeg3-1.5.2/audio/mpeg3audio.c --- libmpeg3-1.5.2-old/audio/mpeg3audio.c 2003-04-21 08:36:50.000000000 +0200 +++ libmpeg3-1.5.2/audio/mpeg3audio.c 2005-12-22 23:45:39.000000000 +0100 @@ -532,9 +532,9 @@ static int seek(mpeg3audio_t *audio) index = audio->sample_seek / MPEG3_AUDIO_CHUNKSIZE; if(index >= track->total_sample_offsets) index = track->total_sample_offsets - 1; title_number = (track->sample_offsets[index] & - 0xff00000000000000) >> 56; + 0xff00000000000000ULL) >> 56; byte = track->sample_offsets[index] & - 0xffffffffffffff; + 0xffffffffffffffULL; mpeg3demux_open_title(demuxer, title_number); mpeg3demux_seek_byte(demuxer, byte); diff -urp libmpeg3-1.5.2-old/video/mmxidct.S libmpeg3-1.5.2/video/mmxidct.S --- libmpeg3-1.5.2-old/video/mmxidct.S 2002-06-21 14:35:24.000000000 +0200 +++ libmpeg3-1.5.2/video/mmxidct.S 2005-12-22 23:46:25.000000000 +0100 @@ -63,12 +63,25 @@ x0: .align 8 .text + +# undef __i686 /* gcc define gets in our way */ + .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits +.globl __i686.get_pc_thunk.bp + .hidden __i686.get_pc_thunk.bp + .type __i686.get_pc_thunk.bp,@function +__i686.get_pc_thunk.bp: + movl (%esp), %ebp + ret + .align 4 .globl IDCT_mmx .type IDCT_mmx, @function IDCT_mmx: pushl %ebp - movl %esp, %ebp + + call __i686.get_pc_thunk.bp + addl $_GLOBAL_OFFSET_TABLE_, %ebp + pushl %ebx pushl %ecx pushl %edx @@ -84,8 +97,8 @@ IDCT_mmx: pushl $0 pushl $0 - movl 8(%ebp), %esi /* source matrix */ - leal preSC, %ecx + movl 8+13*4(%esp), %esi /* source matrix */ + leal preSC@GOTOFF(%ebp), %ecx /* column 0: even part * use V4, V12, V0, V8 to produce V22..V25 */ @@ -101,7 +114,7 @@ IDCT_mmx: movq %mm1, %mm2 /* added 11/1/96 */ pmulhw 8*8(%esi),%mm5 /* V8 */ psubsw %mm0, %mm1 /* V16 */ - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm1 /* 23170 ->V18 */ paddsw %mm0, %mm2 /* V17 */ movq %mm2, %mm0 /* duplicate V17 */ psraw $1, %mm2 /* t75=t82 */ @@ -142,7 +155,7 @@ IDCT_mmx: paddsw %mm0, %mm3 /* V29 ; free mm0 */ movq %mm7, %mm1 /* duplicate V26 */ psraw $1, %mm3 /* t91=t94 */ - pmulhw x539f539f539f539f,%mm7 /* V33 */ + pmulhw x539f539f539f539f@GOTOFF(%ebp),%mm7 /* V33 */ psraw $1, %mm1 /* t96 */ movq %mm5, %mm0 /* duplicate V2 */ psraw $2, %mm4 /* t85=t87 */ @@ -150,15 +163,15 @@ IDCT_mmx: psubsw %mm4, %mm0 /* V28 ; free mm4 */ movq %mm0, %mm2 /* duplicate V28 */ psraw $1, %mm5 /* t90=t93 */ - pmulhw x4546454645464546,%mm0 /* V35 */ + pmulhw x4546454645464546@GOTOFF(%ebp),%mm0 /* V35 */ psraw $1, %mm2 /* t97 */ movq %mm5, %mm4 /* duplicate t90=t93 */ psubsw %mm2, %mm1 /* V32 ; free mm2 */ - pmulhw x61f861f861f861f8,%mm1 /* V36 */ + pmulhw x61f861f861f861f8@GOTOFF(%ebp),%mm1 /* V36 */ psllw $1, %mm7 /* t107 */ paddsw %mm3, %mm5 /* V31 */ psubsw %mm3, %mm4 /* V30 ; free mm3 */ - pmulhw x5a825a825a825a82,%mm4 /* V34 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp),%mm4 /* V34 */ nop psubsw %mm1, %mm0 /* V38 */ psubsw %mm7, %mm1 /* V37 ; free mm7 */ @@ -225,7 +238,7 @@ IDCT_mmx: psubsw %mm7, %mm1 /* V50 */ pmulhw 8*9(%esi), %mm5 /* V9 */ paddsw %mm7, %mm2 /* V51 */ - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm1 /* 23170 ->V52 */ movq %mm2, %mm6 /* duplicate V51 */ psraw $1, %mm2 /* t138=t144 */ movq %mm3, %mm4 /* duplicate V1 */ @@ -266,11 +279,11 @@ IDCT_mmx: * even more by doing the correction step in a later stage when the number * is actually multiplied by 16 */ - paddw x0005000200010001, %mm4 + paddw x0005000200010001@GOTOFF(%ebp), %mm4 psubsw %mm6, %mm3 /* V60 ; free mm6 */ psraw $1, %mm0 /* t154=t156 */ movq %mm3, %mm1 /* duplicate V60 */ - pmulhw x539f539f539f539f, %mm1 /* V67 */ + pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm1 /* V67 */ movq %mm5, %mm6 /* duplicate V3 */ psraw $2, %mm4 /* t148=t150 */ paddsw %mm4, %mm5 /* V61 */ @@ -279,13 +292,13 @@ IDCT_mmx: psllw $1, %mm1 /* t169 */ paddsw %mm0, %mm5 /* V65 -> result */ psubsw %mm0, %mm4 /* V64 ; free mm0 */ - pmulhw x5a825a825a825a82, %mm4 /* V68 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm4 /* V68 */ psraw $1, %mm3 /* t158 */ psubsw %mm6, %mm3 /* V66 */ movq %mm5, %mm2 /* duplicate V65 */ - pmulhw x61f861f861f861f8, %mm3 /* V70 */ + pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm3 /* V70 */ psllw $1, %mm6 /* t165 */ - pmulhw x4546454645464546, %mm6 /* V69 */ + pmulhw x4546454645464546@GOTOFF(%ebp), %mm6 /* V69 */ psraw $1, %mm2 /* t172 */ /* moved from next block */ movq 8*5(%esi), %mm0 /* V56 */ @@ -410,7 +423,7 @@ IDCT_mmx: * movq 8*13(%esi), %mm4 tmt13 */ psubsw %mm4, %mm3 /* V134 */ - pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm3 /* 23170 ->V136 */ movq 8*9(%esi), %mm6 /* tmt9 */ paddsw %mm4, %mm5 /* V135 ; mm4 free */ movq %mm0, %mm4 /* duplicate tmt1 */ @@ -439,17 +452,17 @@ IDCT_mmx: psubsw %mm7, %mm0 /* V144 */ movq %mm0, %mm3 /* duplicate V144 */ paddsw %mm7, %mm2 /* V147 ; free mm7 */ - pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */ + pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm0 /* 21407-> V151 */ movq %mm1, %mm7 /* duplicate tmt3 */ paddsw %mm5, %mm7 /* V145 */ psubsw %mm5, %mm1 /* V146 ; free mm5 */ psubsw %mm1, %mm3 /* V150 */ movq %mm7, %mm5 /* duplicate V145 */ - pmulhw x4546454645464546, %mm1 /* 17734-> V153 */ + pmulhw x4546454645464546@GOTOFF(%ebp), %mm1 /* 17734-> V153 */ psubsw %mm2, %mm5 /* V148 */ - pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */ + pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm3 /* 25080-> V154 */ psllw $2, %mm0 /* t311 */ - pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm5 /* 23170-> V152 */ paddsw %mm2, %mm7 /* V149 ; free mm2 */ psllw $1, %mm1 /* t313 */ nop /* without the nop - freeze here for one clock */ @@ -557,15 +570,15 @@ IDCT_mmx: paddsw %mm4, %mm3 /* V113 ; free mm4 */ movq %mm0, %mm4 /* duplicate V110 */ paddsw %mm1, %mm2 /* V111 */ - pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */ + pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm0 /* 21407-> V117 */ psubsw %mm1, %mm5 /* V112 ; free mm1 */ psubsw %mm5, %mm4 /* V116 */ movq %mm2, %mm1 /* duplicate V111 */ - pmulhw x4546454645464546, %mm5 /* 17734-> V119 */ + pmulhw x4546454645464546@GOTOFF(%ebp), %mm5 /* 17734-> V119 */ psubsw %mm3, %mm2 /* V114 */ - pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */ + pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm4 /* 25080-> V120 */ paddsw %mm3, %mm1 /* V115 ; free mm3 */ - pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm2 /* 23170-> V118 */ psllw $2, %mm0 /* t266 */ movq %mm1, (%esi) /* save V115 */ psllw $1, %mm5 /* t268 */ @@ -583,7 +596,7 @@ IDCT_mmx: movq %mm6, %mm3 /* duplicate tmt4 */ psubsw %mm0, %mm6 /* V100 */ paddsw %mm0, %mm3 /* V101 ; free mm0 */ - pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm6 /* 23170 ->V102 */ movq %mm7, %mm5 /* duplicate tmt0 */ movq 8*8(%esi), %mm1 /* tmt8 */ paddsw %mm1, %mm7 /* V103 */ @@ -667,9 +680,11 @@ IDCT_mmx: popl %edx popl %ecx popl %ebx - movl %ebp, %esp popl %ebp ret .Lfe1: .size IDCT_mmx,.Lfe1-IDCT_mmx + +.section .note.GNU-stack,"",@progbits + diff -urp libmpeg3-1.5.2-old/video/output.c libmpeg3-1.5.2/video/output.c --- libmpeg3-1.5.2-old/video/output.c 2005-12-22 23:16:37.000000000 +0100 +++ libmpeg3-1.5.2/video/output.c 2005-12-22 23:36:51.000000000 +0100 @@ -5,22 +5,22 @@ #define CLIP(x) ((x) >= 0 ? ((x) < 255 ? (x) : 255) : 0) static long long mpeg3_MMX_0 = 0L; -static unsigned long mpeg3_MMX_10w[] = {0x00100010, 0x00100010}; /*dd 00010 0010h, 000100010h */ -static unsigned long __attribute__((used)) mpeg3_MMX_80w[] = {0x00800080, 0x00800080}; /*dd 00080 0080h, 000800080h */ +static unsigned long long mpeg3_MMX_10w = 0x0010001000100010ULL; /*dd 00010 0010h, 000100010h */ +static unsigned long long mpeg3_MMX_80w = 0x0080008000800080ULL; /*dd 00080 0080h, 000800080h */ -static unsigned long __attribute__((used)) mpeg3_MMX_00FFw[] = {0x00ff00ff, 0x00ff00ff}; /*dd 000FF 00FFh, 000FF00FFh */ +static unsigned long long mpeg3_MMX_00FFw = 0x00ff00ff00ff00ffULL; /*dd 000FF 00FFh, 000FF00FFh */ -static unsigned short __attribute__((used)) mpeg3_MMX_Ublucoeff[] = {0x81, 0x81, 0x81, 0x81}; /*dd 00081 0081h, 000810081h */ -static unsigned short __attribute__((used)) mpeg3_MMX_Vredcoeff[] = {0x66, 0x66, 0x66, 0x66}; /*dd 00066 0066h, 000660066h */ +static unsigned long long mpeg3_MMX_Ublucoeff = 0x0081008100810081ULL; /*dd 00081 0081h, 000810081h */ +static unsigned long long mpeg3_MMX_Vredcoeff = 0x0066006600660066ULL; /*dd 00066 0066h, 000660066h */ -static unsigned short __attribute__((used)) mpeg3_MMX_Ugrncoeff[] = {0xffe8, 0xffe8, 0xffe8, 0xffe8}; /*dd 0FFE7 FFE7h, 0FFE7FFE7h */ -static unsigned short __attribute__((used)) mpeg3_MMX_Vgrncoeff[] = {0xffcd, 0xffcd, 0xffcd, 0xffcd}; /*dd 0FFCC FFCCh, 0FFCCFFCCh */ +static unsigned long long mpeg3_MMX_Ugrncoeff = 0xffe8ffe8ffe8ffe8ULL; /*dd 0FFE7 FFE7h, 0FFE7FFE7h */ +static unsigned long long mpeg3_MMX_Vgrncoeff = 0xffcdffcdffcdffcdULL; /*dd 0FFCC FFCCh, 0FFCCFFCCh */ -static unsigned short __attribute__((used)) mpeg3_MMX_Ycoeff[] = {0x4a, 0x4a, 0x4a, 0x4a}; /*dd 0004A 004Ah, 0004A004Ah */ +static unsigned long long mpeg3_MMX_Ycoeff = 0x004a004a004a004aULL; /*dd 0004A 004Ah, 0004A004Ah */ -static unsigned short __attribute__((used)) mpeg3_MMX_redmask[] = {0xf800, 0xf800, 0xf800, 0xf800}; /*dd 07c00 7c00h, 07c007c00h */ +static unsigned long long mpeg3_MMX_redmask = 0xf800f800f800f800ULL; /*dd 07c00 7c00h, 07c007c00h */ -static unsigned short __attribute__((used)) mpeg3_MMX_grnmask[] = {0x7e0, 0x7e0, 0x7e0, 0x7e0}; /*dd 003e0 03e0h, 003e003e0h */ +static unsigned long long mpeg3_MMX_grnmask = 0x07e007e007e007e0ULL; /*dd 003e0 03e0h, 003e003e0h */ static unsigned char mpeg3_601_to_rgb[256]; @@ -30,7 +30,7 @@ static unsigned char mpeg3_601_to_rgb[25 /* b = (int)(*y + 1.732 * (*cb - 128)); */ #ifdef HAVE_MMX -inline void mpeg3video_rgb16_mmx(unsigned char *lum, +static void mpeg3video_rgb16_mmx(unsigned char *lum, unsigned char *cr, unsigned char *cb, unsigned char *out, @@ -55,25 +55,25 @@ inline void mpeg3video_rgb16_mmx(unsigne "1:\n" "movd (%1), %%mm0\n" /* 4 Cb 0 0 0 0 u3 u2 u1 u0 */ "pxor %%mm7, %%mm7\n" - "movd (%0), %%mm1\n" /* 4 Cr 0 0 0 0 v3 v2 v1 v0 */ + "movd %0, %%mm1\n" /* 4 Cr 0 0 0 0 v3 v2 v1 v0 */ "punpcklbw %%mm7, %%mm0\n" /* 4 W cb 0 u3 0 u2 0 u1 0 u0 */ "punpcklbw %%mm7, %%mm1\n" /* 4 W cr 0 v3 0 v2 0 v1 0 v0 */ - "psubw mpeg3_MMX_80w, %%mm0\n" - "psubw mpeg3_MMX_80w, %%mm1\n" + "psubw %9, %%mm0\n" + "psubw %9, %%mm1\n" "movq %%mm0, %%mm2\n" /* Cb 0 u3 0 u2 0 u1 0 u0 */ "movq %%mm1, %%mm3\n" /* Cr */ - "pmullw mpeg3_MMX_Ugrncoeff, %%mm2\n" /* Cb2green 0 R3 0 R2 0 R1 0 R0 */ + "pmullw %10, %%mm2\n" /* Cb2green 0 R3 0 R2 0 R1 0 R0 */ "movq (%2), %%mm6\n" /* L1 l7 L6 L5 L4 L3 L2 L1 L0 */ - "pmullw mpeg3_MMX_Ublucoeff, %%mm0\n" /* Cb2blue */ - "pand mpeg3_MMX_00FFw, %%mm6\n" /* L1 00 L6 00 L4 00 L2 00 L0 */ - "pmullw mpeg3_MMX_Vgrncoeff, %%mm3\n" /* Cr2green */ + "pmullw %11, %%mm0\n" /* Cb2blue */ + "pand %12, %%mm6\n" /* L1 00 L6 00 L4 00 L2 00 L0 */ + "pmullw %13, %%mm3\n" /* Cr2green */ "movq (%2), %%mm7\n" /* L2 */ - "pmullw mpeg3_MMX_Vredcoeff, %%mm1\n" /* Cr2red */ + "pmullw %14, %%mm1\n" /* Cr2red */ "psrlw $8, %%mm7\n" /* L2 00 L7 00 L5 00 L3 00 L1 */ - "pmullw mpeg3_MMX_Ycoeff, %%mm6\n" /* lum1 */ + "pmullw %15, %%mm6\n" /* lum1 */ "paddw %%mm3, %%mm2\n" /* Cb2green + Cr2green == green */ - "pmullw mpeg3_MMX_Ycoeff, %%mm7\n" /* lum2 */ + "pmullw %15, %%mm7\n" /* lum2 */ "movq %%mm6, %%mm4\n" /* lum1 */ "paddw %%mm0, %%mm6\n" /* lum1 +blue 00 B6 00 B4 00 B2 00 B0 */ @@ -91,11 +91,11 @@ inline void mpeg3video_rgb16_mmx(unsigne "punpcklbw %%mm4, %%mm4\n" "punpcklbw %%mm5, %%mm5\n" - "pand mpeg3_MMX_redmask, %%mm4\n" + "pand %16, %%mm4\n" "psllw $3, %%mm5\n" /* GREEN 1 */ "punpcklbw %%mm6, %%mm6\n" - "pand mpeg3_MMX_grnmask, %%mm5\n" - "pand mpeg3_MMX_redmask, %%mm6\n" + "pand %17, %%mm5\n" + "pand %16, %%mm6\n" "por %%mm5, %%mm4\n" /* */ "psrlw $11, %%mm6\n" /* BLUE 1 */ "movq %%mm3, %%mm5\n" /* lum2 */ @@ -109,23 +109,23 @@ inline void mpeg3video_rgb16_mmx(unsigne "packuswb %%mm3, %%mm3\n" "packuswb %%mm5, %%mm5\n" "packuswb %%mm7, %%mm7\n" - "pand mpeg3_MMX_00FFw, %%mm6\n" /* L3 */ + "pand %12, %%mm6\n" /* L3 */ "punpcklbw %%mm3, %%mm3\n" "punpcklbw %%mm5, %%mm5\n" - "pmullw mpeg3_MMX_Ycoeff, %%mm6\n" /* lum3 */ + "pmullw %15, %%mm6\n" /* lum3 */ "punpcklbw %%mm7, %%mm7\n" "psllw $3, %%mm5\n" /* GREEN 2 */ - "pand mpeg3_MMX_redmask, %%mm7\n" - "pand mpeg3_MMX_redmask, %%mm3\n" + "pand %16, %%mm7\n" + "pand %16, %%mm3\n" "psrlw $11, %%mm7\n" /* BLUE 2 */ - "pand mpeg3_MMX_grnmask, %%mm5\n" + "pand %17, %%mm5\n" "por %%mm7, %%mm3\n" "movq (%2,%3), %%mm7\n" /* L4 */ "por %%mm5, %%mm3\n" /* */ "psrlw $8, %%mm7\n" /* L4 */ "movq %%mm4, %%mm5\n" "punpcklwd %%mm3, %%mm4\n" - "pmullw mpeg3_MMX_Ycoeff, %%mm7\n" /* lum4 */ + "pmullw %15, %%mm7\n" /* lum4 */ "punpckhwd %%mm3, %%mm5\n" "movq %%mm4, (%4)\n" @@ -152,11 +152,11 @@ inline void mpeg3video_rgb16_mmx(unsigne "punpcklbw %%mm5, %%mm5\n" "punpcklbw %%mm6, %%mm6\n" "psllw $3, %%mm5\n" /* GREEN 3 */ - "pand mpeg3_MMX_redmask, %%mm4\n" + "pand %16, %%mm4\n" "psraw $6, %%mm3\n" /* psr 6 */ "psraw $6, %%mm0\n" - "pand mpeg3_MMX_redmask, %%mm6\n" /* BLUE */ - "pand mpeg3_MMX_grnmask, %%mm5\n" + "pand %16, %%mm6\n" /* BLUE */ + "pand %17, %%mm5\n" "psrlw $11, %%mm6\n" /* BLUE 3 */ "por %%mm5, %%mm4\n" "psraw $6, %%mm7\n" @@ -167,11 +167,11 @@ inline void mpeg3video_rgb16_mmx(unsigne "punpcklbw %%mm3, %%mm3\n" "punpcklbw %%mm0, %%mm0\n" "punpcklbw %%mm7, %%mm7\n" - "pand mpeg3_MMX_redmask, %%mm3\n" - "pand mpeg3_MMX_redmask, %%mm7\n" /* BLUE */ + "pand %16, %%mm3\n" + "pand %16, %%mm7\n" /* BLUE */ "psllw $3, %%mm0\n" /* GREEN 4 */ "psrlw $11, %%mm7\n" - "pand mpeg3_MMX_grnmask, %%mm0\n" + "pand %17, %%mm0\n" "por %%mm7, %%mm3\n" "addl $8, %6\n" "por %%mm0, %%mm3\n" @@ -195,7 +195,7 @@ inline void mpeg3video_rgb16_mmx(unsigne "movl $0, %6\n" "cmpl %8, %2\n" "jl 1b\n" - : : "r" (cr), + : : "m" (cr), "r" (cb), "r" (lum), "r" (cols), @@ -203,18 +203,27 @@ inline void mpeg3video_rgb16_mmx(unsigne "r" (col1), "m" (x), "m" (mod), - "m" (y) + "m" (y), + "m" (mpeg3_MMX_80w), + "m" (mpeg3_MMX_Ugrncoeff), + "m" (mpeg3_MMX_Ublucoeff), + "m" (mpeg3_MMX_00FFw), + "m" (mpeg3_MMX_Vgrncoeff), + "m" (mpeg3_MMX_Vredcoeff), + "m" (mpeg3_MMX_Ycoeff), + "m" (mpeg3_MMX_redmask), + "m" (mpeg3_MMX_grnmask) ); } -static unsigned long long __attribute__((used)) mpeg3_MMX_U_80 = 0x0000008000800000; -static unsigned long long __attribute__((used)) mpeg3_MMX_V_80 = 0x0000000000800080; -static long long __attribute__((used)) mpeg3_MMX_U_COEF = 0x00000058ffd30000; -static long long __attribute__((used)) mpeg3_MMX_V_COEF = 0x00000000ffea006f; -static long long __attribute__((used)) mpeg3_MMX_601_Y_COEF = 0x0000004800480048; -static long long __attribute__((used)) mpeg3_MMX_601_Y_DIFF = 0x0000000000000010; +static unsigned long long __attribute__((used)) mpeg3_MMX_U_80 = 0x0000008000800000ULL; +static unsigned long long __attribute__((used)) mpeg3_MMX_V_80 = 0x0000000000800080ULL; +static long long __attribute__((used)) mpeg3_MMX_U_COEF = 0x00000058ffd30000ULL; +static long long __attribute__((used)) mpeg3_MMX_V_COEF = 0x00000000ffea006fULL; +static long long __attribute__((used)) mpeg3_MMX_601_Y_COEF = 0x0000004800480048ULL; +static long long __attribute__((used)) mpeg3_MMX_601_Y_DIFF = 0x0000000000000010ULL; -inline void mpeg3_bgra32_mmx(unsigned long y, +static void mpeg3_bgra32_mmx(unsigned long y, unsigned long u, unsigned long v, unsigned long *output) @@ -239,11 +248,11 @@ asm( "por %%mm5, %%mm2;\n" /* Overlay new v byte 0x0000000000cb00cb */ /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ - "psubw mpeg3_MMX_U_80, %%mm1;\n" /* Subtract 128 from u 0x000000uu00uu0000 */ - "pmullw mpeg3_MMX_U_COEF, %%mm1;\n" /* Multiply u coeffs 0x0000uuuuuuuu0000 */ + "psubw %4, %%mm1;\n" /* Subtract 128 from u 0x000000uu00uu0000 */ + "pmullw %5, %%mm1;\n" /* Multiply u coeffs 0x0000uuuuuuuu0000 */ "psllw $6, %%mm0;\n" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ - "psubw mpeg3_MMX_V_80, %%mm2;\n" /* Subtract 128 from v 0x0000000000cb00cb */ - "pmullw mpeg3_MMX_V_COEF, %%mm2;\n" /* Multiply v coeffs 0x0000crcrcrcrcrcr */ + "psubw %6, %%mm2;\n" /* Subtract 128 from v 0x0000000000cb00cb */ + "pmullw %7, %%mm2;\n" /* Multiply v coeffs 0x0000crcrcrcrcrcr */ /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ "paddsw %%mm1, %%mm0;\n" /* Add u to result */ @@ -252,10 +261,12 @@ asm( "packuswb %%mm0, %%mm0;\n" /* Pack into ARGB 0x0000000000rrggbb */ "movd %%mm0, (%3);\n" /* Store output */ : -: "r" (&y), "r" (&u), "r" (&v), "r" (output)); +: "r" (&y), "r" (&u), "r" (&v), "r" (output), + "m" (mpeg3_MMX_U_80), "m" (mpeg3_MMX_U_COEF), + "m" (mpeg3_MMX_V_80), "m" (mpeg3_MMX_V_COEF)); } -inline void mpeg3_601_bgra32_mmx(unsigned long y, +static void mpeg3_601_bgra32_mmx(unsigned long y, unsigned long u, unsigned long v, unsigned long *output) @@ -264,7 +275,7 @@ asm( /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ /* for bgr24. */ "movd (%0), %%mm0;\n" /* Load y 0x00000000000000yy */ - "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;\n" /* Subtract 16 from y */ + "psubsw %4, %%mm0;\n" /* Subtract 16 from y */ "movd (%1), %%mm1;\n" /* Load u 0x00000000000000cr */ "movq %%mm0, %%mm3;\n" /* Copy y to temp */ "psllq $16, %%mm1;\n" /* Shift u 0x0000000000cr0000 */ @@ -281,11 +292,11 @@ asm( "por %%mm5, %%mm2;\n" /* Overlay new v byte 0x0000000000cb00cb */ /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ - "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;\n" /* Scale and shift y coeffs */ - "psubw mpeg3_MMX_U_80, %%mm1;\n" /* Subtract 128 from u 0x000000uu00uu0000 */ - "pmullw mpeg3_MMX_U_COEF, %%mm1;\n" /* Multiply u coeffs 0x0000uuuuuuuu0000 */ - "psubw mpeg3_MMX_V_80, %%mm2;\n" /* Subtract 128 from v 0x0000000000cb00cb */ - "pmullw mpeg3_MMX_V_COEF, %%mm2;\n" /* Multiply v coeffs 0x0000crcrcrcrcrcr */ + "pmullw %5, %%mm0;\n" /* Scale and shift y coeffs */ + "psubw %6, %%mm1;\n" /* Subtract 128 from u 0x000000uu00uu0000 */ + "pmullw %7, %%mm1;\n" /* Multiply u coeffs 0x0000uuuuuuuu0000 */ + "psubw %8, %%mm2;\n" /* Subtract 128 from v 0x0000000000cb00cb */ + "pmullw %9, %%mm2;\n" /* Multiply v coeffs 0x0000crcrcrcrcrcr */ /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ "paddsw %%mm1, %%mm0;\n" /* Add u to result */ @@ -294,15 +305,18 @@ asm( "packuswb %%mm0, %%mm0;\n" /* Pack into ARGB 0x0000000000rrggbb */ "movd %%mm0, (%3);\n" /* Store output */ : -: "r" (&y), "r" (&u), "r" (&v), "r" (output)); +: "r" (&y), "r" (&u), "r" (&v), "r" (output), + "m" (mpeg3_MMX_601_Y_DIFF), "m" (mpeg3_MMX_601_Y_COEF), + "m" (mpeg3_MMX_U_80), "m" (mpeg3_MMX_U_COEF), + "m" (mpeg3_MMX_V_80), "m" (mpeg3_MMX_V_COEF)); } -static unsigned long long __attribute__((used)) mpeg3_MMX_U_80_RGB = 0x0000000000800080; -static unsigned long long __attribute__((used)) mpeg3_MMX_V_80_RGB = 0x0000008000800000; -static long long __attribute__((used)) mpeg3_MMX_U_COEF_RGB = 0x00000000ffd30058; -static long long __attribute__((used)) mpeg3_MMX_V_COEF_RGB = 0x0000006fffea0000; +static unsigned long long __attribute__((used)) mpeg3_MMX_U_80_RGB = 0x0000000000800080ULL; +static unsigned long long __attribute__((used)) mpeg3_MMX_V_80_RGB = 0x0000008000800000ULL; +static long long __attribute__((used)) mpeg3_MMX_U_COEF_RGB = 0x00000000ffd30058ULL; +static long long __attribute__((used)) mpeg3_MMX_V_COEF_RGB = 0x0000006fffea0000ULL; -inline void mpeg3_rgba32_mmx(unsigned long y, +static void mpeg3_rgba32_mmx(unsigned long y, unsigned long u, unsigned long v, unsigned long *output) @@ -327,11 +341,11 @@ asm( "por %%mm5, %%mm2;\n" /* Overlay new u byte 0x0000000000uu00uu */ /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ - "psubw mpeg3_MMX_V_80_RGB, %%mm1;\n" /* Subtract 128 from v 0x000000vv00vv0000 */ - "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;\n" /* Multiply v coeffs 0x0000vvvvvvvv0000 */ + "psubw %4, %%mm1;\n" /* Subtract 128 from v 0x000000vv00vv0000 */ + "pmullw %5, %%mm1;\n" /* Multiply v coeffs 0x0000vvvvvvvv0000 */ "psllw $6, %%mm0;\n" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ - "psubw mpeg3_MMX_U_80_RGB, %%mm2;\n" /* Subtract 128 from u 0x0000000000uu00uu */ - "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;\n" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ + "psubw %6, %%mm2;\n" /* Subtract 128 from u 0x0000000000uu00uu */ + "pmullw %7, %%mm2;\n" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ "paddsw %%mm1, %%mm0;\n" /* Add v to result */ @@ -340,10 +354,12 @@ asm( "packuswb %%mm0, %%mm0;\n" /* Pack into RGBA 0x0000000000bbggrr */ "movd %%mm0, (%3);\n" /* Store output */ : -: "r" (&y), "r" (&v), "r" (&u), "r" (output)); +: "r" (&y), "r" (&v), "r" (&u), "r" (output), + "m" (mpeg3_MMX_V_80_RGB), "m" (mpeg3_MMX_V_COEF_RGB), + "m" (mpeg3_MMX_U_80_RGB), "m" (mpeg3_MMX_U_COEF_RGB)); } -inline void mpeg3_601_rgba32_mmx(unsigned long y, +static void mpeg3_601_rgba32_mmx(unsigned long y, unsigned long u, unsigned long v, unsigned long *output) @@ -352,7 +368,7 @@ asm( /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ /* for rgb24. */ "movd (%0), %%mm0;\n" /* Load y 0x00000000000000yy */ - "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;\n" /* Subtract 16 from y */ + "psubsw %4, %%mm0;\n" /* Subtract 16 from y */ "movd (%1), %%mm1;\n" /* Load v 0x00000000000000vv */ "movq %%mm0, %%mm3;\n" /* Copy y to temp */ "psllq $16, %%mm1;\n" /* Shift v 0x0000000000vv0000 */ @@ -369,11 +385,11 @@ asm( "por %%mm5, %%mm2;\n" /* Overlay new u byte 0x0000000000uu00uu */ /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ - "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;\n" /* Scale y coeffs */ - "psubw mpeg3_MMX_V_80_RGB, %%mm1;\n" /* Subtract 128 from v 0x000000vv00vv0000 */ - "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;\n" /* Multiply v coeffs 0x0000vvvvvvvv0000 */ - "psubw mpeg3_MMX_U_80_RGB, %%mm2;\n" /* Subtract 128 from u 0x0000000000uu00uu */ - "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;\n" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ + "pmullw %5, %%mm0;\n" /* Scale y coeffs */ + "psubw %6, %%mm1;\n" /* Subtract 128 from v 0x000000vv00vv0000 */ + "pmullw %7, %%mm1;\n" /* Multiply v coeffs 0x0000vvvvvvvv0000 */ + "psubw %8, %%mm2;\n" /* Subtract 128 from u 0x0000000000uu00uu */ + "pmullw %9, %%mm2;\n" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ "paddsw %%mm1, %%mm0;\n" /* Add v to result */ @@ -382,7 +398,10 @@ asm( "packuswb %%mm0, %%mm0;\n" /* Pack into RGBA 0x0000000000bbggrr */ "movd %%mm0, (%3);\n" /* Store output */ : -: "r" (&y), "r" (&v), "r" (&u), "r" (output)); +: "r" (&y), "r" (&v), "r" (&u), "r" (output), + "m" (mpeg3_MMX_601_Y_DIFF), "m" (mpeg3_MMX_601_Y_COEF), + "m" (mpeg3_MMX_V_80_RGB), "m" (mpeg3_MMX_V_COEF_RGB), + "m" (mpeg3_MMX_U_80_RGB), "m" (mpeg3_MMX_U_COEF_RGB)); } #endif diff -urp libmpeg3-1.5.2-old/video/reconmmx.s libmpeg3-1.5.2/video/reconmmx.s --- libmpeg3-1.5.2-old/video/reconmmx.s 2002-06-21 14:35:24.000000000 +0200 +++ libmpeg3-1.5.2/video/reconmmx.s 2005-12-22 23:53:17.000000000 +0100 @@ -32,18 +32,28 @@ global add_block_mmx global set_block_mmx +extern _GLOBAL_OFFSET_TABLE_ +get_pc.bp: + mov ebp, [esp] + retn + align 16 rech_mmx: push esi push edi push ecx push ebx + push ebp + + call get_pc.bp + add ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc + mov esi, [esp+source] mov edi, [esp+dest] mov ecx, [esp+h] mov ebx, [esp+lx2] - movq mm5, [MASK_AND] - movq mm6, [ADD_1] + movq mm5, [ebp + MASK_AND wrt ..gotoff] + movq mm6, [ebp + ADD_1 wrt ..gotoff] .rech1: movq mm0,[esi] movq mm1,[esi+1] @@ -68,6 +78,7 @@ rech_mmx: dec ecx jnz .rech1 emms + pop ebp pop ebx pop ecx pop edi @@ -80,13 +91,18 @@ rechc_mmx: push edi push ecx push ebx + push ebp + + call get_pc.bp + add ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc + ; sub esp, LocalFrameSize mov esi, [esp+source] mov edi, [esp+dest] mov ecx, [esp+h] mov ebx, [esp+lx2] - movq mm5, [MASK_AND] - movq mm6, [ADD_1] + movq mm5, [ebp + MASK_AND wrt ..gotoff] + movq mm6, [ebp + ADD_1 wrt ..gotoff] .rechc1: movq mm0,[esi] movq mm1,[esi+1] @@ -103,6 +119,7 @@ rechc_mmx: jnz .rechc1 emms ; add esp, LocalFrameSize + pop ebp pop ebx pop ecx pop edi @@ -125,13 +142,18 @@ recva_mmx: push ecx push ebx push edx + push ebp + + call get_pc.bp + add ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc + mov esi, [esp+source] mov edi, [esp+dest] mov ecx, [esp+h] mov ebx, [esp+lx2] mov edx, [esp+lx] - movq mm7, [MASK_AND] - movq mm6, [ADD_1] + movq mm7, [ebp + MASK_AND wrt ..gotoff] + movq mm6, [ebp + ADD_1 wrt ..gotoff] .recva1: movq mm0,[esi] movq mm1,[esi+edx] @@ -170,6 +192,7 @@ recva_mmx: dec ecx jnz near .recva1 emms + pop ebp pop edx pop ebx pop ecx @@ -184,13 +207,18 @@ recvac_mmx: push ecx push ebx push edx + push ebp + + call get_pc.bp + add ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc + mov esi, [esp+source] mov edi, [esp+dest] mov ecx, [esp+h] mov ebx, [esp+lx2] mov edx, [esp+lx] - movq mm5, [MASK_AND] - movq mm6, [ADD_1] + movq mm5, [ebp + MASK_AND wrt ..gotoff] + movq mm6, [ebp + ADD_1 wrt ..gotoff] .recvac1: movq mm0,[esi] movq mm1,[esi+edx] @@ -213,6 +241,7 @@ recvac_mmx: dec ecx jnz .recvac1 emms + pop ebp pop edx pop ebx pop ecx @@ -269,10 +298,15 @@ set_block_mmx: push ecx push ebx push edx + push ebp + + call get_pc.bp + add ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc + mov esi, [esp+bp] mov edi, [esp+rfp] mov ebx, [esp+iincr] - movq mm7, [PLUS_128] + movq mm7, [ebp + PLUS_128 wrt ..gotoff] %rep 4 movq mm0, [esi] movq mm1, [esi+8] @@ -291,6 +325,7 @@ set_block_mmx: add edi, ebx %endrep emms + pop ebp pop edx pop ebx pop ecx @@ -298,4 +333,5 @@ set_block_mmx: pop esi ret +section .note.GNU-stack noalloc noexec nowrite progbits diff -urp libmpeg3-1.5.2-old/video/reconstruct.c libmpeg3-1.5.2/video/reconstruct.c --- libmpeg3-1.5.2-old/video/reconstruct.c 2005-12-22 23:16:37.000000000 +0100 +++ libmpeg3-1.5.2/video/reconstruct.c 2005-12-22 23:19:43.000000000 +0100 @@ -334,8 +334,8 @@ static inline void reca_mmx(unsigned cha ); #else /* No 3dnow */ __asm__ ( - "movq MASK_AND, %%mm5\n" - "movq ADD_1, %%mm6\n" + "movq %4, %%mm5\n" + "movq %5, %%mm6\n" "1:\t" "movq (%1),%%mm0\n" /* Load 16 pixels from each row */ "movq (%2),%%mm1\n" @@ -360,7 +360,8 @@ static inline void reca_mmx(unsigned cha "leal (%2, %3), %2\n" "jnz 1b\n" : - : "c" (h), "r" (s), "r" (d), "r" (lx2) + : "c" (h), "r" (s), "r" (d), "r" (lx2), + "m" (MASK_AND), "m" (ADD_1) ); #endif } @@ -384,8 +385,8 @@ static inline void recac_mmx(unsigned ch ); #else /* No 3dnow */ __asm__ ( - "movq MASK_AND, %%mm5\n" - "movq ADD_1, %%mm6\n" + "movq %4, %%mm5\n" + "movq %5, %%mm6\n" "1:\t" "movq (%1),%%mm0\n" "movq (%2),%%mm1\n" @@ -401,7 +402,8 @@ static inline void recac_mmx(unsigned ch "leal (%2, %3), %2\n" "jnz 1b\n" : - : "c" (h), "r" (s), "r" (d), "r" (lx2) + : "c" (h), "r" (s), "r" (d), "r" (lx2), + "m" (MASK_AND), "m" (ADD_1) ); #endif } @@ -432,8 +434,8 @@ static inline void recv_mmx(unsigned cha ); #else __asm__ ( - "movq MASK_AND, %%mm5\n" - "movq ADD_1, %%mm6\n" + "movq %5, %%mm5\n" + "movq %6, %%mm6\n" "1:\t" "movq (%1), %%mm0\n" /* 8 s */ "movq (%4), %%mm1\n" /* 8 s +lx */ @@ -459,7 +461,8 @@ static inline void recv_mmx(unsigned cha "leal (%2, %3), %2\n" "jnz 1b\n" : - : "c" (h), "r" (s), "r" (d), "r" (lx2), "r" (s +lx) + : "c" (h), "r" (s), "r" (d), "r" (lx2), "r" (s +lx), + "m" (MASK_AND), "m" (ADD_1) ); #endif } @@ -484,8 +487,8 @@ static inline void recvc_mmx(unsigned ch ); #else __asm__ ( - "movq MASK_AND, %%mm5\n" - "movq ADD_1, %%mm6\n" + "movq %5, %%mm5\n" + "movq %6, %%mm6\n" "1:\t" "movq (%1), %%mm0\n" /* 8 s */ "movq (%4), %%mm1\n" /* 8 s +lx */ @@ -502,7 +505,8 @@ static inline void recvc_mmx(unsigned ch "leal (%2, %3), %2\n" "jnz 1b\n" : - : "c" (h), "r" (s), "r" (d), "r" (lx2), "r" (s +lx) + : "c" (h), "r" (s), "r" (d), "r" (lx2), "r" (s +lx), + "m" (MASK_AND), "m" (ADD_1) ); #endif } diff -urp libmpeg3-1.5.2-old/video/seek.c libmpeg3-1.5.2/video/seek.c --- libmpeg3-1.5.2-old/video/seek.c 2003-04-21 08:36:50.000000000 +0200 +++ libmpeg3-1.5.2/video/seek.c 2005-12-22 23:45:55.000000000 +0100 @@ -263,9 +263,9 @@ int mpeg3video_seek(mpeg3video_t *video) frame = track->keyframe_numbers[i]; title_number = (track->frame_offsets[frame] & - 0xff00000000000000) >> 56; + 0xff00000000000000ULL) >> 56; byte = track->frame_offsets[frame] & - 0xffffffffffffff; + 0xffffffffffffffULL; video->framenum = track->keyframe_numbers[i]; diff -urp libmpeg3-1.5.2-old/video/slice.c libmpeg3-1.5.2/video/slice.c --- libmpeg3-1.5.2-old/video/slice.c 2005-12-22 23:16:37.000000000 +0100 +++ libmpeg3-1.5.2/video/slice.c 2005-12-22 23:19:43.000000000 +0100 @@ -212,7 +212,7 @@ static inline int mpeg3video_addblock(mp "movd (%2), %%mm0\n" /* " 0 0 0 v1" */ "punpcklwd %%mm0, %%mm0\n" /* " 0 0 v1 v1" */ "punpcklwd %%mm0, %%mm0\n" - "paddw MMX_128, %%mm0\n" + "paddw %3, %%mm0\n" "packuswb %%mm0, %%mm0\n" "leal (%0,%1,2), %%eax\n" @@ -228,13 +228,13 @@ static inline int mpeg3video_addblock(mp "movq %%mm0, (%%eax)\n" "movq %%mm0, (%%eax, %1)\n" : - : "D" (rfp), "c" (iincr), "b" (bp) + : "r" (rfp), "r" (iincr), "r" (bp), "m" (MMX_128) : "eax"); } else { __asm__ __volatile__( - "movq MMX_128,%%mm4\n" + "movq %4,%%mm4\n" ".align 8\n" "1:" "movq (%1), %%mm0\n" @@ -260,7 +260,7 @@ static inline int mpeg3video_addblock(mp "leal (%2,%3,2), %2\n" "loop 1b\n" : - : "c" (4), "r" (bp), "r" (rfp), "r" (iincr) + : "c" (4), "r" (bp), "r" (rfp), "r" (iincr), "m" (MMX_128) ); } }