diff --git a/configure.in b/configure.in index ecb124b..99be080 100644 --- a/configure.in +++ b/configure.in @@ -705,6 +705,41 @@ AC_SUBST(MMX_CFLAGS) AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes) dnl =========================================================================== +dnl Check for VMX/Altivec +if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then + VMX_CFLAGS="-faltivec" +else + VMX_CFLAGS="-maltivec -mabi=altivec" +fi +dnl Probe the compiler: build a small AltiVec program with VMX_CFLAGS appended to CFLAGS. +have_vmx_intrinsics=no +AC_MSG_CHECKING(For VMX/Altivec intrinsics in the compiler) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="$CFLAGS $VMX_CFLAGS" +AC_COMPILE_IFELSE([ +#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4)) +#error "Need GCC >= 3.4 for sane altivec support" +#endif +#include <altivec.h> +int main () { + vector unsigned int v = vec_splat_u32 (1); + v = vec_sub (v, v); + return 0; +}], have_vmx_intrinsics=yes) +CFLAGS=$xserver_save_CFLAGS +AC_MSG_RESULT($have_vmx_intrinsics) + +if test $have_vmx_intrinsics = yes ; then + AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics]) +else + VMX_CFLAGS= +fi +AC_SUBST(VMX_CFLAGS) + +AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes) + +dnl =========================================================================== + AC_ARG_ENABLE(gcov, AS_HELP_STRING([--enable-gcov], diff --git a/pixman/configure.in b/pixman/configure.in index c9cdae8..9a91697 100644 --- a/pixman/configure.in +++ b/pixman/configure.in @@ -83,6 +83,40 @@ AC_SUBST(MMX_CFLAGS) AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes) dnl =========================================================================== +dnl Check for VMX/Altivec +if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then + VMX_CFLAGS="-faltivec" +else + VMX_CFLAGS="-maltivec -mabi=altivec" +fi +dnl Probe the compiler: build a small AltiVec program with VMX_CFLAGS appended to CFLAGS. +have_vmx_intrinsics=no +AC_MSG_CHECKING(For VMX/Altivec intrinsics in the compiler) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="$CFLAGS $VMX_CFLAGS" +AC_COMPILE_IFELSE([ +#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ 
== 3 && __GNUC_MINOR__ < 4)) +#error "Need GCC >= 3.4 for sane altivec support" +#endif +#include <altivec.h> +int main () { + vector unsigned int v = vec_splat_u32 (1); + v = vec_sub (v, v); + return 0; +}], have_vmx_intrinsics=yes) +CFLAGS=$xserver_save_CFLAGS +AC_MSG_RESULT($have_vmx_intrinsics) +dnl Define USE_VMX when the probe compiled and clear VMX_CFLAGS when it did not. +if test $have_vmx_intrinsics = yes ; then + AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics]) +else + VMX_CFLAGS= +fi +AC_SUBST(VMX_CFLAGS) + +AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes) + +dnl =========================================================================== AC_OUTPUT([ libpixman.pc diff --git a/pixman/src/Makefile.am b/pixman/src/Makefile.am index aa04c31..0987fb5 100644 --- a/pixman/src/Makefile.am +++ b/pixman/src/Makefile.am @@ -39,4 +39,13 @@ libpixman_mmx_la_CFLAGS = @MMX_CFLAGS@ $ libpixman_la_LIBADD = libpixman-mmx.la endif +if USE_VMX +noinst_LTLIBRARIES += libpixman-vmx.la +libpixman_vmx_la_SOURCES = \ + fbvmx.c \ + fbvmx.h +libpixman_vmx_la_CFLAGS = @VMX_CFLAGS@ $(WARN_CFLAGS) +libpixman_la_LIBADD = libpixman-vmx.la +endif + INCLUDES = -I$(top_srcdir) -I$(srcdir) @WARN_CFLAGS@ diff --git a/pixman/src/fbpict.c b/pixman/src/fbpict.c index 63b1cbc..af22ef5 100644 --- a/pixman/src/fbpict.c +++ b/pixman/src/fbpict.c @@ -30,6 +30,7 @@ #include "fbpict.h" #include "fbmmx.h" +#include "fbvmx.h" static CARD32 fbOver (CARD32 x, CARD32 y) @@ -1411,6 +1412,14 @@ pixman_composite (pixman_operator_t op, } #endif +#ifdef USE_VMX + static Bool vmx_setup = FALSE; + if (!vmx_setup) { + fbComposeSetupVMX(); + vmx_setup = TRUE; + } +#endif + xDst += pDst->pDrawable->x; yDst += pDst->pDrawable->y; if (pSrc->pDrawable) { diff --git a/pixman/src/fbvmx.c b/pixman/src/fbvmx.c new file mode 100644 index 0000000..0b7d6fa --- /dev/null +++ b/pixman/src/fbvmx.c @@ -0,0 +1,993 @@ +/* + * Copyright © 2006 Luca Barbato + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided 
that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Red Hat not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. Red Hat makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Luca Barbato (lu_zero@gentoo.org) + * + * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell + */ + +#include "fbpict.h" +#include "fbvmx.h" +#include <altivec.h> + +#include <signal.h> +#include <setjmp.h> +/* Jump target and in-progress flag shared by fbHaveVMX() and its SIGILL handler. */ +static sigjmp_buf jmp; +static volatile sig_atomic_t in_test = 0; +/* SIGILL handler: if no probe is running it restores SIG_DFL and re-raises; it then clears in_test and siglongjmps to jmp. */ +static void vmx_test (int sig) { + if (!in_test) { + signal(sig, SIG_DFL); + raise (sig); + } + in_test = 0; + siglongjmp (jmp, 1); +} +/* Runtime probe: executes one "vor" instruction under the SIGILL handler; returns 1 if it ran, 0 if the handler jumped back. */ +pixman_private +Bool fbHaveVMX(void) { + signal (SIGILL, vmx_test); + if (sigsetjmp (jmp, 1)) { + signal (SIGILL, SIG_DFL); + } else { + in_test = 1; + asm volatile ( "vor 0, 0, 0" ); + signal (SIGILL, SIG_DFL); + return 1; + } + return 0; +} +/* Builds a vector in which every byte of each 32-bit element is that element's byte 0 (permute indices 0x00, 0x04, 0x08, 0x0C). */ +static __inline__ vector unsigned int +splat_alpha(vector unsigned int pix) { + return vec_perm(pix, pix, + (vector unsigned char)AVV(0x00,0x00,0x00,0x00, 0x04,0x04,0x04,0x04, + 0x08,0x08,0x08,0x08, 0x0C,0x0C,0x0C,0x0C)); +} + +static __inline__ vector unsigned int +pix_multiply(vector unsigned int p, vector unsigned int a) +{ + vector unsigned short 
hi, lo, mod; + /* unpack to short */ + hi = (vector unsigned short) + vec_mergeh((vector unsigned char)AVV(0), + (vector unsigned char)p); + mod = (vector unsigned short) + vec_mergeh((vector unsigned char)AVV(0), + (vector unsigned char)a); + + hi = vec_mladd(hi, mod, (vector unsigned short) + AVV(0x0080,0x0080,0x0080,0x0080, + 0x0080,0x0080,0x0080,0x0080)); + + hi = vec_adds(hi, vec_sr(hi, vec_splat_u16(8))); + + hi = vec_sr(hi, vec_splat_u16(8)); + + /* unpack to short */ + lo = (vector unsigned short) + vec_mergel((vector unsigned char)AVV(0), + (vector unsigned char)p); + mod = (vector unsigned short) + vec_mergel((vector unsigned char)AVV(0), + (vector unsigned char)a); + + lo = vec_mladd(lo, mod, (vector unsigned short) + AVV(0x0080,0x0080,0x0080,0x0080, + 0x0080,0x0080,0x0080,0x0080)); + + lo = vec_adds(lo, vec_sr(lo, vec_splat_u16(8))); + + lo = vec_sr(lo, vec_splat_u16(8)); + + return vec_packsu(hi, lo); +} + +static __inline__ vector unsigned int +pix_add (vector unsigned int a, vector unsigned int b) +{ + return vec_adds ((vector unsigned char)a, + (vector unsigned char)b); +} + +static __inline__ vector unsigned int +pix_add_mul (vector unsigned int x, vector unsigned int a, + vector unsigned int y, vector unsigned int b) +{ + vector unsigned short hi, lo, mod, hiy, loy, mody; + + hi = (vector unsigned short) + vec_mergeh((vector unsigned char)AVV(0), + (vector unsigned char)x); + mod = (vector unsigned short) + vec_mergeh((vector unsigned char)AVV(0), + (vector unsigned char)a); + hiy = (vector unsigned short) + vec_mergeh((vector unsigned char)AVV(0), + (vector unsigned char)y); + mody = (vector unsigned short) + vec_mergeh((vector unsigned char)AVV(0), + (vector unsigned char)b); + + hi = vec_mladd(hi, mod, (vector unsigned short) + AVV(0x0080,0x0080,0x0080,0x0080, + 0x0080,0x0080,0x0080,0x0080)); + + hi = vec_mladd(hiy, mody, hi); + + hi = vec_adds(hi, vec_sr(hi, vec_splat_u16(8))); + + hi = vec_sr(hi, vec_splat_u16(8)); + + lo = (vector unsigned 
short) + vec_mergel((vector unsigned char)AVV(0), + (vector unsigned char)x); + mod = (vector unsigned short) + vec_mergel((vector unsigned char)AVV(0), + (vector unsigned char)a); + + loy = (vector unsigned short) + vec_mergel((vector unsigned char)AVV(0), + (vector unsigned char)y); + mody = (vector unsigned short) + vec_mergel((vector unsigned char)AVV(0), + (vector unsigned char)b); + + lo = vec_mladd(lo, mod, (vector unsigned short) + AVV(0x0080,0x0080,0x0080,0x0080, + 0x0080,0x0080,0x0080,0x0080)); + + lo = vec_mladd(loy, mody, lo); + + lo = vec_adds(lo, vec_sr(lo, vec_splat_u16(8))); + + lo = vec_sr(lo, vec_splat_u16(8)); + + return vec_packsu(hi, lo); +} + +static __inline__ vector unsigned int +negate (vector unsigned int src) +{ + return vec_nor (src, src); +} + +static __inline__ vector unsigned int +over (vector unsigned int src, vector unsigned int srca, + vector unsigned int dest) +{ + vector unsigned char tmp = pix_multiply(dest, negate (srca)); + tmp = vec_adds((vector unsigned char)src, tmp); + return tmp; +} + +static FASTCALL void +vmxCombineMaskU (CARD32 *src, const CARD32 *msk, int width) +{ + int i; + vector unsigned int vsrc, vmsk; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + src_mask, msk_mask, store_mask; + + src_mask = vec_lvsl(0, src); + msk_mask = vec_lvsl(0, msk); + store_mask = vec_lvsr(0, src); + + for (i = width/4; i > 0; i--) { + + tmp1 = vec_ld(0, msk); + + tmp2 = vec_ld(15, msk); + + tmp3 = vec_ld(0, src); + vmsk = vec_perm(tmp1, tmp2, msk_mask); + + tmp4 = vec_ld(15, src); + vsrc = vec_perm(tmp3, tmp4, src_mask); + + vmsk = splat_alpha(vmsk); + + vsrc = pix_multiply(vsrc, vmsk); + + edges = vec_perm(tmp4, tmp3, src_mask); + + tmp3 = vec_perm((vector unsigned char)vsrc, edges, store_mask); + + tmp1 = vec_perm(edges, (vector unsigned char)vsrc, store_mask); + + vec_st((vector unsigned int) tmp3, 15, src ); + + vec_st((vector unsigned int) tmp1, 0, src ); + + msk+=4; + src+=4; + } + + for (i = width%4; --i >= 0;) { + 
CARD32 a = msk[i] >> 24; + CARD32 s = src[i]; + FbByteMul(s, a); + src[i] = s; + } +} + +static FASTCALL void +vmxCombineOverU (CARD32 *dest, const CARD32 *src, int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + dest_mask = vec_lvsl(0, dest); + src_mask = vec_lvsl(0, src); + store_mask = vec_lvsr(0, dest); + + for (i = width/4; i > 0; i--) { + + tmp1 = vec_ld(0, src); + + tmp2 = vec_ld(15, src); + + tmp3 = vec_ld(0, dest); + vsrc = vec_perm(tmp1, tmp2, src_mask); + + tmp4 = vec_ld(15, dest); + vdest = vec_perm(tmp3, tmp4, dest_mask); + + tmp1 = splat_alpha(vsrc); + + vdest = over(vsrc, tmp1, vdest); + + edges = vec_perm(tmp4, tmp3, dest_mask); + + tmp3 = vec_perm((vector unsigned char)vdest, edges, store_mask); + + tmp1 = vec_perm(edges, (vector unsigned char)vdest, store_mask); + + vec_st((vector unsigned int) tmp3, 15, dest ); + + vec_st((vector unsigned int) tmp1, 0, dest ); + + src+=4; + dest+=4; + } + + for (i = width%4; --i >=0;) { + CARD32 s = src[i]; + CARD32 d = dest[i]; + CARD32 ia = Alpha(~s); + + FbByteMulAdd(d, ia, s); + dest[i] = d; + } +} +#if 0 +static FASTCALL void +vmxCombineOverU (CARD32 *dest, const CARD32 *src, int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + dest_mask = vec_lvsl(0, dest); + src_mask = vec_lvsl(0, src); + store_mask = vec_lvsr(0, dest); + + for (i = width/4; i > 0; i--) { + + tmp1 = vec_ld(0, src); + + tmp2 = vec_ld(15, src); + + tmp3 = vec_ld(0, dest); + vsrc = vec_perm(tmp1, tmp2, src_mask); + + tmp4 = vec_ld(15, dest); + vdest = vec_perm(tmp3, tmp4, dest_mask); + + tmp1 = vec_unpackh((vector pixel)vsrc); + tmp2 = vec_unpackh((vector pixel)vdest); + + tmp1 = over(tmp1, splat_alpha(tmp1), tmp2); + + edges = vec_perm(tmp4, tmp3, dest_mask); + + tmp2 = vec_unpackl((vector pixel)vsrc); + tmp3 = vec_unpackl((vector pixel)vdest); + + 
tmp2 = over(tmp2, splat_alpha(tmp2), tmp3); + + vdest = vec_packpx((vector unsigned int)tmp1, + (vector unsigned int)tmp2); + + tmp3 = vec_perm((vector unsigned char)vdest, edges, store_mask); + + tmp1 = vec_perm(edges, (vector unsigned char)vdest, store_mask); + + vec_st((vector unsigned int) tmp3, 15, dest ); + + vec_st((vector unsigned int) tmp1, 0, dest ); + + src+=4; + dest+=4; + } + + for (i = width%4; --i >=0;) { + CARD32 s = src[i]; + CARD32 d = dest[i]; + CARD32 ia = Alpha(~s); + + FbByteMulAdd(d, ia, s); + dest[i] = d; + } +} +#endif + +/* OVER_REVERSE combiner: dest = dest + src * (1 - alpha(dest)); four pixels per vector iteration, then a scalar tail for the remaining width % 4 pixels. */ +static FASTCALL void +vmxCombineOverReverseU (CARD32 *dest, const CARD32 *src, int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + dest_mask = vec_lvsl(0, dest); + src_mask = vec_lvsl(0, src); + store_mask = vec_lvsr(0, dest); + + for (i = width/4; i > 0; i--) { + + tmp1 = vec_ld(0, src); + + tmp2 = vec_ld(15, src); + + tmp3 = vec_ld(0, dest); + vsrc = vec_perm(tmp1, tmp2, src_mask); + + tmp4 = vec_ld(15, dest); + vdest = vec_perm(tmp3, tmp4, dest_mask); + + tmp1 = splat_alpha(vdest); + + vdest = over(vdest, tmp1, vsrc); + + edges = vec_perm(tmp4, tmp3, dest_mask); + + tmp3 = vec_perm((vector unsigned char)vdest, edges, store_mask); + + tmp1 = vec_perm(edges, (vector unsigned char)vdest, store_mask); + + vec_st((vector unsigned int) tmp3, 15, dest ); + + vec_st((vector unsigned int) tmp1, 0, dest ); + + src+=4; + dest+=4; + } + + for (i = width%4; --i >=0;) { + CARD32 s = src[i]; + CARD32 d = dest[i]; + CARD32 ia = Alpha(~d); + /* mirror the vector path: scale src by the inverse dest alpha, then add dest */ + FbByteMulAdd(s, ia, d); + dest[i] = s; + } +} + +static FASTCALL void +vmxCombineInU (CARD32 *dest, const CARD32 *src, int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + dest_mask = vec_lvsl(0, dest); + src_mask = vec_lvsl(0, src); + store_mask = vec_lvsr(0, dest); + + for (i = width/4; i > 0; i--) { 
+ + tmp1 = vec_ld(0, src); + + tmp2 = vec_ld(15, src); + + tmp3 = vec_ld(0, dest); + vsrc = vec_perm(tmp1, tmp2, src_mask); + + tmp4 = vec_ld(15, dest); + vdest = vec_perm(tmp3, tmp4, dest_mask); + + tmp1 = splat_alpha(vdest); + + vdest = pix_multiply(vsrc, tmp1); + + edges = vec_perm(tmp4, tmp3, dest_mask); + + tmp3 = vec_perm((vector unsigned char)vdest, edges, store_mask); + + tmp1 = vec_perm(edges, (vector unsigned char)vdest, store_mask); + + vec_st((vector unsigned int) tmp3, 15, dest ); + + vec_st((vector unsigned int) tmp1, 0, dest ); + + src+=4; + dest+=4; + } + + for (i = width%4; --i >=0;) { + + CARD32 s = src[i]; + CARD32 a = Alpha(dest[i]); + FbByteMul(s, a); + dest[i] = s; + } +} + +static FASTCALL void +vmxCombineInReverseU (CARD32 *dest, const CARD32 *src, int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + dest_mask = vec_lvsl(0, dest); + src_mask = vec_lvsl(0, src); + store_mask = vec_lvsr(0, dest); + + for (i = width/4; i > 0; i--) { + + tmp1 = vec_ld(0, src); + + tmp2 = vec_ld(15, src); + + tmp3 = vec_ld(0, dest); + vsrc = vec_perm(tmp1, tmp2, src_mask); + + tmp4 = vec_ld(15, dest); + vdest = vec_perm(tmp3, tmp4, dest_mask); + + tmp1 = splat_alpha(vsrc); + + vdest = pix_multiply(vdest, tmp1); + + edges = vec_perm(tmp4, tmp3, dest_mask); + + tmp3 = vec_perm((vector unsigned char)vdest, edges, store_mask); + + tmp1 = vec_perm(edges, (vector unsigned char)vdest, store_mask); + + vec_st((vector unsigned int) tmp3, 15, dest ); + + vec_st((vector unsigned int) tmp1, 0, dest ); + + src+=4; + dest+=4; + } + + for (i = width%4; --i >=0;) { + CARD32 s = dest[i]; + CARD32 a = Alpha(src[i]); + FbByteMul(s, a); + dest[i] = s; + } +} + +static FASTCALL void +vmxCombineOutU (CARD32 *dest, const CARD32 *src, int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + 
dest_mask = vec_lvsl(0, dest); + src_mask = vec_lvsl(0, src); + store_mask = vec_lvsr(0, dest); + + for (i = width/4; i > 0; i--) { + + tmp1 = vec_ld(0, src); + + tmp2 = vec_ld(15, src); + + tmp3 = vec_ld(0, dest); + vsrc = vec_perm(tmp1, tmp2, src_mask); + + tmp4 = vec_ld(15, dest); + vdest = vec_perm(tmp3, tmp4, dest_mask); + + tmp1 = splat_alpha(negate(vdest)); + + vdest = pix_multiply(vsrc, tmp1); + + edges = vec_perm(tmp4, tmp3, dest_mask); + + tmp3 = vec_perm((vector unsigned char)vdest, edges, store_mask); + + tmp1 = vec_perm(edges, (vector unsigned char)vdest, store_mask); + + vec_st((vector unsigned int) tmp3, 15, dest ); + + vec_st((vector unsigned int) tmp1, 0, dest ); + + src+=4; + dest+=4; + } + + for (i = width%4; --i >=0;) { + CARD32 s = src[i]; + CARD32 a = Alpha(~dest[i]); + FbByteMul(s, a); + dest[i] = s; + } +} +/* OUT_REVERSE combiner: dest = dest * (1 - alpha(src)); four pixels per vector iteration, then a scalar tail for the remaining width % 4 pixels. */ +static FASTCALL void +vmxCombineOutReverseU (CARD32 *dest, const CARD32 *src, int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + dest_mask = vec_lvsl(0, dest); + src_mask = vec_lvsl(0, src); + store_mask = vec_lvsr(0, dest); + + for (i = width/4; i > 0; i--) { + + tmp1 = vec_ld(0, src); + + tmp2 = vec_ld(15, src); + + tmp3 = vec_ld(0, dest); + vsrc = vec_perm(tmp1, tmp2, src_mask); + + tmp4 = vec_ld(15, dest); + vdest = vec_perm(tmp3, tmp4, dest_mask); + + tmp1 = splat_alpha(negate(vsrc)); + + vdest = pix_multiply(vdest, tmp1); + + edges = vec_perm(tmp4, tmp3, dest_mask); + + tmp3 = vec_perm((vector unsigned char)vdest, edges, store_mask); + + tmp1 = vec_perm(edges, (vector unsigned char)vdest, store_mask); + + vec_st((vector unsigned int) tmp3, 15, dest ); + + vec_st((vector unsigned int) tmp1, 0, dest ); + + src+=4; + dest+=4; + } + + for (i = width%4; --i >=0;) { + CARD32 s = dest[i]; + CARD32 a = Alpha(~src[i]); /* inverse source alpha, as in the vector loop */ + FbByteMul(s, a); + dest[i] = s; + } +} + +static FASTCALL void +vmxCombineAtopU (CARD32 *dest, const CARD32 *src, int 
width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + dest_mask = vec_lvsl(0, dest); + src_mask = vec_lvsl(0, src); + store_mask = vec_lvsr(0, dest); + + for (i = width/4; i > 0; i--) { + + tmp1 = vec_ld(0, src); + + tmp2 = vec_ld(15, src); + + tmp3 = vec_ld(0, dest); + vsrc = vec_perm(tmp1, tmp2, src_mask); + + tmp4 = vec_ld(15, dest); + vdest = vec_perm(tmp3, tmp4, dest_mask); + + tmp1 = splat_alpha(negate(vsrc)); + + vdest = pix_add_mul(vsrc, splat_alpha(vdest), vdest, tmp1); + + edges = vec_perm(tmp4, tmp3, dest_mask); + + tmp3 = vec_perm((vector unsigned char)vdest, edges, store_mask); + + tmp1 = vec_perm(edges, (vector unsigned char)vdest, store_mask); + + vec_st((vector unsigned int) tmp3, 15, dest ); + + vec_st((vector unsigned int) tmp1, 0, dest ); + + src+=4; + dest+=4; + } + + for (i = width%4; --i >=0;) { + CARD32 s = src[i]; + CARD32 d = dest[i]; + CARD32 dest_a = Alpha(d); + CARD32 src_ia = Alpha(~s); + + FbByteAddMul(s, dest_a, d, src_ia); + dest[i] = s; + } +} + +static FASTCALL void +vmxCombineAtopReverseU (CARD32 *dest, const CARD32 *src, int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + dest_mask = vec_lvsl(0, dest); + src_mask = vec_lvsl(0, src); + store_mask = vec_lvsr(0, dest); + + for (i = width/4; i > 0; i--) { + + tmp1 = vec_ld(0, src); + + tmp2 = vec_ld(15, src); + + tmp3 = vec_ld(0, dest); + vsrc = vec_perm(tmp1, tmp2, src_mask); + + tmp4 = vec_ld(15, dest); + vdest = vec_perm(tmp3, tmp4, dest_mask); + + tmp1 = splat_alpha(negate(vdest)); + + vdest = pix_add_mul(vdest, splat_alpha(vsrc), vsrc, tmp1); + + edges = vec_perm(tmp4, tmp3, dest_mask); + + tmp3 = vec_perm((vector unsigned char)vdest, edges, store_mask); + + tmp1 = vec_perm(edges, (vector unsigned char)vdest, store_mask); + + vec_st((vector unsigned int) tmp3, 15, dest ); + + vec_st((vector 
unsigned int) tmp1, 0, dest ); + + src+=4; + dest+=4; + } + + for (i = width%4; --i >=0;) { + CARD32 s = src[i]; + CARD32 d = dest[i]; + CARD32 src_a = Alpha(s); + CARD32 dest_ia = Alpha(~d); + + FbByteAddMul(s, dest_ia, d, src_a); + dest[i] = s; + } +} + +static FASTCALL void +vmxCombineXorU (CARD32 *dest, const CARD32 *src, int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + dest_mask = vec_lvsl(0, dest); + src_mask = vec_lvsl(0, src); + store_mask = vec_lvsr(0, dest); + + for (i = width/4; i > 0; i--) { + + tmp1 = vec_ld(0, src); + + tmp2 = vec_ld(15, src); + + tmp3 = vec_ld(0, dest); + vsrc = vec_perm(tmp1, tmp2, src_mask); + + tmp4 = vec_ld(15, dest); + vdest = vec_perm(tmp3, tmp4, dest_mask); + + vdest = pix_add_mul(vsrc, splat_alpha(negate(vdest)), + vdest, splat_alpha(negate(vsrc))); + + edges = vec_perm(tmp4, tmp3, dest_mask); + + tmp3 = vec_perm((vector unsigned char)vdest, edges, store_mask); + + tmp1 = vec_perm(edges, (vector unsigned char)vdest, store_mask); + + vec_st((vector unsigned int) tmp3, 15, dest ); + + vec_st((vector unsigned int) tmp1, 0, dest ); + + src+=4; + dest+=4; + } + + for (i = width%4; --i >=0;) { + CARD32 s = src[i]; + CARD32 d = dest[i]; + CARD32 src_ia = Alpha(~s); + CARD32 dest_ia = Alpha(~d); + + FbByteAddMul(s, dest_ia, d, src_ia); + dest[i] = s; + } +} + +static FASTCALL void +vmxCombineAddU (CARD32 *dest, const CARD32 *src, int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + dest_mask = vec_lvsl(0, dest); + src_mask = vec_lvsl(0, src); + store_mask = vec_lvsr(0, dest); + + for (i = width/4; i > 0; i--) { + + tmp1 = vec_ld(0, src); + + tmp2 = vec_ld(15, src); + + tmp3 = vec_ld(0, dest); + vsrc = vec_perm(tmp1, tmp2, src_mask); + + tmp4 = vec_ld(15, dest); + vdest = vec_perm(tmp3, tmp4, dest_mask); + + vdest = pix_add(vsrc, vdest); 
+ + edges = vec_perm(tmp4, tmp3, dest_mask); + + tmp3 = vec_perm((vector unsigned char)vdest, edges, store_mask); + + tmp1 = vec_perm(edges, (vector unsigned char)vdest, store_mask); + + vec_st((vector unsigned int) tmp3, 15, dest ); + + vec_st((vector unsigned int) tmp1, 0, dest ); + + src+=4; + dest+=4; + } + + for (i = width%4; --i >=0;) { + CARD32 s = src[i]; + CARD32 d = dest[i]; + FbByteAdd(d, s); + dest[i] = d; + } +} + +#if 0 +static FASTCALL void +vmxCombineSrcC (CARD32 *dest, CARD32 *src, CARD32 *msk, int width) +{ + int i; + vector unsigned int vsrc, vmsk, vdest; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + src_mask, msk_mask, dest_mask, store_mask; + + src_mask = vec_lvsl(0, src); + dest_mask = vec_lvsl(0, dest); + msk_mask = vec_lvsl(0, msk); + store_mask = vec_lvsr(0, dest); + + for (i = width/4; i > 0; i--) { + + tmp1 = vec_ld(0, msk); + + tmp2 = vec_ld(15, msk); + + tmp3 = vec_ld(0, src); + vmsk = vec_perm(tmp1, tmp2, msk_mask); + + tmp4 = vec_ld(15, src); + vsrc = vec_perm(tmp3, tmp4, src_mask); + + tmp1 = vec_ld(0, dest); + tmp2 = vec_ld(15, dest); + + vdest = pix_multiply(vsrc, vmsk); + + edges = vec_perm(tmp2, tmp1, src_mask); + + tmp3 = vec_perm((vector unsigned char)vdest, edges, store_mask); + + tmp4 = vec_perm(edges, (vector unsigned char)vdest, store_mask); + + vec_st((vector unsigned int) tmp3, 15, src ); + + vec_st((vector unsigned int) tmp4, 0, src ); + + msk+=4; + src+=4; + dest+=4; + } + + fbCombineSrcC (dest, src, msk, width%4); +} + +#endif +#if 0 +void +fbCompositeSolid_nx8888vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + CARD32 src; + CARD32 *dstLine, *dst; + FbStride dstStride; + + fbComposeGetSolid(pSrc, pDst, src); + + if (src >> 24 == 0) + return; + + fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); + + while (height--) + { + dst = 
dstLine; + dstLine += dstStride; +// vmxCombineOverU(dst, src, width); + } +} + +void +fbCompositeSolid_nx0565mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + CARD32 src; + CARD16 *dstLine, *dst; + CARD16 w; + FbStride dstStride; + + fbComposeGetSolid(pSrc, pDst, src); + + if (src >> 24 == 0) + return; + + fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + vmxCombineOverU565(dst, src, width); + } +} + +#endif + +extern FbComposeFunctions composeFunctions; +/* Installs the vmxCombine*U routines into composeFunctions when fbHaveVMX() reports support. */ +void fbComposeSetupVMX(void) +{ + /* check if we have VMX support and initialize accordingly */ + if (fbHaveVMX()) { + composeFunctions.combineU[PIXMAN_OPERATOR_OVER] = vmxCombineOverU; + composeFunctions.combineU[PIXMAN_OPERATOR_OVER_REVERSE] = vmxCombineOverReverseU; + composeFunctions.combineU[PIXMAN_OPERATOR_IN] = vmxCombineInU; + composeFunctions.combineU[PIXMAN_OPERATOR_IN_REVERSE] = vmxCombineInReverseU; + composeFunctions.combineU[PIXMAN_OPERATOR_OUT] = vmxCombineOutU; + composeFunctions.combineU[PIXMAN_OPERATOR_OUT_REVERSE] = vmxCombineOutReverseU; + composeFunctions.combineU[PIXMAN_OPERATOR_ATOP] = vmxCombineAtopU; + composeFunctions.combineU[PIXMAN_OPERATOR_ATOP_REVERSE] = vmxCombineAtopReverseU; + composeFunctions.combineU[PIXMAN_OPERATOR_XOR] = vmxCombineXorU; + composeFunctions.combineU[PIXMAN_OPERATOR_ADD] = vmxCombineAddU; +/* later time + composeFunctions.combineC[PIXMAN_OPERATOR_SRC] = mmxCombineSrcC; + composeFunctions.combineC[PIXMAN_OPERATOR_OVER] = mmxCombineOverC; + composeFunctions.combineC[PIXMAN_OPERATOR_OVER_REVERSE] = mmxCombineOverReverseC; + composeFunctions.combineC[PIXMAN_OPERATOR_IN] = mmxCombineInC; + composeFunctions.combineC[PIXMAN_OPERATOR_IN_REVERSE] = mmxCombineInReverseC; + composeFunctions.combineC[PIXMAN_OPERATOR_OUT] = 
mmxCombineOutC; + composeFunctions.combineC[PIXMAN_OPERATOR_OUT_REVERSE] = mmxCombineOutReverseC; + composeFunctions.combineC[PIXMAN_OPERATOR_ATOP] = mmxCombineAtopC; + composeFunctions.combineC[PIXMAN_OPERATOR_ATOP_REVERSE] = mmxCombineAtopReverseC; + composeFunctions.combineC[PIXMAN_OPERATOR_XOR] = mmxCombineXorC; + composeFunctions.combineC[PIXMAN_OPERATOR_ADD] = mmxCombineAddC; +*/ + composeFunctions.combineMaskU = vmxCombineMaskU; + } +} + + + + +#if 0 +int main (int argc, char** argv) +{ + + int data[5][10245]; __attribute__((aligned(16))); + int i; + //input data + for (i = 0; i<10240; i++) data[0][i] = data[1][i] = data[2][i] = + data[3][i] = (i&0xff) * 0x01010101; + // + for (i = 0; i<10240; i++) data[4][i] = (i&0xff) * 0x01010101; + + for (i = 0; i<10240; i++) + if (data[0][i] != data[1][i]) { + printf ("wrong byte %d : %d != %d\n",i , data[0][i], data[1][i]); + } + + printf ("combine \n"); + fbCombineSrcC (data[0], data[2], data[4], 1024); + vmxCombineSrcC (data[1], data[3], data[4], 1024); + + for (i = 0; i<10240; i++) + if (data[0][i] != data[1][i]) { + printf ("wrong byte %0d : %0x != %0x\n",i , data[0][i], data[1][i]); + } + return 0; +} + +#endif diff --git a/pixman/src/fbvmx.h b/pixman/src/fbvmx.h new file mode 100644 index 0000000..e690dbf --- /dev/null +++ b/pixman/src/fbvmx.h @@ -0,0 +1,316 @@ +/* + * Copyright 2006 Luca Barbato + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Red Hat not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. Red Hat makes no representations about the + * suitability of this software for any purpose. 
It is provided "as is" + * without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Luca Barbato (lu_zero@gentoo.org) + * + * Based on work by Owen Taylor, Søren Sandmann and Lars Knoll + */ +#ifdef USE_VMX + +pixman_private +Bool fbHaveVMX(void); + +#else +#define fbHaveVMX() FALSE +#endif + +#ifdef USE_VMX + +#define AVV(x...) {x} + + +pixman_private +void fbComposeSetupVMX(void); + + +#if 0 +pixman_private +void fbCompositeIn_nx8x8vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); + +pixman_private +void fbCompositeSolidMask_nx8888x0565Cvmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +pixman_private +void fbCompositeSrcAdd_8888x8888vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +pixman_private +void fbCompositeSolidMask_nx8888x8888Cvmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +pixman_private +void fbCompositeSolidMask_nx8x8888vmx (pixman_operator_t op, + 
PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +pixman_private +void fbCompositeSolidMaskSrc_nx8x8888vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); + +pixman_private +void fbCompositeSrcAdd_8888x8x8vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); + +pixman_private +void fbCompositeIn_8x8vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); + +pixman_private +void fbCompositeSrcAdd_8000x8000vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +pixman_private +void fbCompositeSrc_8888RevNPx8888vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +pixman_private +void fbCompositeSrc_8888x0565vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +pixman_private +void fbCompositeSrc_8888RevNPx0565vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +pixman_private +void 
fbCompositeSolid_nx8888vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +pixman_private +void fbCompositeSolid_nx0565vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +pixman_private +void fbCompositeSolidMask_nx8x0565vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +pixman_private +void fbCompositeSrc_x888x8x8888vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +pixman_private +void fbCompositeSrc_8888x8x8888vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +pixman_private +void fbCompositeSrc_8888x8888vmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +pixman_private +Bool fbCopyAreavmx (FbPixels *pSrc, + FbPixels *pDst, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height); + +pixman_private +void fbCompositeCopyAreavmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); + +pixman_private +Bool fbSolidFillvmx (FbPixels *pDraw, + int x, + int y, + int width, + int height, + 
FbBits xor); +#endif +#endif /* USE_VMX */ diff --git a/src/cairo-pdf-surface.c b/src/cairo-pdf-surface.c index 7223726..bdfef00 100644 --- a/src/cairo-pdf-surface.c +++ b/src/cairo-pdf-surface.c @@ -1709,7 +1709,7 @@ _cairo_pdf_surface_emit_cff_font_subset " /Widths [", subset_resource.id, subset.base_font, - font_subset->num_glyphs, + font_subset->num_glyphs - 1, descriptor.id); for (i = 0; i < font_subset->num_glyphs; i++)