Browse Subversion Repository
Contents of /branches/mty-makai/tr64_mmx.S
Parent Directory
| Revision Log
Revision 125 -
( show annotations)
( download)
Mon Apr 16 14:50:11 2007 UTC
(16 years, 11 months ago)
by notanpe
File size: 2144 byte(s)
魔改造用ブランチ
| 1 |
/******************************************************-*-fundamental-*- |
| 2 |
* |
| 3 |
* 最終転置+行列転置(64bit MMX) |
| 4 |
* |
| 5 |
* $Id$ |
| 6 |
* |
| 7 |
*/ |
| 8 |
|
| 9 |
/*
 * W: distance in bytes between consecutive input elements.
 * 8 when built with USE_MMX, 16 otherwise (XMM layout).
 */
#if defined(USE_MMX)
#define W 8
#else /* XMM */
#define W 16
#endif

	.text

/* Register aliases used by the transpose loops of transpose64. */
#define IC	%eax	/* row distance of the current stage, also the loop counter */
#define IK	%ebx	/* row index inside one stage */

#define MASK	%mm7	/* selects the bit columns exchanged in the current stage */
#define MC	%mm6	/* MMX shift count, kept equal to IC */

/*
 * Memory operand for the 32-bit half i (0 = low dword, 1 = high dword)
 * of element n relative to base register b.
 *   IOFS: input side, elements are W bytes apart.
 *   OOFS: output side, elements are 8 bytes apart.
 * Every macro argument is parenthesized so that compound expressions
 * (for example j+0x20) cannot be re-associated by operator precedence.
 */
#define IOFS(b,n,i) W*(n)+4*(i)(b)
#define OOFS(b,n,i) 8*(n)+4*(i)(b)
| 29 |
|
| 30 |
/*
 * SW32_HALF(src, dst, half)
 *   If src < 64, copy input element src (base %ecx) into the output
 *   (base %edx): its low dword goes to dword `half` of output element
 *   dst, its high dword goes to dword `half` of output element dst+0x20.
 *   If src >= 64 nothing is assembled.
 *   Clobbers %eax and %ebx.
 *
 * SW32(l, m, j)
 *   Fills output elements j and j+0x20 from two input elements:
 *   l supplies dword 0 of both, m supplies dword 1 of both.
 */
#define SW32_HALF(src,dst,half)			\
	.if (src)<64;				\
	mov	IOFS(%ecx,src,0),%eax;		\
	mov	IOFS(%ecx,src,1),%ebx;		\
	mov	%eax,OOFS(%edx,dst,half);	\
	mov	%ebx,OOFS(%edx,dst+0x20,half);	\
	.endif

#define SW32(l,m,j)				\
	SW32_HALF(l,j,0);			\
	SW32_HALF(m,j,1)
| 43 |
|
| 44 |
/*
 * transpose64 / _transpose64
 *   (uint64_t const *ECX, uint64_t *EDX)
 *
 * In:       %ecx = input elements, W bytes apart
 *           %edx = output, 64 elements of 8 bytes
 * Effect:   copies the input rows into the output in the order given by
 *           the SW32 table below (this already performs the 32-bit
 *           exchange stage), then runs the remaining exchange stages
 *           with distances 16, 8, 4, 2, 1 in place on the output.
 * Saved:    %ebx (pushed on entry, popped before return)
 * Clobbers: %eax, %ecx, %mm0, %mm1, %mm2, %mm6, %mm7, flags
 *           (emms is executed before returning)
 */
	.globl	_transpose64
_transpose64:
	.globl	transpose64
transpose64:
	push	%ebx

	/*
	 * Rows 0x16 and 0x17 have a second source index >= 64 (0x41, 0x40),
	 * so SW32 does not write dword 1 of output rows 0x16, 0x17, 0x36
	 * and 0x37.  Clear those rows first; dword 0 is overwritten below.
	 */
	pxor	%mm0,%mm0
	movq	%mm0,OOFS(%edx,0x16,0)
	movq	%mm0,OOFS(%edx,0x16+0x20,0)
	movq	%mm0,OOFS(%edx,0x17,0)
	movq	%mm0,OOFS(%edx,0x17+0x20,0)

	/*
	 * First stage (plain copy) is fully unrolled.
	 *
	 * NOTE(review): rows 0x1C-0x1F use second source 0x44 (>= 64) and
	 * are not pre-cleared, so dword 1 of output rows 0x1C-0x1F and
	 * 0x3C-0x3F is never written here and whatever the caller left in
	 * the buffer takes part in the stages below.  Presumably those bits
	 * are "do not care" for the caller - confirm.
	 */
	SW32(0x0E,0x1B,0x00)
	SW32(0x2E,0x3B,0x01)
	SW32(0x06,0x13,0x02)
	SW32(0x26,0x33,0x03)
	SW32(0x1F,0x1A,0x04)
	SW32(0x3F,0x3A,0x05)
	SW32(0x05,0x12,0x06)
	SW32(0x25,0x32,0x07)
	SW32(0x1E,0x0A,0x08)
	SW32(0x3E,0x2A,0x09)
	SW32(0x16,0x11,0x0A)
	SW32(0x36,0x31,0x0B)
	SW32(0x1D,0x09,0x0C)
	SW32(0x3D,0x29,0x0D)
	SW32(0x15,0x01,0x0E)
	SW32(0x35,0x21,0x0F)
	SW32(0x0D,0x08,0x10)
	SW32(0x2D,0x28,0x11)
	SW32(0x14,0x00,0x12)
	SW32(0x34,0x20,0x13)
	SW32(0x0C,0x19,0x14)
	SW32(0x2C,0x39,0x15)
	SW32(0x04,0x41,0x16)
	SW32(0x24,0x40,0x17)
	SW32(0x0B,0x18,0x18)
	SW32(0x2B,0x38,0x19)
	SW32(0x03,0x10,0x1A)
	SW32(0x23,0x30,0x1B)
	SW32(0x1C,0x44,0x1C)
	SW32(0x3C,0x44,0x1D)
	SW32(0x02,0x44,0x1E)
	SW32(0x22,0x44,0x1F)

	/*
	 * Prepare the mask: MASK = 0xFFFFFFFF00000000, IC = MC = 16.
	 */
	pcmpeqb	MASK,MASK
	mov	$16,IC
	movd	IC,MC
	psllq	$32,MASK

	/*
	 * One pass per stage.  On entry MASK is the mask of the previous
	 * stage; MASK ^= MASK >> MC yields the mask for distance MC
	 * (0xFFFF0000FFFF0000 for 16, 0xFF00FF00FF00FF00 for 8, ...).
	 * %ecx is reused as the address of the partner row, output + 8*IC.
	 */
.Ltr64_stage:
	movq	MASK,%mm0
	psrlq	MC,MASK
	pxor	%mm0,MASK
	lea	(%edx,IC,8),%ecx
	xor	IK,IK

	/*
	 * Exchange the MASK bits of row IK with the bits MC positions
	 * lower in row IK+IC.
	 */
.Ltr64_row:
	movq	(%edx,IK,8),%mm0	/* a = row[IK] */
	movq	(%ecx,IK,8),%mm1	/* b = row[IK+IC] */
	movq	%mm1,%mm2		/* keep a copy of b */
	psllq	MC,%mm1
	pxor	%mm0,%mm1
	pand	MASK,%mm1		/* t = (a ^ (b << MC)) & MASK */
	pxor	%mm1,%mm0		/* a ^= t */
	movq	%mm0,(%edx,IK,8)
	psrlq	MC,%mm1
	pxor	%mm2,%mm1		/* b ^= t >> MC */
	movq	%mm1,(%ecx,IK,8)

	/*
	 * Advance to the next row whose IC bit is clear:
	 * IK = (IK + IC + 1) with bit IC forced to 0.
	 */
	lea	1(IK,IC),IK
	or	IC,IK
	xor	IC,IK
	cmp	$64,IK
	jb	.Ltr64_row		/* row index is unsigned */

	/* Halve the distance; finished once it reaches zero. */
	psrld	$1,MC
	shr	IC
	jne	.Ltr64_stage

	emms				/* leave MMX state before returning */
	pop	%ebx
	ret
| 128 |
|
| 129 |
// EOF |
Properties
| svn:eol-style |
native
|
| svn:keywords |
Author Date Id Rev URL
|
| |