Develop and Download Open Source Software

Browse Subversion Repository

Contents of /branches/mty-makai/tr64_mmx.S

Parent Directory | Revision Log


Revision 125 - (show annotations) (download)
Mon Apr 16 14:50:11 2007 UTC (16 years, 11 months ago) by notanpe
File size: 2144 byte(s)
魔改造用ブランチ
1 /******************************************************-*-fundamental-*-
2 *
3 * 最終転置+行列転置(64bit MMX) -- final transposition + matrix transpose (64-bit MMX)
4 *
5 * $Id$
6 *
7 */
8
/*
 * W is the factor IOFS multiplies the element index n by, i.e. the byte
 * distance between consecutive input elements.
 */
9 #if defined(USE_MMX)
10
11 #define W 8 /* stride used when USE_MMX is defined */
12
13 #else /* XMM */
14
15 #define W 16 /* stride used otherwise */
16
17 #endif
18
19 .text
20
/*
 * Register aliases for the swap loops of transpose64.
 * SW32 uses %eax and %ebx directly as scratch, so IC and IK only carry
 * these meanings after the SW32 expansions.
 */
21 #define IC %eax /* row distance, also the shift count: 16, 8, 4, 2, 1 */
22 #define IK %ebx /* row index inside the inner loop */
23
24 #define MASK %mm7 /* bit mask applied in the swap at the current distance */
25 #define MC %mm6 /* copy of IC in an MMX register, used as psllq/psrlq count */
26
/*
 * Memory-operand builders: base register b, element/row index n, 32-bit
 * word index i.
 *   IOFS: element stride W bytes, word i at +4*i.
 *   OOFS: row stride 8 bytes,     word i at +4*i.
 * n is parenthesised in the expansion but i is not; every use in this file
 * passes a literal 0 or 1 for i.
 */
27 #define IOFS(b,n,i) W*(n)+4*i(b)
28 #define OOFS(b,n,i) 8*(n)+4*i(b)
29
/*
 * SW32(l,m,j): copy the first two 32-bit words of input elements l and m
 * into output rows j and j+0x20.
 *
 *   input l, word 0 -> output row j,      word 0
 *   input l, word 1 -> output row j+0x20, word 0
 *   input m, word 0 -> output row j,      word 1
 *   input m, word 1 -> output row j+0x20, word 1
 *
 * Each half is assembled only when its index is < 64; for an index >= 64
 * nothing is emitted and the corresponding output words are left untouched.
 *
 * The input base is fixed to %ecx and the output base to %edx.
 * Clobbers %eax and %ebx.
 */
30 #define SW32(l,m,j) \
31 .if (l)<64; \
32 mov IOFS(%ecx,l,0),%eax; \
33 mov IOFS(%ecx,l,1),%ebx; \
34 mov %eax,OOFS(%edx,j,0); \
35 mov %ebx,OOFS(%edx,j+0x20,0); \
36 .endif; \
37 .if (m)<64; \
38 mov IOFS(%ecx,m,0),%eax; \
39 mov IOFS(%ecx,m,1),%ebx; \
40 mov %eax,OOFS(%edx,j,1); \
41 mov %ebx,OOFS(%edx,j+0x20,1); \
42 .endif
43
/*
 * transpose64
 *
 * In:  %ecx = input, read as elements of W bytes; only the first two
 *             32-bit words of each element are read
 *      %edx = output, 64 rows of 8 bytes
 *
 * Stage 1 (unrolled SW32 list): copies input words into the output rows in
 * the order given by the table below.
 * Stage 2 (loops LI/LJ): for distance IC = 16, 8, 4, 2, 1, exchanges masked
 * bit groups between output rows IK and IK+IC, in place.
 *
 * %ebx is saved and restored. %ecx is overwritten in stage 2. emms is
 * executed before returning.
 *
 * NOTE(review): the SW32 entries for rows 0x1C-0x1F pass m = 0x44 (>= 64),
 * so word 1 of output rows 0x1C-0x1F and 0x3C-0x3F is never written, and
 * unlike rows 0x16/0x17 those rows are not zeroed first. Stage 2 reads them
 * as full 64-bit rows, so the result depends on what the output buffer held
 * on entry. Presumably the affected output bits are don't-care for the
 * caller -- confirm.
 */
44 /* (uint64_t const *ECX, uint64 *EDX) (clobbers eax, ecx, mm0-mm2, mm6, mm7, flags) */
45 .globl _transpose64
46 _transpose64:
47 .globl transpose64
48 transpose64:
49 push %ebx /* %ebx is scratch in SW32 and IK in the loops */
50
51 pxor %mm0,%mm0 /* mm0 = 0 */
52 movq %mm0,OOFS(%edx,0x16,0) /* zero rows 0x16, 0x36, 0x17, 0x37: */
53 movq %mm0,OOFS(%edx,0x16+0x20,0) /* their word 1 is skipped by SW32 */
54 movq %mm0,OOFS(%edx,0x17,0) /* below (m = 0x41 / 0x40 >= 64) */
55 movq %mm0,OOFS(%edx,0x17+0x20,0)
56 // First stage (the copy) is unrolled
57 SW32(0x0E,0x1B,0x00)
58 SW32(0x2E,0x3B,0x01)
59 SW32(0x06,0x13,0x02)
60 SW32(0x26,0x33,0x03)
61 SW32(0x1F,0x1A,0x04)
62 SW32(0x3F,0x3A,0x05)
63 SW32(0x05,0x12,0x06)
64 SW32(0x25,0x32,0x07)
65 SW32(0x1E,0x0A,0x08)
66 SW32(0x3E,0x2A,0x09)
67 SW32(0x16,0x11,0x0A)
68 SW32(0x36,0x31,0x0B)
69 SW32(0x1D,0x09,0x0C)
70 SW32(0x3D,0x29,0x0D)
71 SW32(0x15,0x01,0x0E)
72 SW32(0x35,0x21,0x0F)
73 SW32(0x0D,0x08,0x10)
74 SW32(0x2D,0x28,0x11)
75 SW32(0x14,0x00,0x12)
76 SW32(0x34,0x20,0x13)
77 SW32(0x0C,0x19,0x14)
78 SW32(0x2C,0x39,0x15)
79 SW32(0x04,0x41,0x16) /* m >= 64: word 1 stays zero */
80 SW32(0x24,0x40,0x17) /* m >= 64: word 1 stays zero */
81 SW32(0x0B,0x18,0x18)
82 SW32(0x2B,0x38,0x19)
83 SW32(0x03,0x10,0x1A)
84 SW32(0x23,0x30,0x1B)
85 SW32(0x1C,0x44,0x1C) /* m >= 64: word 1 not written (see NOTE above) */
86 SW32(0x3C,0x44,0x1D)
87 SW32(0x02,0x44,0x1E)
88 SW32(0x22,0x44,0x1F)
89
90 // Prepare the mask
91 pcmpeqb MASK,MASK /* MASK = all ones */
92 mov $16,IC /* first distance / shift count */
93 movd IC,MC /* MC = 16 */
94 psllq $32,MASK /* MASK = 0xFFFFFFFF00000000 */
95
96 LI:
97 movq MASK,%mm0
98 psrlq MC,MASK
99 pxor %mm0,MASK /* MASK ^= MASK >> IC (0xFFFF0000FFFF0000 for IC = 16) */
100 lea (%edx,IC,8),%ecx /* ecx = address of row IC; input pointer is gone */
101 xor IK,IK /* IK = 0 */
102 LJ:
103 movq (%edx,IK,8),%mm0 /* a = row[IK] */
104 movq (%ecx,IK,8),%mm1 /* b = row[IK+IC] */
105 movq %mm1,%mm2 /* keep b */
106 psllq MC,%mm1
107 pxor %mm0,%mm1
108 pand MASK,%mm1 /* t = ((b << IC) ^ a) & MASK */
109 pxor %mm1,%mm0 /* a ^= t */
110 movq %mm0,(%edx,IK,8)
111 psrlq MC,%mm1
112 pxor %mm2,%mm1 /* b ^= t >> IC */
113 movq %mm1,(%ecx,IK,8)
114 lea 1(IK,IC),IK
115 or IC,IK
116 xor IC,IK /* IK = ((IK + IC + 1) | IC) ^ IC: next index with bit IC clear */
117 cmp $64,IK
118 jl LJ /* while IK < 64 */
119
120 psrld $1,MC /* halve the shift count */
121 shr IC /* halve the distance */
122 jne LI /* continue until IC becomes 0 */
123
124
125 emms /* clear MMX state before returning */
126 pop %ebx
127 ret
128
129 // EOF

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Rev URL

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26