Develop and Download Open Source Software

Browse Subversion Repository

Contents of /branches/mty-makai/tr64_amd64.S

Parent Directory | Revision Log


Revision 125 - (show annotations) (download)
Mon Apr 16 14:50:11 2007 UTC (16 years, 11 months ago) by notanpe
File size: 2085 byte(s)
魔改造用ブランチ
1 /******************************************************-*-fundamental-*-
2 *
3 * 最終転置+行列転置(AMD64)
4 *
5 * $Id$
6 *
7 */
8
/*
 * Build configuration.
 *
 * W is the distance in bytes between consecutive source elements read by
 * IOFS: 8 when USE_64 is defined, 16 when USE_64_XMM is defined.
 * NOTE(review): with USE_64_XMM only the first 8 bytes of each 16-byte
 * slot are read by this file - presumably the low qword of an XMM-sized
 * element; confirm against the producer of the source buffer.
 */
#if defined(USE_64)

#define W 8

#elif defined(USE_64_XMM)

#define W 16

#elif !defined(W)

/* Without W the IOFS expression cannot be evaluated by the assembler;
   fail early with a readable message instead. */
#error "tr64_amd64.S: define USE_64 or USE_64_XMM (or W) to select the source stride"

#endif

	.text

/* Register roles inside transpose64. */
#define IC	%rcx	/* current swap distance in rows; also the shift count */
#define ICL	%cl	/* low byte of IC, the form the shift instructions take */
#define IK	%rbx	/* row index of the inner loop */

#define MASK	%r11	/* bit mask selecting the columns exchanged at this distance */

/*
 * Memory operands for 32-bit halves of 64-bit elements.
 *   b = base register, n = element number, i = dword within the element
 *       (byte offset 4*i, so 0 is the low dword and 1 the high dword).
 * IOFS addresses the source (element stride W bytes),
 * OOFS addresses the destination (element stride 8 bytes).
 */
#define IOFS(b,n,i) W*(n)+4*(i)(b)
#define OOFS(b,n,i) 8*(n)+4*(i)(b)
29
/*
 * SW32_HALF(s,j,h) - private helper for SW32.
 * When source element number s is below 64: load the 64-bit source element
 * s (base %r10), store its low 32 bits into dword h of destination row j
 * and its high 32 bits into dword h of destination row j+0x20 (base %rdx).
 * When s is 64 or more nothing is emitted.  Clobbers %rax and the flags.
 */
#define SW32_HALF(s,j,h) \
	.if (s)<64; \
	movq IOFS(%r10,s,0),%rax; \
	movl %eax,OOFS(%rdx,j,h); \
	shrq $32,%rax; \
	movl %eax,OOFS(%rdx,j+0x20,h); \
	.endif

/*
 * SW32(l,m,j) - fill destination rows j and j+0x20.
 * Source element l supplies the low dwords of both rows, source element m
 * supplies their high dwords.  An element number of 64 or more means that
 * the corresponding dwords are left untouched.
 */
#define SW32(l,m,j) \
	SW32_HALF(l,j,0); \
	SW32_HALF(m,j,1)
43
/*
 * transpose64 / _transpose64
 *   (uint64_t const *RCX, uint64 *RDX)
 *
 * Per the file header this is the final permutation followed by a matrix
 * transposition.  It works in two phases:
 *
 *   1. Copy phase (unrolled): 64-bit source elements at RCX (element
 *      stride W bytes) are split into 32-bit halves and scattered into the
 *      64 qword rows at RDX according to the SW32 table below.
 *   2. Swap phase: for the distances 16, 8, 4, 2, 1 every pair of rows
 *      (k, k+distance) with the distance bit of k clear exchanges the bits
 *      selected by MASK, in place.
 *
 * In:     RCX = source, RDX = destination (64 qwords, rewritten)
 * Out:    no register result
 * Clobb:  declared by the original header as A C D 8 9 10 11.  The code
 *         writes RAX, RCX, R8, R9, R10, R11 and the flags; RDX is only
 *         read.  RBX (IK) is saved on the stack and restored.
 */
	.globl transpose64
transpose64:
	.globl _transpose64
_transpose64:
	push IK

	mov %rcx,%r10			/* R10 = source base for SW32; RCX becomes IC later */

	/* SW32 table entries 0x16 and 0x17 have no source for the high dword
	   (0x41 and 0x40 are >= 64), so rows 0x16, 0x36, 0x17 and 0x37 are
	   cleared as whole qwords first; SW32 then fills only the low dwords. */
	xor %rax,%rax
	mov %rax,OOFS(%rdx,0x16,0)
	mov %rax,OOFS(%rdx,0x16+0x20,0)
	mov %rax,OOFS(%rdx,0x17,0)
	mov %rax,OOFS(%rdx,0x17+0x20,0)

	/* First stage (copy) is unrolled.
	   SW32(l,m,j): source l -> low dwords, source m -> high dwords of
	   destination rows j and j+0x20.
	   NOTE(review): entries 0x1C-0x1F use m = 0x44 (>= 64) and are not
	   pre-cleared, so the high dwords of rows 0x1C-0x1F and 0x3C-0x3F keep
	   whatever the destination buffer held - presumably dont-care bits;
	   confirm with the consumer of the result. */
	SW32(0x0E,0x1B,0x00)
	SW32(0x2E,0x3B,0x01)
	SW32(0x06,0x13,0x02)
	SW32(0x26,0x33,0x03)
	SW32(0x1F,0x1A,0x04)
	SW32(0x3F,0x3A,0x05)
	SW32(0x05,0x12,0x06)
	SW32(0x25,0x32,0x07)
	SW32(0x1E,0x0A,0x08)
	SW32(0x3E,0x2A,0x09)
	SW32(0x16,0x11,0x0A)
	SW32(0x36,0x31,0x0B)
	SW32(0x1D,0x09,0x0C)
	SW32(0x3D,0x29,0x0D)
	SW32(0x15,0x01,0x0E)
	SW32(0x35,0x21,0x0F)
	SW32(0x0D,0x08,0x10)
	SW32(0x2D,0x28,0x11)
	SW32(0x14,0x00,0x12)
	SW32(0x34,0x20,0x13)
	SW32(0x0C,0x19,0x14)
	SW32(0x2C,0x39,0x15)
	SW32(0x04,0x41,0x16)
	SW32(0x24,0x40,0x17)
	SW32(0x0B,0x18,0x18)
	SW32(0x2B,0x38,0x19)
	SW32(0x03,0x10,0x1A)
	SW32(0x23,0x30,0x1B)
	SW32(0x1C,0x44,0x1C)
	SW32(0x3C,0x44,0x1D)
	SW32(0x02,0x44,0x1E)
	SW32(0x22,0x44,0x1F)

	/* Prepare the mask: MASK = 0xFFFFFFFF00000000, IC = 16. */
	mov $-1,MASK
	mov $16,IC
	shl $32,MASK

	/* Outer loop, once per distance IC = 16, 8, 4, 2, 1. */
1:
	/* MASK = MASK ^ (MASK >> IC): alternating groups of IC set bits,
	   e.g. 0xFFFF0000FFFF0000 for IC = 16. */
	mov MASK,%r8
	shr ICL,MASK
	xor %r8,MASK
	lea (%rdx,IC,8),%r10		/* R10 = address of row IC, so (R10,IK,8) is row IK+IC */
	xor IK,IK

	/* Inner loop over every row index IK whose IC bit is clear. */
2:
	mov (%rdx,IK,8),%r8		/* R8  = A = row[IK] */
	mov (%r10,IK,8),%r9		/* R9  = B = row[IK+IC] */
	mov %r9,%rax			/* keep an unshifted copy of B */
	shl ICL,%r9
	xor %r8,%r9
	and MASK,%r9			/* R9 = T = ((B << IC) ^ A) & MASK */
	xor %r9,%r8
	mov %r8,(%rdx,IK,8)		/* row[IK] = A ^ T */
	shr ICL,%r9
	xor %rax,%r9
	mov %r9,(%r10,IK,8)		/* row[IK+IC] = B ^ (T >> IC) */

	/* IK = ((IK + IC + 1) | IC) ^ IC: adding IC sets the distance bit so
	   the +1 carry passes through it; or/xor then clear that bit again,
	   giving the next index with the IC bit clear. */
	add IC,IK
	add $1,IK
	or IC,IK
	xor IC,IK
	cmp $64,IK
	jb 2b				/* IK is an unsigned row index */

	shr IC				/* halve the distance; ZF is set once it reaches 0 */
	jne 1b

	pop IK
	ret
126
127 // EOF

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Rev URL

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26