Develop and Download Open Source Software

Browse Subversion Repository

Contents of /branches/mty-makai/x86-sse.S

Parent Directory Parent Directory | Revision Log Revision Log


Revision 261 - (show annotations) (download)
Thu Jun 23 03:11:18 2011 UTC (12 years, 9 months ago) by notanpe
File size: 24591 byte(s)
From john-1.7.8
1 /*
2 * This file contains the core of a bitslice DES implementation for x86/SSE2.
3 * It is part of John the Ripper password cracker,
4 * Copyright (c) 2000-2001,2005,2006,2008,2011 by Solar Designer
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted. (This is a heavily cut-down "BSD license".)
7 *
8 * Gate counts per S-box: 49 44 46 33 48 46 46 41
9 * Average: 44.125
10 *
11 * The Boolean expressions corresponding to DES S-boxes have been generated
12 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
13 * John the Ripper password cracker: http://www.openwall.com/john/
14 * Being mathematical formulas, they are not copyrighted and are free for reuse
15 * by anyone.
16 *
17 * The x86/SSE2 code for the S-boxes was generated by Solar Designer using a
18 * Perl script and then hand-optimized - originally for MMX, then converted to
19 * SSE2. Instruction scheduling was not re-done for SSE2-capable CPUs yet;
20 * doing so may provide further speedup.
21 *
22 * The effort has been sponsored by Rapid7: http://www.rapid7.com
23 */
24
25 #include "arch.h"
26
27 #if DES_BS_ASM
28
/*
 * On platforms whose C symbols carry a leading underscore (UNDERSCORES is
 * presumably set by arch.h -- TODO confirm), rename the exported symbols
 * so they still link against the C callers.
 */
29 #ifdef UNDERSCORES
30 #define DES_bs_all _DES_bs_all
31 #define DES_bs_init_asm _DES_bs_init_asm
32 #define DES_bs_crypt _DES_bs_crypt
33 #define DES_bs_crypt_25 _DES_bs_crypt_25
34 #define DES_bs_crypt_LM _DES_bs_crypt_LM
35 #endif
36
37 /*
38 * Some broken systems don't offer section alignments larger than 4 bytes,
39 * while for the SSE code we need at least a 16 byte alignment. ALIGN_FIX
40 * is here to work around this issue when we happen to get bad addresses.
41 */
42 #ifndef ALIGN_FIX
/* ALIGN_LOG: this assembler's .align takes a log2 argument, not a byte count */
43 #ifdef ALIGN_LOG
44 #define DO_ALIGN(log) .align log
45 #else
46 #define DO_ALIGN(log) .align 1 << log
47 #endif
48 #else
/* Same, plus ALIGN_FIX pad bytes to compensate for a misaligned section base */
49 #ifdef ALIGN_LOG
50 #define DO_ALIGN(log) .align log; .space ALIGN_FIX
51 #else
52 #define DO_ALIGN(log) .align 1 << log; .space ALIGN_FIX
53 #endif
54 #endif
55
56 #ifdef __sun
57 /* Sun's assembler doesn't recognize .space */
58 #define DO_SPACE(size) .zero size
59 #else
60 /* Mac OS X assembler doesn't recognize .zero */
61 #define DO_SPACE(size) .space size
62 #endif
63
64 /* Sun's assembler can't multiply, but at least it can add... */
/* nptr(n) = n * 4 (32-bit pointer size); nvec(n) = n * 16 (SSE vector size) */
65 #define nptr(n) n+n+n+n
66 #define nvec(n) n+n+n+n+n+n+n+n+n+n+n+n+n+n+n+n
67
68 #ifdef BSD
69 .data
70 #else
71 .bss
72 #endif
73
/*
 * The global DES_bs_all context.  This asm layout must stay in sync with
 * the corresponding C struct -- NOTE(review): confirm field order/sizes
 * against the C definition when changing either side.  Only the fields
 * named below are touched by this file.
 */
74 .globl DES_bs_all
75 DO_ALIGN(5)
76 DES_bs_all:
/* 0x300 key-schedule pointers; not referenced in this file */
77 DES_bs_all_KSp:
78 DO_SPACE(nptr(0x300))
/*
 * KS_p and KS_v label the same storage: viewed as pointer entries (k(),
 * used by DES_bs_crypt_LM) or as key-bit vector values (K(), used by
 * DES_bs_crypt and DES_bs_crypt_25).
 */
79 DES_bs_all_KS_p:
80 DES_bs_all_KS_v:
81 DO_SPACE(nvec(0x300))
/* 96 pointers implementing the DES expansion (E); set up by the C code */
82 DES_bs_all_E:
83 DO_SPACE(nptr(96))
/* 56 key-bit vectors */
84 DES_bs_all_K:
85 DO_SPACE(nvec(56))
/* 64 data-bit vectors: the bitslice block being encrypted */
86 DES_bs_all_B:
87 DO_SPACE(nvec(64))
/* scratch vectors for the S-box macros; tmp_at(0) doubles as pnot */
88 DES_bs_all_tmp:
89 DO_SPACE(nvec(16))
90 DES_bs_all_fields_not_used_here:
91 DO_SPACE(0x400 + 0x100 + 4 + 4 + 0x400)
92 DES_bs_all_possible_alignment_gaps:
93 DO_SPACE(0x100)
94
/* Field accessors: E(i) = i'th expansion pointer slot; B(i), tmp_at(i) = i'th vector */
95 #define E(i) DES_bs_all_E+nptr(i)
96 #define B(i) DES_bs_all_B+nvec(i)
97 #define tmp_at(i) DES_bs_all_tmp+nvec(i)
98
/* All-ones vector, written by DES_bs_init_asm; pxor with it implements NOT */
99 #define pnot tmp_at(0)
100
/*
 * S1 - bitslice DES S-box 1 (49 gates, see file header).  "extra" runs
 * first and completes the sixth input (a6_p / a6_v).  Inputs a1..a6
 * arrive in %xmm0..%xmm5; the four results are XORed into the out1..out4
 * memory operands.  Clobbers %xmm0-%xmm7 and tmp_at() scratch slots;
 * reads pnot.
 */
101 #define S1(out1, out2, out3, out4, extra) \
102 extra; \
103 movdqa %xmm0,tmp_at(1); \
104 movdqa %xmm5,%xmm7; \
105 movdqa %xmm4,tmp_at(4); \
106 movdqa %xmm2,%xmm6; \
107 movdqa %xmm1,tmp_at(2); \
108 por %xmm2,%xmm7; \
109 movdqa %xmm3,tmp_at(3); \
110 pxor %xmm0,%xmm6; \
111 movdqa %xmm7,tmp_at(5); \
112 movdqa %xmm6,%xmm1; \
113 pandn %xmm0,%xmm4; \
114 pand %xmm7,%xmm1; \
115 movdqa %xmm1,%xmm7; \
116 por %xmm5,%xmm7; \
117 pxor %xmm3,%xmm1; \
118 pxor %xmm4,%xmm3; \
119 movdqa %xmm1,tmp_at(6); \
120 movdqa %xmm3,%xmm1; \
121 pandn tmp_at(6),%xmm3; \
122 movdqa %xmm3,tmp_at(7); \
123 movdqa %xmm5,%xmm3; \
124 por %xmm0,%xmm5; \
125 pxor tmp_at(4),%xmm3; \
126 movdqa %xmm3,tmp_at(8); \
127 movdqa %xmm5,%xmm0; \
128 pandn %xmm3,%xmm6; \
129 pxor %xmm2,%xmm3; \
130 pandn %xmm2,%xmm4; \
131 pandn %xmm1,%xmm3; \
132 pxor %xmm3,%xmm7; \
133 movdqa tmp_at(7),%xmm3; \
134 pandn tmp_at(3),%xmm5; \
135 por %xmm7,%xmm0; \
136 pandn %xmm7,%xmm3; \
137 movdqa %xmm3,tmp_at(9); \
138 pand tmp_at(5),%xmm7; \
139 movdqa tmp_at(6),%xmm3; \
140 movdqa %xmm0,%xmm2; \
141 pxor %xmm1,%xmm2; \
142 pandn tmp_at(4),%xmm3; \
143 pandn %xmm2,%xmm4; \
144 movdqa tmp_at(2),%xmm2; \
145 pxor %xmm4,%xmm7; \
146 pxor tmp_at(8),%xmm4; \
147 pxor %xmm3,%xmm5; \
148 por %xmm3,%xmm4; \
149 pxor tmp_at(1),%xmm4; \
150 pxor %xmm0,%xmm3; \
151 pandn %xmm3,%xmm2; \
152 pxor tmp_at(5),%xmm0; \
153 movdqa tmp_at(7),%xmm3; \
154 por tmp_at(2),%xmm3; \
155 pxor pnot,%xmm7; \
156 pxor out1,%xmm3; \
157 pxor %xmm7,%xmm2; \
158 pxor tmp_at(5),%xmm4; \
159 pxor out3,%xmm2; \
160 pxor %xmm4,%xmm7; \
161 pxor %xmm7,%xmm3; \
162 movdqa %xmm3,out1; \
163 por %xmm6,%xmm5; \
164 por tmp_at(8),%xmm7; \
165 por %xmm5,%xmm0; \
166 pxor out2,%xmm7; \
167 pxor %xmm4,%xmm0; \
168 pxor %xmm0,%xmm7; \
169 por tmp_at(4),%xmm1; \
170 movdqa tmp_at(2),%xmm3; \
171 pand tmp_at(9),%xmm4; \
172 pandn %xmm1,%xmm0; \
173 pxor %xmm0,%xmm4; \
174 por tmp_at(9),%xmm3; \
175 por tmp_at(2),%xmm4; \
176 movdqa %xmm2,out3; \
177 pxor %xmm3,%xmm7; \
178 pxor %xmm5,%xmm4; \
179 pxor out4,%xmm4; \
180 movdqa %xmm7,out2; \
181 movdqa %xmm4,out4
182
/*
 * S2 - bitslice DES S-box 2 (44 gates).  Same contract as S1: "extra"
 * completes a6; inputs in %xmm0..%xmm5; results XORed into out1..out4;
 * clobbers %xmm0-%xmm7 and tmp_at() slots; reads pnot.
 */
183 #define S2(out1, out2, out3, out4, extra) \
184 extra; \
185 movdqa %xmm2,tmp_at(2); \
186 movdqa %xmm1,tmp_at(1); \
187 movdqa %xmm5,%xmm2; \
188 movdqa %xmm4,tmp_at(4); \
189 pandn %xmm0,%xmm2; \
190 movdqa %xmm3,tmp_at(3); \
191 pandn %xmm4,%xmm2; \
192 movdqa %xmm0,%xmm6; \
193 movdqa %xmm2,%xmm7; \
194 pxor pnot,%xmm0; \
195 por %xmm1,%xmm7; \
196 pxor %xmm4,%xmm1; \
197 movdqa %xmm7,tmp_at(5); \
198 pand %xmm1,%xmm6; \
199 movdqa %xmm5,%xmm7; \
200 pxor %xmm4,%xmm6; \
201 pandn %xmm1,%xmm7; \
202 movdqa %xmm3,%xmm4; \
203 pxor %xmm7,%xmm2; \
204 pandn %xmm6,%xmm7; \
205 pxor %xmm5,%xmm1; \
206 movdqa %xmm7,tmp_at(7); \
207 movdqa %xmm5,%xmm7; /* NOTE(review): looks like a dead store - %xmm7 is rewritten below before any read; confirm vs upstream */ \
208 pand tmp_at(2),%xmm5; \
209 pand tmp_at(5),%xmm2; \
210 movdqa %xmm5,tmp_at(8); \
211 pandn %xmm2,%xmm5; \
212 pand tmp_at(2),%xmm2; \
213 movdqa tmp_at(8),%xmm7; \
214 pandn tmp_at(3),%xmm5; \
215 pandn %xmm1,%xmm7; \
216 pxor %xmm2,%xmm0; \
217 movdqa %xmm7,%xmm3; \
218 pxor %xmm0,%xmm3; \
219 pxor out2,%xmm5; \
220 pandn tmp_at(1),%xmm7; \
221 pxor %xmm6,%xmm7; \
222 pxor %xmm3,%xmm5; \
223 movdqa %xmm7,%xmm6; \
224 movdqa %xmm5,out2; \
225 movdqa tmp_at(7),%xmm5; \
226 pandn tmp_at(5),%xmm4; \
227 pandn %xmm0,%xmm6; \
228 pxor tmp_at(5),%xmm3; \
229 movdqa %xmm1,%xmm0; \
230 pxor %xmm4,%xmm6; \
231 pxor tmp_at(2),%xmm0; \
232 pxor %xmm0,%xmm6; \
233 movdqa %xmm0,%xmm4; \
234 pxor out1,%xmm6; \
235 pandn tmp_at(1),%xmm0; \
236 pxor tmp_at(4),%xmm2; \
237 pxor %xmm3,%xmm0; \
238 movdqa %xmm6,out1; \
239 por %xmm1,%xmm3; \
240 por tmp_at(8),%xmm0; \
241 pxor %xmm4,%xmm0; \
242 movdqa %xmm0,%xmm4; \
243 pandn tmp_at(2),%xmm0; \
244 movdqa tmp_at(3),%xmm6; \
245 pxor tmp_at(7),%xmm0; \
246 por %xmm7,%xmm0; \
247 por %xmm6,%xmm5; \
248 pxor %xmm0,%xmm2; \
249 pandn %xmm2,%xmm7; \
250 por %xmm2,%xmm6; \
251 pxor out4,%xmm7; \
252 pxor %xmm4,%xmm6; \
253 pxor out3,%xmm6; \
254 pxor %xmm5,%xmm7; \
255 pxor %xmm3,%xmm7; \
256 movdqa %xmm6,out3; \
257 movdqa %xmm7,out4
258
/*
 * S3 - bitslice DES S-box 3 (46 gates).  Same contract as S1: "extra"
 * completes a6; inputs in %xmm0..%xmm5; results XORed into out1..out4;
 * clobbers %xmm0-%xmm7 and tmp_at() slots; reads pnot.
 */
259 #define S3(out1, out2, out3, out4, extra) \
260 extra; \
261 movdqa %xmm0,tmp_at(1); \
262 movdqa %xmm1,tmp_at(2); \
263 movdqa %xmm0,%xmm7; \
264 pandn %xmm0,%xmm1; \
265 movdqa %xmm2,tmp_at(3); \
266 movdqa %xmm5,%xmm0; \
267 pxor %xmm2,%xmm0; \
268 movdqa %xmm4,tmp_at(4); \
269 movdqa %xmm5,%xmm2; \
270 por %xmm0,%xmm1; \
271 pxor %xmm3,%xmm2; \
272 movdqa %xmm0,%xmm4; \
273 movdqa %xmm5,%xmm6; \
274 pandn %xmm2,%xmm7; \
275 pxor tmp_at(2),%xmm4; \
276 movdqa %xmm7,tmp_at(5); \
277 pxor %xmm1,%xmm7; \
278 pandn %xmm4,%xmm6; \
279 movdqa %xmm7,tmp_at(6); \
280 pxor %xmm6,%xmm1; \
281 pand %xmm0,%xmm2; \
282 movdqa %xmm1,%xmm6; \
283 movdqa %xmm3,%xmm0; \
284 pandn %xmm7,%xmm6; \
285 pand %xmm5,%xmm7; \
286 pand %xmm3,%xmm5; \
287 por %xmm3,%xmm7; \
288 pand tmp_at(1),%xmm7; \
289 movdqa tmp_at(4),%xmm3; \
290 pandn tmp_at(6),%xmm3; \
291 pxor %xmm4,%xmm7; \
292 pxor tmp_at(1),%xmm0; \
293 movdqa %xmm7,tmp_at(7); \
294 pxor %xmm3,%xmm7; \
295 movdqa tmp_at(2),%xmm3; \
296 pxor out4,%xmm7; \
297 pxor %xmm0,%xmm1; \
298 movdqa %xmm7,out4; \
299 movdqa tmp_at(3),%xmm7; \
300 por tmp_at(3),%xmm1; \
301 pandn %xmm1,%xmm2; \
302 por tmp_at(5),%xmm0; \
303 movdqa %xmm0,%xmm1; \
304 pandn %xmm5,%xmm3; \
305 pandn tmp_at(7),%xmm1; \
306 por %xmm4,%xmm5; \
307 pxor %xmm3,%xmm1; \
308 por tmp_at(2),%xmm7; \
309 movdqa tmp_at(3),%xmm3; \
310 pandn %xmm1,%xmm3; \
311 pxor %xmm4,%xmm0; \
312 pandn %xmm5,%xmm3; \
313 movdqa tmp_at(4),%xmm5; \
314 pxor tmp_at(1),%xmm3; \
315 pand %xmm2,%xmm5; \
316 pxor pnot,%xmm0; \
317 pxor %xmm5,%xmm3; \
318 movdqa %xmm7,%xmm5; \
319 pxor out2,%xmm3; \
320 pandn tmp_at(4),%xmm6; \
321 pandn tmp_at(6),%xmm7; \
322 pxor %xmm0,%xmm6; \
323 movdqa %xmm3,out2; \
324 pxor tmp_at(1),%xmm2; \
325 por tmp_at(4),%xmm1; \
326 por %xmm2,%xmm0; \
327 pxor tmp_at(6),%xmm5; \
328 pxor %xmm1,%xmm0; \
329 pxor out1,%xmm6; \
330 pxor out3,%xmm5; \
331 pxor tmp_at(7),%xmm0; \
332 pxor %xmm7,%xmm6; \
333 pxor %xmm5,%xmm0; \
334 movdqa %xmm6,out1; \
335 movdqa %xmm0,out3
336
/*
 * S4 - bitslice DES S-box 4 (33 gates, the smallest).  Same contract as
 * S1: "extra" completes a6; inputs in %xmm0..%xmm5; results XORed into
 * out1..out4; clobbers %xmm0-%xmm7 and tmp_at() slots; reads pnot.
 */
337 #define S4(out1, out2, out3, out4, extra) \
338 extra; \
339 movdqa %xmm1,%xmm7; \
340 pxor %xmm2,%xmm0; \
341 por %xmm3,%xmm1; \
342 pxor %xmm4,%xmm2; \
343 movdqa %xmm5,tmp_at(2); \
344 pxor %xmm4,%xmm1; \
345 movdqa %xmm7,%xmm6; /* NOTE(review): looks like a dead store - %xmm6 is rewritten below before any read; confirm vs upstream */ \
346 movdqa %xmm7,%xmm5; \
347 pandn %xmm2,%xmm7; \
348 pandn %xmm2,%xmm1; \
349 por %xmm7,%xmm4; \
350 pxor %xmm3,%xmm7; \
351 movdqa %xmm7,%xmm6; \
352 por %xmm0,%xmm7; \
353 pxor %xmm5,%xmm3; \
354 movdqa %xmm1,tmp_at(3); \
355 pandn %xmm7,%xmm1; \
356 movdqa %xmm1,%xmm7; \
357 pxor %xmm5,%xmm1; \
358 pand %xmm1,%xmm6; \
359 movdqa %xmm6,%xmm5; \
360 pxor %xmm1,%xmm0; \
361 pandn %xmm2,%xmm6; \
362 pandn %xmm0,%xmm6; \
363 pxor %xmm0,%xmm4; \
364 movdqa %xmm3,%xmm0; \
365 pandn %xmm4,%xmm3; \
366 movdqa tmp_at(2),%xmm2; \
367 pxor %xmm7,%xmm3; \
368 pxor tmp_at(3),%xmm6; \
369 movdqa %xmm6,%xmm7; \
370 pandn %xmm2,%xmm6; \
371 pxor out1,%xmm6; \
372 pandn %xmm7,%xmm2; \
373 pxor out2,%xmm2; \
374 pxor %xmm3,%xmm6; \
375 pxor pnot,%xmm3; \
376 pxor %xmm3,%xmm2; \
377 pxor %xmm7,%xmm3; \
378 movdqa %xmm6,out1; \
379 pandn %xmm3,%xmm0; \
380 por %xmm5,%xmm0; \
381 movdqa %xmm2,out2; \
382 movdqa tmp_at(2),%xmm3; \
383 por %xmm1,%xmm3; \
384 pand tmp_at(2),%xmm1; \
385 pxor %xmm4,%xmm0; \
386 pxor %xmm0,%xmm3; \
387 pxor out3,%xmm3; \
388 pxor %xmm1,%xmm0; \
389 movdqa %xmm3,out3; \
390 pxor out4,%xmm0; \
391 movdqa %xmm0,out4
392
/*
 * S5 - bitslice DES S-box 5 (48 gates).  Same contract as S1: "extra"
 * completes a6; inputs in %xmm0..%xmm5; results XORed into out1..out4;
 * clobbers %xmm0-%xmm7 and tmp_at() slots; reads pnot.
 */
393 #define S5(out1, out2, out3, out4, extra) \
394 extra; \
395 movdqa %xmm2,tmp_at(3); \
396 movdqa %xmm0,tmp_at(1); \
397 por %xmm0,%xmm2; \
398 movdqa %xmm5,%xmm6; \
399 movdqa %xmm2,tmp_at(4); \
400 pandn %xmm2,%xmm5; \
401 movdqa %xmm2,%xmm7; /* NOTE(review): looks like a dead store - %xmm7 is rewritten below before any read; confirm vs upstream */ \
402 movdqa %xmm5,%xmm2; \
403 pxor %xmm0,%xmm5; \
404 movdqa %xmm3,%xmm7; \
405 movdqa %xmm5,tmp_at(5); \
406 pxor tmp_at(3),%xmm5; \
407 movdqa %xmm1,tmp_at(2); \
408 por %xmm5,%xmm0; \
409 por %xmm3,%xmm5; \
410 pandn %xmm2,%xmm3; \
411 pxor tmp_at(3),%xmm3; \
412 movdqa %xmm3,tmp_at(6); \
413 movdqa %xmm0,%xmm1; \
414 pand %xmm4,%xmm3; \
415 pxor %xmm0,%xmm3; \
416 pand %xmm7,%xmm0; \
417 pxor %xmm7,%xmm3; \
418 movdqa %xmm3,tmp_at(3); \
419 pxor %xmm3,%xmm6; \
420 movdqa %xmm6,%xmm2; \
421 por tmp_at(5),%xmm6; \
422 movdqa %xmm6,%xmm3; \
423 pand %xmm4,%xmm6; \
424 movdqa %xmm6,tmp_at(7); \
425 pxor tmp_at(5),%xmm6; \
426 pxor %xmm6,%xmm0; \
427 movdqa tmp_at(1),%xmm6; \
428 movdqa %xmm0,tmp_at(8); \
429 pandn %xmm3,%xmm6; \
430 movdqa tmp_at(2),%xmm0; \
431 movdqa %xmm6,%xmm3; \
432 pxor tmp_at(6),%xmm6; \
433 pxor %xmm5,%xmm4; \
434 pandn %xmm4,%xmm6; \
435 pxor pnot,%xmm6; \
436 pandn %xmm6,%xmm0; \
437 pxor tmp_at(3),%xmm0; \
438 movdqa tmp_at(7),%xmm6; \
439 pandn tmp_at(6),%xmm6; \
440 pxor out3,%xmm0; \
441 pxor %xmm4,%xmm3; \
442 movdqa %xmm0,out3; \
443 por tmp_at(8),%xmm3; \
444 movdqa tmp_at(6),%xmm0; \
445 pandn %xmm3,%xmm6; \
446 pand tmp_at(6),%xmm1; \
447 pand %xmm6,%xmm2; \
448 movdqa %xmm6,%xmm3; \
449 pandn %xmm5,%xmm6; \
450 pxor %xmm4,%xmm2; \
451 por %xmm2,%xmm1; \
452 pxor tmp_at(4),%xmm3; \
453 pxor tmp_at(7),%xmm1; \
454 pand %xmm2,%xmm7; \
455 pand tmp_at(2),%xmm1; \
456 pxor tmp_at(1),%xmm7; \
457 pxor tmp_at(8),%xmm1; \
458 pxor %xmm7,%xmm3; \
459 por tmp_at(2),%xmm6; \
460 pxor out4,%xmm1; \
461 movdqa %xmm1,out4; \
462 pxor %xmm5,%xmm0; \
463 pxor tmp_at(5),%xmm2; \
464 pxor %xmm3,%xmm6; \
465 pandn %xmm0,%xmm3; \
466 pand tmp_at(2),%xmm5; \
467 pxor %xmm2,%xmm3; \
468 pxor out2,%xmm5; \
469 pxor %xmm5,%xmm3; \
470 pxor out1,%xmm6; \
471 movdqa %xmm3,out2; \
472 movdqa %xmm6,out1
473
/*
 * S6 - bitslice DES S-box 6 (46 gates).  Same contract as S1: "extra"
 * completes a6; inputs in %xmm0..%xmm5; results XORed into out1..out4;
 * clobbers %xmm0-%xmm7 and tmp_at() slots (up to tmp_at(12)); reads pnot.
 */
474 #define S6(out1, out2, out3, out4, extra) \
475 extra; \
476 movdqa %xmm4,tmp_at(2); \
477 pxor %xmm1,%xmm4; \
478 movdqa %xmm5,tmp_at(3); \
479 por %xmm1,%xmm5; \
480 movdqa %xmm2,%xmm7; \
481 pand %xmm0,%xmm5; \
482 pxor %xmm0,%xmm2; \
483 movdqa %xmm0,tmp_at(1); \
484 pxor %xmm5,%xmm4; \
485 movdqa %xmm4,tmp_at(4); \
486 pxor tmp_at(3),%xmm4; \
487 movdqa %xmm4,%xmm6; \
488 pandn tmp_at(2),%xmm4; \
489 pand %xmm0,%xmm6; \
490 movdqa %xmm6,tmp_at(5); \
491 pxor %xmm1,%xmm6; \
492 movdqa %xmm6,tmp_at(6); \
493 por %xmm2,%xmm6; \
494 movdqa %xmm6,tmp_at(7); \
495 pxor tmp_at(4),%xmm6; \
496 movdqa %xmm6,%xmm0; \
497 pand %xmm7,%xmm6; \
498 movdqa %xmm6,tmp_at(8); \
499 movdqa tmp_at(3),%xmm6; \
500 por %xmm1,%xmm2; \
501 pandn tmp_at(8),%xmm6; \
502 movdqa %xmm6,tmp_at(9); \
503 movdqa tmp_at(6),%xmm6; \
504 por %xmm4,%xmm6; \
505 movdqa %xmm6,tmp_at(6); \
506 pxor tmp_at(9),%xmm6; \
507 movdqa %xmm6,tmp_at(10); \
508 pand %xmm3,%xmm6; \
509 pxor out4,%xmm6; \
510 pxor %xmm0,%xmm6; \
511 por tmp_at(1),%xmm0; \
512 movdqa %xmm6,out4; \
513 movdqa tmp_at(7),%xmm6; \
514 pxor %xmm1,%xmm6; \
515 movdqa %xmm3,%xmm1; \
516 movdqa %xmm6,tmp_at(7); \
517 pandn tmp_at(3),%xmm6; \
518 pxor %xmm7,%xmm6; \
519 movdqa tmp_at(8),%xmm7; \
520 movdqa %xmm6,tmp_at(12); \
521 pandn tmp_at(2),%xmm7; \
522 pand tmp_at(6),%xmm0; \
523 por %xmm6,%xmm7; \
524 pxor %xmm6,%xmm0; \
525 movdqa tmp_at(9),%xmm6; \
526 por %xmm3,%xmm4; \
527 pandn %xmm0,%xmm6; \
528 por %xmm7,%xmm5; \
529 pxor %xmm4,%xmm6; \
530 pxor tmp_at(4),%xmm0; \
531 pxor out3,%xmm6; \
532 pxor %xmm2,%xmm5; \
533 movdqa %xmm6,out3; \
534 movdqa tmp_at(5),%xmm6; \
535 pandn tmp_at(2),%xmm0; \
536 pxor pnot,%xmm2; \
537 pxor tmp_at(7),%xmm2; \
538 pxor tmp_at(3),%xmm6; \
539 pxor out2,%xmm5; \
540 movdqa tmp_at(12),%xmm4; \
541 pxor %xmm2,%xmm0; \
542 pxor tmp_at(1),%xmm4; \
543 pxor tmp_at(10),%xmm5; \
544 pand %xmm6,%xmm4; \
545 pandn %xmm0,%xmm3; \
546 pxor out1,%xmm4; \
547 pandn %xmm7,%xmm1; \
548 pxor tmp_at(8),%xmm4; \
549 pxor %xmm2,%xmm1; \
550 pxor %xmm3,%xmm5; \
551 movdqa %xmm5,out2; \
552 pxor %xmm1,%xmm4; \
553 movdqa %xmm4,out1
554
/*
 * S7 - bitslice DES S-box 7 (46 gates).  Same contract as S1: "extra"
 * completes a6; inputs in %xmm0..%xmm5; results XORed into out1..out4;
 * clobbers %xmm0-%xmm7 and tmp_at() slots; reads pnot.
 */
555 #define S7(out1, out2, out3, out4, extra) \
556 extra; \
557 movdqa %xmm0,tmp_at(1); \
558 movdqa %xmm4,tmp_at(3); \
559 movdqa %xmm4,%xmm0; /* NOTE(review): looks like a dead store - %xmm0 is rewritten below before any read; confirm vs upstream */ \
560 pxor %xmm3,%xmm4; \
561 movdqa %xmm5,tmp_at(4); \
562 movdqa %xmm4,%xmm7; \
563 movdqa %xmm3,tmp_at(2); \
564 pxor %xmm2,%xmm4; \
565 movdqa %xmm4,tmp_at(5); \
566 pand %xmm5,%xmm4; \
567 movdqa %xmm7,%xmm5; \
568 pxor tmp_at(4),%xmm5; \
569 pand %xmm3,%xmm7; \
570 movdqa %xmm7,tmp_at(6); \
571 movdqa %xmm7,%xmm6; \
572 pxor %xmm1,%xmm7; \
573 pand tmp_at(4),%xmm6; \
574 pxor %xmm2,%xmm6; \
575 movdqa %xmm7,tmp_at(7); \
576 movdqa tmp_at(1),%xmm3; \
577 movdqa %xmm6,%xmm0; \
578 por %xmm7,%xmm6; \
579 pand %xmm4,%xmm7; \
580 pxor %xmm5,%xmm6; \
581 pandn %xmm3,%xmm7; \
582 pxor %xmm4,%xmm0; \
583 pxor out4,%xmm7; \
584 pxor %xmm5,%xmm4; \
585 pxor %xmm6,%xmm7; \
586 movdqa %xmm7,out4; \
587 pandn tmp_at(2),%xmm4; \
588 por tmp_at(6),%xmm6; \
589 movdqa tmp_at(5),%xmm7; \
590 pandn tmp_at(3),%xmm7; \
591 pandn tmp_at(7),%xmm4; \
592 movdqa %xmm7,tmp_at(9); \
593 por tmp_at(7),%xmm7; \
594 pandn tmp_at(5),%xmm5; \
595 pxor %xmm0,%xmm7; \
596 pxor tmp_at(3),%xmm0; \
597 pxor %xmm4,%xmm0; \
598 movdqa tmp_at(1),%xmm4; \
599 pand %xmm0,%xmm2; \
600 por %xmm2,%xmm6; \
601 pxor %xmm5,%xmm6; \
602 pandn %xmm6,%xmm3; \
603 movdqa %xmm6,%xmm5; \
604 pxor %xmm7,%xmm3; \
605 pxor %xmm6,%xmm7; \
606 por %xmm0,%xmm6; \
607 pxor out1,%xmm3; \
608 pand tmp_at(4),%xmm6; \
609 pxor pnot,%xmm5; \
610 pand %xmm6,%xmm1; \
611 pxor out3,%xmm0; \
612 pxor %xmm7,%xmm1; \
613 movdqa %xmm3,out1; \
614 movdqa %xmm4,%xmm3; \
615 pxor tmp_at(3),%xmm7; \
616 por %xmm1,%xmm2; \
617 pxor %xmm6,%xmm2; \
618 por %xmm2,%xmm7; \
619 pand %xmm7,%xmm4; \
620 pxor %xmm6,%xmm7; \
621 por tmp_at(9),%xmm7; \
622 pxor %xmm5,%xmm7; \
623 pxor out2,%xmm1; \
624 pandn %xmm7,%xmm3; \
625 pxor %xmm4,%xmm0; \
626 movdqa %xmm0,out3; \
627 pxor %xmm3,%xmm1; \
628 movdqa %xmm1,out2
629
/*
 * S8 - bitslice DES S-box 8 (41 gates).  Same contract as S1: "extra"
 * completes a6; inputs in %xmm0..%xmm5; results XORed into out1..out4;
 * clobbers %xmm0-%xmm7 and tmp_at() slots; reads pnot.
 */
630 #define S8(out1, out2, out3, out4, extra) \
631 extra; \
632 movdqa %xmm2,%xmm7; \
633 movdqa %xmm1,tmp_at(1); \
634 pandn %xmm2,%xmm1; \
635 movdqa %xmm2,tmp_at(2); \
636 pandn %xmm4,%xmm2; \
637 movdqa %xmm5,tmp_at(5); \
638 pxor %xmm3,%xmm2; \
639 movdqa %xmm4,tmp_at(4); \
640 movdqa %xmm1,%xmm5; \
641 movdqa %xmm3,tmp_at(3); \
642 movdqa %xmm2,%xmm4; \
643 movdqa %xmm2,%xmm3; \
644 pandn tmp_at(1),%xmm4; \
645 pand %xmm0,%xmm2; \
646 pandn tmp_at(1),%xmm7; \
647 pandn %xmm2,%xmm1; \
648 pxor tmp_at(4),%xmm7; \
649 movdqa %xmm4,%xmm6; \
650 por %xmm0,%xmm4; \
651 movdqa %xmm7,tmp_at(6); \
652 pand %xmm4,%xmm7; \
653 pxor pnot,%xmm3; \
654 por %xmm7,%xmm2; \
655 pxor %xmm7,%xmm3; \
656 pandn tmp_at(2),%xmm4; \
657 movdqa tmp_at(5),%xmm7; \
658 pxor %xmm4,%xmm3; \
659 por %xmm1,%xmm7; \
660 pxor %xmm3,%xmm5; \
661 pxor %xmm5,%xmm7; \
662 pxor %xmm0,%xmm5; \
663 pxor out2,%xmm7; \
664 movdqa %xmm7,out2; \
665 pxor tmp_at(1),%xmm3; \
666 movdqa %xmm5,%xmm4; \
667 pand tmp_at(4),%xmm5; \
668 pxor %xmm3,%xmm5; \
669 por tmp_at(3),%xmm3; \
670 pxor %xmm5,%xmm6; \
671 pxor tmp_at(6),%xmm3; \
672 pxor %xmm2,%xmm5; \
673 pxor %xmm6,%xmm3; \
674 por tmp_at(1),%xmm5; \
675 pxor %xmm3,%xmm0; \
676 pxor %xmm4,%xmm5; \
677 por tmp_at(3),%xmm4; \
678 pxor tmp_at(4),%xmm5; \
679 pand tmp_at(5),%xmm2; \
680 pandn %xmm5,%xmm4; \
681 pand tmp_at(5),%xmm0; \
682 pxor %xmm6,%xmm0; \
683 por %xmm1,%xmm4; \
684 pxor out4,%xmm0; \
685 pxor %xmm4,%xmm3; \
686 pxor out3,%xmm2; \
687 por tmp_at(5),%xmm3; \
688 pxor out1,%xmm3; \
689 pxor %xmm5,%xmm2; \
690 pxor %xmm6,%xmm3; \
691 movdqa %xmm0,out4; \
692 movdqa %xmm2,out3; \
693 movdqa %xmm3,out1
694
/* S-box input registers: a1..a6 live in %xmm0..%xmm5 */
695 #define a1 %xmm0
696 #define a2 %xmm1
697 #define a3 %xmm2
698 #define a4 %xmm3
699 #define a5 %xmm4
700 #define a6 %xmm5
701
/* zero aliases a1/%xmm0; only meaningful while %xmm0 holds all zeroes
   (each crypt routine starts with "pxor zero,zero") */
702 #define zero %xmm0
703
/* Zero eight consecutive B() vectors starting at index i */
704 #define DES_bs_clear_block_8(i) \
705 movdqa zero,B(i); \
706 movdqa zero,B(i + 1); \
707 movdqa zero,B(i + 2); \
708 movdqa zero,B(i + 3); \
709 movdqa zero,B(i + 4); \
710 movdqa zero,B(i + 5); \
711 movdqa zero,B(i + 6); \
712 movdqa zero,B(i + 7)
713
/* Zero the entire 64-vector bitslice block B */
714 #define DES_bs_clear_block \
715 DES_bs_clear_block_8(0); \
716 DES_bs_clear_block_8(8); \
717 DES_bs_clear_block_8(16); \
718 DES_bs_clear_block_8(24); \
719 DES_bs_clear_block_8(32); \
720 DES_bs_clear_block_8(40); \
721 DES_bs_clear_block_8(48); \
722 DES_bs_clear_block_8(56)
723
/* k_ptr walks the key schedule: K(i) indexes it as vectors, k(i) as pointers */
724 #define k_ptr %edx
725 #define K(i) nvec(i)(k_ptr)
726 #define k(i) nptr(i)(k_ptr)
727
/* a6_p: finish a6 by XORing the vector a6_xor_ptr points at (pointer was
   loaded by xor_E / xor_B_KS_p); a6_v(i): finish a6 by XORing K(i) directly */
728 #define a6_xor_ptr %esi
729 #define a6_p pxor (a6_xor_ptr),a6
730 #define a6_v(i) pxor K(i),a6
731
/* tmp2 aliases a6_xor_ptr: usable as scratch only until a6_p consumes it */
732 #define tmp1 %ecx
733 #define tmp2 a6_xor_ptr
734
/*
 * xor_E(i): load one S-box's inputs, aN = K(i+N-1) XOR *E(i+N-1) for
 * a1..a5.  a6 gets only K(i+5); the pointer E(i+5) is left in
 * a6_xor_ptr so the S-box's "extra" argument (a6_p) completes the XOR.
 * Clobbers tmp1 (%ecx) and a6_xor_ptr (%esi).
 */
735 #define xor_E(i) \
736 movl E(i),tmp1; \
737 movdqa K(i),a1; \
738 movl E(i + 1),tmp2; \
739 movdqa K(i + 1),a2; \
740 pxor (tmp1),a1; \
741 pxor (tmp2),a2; \
742 movl E(i + 2),tmp1; \
743 movdqa K(i + 2),a3; \
744 movl E(i + 3),tmp2; \
745 movdqa K(i + 3),a4; \
746 pxor (tmp1),a3; \
747 pxor (tmp2),a4; \
748 movl E(i + 4),tmp1; \
749 movdqa K(i + 4),a5; \
750 movl E(i + 5),a6_xor_ptr; \
751 movdqa K(i + 5),a6; \
752 pxor (tmp1),a5
753
/*
 * xor_B(...): load S-box inputs directly from B (no E indirection):
 * aN = B(bN) XOR K(kN) for a1..a5; a6 = B(b6) only -- the caller passes
 * a6_v(k6) as the S-box "extra" argument to fold in its key bits.
 */
754 #define xor_B(b1, k1, b2, k2, b3, k3, b4, k4, b5, k5, b6) \
755 movdqa B(b1),a1; \
756 movdqa B(b2),a2; \
757 pxor K(k1),a1; \
758 movdqa B(b3),a3; \
759 pxor K(k2),a2; \
760 movdqa B(b4),a4; \
761 pxor K(k3),a3; \
762 movdqa B(b5),a5; \
763 pxor K(k4),a4; \
764 movdqa B(b6),a6; \
765 pxor K(k5),a5
766
/*
 * xor_B_KS_p(...): like xor_B but with the pointer-based key schedule:
 * aN = B(bN) XOR *k(kN) for a1..a5; a6 = B(b6) with k(k6) left in
 * a6_xor_ptr for a6_p.  Clobbers tmp1 (%ecx) and a6_xor_ptr (%esi).
 */
767 #define xor_B_KS_p(b1, k1, b2, k2, b3, k3, b4, k4, b5, k5, b6, k6) \
768 movl k(k1),tmp1; \
769 movl k(k2),tmp2; \
770 movdqa B(b1),a1; \
771 movdqa B(b2),a2; \
772 pxor (tmp1),a1; \
773 movl k(k3),tmp1; \
774 pxor (tmp2),a2; \
775 movl k(k4),tmp2; \
776 movdqa B(b3),a3; \
777 movdqa B(b4),a4; \
778 pxor (tmp1),a3; \
779 movl k(k5),tmp1; \
780 pxor (tmp2),a4; \
781 movdqa B(b5),a5; \
782 movl k(k6),a6_xor_ptr; \
783 movdqa B(b6),a6; \
784 pxor (tmp1),a5
785
786 .text
787
788 DO_ALIGN(5)
/*
 * void DES_bs_init_asm(void) - store an all-ones vector into the pnot
 * scratch slot.  Must run before any of the crypt routines below (their
 * S-boxes read pnot to implement NOT).  Clobbers %xmm0.
 */
789 .globl DES_bs_init_asm
790 DES_bs_init_asm:
791 pcmpeqd %xmm0,%xmm0
792 movdqa %xmm0,pnot
793 ret
794
795 #define rounds_and_swapped %ebp
796 #define iterations %eax
797
798 DO_ALIGN(5)
/*
 * void DES_bs_crypt(int count) - cdecl, 32-bit x86/SSE2.
 * Performs `count` 16-round DES encryptions of the all-zero bitslice
 * block (B is cleared on entry) with the value-based key schedule KS_v.
 * Each pass computes two rounds: the _start half writes B(32..63), the
 * _swap half writes B(0..31); eight passes make one encryption.
 * rounds_and_swapped counts those passes, with 0x100 OR'ed in to mark
 * the "swapped" state between encryptions -- when it decrements to
 * exactly 0x100 the cmpl below routes the final round through
 * DES_bs_crypt_next, which rewinds the key-schedule pointer without a
 * trailing swap.
 * In: 4(%esp) = iteration count (assumed > 0).
 * Preserves %ebp/%esi per cdecl; clobbers %eax, %ecx, %edx, %xmm0-%xmm7.
 */
799 .globl DES_bs_crypt
800 DES_bs_crypt:
801 movl 4(%esp),iterations
802 pxor zero,zero
803 pushl %ebp
804 pushl %esi
805 movl $DES_bs_all_KS_v,k_ptr
806 DES_bs_clear_block
807 movl $8,rounds_and_swapped
808 DES_bs_crypt_start:
809 xor_E(0)
810 S1(B(40), B(48), B(54), B(62), a6_p)
811 xor_E(6)
812 S2(B(44), B(59), B(33), B(49), a6_p)
813 xor_E(12)
814 S3(B(55), B(47), B(61), B(37), a6_p)
815 xor_E(18)
816 S4(B(57), B(51), B(41), B(32), a6_p)
817 xor_E(24)
818 S5(B(39), B(45), B(56), B(34), a6_p)
819 xor_E(30)
820 S6(B(35), B(60), B(42), B(50), a6_p)
821 xor_E(36)
822 S7(B(63), B(43), B(53), B(38), a6_p)
823 xor_E(42)
824 S8(B(36), B(58), B(46), B(52), a6_p)
825 cmpl $0x100,rounds_and_swapped
826 je DES_bs_crypt_next
827 DES_bs_crypt_swap:
828 xor_E(48)
829 S1(B(8), B(16), B(22), B(30), a6_p)
830 xor_E(54)
831 S2(B(12), B(27), B(1), B(17), a6_p)
832 xor_E(60)
833 S3(B(23), B(15), B(29), B(5), a6_p)
834 xor_E(66)
835 S4(B(25), B(19), B(9), B(0), a6_p)
836 xor_E(72)
837 S5(B(7), B(13), B(24), B(2), a6_p)
838 xor_E(78)
839 S6(B(3), B(28), B(10), B(18), a6_p)
840 xor_E(84)
841 S7(B(31), B(11), B(21), B(6), a6_p)
842 xor_E(90)
843 addl $nvec(96),k_ptr
844 S8(B(4), B(26), B(14), B(20), a6_p)
845 decl rounds_and_swapped
846 jnz DES_bs_crypt_start
/* one encryption done: rewind the key schedule and enter swapped mode */
847 subl $nvec(0x300+48),k_ptr
848 movl $0x108,rounds_and_swapped
849 decl iterations
850 jnz DES_bs_crypt_swap
851 popl %esi
852 popl %ebp
853 ret
/* final round of an encryption that began in swapped mode */
854 DES_bs_crypt_next:
855 subl $nvec(0x300-48),k_ptr
856 movl $8,rounds_and_swapped
857 decl iterations
858 jnz DES_bs_crypt_start
859 popl %esi
860 popl %ebp
861 ret
862
863 DO_ALIGN(5)
/*
 * void DES_bs_crypt_25(void) - cdecl, 32-bit x86/SSE2.
 * Same double-round / swap structure as DES_bs_crypt, with the
 * iteration count fixed at 25 (traditional crypt(3)).  Some rounds load
 * their inputs straight from B with direct key vectors (xor_B / a6_v)
 * instead of going through the E pointers -- NOTE(review): presumably
 * these are the S-box inputs a salt can never remap; confirm against
 * the C code that fills DES_bs_all_E.
 * Preserves %ebp/%esi; clobbers %eax, %ecx, %edx, %xmm0-%xmm7; zeroes B
 * on entry.
 */
864 .globl DES_bs_crypt_25
865 DES_bs_crypt_25:
866 pxor zero,zero
867 pushl %ebp
868 pushl %esi
869 movl $DES_bs_all_KS_v,k_ptr
870 DES_bs_clear_block
871 movl $8,rounds_and_swapped
872 movl $25,iterations
873 DES_bs_crypt_25_start:
874 xor_E(0)
875 S1(B(40), B(48), B(54), B(62), a6_p)
876 xor_E(6)
877 S2(B(44), B(59), B(33), B(49), a6_p)
878 xor_B(7, 12, 8, 13, 9, 14, 10, 15, 11, 16, 12)
879 S3(B(55), B(47), B(61), B(37), a6_v(17))
880 xor_B(11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16)
881 S4(B(57), B(51), B(41), B(32), a6_v(23))
882 xor_E(24)
883 S5(B(39), B(45), B(56), B(34), a6_p)
884 xor_E(30)
885 S6(B(35), B(60), B(42), B(50), a6_p)
886 xor_B(23, 36, 24, 37, 25, 38, 26, 39, 27, 40, 28)
887 S7(B(63), B(43), B(53), B(38), a6_v(41))
888 xor_B(27, 42, 28, 43, 29, 44, 30, 45, 31, 46, 0)
889 S8(B(36), B(58), B(46), B(52), a6_v(47))
890 cmpl $0x100,rounds_and_swapped
891 je DES_bs_crypt_25_next
892 DES_bs_crypt_25_swap:
893 xor_E(48)
894 S1(B(8), B(16), B(22), B(30), a6_p)
895 xor_E(54)
896 S2(B(12), B(27), B(1), B(17), a6_p)
897 xor_B(39, 60, 40, 61, 41, 62, 42, 63, 43, 64, 44)
898 S3(B(23), B(15), B(29), B(5), a6_v(65))
899 xor_B(43, 66, 44, 67, 45, 68, 46, 69, 47, 70, 48)
900 S4(B(25), B(19), B(9), B(0), a6_v(71))
901 xor_E(72)
902 S5(B(7), B(13), B(24), B(2), a6_p)
903 xor_E(78)
904 S6(B(3), B(28), B(10), B(18), a6_p)
905 xor_B(55, 84, 56, 85, 57, 86, 58, 87, 59, 88, 60)
906 S7(B(31), B(11), B(21), B(6), a6_v(89))
907 xor_B(59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 32)
908 S8(B(4), B(26), B(14), B(20), a6_v(95))
909 addl $nvec(96),k_ptr
910 decl rounds_and_swapped
911 jnz DES_bs_crypt_25_start
912 subl $nvec(0x300+48),k_ptr
913 movl $0x108,rounds_and_swapped
914 decl iterations
915 jnz DES_bs_crypt_25_swap
916 popl %esi
917 popl %ebp
918 ret
919 DES_bs_crypt_25_next:
920 subl $nvec(0x300-48),k_ptr
921 movl $8,rounds_and_swapped
922 decl iterations
/* no exit test needed here: with the odd fixed total of 25, the counter
   can only reach zero on the jnz above (swap path), never on this path */
923 jmp DES_bs_crypt_25_start
924
/* ones aliases a2/%xmm1; only used during the B initialization below */
925 #define ones %xmm1
926
927 #define rounds %eax
928
929 DO_ALIGN(5)
/*
 * void DES_bs_crypt_LM(void) - cdecl, 32-bit x86/SSE2.
 * One 16-round DES encryption (two rounds per loop pass, eight passes)
 * of the fixed bit pattern loaded into B below (presumably the bitslice
 * form of the LM-hash magic plaintext -- TODO confirm against the C
 * code), using the pointer-based key schedule KS_p and no E expansion,
 * no initial/final swap handling.
 * Preserves %esi per cdecl; clobbers %eax, %ecx, %edx, %xmm0-%xmm7.
 */
930 .globl DES_bs_crypt_LM
931 DES_bs_crypt_LM:
932 pxor zero,zero
933 pushl %esi
934 pcmpeqd ones,ones
935 movl $DES_bs_all_KS_p,k_ptr
/* load the constant plaintext: one all-zero or all-one vector per bit */
936 movdqa zero,B(0)
937 movdqa zero,B(1)
938 movdqa zero,B(2)
939 movdqa zero,B(3)
940 movdqa zero,B(4)
941 movdqa zero,B(5)
942 movdqa zero,B(6)
943 movdqa zero,B(7)
944 movdqa ones,B(8)
945 movdqa ones,B(9)
946 movdqa ones,B(10)
947 movdqa zero,B(11)
948 movdqa ones,B(12)
949 movdqa zero,B(13)
950 movdqa zero,B(14)
951 movdqa zero,B(15)
952 movdqa zero,B(16)
953 movdqa zero,B(17)
954 movdqa zero,B(18)
955 movdqa zero,B(19)
956 movdqa zero,B(20)
957 movdqa zero,B(21)
958 movdqa zero,B(22)
959 movdqa ones,B(23)
960 movdqa zero,B(24)
961 movdqa zero,B(25)
962 movdqa ones,B(26)
963 movdqa zero,B(27)
964 movdqa zero,B(28)
965 movdqa ones,B(29)
966 movdqa ones,B(30)
967 movdqa ones,B(31)
968 movdqa zero,B(32)
969 movdqa zero,B(33)
970 movdqa zero,B(34)
971 movdqa ones,B(35)
972 movdqa zero,B(36)
973 movdqa ones,B(37)
974 movdqa ones,B(38)
975 movdqa ones,B(39)
976 movdqa zero,B(40)
977 movdqa zero,B(41)
978 movdqa zero,B(42)
979 movdqa zero,B(43)
980 movdqa zero,B(44)
981 movdqa ones,B(45)
982 movdqa zero,B(46)
983 movdqa zero,B(47)
984 movdqa ones,B(48)
985 movdqa ones,B(49)
986 movdqa zero,B(50)
987 movdqa zero,B(51)
988 movdqa zero,B(52)
989 movdqa zero,B(53)
990 movdqa ones,B(54)
991 movdqa zero,B(55)
992 movdqa ones,B(56)
993 movdqa zero,B(57)
994 movdqa ones,B(58)
995 movdqa zero,B(59)
996 movdqa ones,B(60)
997 movdqa ones,B(61)
998 movdqa ones,B(62)
999 movdqa ones,B(63)
1000 movl $8,rounds
1001 DES_bs_crypt_LM_loop:
1002 xor_B_KS_p(31, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5)
1003 S1(B(40), B(48), B(54), B(62), a6_p)
1004 xor_B_KS_p(3, 6, 4, 7, 5, 8, 6, 9, 7, 10, 8, 11)
1005 S2(B(44), B(59), B(33), B(49), a6_p)
1006 xor_B_KS_p(7, 12, 8, 13, 9, 14, 10, 15, 11, 16, 12, 17)
1007 S3(B(55), B(47), B(61), B(37), a6_p)
1008 xor_B_KS_p(11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 23)
1009 S4(B(57), B(51), B(41), B(32), a6_p)
1010 xor_B_KS_p(15, 24, 16, 25, 17, 26, 18, 27, 19, 28, 20, 29)
1011 S5(B(39), B(45), B(56), B(34), a6_p)
1012 xor_B_KS_p(19, 30, 20, 31, 21, 32, 22, 33, 23, 34, 24, 35)
1013 S6(B(35), B(60), B(42), B(50), a6_p)
1014 xor_B_KS_p(23, 36, 24, 37, 25, 38, 26, 39, 27, 40, 28, 41)
1015 S7(B(63), B(43), B(53), B(38), a6_p)
1016 xor_B_KS_p(27, 42, 28, 43, 29, 44, 30, 45, 31, 46, 0, 47)
1017 S8(B(36), B(58), B(46), B(52), a6_p)
1018 xor_B_KS_p(63, 48, 32, 49, 33, 50, 34, 51, 35, 52, 36, 53)
1019 S1(B(8), B(16), B(22), B(30), a6_p)
1020 xor_B_KS_p(35, 54, 36, 55, 37, 56, 38, 57, 39, 58, 40, 59)
1021 S2(B(12), B(27), B(1), B(17), a6_p)
1022 xor_B_KS_p(39, 60, 40, 61, 41, 62, 42, 63, 43, 64, 44, 65)
1023 S3(B(23), B(15), B(29), B(5), a6_p)
1024 xor_B_KS_p(43, 66, 44, 67, 45, 68, 46, 69, 47, 70, 48, 71)
1025 S4(B(25), B(19), B(9), B(0), a6_p)
1026 xor_B_KS_p(47, 72, 48, 73, 49, 74, 50, 75, 51, 76, 52, 77)
1027 S5(B(7), B(13), B(24), B(2), a6_p)
1028 xor_B_KS_p(51, 78, 52, 79, 53, 80, 54, 81, 55, 82, 56, 83)
1029 S6(B(3), B(28), B(10), B(18), a6_p)
1030 xor_B_KS_p(55, 84, 56, 85, 57, 86, 58, 87, 59, 88, 60, 89)
1031 S7(B(31), B(11), B(21), B(6), a6_p)
1032 xor_B_KS_p(59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 32, 95)
1033 addl $nptr(96),k_ptr
1034 S8(B(4), B(26), B(14), B(20), a6_p)
1035 decl rounds
1036 jnz DES_bs_crypt_LM_loop
1037 popl %esi
1038 ret
1039
1040 #endif /* DES_BS_ASM */
1041
/* Mark the stack non-executable on Linux/ELF toolchains */
1042 #if defined(__ELF__) && defined(__linux__)
1043 .section .note.GNU-stack,"",@progbits
1044 #endif

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Rev URL

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26