Browse Subversion Repository
Contents of /branches/mty-makai/x86-mmx.S
Parent Directory
| Revision Log
Revision 125 -
( show annotations)
( download)
Mon Apr 16 14:50:11 2007 UTC
(16 years, 11 months ago)
by notanpe
File size: 27372 byte(s)
魔改造用ブランチ
| 1 |
/* |
| 2 |
* This file is part of John the Ripper password cracker, |
| 3 |
* Copyright (c) 2000-2001,2005,2006 by Solar Designer and others: |
| 4 |
* |
| 5 |
* The MMX DES S-box code is by Bruce Ford and Rémi Guyomarch, originally |
| 6 |
* for use in the distributed.net clients, included here with permission. |
| 7 |
* Only minor modifications have been made to their S-box code. Optimized |
| 8 |
* S-box expressions are based on work by Matthew Kwan (see nonstd.c). |
| 9 |
* |
| 10 |
* Note: there's some MMX code in x86.S as well. |
| 11 |
*/ |
| 12 |
|
/*
 * When UNDERSCORES is defined, every symbol listed here is renamed to carry
 * a leading underscore (presumably to match the platform's C symbol naming;
 * confirm against the build configuration).
 */
#if defined(UNDERSCORES)
#define DES_bs_all			_DES_bs_all
#define DES_bs_init_asm			_DES_bs_init_asm
#define DES_bs_crypt			_DES_bs_crypt
#define DES_bs_crypt_25			_DES_bs_crypt_25
#define DES_bs_crypt_LM			_DES_bs_crypt_LM
#endif
| 20 |
|
/*
 * Some broken systems don't offer section alignments larger than 4 bytes,
 * while the MMX code needs at least an 8 byte alignment.  When ALIGN_FIX is
 * defined, every alignment directive is followed by 4 bytes of padding to
 * work around this issue when we happen to get bad addresses.
 *
 * ALIGN_LOG selects the operand form handed to .align: the exponent itself
 * when defined, 1 << exponent otherwise.
 */
#ifdef ALIGN_LOG
#define DO_ALIGN_ARG(log)		(log)
#else
#define DO_ALIGN_ARG(log)		(1 << (log))
#endif

#ifdef ALIGN_FIX
#define DO_ALIGN(log)			.align DO_ALIGN_ARG(log); .space 4
#else
#define DO_ALIGN(log)			.align DO_ALIGN_ARG(log)
#endif
| 39 |
|
/*
 * Storage for the exported DES_bs_all object.  It is placed in .data when
 * BSD is defined and in .bss otherwise, aligned with DO_ALIGN(5).
 *
 * NOTE(review): the sizes below presumably mirror a structure declared on
 * the C side; confirm against that declaration before changing any of them.
 */
#ifdef BSD
.data
#else
.bss
#endif

.globl DES_bs_all
DO_ALIGN(5)
DES_bs_all:
/* 0x300 entries of 4 bytes each. */
DES_bs_all_KSp:
	.space (0x300 * 4)
/* Two labels for one 0x300 * 8 byte region; DES_bs_crypt reads it via K(). */
DES_bs_all_KS_p:
DES_bs_all_KS_v:
	.space (0x300 * 8)
/* 96 entries of 4 bytes; xor_E loads each one and dereferences it. */
DES_bs_all_E:
	.space (96 * 4)
/* 56 entries of 8 bytes. */
DES_bs_all_K:
	.space (56 * 8)
/* 64 entries of 8 bytes: the data block addressed through B(). */
DES_bs_all_B:
	.space (64 * 8)
/* 16 scratch slots of 8 bytes, addressed through tmp_at(). */
DES_bs_all_tmp:
	.space (16 * 8)
/* Room reserved for members that this file never references. */
DES_bs_all_fields_not_used_here:
	.space (0x400 + 0x100 + 4 + 4 + 64 * 8)
/* Extra slack, per the label name, for alignment gaps. */
DES_bs_all_possible_alignment_gaps:
	.space 0x100
| 66 |
|
/* Address of the i-th 4-byte entry of DES_bs_all_E. */
#define E(i)				DES_bs_all_E + (i) * 4
/* Address of the i-th 8-byte entry of DES_bs_all_B. */
#define B(i)				DES_bs_all_B + (i) * 8
/* Address of the i-th 8-byte scratch slot in DES_bs_all_tmp. */
#define tmp_at(i)			DES_bs_all_tmp + (i) * 8

/* Scratch slot 0; DES_bs_init_asm stores an all-ones quadword here. */
#define pnot				tmp_at(0)
| 72 |
|
/*
 * S-box 1.
 *
 * a1..a6 name the MMX registers (%mm0..%mm5) that carry the six S-box
 * inputs; the xor_* loader macros in this file fill them.  S1_out1..S1_out4
 * record which register holds each result when S1 stores it.  S1_a1..S1_x58
 * are spill slots in DES_bs_all_tmp (slot 0 is pnot, which DES_bs_init_asm
 * fills with all-one bits, so "pxor pnot,reg" complements reg).
 *
 * S1(out1, out2, out3, out4, extra):
 *   out1..out4 - memory operands; each result is XORed with the operand's
 *                current contents and written back.
 *   extra      - one statement spliced in before %mm5 (a6) is first read;
 *                the callers in this file pass a6_p or a6_v(i) here.
 * Callers should treat %mm0..%mm7 as clobbered.  The instruction order is
 * hand-interleaved; do not reorder it.
 */
| 73 |
#define a1 %mm0 |
| 74 |
#define a2 %mm1 |
| 75 |
#define a3 %mm2 |
| 76 |
#define a4 %mm3 |
| 77 |
#define a5 %mm4 |
| 78 |
#define a6 %mm5 |
| 79 |
|
| 80 |
#define S1_out1 %mm5 |
| 81 |
#define S1_out2 %mm7 |
| 82 |
#define S1_out3 %mm2 |
| 83 |
#define S1_out4 %mm0 |
| 84 |
|
| 85 |
#define S1_a1 tmp_at(1) |
| 86 |
#define S1_a3 tmp_at(2) |
| 87 |
#define S1_a5 tmp_at(3) |
| 88 |
#define S1_x1 tmp_at(4) |
| 89 |
#define S1_x3 tmp_at(5) |
| 90 |
#define S1_x4 tmp_at(6) |
| 91 |
#define S1_x5 tmp_at(7) |
| 92 |
#define S1_x6 tmp_at(8) |
| 93 |
#define S1_x13 tmp_at(9) |
| 94 |
#define S1_x14 tmp_at(10) |
| 95 |
#define S1_x25 tmp_at(11) |
| 96 |
#define S1_x26 tmp_at(12) |
| 97 |
#define S1_x38 tmp_at(13) |
| 98 |
#define S1_x55 tmp_at(14) |
| 99 |
#define S1_x58 tmp_at(15) |
| 100 |
|
| 101 |
#define S1(out1, out2, out3, out4, extra) \ |
| 102 |
movq %mm0,S1_a1; \ |
| 103 |
movq %mm3,%mm6; \ |
| 104 |
pxor pnot,%mm0; \ |
| 105 |
pxor %mm2,%mm3; \ |
| 106 |
pxor pnot,%mm6; \ |
| 107 |
movq %mm0,%mm7; \ |
| 108 |
extra; \ |
| 109 |
movq %mm4,S1_a5; \ |
| 110 |
por %mm2,%mm7; \ |
| 111 |
movq %mm3,S1_x3; \ |
| 112 |
movq %mm5,%mm4; \ |
| 113 |
movq %mm6,S1_x1; \ |
| 114 |
pxor %mm0,%mm3; \ |
| 115 |
movq %mm7,S1_x5; \ |
| 116 |
por %mm6,%mm0; \ |
| 117 |
movq %mm2,S1_a3; \ |
| 118 |
pand %mm6,%mm7; \ |
| 119 |
movq %mm3,S1_x4; \ |
| 120 |
por %mm3,%mm2; \ |
| 121 |
pxor pnot,%mm2; \ |
| 122 |
pand %mm0,%mm4; \ |
| 123 |
movq %mm7,%mm6; \ |
| 124 |
por %mm5,%mm2; \ |
| 125 |
movq %mm7,S1_x6; \ |
| 126 |
por %mm5,%mm6; \ |
| 127 |
pxor %mm2,%mm7; \ |
| 128 |
pxor %mm6,%mm3; \ |
| 129 |
movq %mm2,S1_x25; \ |
| 130 |
pxor %mm4,%mm6; \ |
| 131 |
pand S1_a3,%mm4; \ |
| 132 |
movq %mm6,%mm2; \ |
| 133 |
pxor S1_a3,%mm6; \ |
| 134 |
por %mm1,%mm2; \ |
| 135 |
pand S1_x5,%mm6; \ |
| 136 |
pxor %mm3,%mm2; \ |
| 137 |
movq %mm4,S1_x38; \ |
| 138 |
pxor %mm2,%mm0; \ |
| 139 |
movq %mm7,S1_x26; \ |
| 140 |
movq %mm5,%mm4; \ |
| 141 |
movq %mm2,S1_x13; \ |
| 142 |
por %mm0,%mm4; \ |
| 143 |
movq S1_x1,%mm7; \ |
| 144 |
por %mm1,%mm6; \ |
| 145 |
movq %mm0,S1_x14; \ |
| 146 |
movq %mm3,%mm2; \ |
| 147 |
pandn S1_x3,%mm0; \ |
| 148 |
pxor %mm7,%mm4; \ |
| 149 |
por S1_x4,%mm5; \ |
| 150 |
por %mm1,%mm0; \ |
| 151 |
pxor S1_x38,%mm5; \ |
| 152 |
pxor %mm0,%mm4; \ |
| 153 |
movq S1_a5,%mm0; \ |
| 154 |
pand %mm7,%mm2; \ |
| 155 |
movq %mm6,S1_x55; \ |
| 156 |
por %mm1,%mm2; \ |
| 157 |
movq S1_x14,%mm6; \ |
| 158 |
por %mm4,%mm0; \ |
| 159 |
pand S1_x5,%mm6; \ |
| 160 |
por %mm3,%mm7; \ |
| 161 |
movq %mm5,S1_x58; \ |
| 162 |
pxor %mm3,%mm6; \ |
| 163 |
pxor S1_x6,%mm7; \ |
| 164 |
movq %mm1,%mm5; \ |
| 165 |
pxor S1_x26,%mm2; \ |
| 166 |
pand %mm6,%mm5; \ |
| 167 |
pand S1_a3,%mm6; \ |
| 168 |
pxor %mm7,%mm5; \ |
| 169 |
por S1_a5,%mm5; \ |
| 170 |
movq S1_a1,%mm7; \ |
| 171 |
pxor %mm2,%mm5; \ |
| 172 |
movq S1_x4,%mm2; \ |
| 173 |
por %mm3,%mm7; \ |
| 174 |
por S1_x38,%mm2; \ |
| 175 |
pxor %mm6,%mm3; \ |
| 176 |
pxor S1_x25,%mm6; \ |
| 177 |
pxor %mm4,%mm7; \ |
| 178 |
movq S1_a3,%mm4; \ |
| 179 |
por %mm1,%mm7; \ |
| 180 |
por S1_x26,%mm4; \ |
| 181 |
por %mm1,%mm6; \ |
| 182 |
pxor S1_x14,%mm4; \ |
| 183 |
pxor %mm2,%mm6; \ |
| 184 |
movq S1_x13,%mm2; \ |
| 185 |
pxor %mm4,%mm7; \ |
| 186 |
pxor S1_x55,%mm3; \ |
| 187 |
pxor %mm2,%mm0; \ |
| 188 |
pxor out1,%mm5; \ |
| 189 |
pand %mm3,%mm2; \ |
| 190 |
movq S1_a5,%mm4; \ |
| 191 |
pand %mm1,%mm2; \ |
| 192 |
movq %mm5,out1; \ |
| 193 |
pxor S1_x58,%mm2; \ |
| 194 |
pand %mm4,%mm7; \ |
| 195 |
pxor out4,%mm0; \ |
| 196 |
pand %mm4,%mm2; \ |
| 197 |
pxor out2,%mm7; \ |
| 198 |
movq %mm0,out4; \ |
| 199 |
pxor out3,%mm2; \ |
| 200 |
pxor %mm6,%mm7; \ |
| 201 |
pxor %mm3,%mm2; \ |
| 202 |
movq %mm7,out2; \ |
| 203 |
movq %mm2,out3 |
| 204 |
|
/*
 * S-box 2.
 *
 * Inputs are expected in %mm0..%mm5.  S2_out1/3/4 record which register
 * holds each result when it is stored; there is no S2_out2 because out2 is
 * written part-way through the sequence and its register (%mm3) is reloaded
 * afterwards.  S2_a1..S2_x25 are spill slots in DES_bs_all_tmp.
 *
 * S2(out1, out2, out3, out4, extra):
 *   out1..out4 - memory operands; each result is XORed with the operand's
 *                current contents and written back.
 *   extra      - one statement spliced in before %mm5 is first read.
 * Callers should treat %mm0..%mm7 as clobbered.  Do not reorder the
 * hand-interleaved instruction sequence.
 */
| 205 |
#define S2_out1 %mm1 |
| 206 |
#undef S2_out2 |
| 207 |
#define S2_out3 %mm7 |
| 208 |
#define S2_out4 %mm2 |
| 209 |
|
| 210 |
#define S2_a1 tmp_at(1) |
| 211 |
#define S2_a2 tmp_at(2) |
| 212 |
#define S2_a3 tmp_at(3) |
| 213 |
#define S2_a4 tmp_at(4) |
| 214 |
#define S2_x3 tmp_at(5) |
| 215 |
#define S2_x4 tmp_at(6) |
| 216 |
#define S2_x5 tmp_at(7) |
| 217 |
#define S2_x13 tmp_at(8) |
| 218 |
#define S2_x18 tmp_at(9) |
| 219 |
#define S2_x25 tmp_at(10) |
| 220 |
|
| 221 |
#define S2(out1, out2, out3, out4, extra) \ |
| 222 |
movq %mm3,S2_a4; \ |
| 223 |
movq %mm4,%mm6; \ |
| 224 |
extra; \ |
| 225 |
movq %mm0,S2_a1; \ |
| 226 |
movq %mm4,%mm7; \ |
| 227 |
pxor pnot,%mm0; \ |
| 228 |
pxor %mm5,%mm6; \ |
| 229 |
pxor pnot,%mm7; \ |
| 230 |
movq %mm0,%mm3; \ |
| 231 |
movq %mm2,S2_a3; \ |
| 232 |
por %mm5,%mm7; \ |
| 233 |
movq %mm6,S2_x3; \ |
| 234 |
por %mm7,%mm3; \ |
| 235 |
pxor %mm4,%mm7; \ |
| 236 |
pxor %mm0,%mm6; \ |
| 237 |
pand %mm1,%mm3; \ |
| 238 |
por %mm7,%mm2; \ |
| 239 |
movq %mm1,S2_a2; \ |
| 240 |
pxor %mm5,%mm3; \ |
| 241 |
movq %mm6,S2_x4; \ |
| 242 |
pxor %mm1,%mm6; \ |
| 243 |
movq %mm7,S2_x13; \ |
| 244 |
pand %mm3,%mm1; \ |
| 245 |
pand S2_a3,%mm3; \ |
| 246 |
pxor %mm2,%mm1; \ |
| 247 |
movq S2_x4,%mm7; \ |
| 248 |
movq %mm1,%mm2; \ |
| 249 |
pand S2_a4,%mm2; \ |
| 250 |
pxor %mm6,%mm3; \ |
| 251 |
movq %mm6,S2_x5; \ |
| 252 |
pxor %mm2,%mm3; \ |
| 253 |
movq S2_a1,%mm2; \ |
| 254 |
por %mm5,%mm7; \ |
| 255 |
por %mm2,%mm1; \ |
| 256 |
pand %mm3,%mm7; \ |
| 257 |
pxor out2,%mm3; \ |
| 258 |
por %mm4,%mm2; \ |
| 259 |
por S2_a3,%mm7; \ |
| 260 |
movq %mm2,%mm6; \ |
| 261 |
pxor S2_x13,%mm1; \ |
| 262 |
por %mm5,%mm6; \ |
| 263 |
movq %mm3,out2; \ |
| 264 |
pand %mm0,%mm4; \ |
| 265 |
movq S2_x13,%mm3; \ |
| 266 |
por %mm0,%mm5; \ |
| 267 |
movq %mm2,S2_x18; \ |
| 268 |
pxor %mm6,%mm3; \ |
| 269 |
movq S2_a2,%mm2; \ |
| 270 |
pxor %mm6,%mm0; \ |
| 271 |
pxor %mm2,%mm3; \ |
| 272 |
pand %mm2,%mm0; \ |
| 273 |
pxor %mm3,%mm7; \ |
| 274 |
por %mm4,%mm2; \ |
| 275 |
pxor S2_x3,%mm4; \ |
| 276 |
pand %mm3,%mm6; \ |
| 277 |
pxor %mm0,%mm4; \ |
| 278 |
pxor %mm5,%mm6; \ |
| 279 |
movq %mm7,S2_x25; \ |
| 280 |
pand %mm3,%mm0; \ |
| 281 |
movq S2_a3,%mm7; \ |
| 282 |
pxor %mm2,%mm5; \ |
| 283 |
pxor S2_x5,%mm0; \ |
| 284 |
pand %mm4,%mm7; \ |
| 285 |
pand S2_a2,%mm4; \ |
| 286 |
pxor %mm5,%mm7; \ |
| 287 |
por S2_a4,%mm7; \ |
| 288 |
movq %mm1,%mm5; \ |
| 289 |
por S2_a3,%mm5; \ |
| 290 |
por %mm2,%mm1; \ |
| 291 |
pand S2_x18,%mm2; \ |
| 292 |
pxor %mm3,%mm4; \ |
| 293 |
movq S2_a4,%mm3; \ |
| 294 |
pand %mm4,%mm2; \ |
| 295 |
pand S2_a3,%mm4; \ |
| 296 |
pxor %mm5,%mm0; \ |
| 297 |
pxor S2_x25,%mm7; \ |
| 298 |
pxor %mm6,%mm4; \ |
| 299 |
pxor out3,%mm7; \ |
| 300 |
pand %mm3,%mm1; \ |
| 301 |
por %mm3,%mm2; \ |
| 302 |
pxor out1,%mm1; \ |
| 303 |
pxor %mm4,%mm2; \ |
| 304 |
pxor %mm0,%mm1; \ |
| 305 |
pxor out4,%mm2; \ |
| 306 |
movq %mm1,out1; \ |
| 307 |
movq %mm7,out3; \ |
| 308 |
movq %mm2,out4 |
| 309 |
|
/*
 * S-box 3.
 *
 * Inputs are expected in %mm0..%mm5.  S3_out1..S3_out4 record which
 * register holds each result when it is stored.  S3_a1..S3_a4 are spill
 * slots in DES_bs_all_tmp; S3_a3 and S3_x38 deliberately share the slots of
 * S3_a5 and S3_x4 and are only written after the last read of the older
 * value.
 *
 * S3(out1, out2, out3, out4, extra):
 *   out1..out4 - memory operands; each result is XORed with the operand's
 *                current contents and written back.
 *   extra      - one statement spliced in before %mm5 is first read.
 * Callers should treat %mm0..%mm7 as clobbered.  Do not reorder the
 * hand-interleaved instruction sequence.
 */
| 310 |
#define S3_out1 %mm2 |
| 311 |
#define S3_out2 %mm6 |
| 312 |
#define S3_out3 %mm3 |
| 313 |
#define S3_out4 %mm7 |
| 314 |
|
| 315 |
#define S3_a1 tmp_at(1) |
| 316 |
#define S3_x2 tmp_at(2) |
| 317 |
#define S3_x9 tmp_at(3) |
| 318 |
#define S3_a5 tmp_at(4) |
| 319 |
#define S3_x4 tmp_at(5) |
| 320 |
#define S3_a6 tmp_at(6) |
| 321 |
#define S3_x6 tmp_at(7) |
| 322 |
#define S3_x5 tmp_at(8) |
| 323 |
#define S3_x11 tmp_at(9) |
| 324 |
#define S3_x12 tmp_at(10) |
| 325 |
#define S3_x13 tmp_at(11) |
| 326 |
#define S3_x54 tmp_at(12) |
| 327 |
#define S3_x7 tmp_at(13) |
| 328 |
#define S3_a4 tmp_at(14) |
| 329 |
#define S3_a3 S3_a5 |
| 330 |
#define S3_x38 S3_x4 |
| 331 |
|
| 332 |
#define S3(out1, out2, out3, out4, extra) \ |
| 333 |
movq %mm0,S3_a1; \ |
| 334 |
extra; \ |
| 335 |
movq %mm4,%mm0; \ |
| 336 |
movq %mm5,%mm6; \ |
| 337 |
pxor pnot,%mm6; \ |
| 338 |
movq %mm4,%mm7; \ |
| 339 |
pxor %mm6,%mm7; \ |
| 340 |
movq %mm6,S3_x2; \ |
| 341 |
pand %mm2,%mm0; \ |
| 342 |
movq %mm7,S3_x9; \ |
| 343 |
pxor %mm5,%mm0; \ |
| 344 |
movq %mm4,S3_a5; \ |
| 345 |
pandn %mm3,%mm4; \ |
| 346 |
movq %mm0,S3_x4; \ |
| 347 |
por %mm3,%mm7; \ |
| 348 |
movq S3_a5,%mm6; \ |
| 349 |
pxor %mm4,%mm0; \ |
| 350 |
movq %mm5,S3_a6; \ |
| 351 |
pandn %mm2,%mm6; \ |
| 352 |
movq %mm0,S3_x6; \ |
| 353 |
pxor %mm6,%mm7; \ |
| 354 |
movq S3_x2,%mm5; \ |
| 355 |
pxor %mm1,%mm0; \ |
| 356 |
movq %mm4,S3_x5; \ |
| 357 |
movq %mm7,%mm4; \ |
| 358 |
por S3_x4,%mm5; \ |
| 359 |
pand %mm0,%mm4; \ |
| 360 |
movq %mm7,S3_x11; \ |
| 361 |
pxor %mm5,%mm6; \ |
| 362 |
pxor S3_a5,%mm7; \ |
| 363 |
por %mm1,%mm6; \ |
| 364 |
movq %mm4,S3_x12; \ |
| 365 |
pand %mm5,%mm4; \ |
| 366 |
movq %mm7,S3_x13; \ |
| 367 |
por %mm0,%mm7; \ |
| 368 |
movq %mm4,S3_x54; \ |
| 369 |
movq %mm2,%mm4; \ |
| 370 |
pxor S3_x9,%mm4; \ |
| 371 |
pand %mm3,%mm7; \ |
| 372 |
movq %mm0,S3_x7; \ |
| 373 |
pxor %mm3,%mm4; \ |
| 374 |
pxor S3_a6,%mm5; \ |
| 375 |
pxor %mm4,%mm6; \ |
| 376 |
movq %mm3,S3_a4; \ |
| 377 |
por %mm5,%mm3; \ |
| 378 |
movq %mm2,S3_a3; \ |
| 379 |
pxor %mm3,%mm5; \ |
| 380 |
por %mm1,%mm5; \ |
| 381 |
pxor %mm7,%mm2; \ |
| 382 |
pxor S3_x12,%mm7; \ |
| 383 |
movq %mm2,%mm4; \ |
| 384 |
por S3_x5,%mm2; \ |
| 385 |
pand %mm1,%mm7; \ |
| 386 |
por S3_x4,%mm4; \ |
| 387 |
por %mm1,%mm2; \ |
| 388 |
pxor S3_x11,%mm7; \ |
| 389 |
pxor %mm3,%mm2; \ |
| 390 |
movq S3_a1,%mm3; \ |
| 391 |
pxor S3_a4,%mm4; \ |
| 392 |
pand %mm3,%mm7; \ |
| 393 |
pxor S3_x7,%mm7; \ |
| 394 |
por %mm3,%mm2; \ |
| 395 |
movq %mm4,S3_x38; \ |
| 396 |
pxor %mm6,%mm2; \ |
| 397 |
pxor out4,%mm7; \ |
| 398 |
por %mm1,%mm4; \ |
| 399 |
movq S3_a3,%mm6; \ |
| 400 |
movq %mm2,%mm3; \ |
| 401 |
pxor S3_x9,%mm6; \ |
| 402 |
por S3_x5,%mm6; \ |
| 403 |
pxor S3_x38,%mm3; \ |
| 404 |
pxor %mm6,%mm4; \ |
| 405 |
movq S3_a6,%mm6; \ |
| 406 |
pand S3_x11,%mm6; \ |
| 407 |
movq %mm7,out4; \ |
| 408 |
movq S3_x2,%mm0; \ |
| 409 |
pxor %mm6,%mm3; \ |
| 410 |
por S3_x6,%mm6; \ |
| 411 |
pand %mm1,%mm3; \ |
| 412 |
por S3_x38,%mm0; \ |
| 413 |
pxor %mm6,%mm3; \ |
| 414 |
pxor S3_x13,%mm0; \ |
| 415 |
movq %mm5,%mm6; \ |
| 416 |
por S3_a1,%mm3; \ |
| 417 |
pxor %mm5,%mm0; \ |
| 418 |
pand S3_x54,%mm6; \ |
| 419 |
pxor %mm4,%mm3; \ |
| 420 |
por S3_a1,%mm6; \ |
| 421 |
pxor out3,%mm3; \ |
| 422 |
pxor %mm0,%mm6; \ |
| 423 |
pxor out1,%mm2; \ |
| 424 |
movq %mm3,out3; \ |
| 425 |
pxor out2,%mm6; \ |
| 426 |
movq %mm2,out1; \ |
| 427 |
movq %mm6,out2 |
| 428 |
|
/*
 * S-box 4.
 *
 * Inputs are expected in %mm0..%mm5.  S4_out1..S4_out4 record which
 * register holds each result when it is stored.  S4_a2, S4_a3, S4_a4 and
 * S4_a6 are spill slots in DES_bs_all_tmp that keep copies of the inputs.
 *
 * S4(out1, out2, out3, out4, extra):
 *   out1..out4 - memory operands; each result is XORed with the operand's
 *                current contents and written back.
 *   extra      - one statement spliced in before %mm5 is first read.
 * Callers should treat %mm0..%mm7 as clobbered.  Do not reorder the
 * hand-interleaved instruction sequence.
 */
| 429 |
#define S4_out1 %mm1 |
| 430 |
#define S4_out2 %mm0 |
| 431 |
#define S4_out3 %mm6 |
| 432 |
#define S4_out4 %mm5 |
| 433 |
|
| 434 |
#define S4_a2 tmp_at(1) |
| 435 |
#define S4_a3 tmp_at(2) |
| 436 |
#define S4_a4 tmp_at(3) |
| 437 |
#define S4_a6 tmp_at(4) |
| 438 |
|
| 439 |
#define S4(out1, out2, out3, out4, extra) \ |
| 440 |
movq %mm2,%mm6; \ |
| 441 |
movq %mm3,S4_a4; \ |
| 442 |
movq %mm0,%mm7; \ |
| 443 |
movq %mm1,S4_a2; \ |
| 444 |
por %mm0,%mm6; \ |
| 445 |
extra; \ |
| 446 |
pand %mm4,%mm7; \ |
| 447 |
movq %mm1,%mm3; \ |
| 448 |
movq %mm5,S4_a6; \ |
| 449 |
movq %mm2,S4_a3; \ |
| 450 |
movq %mm4,%mm5; \ |
| 451 |
pand %mm6,%mm5; \ |
| 452 |
por %mm2,%mm3; \ |
| 453 |
pxor pnot,%mm2; \ |
| 454 |
pxor %mm5,%mm0; \ |
| 455 |
pxor pnot,%mm0; \ |
| 456 |
pxor %mm7,%mm6; \ |
| 457 |
pxor %mm0,%mm3; \ |
| 458 |
movq %mm1,%mm7; \ |
| 459 |
pand %mm6,%mm7; \ |
| 460 |
pxor %mm2,%mm5; \ |
| 461 |
pxor %mm4,%mm2; \ |
| 462 |
pand %mm5,%mm0; \ |
| 463 |
pxor %mm7,%mm4; \ |
| 464 |
pand %mm1,%mm5; \ |
| 465 |
por %mm1,%mm2; \ |
| 466 |
pxor %mm6,%mm5; \ |
| 467 |
movq S4_a4,%mm1; \ |
| 468 |
movq %mm0,%mm6; \ |
| 469 |
pand %mm4,%mm1; \ |
| 470 |
pxor %mm2,%mm6; \ |
| 471 |
por S4_a4,%mm6; \ |
| 472 |
pxor %mm3,%mm1; \ |
| 473 |
pand S4_a2,%mm4; \ |
| 474 |
pxor %mm5,%mm6; \ |
| 475 |
movq S4_a6,%mm3; \ |
| 476 |
pxor %mm0,%mm4; \ |
| 477 |
pxor S4_a3,%mm7; \ |
| 478 |
movq %mm3,%mm0; \ |
| 479 |
pxor %mm2,%mm7; \ |
| 480 |
pand %mm6,%mm0; \ |
| 481 |
movq S4_a4,%mm2; \ |
| 482 |
por %mm3,%mm6; \ |
| 483 |
pxor %mm1,%mm0; \ |
| 484 |
pand %mm2,%mm7; \ |
| 485 |
pxor pnot,%mm1; \ |
| 486 |
pxor %mm7,%mm4; \ |
| 487 |
movq %mm4,%mm5; \ |
| 488 |
pxor %mm1,%mm4; \ |
| 489 |
pxor out1,%mm1; \ |
| 490 |
por %mm4,%mm2; \ |
| 491 |
pand S4_a2,%mm4; \ |
| 492 |
pxor %mm6,%mm1; \ |
| 493 |
pxor %mm0,%mm4; \ |
| 494 |
pxor out3,%mm6; \ |
| 495 |
pxor %mm4,%mm2; \ |
| 496 |
pxor out2,%mm0; \ |
| 497 |
pand %mm2,%mm3; \ |
| 498 |
pxor %mm2,%mm6; \ |
| 499 |
pxor %mm3,%mm5; \ |
| 500 |
movq %mm1,out1; \ |
| 501 |
pxor %mm5,%mm6; \ |
| 502 |
movq %mm0,out2; \ |
| 503 |
pxor out4,%mm5; \ |
| 504 |
movq %mm6,out3; \ |
| 505 |
movq %mm5,out4 |
| 506 |
|
/*
 * S-box 5.
 *
 * Inputs are expected in %mm0..%mm5.  S5_out1..S5_out4 record which
 * register holds each result when it is stored.  S5_a1..S5_x16 are spill
 * slots in DES_bs_all_tmp; S5_x17, S5_x21, S5_x24, S5_x28 and S5_x38 are
 * aliases that reuse slots already named above.
 *
 * S5(out1, out2, out3, out4, extra):
 *   out1..out4 - memory operands; each result is XORed with the operand's
 *                current contents and written back.
 *   extra      - one statement spliced in before %mm5 is first read.
 * Callers should treat %mm0..%mm7 as clobbered.  Do not reorder the
 * hand-interleaved instruction sequence.
 */
| 507 |
#define S5_out1 %mm5 |
| 508 |
#define S5_out2 %mm7 |
| 509 |
#define S5_out3 %mm6 |
| 510 |
#define S5_out4 %mm4 |
| 511 |
|
| 512 |
#define S5_a1 tmp_at(1) |
| 513 |
#define S5_a2 tmp_at(2) |
| 514 |
#define S5_a6 tmp_at(3) |
| 515 |
#define S5_x2 tmp_at(4) |
| 516 |
#define S5_x4 tmp_at(5) |
| 517 |
#define S5_x5 tmp_at(6) |
| 518 |
#define S5_x6 tmp_at(7) |
| 519 |
#define S5_x7 tmp_at(8) |
| 520 |
#define S5_x8 tmp_at(9) |
| 521 |
#define S5_x9 tmp_at(10) |
| 522 |
#define S5_x13 tmp_at(11) |
| 523 |
#define S5_x16 tmp_at(12) |
| 524 |
#define S5_x17 S5_a6 |
| 525 |
#define S5_x21 S5_x7 |
| 526 |
#define S5_x24 S5_x8 |
| 527 |
#define S5_x28 S5_x17 |
| 528 |
#define S5_x38 S5_x9 |
| 529 |
|
| 530 |
#define S5(out1, out2, out3, out4, extra) \ |
| 531 |
movq %mm1,S5_a2; \ |
| 532 |
movq %mm3,%mm6; \ |
| 533 |
movq %mm2,%mm7; \ |
| 534 |
pandn %mm2,%mm6; \ |
| 535 |
pandn %mm0,%mm7; \ |
| 536 |
movq %mm6,%mm1; \ |
| 537 |
movq %mm0,S5_a1; \ |
| 538 |
pxor %mm0,%mm1; \ |
| 539 |
extra; \ |
| 540 |
pxor %mm3,%mm0; \ |
| 541 |
movq %mm1,S5_x2; \ |
| 542 |
movq %mm5,S5_a6; \ |
| 543 |
por %mm0,%mm6; \ |
| 544 |
por %mm7,%mm5; \ |
| 545 |
movq %mm6,S5_x7; \ |
| 546 |
pxor %mm5,%mm1; \ |
| 547 |
movq %mm5,S5_x4; \ |
| 548 |
pand %mm2,%mm6; \ |
| 549 |
movq S5_a6,%mm5; \ |
| 550 |
pxor %mm3,%mm6; \ |
| 551 |
pandn S5_x7,%mm5; \ |
| 552 |
movq %mm0,S5_x6; \ |
| 553 |
movq %mm7,%mm0; \ |
| 554 |
movq %mm5,S5_x8; \ |
| 555 |
pxor %mm2,%mm5; \ |
| 556 |
movq %mm1,S5_x5; \ |
| 557 |
pxor %mm3,%mm0; \ |
| 558 |
movq %mm5,S5_x9; \ |
| 559 |
pandn %mm6,%mm7; \ |
| 560 |
por S5_a6,%mm0; \ |
| 561 |
por %mm4,%mm5; \ |
| 562 |
movq %mm6,S5_x13; \ |
| 563 |
pxor %mm1,%mm5; \ |
| 564 |
movq %mm0,S5_x16; \ |
| 565 |
pxor %mm0,%mm7; \ |
| 566 |
movq S5_a2,%mm0; \ |
| 567 |
movq %mm4,%mm1; \ |
| 568 |
movq %mm7,S5_x17; \ |
| 569 |
por %mm7,%mm1; \ |
| 570 |
pand S5_x5,%mm7; \ |
| 571 |
pxor %mm6,%mm1; \ |
| 572 |
pandn %mm1,%mm0; \ |
| 573 |
movq %mm7,%mm6; \ |
| 574 |
pandn S5_x7,%mm6; \ |
| 575 |
pxor %mm0,%mm5; \ |
| 576 |
pxor S5_x9,%mm7; \ |
| 577 |
movq %mm3,%mm0; \ |
| 578 |
movq %mm5,S5_x21; \ |
| 579 |
movq %mm6,%mm5; \ |
| 580 |
pandn S5_x8,%mm0; \ |
| 581 |
pandn %mm1,%mm5; \ |
| 582 |
pxor out3,%mm6; \ |
| 583 |
pxor %mm2,%mm0; \ |
| 584 |
movq S5_a1,%mm2; \ |
| 585 |
movq %mm0,%mm1; \ |
| 586 |
pxor S5_x9,%mm2; \ |
| 587 |
pand %mm4,%mm1; \ |
| 588 |
movq %mm7,S5_x38; \ |
| 589 |
pxor %mm1,%mm6; \ |
| 590 |
movq S5_x4,%mm1; \ |
| 591 |
movq %mm2,%mm7; \ |
| 592 |
pand S5_x2,%mm7; \ |
| 593 |
pand %mm3,%mm1; \ |
| 594 |
pxor S5_x17,%mm1; \ |
| 595 |
pandn %mm4,%mm7; \ |
| 596 |
movq %mm2,S5_x24; \ |
| 597 |
pxor %mm7,%mm1; \ |
| 598 |
movq out2,%mm7; \ |
| 599 |
por %mm2,%mm3; \ |
| 600 |
movq S5_a2,%mm2; \ |
| 601 |
pxor %mm1,%mm7; \ |
| 602 |
movq %mm3,S5_x28; \ |
| 603 |
pandn %mm3,%mm2; \ |
| 604 |
movq S5_x38,%mm3; \ |
| 605 |
pxor %mm2,%mm7; \ |
| 606 |
movq S5_x16,%mm2; \ |
| 607 |
por %mm4,%mm3; \ |
| 608 |
por S5_x13,%mm2; \ |
| 609 |
por %mm5,%mm1; \ |
| 610 |
pxor out1,%mm5; \ |
| 611 |
pxor %mm3,%mm2; \ |
| 612 |
por S5_a2,%mm2; \ |
| 613 |
movq %mm7,out2; \ |
| 614 |
pxor S5_x6,%mm1; \ |
| 615 |
pxor %mm2,%mm6; \ |
| 616 |
pandn %mm4,%mm1; \ |
| 617 |
movq S5_x38,%mm2; \ |
| 618 |
pxor S5_x24,%mm1; \ |
| 619 |
movq %mm2,%mm3; \ |
| 620 |
pxor S5_x21,%mm2; \ |
| 621 |
pxor %mm1,%mm5; \ |
| 622 |
pand S5_x6,%mm3; \ |
| 623 |
pandn %mm4,%mm2; \ |
| 624 |
pand S5_x28,%mm2; \ |
| 625 |
pxor %mm0,%mm3; \ |
| 626 |
pxor pnot,%mm6; \ |
| 627 |
pxor %mm2,%mm3; \ |
| 628 |
movq S5_x21,%mm4; \ |
| 629 |
por S5_a2,%mm3; \ |
| 630 |
movq %mm6,out3; \ |
| 631 |
pxor out4,%mm4; \ |
| 632 |
pxor %mm3,%mm5; \ |
| 633 |
movq %mm4,out4; \ |
| 634 |
movq %mm5,out1 |
| 635 |
|
/*
 * S-box 6.
 *
 * Inputs are expected in %mm0..%mm5.  S6_out1/3/4 record which register
 * holds each result when it is stored; there is no S6_out2 because out2 is
 * written part-way through the sequence and its register (%mm0) is reloaded
 * afterwards.  S6_a1..S6_x16 are spill slots in DES_bs_all_tmp.
 *
 * S6(out1, out2, out3, out4, extra):
 *   out1..out4 - memory operands; each result is XORed with the operand's
 *                current contents and written back.
 *   extra      - one statement spliced in before %mm5 is first read.
 * Callers should treat %mm0..%mm7 as clobbered.  Do not reorder the
 * hand-interleaved instruction sequence.
 */
| 636 |
#define S6_out1 %mm0 |
| 637 |
#undef S6_out2 |
| 638 |
#define S6_out3 %mm2 |
| 639 |
#define S6_out4 %mm4 |
| 640 |
|
| 641 |
#define S6_a1 tmp_at(1) |
| 642 |
#define S6_a2 tmp_at(2) |
| 643 |
#define S6_a3 tmp_at(3) |
| 644 |
#define S6_a4 tmp_at(4) |
| 645 |
#define S6_x1 tmp_at(5) |
| 646 |
#define S6_x2 tmp_at(6) |
| 647 |
#define S6_x5 tmp_at(7) |
| 648 |
#define S6_x6 tmp_at(8) |
| 649 |
#define S6_x8 tmp_at(9) |
| 650 |
#define S6_x15 tmp_at(10) |
| 651 |
#define S6_x16 tmp_at(11) |
| 652 |
|
| 653 |
#define S6(out1, out2, out3, out4, extra) \ |
| 654 |
movq %mm2,S6_a3; \ |
| 655 |
extra; \ |
| 656 |
movq %mm4,%mm6; \ |
| 657 |
pxor pnot,%mm6; \ |
| 658 |
movq %mm5,%mm7; \ |
| 659 |
movq %mm1,S6_a2; \ |
| 660 |
movq %mm4,%mm2; \ |
| 661 |
movq %mm3,S6_a4; \ |
| 662 |
pxor %mm1,%mm7; \ |
| 663 |
pxor pnot,%mm1; \ |
| 664 |
pxor %mm6,%mm7; \ |
| 665 |
movq %mm6,S6_x2; \ |
| 666 |
pxor %mm0,%mm7; \ |
| 667 |
pand %mm5,%mm2; \ |
| 668 |
movq %mm4,%mm6; \ |
| 669 |
movq %mm1,S6_x1; \ |
| 670 |
movq %mm5,%mm3; \ |
| 671 |
pand S6_a2,%mm3; \ |
| 672 |
pand %mm7,%mm6; \ |
| 673 |
movq %mm0,S6_a1; \ |
| 674 |
por %mm2,%mm1; \ |
| 675 |
movq %mm2,S6_x6; \ |
| 676 |
pand %mm6,%mm0; \ |
| 677 |
movq %mm3,S6_x15; \ |
| 678 |
pxor %mm0,%mm1; \ |
| 679 |
movq S6_a4,%mm0; \ |
| 680 |
movq %mm4,%mm2; \ |
| 681 |
movq %mm6,S6_x8; \ |
| 682 |
pand %mm1,%mm0; \ |
| 683 |
movq %mm7,S6_x5; \ |
| 684 |
pxor %mm3,%mm2; \ |
| 685 |
movq S6_x2,%mm6; \ |
| 686 |
pxor %mm7,%mm0; \ |
| 687 |
movq S6_a1,%mm7; \ |
| 688 |
pxor %mm5,%mm1; \ |
| 689 |
movq %mm2,S6_x16; \ |
| 690 |
pand %mm7,%mm2; \ |
| 691 |
movq S6_a4,%mm3; \ |
| 692 |
pxor %mm2,%mm6; \ |
| 693 |
pxor S6_a2,%mm2; \ |
| 694 |
pand %mm7,%mm1; \ |
| 695 |
por %mm6,%mm3; \ |
| 696 |
pxor %mm5,%mm6; \ |
| 697 |
pxor %mm3,%mm1; \ |
| 698 |
pand %mm6,%mm7; \ |
| 699 |
pand S6_a3,%mm1; \ |
| 700 |
pand %mm4,%mm6; \ |
| 701 |
movq S6_x6,%mm3; \ |
| 702 |
pxor %mm1,%mm0; \ |
| 703 |
pxor out2,%mm0; \ |
| 704 |
por %mm2,%mm3; \ |
| 705 |
pand S6_a4,%mm3; \ |
| 706 |
pxor %mm7,%mm4; \ |
| 707 |
movq S6_x5,%mm1; \ |
| 708 |
pxor %mm3,%mm4; \ |
| 709 |
pxor pnot,%mm2; \ |
| 710 |
por %mm4,%mm5; \ |
| 711 |
movq %mm0,out2; \ |
| 712 |
movq %mm5,%mm3; \ |
| 713 |
pandn S6_a4,%mm3; \ |
| 714 |
pxor %mm6,%mm1; \ |
| 715 |
movq S6_x6,%mm0; \ |
| 716 |
pxor %mm2,%mm3; \ |
| 717 |
por S6_a4,%mm1; \ |
| 718 |
pxor %mm3,%mm0; \ |
| 719 |
pand S6_a3,%mm3; \ |
| 720 |
pxor %mm1,%mm0; \ |
| 721 |
por S6_x5,%mm6; \ |
| 722 |
movq %mm7,%mm1; \ |
| 723 |
pxor S6_x15,%mm7; \ |
| 724 |
pxor %mm3,%mm4; \ |
| 725 |
movq S6_a4,%mm3; \ |
| 726 |
pxor %mm5,%mm7; \ |
| 727 |
pand S6_x8,%mm5; \ |
| 728 |
por %mm3,%mm7; \ |
| 729 |
pxor S6_x6,%mm6; \ |
| 730 |
por %mm3,%mm5; \ |
| 731 |
por S6_x16,%mm1; \ |
| 732 |
pxor %mm6,%mm5; \ |
| 733 |
pxor S6_x1,%mm1; \ |
| 734 |
movq S6_a3,%mm3; \ |
| 735 |
pxor %mm1,%mm7; \ |
| 736 |
pxor out4,%mm4; \ |
| 737 |
por %mm3,%mm7; \ |
| 738 |
pand %mm1,%mm2; \ |
| 739 |
pxor out1,%mm0; \ |
| 740 |
por %mm3,%mm2; \ |
| 741 |
pxor %mm7,%mm0; \ |
| 742 |
pxor %mm5,%mm2; \ |
| 743 |
movq %mm4,out4; \ |
| 744 |
pxor out3,%mm2; \ |
| 745 |
movq %mm0,out1; \ |
| 746 |
movq %mm2,out3 |
| 747 |
|
/*
 * S-box 7.
 *
 * Inputs are expected in %mm0..%mm5.  S7_out1..S7_out4 record which
 * register holds each result when it is stored.  S7_a1..S7_x26 are spill
 * slots in DES_bs_all_tmp.
 *
 * S7(out1, out2, out3, out4, extra):
 *   out1..out4 - memory operands; each result is XORed with the operand's
 *                current contents and written back.
 *   extra      - one statement spliced in before %mm5 is first read.
 * Callers should treat %mm0..%mm7 as clobbered.  Do not reorder the
 * hand-interleaved instruction sequence.
 */
| 748 |
#define S7_out1 %mm7 |
| 749 |
#define S7_out2 %mm1 |
| 750 |
#define S7_out3 %mm3 |
| 751 |
#define S7_out4 %mm0 |
| 752 |
|
| 753 |
#define S7_a1 tmp_at(1) |
| 754 |
#define S7_a2 tmp_at(2) |
| 755 |
#define S7_a4 tmp_at(3) |
| 756 |
#define S7_a6 tmp_at(4) |
| 757 |
#define S7_x6 tmp_at(5) |
| 758 |
#define S7_x7 tmp_at(6) |
| 759 |
#define S7_x8 tmp_at(7) |
| 760 |
#define S7_x11 tmp_at(8) |
| 761 |
#define S7_x13 tmp_at(9) |
| 762 |
#define S7_x15 tmp_at(10) |
| 763 |
#define S7_x25 tmp_at(11) |
| 764 |
#define S7_x26 tmp_at(12) |
| 765 |
|
| 766 |
#define S7(out1, out2, out3, out4, extra) \ |
| 767 |
movq %mm0,S7_a1; \ |
| 768 |
movq %mm1,%mm6; \ |
| 769 |
extra; \ |
| 770 |
movq %mm1,S7_a2; \ |
| 771 |
movq %mm3,%mm7; \ |
| 772 |
movq %mm5,S7_a6; \ |
| 773 |
pand %mm3,%mm6; \ |
| 774 |
movq %mm3,S7_a4; \ |
| 775 |
pxor %mm4,%mm6; \ |
| 776 |
pxor pnot,%mm4; \ |
| 777 |
pand %mm6,%mm7; \ |
| 778 |
pand %mm4,%mm3; \ |
| 779 |
movq %mm1,%mm5; \ |
| 780 |
pxor %mm2,%mm6; \ |
| 781 |
pxor %mm7,%mm5; \ |
| 782 |
movq %mm7,S7_x6; \ |
| 783 |
por %mm1,%mm4; \ |
| 784 |
por %mm3,%mm1; \ |
| 785 |
pxor %mm6,%mm7; \ |
| 786 |
movq %mm5,S7_x7; \ |
| 787 |
pand %mm2,%mm4; \ |
| 788 |
pand %mm2,%mm5; \ |
| 789 |
por %mm7,%mm3; \ |
| 790 |
movq %mm1,S7_x13; \ |
| 791 |
pxor %mm5,%mm0; \ |
| 792 |
por S7_a6,%mm0; \ |
| 793 |
pxor %mm4,%mm1; \ |
| 794 |
movq %mm4,S7_x15; \ |
| 795 |
pxor %mm6,%mm0; \ |
| 796 |
movq %mm5,S7_x8; \ |
| 797 |
movq %mm3,%mm4; \ |
| 798 |
movq S7_a6,%mm6; \ |
| 799 |
movq %mm0,%mm5; \ |
| 800 |
pxor S7_x6,%mm5; \ |
| 801 |
por %mm6,%mm4; \ |
| 802 |
movq %mm7,S7_x25; \ |
| 803 |
por %mm6,%mm5; \ |
| 804 |
movq S7_a1,%mm7; \ |
| 805 |
pxor %mm1,%mm5; \ |
| 806 |
movq %mm3,S7_x26; \ |
| 807 |
pand %mm5,%mm7; \ |
| 808 |
movq %mm0,S7_x11; \ |
| 809 |
pxor %mm0,%mm7; \ |
| 810 |
movq S7_a4,%mm3; \ |
| 811 |
movq %mm7,%mm0; \ |
| 812 |
por S7_a2,%mm0; \ |
| 813 |
pand %mm3,%mm1; \ |
| 814 |
pand S7_x13,%mm3; \ |
| 815 |
por S7_x7,%mm2; \ |
| 816 |
pxor S7_x6,%mm0; \ |
| 817 |
pxor %mm3,%mm2; \ |
| 818 |
movq S7_a2,%mm3; \ |
| 819 |
movq %mm0,%mm6; \ |
| 820 |
pxor pnot,%mm3; \ |
| 821 |
pxor S7_x15,%mm6; \ |
| 822 |
por %mm3,%mm1; \ |
| 823 |
pand S7_x26,%mm0; \ |
| 824 |
pxor %mm6,%mm4; \ |
| 825 |
pand S7_a6,%mm0; \ |
| 826 |
por %mm3,%mm6; \ |
| 827 |
por S7_a6,%mm6; \ |
| 828 |
pand %mm5,%mm3; \ |
| 829 |
pand S7_a6,%mm1; \ |
| 830 |
pxor %mm3,%mm0; \ |
| 831 |
por S7_a1,%mm0; \ |
| 832 |
pxor %mm6,%mm2; \ |
| 833 |
pxor S7_x11,%mm1; \ |
| 834 |
pxor %mm4,%mm0; \ |
| 835 |
movq S7_a1,%mm4; \ |
| 836 |
pxor %mm2,%mm5; \ |
| 837 |
movq S7_a4,%mm6; \ |
| 838 |
por %mm2,%mm4; \ |
| 839 |
pxor S7_x25,%mm6; \ |
| 840 |
pxor %mm4,%mm1; \ |
| 841 |
movq S7_a6,%mm4; \ |
| 842 |
pand %mm1,%mm6; \ |
| 843 |
movq S7_x6,%mm3; \ |
| 844 |
pand %mm4,%mm6; \ |
| 845 |
pxor S7_x15,%mm3; \ |
| 846 |
pxor %mm5,%mm6; \ |
| 847 |
pxor S7_x8,%mm2; \ |
| 848 |
por %mm4,%mm3; \ |
| 849 |
por S7_a1,%mm6; \ |
| 850 |
pxor %mm2,%mm3; \ |
| 851 |
pxor out1,%mm7; \ |
| 852 |
pxor %mm6,%mm3; \ |
| 853 |
pxor out2,%mm1; \ |
| 854 |
movq %mm7,out1; \ |
| 855 |
pxor out3,%mm3; \ |
| 856 |
movq %mm1,out2; \ |
| 857 |
pxor out4,%mm0; \ |
| 858 |
movq %mm3,out3; \ |
| 859 |
movq %mm0,out4 |
| 860 |
|
/*
 * S-box 8.
 *
 * Inputs are expected in %mm0..%mm5.  S8_out1..S8_out4 record which
 * register holds each result when it is stored.  S8_a1..S8_x33 are spill
 * slots in DES_bs_all_tmp.
 *
 * S8(out1, out2, out3, out4, extra):
 *   out1..out4 - memory operands; each result is XORed with the operand's
 *                current contents and written back.
 *   extra      - one statement spliced in before %mm5 is first read.
 * Callers should treat %mm0..%mm7 as clobbered.  Do not reorder the
 * hand-interleaved instruction sequence.
 */
| 861 |
#define S8_out1 %mm6 |
| 862 |
#define S8_out2 %mm2 |
| 863 |
#define S8_out3 %mm5 |
| 864 |
#define S8_out4 %mm1 |
| 865 |
|
| 866 |
#define S8_a1 tmp_at(1) |
| 867 |
#define S8_a2 tmp_at(2) |
| 868 |
#define S8_a4 tmp_at(3) |
| 869 |
#define S8_a5 tmp_at(4) |
| 870 |
#define S8_a6 tmp_at(5) |
| 871 |
#define S8_x14 tmp_at(6) |
| 872 |
#define S8_x22 tmp_at(7) |
| 873 |
#define S8_x33 tmp_at(8) |
| 874 |
|
| 875 |
#define S8(out1, out2, out3, out4, extra) \ |
| 876 |
movq %mm0,S8_a1; \ |
| 877 |
extra; \ |
| 878 |
movq %mm2,%mm6; \ |
| 879 |
pxor pnot,%mm0; \ |
| 880 |
movq %mm2,%mm7; \ |
| 881 |
movq %mm3,S8_a4; \ |
| 882 |
por %mm0,%mm7; \ |
| 883 |
pxor pnot,%mm3; \ |
| 884 |
pxor %mm0,%mm6; \ |
| 885 |
movq %mm5,S8_a6; \ |
| 886 |
movq %mm4,%mm5; \ |
| 887 |
movq %mm1,S8_a2; \ |
| 888 |
movq %mm7,%mm1; \ |
| 889 |
movq %mm4,S8_a5; \ |
| 890 |
pxor %mm3,%mm7; \ |
| 891 |
por %mm6,%mm5; \ |
| 892 |
por %mm7,%mm0; \ |
| 893 |
pand %mm4,%mm1; \ |
| 894 |
pandn %mm0,%mm2; \ |
| 895 |
por %mm7,%mm4; \ |
| 896 |
pxor %mm1,%mm2; \ |
| 897 |
movq %mm5,S8_x22; \ |
| 898 |
pand %mm3,%mm5; \ |
| 899 |
por S8_a2,%mm2; \ |
| 900 |
pxor %mm4,%mm7; \ |
| 901 |
pxor %mm0,%mm3; \ |
| 902 |
movq %mm4,%mm1; \ |
| 903 |
pxor S8_x22,%mm7; \ |
| 904 |
pxor %mm3,%mm1; \ |
| 905 |
pxor %mm6,%mm4; \ |
| 906 |
pxor %mm5,%mm2; \ |
| 907 |
pxor S8_a1,%mm5; \ |
| 908 |
pand %mm3,%mm6; \ |
| 909 |
movq %mm1,S8_x14; \ |
| 910 |
pand %mm4,%mm5; \ |
| 911 |
movq %mm7,S8_x33; \ |
| 912 |
movq %mm0,%mm1; \ |
| 913 |
pand S8_a5,%mm3; \ |
| 914 |
movq %mm0,%mm7; \ |
| 915 |
pand S8_a5,%mm1; \ |
| 916 |
pxor %mm3,%mm7; \ |
| 917 |
pand S8_a2,%mm7; \ |
| 918 |
pxor %mm1,%mm6; \ |
| 919 |
movq S8_a6,%mm1; \ |
| 920 |
pxor %mm4,%mm7; \ |
| 921 |
por S8_a2,%mm6; \ |
| 922 |
pandn %mm0,%mm4; \ |
| 923 |
pxor S8_x14,%mm6; \ |
| 924 |
pand %mm2,%mm1; \ |
| 925 |
pxor S8_a1,%mm3; \ |
| 926 |
pxor %mm6,%mm2; \ |
| 927 |
por S8_a6,%mm6; \ |
| 928 |
pxor %mm7,%mm1; \ |
| 929 |
pxor S8_x22,%mm3; \ |
| 930 |
pxor %mm7,%mm6; \ |
| 931 |
por S8_a2,%mm4; \ |
| 932 |
pand S8_a2,%mm5; \ |
| 933 |
pxor %mm4,%mm3; \ |
| 934 |
movq S8_a1,%mm4; \ |
| 935 |
pand S8_x33,%mm4; \ |
| 936 |
por S8_a4,%mm7; \ |
| 937 |
pxor %mm4,%mm0; \ |
| 938 |
pand S8_a2,%mm7; \ |
| 939 |
pxor %mm0,%mm5; \ |
| 940 |
movq S8_a6,%mm4; \ |
| 941 |
por %mm0,%mm2; \ |
| 942 |
pxor S8_x33,%mm7; \ |
| 943 |
por %mm4,%mm5; \ |
| 944 |
pxor out1,%mm6; \ |
| 945 |
pand %mm4,%mm2; \ |
| 946 |
pxor out4,%mm1; \ |
| 947 |
pxor %mm7,%mm5; \ |
| 948 |
pxor %mm3,%mm2; \ |
| 949 |
pxor out3,%mm5; \ |
| 950 |
movq %mm6,out1; \ |
| 951 |
pxor out2,%mm2; \ |
| 952 |
movq %mm1,out4; \ |
| 953 |
movq %mm5,out3; \ |
| 954 |
movq %mm2,out2 |
| 955 |
|
/*
 * Register that the clear macros store from.  They do not clear it
 * themselves: the caller must have zeroed it (DES_bs_crypt does so with
 * "pxor zero,zero") before expanding DES_bs_clear_block.
 */
#define zero				%mm0

/* Store `zero` into the four consecutive B() slots starting at index i. */
#define DES_bs_clear_block_4(i) \
	movq zero,B(i); \
	movq zero,B(i + 1); \
	movq zero,B(i + 2); \
	movq zero,B(i + 3)

/* Store `zero` into the eight consecutive B() slots starting at index i. */
#define DES_bs_clear_block_8(i) \
	DES_bs_clear_block_4(i); \
	DES_bs_clear_block_4(i + 4)

/* Store `zero` into all 64 B() slots, in ascending order. */
#define DES_bs_clear_block \
	DES_bs_clear_block_8(0); \
	DES_bs_clear_block_8(8); \
	DES_bs_clear_block_8(16); \
	DES_bs_clear_block_8(24); \
	DES_bs_clear_block_8(32); \
	DES_bs_clear_block_8(40); \
	DES_bs_clear_block_8(48); \
	DES_bs_clear_block_8(56)
| 977 |
|
/* General-purpose registers used by the loader macros. */
#define k_ptr				%edx
#define a6_xor_ptr			%esi
#define tmp1				%ecx
/* tmp2 shares its register with a6_xor_ptr. */
#define tmp2				a6_xor_ptr

/* i-th 8-byte and i-th 4-byte entry relative to k_ptr, respectively. */
#define K(i)				(i)*8(k_ptr)
#define k(i)				(i)*4(k_ptr)

/*
 * Statements passed as the `extra` argument of the S-box macros: XOR into
 * a6 either the quadword that a6_xor_ptr points at, or K(i).
 */
#define a6_p				pxor (a6_xor_ptr),a6
#define a6_v(i)				pxor K(i),a6
| 988 |
|
/*
 * Helper for xor_E: load two inputs.  For n = i and i + 1, the pointer
 * stored in E(n) is fetched and the register receives K(n) XOR the
 * quadword at that pointer.  Overwrites tmp1 and tmp2.
 */
#define xor_E_pair(i, r1, r2) \
	movl E(i),tmp1; \
	movq K(i),r1; \
	movl E(i + 1),tmp2; \
	movq K(i + 1),r2; \
	pxor (tmp1),r1; \
	pxor (tmp2),r2

/*
 * xor_E(i): load the six S-box inputs for E entries i .. i + 5.
 * a1..a5 end up as K(n) XOR the quadword that E(n) points at.  a6 is loaded
 * with K(i + 5) only, and the E(i + 5) pointer is left in a6_xor_ptr so
 * that a6_p, passed as the S-box `extra` statement, can complete the XOR.
 * Overwrites tmp1 and a6_xor_ptr (tmp2 is the same register).
 */
#define xor_E(i) \
	xor_E_pair(i, a1, a2); \
	xor_E_pair(i + 2, a3, a4); \
	movl E(i + 4),tmp1; \
	movq K(i + 4),a5; \
	movl E(i + 5),a6_xor_ptr; \
	movq K(i + 5),a6; \
	pxor (tmp1),a5
| 1007 |
|
/*
 * xor_B(b1, k1, ..., b5, k5, b6): load the six S-box inputs straight from
 * fixed B() slots instead of going through the E pointer table.
 * a1..a5 end up as B(bn) XOR K(kn).  a6 is loaded with B(b6) only; there is
 * no k6 argument, so the caller supplies a6_v(k6) as the S-box `extra`
 * statement to finish it.  No general-purpose register is modified.
 */
#define xor_B(b1, k1, b2, k2, b3, k3, b4, k4, b5, k5, b6) \
	movq B(b1),a1; \
	movq B(b2),a2; \
	pxor K(k1),a1; \
	movq B(b3),a3; \
	pxor K(k2),a2; \
	movq B(b4),a4; \
	pxor K(k3),a3; \
	movq B(b5),a5; \
	pxor K(k4),a4; \
	movq B(b6),a6; \
	pxor K(k5),a5
| 1020 |
|
/*
 * xor_B_KS_p(b1, k1, ..., b6, k6): like xor_B, but each key is reached
 * through a pointer: k(kn) is a 4-byte entry relative to k_ptr that is
 * loaded and dereferenced.
 * a1..a5 end up as B(bn) XOR the quadword that k(kn) points at.  a6 is
 * loaded with B(b6) only, and the k(k6) pointer is left in a6_xor_ptr so
 * that a6_p, passed as the S-box `extra` statement, can complete the XOR.
 * Overwrites tmp1 and a6_xor_ptr (tmp2 is the same register).
 */
#define xor_B_KS_p(b1, k1, b2, k2, b3, k3, b4, k4, b5, k5, b6, k6) \
	movl k(k1),tmp1; \
	movl k(k2),tmp2; \
	movq B(b1),a1; \
	movq B(b2),a2; \
	pxor (tmp1),a1; \
	movl k(k3),tmp1; \
	pxor (tmp2),a2; \
	movl k(k4),tmp2; \
	movq B(b3),a3; \
	movq B(b4),a4; \
	pxor (tmp1),a3; \
	movl k(k5),tmp1; \
	pxor (tmp2),a4; \
	movq B(b5),a5; \
	movl k(k6),a6_xor_ptr; \
	movq B(b6),a6; \
	pxor (tmp1),a5
| 1039 |
|
.text

/*
 * DES_bs_init_asm - one-time setup for the MMX S-box code.
 *
 * Takes no arguments and returns nothing.  Stores an all-ones quadword in
 * pnot (scratch slot 0 of DES_bs_all_tmp); the S-box macros XOR against it
 * to complement a value.
 *
 * Clobbers %mm0.  Like the exits of DES_bs_crypt, the MMX state is released
 * with emms when EMMS is defined, so that floating-point code executed
 * after initialisation does not find the x87 register tags marked in use.
 */
DO_ALIGN(5)
.globl DES_bs_init_asm
DES_bs_init_asm:
	pcmpeqd %mm0,%mm0		/* every bit of %mm0 set */
	movq %mm0,pnot
#ifdef EMMS
	emms
#endif
	ret
| 1048 |
|
#define rounds_and_swapped		%ebp
#define iterations			%eax

/*
 * DES_bs_crypt - run the bitslice DES computation over the B() block.
 *
 * In:    4(%esp) = iteration count, read before anything is pushed.
 *        NOTE(review): a count of 0 wraps in "decl iterations"; callers
 *        presumably always pass at least 1 - confirm.
 * Saves: %ebp and %esi (pushed on entry, popped on exit).
 * Uses:  %eax (iterations), %ecx (tmp1), %edx (k_ptr), %mm0..%mm7.
 *
 * The block is zeroed first.  Each pass through the loop body runs the
 * eight S-boxes twice: the "start" half reads K(0..47) and E(0..47) and
 * updates B(32..63); the "swap" half reads K(48..95) and E(48..95) and
 * updates B(0..31).  k_ptr then advances by 96 quadwords.
 *
 * rounds_and_swapped counts down from 8.  When it reaches zero one
 * iteration is complete; the next one is entered at the swap half with
 * k_ptr rewound 48 quadwords below DES_bs_all_KS_v and the counter set to
 * 0x108.  Once that counter has dropped to 0x100, the iteration ends after
 * a start half instead, and the following iteration begins at the start
 * half again with k_ptr back at DES_bs_all_KS_v.
 *
 * emms is executed before returning only when EMMS is defined.
 */
DO_ALIGN(5)
.globl DES_bs_crypt
DES_bs_crypt:
	movl 4(%esp),iterations
	pxor zero,zero
	pushl %ebp
	pushl %esi
	movl $DES_bs_all_KS_v,k_ptr
	DES_bs_clear_block
	movl $8,rounds_and_swapped

DES_bs_crypt_start:
	xor_E(0)
	S1(B(40), B(48), B(54), B(62), a6_p)
	xor_E(6)
	S2(B(44), B(59), B(33), B(49), a6_p)
	xor_E(12)
	S3(B(55), B(47), B(61), B(37), a6_p)
	xor_E(18)
	S4(B(57), B(51), B(41), B(32), a6_p)
	xor_E(24)
	S5(B(39), B(45), B(56), B(34), a6_p)
	xor_E(30)
	S6(B(35), B(60), B(42), B(50), a6_p)
	xor_E(36)
	S7(B(63), B(43), B(53), B(38), a6_p)
	xor_E(42)
	S8(B(36), B(58), B(46), B(52), a6_p)
	/* 0x100 here means this iteration was entered at the swap half. */
	cmpl $0x100,rounds_and_swapped
	je DES_bs_crypt_next

DES_bs_crypt_swap:
	xor_E(48)
	S1(B(8), B(16), B(22), B(30), a6_p)
	xor_E(54)
	S2(B(12), B(27), B(1), B(17), a6_p)
	xor_E(60)
	S3(B(23), B(15), B(29), B(5), a6_p)
	xor_E(66)
	S4(B(25), B(19), B(9), B(0), a6_p)
	xor_E(72)
	S5(B(7), B(13), B(24), B(2), a6_p)
	xor_E(78)
	S6(B(3), B(28), B(10), B(18), a6_p)
	xor_E(84)
	S7(B(31), B(11), B(21), B(6), a6_p)
	xor_E(90)
	/* Safe before S8: its a6_p goes through a6_xor_ptr, not k_ptr. */
	addl $96*8,k_ptr
	S8(B(4), B(26), B(14), B(20), a6_p)
	decl rounds_and_swapped
	jnz DES_bs_crypt_start

	/* Iteration finished after a swap half: the next one starts there. */
	subl $0x300*8+48*8,k_ptr
	movl $0x108,rounds_and_swapped
	decl iterations
	jnz DES_bs_crypt_swap

DES_bs_crypt_done:
	popl %esi
	popl %ebp
#ifdef EMMS
	emms
#endif
	ret

DES_bs_crypt_next:
	/* Iteration finished after a start half: rewind to the first key. */
	subl $0x300*8-48*8,k_ptr
	movl $8,rounds_and_swapped
	decl iterations
	jnz DES_bs_crypt_start
	jmp DES_bs_crypt_done
| 1122 |
|
| 1123 |
/*
 * DES_bs_crypt_25
 *
 * Same loop structure as DES_bs_crypt, but the iteration count is the
 * constant 25 instead of an argument, and the inputs to S3, S4, S7 and S8
 * are formed with xor_B / a6_v rather than xor_E / a6_p (the macros are
 * defined earlier in the file).
 *
 * %ebp and %esi are saved on entry and restored before returning.  When
 * EMMS is defined, emms is executed before returning.
 */
DO_ALIGN(5)
.globl DES_bs_crypt_25
DES_bs_crypt_25:
	pxor zero,zero
	pushl %ebp
	pushl %esi
	movl $DES_bs_all_KS_v,k_ptr
	DES_bs_clear_block
	movl $8,rounds_and_swapped
	movl $25,iterations

DES_bs_crypt_25_start:
	xor_E(0)
	S1(B(40), B(48), B(54), B(62), a6_p)
	xor_E(6)
	S2(B(44), B(59), B(33), B(49), a6_p)
	xor_B(7, 12, 8, 13, 9, 14, 10, 15, 11, 16, 12)
	S3(B(55), B(47), B(61), B(37), a6_v(17))
	xor_B(11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16)
	S4(B(57), B(51), B(41), B(32), a6_v(23))
	xor_E(24)
	S5(B(39), B(45), B(56), B(34), a6_p)
	xor_E(30)
	S6(B(35), B(60), B(42), B(50), a6_p)
	xor_B(23, 36, 24, 37, 25, 38, 26, 39, 27, 40, 28)
	S7(B(63), B(43), B(53), B(38), a6_v(41))
	xor_B(27, 42, 28, 43, 29, 44, 30, 45, 31, 46, 0)
	S8(B(36), B(58), B(46), B(52), a6_v(47))
/* An iteration entered at the swap label ends here, at count 0x100. */
	cmpl $0x100,rounds_and_swapped
	je DES_bs_crypt_25_next

DES_bs_crypt_25_swap:
	xor_E(48)
	S1(B(8), B(16), B(22), B(30), a6_p)
	xor_E(54)
	S2(B(12), B(27), B(1), B(17), a6_p)
	xor_B(39, 60, 40, 61, 41, 62, 42, 63, 43, 64, 44)
	S3(B(23), B(15), B(29), B(5), a6_v(65))
	xor_B(43, 66, 44, 67, 45, 68, 46, 69, 47, 70, 48)
	S4(B(25), B(19), B(9), B(0), a6_v(71))
	xor_E(72)
	S5(B(7), B(13), B(24), B(2), a6_p)
	xor_E(78)
	S6(B(3), B(28), B(10), B(18), a6_p)
	xor_B(55, 84, 56, 85, 57, 86, 58, 87, 59, 88, 60)
	S7(B(31), B(11), B(21), B(6), a6_v(89))
	xor_B(59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 32)
	S8(B(4), B(26), B(14), B(20), a6_v(95))
/* Unlike DES_bs_crypt, k_ptr is advanced only after the last S-box here. */
	addl $96*8,k_ptr
	decl rounds_and_swapped
	jnz DES_bs_crypt_25_start

/*
 * End of an iteration that began at the start label: rewind k_ptr by the
 * 0x300*8 it advanced plus 48*8, then continue at the swap label.  With
 * the odd count of 25 this is the path on which the counter reaches zero.
 */
	subl $0x300*8+48*8,k_ptr
	movl $0x108,rounds_and_swapped
	decl iterations
	jnz DES_bs_crypt_25_swap

	popl %esi
	popl %ebp
#ifdef EMMS
	emms
#endif
	ret

/*
 * End of an iteration that began at the swap label.  Starting from 25,
 * the counter is never zero after the decrement on this path, hence the
 * unconditional jump back.
 */
DES_bs_crypt_25_next:
	subl $0x300*8-48*8,k_ptr
	movl $8,rounds_and_swapped
	decl iterations
	jmp DES_bs_crypt_25_start
| 1187 |
|
| 1188 |
/* MMX register holding all-one bits during DES_bs_crypt_LM setup. */
#define ones				%mm1

/* Loop counter for DES_bs_crypt_LM. */
#define rounds				%eax

/*
 * DES_bs_crypt_LM
 *
 * Takes no arguments.  Fills B(0)..B(63) with a fixed pattern of all-zero
 * and all-one words (presumably the bitsliced LM hash plaintext constant
 * -- confirm), then runs eight passes of the loop below, each containing
 * two S1..S8 groups, with the key material addressed through
 * DES_bs_all_KS_p.
 *
 * %esi is saved on entry and restored before returning.  When EMMS is
 * defined, emms is executed before returning.
 */
DO_ALIGN(5)
.globl DES_bs_crypt_LM
DES_bs_crypt_LM:
	pxor zero,zero
	pushl %esi
	pcmpeqd ones,ones
	movl $DES_bs_all_KS_p,k_ptr

/* B(0)..B(7) */
	movq zero,B(0)
	movq zero,B(1)
	movq zero,B(2)
	movq zero,B(3)
	movq zero,B(4)
	movq zero,B(5)
	movq zero,B(6)
	movq zero,B(7)

/* B(8)..B(15) */
	movq ones,B(8)
	movq ones,B(9)
	movq ones,B(10)
	movq zero,B(11)
	movq ones,B(12)
	movq zero,B(13)
	movq zero,B(14)
	movq zero,B(15)

/* B(16)..B(23) */
	movq zero,B(16)
	movq zero,B(17)
	movq zero,B(18)
	movq zero,B(19)
	movq zero,B(20)
	movq zero,B(21)
	movq zero,B(22)
	movq ones,B(23)

/* B(24)..B(31) */
	movq zero,B(24)
	movq zero,B(25)
	movq ones,B(26)
	movq zero,B(27)
	movq zero,B(28)
	movq ones,B(29)
	movq ones,B(30)
	movq ones,B(31)

/* B(32)..B(39) */
	movq zero,B(32)
	movq zero,B(33)
	movq zero,B(34)
	movq ones,B(35)
	movq zero,B(36)
	movq ones,B(37)
	movq ones,B(38)
	movq ones,B(39)

/* B(40)..B(47) */
	movq zero,B(40)
	movq zero,B(41)
	movq zero,B(42)
	movq zero,B(43)
	movq zero,B(44)
	movq ones,B(45)
	movq zero,B(46)
	movq zero,B(47)

/* B(48)..B(55) */
	movq ones,B(48)
	movq ones,B(49)
	movq zero,B(50)
	movq zero,B(51)
	movq zero,B(52)
	movq zero,B(53)
	movq ones,B(54)
	movq zero,B(55)

/* B(56)..B(63) */
	movq ones,B(56)
	movq zero,B(57)
	movq ones,B(58)
	movq zero,B(59)
	movq ones,B(60)
	movq ones,B(61)
	movq ones,B(62)
	movq ones,B(63)

	movl $8,rounds

DES_bs_crypt_LM_loop:
/* First S1..S8 group: results go to B(32)..B(63). */
	xor_B_KS_p(31, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5)
	S1(B(40), B(48), B(54), B(62), a6_p)
	xor_B_KS_p(3, 6, 4, 7, 5, 8, 6, 9, 7, 10, 8, 11)
	S2(B(44), B(59), B(33), B(49), a6_p)
	xor_B_KS_p(7, 12, 8, 13, 9, 14, 10, 15, 11, 16, 12, 17)
	S3(B(55), B(47), B(61), B(37), a6_p)
	xor_B_KS_p(11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 23)
	S4(B(57), B(51), B(41), B(32), a6_p)
	xor_B_KS_p(15, 24, 16, 25, 17, 26, 18, 27, 19, 28, 20, 29)
	S5(B(39), B(45), B(56), B(34), a6_p)
	xor_B_KS_p(19, 30, 20, 31, 21, 32, 22, 33, 23, 34, 24, 35)
	S6(B(35), B(60), B(42), B(50), a6_p)
	xor_B_KS_p(23, 36, 24, 37, 25, 38, 26, 39, 27, 40, 28, 41)
	S7(B(63), B(43), B(53), B(38), a6_p)
	xor_B_KS_p(27, 42, 28, 43, 29, 44, 30, 45, 31, 46, 0, 47)
	S8(B(36), B(58), B(46), B(52), a6_p)

/* Second S1..S8 group: results go to B(0)..B(31). */
	xor_B_KS_p(63, 48, 32, 49, 33, 50, 34, 51, 35, 52, 36, 53)
	S1(B(8), B(16), B(22), B(30), a6_p)
	xor_B_KS_p(35, 54, 36, 55, 37, 56, 38, 57, 39, 58, 40, 59)
	S2(B(12), B(27), B(1), B(17), a6_p)
	xor_B_KS_p(39, 60, 40, 61, 41, 62, 42, 63, 43, 64, 44, 65)
	S3(B(23), B(15), B(29), B(5), a6_p)
	xor_B_KS_p(43, 66, 44, 67, 45, 68, 46, 69, 47, 70, 48, 71)
	S4(B(25), B(19), B(9), B(0), a6_p)
	xor_B_KS_p(47, 72, 48, 73, 49, 74, 50, 75, 51, 76, 52, 77)
	S5(B(7), B(13), B(24), B(2), a6_p)
	xor_B_KS_p(51, 78, 52, 79, 53, 80, 54, 81, 55, 82, 56, 83)
	S6(B(3), B(28), B(10), B(18), a6_p)
	xor_B_KS_p(55, 84, 56, 85, 57, 86, 58, 87, 59, 88, 60, 89)
	S7(B(31), B(11), B(21), B(6), a6_p)
	xor_B_KS_p(59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 32, 95)
/* Step k_ptr by 96 four-byte entries; issued ahead of the final S-box. */
	addl $96*4,k_ptr
	S8(B(4), B(26), B(14), B(20), a6_p)

	decl rounds
	jnz DES_bs_crypt_LM_loop

	popl %esi
#ifdef EMMS
	emms
#endif
	ret
Properties
| svn:eol-style |
native
|
| svn:keywords |
Author Date Id Rev URL
|
| |