Browse Subversion Repository
Diff of /branches/mty-makai/mty.c
Parent Directory
| Revision Log
| Patch
| 70 |
typedef uint64_t WS_T; |
typedef uint64_t WS_T; |
| 71 |
typedef uint32_t ALU_T; |
typedef uint32_t ALU_T; |
| 72 |
|
|
| 73 |
|
#elif defined(USE_64) /* 64-bit ALU */ |
| 74 |
|
|
| 75 |
|
#define N_STRIDE 6 |
| 76 |
|
typedef uint64_t WS_T; |
| 77 |
|
typedef uint64_t ALU_T; |
| 78 |
|
|
| 79 |
#elif defined(USE_64_XMM) |
#elif defined(USE_64_XMM) |
| 80 |
|
|
| 81 |
#define N_STRIDE 7 |
#define N_STRIDE 7 |
| 932 |
* |
* |
| 933 |
* オペコードを展開 |
* オペコードを展開 |
| 934 |
* |
* |
|
* op |
|
|
* 0x6F MOVQ |
|
|
* 0x7F STORE |
|
|
* 0xDB PAND |
|
|
* 0xDF PANDN |
|
|
* 0xEB POR |
|
|
* 0xEF PXOR |
|
|
* |
|
| 935 |
* r EAX, ECX, EDX, EBX |
* r EAX, ECX, EDX, EBX |
| 936 |
* ESP, EBP, ESI, EDI |
* SIB, EBP, ESI, EDI |
| 937 |
* |
* |
| 938 |
*/ |
*/ |
| 939 |
|
|
| 940 |
|
/*
 * Emit a ModRM byte followed by an 8-bit or 32-bit displacement,
 * i.e. the operand encoding for "ofs8(i)" or "ofs32(i)".
 *
 *   pc   where to write; 2 bytes (short form) or 5 bytes (long form)
 *        are stored starting here
 *   d    register number for the ModRM reg field (low 3 bits are used)
 *   i    base register number for the ModRM r/m field (low 3 bits are
 *        used); must not be 4, because r/m = 100 selects a SIB byte
 *        rather than SP
 *   ofs  displacement; the 8-bit form is used when it lies in
 *        -128..127, otherwise the 32-bit form
 *
 * Returns a pointer just past the last byte written.
 */
static
signed char *
disp_rm(signed char *pc,
        unsigned d,
        unsigned i,
        long ofs)
{
  /* reg and r/m fields shared by both encodings */
  unsigned modrm = ((d << 3) & 0070) | (i & 0007);

  assert(i != 4);	/* would mean SIB, not SP */

  if (-128 <= ofs && ofs <= 127)
    {
      /* short ofs
         01 ddd sss [ofs.b] */
      pc[0] = 0100 | modrm;
      pc[1] = ofs;
      return pc + 2;
    }
  else
    {
      unsigned long u = (unsigned long)ofs;

      /* Only 32 bits of displacement can be encoded. */
      assert(-2147483647L - 1 <= ofs && ofs <= 2147483647L);

      /* long ofs
         10 ddd sss [ofs.l] */
      pc[0] = 0200 | modrm;

      /* Store exactly four bytes, least significant first.  Writing
         through a (long *) cast would store sizeof(long) bytes (eight
         on LP64 targets, overrunning the 5-byte encoding) and is an
         unaligned, type-punned access. */
      pc[1] = u & 0xFF;
      pc[2] = (u >> 8) & 0xFF;
      pc[3] = (u >> 16) & 0xFF;
      pc[4] = (u >> 24) & 0xFF;
      return pc + 5;
    }
}
| 970 |
|
|
| 971 |
|
#define PTR_T 6 /* SI */ |
| 972 |
|
|
| 973 |
|
#ifdef USE_64 /* ALU 64 */ |
| 974 |
|
|
| 975 |
|
/* |
| 976 |
|
* 0x49 0xF7 11-010-ddd not |
| 977 |
|
* 0x49 op 11-sss-ddd |
| 978 |
|
* |
| 979 |
|
* 0x4C op rm |
| 980 |
|
* 0x4C 0x8B rm load |
| 981 |
|
* 0x4C 0x89 rm store |
| 982 |
|
* |
| 983 |
|
*/ |
| 984 |
|
|
| 985 |
|
#define OP_MOV 0x8B |
| 986 |
|
#define OP_STOR 0x89 |
| 987 |
|
#define OP_AND 0x23 |
| 988 |
|
#define OP_OR 0x0B |
| 989 |
|
#define OP_XOR 0x33 |
| 990 |
|
|
| 991 |
|
/*
 * Emit a three-byte 64-bit register-to-register instruction.
 *
 *   pc  where to write; 3 bytes are stored
 *   op  opcode; bit 1 is cleared before it is emitted
 *       (so e.g. 0x8B is emitted as 0x89)
 *   d   destination register number, placed in the ModRM r/m field
 *   s   source register number, placed in the ModRM reg field
 *
 * Byte layout:
 *   0x4D             prefix (REX with W, R and B set, so both 3-bit
 *                    register fields refer to R8..R15)
 *   op & 0xFD
 *   11 sss ddd       register-direct ModRM
 *
 * Returns a pointer just past the emitted instruction.
 */
static
signed char *
reg_op(signed char *pc,
       unsigned op,
       unsigned d,
       unsigned s)
{
  unsigned modrm = 0300;	/* mod = 11: register direct */

  modrm |= (s & 7) << 3;	/* reg field */
  modrm |= d & 7;		/* r/m field */

  pc[0] = 0x4D;
  pc[1] = op & 0xFD;
  pc[2] = modrm;
  return &pc[3];
}
| 1006 |
|
|
| 1007 |
|
/*
 * Emit a 64-bit register/memory instruction: the prefix byte 0x4C,
 * the opcode, then the ModRM byte and displacement produced by
 * disp_rm().
 *
 *   pc   where to write
 *   op   opcode byte, emitted unchanged
 *   d    register operand number, passed to disp_rm()
 *   i    base register number, passed to disp_rm()
 *   ofs  displacement from the base register, passed to disp_rm()
 *
 * Returns whatever disp_rm() returns for the position two bytes
 * after pc, i.e. the end of the emitted instruction.
 */
static
signed char *
reg_mem(signed char *pc,
        unsigned op,
        unsigned d,
        unsigned i,
        long ofs)
{
  signed char *operand = pc + 2;	/* ModRM goes after prefix + opcode */

  pc[0] = 0x4C;
  pc[1] = op;
  return disp_rm(operand, d, i, ofs);
}
| 1019 |
|
|
| 1020 |
|
#else /* MMX or XMM */ |
| 1021 |
|
|
| 1022 |
|
#define OP_MOV 0x6F |
| 1023 |
|
#define OP_STOR 0x7F |
| 1024 |
|
#define OP_AND 0xDB |
| 1025 |
|
#define OP_ANDN 0xDF |
| 1026 |
|
#define OP_OR 0xEB |
| 1027 |
|
#define OP_XOR 0xEF |
| 1028 |
|
|
| 1029 |
static |
static |
| 1030 |
signed char * |
signed char * |
| 1031 |
reg_op(signed char *pc, |
reg_op(signed char *pc, |
| 1050 |
reg_mem(signed char *pc, |
reg_mem(signed char *pc, |
| 1051 |
unsigned op, |
unsigned op, |
| 1052 |
unsigned d, |
unsigned d, |
| 1053 |
unsigned s, |
unsigned i, |
| 1054 |
int ofs) |
int ofs) |
| 1055 |
{ |
{ |
| 1056 |
#ifndef USE_MMX |
#ifndef USE_MMX |
| 1058 |
#endif |
#endif |
| 1059 |
pc[0] = 0x0F; |
pc[0] = 0x0F; |
| 1060 |
pc[1] = op; |
pc[1] = op; |
| 1061 |
if (-128 <= ofs && ofs <= 127) |
return disp_rm(pc + 2, d, i, ofs); |
|
{ |
|
|
/* short ofs |
|
|
01 ddd sss [ofs.b] */ |
|
|
pc[2] = (0100 |
|
|
| ((d << 3) & 0070) |
|
|
| (s & 0007)); |
|
|
pc[3] = ofs; |
|
|
return pc + 4; |
|
|
} |
|
|
else |
|
|
{ |
|
|
/* long ofs |
|
|
10 ddd sss [ofs.l] */ |
|
|
pc[2] = (0200 |
|
|
| ((d << 3) & 0070) |
|
|
| (s & 0007)); |
|
|
*(long *)&pc[3] = ofs; |
|
|
return pc + 7; |
|
|
} |
|
| 1062 |
} |
} |
| 1063 |
|
|
| 1064 |
|
#endif |
| 1065 |
|
|
| 1066 |
/*************************************************************** |
/*************************************************************** |
| 1067 |
* |
* |
| 1068 |
* 与えられた式を解析・命令生成 |
* 与えられた式を解析・命令生成 |
| 1112 |
{ |
{ |
| 1113 |
/* 先頭制約 */ |
/* 先頭制約 */ |
| 1114 |
signed char *o_iptr = iptr; |
signed char *o_iptr = iptr; |
| 1115 |
unsigned op = 0x6F; |
unsigned op = OP_MOV; |
| 1116 |
int i; |
int i; |
| 1117 |
|
|
| 1118 |
if (expr[0] == '^') |
if (expr[0] == '^') |
| 1130 |
|
|
| 1131 |
if (c == '$') |
if (c == '$') |
| 1132 |
{ |
{ |
| 1133 |
assert(op != 0x6F); |
assert(op != OP_MOV); |
| 1134 |
if (ofs < 10) |
if (ofs < 10) |
| 1135 |
return 0; |
return 0; |
| 1136 |
else if (ofs > 10) |
else if (ofs > 10) |
| 1168 |
assert(c == ']'); |
assert(c == ']'); |
| 1169 |
|
|
| 1170 |
/* マップされたモノから命令を生成する */ |
/* マップされたモノから命令を生成する */ |
| 1171 |
oop = 0x6F; /* MOVQ */ |
oop = OP_MOV; |
| 1172 |
for (j = 0; j < 64; j++) |
for (j = 0; j < 64; j++) |
| 1173 |
if (cs[j]) |
if (cs[j]) |
| 1174 |
{ |
{ |
| 1176 |
continue; |
continue; |
| 1177 |
iptr = reg_mem(iptr, |
iptr = reg_mem(iptr, |
| 1178 |
oop, |
oop, |
| 1179 |
1, /* MM1 */ |
1, /* MM1/R9 */ |
| 1180 |
5, /* EBP */ |
PTR_T, |
| 1181 |
sizeof(WS_T) * ((64 * ofs + j) + 32 - 16)); |
sizeof(WS_T) * ((64 * ofs + j) + 32 - 16)); |
| 1182 |
oop = 0xEB; |
oop = OP_OR; |
| 1183 |
} |
} |
| 1184 |
if (oop != 0xEB) |
if (oop != OP_OR) |
| 1185 |
{ |
{ |
| 1186 |
if (ofs == 9) |
if (ofs == 9) |
| 1187 |
return -1; |
return -1; |
| 1190 |
{ |
{ |
| 1191 |
iptr = reg_op(iptr, |
iptr = reg_op(iptr, |
| 1192 |
op, |
op, |
| 1193 |
0, /* MM0 */ |
0, /* MM0/R8 */ |
| 1194 |
1); /* MM1 */ |
1); /* MM1/R9 */ |
| 1195 |
op = 0xDB; |
op = OP_AND; |
| 1196 |
} |
} |
| 1197 |
ofs++; |
ofs++; |
| 1198 |
} |
} |
| 1206 |
return -1; |
return -1; |
| 1207 |
iptr = reg_mem(iptr, |
iptr = reg_mem(iptr, |
| 1208 |
op, |
op, |
| 1209 |
0, /* MM0 */ |
0, /* MM0/R8 */ |
| 1210 |
5, /* EBP */ |
PTR_T, |
| 1211 |
sizeof(WS_T) * ((64 * ofs + c) + 32 - 16)); |
sizeof(WS_T) * ((64 * ofs + c) + 32 - 16)); |
| 1212 |
op = 0xDB; |
op = OP_AND; |
| 1213 |
ofs++; |
ofs++; |
| 1214 |
} |
} |
| 1215 |
else |
else |
| 1264 |
*pe = 0; |
*pe = 0; |
| 1265 |
} |
} |
| 1266 |
|
|
| 1267 |
/* PXOR MM7,MM7 */ |
/* MM7 にターゲット毎に比較結果を OR していくため |
| 1268 |
iptr = reg_op(iptr, 0xEF, 7, 7); |
まずはゼロクリア */ |
| 1269 |
|
iptr = reg_op(iptr, OP_XOR, 7, 7); /* MM7/R15 := 0 */ |
| 1270 |
|
|
| 1271 |
/* 順繰りに parse */ |
/* 順繰りに parse */ |
| 1272 |
expr = expr_buf; |
expr = expr_buf; |
| 1301 |
printf(": of=%d len=%d\n", i, expr - p); |
printf(": of=%d len=%d\n", i, expr - p); |
| 1302 |
} |
} |
| 1303 |
#endif |
#endif |
| 1304 |
|
/* 1ターゲット分の比較結果を MM7 に追加 */ |
| 1305 |
if (n > 0) |
if (n > 0) |
| 1306 |
iptr = reg_op(iptr + n, |
iptr = reg_op(iptr + n, |
| 1307 |
0xEB, |
OP_OR, |
| 1308 |
7, |
7, /* MM7/R15 */ |
| 1309 |
0); |
0); /* MM0/R8 */ |
| 1310 |
} |
} |
| 1311 |
} |
} |
| 1312 |
|
|
| 1313 |
|
/* MM7 に生成された結果を t[31] に格納 */ |
| 1314 |
return reg_mem(iptr, |
return reg_mem(iptr, |
| 1315 |
0x7F, |
OP_STOR, |
| 1316 |
7, |
7, /* MM7/R15 */ |
| 1317 |
5, |
PTR_T, |
| 1318 |
sizeof(WS_T) * (31 - 16)); |
sizeof(WS_T) * (31 - 16)); |
| 1319 |
} |
} |
| 1320 |
|
|
| 1504 |
okey[i] = key[i]; |
okey[i] = key[i]; |
| 1505 |
} |
} |
| 1506 |
|
|
| 1507 |
/* まずは LR を初期化 */ |
/* 呼ぶ! |
| 1508 |
//memset(param64.lr, 0, sizeof(param64.lr)); |
LR 初期化は、サブモジュール内で行うべし |
| 1509 |
/* 呼ぶ! */ |
FASTCALL に準じた呼び出しのため、 |
| 1510 |
|
ホントはいろいろレジスタが破壊されるハズ…なんだが。 */ |
| 1511 |
#ifdef __GNUC__ |
#ifdef __GNUC__ |
| 1512 |
asm volatile("call *%3" |
asm volatile("call *%3" |
| 1513 |
: "=a"(cnt) |
: "=a"(cnt) |
| 1514 |
: "c"(key64.k), "d"(param64.lr), "m"(code) |
: "c"(key64.k), "d"(param64.lr), |
| 1515 |
//: "c"(key64.k), "d"(param64.lr), "m"(crypt64_sta) |
"m"(code) |
| 1516 |
: "memory"); |
//"m"(crypt64_sta) |
| 1517 |
|
: |
| 1518 |
|
#if 0 |
| 1519 |
|
"%rbx", |
| 1520 |
|
//"%rcx", "%rdx", |
| 1521 |
|
"%r8","%r9","%r10","%r11", |
| 1522 |
|
#endif |
| 1523 |
|
"memory"); |
| 1524 |
#else |
#else |
| 1525 |
cnt = (*d_crypt64)(key64.k[0][0][0].a, param64.lr[0][0].a); |
cnt = (*d_crypt64)(key64.k[0][0][0].a, param64.lr[0][0].a); |
| 1526 |
#endif |
#endif |
| 1532 |
cr = 0; |
cr = 0; |
| 1533 |
fprintf(stderr, "cycle=%d\n", (int)cnt); |
fprintf(stderr, "cycle=%d\n", (int)cnt); |
| 1534 |
} |
} |
|
/* XXX 手抜きのため、ワークにはみ出ている2ビットを落とす */ |
|
|
for (i = 0; i < N_ALU; i++) |
|
|
param64.t[0].a[i] = param64.t[1].a[i] = 0; |
|
| 1535 |
|
|
| 1536 |
/* ヒットしたときの処理 */ |
/* ヒットしたときの処理 */ |
| 1537 |
for (kk = 0; kk < N_ALU; kk++) |
for (kk = 0; kk < N_ALU; kk++) |
| 1550 |
if (!(t & ((ALU_T)1 << k))) |
if (!(t & ((ALU_T)1 << k))) |
| 1551 |
continue; |
continue; |
| 1552 |
|
|
| 1553 |
|
/* XXX 手抜きのため、ワークにはみ出ている 2 ビットをここで落とす |
| 1554 |
|
ヒットするたびに冗長に行われるが、気にしてはいかん */ |
| 1555 |
|
for (i = 0; i < N_ALU; i++) |
| 1556 |
|
param64.t[0].a[i] = param64.t[1].a[i] = 0; |
| 1557 |
for (i = 1; i < 11; i++) |
for (i = 1; i < 11; i++) |
| 1558 |
{ |
{ |
| 1559 |
unsigned c = 0; |
unsigned c = 0; |
|
|
Legend:
| Removed from v.13 |
|
| changed lines |
| |
Added in v.14 |
|
|
| |