| 1 |
//====================================================================== |
| 2 |
//----------------------------------------------------------------------- |
| 3 |
/** |
| 4 |
* @file iris_allegrex.h |
| 5 |
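* @brief Inline iris_allegrex_* helper functions: bit counting, sign extension, byte/bit swapping, rotation, bit-field extract/insert and approximate float math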
| 6 |
* |
| 7 |
* @author t.sirayanagi |
| 8 |
* @version 1.0 |
| 9 |
* |
| 10 |
* @par copyright |
| 11 |
* Copyright (C) 2009-2011 Takazumi Shirayanagi\n |
| 12 |
* The new BSD License is applied to this software. |
| 13 |
* see iris_LICENSE.txt |
| 14 |
*/ |
| 15 |
//----------------------------------------------------------------------- |
| 16 |
//====================================================================== |
| 17 |
#ifndef INCG_IRIS_iris_allegrex_H_ |
| 18 |
#define INCG_IRIS_iris_allegrex_H_ |
| 19 |
|
| 20 |
//====================================================================== |
| 21 |
// include |
| 22 |
#include "../iris_inchead.h" |
| 23 |
#include "../iris_stdlib.h" |
| 24 |
|
| 25 |
//====================================================================== |
| 26 |
// define |
| 27 |
|
| 28 |
EXTERN_C_BEGIN |
| 29 |
|
| 30 |
//====================================================================== |
| 31 |
// inline function |
| 32 |
|
| 33 |
/** |
| 34 |
* @brief ビットが立っている総数を算出 |
| 35 |
* Count All One |
| 36 |
* @param val [in] = 検査値 |
| 37 |
* @return 結果 |
| 38 |
*/ |
| 39 |
STATICINLINE IrisS32 iris_allegrex_cao(IrisU32 val) |
| 40 |
{ |
| 41 |
val = (val & 0x55555555) + (val >> 1 & 0x55555555); |
| 42 |
val = (val & 0x33333333) + (val >> 2 & 0x33333333); |
| 43 |
val = (val & 0x0f0f0f0f) + (val >> 4 & 0x0f0f0f0f); |
| 44 |
val = (val & 0x00ff00ff) + (val >> 8 & 0x00ff00ff); |
| 45 |
return (IrisS32)((val & 0x0000ffff) + (val >>16 & 0x0000ffff)); |
| 46 |
} |
| 47 |
|
| 48 |
/** |
| 49 |
* @brief ビットが寝ている総数を算出 |
| 50 |
* Count All Zero |
| 51 |
* @param val [in] = 検査値 |
| 52 |
* @return 結果 |
| 53 |
*/ |
| 54 |
STATICINLINE IrisS32 iris_allegrex_caz(IrisU32 val) |
| 55 |
{ |
| 56 |
return iris_allegrex_cao(~val); |
| 57 |
} |
| 58 |
|
| 59 |
/** |
| 60 |
* @brief MSBから0が続く個数を算出 |
| 61 |
* Count Leading Zero |
| 62 |
* @param val [in] = 検査値 |
| 63 |
* @return 結果 |
| 64 |
*/ |
| 65 |
STATICINLINE IrisS32 iris_allegrex_clz(IrisArgU32 val) |
| 66 |
{ |
| 67 |
IrisS32 n = 1; |
| 68 |
IrisU32 ui = val; |
| 69 |
if( ui == 0 ) return 32; |
| 70 |
#ifdef __BIG_ENDIAN__ |
| 71 |
if( (ui&0xFFFF) == 0 ) { n += 16; ui >>= 16; } |
| 72 |
if( (ui&0x00FF) == 0 ) { n += 8; ui >>= 8; } |
| 73 |
if( (ui&0x000F) == 0 ) { n += 4; ui >>= 4; } |
| 74 |
if( (ui&0x0003) == 0 ) { n += 2; ui >>= 2; } |
| 75 |
n -= ui & 0x1; |
| 76 |
#else |
| 77 |
if( (ui>>16) == 0 ) { n += 16; ui <<= 16; } |
| 78 |
if( (ui>>24) == 0 ) { n += 8; ui <<= 8; } |
| 79 |
if( (ui>>28) == 0 ) { n += 4; ui <<= 4; } |
| 80 |
if( (ui>>30) == 0 ) { n += 2; ui <<= 2; } |
| 81 |
n -= (IrisS32)((ui>>31) & 0x1); |
| 82 |
#endif |
| 83 |
return n; |
| 84 |
} |
| 85 |
|
| 86 |
/** |
| 87 |
* @brief MSBから1が続く個数を算出 |
| 88 |
* Count Leading One |
| 89 |
* @param val [in] = 検査値 |
| 90 |
* @return 結果 |
| 91 |
*/ |
| 92 |
STATICINLINE IrisS32 iris_allegrex_clo(IrisArgU32 val) |
| 93 |
{ |
| 94 |
return iris_allegrex_clz(~val); |
| 95 |
} |
| 96 |
|
| 97 |
/** |
| 98 |
* @brief LSBから0が続く個数を算出 |
| 99 |
* Count Trailing Zero |
| 100 |
* @param val [in] = 検査値 |
| 101 |
* @return 結果 |
| 102 |
*/ |
| 103 |
STATICINLINE IrisS32 iris_allegrex_ctz(IrisArgU32 val) |
| 104 |
{ |
| 105 |
IrisS32 n = 1; |
| 106 |
IrisU32 ui = val; |
| 107 |
if( ui == 0 ) return 32; |
| 108 |
#ifdef __BIG_ENDIAN__ |
| 109 |
if( (ui>>16) == 0 ) { n += 16; ui <<= 16; } |
| 110 |
if( (ui>>24) == 0 ) { n += 8; ui <<= 8; } |
| 111 |
if( (ui>>28) == 0 ) { n += 4; ui <<= 4; } |
| 112 |
if( (ui>>30) == 0 ) { n += 2; ui <<= 2; } |
| 113 |
n -= (ui>>31) & 0x1; |
| 114 |
#else |
| 115 |
if( (ui&0xFFFF) == 0 ) { n += 16; ui >>= 16; } |
| 116 |
if( (ui&0x00FF) == 0 ) { n += 8; ui >>= 8; } |
| 117 |
if( (ui&0x000F) == 0 ) { n += 4; ui >>= 4; } |
| 118 |
if( (ui&0x0003) == 0 ) { n += 2; ui >>= 2; } |
| 119 |
n -= (IrisS32)(ui & 0x1); |
| 120 |
#endif |
| 121 |
return n; |
| 122 |
} |
| 123 |
|
| 124 |
/** |
| 125 |
* @brief LSBから1が続く個数を算出 |
| 126 |
* Count Trailing One |
| 127 |
* @param val [in] = 検査値 |
| 128 |
* @return 結果 |
| 129 |
*/ |
| 130 |
STATICINLINE IrisS32 iris_allegrex_cto(IrisArgU32 val) |
| 131 |
{ |
| 132 |
return iris_allegrex_ctz(~val); |
| 133 |
} |
| 134 |
|
| 135 |
/** |
| 136 |
* @brief max |
| 137 |
* @param val1 [in] = 検査値 |
| 138 |
* @param val2 [in] = 検査値 |
| 139 |
* @return 大きい方の値 |
| 140 |
*/ |
| 141 |
STATICINLINE IrisS32 iris_allegrex_max(IrisArgS32 val1, IrisArgS32 val2) |
| 142 |
{ |
| 143 |
return (val1 > val2) ? val1 : val2; |
| 144 |
} |
| 145 |
|
| 146 |
/** |
| 147 |
* @brief min |
| 148 |
* @param val1 [in] = 検査値 |
| 149 |
* @param val2 [in] = 検査値 |
| 150 |
* @return 小さい方の値 |
| 151 |
*/ |
| 152 |
STATICINLINE IrisS32 iris_allegrex_min(IrisArgS32 val1, IrisArgS32 val2) |
| 153 |
{ |
| 154 |
return (val1 < val2) ? val1 : val2; |
| 155 |
} |
| 156 |
|
| 157 |
/** |
| 158 |
* @brief bit 7符号拡張 |
| 159 |
* Sign-Extend Byte |
| 160 |
* @param val [in] = 値 |
| 161 |
* @return 結果 |
| 162 |
*/ |
| 163 |
STATICINLINE IrisS32 iris_allegrex_seb(IrisArgS8 val) |
| 164 |
{ |
| 165 |
return (IrisS32)val; |
| 166 |
} |
| 167 |
|
| 168 |
/** |
| 169 |
* @brief bit 15符号拡張 |
| 170 |
* Sign-Extend Half |
| 171 |
* @param val [in] = 値 |
| 172 |
* @return 結果 |
| 173 |
*/ |
| 174 |
STATICINLINE IrisS32 iris_allegrex_seh(IrisArgS16 val) |
| 175 |
{ |
| 176 |
return (IrisS32)val; |
| 177 |
} |
| 178 |
|
| 179 |
/** |
| 180 |
* @brief バイト単位でワード内スワップ |
| 181 |
* Word Swap Byte within Word |
| 182 |
* @param val [in] = 値 |
| 183 |
* @return 結果 |
| 184 |
*/ |
| 185 |
STATICINLINE IrisU32 iris_allegrex_wsbw(IrisArgU32 val) |
| 186 |
{ |
| 187 |
return (IrisU32)( ((val&0xFF) << 24) | ((val&0xFF00) << 8) | ((val>>8) & 0xFF00) | ((val>>24) & 0xFF) ); |
| 188 |
} |
| 189 |
|
| 190 |
/** |
| 191 |
* @brief バイト単位でハーフワード内スワップ |
| 192 |
* Word Swap Byte within Halfword |
| 193 |
* @param val [in] = 値 |
| 194 |
* @return 結果 |
| 195 |
*/ |
| 196 |
STATICINLINE IrisU32 iris_allegrex_wsbh(IrisArgU32 val) |
| 197 |
{ |
| 198 |
return (IrisU32)( ((val<<8) & 0xFF00FF00) | ((val>>8) & 0x00FF00FF) ); |
| 199 |
} |
| 200 |
|
| 201 |
/** |
| 202 |
* @brief ビット単位でワード内スワップ |
| 203 |
* Bit Reverse |
| 204 |
* @param val [in] = 値 |
| 205 |
* @return 結果 |
| 206 |
*/ |
| 207 |
STATICINLINE IrisU32 iris_allegrex_bitrev(IrisArgU32 val) |
| 208 |
{ |
| 209 |
IrisU32 n = 0; |
| 210 |
for( int i=0; i < 32; ++i ) n |= ((val>>i) & 0x1) << (31-i); |
| 211 |
return n; |
| 212 |
} |
| 213 |
|
| 214 |
IRIS_PRAGMA_INTRINSIC_BEGIN(_lrotl) |
| 215 |
IRIS_PRAGMA_INTRINSIC_BEGIN(_lrotr) |
| 216 |
|
| 217 |
/** |
| 218 |
* @brief 右にnビット回転n |
| 219 |
* @param [in] x = 値 |
| 220 |
* @param [in] n = シフト数 |
| 221 |
* @return 回転後の値 |
| 222 |
*/ |
| 223 |
STATICINLINE IrisU32 iris_allegrex_rotr(IrisArgU32 x, IrisArgInt n) |
| 224 |
{ |
| 225 |
#ifdef _MSC_VER |
| 226 |
return ::_lrotr(x, n); |
| 227 |
#else |
| 228 |
return IRIS_rotr32(x, n); |
| 229 |
#endif |
| 230 |
} |
| 231 |
|
| 232 |
/** |
| 233 |
* @brief 左にnビット回転n |
| 234 |
* @param [in] x = 値 |
| 235 |
* @param [in] n = シフト数 |
| 236 |
* @return 回転後の値 |
| 237 |
*/ |
| 238 |
STATICINLINE IrisU32 iris_allegrex_rotl(IrisArgU32 x, IrisArgInt n) |
| 239 |
{ |
| 240 |
#ifdef _MSC_VER |
| 241 |
return ::_lrotl(x, n); |
| 242 |
#else |
| 243 |
return IRIS_rotl32(x, n); |
| 244 |
#endif |
| 245 |
} |
| 246 |
|
| 247 |
IRIS_PRAGMA_INTRINSIC_END(_lrotl) |
| 248 |
IRIS_PRAGMA_INTRINSIC_END(_lrotr) |
| 249 |
|
| 250 |
/** |
| 251 |
* @brief ビットの抜き出し |
| 252 |
* @param [in] val = 値 |
| 253 |
* @param [in] pos = 抜き出し開始位置 |
| 254 |
* @param [in] size = 抜き出し開始位置 |
| 255 |
* @return valのposビット位置からsizeビットを抜き出し、右詰した結果 |
| 256 |
*/ |
| 257 |
STATICINLINE IrisU32 iris_allegrex_ext(IrisArgU32 val, IrisArgU32 pos, IrisArgU32 size) |
| 258 |
{ |
| 259 |
return (IrisU32)( ((val>>pos)<<(32 - size)) >> (32-size) ); |
| 260 |
} |
| 261 |
|
| 262 |
/** |
| 263 |
* @brief ビットの挿入 |
| 264 |
* @param [in] val1 = 値 |
| 265 |
* @param [in] val2 = 挿入する値 |
| 266 |
* @param [in] pos = 抜き出し開始位置 |
| 267 |
* @param [in] size = 抜き出し開始位置 |
| 268 |
* @return val2の下位sizeビットをval1のposに挿入した結果 |
| 269 |
*/ |
| 270 |
STATICINLINE IrisU32 iris_allegrex_ins(IrisArgU32 val1, IrisArgU32 val2, IrisArgU32 pos, IrisArgU32 size) |
| 271 |
{ |
| 272 |
IrisU32 mask = (IrisU32)(((0x1 << (size+1)) - 1) << pos); |
| 273 |
return (IrisU32)( (val1 & ~mask) | ((val2 << pos) & mask) ); |
| 274 |
} |
| 275 |
|
| 276 |
/** |
| 277 |
* @brief 2のべき乗か判断する |
| 278 |
* @param [in] val = 検査値 |
| 279 |
* @return 真偽値 |
| 280 |
*/ |
| 281 |
STATICINLINE IrisBool iris_allegrex_ispow2(IrisArgU32 val) |
| 282 |
{ |
| 283 |
return (val & (val - 1)) == 0 ? IRIS_TRUE : IRIS_FALSE; |
| 284 |
} |
| 285 |
|
| 286 |
/** |
| 287 |
* @brief 逆数 |
| 288 |
* @param [in] fs = 入力 |
| 289 |
* @return 結果 |
| 290 |
*/ |
| 291 |
STATICINLINE IrisF32 iris_allegrex_rcpf(IrisArgF32 fs) |
| 292 |
{ |
| 293 |
IrisFInt fi; |
| 294 |
if( (*(IrisS32*)&fs & 0x7FFFFFFF) == 0 ) return MATH_F32_P_INF_BITS; |
| 295 |
fi.iv = (IrisU32)(0x7F000000 - *(IrisS32*)&fs); |
| 296 |
fi.fv *= (2.0f - (fi.fv * fs)); |
| 297 |
fi.fv *= (2.0f - (fi.fv * fs)); |
| 298 |
return fi.fv * (2.0f - (fi.fv * fs)); |
| 299 |
} |
| 300 |
|
| 301 |
/** |
| 302 |
* @brief floorf |
| 303 |
* @param [in] fs = 入力 |
| 304 |
* @return 結果 |
| 305 |
*/ |
| 306 |
STATICINLINE IrisF32 iris_allegrex_floorf(IrisArgF32 fs) |
| 307 |
{ |
| 308 |
#if 1 |
| 309 |
return (IrisF32)(IrisS32)fs; |
| 310 |
#else |
| 311 |
IrisS32 bias = 0xBF7FFFFF & ((*(IrisS32*)&fs) >> 31); |
| 312 |
return (IrisF32)( (IrisS32)( fs + (*(IrisF32*)&bias) ) ); |
| 313 |
#endif |
| 314 |
} |
| 315 |
|
| 316 |
/** |
| 317 |
* @brief ceilf |
| 318 |
* @param [in] fs = 入力 |
| 319 |
* @return 結果 |
| 320 |
*/ |
| 321 |
STATICINLINE IrisF32 iris_allegrex_ceilf(IrisArgF32 fs) |
| 322 |
{ |
| 323 |
IrisF32Int fi; |
| 324 |
fi.fv = fs; |
| 325 |
fi.iv = 0x3F7FFFFF & ((((IrisS32)fi.iv) ^ (IrisS32)0x80000000) >> 31); |
| 326 |
return (IrisF32)( (IrisS32)( fs + fi.fv ) ); |
| 327 |
} |
| 328 |
|
| 329 |
/** |
| 330 |
* @brief sqrtf |
| 331 |
* @param [in] fs = 入力 |
| 332 |
* @return 平方根 |
| 333 |
*/ |
| 334 |
STATICINLINE IrisF32 iris_allegrex_sqrtf(IrisArgF32 fs) |
| 335 |
{ |
| 336 |
IrisF32Int fi; |
| 337 |
fi.fv = fs; |
| 338 |
if( fi.iv <= 0 ) |
| 339 |
{ |
| 340 |
if( (fi.iv & 0x7FFFFFFF) == 0 ) return fs; |
| 341 |
fi.iv = MATH_F32_N_QNAN_BITS; |
| 342 |
return fi.fv; |
| 343 |
} |
| 344 |
IrisF32Int x, y; |
| 345 |
x.iv = fi.iv + 0xFF800000; |
| 346 |
y.iv = (IrisU32)(0x5F3759DF - (fi.iv >> 1)); |
| 347 |
|
| 348 |
y.fv *= (1.5f - ( y.fv * y.fv * x.fv )); |
| 349 |
y.fv *= (1.5f - ( y.fv * y.fv * x.fv )); |
| 350 |
return (y.fv * (1.5f - (y.fv * y.fv * x.fv))) * fs; |
| 351 |
} |
| 352 |
|
| 353 |
/** |
| 354 |
* @brief rsqrtf |
| 355 |
* @param [in] fs = 入力 |
| 356 |
* @return 平方根 |
| 357 |
*/ |
| 358 |
STATICINLINE IrisF32 iris_allegrex_rsqrtf(IrisArgF32 fs) |
| 359 |
{ |
| 360 |
IrisF32Int fi; |
| 361 |
fi.fv = fs; |
| 362 |
if( fi.iv & 0x80000000 ) |
| 363 |
{ |
| 364 |
IrisFInt r; |
| 365 |
r.iv = MATH_F32_N_INF_BITS; |
| 366 |
return r.fv; |
| 367 |
} |
| 368 |
IrisF32Int x, y; |
| 369 |
x.iv = fi.iv + 0xFF800000; |
| 370 |
y.iv = (IrisU32)(0x5F3759DF - (fi.iv >> 1)); |
| 371 |
|
| 372 |
y.fv *= (1.5f - ( y.fv * y.fv * x.fv )); |
| 373 |
y.fv *= (1.5f - ( y.fv * y.fv * x.fv )); |
| 374 |
return (y.fv * (1.5f - (y.fv * y.fv * x.fv))); |
| 375 |
} |
| 376 |
|
| 377 |
/** |
| 378 |
* @brief hypot |
| 379 |
* Moler-Morrison Algorithm での実装 |
| 380 |
* @param [in] fa = 入力 |
| 381 |
* @param [in] fb = 入力 |
| 382 |
* @return √a*a + b*b |
| 383 |
*/ |
| 384 |
STATICINLINE IrisF32 iris_allegrex_hypotf(IrisArgF32 fa, IrisArgF32 fb) |
| 385 |
{ |
| 386 |
//IrisF32 a = fabsf(fa); |
| 387 |
//IrisF32 b = fabsf(fb); |
| 388 |
IrisF32Int fia; |
| 389 |
fia.fv = fa; |
| 390 |
fia.iv &= 0x7FFFFFFF; |
| 391 |
IrisF32Int fib; |
| 392 |
fib.fv = fb; |
| 393 |
fib.iv &= 0x7FFFFFFF; |
| 394 |
|
| 395 |
if( fia.iv < fib.iv ) |
| 396 |
{ |
| 397 |
iris_xor_swap(fia.iv, fib.iv); |
| 398 |
} |
| 399 |
if( fib.iv == 0 ) return fia.fv; |
| 400 |
IrisF32 s; |
| 401 |
for( int i=0; i < 3; ++i ) |
| 402 |
{ |
| 403 |
s = fib.fv/fia.fv; |
| 404 |
s *= s; |
| 405 |
s /= 4+s; |
| 406 |
fia.fv += 2*fia.fv*s; |
| 407 |
fib.fv *= s; |
| 408 |
} |
| 409 |
return fia.fv; |
| 410 |
} |
| 411 |
|
| 412 |
|
| 413 |
EXTERN_C_END |
| 414 |
|
| 415 |
#endif |