Revision | 784c7e958188181428b4c1b3b682ff9255695758 (tree) |
---|---|
Time | 2020-04-20 22:57:02 |
Author | Starg <starg@user...> |
Committer | Starg |
Merge branch 'dev41' into unicode
@@ -2506,23 +2506,9 @@ static void do_drive_mono(Drive *drv, DATA_T *inout) | ||
2506 | 2506 | int32 index; |
2507 | 2507 | FLOAT_T in, sign, v1, v2, fp; |
2508 | 2508 | |
2509 | - static FLOAT_T max = 0, avg = 0, sum = 0; | |
2510 | - static int32 tc = 0; | |
2511 | - | |
2512 | - | |
2513 | - | |
2514 | - | |
2515 | 2509 | in = *inout; |
2516 | 2510 | sign = (in < 0) ? (-1.0) : (1.0); |
2517 | 2511 | in *= drv->cnv * sign; |
2518 | - | |
2519 | - if(in > 1 && in > max) | |
2520 | - max = in; | |
2521 | - ++tc; | |
2522 | - sum += in; | |
2523 | - avg = sum / (FLOAT_T)tc; | |
2524 | - | |
2525 | - | |
2526 | 2512 | fp = floor(in); |
2527 | 2513 | index = fp; |
2528 | 2514 | fp = in - fp; |
@@ -505,9 +505,9 @@ int32 | ||
505 | 505 | in: 0.0 ~ 8.0 (1.0: 1<<(DRIVE_INPUT_BIT) , DRIVE_SCALE_BIT+DRIVE_BASE_BIT+FRACTION_BITS < 30bit |
506 | 506 | out: 0.0 ~ 8.0 * clip_level |
507 | 507 | */ |
508 | -#define DRIVE_SCALE_BIT (3) // 1.0 * 2^MATH_SCALE_BIT | |
508 | +#define DRIVE_SCALE_BIT (2) // 1.0 * 2^MATH_SCALE_BIT | |
509 | 509 | #define DRIVE_SCALE_MAX (1 << DRIVE_SCALE_BIT) // table max 1.0 * MATH_SCALE_MAX |
510 | -#define DRIVE_BASE_BIT (6) // 0.0~1.0 table size | |
510 | +#define DRIVE_BASE_BIT (8) // 0.0~1.0 table size | |
511 | 511 | #define DRIVE_BASE_LENGTH (1 << (DRIVE_BASE_BIT)) // 0.0~1.0:table size |
512 | 512 | #define DRIVE_TABLE_LENGTH (1 << (DRIVE_BASE_BIT + DRIVE_SCALE_BIT)) // 0.0~1.0 * MATH_SCALE_MAX table size |
513 | 513 | #define DRIVE_FRAC_BIT (14) // for int32 |
@@ -1970,6 +1970,8 @@ void init_effect_buffer_thread(void) | ||
1970 | 1970 | memset(delay_effect_buffer_sub, 0, sizeof(delay_effect_buffer_sub)); |
1971 | 1971 | memset(reverb_effect_buffer_sub, 0, sizeof(reverb_effect_buffer_sub)); |
1972 | 1972 | |
1973 | + memset(master_effect_buffer_thread, 0, sizeof(master_effect_buffer_thread)); | |
1974 | + | |
1973 | 1975 | reset_effect_thread_var(); |
1974 | 1976 | } |
1975 | 1977 |
@@ -594,66 +594,7 @@ static inline void do_vfx_distortion(int v, VoiceEffect *vfx, DATA_T *sp, int32 | ||
594 | 594 | } |
595 | 595 | break; |
596 | 596 | case 1: |
597 | -#if 0 && (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) | |
598 | - { | |
599 | - const int32 req_count_mask = ~(0x7); | |
600 | - int32 count2 = count & req_count_mask; | |
601 | - __m256 vgain = _mm256_set1_ps((float)info->velgain); | |
602 | - __m256 vlevel = _mm256_set1_ps((float)info->level); | |
603 | - const __m256 vvp1 = _mm256_set1_ps(1.0); | |
604 | - const __m256 vvn1 = _mm256_set1_ps(-1.0); | |
605 | - const __m256 vvq = _mm256_set1_ps(0.25); | |
606 | - for (i = 0; i < count2; i += 8) { | |
607 | - __m256 vtmp1, vme, vsp, vsn, vsign1, vbase1; | |
608 | - vtmp1 = MM256_SET2X_PS( | |
609 | - _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i])), | |
610 | - _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i + 4])) ); | |
611 | - vtmp1 = _mm256_mul_ps(vtmp1, vgain); _mm256_cmp_ps( | |
612 | - vsp = _mm256_and_ps(vvp1, _mm256_cmpgt_ps(vtmp1, vvp1)); | |
613 | - vsn = _mm256_and_ps(vvn1, _mm256_cmplt_ps(vtmp1, vvn1)); | |
614 | - vsign1 = _mm256_or_ps(vsp, vsn); | |
615 | - vme = _mm256_and_ps(_mm256_cmple_ps(vtmp1, vvp1), _mm256_cmpge_ps(vtmp1, vvn1)); | |
616 | - vbase1 = _mm256_and_ps(vtmp1, vme); | |
617 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1) | |
618 | - vtmp1 = _mm256_sub_ps(vtmp1, vvp1); | |
619 | - vtmp1 = _mm256_mul_ps(vtmp1, vvq); | |
620 | - vtmp1 = _mm256_add_ps(vtmp1, vvp1); | |
621 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1); | |
622 | - vtmp1 = _mm256_add_ps(vtmp1, vbase1); | |
623 | - vtmp1 = _mm256_mul_ps(vtmp1, vlevel); | |
624 | - _mm256_storeu_pd(&sp[i], _mm256_cvtps_pd(_mm256_extract_ps(vtmp1, 0x0))); | |
625 | - _mm256_storeu_pd(&sp[i + 4], _mm256_cvtps_pd(_mm256_extract_ps(vtmp1, 0x1))); | |
626 | - } | |
627 | - } | |
628 | -#elif 0 && (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_FLOAT) | |
629 | - { | |
630 | - const int32 req_count_mask = ~(0x7); | |
631 | - int32 count2 = count & req_count_mask; | |
632 | - __m256 vgain = _mm256_set1_ps((float)info->velgain); | |
633 | - __m256 vlevel = _mm256_set1_ps((float)info->level); | |
634 | - const __m256 vvp1 = _mm256_set1_ps(1.0); | |
635 | - const __m256 vvn1 = _mm256_set1_ps(-1.0); | |
636 | - const __m256 vvq = _mm256_set1_ps(0.25); | |
637 | - for (i = 0; i < count2; i += 8) { | |
638 | - __m256 vtmp1, vme, vsp, vsn, vsign1, vbase1; | |
639 | - vtmp1 = _mm256_loadu_ps(&sp[i]); | |
640 | - vtmp1 = _mm256_mul_ps(vtmp1, vgain); | |
641 | - vsp = _mm256_and_ps(vvp1, _mm256_cmpgt_ps(vtmp1, vvp1)); | |
642 | - vsn = _mm256_and_ps(vvn1, _mm256_cmplt_ps(vtmp1, vvn1)); | |
643 | - vsign1 = _mm256_or_ps(vsp, vsn); | |
644 | - vme = _mm256_and_ps(_mm256_cmple_ps(vtmp1, vvp1), _mm256_cmpge_ps(vtmp1, vvn1)); | |
645 | - vbase1 = _mm256_and_ps(vtmp1, vme); | |
646 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1) | |
647 | - vtmp1 = _mm256_sub_ps(vtmp1, vvp1); | |
648 | - vtmp1 = _mm256_mul_ps(vtmp1, vvq); | |
649 | - vtmp1 = _mm256_add_ps(vtmp1, vvp1); | |
650 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1); | |
651 | - vtmp1 = _mm256_add_ps(vtmp1, vbase1); | |
652 | - vtmp1 = _mm256_mul_ps(vtmp1, vlevel); | |
653 | - _mm256_storeu_ps(&sp[i], vtmp1); | |
654 | - } | |
655 | - } | |
656 | -#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) | |
597 | +#if (USE_X86_EXT_INTRIN >= 2) && (defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)) | |
657 | 598 | { |
658 | 599 | const int32 req_count_mask = ~(0x7); |
659 | 600 | int32 count2 = count & req_count_mask; |
@@ -663,46 +604,85 @@ static inline void do_vfx_distortion(int v, VoiceEffect *vfx, DATA_T *sp, int32 | ||
663 | 604 | const __m128 vvn1 = _mm_set1_ps(-1.0); |
664 | 605 | const __m128 vvq = _mm_set1_ps(0.25); |
665 | 606 | for (i = 0; i < count2; i += 8) { |
607 | +/* | |
608 | +tmp = sp[i] * info->velgain; | |
609 | +sp = tmp >= 0 ? (1.0) : (0.0); | |
610 | +sn = tmp < 0 ? (-1.0) : (0.0); | |
611 | +sign = sp | sn; | |
612 | +base = tmp = tmp * sign; | |
613 | +tmp = 1.0 + (tmp - 1.0) * 0.25; | |
614 | +sp = base > 1.0 ? tmp : 0; | |
615 | +sn = base <= 1.0 ? base : 0; | |
616 | +tmp = sp | sn; | |
617 | +sp[i] = tmp * sign * info->level; | |
618 | +*/ | |
666 | 619 | __m128 vtmp1, vtmp2, vme, vsp, vsn, vsign1, vsign2, vbase1, vbase2; |
620 | +#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) | |
621 | + vtmp1 = _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i])); | |
622 | + vtmp2 = _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i + 4])); | |
623 | +#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) | |
667 | 624 | vtmp1 = _mm_shuffle_ps( |
668 | 625 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i])), |
669 | 626 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i + 2])), 0x44); |
670 | 627 | vtmp2 = _mm_shuffle_ps( |
671 | 628 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i + 4])), |
672 | 629 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i + 6])), 0x44); |
630 | +#elif (USE_X86_EXT_INTRIN >= 2) && defined(DATA_T_FLOAT) | |
631 | + vtmp1 = _mm_loadu_ps(&sp[i]); | |
632 | + vtmp2 = _mm_loadu_ps(&sp[i + 4]); | |
633 | +#endif | |
673 | 634 | vtmp1 = _mm_mul_ps(vtmp1, vgain); |
674 | - vtmp2 = _mm_mul_ps(vtmp2, vgain); | |
675 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp1, vvp1)); | |
676 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp1, vvn1)); | |
677 | - vsign1 = _mm_or_ps(vsp, vsn); | |
678 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp1, vvp1), _mm_cmpge_ps(vtmp1, vvn1)); | |
679 | - vbase1 = _mm_and_ps(vtmp1, vme); | |
680 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp2, vvp1)); | |
681 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp2, vvn1)); | |
682 | - vsign2 = _mm_or_ps(vsp, vsn); | |
683 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp2, vvp1), _mm_cmpge_ps(vtmp2, vvn1)); | |
684 | - vbase2 = _mm_and_ps(vtmp2, vme); | |
685 | - vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
686 | - vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
635 | + vtmp2 = _mm_mul_ps(vtmp2, vgain); | |
636 | + vsp = _mm_and_ps(vvp1, _mm_cmpge_ps(vtmp1, _mm_setzero_ps())); | |
637 | + vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp1, _mm_setzero_ps())); | |
638 | + vsign1 = _mm_or_ps(vsp, vsn); | |
639 | + vsp = _mm_and_ps(vvp1, _mm_cmpge_ps(vtmp2, _mm_setzero_ps())); | |
640 | + vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp2, _mm_setzero_ps())); | |
641 | + vsign2 = _mm_or_ps(vsp, vsn); | |
642 | + vbase1 = vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
643 | + vbase2 = vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
687 | 644 | vtmp1 = _mm_sub_ps(vtmp1, vvp1); |
688 | 645 | vtmp2 = _mm_sub_ps(vtmp2, vvp1); |
689 | 646 | vtmp1 = _mm_mul_ps(vtmp1, vvq); |
690 | 647 | vtmp2 = _mm_mul_ps(vtmp2, vvq); |
691 | 648 | vtmp1 = _mm_add_ps(vtmp1, vvp1); |
692 | - vtmp2 = _mm_add_ps(vtmp2, vvp1); | |
649 | + vtmp2 = _mm_add_ps(vtmp2, vvp1); | |
650 | + vsp = _mm_and_ps(vtmp1, _mm_cmpgt_ps(vbase1, vvp1)); | |
651 | + vsn = _mm_and_ps(vbase1, _mm_cmple_ps(vbase1, vvp1)); | |
652 | + vtmp1 = _mm_or_ps(vsp, vsn); | |
653 | + vsp = _mm_and_ps(vtmp2, _mm_cmpgt_ps(vbase2, vvp1)); | |
654 | + vsn = _mm_and_ps(vbase2, _mm_cmple_ps(vbase2, vvp1)); | |
655 | + vtmp2 = _mm_or_ps(vsp, vsn); | |
693 | 656 | vtmp1 = _mm_mul_ps(vtmp1, vsign1); |
694 | 657 | vtmp2 = _mm_mul_ps(vtmp2, vsign2); |
695 | - vtmp1 = _mm_add_ps(vtmp1, vbase1); | |
696 | - vtmp2 = _mm_add_ps(vtmp2, vbase2); | |
697 | 658 | vtmp1 = _mm_mul_ps(vtmp1, vlevel); |
698 | 659 | vtmp2 = _mm_mul_ps(vtmp2, vlevel); |
660 | +#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) | |
661 | + _mm256_storeu_pd(&sp[i], _mm256_cvtps_pd(vtmp1)); | |
662 | + _mm256_storeu_pd(&sp[i + 4], _mm256_cvtps_pd(vtmp2)); | |
663 | +#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) | |
699 | 664 | _mm_storeu_pd(&sp[i], _mm_cvtps_pd(vtmp1)); |
700 | 665 | _mm_storeu_pd(&sp[i + 2], _mm_cvtps_pd(_mm_movehl_ps(vtmp1,vtmp1))); |
701 | 666 | _mm_storeu_pd(&sp[i + 4], _mm_cvtps_pd(vtmp2)); |
702 | 667 | _mm_storeu_pd(&sp[i + 6], _mm_cvtps_pd(_mm_movehl_ps(vtmp2,vtmp2))); |
668 | +#elif (USE_X86_EXT_INTRIN >= 2) && defined(DATA_T_FLOAT) | |
669 | + _mm_storeu_ps(&sp[i], vtmp1); | |
670 | + _mm_storeu_ps(&sp[i + 4], vtmp2); | |
671 | +#endif | |
703 | 672 | } |
704 | 673 | } |
705 | -#elif (USE_X86_EXT_INTRIN >= 2) && defined(DATA_T_FLOAT) | |
674 | +#endif // USE_X86_EXT_INTRIN | |
675 | + for (; i < count; i++) { | |
676 | + FLOAT_T tmp = sp[i] * info->velgain; | |
677 | + if(tmp > 1.0) | |
678 | + tmp = 1.0 + (tmp - 1.0) * 0.25; | |
679 | + else if(tmp < -1.0) | |
680 | + tmp = -1.0 - (tmp + 1.0) * 0.25; | |
681 | + sp[i] = tmp * info->level; | |
682 | + } | |
683 | + break; | |
684 | + case 2: | |
685 | +#if (USE_X86_EXT_INTRIN >= 2) && (defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)) | |
706 | 686 | { |
707 | 687 | const int32 req_count_mask = ~(0x7); |
708 | 688 | int32 count2 = count & req_count_mask; |
@@ -712,37 +692,71 @@ static inline void do_vfx_distortion(int v, VoiceEffect *vfx, DATA_T *sp, int32 | ||
712 | 692 | const __m128 vvn1 = _mm_set1_ps(-1.0); |
713 | 693 | const __m128 vvq = _mm_set1_ps(0.25); |
714 | 694 | for (i = 0; i < count2; i += 8) { |
695 | +/* | |
696 | +tmp = sp[i] * info->velgain; | |
697 | +sp = tmp >= 0 ? (1.0) : (0.0); | |
698 | +sn = tmp < 0 ? (-1.0) : (0.0); | |
699 | +sign = sp | sn; | |
700 | +base = tmp = tmp * sign; | |
701 | +tmp = 1.0 - (tmp - 1.0) * 0.25; | |
702 | +sp = base > 1.0 ? tmp : 0; | |
703 | +sn = base <= 1.0 ? base : 0; | |
704 | +tmp = sp | sn; | |
705 | +sp[i] = tmp * sign * info->level; | |
706 | +*/ | |
715 | 707 | __m128 vtmp1, vtmp2, vme, vsp, vsn, vsign1, vsign2, vbase1, vbase2; |
708 | +#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) | |
709 | + vtmp1 = _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i])); | |
710 | + vtmp2 = _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i + 4])); | |
711 | +#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) | |
712 | + vtmp1 = _mm_shuffle_ps( | |
713 | + _mm_cvtpd_ps(_mm_loadu_pd(&sp[i])), | |
714 | + _mm_cvtpd_ps(_mm_loadu_pd(&sp[i + 2])), 0x44); | |
715 | + vtmp2 = _mm_shuffle_ps( | |
716 | + _mm_cvtpd_ps(_mm_loadu_pd(&sp[i + 4])), | |
717 | + _mm_cvtpd_ps(_mm_loadu_pd(&sp[i + 6])), 0x44); | |
718 | +#elif (USE_X86_EXT_INTRIN >= 2) && defined(DATA_T_FLOAT) | |
716 | 719 | vtmp1 = _mm_loadu_ps(&sp[i]); |
717 | 720 | vtmp2 = _mm_loadu_ps(&sp[i + 4]); |
721 | +#endif | |
718 | 722 | vtmp1 = _mm_mul_ps(vtmp1, vgain); |
719 | - vtmp2 = _mm_mul_ps(vtmp2, vgain); | |
720 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp1, vvp1)); | |
721 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp1, vvn1)); | |
722 | - vsign1 = _mm_or_ps(vsp, vsn); | |
723 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp1, vvp1), _mm_cmpge_ps(vtmp1, vvn1)); | |
724 | - vbase1 = _mm_and_ps(vtmp1, vme); | |
725 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp2, vvp1)); | |
726 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp2, vvn1)); | |
727 | - vsign2 = _mm_or_ps(vsp, vsn); | |
728 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp2, vvp1), _mm_cmpge_ps(vtmp2, vvn1)); | |
729 | - vbase2 = _mm_and_ps(vtmp2, vme); | |
730 | - vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
731 | - vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
723 | + vtmp2 = _mm_mul_ps(vtmp2, vgain); | |
724 | + vsp = _mm_and_ps(vvp1, _mm_cmpge_ps(vtmp1, _mm_setzero_ps())); | |
725 | + vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp1, _mm_setzero_ps())); | |
726 | + vsign1 = _mm_or_ps(vsp, vsn); | |
727 | + vsp = _mm_and_ps(vvp1, _mm_cmpge_ps(vtmp2, _mm_setzero_ps())); | |
728 | + vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp2, _mm_setzero_ps())); | |
729 | + vsign2 = _mm_or_ps(vsp, vsn); | |
730 | + vbase1 = vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
731 | + vbase2 = vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
732 | 732 | vtmp1 = _mm_sub_ps(vtmp1, vvp1); |
733 | 733 | vtmp2 = _mm_sub_ps(vtmp2, vvp1); |
734 | 734 | vtmp1 = _mm_mul_ps(vtmp1, vvq); |
735 | 735 | vtmp2 = _mm_mul_ps(vtmp2, vvq); |
736 | - vtmp1 = _mm_add_ps(vtmp1, vvp1); | |
737 | - vtmp2 = _mm_add_ps(vtmp2, vvp1); | |
736 | + vtmp1 = _mm_sub_ps(vvp1, vtmp1); | |
737 | + vtmp2 = _mm_sub_ps(vvp1, vtmp2); | |
738 | + vsp = _mm_and_ps(vtmp1, _mm_cmpgt_ps(vbase1, vvp1)); | |
739 | + vsn = _mm_and_ps(vbase1, _mm_cmple_ps(vbase1, vvp1)); | |
740 | + vtmp1 = _mm_or_ps(vsp, vsn); | |
741 | + vsp = _mm_and_ps(vtmp2, _mm_cmpgt_ps(vbase2, vvp1)); | |
742 | + vsn = _mm_and_ps(vbase2, _mm_cmple_ps(vbase2, vvp1)); | |
743 | + vtmp2 = _mm_or_ps(vsp, vsn); | |
738 | 744 | vtmp1 = _mm_mul_ps(vtmp1, vsign1); |
739 | 745 | vtmp2 = _mm_mul_ps(vtmp2, vsign2); |
740 | - vtmp1 = _mm_add_ps(vtmp1, vbase1); | |
741 | - vtmp2 = _mm_add_ps(vtmp2, vbase2); | |
742 | 746 | vtmp1 = _mm_mul_ps(vtmp1, vlevel); |
743 | 747 | vtmp2 = _mm_mul_ps(vtmp2, vlevel); |
748 | +#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) | |
749 | + _mm256_storeu_pd(&sp[i], _mm256_cvtps_pd(vtmp1)); | |
750 | + _mm256_storeu_pd(&sp[i + 4], _mm256_cvtps_pd(vtmp2)); | |
751 | +#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) | |
752 | + _mm_storeu_pd(&sp[i], _mm_cvtps_pd(vtmp1)); | |
753 | + _mm_storeu_pd(&sp[i + 2], _mm_cvtps_pd(_mm_movehl_ps(vtmp1,vtmp1))); | |
754 | + _mm_storeu_pd(&sp[i + 4], _mm_cvtps_pd(vtmp2)); | |
755 | + _mm_storeu_pd(&sp[i + 6], _mm_cvtps_pd(_mm_movehl_ps(vtmp2,vtmp2))); | |
756 | +#elif (USE_X86_EXT_INTRIN >= 2) && defined(DATA_T_FLOAT) | |
744 | 757 | _mm_storeu_ps(&sp[i], vtmp1); |
745 | 758 | _mm_storeu_ps(&sp[i + 4], vtmp2); |
759 | +#endif | |
746 | 760 | } |
747 | 761 | } |
748 | 762 | #endif // USE_X86_EXT_INTRIN |
@@ -755,61 +769,8 @@ static inline void do_vfx_distortion(int v, VoiceEffect *vfx, DATA_T *sp, int32 | ||
755 | 769 | sp[i] = tmp * info->level; |
756 | 770 | } |
757 | 771 | break; |
758 | - case 2: | |
759 | -#if 0 && (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) | |
760 | - { | |
761 | - const int32 req_count_mask = ~(0x7); | |
762 | - int32 count2 = count & req_count_mask; | |
763 | - __m256 vgain = _mm256_set1_ps((float)info->velgain); | |
764 | - __m256 vlevel = _mm256_set1_ps((float)info->level); | |
765 | - const __m256 vvp1 = _mm256_set1_ps(1.0); | |
766 | - const __m256 vvn1 = _mm256_set1_ps(-1.0); | |
767 | - for (i = 0; i < count2; i += 8) { | |
768 | - __m256 vtmp1, vme, vsp, vsn, vsign1, vbase1; | |
769 | - vtmp1 = MM256_SET2X_PS( | |
770 | - _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i])), | |
771 | - _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i + 4])) ); | |
772 | - vtmp1 = _mm256_mul_ps(vtmp1, vgain); | |
773 | - vsp = _mm256_and_ps(vvp1, _mm256_cmpgt_ps(vtmp1, vvp1)); | |
774 | - vsn = _mm256_and_ps(vvn1, _mm256_cmplt_ps(vtmp1, vvn1)); | |
775 | - vsign1 = _mm256_or_ps(vsp, vsn); | |
776 | - vme = _mm256_and_ps(_mm256_cmple_ps(vtmp1, vvp1), _mm256_cmpge_ps(vtmp1, vvn1)); | |
777 | - vbase1 = _mm256_and_ps(vtmp1, vme); | |
778 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1) | |
779 | - vtmp1 = _mm256_sqrt_ps(vtmp1); | |
780 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1); | |
781 | - vtmp1 = _mm256_add_ps(vtmp1, vbase1); | |
782 | - vtmp1 = _mm256_mul_ps(vtmp1, vlevel); | |
783 | - _mm256_storeu_pd(&sp[i], _mm256_cvtps_pd(_mm256_extract_ps(vtmp1, 0x0))); | |
784 | - _mm256_storeu_pd(&sp[i + 4], _mm256_cvtps_pd(_mm256_extract_ps(vtmp1, 0x1))); | |
785 | - } | |
786 | - } | |
787 | -#elif 0 && (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_FLOAT) | |
788 | - { | |
789 | - const int32 req_count_mask = ~(0x7); | |
790 | - int32 count2 = count & req_count_mask; | |
791 | - __m256 vgain = _mm256_set1_ps((float)info->velgain); | |
792 | - __m256 vlevel = _mm256_set1_ps((float)info->level); | |
793 | - const __m256 vvp1 = _mm256_set1_ps(1.0); | |
794 | - const __m256 vvn1 = _mm256_set1_ps(-1.0); | |
795 | - for (i = 0; i < count2; i += 8) { | |
796 | - __m256 vtmp1, vme, vsp, vsn, vsign1, vbase1; | |
797 | - vtmp1 = _mm256_loadu_ps(&sp[i]); | |
798 | - vtmp1 = _mm256_mul_ps(vtmp1, vgain); | |
799 | - vsp = _mm256_and_ps(vvp1, _mm256_cmpgt_ps(vtmp1, vvp1)); | |
800 | - vsn = _mm256_and_ps(vvn1, _mm256_cmplt_ps(vtmp1, vvn1)); | |
801 | - vsign1 = _mm256_or_ps(vsp, vsn); | |
802 | - vme = _mm256_and_ps(_mm256_cmple_ps(vtmp1, vvp1), _mm256_cmpge_ps(vtmp1, vvn1)); | |
803 | - vbase1 = _mm256_and_ps(vtmp1, vme); | |
804 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1); | |
805 | - vtmp1 = _mm256_sqrt_ps(vtmp1); | |
806 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1); | |
807 | - vtmp1 = _mm256_add_ps(vtmp1, vbase1); | |
808 | - vtmp1 = _mm256_mul_ps(vtmp1, vlevel); | |
809 | - _mm256_storeu_ps(&sp[i], vtmp1); | |
810 | - } | |
811 | - } | |
812 | -#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) | |
772 | + case 3: | |
773 | +#if (USE_X86_EXT_INTRIN >= 2) && (defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)) | |
813 | 774 | { |
814 | 775 | const int32 req_count_mask = ~(0x7); |
815 | 776 | int32 count2 = count & req_count_mask; |
@@ -818,78 +779,67 @@ static inline void do_vfx_distortion(int v, VoiceEffect *vfx, DATA_T *sp, int32 | ||
818 | 779 | const __m128 vvp1 = _mm_set1_ps(1.0); |
819 | 780 | const __m128 vvn1 = _mm_set1_ps(-1.0); |
820 | 781 | for (i = 0; i < count2; i += 8) { |
782 | +/* | |
783 | +tmp = sp[i] * info->velgain; | |
784 | +sp = tmp >= 0 ? (1.0) : 0; | |
785 | +sn = tmp < 0 ? (-1.0) : 0; | |
786 | +sign = sp | sn; | |
787 | +base = tmp = tmp * sign; | |
788 | +tmp = sqrt(tmp); | |
789 | +sp = base > 1.0 ? tmp : 0; | |
790 | +sn = base <= 1.0 ? base : 0; | |
791 | +tmp = sp | sn; | |
792 | +sp[i] = tmp * sign * info->level; | |
793 | +*/ | |
821 | 794 | __m128 vtmp1, vtmp2, vme, vsp, vsn, vsign1, vsign2, vbase1, vbase2; |
822 | - __m128d vtmpd1, vtmpd2; | |
795 | +#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) | |
796 | + vtmp1 = _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i])); | |
797 | + vtmp2 = _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i + 4])); | |
798 | +#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) | |
823 | 799 | vtmp1 = _mm_shuffle_ps( |
824 | 800 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i])), |
825 | 801 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i + 2])), 0x44); |
826 | 802 | vtmp2 = _mm_shuffle_ps( |
827 | 803 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i + 4])), |
828 | 804 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i + 6])), 0x44); |
805 | +#elif (USE_X86_EXT_INTRIN >= 2) && defined(DATA_T_FLOAT) | |
806 | + vtmp1 = _mm_loadu_ps(&sp[i]); | |
807 | + vtmp2 = _mm_loadu_ps(&sp[i + 4]); | |
808 | +#endif | |
829 | 809 | vtmp1 = _mm_mul_ps(vtmp1, vgain); |
830 | - vtmp2 = _mm_mul_ps(vtmp2, vgain); | |
831 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp1, vvp1)); | |
832 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp1, vvn1)); | |
833 | - vsign1 = _mm_or_ps(vsp, vsn); | |
834 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp1, vvp1), _mm_cmpge_ps(vtmp1, vvn1)); | |
835 | - vbase1 = _mm_and_ps(vtmp1, vme); | |
836 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp2, vvp1)); | |
837 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp2, vvn1)); | |
838 | - vsign2 = _mm_or_ps(vsp, vsn); | |
839 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp2, vvp1), _mm_cmpge_ps(vtmp2, vvn1)); | |
840 | - vbase2 = _mm_and_ps(vtmp2, vme); | |
841 | - vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
842 | - vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
810 | + vtmp2 = _mm_mul_ps(vtmp2, vgain); | |
811 | + vsp = _mm_and_ps(vvp1, _mm_cmpge_ps(vtmp1, _mm_setzero_ps())); | |
812 | + vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp1, _mm_setzero_ps())); | |
813 | + vsign1 = _mm_or_ps(vsp, vsn); | |
814 | + vsp = _mm_and_ps(vvp1, _mm_cmpge_ps(vtmp2, _mm_setzero_ps())); | |
815 | + vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp2, _mm_setzero_ps())); | |
816 | + vsign2 = _mm_or_ps(vsp, vsn); | |
817 | + vbase1 = vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
818 | + vbase2 = vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
843 | 819 | vtmp1 = _mm_sqrt_ps(vtmp1); |
844 | 820 | vtmp2 = _mm_sqrt_ps(vtmp2); |
821 | + vsp = _mm_and_ps(vtmp1, _mm_cmpgt_ps(vbase1, vvp1)); | |
822 | + vsn = _mm_and_ps(vbase1, _mm_cmple_ps(vbase1, vvp1)); | |
823 | + vtmp1 = _mm_or_ps(vsp, vsn); | |
824 | + vsp = _mm_and_ps(vtmp2, _mm_cmpgt_ps(vbase2, vvp1)); | |
825 | + vsn = _mm_and_ps(vbase2, _mm_cmple_ps(vbase2, vvp1)); | |
826 | + vtmp2 = _mm_or_ps(vsp, vsn); | |
845 | 827 | vtmp1 = _mm_mul_ps(vtmp1, vsign1); |
846 | 828 | vtmp2 = _mm_mul_ps(vtmp2, vsign2); |
847 | - vtmp1 = _mm_add_ps(vtmp1, vbase1); | |
848 | - vtmp2 = _mm_add_ps(vtmp2, vbase2); | |
849 | 829 | vtmp1 = _mm_mul_ps(vtmp1, vlevel); |
850 | 830 | vtmp2 = _mm_mul_ps(vtmp2, vlevel); |
831 | +#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) | |
832 | + _mm256_storeu_pd(&sp[i], _mm256_cvtps_pd(vtmp1)); | |
833 | + _mm256_storeu_pd(&sp[i + 4], _mm256_cvtps_pd(vtmp2)); | |
834 | +#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) | |
851 | 835 | _mm_storeu_pd(&sp[i], _mm_cvtps_pd(vtmp1)); |
852 | 836 | _mm_storeu_pd(&sp[i + 2], _mm_cvtps_pd(_mm_movehl_ps(vtmp1,vtmp1))); |
853 | 837 | _mm_storeu_pd(&sp[i + 4], _mm_cvtps_pd(vtmp2)); |
854 | 838 | _mm_storeu_pd(&sp[i + 6], _mm_cvtps_pd(_mm_movehl_ps(vtmp2,vtmp2))); |
855 | - } | |
856 | - } | |
857 | 839 | #elif (USE_X86_EXT_INTRIN >= 2) && defined(DATA_T_FLOAT) |
858 | - { | |
859 | - const int32 req_count_mask = ~(0x7); | |
860 | - int32 count2 = count & req_count_mask; | |
861 | - __m128 vgain = _mm_set1_ps((float)info->velgain); | |
862 | - __m128 vlevel = _mm_set1_ps((float)info->level); | |
863 | - const __m128 vvp1 = _mm_set1_ps(1.0); | |
864 | - const __m128 vvn1 = _mm_set1_ps(-1.0); | |
865 | - for (i = 0; i < count2; i += 8) { | |
866 | - __m128 vtmp1, vtmp2, vme, vsp, vsn, vsign1, vsign2, vbase1, vbase2; | |
867 | - vtmp1 = _mm_loadu_ps(&sp[i]); | |
868 | - vtmp2 = _mm_loadu_ps(&sp[i + 4]); | |
869 | - vtmp1 = _mm_mul_ps(vtmp1, vgain); | |
870 | - vtmp2 = _mm_mul_ps(vtmp2, vgain); | |
871 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp1, vvp1)); | |
872 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp1, vvn1)); | |
873 | - vsign1 = _mm_or_ps(vsp, vsn); | |
874 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp1, vvp1), _mm_cmpge_ps(vtmp1, vvn1)); | |
875 | - vbase1 = _mm_and_ps(vtmp1, vme); | |
876 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp2, vvp1)); | |
877 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp2, vvn1)); | |
878 | - vsign2 = _mm_or_ps(vsp, vsn); | |
879 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp2, vvp1), _mm_cmpge_ps(vtmp2, vvn1)); | |
880 | - vbase2 = _mm_and_ps(vtmp2, vme); | |
881 | - vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
882 | - vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
883 | - vtmp1 = _mm_sqrt_ps(vtmp1); | |
884 | - vtmp2 = _mm_sqrt_ps(vtmp2); | |
885 | - vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
886 | - vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
887 | - vtmp1 = _mm_add_ps(vtmp1, vbase1); | |
888 | - vtmp2 = _mm_add_ps(vtmp2, vbase2); | |
889 | - vtmp1 = _mm_mul_ps(vtmp1, vlevel); | |
890 | - vtmp2 = _mm_mul_ps(vtmp2, vlevel); | |
891 | 840 | _mm_storeu_ps(&sp[i], vtmp1); |
892 | 841 | _mm_storeu_ps(&sp[i + 4], vtmp2); |
842 | +#endif | |
893 | 843 | } |
894 | 844 | } |
895 | 845 | #endif // USE_X86_EXT_INTRIN |
@@ -903,64 +853,7 @@ static inline void do_vfx_distortion(int v, VoiceEffect *vfx, DATA_T *sp, int32 | ||
903 | 853 | } |
904 | 854 | break; |
905 | 855 | case 4: |
906 | -#if 0 && (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) | |
907 | - { | |
908 | - const int32 req_count_mask = ~(0x7); | |
909 | - int32 count2 = count & req_count_mask; | |
910 | - __m256 vgain = _mm256_set1_ps((float)info->velgain); | |
911 | - __m256 vlevel = _mm256_set1_ps((float)info->level); | |
912 | - const __m256 vvp1 = _mm256_set1_ps(1.0); | |
913 | - const __m256 vvn1 = _mm256_set1_ps(-1.0); | |
914 | - const __m256 vvp2 = _mm256_set1_ps(2.0); | |
915 | - for (i = 0; i < count2; i += 8) { | |
916 | - __m256 vtmp1, vme, vsp, vsn, vsign1, vbase1; | |
917 | - vtmp1 = MM256_SET2X_PS( | |
918 | - _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i])), | |
919 | - _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i + 4])) ); | |
920 | - vtmp1 = _mm256_mul_ps(vtmp1, vgain); | |
921 | - vsp = _mm256_and_ps(vvp1, _mm256_cmpgt_ps(vtmp1, vvp1)); | |
922 | - vsn = _mm256_and_ps(vvn1, _mm256_cmplt_ps(vtmp1, vvn1)); | |
923 | - vsign1 = _mm256_or_ps(vsp, vsn); | |
924 | - vme = _mm256_and_ps(_mm256_cmple_ps(vtmp1, vvp1), _mm256_cmpge_ps(vtmp1, vvn1)); | |
925 | - vbase1 = _mm256_and_ps(vtmp1, vme); | |
926 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1) | |
927 | - vtmp1 = _mm256_sqrt_ps(vtmp1); | |
928 | - vtmp1 = _mm256_sub_ps(vvp2, vtmp1); | |
929 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1); | |
930 | - vtmp1 = _mm256_add_ps(vtmp1, vbase1); | |
931 | - vtmp1 = _mm256_mul_ps(vtmp1, vlevel); | |
932 | - _mm256_storeu_pd(&sp[i], _mm256_cvtps_pd(_mm256_extract_ps(vtmp1, 0x0))); | |
933 | - _mm256_storeu_pd(&sp[i + 4], _mm256_cvtps_pd(_mm256_extract_ps(vtmp1, 0x1))); | |
934 | - } | |
935 | - } | |
936 | -#elif 0 && (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_FLOAT) | |
937 | - { | |
938 | - const int32 req_count_mask = ~(0x7); | |
939 | - int32 count2 = count & req_count_mask; | |
940 | - __m256 vgain = _mm256_set1_ps((float)info->velgain); | |
941 | - __m256 vlevel = _mm256_set1_ps((float)info->level); | |
942 | - const __m256 vvp1 = _mm256_set1_ps(1.0); | |
943 | - const __m256 vvn1 = _mm256_set1_ps(-1.0); | |
944 | - const __m256 vvp2 = _mm256_set1_ps(2.0); | |
945 | - for (i = 0; i < count2; i += 8) { | |
946 | - __m256 vtmp1, vme, vsp, vsn, vsign1, vbase1; | |
947 | - vtmp1 = _mm256_loadu_ps(&sp[i]); | |
948 | - vtmp1 = _mm256_mul_ps(vtmp1, vgain); | |
949 | - vsp = _mm256_and_ps(vvp1, _mm256_cmpgt_ps(vtmp1, vvp1)); | |
950 | - vsn = _mm256_and_ps(vvn1, _mm256_cmplt_ps(vtmp1, vvn1)); | |
951 | - vsign1 = _mm256_or_ps(vsp, vsn); | |
952 | - vme = _mm256_and_ps(_mm256_cmple_ps(vtmp1, vvp1), _mm256_cmpge_ps(vtmp1, vvn1)); | |
953 | - vbase1 = _mm256_and_ps(vtmp1, vme); | |
954 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1) | |
955 | - vtmp1 = _mm256_sqrt_ps(vtmp1); | |
956 | - vtmp1 = _mm256_sub_ps(vvp2, vtmp1); | |
957 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1); | |
958 | - vtmp1 = _mm256_add_ps(vtmp1, vbase1); | |
959 | - vtmp1 = _mm256_mul_ps(vtmp1, vlevel); | |
960 | - _mm256_storeu_ps(&sp[i], vtmp1); | |
961 | - } | |
962 | - } | |
963 | -#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) | |
856 | +#if (USE_X86_EXT_INTRIN >= 2) && (defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)) | |
964 | 857 | { |
965 | 858 | const int32 req_count_mask = ~(0x7); |
966 | 859 | int32 count2 = count & req_count_mask; |
@@ -970,83 +863,69 @@ static inline void do_vfx_distortion(int v, VoiceEffect *vfx, DATA_T *sp, int32 | ||
970 | 863 | const __m128 vvn1 = _mm_set1_ps(-1.0); |
971 | 864 | const __m128 vvp2 = _mm_set1_ps(2.0); |
972 | 865 | for (i = 0; i < count2; i += 8) { |
866 | +/* | |
867 | +tmp = sp[i] * info->velgain; | |
868 | +sp = tmp >= 0 ? (1.0) : (0.0); | |
869 | +sn = tmp < 0 ? (-1.0) : (0.0); | |
870 | +sign = sp | sn; | |
871 | +base = tmp = tmp * sign; | |
872 | +tmp = 2.0 - sqrt(tmp); | |
873 | +sp = base > 1.0 ? tmp : 0; | |
874 | +sn = base <= 1.0 ? base : 0; | |
875 | +tmp = sp | sn; | |
876 | +sp[i] = tmp * sign * info->level; | |
877 | +*/ | |
973 | 878 | __m128 vtmp1, vtmp2, vme, vsp, vsn, vsign1, vsign2, vbase1, vbase2; |
974 | - __m128d vtmpd1, vtmpd2; | |
879 | +#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) | |
880 | + vtmp1 = _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i])); | |
881 | + vtmp2 = _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i + 4])); | |
882 | +#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) | |
975 | 883 | vtmp1 = _mm_shuffle_ps( |
976 | 884 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i])), |
977 | 885 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i + 2])), 0x44); |
978 | 886 | vtmp2 = _mm_shuffle_ps( |
979 | 887 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i + 4])), |
980 | 888 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i + 6])), 0x44); |
889 | +#elif (USE_X86_EXT_INTRIN >= 2) && defined(DATA_T_FLOAT) | |
890 | + vtmp1 = _mm_loadu_ps(&sp[i]); | |
891 | + vtmp2 = _mm_loadu_ps(&sp[i + 4]); | |
892 | +#endif | |
981 | 893 | vtmp1 = _mm_mul_ps(vtmp1, vgain); |
982 | - vtmp2 = _mm_mul_ps(vtmp2, vgain); | |
983 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp1, vvp1)); | |
984 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp1, vvn1)); | |
985 | - vsign1 = _mm_or_ps(vsp, vsn); | |
986 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp1, vvp1), _mm_cmpge_ps(vtmp1, vvn1)); | |
987 | - vbase1 = _mm_and_ps(vtmp1, vme); | |
988 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp2, vvp1)); | |
989 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp2, vvn1)); | |
990 | - vsign2 = _mm_or_ps(vsp, vsn); | |
991 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp2, vvp1), _mm_cmpge_ps(vtmp2, vvn1)); | |
992 | - vbase2 = _mm_and_ps(vtmp2, vme); | |
993 | - vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
994 | - vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
894 | + vtmp2 = _mm_mul_ps(vtmp2, vgain); | |
895 | + vsp = _mm_and_ps(vvp1, _mm_cmpge_ps(vtmp1, _mm_setzero_ps())); | |
896 | + vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp1, _mm_setzero_ps())); | |
897 | + vsign1 = _mm_or_ps(vsp, vsn); | |
898 | + vsp = _mm_and_ps(vvp1, _mm_cmpge_ps(vtmp2, _mm_setzero_ps())); | |
899 | + vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp2, _mm_setzero_ps())); | |
900 | + vsign2 = _mm_or_ps(vsp, vsn); | |
901 | + vbase1 = vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
902 | + vbase2 = vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
995 | 903 | vtmp1 = _mm_sqrt_ps(vtmp1); |
996 | 904 | vtmp2 = _mm_sqrt_ps(vtmp2); |
997 | 905 | vtmp1 = _mm_sub_ps(vvp2, vtmp1); |
998 | 906 | vtmp2 = _mm_sub_ps(vvp2, vtmp2); |
907 | + vsp = _mm_and_ps(vtmp1, _mm_cmpgt_ps(vbase1, vvp1)); | |
908 | + vsn = _mm_and_ps(vbase1, _mm_cmple_ps(vbase1, vvp1)); | |
909 | + vtmp1 = _mm_or_ps(vsp, vsn); | |
910 | + vsp = _mm_and_ps(vtmp2, _mm_cmpgt_ps(vbase2, vvp1)); | |
911 | + vsn = _mm_and_ps(vbase2, _mm_cmple_ps(vbase2, vvp1)); | |
912 | + vtmp2 = _mm_or_ps(vsp, vsn); | |
999 | 913 | vtmp1 = _mm_mul_ps(vtmp1, vsign1); |
1000 | 914 | vtmp2 = _mm_mul_ps(vtmp2, vsign2); |
1001 | - vtmp1 = _mm_add_ps(vtmp1, vbase1); | |
1002 | - vtmp2 = _mm_add_ps(vtmp2, vbase2); | |
1003 | 915 | vtmp1 = _mm_mul_ps(vtmp1, vlevel); |
1004 | 916 | vtmp2 = _mm_mul_ps(vtmp2, vlevel); |
917 | +#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) | |
918 | + _mm256_storeu_pd(&sp[i], _mm256_cvtps_pd(vtmp1)); | |
919 | + _mm256_storeu_pd(&sp[i + 4], _mm256_cvtps_pd(vtmp2)); | |
920 | +#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) | |
1005 | 921 | _mm_storeu_pd(&sp[i], _mm_cvtps_pd(vtmp1)); |
1006 | 922 | _mm_storeu_pd(&sp[i + 2], _mm_cvtps_pd(_mm_movehl_ps(vtmp1,vtmp1))); |
1007 | 923 | _mm_storeu_pd(&sp[i + 4], _mm_cvtps_pd(vtmp2)); |
1008 | 924 | _mm_storeu_pd(&sp[i + 6], _mm_cvtps_pd(_mm_movehl_ps(vtmp2,vtmp2))); |
1009 | - } | |
1010 | - } | |
1011 | 925 | #elif (USE_X86_EXT_INTRIN >= 2) && defined(DATA_T_FLOAT) |
1012 | - { | |
1013 | - const int32 req_count_mask = ~(0x7); | |
1014 | - int32 count2 = count & req_count_mask; | |
1015 | - __m128 vgain = _mm_set1_ps((float)info->velgain); | |
1016 | - __m128 vlevel = _mm_set1_ps((float)info->level); | |
1017 | - const __m128 vvp1 = _mm_set1_ps(1.0); | |
1018 | - const __m128 vvn1 = _mm_set1_ps(-1.0); | |
1019 | - const __m128 vvp2 = _mm_set1_ps(2.0); | |
1020 | - for (i = 0; i < count2; i += 8) { | |
1021 | - __m128 vtmp1, vtmp2, vme, vsp, vsn, vsign1, vsign2, vbase1, vbase2; | |
1022 | - vtmp1 = _mm_loadu_ps(&sp[i]); | |
1023 | - vtmp2 = _mm_loadu_ps(&sp[i + 4]); | |
1024 | - vtmp1 = _mm_mul_ps(vtmp1, vgain); | |
1025 | - vtmp2 = _mm_mul_ps(vtmp2, vgain); | |
1026 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp1, vvp1)); | |
1027 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp1, vvn1)); | |
1028 | - vsign1 = _mm_or_ps(vsp, vsn); | |
1029 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp1, vvp1), _mm_cmpge_ps(vtmp1, vvn1)); | |
1030 | - vbase1 = _mm_and_ps(vtmp1, vme); | |
1031 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp2, vvp1)); | |
1032 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp2, vvn1)); | |
1033 | - vsign2 = _mm_or_ps(vsp, vsn); | |
1034 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp2, vvp1), _mm_cmpge_ps(vtmp2, vvn1)); | |
1035 | - vbase2 = _mm_and_ps(vtmp2, vme); | |
1036 | - vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
1037 | - vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
1038 | - vtmp1 = _mm_sqrt_ps(vtmp1); | |
1039 | - vtmp2 = _mm_sqrt_ps(vtmp2); | |
1040 | - vtmp1 = _mm_sub_ps(vvp2, vtmp1); | |
1041 | - vtmp2 = _mm_sub_ps(vvp2, vtmp2); | |
1042 | - vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
1043 | - vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
1044 | - vtmp1 = _mm_add_ps(vtmp1, vbase1); | |
1045 | - vtmp2 = _mm_add_ps(vtmp2, vbase2); | |
1046 | - vtmp1 = _mm_mul_ps(vtmp1, vlevel); | |
1047 | - vtmp2 = _mm_mul_ps(vtmp2, vlevel); | |
1048 | 926 | _mm_storeu_ps(&sp[i], vtmp1); |
1049 | 927 | _mm_storeu_ps(&sp[i + 4], vtmp2); |
928 | +#endif | |
1050 | 929 | } |
1051 | 930 | } |
1052 | 931 | #endif // USE_X86_EXT_INTRIN |
@@ -1068,163 +947,55 @@ static inline void do_vfx_distortion(int v, VoiceEffect *vfx, DATA_T *sp, int32 | ||
1068 | 947 | } |
1069 | 948 | break; |
1070 | 949 | case 6: |
1071 | -#if 0 && (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) | |
1072 | - { | |
1073 | - const int32 req_count_mask = ~(0x7); | |
1074 | - int32 count2 = count & req_count_mask; | |
1075 | - __m256 vgain = _mm256_set1_ps((float)info->velgain); | |
1076 | - __m256 vlevel = _mm256_set1_ps((float)info->level); | |
1077 | - const __m256 vvp1 = _mm256_set1_ps(1.0); | |
1078 | - const __m256 vvn1 = _mm256_set1_ps(-1.0); | |
1079 | - const __m256 vvq = _mm256_set1_ps(-0.15); | |
1080 | - const __m256 vv11 = _mm256_set1_ps(1.1); | |
1081 | - const __m256 vmsign = _mm256_set1_ps(-0.0f); | |
1082 | - for (i = 0; i < count2; i += 8) { | |
1083 | - __m256 vtmp1, vme, vsp, vsn, vsign1, vbase1; | |
1084 | - vtmp1 = MM256_SET2X_PS( | |
1085 | - _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i])), | |
1086 | - _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i + 4])) ); | |
1087 | - vtmp1 = _mm256_mul_ps(vtmp1, vgain); | |
1088 | - vsp = _mm256_and_ps(vvp1, _mm256_cmpgt_ps(vtmp1, vvp1)); | |
1089 | - vsn = _mm256_and_ps(vvn1, _mm256_cmplt_ps(vtmp1, vvn1)); | |
1090 | - vsign1 = _mm256_or_ps(vsp, vsn); | |
1091 | - vme = _mm256_and_ps(_mm256_cmple_ps(vtmp1, vvp1), _mm256_cmpge_ps(vtmp1, vvn1)); | |
1092 | - vbase1 = _mm256_and_ps(vtmp1, vme); | |
1093 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1) | |
1094 | - vtmp1 = _mm256_andnot_ps(vtmp1, vmsign); | |
1095 | - vtmp1 = _mm256_mul_ps(vtmp1, vvq); | |
1096 | - vtmp1 = _mm256_add_ps(vtmp1, vv11); | |
1097 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1); | |
1098 | - vtmp1 = _mm256_add_ps(vtmp1, vbase1); | |
1099 | - vtmp1 = _mm256_mul_ps(vtmp1, vlevel); | |
1100 | - _mm256_storeu_pd(&sp[i], _mm256_cvtps_pd(_mm256_extract_ps(vtmp1, 0x0))); | |
1101 | - _mm256_storeu_pd(&sp[i + 4], _mm256_cvtps_pd(_mm256_extract_ps(vtmp1, 0x1))); | |
1102 | - } | |
1103 | - } | |
1104 | -#elif 0 && (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_FLOAT) | |
1105 | - { | |
1106 | - const int32 req_count_mask = ~(0x7); | |
1107 | - int32 count2 = count & req_count_mask; | |
1108 | - __m256 vgain = _mm256_set1_ps((float)info->velgain); | |
1109 | - __m256 vlevel = _mm256_set1_ps((float)info->level); | |
1110 | - const __m256 vvp1 = _mm256_set1_ps(1.0); | |
1111 | - const __m256 vvn1 = _mm256_set1_ps(-1.0); | |
1112 | - const __m256 vvq = _mm256_set1_ps(-0.15); | |
1113 | - const __m256 vv11 = _mm256_set1_ps(1.1); | |
1114 | - const __m256 vmsign = _mm256_set1_ps(-0.0f); | |
1115 | - for (i = 0; i < count2; i += 8) { | |
1116 | - __m256 vtmp1, vme, vsp, vsn, vsign1, vbase1; | |
1117 | - vtmp1 = _mm256_loadu_ps(&sp[i]); | |
1118 | - vtmp1 = _mm256_mul_ps(vtmp1, vgain); | |
1119 | - vsp = _mm256_and_ps(vvp1, _mm256_cmpgt_ps(vtmp1, vvp1)); | |
1120 | - vsn = _mm256_and_ps(vvn1, _mm256_cmplt_ps(vtmp1, vvn1)); | |
1121 | - vsign1 = _mm256_or_ps(vsp, vsn); | |
1122 | - vme = _mm256_and_ps(_mm256_cmple_ps(vtmp1, vvp1), _mm256_cmpge_ps(vtmp1, vvn1)); | |
1123 | - vbase1 = _mm256_and_ps(vtmp1, vme); | |
1124 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1) | |
1125 | - vtmp1 = _mm256_andnot_ps(vtmp1, vmsign); | |
1126 | - vtmp1 = _mm256_mul_ps(vtmp1, vvq); | |
1127 | - vtmp1 = _mm256_add_ps(vtmp1, vv11); | |
1128 | - vtmp1 = _mm256_mul_ps(vtmp1, vsign1); | |
1129 | - vtmp1 = _mm256_add_ps(vtmp1, vbase1); | |
1130 | - vtmp1 = _mm256_mul_ps(vtmp1, vlevel); | |
1131 | - _mm256_storeu_ps(&sp[i], vtmp1); | |
1132 | - } | |
1133 | - } | |
1134 | -#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) | |
950 | +#if (USE_X86_EXT_INTRIN >= 2) && (defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)) | |
1135 | 951 | { |
1136 | 952 | const int32 req_count_mask = ~(0x7); |
1137 | 953 | int32 count2 = count & req_count_mask; |
1138 | 954 | __m128 vgain = _mm_set1_ps((float)info->velgain); |
1139 | 955 | __m128 vlevel = _mm_set1_ps((float)info->level); |
1140 | - const __m128 vvp1 = _mm_set1_ps(1.0); | |
1141 | - const __m128 vvn1 = _mm_set1_ps(-1.0); | |
1142 | 956 | const __m128 vvq = _mm_set1_ps(-0.15); |
1143 | 957 | const __m128 vv11 = _mm_set1_ps(1.1); |
1144 | 958 | const __m128 vmsign = _mm_set1_ps(-0.0f); |
1145 | 959 | for (i = 0; i < count2; i += 8) { |
1146 | - __m128 vtmp1, vtmp2, vme, vsp, vsn, vsign1, vsign2, vbase1, vbase2; | |
960 | + __m128 vtmp1, vtmp2, vbase1, vbase2; | |
961 | +#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) | |
962 | + vtmp1 = _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i])); | |
963 | + vtmp2 = _mm256_cvtpd_ps(_mm256_loadu_pd(&sp[i + 4])); | |
964 | +#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) | |
1147 | 965 | vtmp1 = _mm_shuffle_ps( |
1148 | 966 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i])), |
1149 | 967 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i + 2])), 0x44); |
1150 | 968 | vtmp2 = _mm_shuffle_ps( |
1151 | 969 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i + 4])), |
1152 | 970 | _mm_cvtpd_ps(_mm_loadu_pd(&sp[i + 6])), 0x44); |
1153 | - vtmp1 = _mm_mul_ps(vtmp1, vgain); | |
1154 | - vtmp2 = _mm_mul_ps(vtmp2, vgain); | |
1155 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp1, vvp1)); | |
1156 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp1, vvn1)); | |
1157 | - vsign1 = _mm_or_ps(vsp, vsn); | |
1158 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp1, vvp1), _mm_cmpge_ps(vtmp1, vvn1)); | |
1159 | - vbase1 = _mm_and_ps(vtmp1, vme); | |
1160 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp2, vvp1)); | |
1161 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp2, vvn1)); | |
1162 | - vsign2 = _mm_or_ps(vsp, vsn); | |
1163 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp2, vvp1), _mm_cmpge_ps(vtmp2, vvn1)); | |
1164 | - vbase2 = _mm_and_ps(vtmp2, vme); | |
1165 | - vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
1166 | - vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
1167 | - vtmp1 = _mm_andnot_ps(vtmp1, vmsign); | |
1168 | - vtmp2 = _mm_andnot_ps(vtmp2, vmsign); | |
971 | +#elif (USE_X86_EXT_INTRIN >= 2) && defined(DATA_T_FLOAT) | |
972 | + vtmp1 = _mm_loadu_ps(&sp[i]); | |
973 | + vtmp2 = _mm_loadu_ps(&sp[i + 4]); | |
974 | +#endif | |
975 | + vbase1 = vtmp1 = _mm_mul_ps(vtmp1, vgain); | |
976 | + vbase2 = vtmp2 = _mm_mul_ps(vtmp2, vgain); | |
977 | + vtmp1 = _mm_andnot_ps(vtmp1, vmsign); // fabs | |
978 | + vtmp2 = _mm_andnot_ps(vtmp2, vmsign); // fabs | |
1169 | 979 | vtmp1 = _mm_mul_ps(vtmp1, vvq); |
1170 | 980 | vtmp2 = _mm_mul_ps(vtmp2, vvq); |
1171 | 981 | vtmp1 = _mm_add_ps(vtmp1, vv11); |
1172 | 982 | vtmp2 = _mm_add_ps(vtmp2, vv11); |
1173 | - vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
1174 | - vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
1175 | 983 | vtmp1 = _mm_add_ps(vtmp1, vbase1); |
1176 | 984 | vtmp2 = _mm_add_ps(vtmp2, vbase2); |
1177 | 985 | vtmp1 = _mm_mul_ps(vtmp1, vlevel); |
1178 | 986 | vtmp2 = _mm_mul_ps(vtmp2, vlevel); |
987 | +#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) | |
988 | + _mm256_storeu_pd(&sp[i], _mm256_cvtps_pd(vtmp1)); | |
989 | + _mm256_storeu_pd(&sp[i + 4], _mm256_cvtps_pd(vtmp2)); | |
990 | +#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) | |
1179 | 991 | _mm_storeu_pd(&sp[i], _mm_cvtps_pd(vtmp1)); |
1180 | 992 | _mm_storeu_pd(&sp[i + 2], _mm_cvtps_pd(_mm_movehl_ps(vtmp1,vtmp1))); |
1181 | 993 | _mm_storeu_pd(&sp[i + 4], _mm_cvtps_pd(vtmp2)); |
1182 | 994 | _mm_storeu_pd(&sp[i + 6], _mm_cvtps_pd(_mm_movehl_ps(vtmp2,vtmp2))); |
1183 | - } | |
1184 | - } | |
1185 | 995 | #elif (USE_X86_EXT_INTRIN >= 2) && defined(DATA_T_FLOAT) |
1186 | - { | |
1187 | - const int32 req_count_mask = ~(0x7); | |
1188 | - int32 count2 = count & req_count_mask; | |
1189 | - __m128 vgain = _mm_set1_ps((float)info->velgain); | |
1190 | - __m128 vlevel = _mm_set1_ps((float)info->level); | |
1191 | - const __m128 vvp1 = _mm_set1_ps(1.0); | |
1192 | - const __m128 vvn1 = _mm_set1_ps(-1.0); | |
1193 | - const __m128 vvq = _mm_set1_ps(-0.15); | |
1194 | - const __m128 vv11 = _mm_set1_ps(1.1); | |
1195 | - const __m128 vmsign = _mm_set1_ps(-0.0f); | |
1196 | - for (i = 0; i < count2; i += 8) { | |
1197 | - __m128 vtmp1, vtmp2, vme, vsp, vsn, vsign1, vsign2, vbase1, vbase2; | |
1198 | - vtmp1 = _mm_loadu_ps(&sp[i]); | |
1199 | - vtmp2 = _mm_loadu_ps(&sp[i + 4]); | |
1200 | - vtmp1 = _mm_mul_ps(vtmp1, vgain); | |
1201 | - vtmp2 = _mm_mul_ps(vtmp2, vgain); | |
1202 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp1, vvp1)); | |
1203 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp1, vvn1)); | |
1204 | - vsign1 = _mm_or_ps(vsp, vsn); | |
1205 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp1, vvp1), _mm_cmpge_ps(vtmp1, vvn1)); | |
1206 | - vbase1 = _mm_and_ps(vtmp1, vme); | |
1207 | - vsp = _mm_and_ps(vvp1, _mm_cmpgt_ps(vtmp2, vvp1)); | |
1208 | - vsn = _mm_and_ps(vvn1, _mm_cmplt_ps(vtmp2, vvn1)); | |
1209 | - vsign2 = _mm_or_ps(vsp, vsn); | |
1210 | - vme = _mm_and_ps(_mm_cmple_ps(vtmp2, vvp1), _mm_cmpge_ps(vtmp2, vvn1)); | |
1211 | - vbase2 = _mm_and_ps(vtmp2, vme); | |
1212 | - vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
1213 | - vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
1214 | - vtmp1 = _mm_andnot_ps(vtmp1, vmsign); | |
1215 | - vtmp2 = _mm_andnot_ps(vtmp2, vmsign); | |
1216 | - vtmp1 = _mm_mul_ps(vtmp1, vvq); | |
1217 | - vtmp2 = _mm_mul_ps(vtmp2, vvq); | |
1218 | - vtmp1 = _mm_add_ps(vtmp1, vv11); | |
1219 | - vtmp2 = _mm_add_ps(vtmp2, vv11); | |
1220 | - vtmp1 = _mm_mul_ps(vtmp1, vsign1); | |
1221 | - vtmp2 = _mm_mul_ps(vtmp2, vsign2); | |
1222 | - vtmp1 = _mm_add_ps(vtmp1, vbase1); | |
1223 | - vtmp2 = _mm_add_ps(vtmp2, vbase2); | |
1224 | - vtmp1 = _mm_mul_ps(vtmp1, vlevel); | |
1225 | - vtmp2 = _mm_mul_ps(vtmp2, vlevel); | |
1226 | 996 | _mm_storeu_ps(&sp[i], vtmp1); |
1227 | 997 | _mm_storeu_ps(&sp[i + 4], vtmp2); |
998 | +#endif | |
1228 | 999 | } |
1229 | 1000 | } |
1230 | 1001 | #endif // USE_X86_EXT_INTRIN |
@@ -578,6 +578,10 @@ static int get_device(IMMDevice **ppMMDevice, int devnum) | ||
578 | 578 | goto error; |
579 | 579 | if(pszDeviceId) |
580 | 580 | CoTaskMemFree(pszDeviceId); |
581 | + if(pdev) | |
582 | + IMMDevice_Release(pdev); | |
583 | + if(pdc) | |
584 | + IMMDeviceCollection_Release(pdc); | |
581 | 585 | if(pde) |
582 | 586 | IMMDeviceEnumerator_Release(pde); |
583 | 587 | return TRUE; |
@@ -585,6 +589,10 @@ static int get_device(IMMDevice **ppMMDevice, int devnum) | ||
585 | 589 | error: |
586 | 590 | if(pszDeviceId) |
587 | 591 | CoTaskMemFree(pszDeviceId); |
592 | + if(pdev) | |
593 | + IMMDevice_Release(pdev); | |
594 | + if(pdc) | |
595 | + IMMDeviceCollection_Release(pdc); | |
588 | 596 | if(pde) |
589 | 597 | IMMDeviceEnumerator_Release(pde); |
590 | 598 | return FALSE; |
@@ -637,7 +645,7 @@ static void print_device_list(void) | ||
637 | 645 | device[0].LatencyMax = LatencyMax; |
638 | 646 | device[0].LatencyMin = LatencyMin; |
639 | 647 | if(tmpClient){ |
640 | - tmpClient->lpVtbl->Release(tmpClient); | |
648 | + IAudioClient_Release(tmpClient); | |
641 | 649 | tmpClient = NULL; |
642 | 650 | } |
643 | 651 | if(defdev){ |
@@ -688,7 +696,7 @@ static void print_device_list(void) | ||
688 | 696 | device[i+1].LatencyMax = LatencyMax; |
689 | 697 | device[i+1].LatencyMin = LatencyMin; |
690 | 698 | if(tmpClient){ |
691 | - tmpClient->lpVtbl->Release(tmpClient); | |
699 | + IAudioClient_Release(tmpClient); | |
692 | 700 | tmpClient = NULL; |
693 | 701 | } |
694 | 702 | if(dev){ |
@@ -696,12 +704,12 @@ static void print_device_list(void) | ||
696 | 704 | dev = NULL; |
697 | 705 | } |
698 | 706 | if(pps){ |
699 | - pps->lpVtbl->Release(pps); | |
707 | + IPropertyStore_Release(pps); | |
700 | 708 | pps = NULL; |
701 | 709 | } |
702 | 710 | } |
703 | 711 | if(pdc) |
704 | - pdc->lpVtbl->Release(pdc); | |
712 | + IMMDeviceCollection_Release(pdc); | |
705 | 713 | if(pde) |
706 | 714 | IMMDeviceEnumerator_Release(pde); |
707 | 715 | for(i = 0; i < num; i++){ |
@@ -713,9 +721,9 @@ static void print_device_list(void) | ||
713 | 721 | return; |
714 | 722 | error1: |
715 | 723 | if(tmpClient) |
716 | - tmpClient->lpVtbl->Release(tmpClient); | |
724 | + IAudioClient_Release(tmpClient); | |
717 | 725 | if(pdc){ |
718 | - pdc->lpVtbl->Release(pdc); | |
726 | + IMMDeviceCollection_Release(pdc); | |
719 | 727 | } |
720 | 728 | if(pde) |
721 | 729 | IMMDeviceEnumerator_Release(pde); |
@@ -1226,7 +1234,7 @@ int wasapi_device_list(WASAPI_DEVICELIST *device) | ||
1226 | 1234 | device[0].LatencyMax = LatencyMax; |
1227 | 1235 | device[0].LatencyMin = LatencyMin; |
1228 | 1236 | if(tmpClient){ |
1229 | - tmpClient->lpVtbl->Release(tmpClient); | |
1237 | + IAudioClient_Release(tmpClient); | |
1230 | 1238 | tmpClient = NULL; |
1231 | 1239 | } |
1232 | 1240 | if(defdev){ |
@@ -1277,7 +1285,7 @@ int wasapi_device_list(WASAPI_DEVICELIST *device) | ||
1277 | 1285 | device[i+1].LatencyMax = LatencyMax; |
1278 | 1286 | device[i+1].LatencyMin = LatencyMin; |
1279 | 1287 | if(tmpClient){ |
1280 | - tmpClient->lpVtbl->Release(tmpClient); | |
1288 | + IAudioClient_Release(tmpClient); | |
1281 | 1289 | tmpClient = NULL; |
1282 | 1290 | } |
1283 | 1291 | if(dev){ |
@@ -1285,21 +1293,21 @@ int wasapi_device_list(WASAPI_DEVICELIST *device) | ||
1285 | 1293 | dev = NULL; |
1286 | 1294 | } |
1287 | 1295 | if(pps){ |
1288 | - pps->lpVtbl->Release(pps); | |
1296 | + IPropertyStore_Release(pps); | |
1289 | 1297 | pps = NULL; |
1290 | 1298 | } |
1291 | 1299 | } |
1292 | 1300 | if(pdc) |
1293 | - pdc->lpVtbl->Release(pdc); | |
1301 | + IMMDeviceCollection_Release(pdc); | |
1294 | 1302 | if(pde) |
1295 | 1303 | IMMDeviceEnumerator_Release(pde); |
1296 | 1304 | return num + 1; // +1 def dev |
1297 | 1305 | |
1298 | 1306 | error1: |
1299 | 1307 | if(tmpClient) |
1300 | - tmpClient->lpVtbl->Release(tmpClient); | |
1308 | + IAudioClient_Release(tmpClient); | |
1301 | 1309 | if(pdc){ |
1302 | - pdc->lpVtbl->Release(pdc); | |
1310 | + IMMDeviceCollection_Release(pdc); | |
1303 | 1311 | } |
1304 | 1312 | if(pde) |
1305 | 1313 | IMMDeviceEnumerator_Release(pde); |