Skip to content

Commit 25f2a71

Browse files
AntoinePrv authored and serge-sans-paille committed
Use utility function in SSE2 swizzle
1 parent 0c5b3e8 commit 25f2a71

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

include/xsimd/arch/xsimd_sse2.hpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#include <limits>
1717
#include <type_traits>
1818

19+
#include "../types/xsimd_batch_constant.hpp"
1920
#include "../types/xsimd_sse2_register.hpp"
2021

2122
namespace xsimd
@@ -1952,8 +1953,7 @@ namespace xsimd
19521953
return _mm_sub_pd(self, other);
19531954
}
19541955

1955-
// swizzle
1956-
1956+
// swizzle (constant mask)
19571957
template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
19581958
XSIMD_INLINE batch<float, A> swizzle(batch<float, A> const& self, batch_constant<uint32_t, A, V0, V1, V2, V3>, requires_arch<sse2>) noexcept
19591959
{
@@ -2024,7 +2024,7 @@ namespace xsimd
20242024
return hi_all;
20252025
}
20262026
// Only pick elements from the low lane
2027-
XSIMD_IF_CONSTEXPR((V0 < 4) && (V1 < 4) && (V2 < 4) && (V3 < 4) && (V4 < 4) && (V5 < 4) && (V6 < 4) && (V7 < 4))
2027+
XSIMD_IF_CONSTEXPR(detail::is_only_from_lo(mask))
20282028
{
20292029
// permute within each sub lane
20302030
constexpr auto mask_lo = detail::mod_shuffle(V0, V1, V2, V3);
@@ -2036,7 +2036,7 @@ namespace xsimd
20362036
return _mm_unpacklo_epi64(lol, loh);
20372037
}
20382038
// Only pick elements from the high lane
2039-
XSIMD_IF_CONSTEXPR((V0 >= 4) && (V1 >= 4) && (V2 >= 4) && (V3 >= 4) && (V4 >= 4) && (V5 >= 4) && (V6 >= 4) && (V7 >= 4))
2039+
XSIMD_IF_CONSTEXPR(detail::is_only_from_hi(mask))
20402040
{
20412041
// permute within each sub lane
20422042
constexpr auto mask_lo = detail::mod_shuffle(V0, V1, V2, V3);
@@ -2063,7 +2063,7 @@ namespace xsimd
20632063
__m128i hi = _mm_unpackhi_epi64(hil, hih);
20642064

20652065
// mask to choose the right lane
2066-
batch_bool_constant<uint16_t, A, (V0 < 4), (V1 < 4), (V2 < 4), (V3 < 4), (V4 < 4), (V5 < 4), (V6 < 4), (V7 < 4)> blend_mask;
2066+
constexpr auto blend_mask = mask < make_batch_constant<uint16_t, 4, A>();
20672067

20682068
// blend the two permutes
20692069
return select(blend_mask, batch<uint16_t, A>(lo), batch<uint16_t, A>(hi));

0 commit comments

Comments
 (0)