diff --git a/include/ck/utility/sequence.hpp b/include/ck/utility/sequence.hpp index 3a45d52bd3a..372337796e6 100644 --- a/include/ck/utility/sequence.hpp +++ b/include/ck/utility/sequence.hpp @@ -597,31 +597,49 @@ struct is_valid_sequence_map : is_same -struct sequence_map_inverse +// Invert a permutation sequence: given X2Y = {a, b, c, ...}, compute Y2X where Y2X[X2Y[i]] = i +// Example: Sequence<2,0,1> (meaning pos0->2, pos1->0, pos2->1) inverts to Sequence<1,2,0> +// +// Why this implementation is faster to compile than recursive templates: +// +// The old recursive approach created a new template type for each element: +// sequence_map_inverse_impl<...> -> sequence_map_inverse_impl<...> -> +// sequence_map_inverse_impl<...> +// Each "->" is a new type the compiler must create, track, and manage. For N elements, that's +// N template types, each with overhead (name mangling, debug info, symbol table entries). +// +// This implementation uses O(N) direct assignment with a fold expression: +// For input Sequence<2,0,1>, the fold expression ((result[Is] = pos++), ...) expands to: +// result[2]=0, result[0]=1, result[1]=2 +// This builds the inverse permutation in a single pass without any searching. 
+// +template +struct sequence_map_inverse> { - template - struct sequence_map_inverse_impl + private: + struct InverseArray { - static constexpr auto new_y2x = - WorkingY2X::Modify(X2Y::At(Number{}), Number{}); - - using type = - typename sequence_map_inverse_impl:: - type; + index_t data[sizeof...(Is)] = {}; }; - template - struct sequence_map_inverse_impl + static constexpr auto build_inverse() { - using type = WorkingY2X; - }; + InverseArray result{}; + index_t pos = 0; + ((result.data[Is] = pos++), ...); + return result; + } - using type = - typename sequence_map_inverse_impl::type, - 0, - SeqMap::Size()>::type; + static constexpr InverseArray inverse = build_inverse(); + + template + static constexpr auto compute(Sequence) + { + return Sequence{}; + } + + public: + using type = decltype(compute(make_index_sequence{})); }; template