@@ -19,6 +19,7 @@ struct InstructionCost
1919end
"""
    InstructionCost(sl::Int, srt::Float64, scaling::Float64 = -3.0)

Convenience constructor taking the arguments in the order `sl`, `srt`, `scaling`.

The values are forwarded to the four-argument constructor as `(scaling, srt, sl, 0)`,
i.e. the fourth field is always set to `0` here. When `scaling` is omitted it defaults
to `-3.0`, the value `flatcost` tests for.

NOTE(review): `sl`/`srt` presumably stand for scalar latency and scalar reciprocal
throughput, and the trailing `0` for register pressure — confirm against the struct
definition.
"""
function InstructionCost(sl::Int, srt::Float64, scaling::Float64 = -3.0)
    return InstructionCost(scaling, srt, sl, 0)
end
2121
"""
    nocost(c::InstructionCost)

Return `true` when the `scalar_reciprical_throughput` field of `c` equals `0.0`.
"""
function nocost(c::InstructionCost)
    throughput = c.scalar_reciprical_throughput
    return throughput == 0.0
end
"""
    flatcost(c::InstructionCost)

Return `true` when `c.scaling` equals the sentinel value `-3.0`.

`vector_cost` treats such an instruction as "no scaling" and returns its scalar
cost unchanged.
"""
function flatcost(c::InstructionCost)
    mode = c.scaling
    return mode == -3.0
end
"""
    offsetscaling(c::InstructionCost)

Return `true` when `c.scaling` equals the sentinel value `-2.0`, which `vector_cost`
handles in its "offset scaling" branch.
"""
function offsetscaling(c::InstructionCost)
    mode = c.scaling
    return mode == -2.0
end
"""
    linearscaling(c::InstructionCost)

Return `true` when `c.scaling` equals the sentinel value `-1.0`.

NOTE(review): by its name this presumably marks costs that scale linearly with the
vector width — confirm against the callers.
"""
function linearscaling(c::InstructionCost)
    mode = c.scaling
    return mode == -1.0
end
@@ -29,7 +30,7 @@ function scalar_cost(ic::InstructionCost)#, ::Type{T} = Float64) where {T}
2930end
3031function vector_cost (ic:: InstructionCost , Wshift, sizeof_T)
3132 srt, sl, srp = scalar_cost (ic)
32- if flatcost (ic) || Wshift == 0 # No scaling
33+ if flatcost (ic) || Wshift == 0 || nocost (ic) # No scaling
3334 return srt, sl, srp
3435 elseif offsetscaling (ic) # offset scaling
3536 srt *= 1 << (Wshift + VectorizationBase. intlog2 (sizeof_T) - 4 )
@@ -85,6 +86,7 @@ const COST = Dict{Instruction,InstructionCost}(
8586 Instruction (:vmul ) => InstructionCost (4 ,0.5 ),
8687 Instruction (:vdiv ) => InstructionCost (13 ,4.0 ,- 2.0 ),
8788 Instruction (:abs2 ) => InstructionCost (4 ,0.5 ),
89+ Instruction (:vabs2 ) => InstructionCost (4 ,0.5 ),
8890 Instruction (:(== )) => InstructionCost (1 , 0.5 ),
8991 Instruction (:isequal ) => InstructionCost (1 , 0.5 ),
9092 Instruction (:(& )) => InstructionCost (1 , 0.5 ),
@@ -94,6 +96,7 @@ const COST = Dict{Instruction,InstructionCost}(
9496 Instruction (:(>= )) => InstructionCost (1 , 0.5 ),
9597 Instruction (:(<= )) => InstructionCost (1 , 0.5 ),
9698 Instruction (:inv ) => InstructionCost (13 ,4.0 ,- 2.0 ,1 ),
99+ Instruction (:vinv ) => InstructionCost (13 ,4.0 ,- 2.0 ,1 ),
97100 Instruction (:muladd ) => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
98101 Instruction (:fma ) => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
99102 Instruction (:vmuladd ) => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
@@ -102,12 +105,19 @@ const COST = Dict{Instruction,InstructionCost}(
102105 Instruction (:vfmsub ) => InstructionCost (4 ,0.5 ), # - and * will fuse into this, so much of the time they're not twice as expensive
103106 Instruction (:vfnmadd ) => InstructionCost (4 ,0.5 ), # + and -* will fuse into this, so much of the time they're not twice as expensive
104107 Instruction (:vfnmsub ) => InstructionCost (4 ,0.5 ), # - and -* will fuse into this, so much of the time they're not twice as expensive
108+ Instruction (:vfmadd_fast ) => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
109+ Instruction (:vfmsub_fast ) => InstructionCost (4 ,0.5 ), # - and * will fuse into this, so much of the time they're not twice as expensive
110+ Instruction (:vfnmadd_fast ) => InstructionCost (4 ,0.5 ), # + and -* will fuse into this, so much of the time they're not twice as expensive
111+ Instruction (:vfnmsub_fast ) => InstructionCost (4 ,0.5 ), # - and -* will fuse into this, so much of the time they're not twice as expensive
105112 Instruction (:sqrt ) => InstructionCost (15 ,4.0 ,- 2.0 ),
106113 Instruction (:log ) => InstructionCost (20 ,20.0 ,40.0 ,20 ),
107114 Instruction (:exp ) => InstructionCost (20 ,20.0 ,20.0 ,18 ),
108115 Instruction (:sin ) => InstructionCost (18 ,15.0 ,68.0 ,23 ),
109116 Instruction (:cos ) => InstructionCost (18 ,15.0 ,68.0 ,26 ),
110- Instruction (:sincos ) => InstructionCost (25 ,22.0 ,70.0 ,26 )# ,
117+ Instruction (:sincos ) => InstructionCost (25 ,22.0 ,70.0 ,26 ),
118+ Instruction (:identity ) => InstructionCost (0 ,0.0 ,0.0 ,0 ),
119+ Instruction (:adjoint ) => InstructionCost (0 ,0.0 ,0.0 ,0 ),
120+ Instruction (:transpose ) => InstructionCost (0 ,0.0 ,0.0 ,0 ),
111121 # Symbol("##CONSTANT##") => InstructionCost(0,0.0)
112122)
113123# for (k, v) ∈ COST # so we can look up Symbol(typeof(function))
0 commit comments