Skip to content

Commit a6a926d

Browse files
committed
Added a few more instruction costs.
1 parent c32809e commit a6a926d

File tree

1 file changed

+12
-2
lines changed

1 file changed

+12
-2
lines changed

src/costs.jl

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ struct InstructionCost
1919
end
2020
"""
    InstructionCost(sl::Int, srt::Float64, scaling::Float64 = -3.0)

Convenience constructor: forward `sl`, `srt` and `scaling` to the four-argument
`InstructionCost` constructor in the order `(scaling, srt, sl, 0)`.

`scaling` defaults to `-3.0`, the value `flatcost` tests for. The trailing `0`
fills the fourth constructor argument.
NOTE(review): the struct definition is not visible here; presumably the fourth
field is a register-pressure figure -- confirm against the struct.
"""
function InstructionCost(sl::Int, srt::Float64, scaling::Float64 = -3.0)
    return InstructionCost(scaling, srt, sl, 0)
end
2121

22+
"""
    nocost(c::InstructionCost)

Return `true` when `c.scalar_reciprical_throughput` is exactly `0.0`.
"""
function nocost(c::InstructionCost)
    return c.scalar_reciprical_throughput == 0.0
end
2223
"""
    flatcost(c::InstructionCost)

Return `true` when `c.scaling` equals the sentinel `-3.0`, which is also the
default `scaling` of the three-argument `InstructionCost` constructor.
"""
function flatcost(c::InstructionCost)
    return c.scaling == -3.0
end
2324
"""
    offsetscaling(c::InstructionCost)

Return `true` when `c.scaling` equals the sentinel `-2.0`.
"""
function offsetscaling(c::InstructionCost)
    return c.scaling == -2.0
end
2425
"""
    linearscaling(c::InstructionCost)

Return `true` when `c.scaling` equals the sentinel `-1.0`.
NOTE(review): presumably marks costs that grow linearly with vector width --
confirm against the callers of this predicate.
"""
function linearscaling(c::InstructionCost)
    return c.scaling == -1.0
end
@@ -29,7 +30,7 @@ function scalar_cost(ic::InstructionCost)#, ::Type{T} = Float64) where {T}
2930
end
3031
function vector_cost(ic::InstructionCost, Wshift, sizeof_T)
3132
srt, sl, srp = scalar_cost(ic)
32-
if flatcost(ic) || Wshift == 0 # No scaling
33+
if flatcost(ic) || Wshift == 0 || nocost(ic) # No scaling
3334
return srt, sl, srp
3435
elseif offsetscaling(ic) # offset scaling
3536
srt *= 1 << (Wshift + VectorizationBase.intlog2(sizeof_T) - 4)
@@ -85,6 +86,7 @@ const COST = Dict{Instruction,InstructionCost}(
8586
Instruction(:vmul) => InstructionCost(4,0.5),
8687
Instruction(:vdiv) => InstructionCost(13,4.0,-2.0),
8788
Instruction(:abs2) => InstructionCost(4,0.5),
89+
Instruction(:vabs2) => InstructionCost(4,0.5),
8890
Instruction(:(==)) => InstructionCost(1, 0.5),
8991
Instruction(:isequal) => InstructionCost(1, 0.5),
9092
Instruction(:(&)) => InstructionCost(1, 0.5),
@@ -94,6 +96,7 @@ const COST = Dict{Instruction,InstructionCost}(
9496
Instruction(:(>=)) => InstructionCost(1, 0.5),
9597
Instruction(:(<=)) => InstructionCost(1, 0.5),
9698
Instruction(:inv) => InstructionCost(13,4.0,-2.0,1),
99+
Instruction(:vinv) => InstructionCost(13,4.0,-2.0,1),
97100
Instruction(:muladd) => InstructionCost(4,0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
98101
Instruction(:fma) => InstructionCost(4,0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
99102
Instruction(:vmuladd) => InstructionCost(4,0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
@@ -102,12 +105,19 @@ const COST = Dict{Instruction,InstructionCost}(
102105
Instruction(:vfmsub) => InstructionCost(4,0.5), # - and * will fuse into this, so much of the time they're not twice as expensive
103106
Instruction(:vfnmadd) => InstructionCost(4,0.5), # + and -* will fuse into this, so much of the time they're not twice as expensive
104107
Instruction(:vfnmsub) => InstructionCost(4,0.5), # - and -* will fuse into this, so much of the time they're not twice as expensive
108+
Instruction(:vfmadd_fast) => InstructionCost(4,0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
109+
Instruction(:vfmsub_fast) => InstructionCost(4,0.5), # - and * will fuse into this, so much of the time they're not twice as expensive
110+
Instruction(:vfnmadd_fast) => InstructionCost(4,0.5), # + and -* will fuse into this, so much of the time they're not twice as expensive
111+
Instruction(:vfnmsub_fast) => InstructionCost(4,0.5), # - and -* will fuse into this, so much of the time they're not twice as expensive
105112
Instruction(:sqrt) => InstructionCost(15,4.0,-2.0),
106113
Instruction(:log) => InstructionCost(20,20.0,40.0,20),
107114
Instruction(:exp) => InstructionCost(20,20.0,20.0,18),
108115
Instruction(:sin) => InstructionCost(18,15.0,68.0,23),
109116
Instruction(:cos) => InstructionCost(18,15.0,68.0,26),
110-
Instruction(:sincos) => InstructionCost(25,22.0,70.0,26)#,
117+
Instruction(:sincos) => InstructionCost(25,22.0,70.0,26),
118+
Instruction(:identity) => InstructionCost(0,0.0,0.0,0),
119+
Instruction(:adjoint) => InstructionCost(0,0.0,0.0,0),
120+
Instruction(:transpose) => InstructionCost(0,0.0,0.0,0),
111121
# Symbol("##CONSTANT##") => InstructionCost(0,0.0)
112122
)
113123
# for (k, v) ∈ COST # so we can look up Symbol(typeof(function))

0 commit comments

Comments
 (0)