Skip to content

Commit 8911b1d

Browse files
committed
Optimize unique tags
We have two general code paths: 1. Merge two Tags into one. 2. Merge two Tags into one, excluding certain tags. We used to handle number 1 by passing an empty map to number 2; however, these can be seen as fast (add static tags) and slow (add static tags + perform filtering) code paths. The fast code path is now optimized by performing an array search instead of using a map to track things that have been seen. This is more efficient for small numbers of tags. There are diminishing returns eventually, but that's with a high number of tags, which is an anti-pattern. The primary benefit is in removing the map allocation. The slow code path is unchanged, but could be improved by ensuring the map is sufficiently large, or by using the same linear search method to check if a tag should be excluded. Note: this benchmark isn't perfect because we clone the source arrays, but that can be identified in the profile. BenchmarkUniqueTagsPractical/original-22 40025511 300.1 ns/op 304 B/op 3 allocs/op BenchmarkUniqueTagsPractical/prealloc-22 38560572 308.3 ns/op 304 B/op 3 allocs/op BenchmarkUniqueTagsPractical/array-search-22 56932317 210.9 ns/op 304 B/op 3 allocs/op
1 parent c70e0a0 commit 8911b1d

File tree

3 files changed

+108
-7
lines changed

3 files changed

+108
-7
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ require (
1111
github.com/aws/aws-sdk-go-v2/service/ec2 v1.187.0
1212
github.com/cenkalti/backoff v2.2.1+incompatible
1313
github.com/go-redis/redis/v8 v8.11.5
14+
github.com/google/uuid v1.6.0
1415
github.com/gorilla/mux v1.8.0
1516
github.com/jessevdk/go-flags v1.5.0
1617
github.com/json-iterator/go v1.1.12

pkg/statsd/handler_tags.go

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package statsd
22

33
import (
44
"context"
5+
"slices"
56

67
"github.com/sirupsen/logrus"
78
"github.com/spf13/viper"
@@ -36,7 +37,7 @@ func NewTagHandlerFromViper(v *viper.Viper, handler gostatsd.PipelineHandler, ta
3637
// NewTagHandler initialises a new handler which adds unique tags, and sends metrics/events to the next handler based
3738
// on filter rules.
3839
func NewTagHandler(handler gostatsd.PipelineHandler, tags gostatsd.Tags, filters []Filter) *TagHandler {
39-
tags = uniqueTags(tags, gostatsd.Tags{}) // de-dupe tags
40+
tags = uniqueTagsSimple(tags, gostatsd.Tags{}) // de-dupe tags
4041
return &TagHandler{
4142
handler: handler,
4243
tags: tags,
@@ -147,7 +148,7 @@ func (th *TagHandler) DispatchMetricMap(ctx context.Context, mm *gostatsd.Metric
147148
// Returns true if the metric should be processed further, or false to drop it.
148149
func (th *TagHandler) uniqueFilterAndAddTags(mName string, mHostname *gostatsd.Source, mTags *gostatsd.Tags) bool {
149150
if len(th.filters) == 0 {
150-
*mTags = uniqueTags(*mTags, th.tags)
151+
*mTags = uniqueTagsSimple(*mTags, th.tags)
151152
return true
152153
}
153154

@@ -193,7 +194,7 @@ func (th *TagHandler) uniqueFilterAndAddTags(mName string, mHostname *gostatsd.S
193194

194195
// DispatchEvent adds the unique tags from the TagHandler to the event and passes it to the next stage in the pipeline
195196
func (th *TagHandler) DispatchEvent(ctx context.Context, e *gostatsd.Event) {
196-
e.Tags = uniqueTags(e.Tags, th.tags)
197+
e.Tags = uniqueTagsSimple(e.Tags, th.tags)
197198
th.handler.DispatchEvent(ctx, e)
198199
}
199200

@@ -202,12 +203,40 @@ func (th *TagHandler) WaitForEvents() {
202203
th.handler.WaitForEvents()
203204
}
204205

205-
// uniqueTags returns the set of t1 | t2. It may modify the contents of t1 and t2.
206-
func uniqueTags(t1 gostatsd.Tags, t2 gostatsd.Tags) gostatsd.Tags {
207-
return uniqueTagsWithSeen(map[string]struct{}{}, t1, t2)
206+
// uniqueTagsSimple returns the set of t1 | t2. It may modify the contents of t1. It will not modify the contents
207+
// of t2.
208+
func uniqueTagsSimple(t1 gostatsd.Tags, t2 gostatsd.Tags) gostatsd.Tags {
209+
// This originally tracked seen tags in a map, however as the number of tags is relatively small, it's actually
210+
// faster to do a linear scan than to put things in a map, even if the map is pre-allocated. The break-even
211+
// point is approximately 20 unique items.
212+
//
213+
// Benchmarking against the https://github.com/golang/go/wiki/SliceTricks style of filtering a slice shows
214+
// this is slightly faster, at the expense of breaking "nearly sorted" ordering. Benchmarking with a
215+
// `.SortedString()` on the output shows that this is still better.
216+
217+
last := len(t1)
218+
for idx := 1; idx < last; { // start at 1 because we know the first item will be unique.
219+
if slices.Contains(t1[:idx-1], t1[idx]) {
220+
// Delete the current item by copying the last item in to this slot, and "shrinking" the slice.
221+
last--
222+
t1[idx] = t1[last]
223+
} else {
224+
idx++
225+
}
226+
}
227+
t1 = t1[:last]
228+
229+
for _, tag := range t2 {
230+
if !slices.Contains(t1, tag) {
231+
t1 = append(t1, tag)
232+
}
233+
}
234+
235+
return t1
208236
}
209237

210-
// uniqueTags returns the set of (t1 | t2) - seen. It may modify the contents of t1, t2, and seen.
238+
// uniqueTagsWithSeen returns the set of (t1 | t2) - seen. It may modify the contents of t1 and seen. It will not
239+
// modify the contents of t2.
211240
func uniqueTagsWithSeen(seen map[string]struct{}, t1 gostatsd.Tags, t2 gostatsd.Tags) gostatsd.Tags {
212241
last := len(t1)
213242
for idx := 0; idx < last; {

pkg/statsd/handler_tags_test.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@ package statsd
33
import (
44
"bytes"
55
"context"
6+
"slices"
67
"sort"
8+
"strconv"
79
"strings"
810
"testing"
911

12+
"github.com/google/uuid"
1013
"github.com/spf13/viper"
1114
"github.com/stretchr/testify/assert"
1215
"github.com/stretchr/testify/require"
@@ -15,6 +18,74 @@ import (
1518
. "github.com/atlassian/gostatsd/internal/fixtures"
1619
)
1720

21+
func benchmarkUnique(b *testing.B, t1 gostatsd.Tags, t2 gostatsd.Tags) {
22+
// Sanity check.
23+
u1 := uniqueTagsSimple(slices.Clone(t1), slices.Clone(t2))
24+
u2 := uniqueTagsWithSeen(map[string]struct{}{}, slices.Clone(t1), slices.Clone(t2))
25+
u3 := uniqueTagsWithSeen(make(map[string]struct{}, len(t1)), slices.Clone(t1), slices.Clone(t2))
26+
require.Equal(b, u1.SortedString(), u2.SortedString())
27+
require.Equal(b, u1.SortedString(), u3.SortedString())
28+
29+
runBenchmark := func(name string, f func(t1, t2 gostatsd.Tags) gostatsd.Tags) {
30+
b.Run(name, func(b *testing.B) {
31+
b.ReportAllocs()
32+
for b.Loop() {
33+
_ = f(slices.Clone(t1), slices.Clone(t2))
34+
// This can be used to measure the cost of sorting.
35+
//_ = f(slices.Clone(t1), slices.Clone(t2)).SortedString()
36+
}
37+
})
38+
}
39+
40+
runBenchmark("original", func(t1, t2 gostatsd.Tags) gostatsd.Tags {
41+
return uniqueTagsWithSeen(map[string]struct{}{}, t1, t2)
42+
})
43+
44+
runBenchmark("prealloc", func(t1, t2 gostatsd.Tags) gostatsd.Tags {
45+
return uniqueTagsWithSeen(make(map[string]struct{}, len(t1)), t1, t2)
46+
})
47+
48+
runBenchmark("array-search", func(t1, t2 gostatsd.Tags) gostatsd.Tags {
49+
return uniqueTagsSimple(t1, t2)
50+
})
51+
}
52+
53+
func BenchmarkUniqueTagsPractical(b *testing.B) {
54+
// Generate 5 tags. These are the tags emitted at the call-site, and are dynamic.
55+
dynamicTags := gostatsd.Tags{
56+
uuid.New().String() + ":" + uuid.New().String(),
57+
uuid.New().String() + ":" + uuid.New().String(),
58+
uuid.New().String() + ":" + uuid.New().String(),
59+
uuid.New().String() + ":" + uuid.New().String(),
60+
uuid.New().String() + ":" + uuid.New().String(),
61+
}
62+
63+
// Generate 1 overlapping tag, and 3 unique tags. These are the tags added by TagHandler, and are static.
64+
staticTags := gostatsd.Tags{
65+
dynamicTags[0],
66+
uuid.New().String() + ":" + uuid.New().String(),
67+
uuid.New().String() + ":" + uuid.New().String(),
68+
uuid.New().String() + ":" + uuid.New().String(),
69+
}
70+
benchmarkUnique(b, dynamicTags, staticTags)
71+
}
72+
73+
func BenchmarkUniqueTagsWithSeen(b *testing.B) {
74+
for tagCount := range 30 {
75+
b.Run(strconv.Itoa(tagCount)+"-tags", func(b *testing.B) {
76+
originalTags := gostatsd.Tags{}
77+
for i := 0; i < tagCount; i++ {
78+
originalTags = append(originalTags, uuid.New().String()+":"+uuid.New().String())
79+
}
80+
originalTags2 := slices.Clone(originalTags)
81+
for i := tagCount / 2; i < tagCount; i++ {
82+
originalTags2[i] = uuid.New().String() + ":" + uuid.New().String()
83+
}
84+
benchmarkUnique(b, originalTags, originalTags2)
85+
})
86+
}
87+
}
88+
1889
func TestTagStripMergesCounters(t *testing.T) {
1990
t.Parallel()
2091
tch := &capturingHandler{}

0 commit comments

Comments
 (0)