From bafa615952a01fba80de0fb7917324f063a3ce05 Mon Sep 17 00:00:00 2001 From: ThinkChaos Date: Sat, 11 Nov 2023 17:17:10 -0500 Subject: [PATCH] fix(cache): don't add entries multiple times in chains Also means we save a bit of time by not checking every single entry with all caches. --- cache/stringcache/chained_grouped_cache.go | 8 +++- .../stringcache/chained_grouped_cache_test.go | 18 +++++--- cache/stringcache/grouped_cache_interface.go | 2 +- cache/stringcache/in_memory_grouped_cache.go | 4 +- .../in_memory_grouped_cache_test.go | 33 ++++--------- cache/stringcache/string_caches.go | 46 +++++++++++-------- .../string_caches_benchmark_test.go | 4 +- cache/stringcache/string_caches_test.go | 38 ++++++++------- lists/list_cache.go | 10 ++-- 9 files changed, 86 insertions(+), 77 deletions(-) diff --git a/cache/stringcache/chained_grouped_cache.go b/cache/stringcache/chained_grouped_cache.go index 36f6fce1b..d2deb27a1 100644 --- a/cache/stringcache/chained_grouped_cache.go +++ b/cache/stringcache/chained_grouped_cache.go @@ -56,10 +56,14 @@ type chainedGroupFactory struct { cacheFactories []GroupFactory } -func (c *chainedGroupFactory) AddEntry(entry string) { +func (c *chainedGroupFactory) AddEntry(entry string) bool { for _, factory := range c.cacheFactories { - factory.AddEntry(entry) + if factory.AddEntry(entry) { + return true + } } + + return false } func (c *chainedGroupFactory) Count() int { diff --git a/cache/stringcache/chained_grouped_cache_test.go b/cache/stringcache/chained_grouped_cache_test.go index 161493e25..8fe2432ae 100644 --- a/cache/stringcache/chained_grouped_cache_test.go +++ b/cache/stringcache/chained_grouped_cache_test.go @@ -24,6 +24,10 @@ var _ = Describe("Chained grouped cache", func() { It("should not find any string", func() { Expect(cache.Contains("searchString", []string{"someGroup"})).Should(BeEmpty()) }) + + It("should not add entries", func() { + Expect(cache.Refresh("group").AddEntry("test")).Should(BeFalse()) + }) }) }) Describe("Delegation", func() { @@ -44,12 +48,12 @@ var _ = Describe("Chained grouped cache", func() { }) It("factory has 4 elements (both caches)", func() { - Expect(factory.Count()).Should(BeNumerically("==", 4)) + Expect(factory.Count()).Should(BeNumerically("==", 2)) }) It("should have element count of 4", func() { factory.Finish() - Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 4)) + Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 2)) }) It("should find strings", func() { @@ -80,8 +84,8 @@ var _ = Describe("Chained grouped cache", func() { }) It("should contain 4 elements in 2 groups", func() { - Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 4)) - Expect(cache.ElementCount("group2")).Should(BeNumerically("==", 4)) + Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 2)) + Expect(cache.ElementCount("group2")).Should(BeNumerically("==", 2)) Expect(cache.Contains("g1", []string{"group1", "group2"})).Should(ConsistOf("group1")) Expect(cache.Contains("g2", []string{"group1", "group2"})).Should(ConsistOf("group2")) Expect(cache.Contains("both", []string{"group1", "group2"})).Should(ConsistOf("group1", "group2")) @@ -92,8 +96,8 @@ var _ = Describe("Chained grouped cache", func() { factory.AddEntry("newString") factory.Finish() - Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 2)) - Expect(cache.ElementCount("group2")).Should(BeNumerically("==", 4)) + Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 1)) + Expect(cache.ElementCount("group2")).Should(BeNumerically("==", 2)) Expect(cache.Contains("g1", []string{"group1", "group2"})).Should(BeEmpty()) Expect(cache.Contains("newString", []string{"group1", "group2"})).Should(ConsistOf("group1")) Expect(cache.Contains("g2", []string{"group1", "group2"})).Should(ConsistOf("group2")) @@ -105,7 +109,7 @@ var _ = Describe("Chained grouped cache", func() { factory.AddEntry("begone") factory.Finish() - Expect(cache.ElementCount("group")).Should(BeNumerically("==", 2)) + Expect(cache.ElementCount("group")).Should(BeNumerically("==", 1)) factory = cache.Refresh("group") factory.Finish() diff --git a/cache/stringcache/grouped_cache_interface.go b/cache/stringcache/grouped_cache_interface.go index 779de81b9..53dec3c05 100644 --- a/cache/stringcache/grouped_cache_interface.go +++ b/cache/stringcache/grouped_cache_interface.go @@ -15,7 +15,7 @@ type GroupedStringCache interface { type GroupFactory interface { // AddEntry adds a new string to the factory to be added later to the cache groups. - AddEntry(entry string) + AddEntry(entry string) bool // Count returns amount of processed string in the factory Count() int diff --git a/cache/stringcache/in_memory_grouped_cache.go b/cache/stringcache/in_memory_grouped_cache.go index b5d8d2f99..7a304a184 100644 --- a/cache/stringcache/in_memory_grouped_cache.go +++ b/cache/stringcache/in_memory_grouped_cache.go @@ -73,8 +73,8 @@ type inMemoryGroupFactory struct { finishFn func(stringCache) } -func (c *inMemoryGroupFactory) AddEntry(entry string) { - c.factory.addEntry(entry) +func (c *inMemoryGroupFactory) AddEntry(entry string) bool { + return c.factory.addEntry(entry) } func (c *inMemoryGroupFactory) Count() int { diff --git a/cache/stringcache/in_memory_grouped_cache_test.go b/cache/stringcache/in_memory_grouped_cache_test.go index bddb7d4b6..f239795e5 100644 --- a/cache/stringcache/in_memory_grouped_cache_test.go +++ b/cache/stringcache/in_memory_grouped_cache_test.go @@ -46,8 +46,8 @@ var _ = Describe("In-Memory grouped cache", func() { cache = stringcache.NewInMemoryGroupedStringCache() factory = cache.Refresh("group1") - factory.AddEntry("string1") - factory.AddEntry("string2") + Expect(factory.AddEntry("string1")).Should(BeTrue()) + Expect(factory.AddEntry("string2")).Should(BeTrue()) }) It("cache should still have 0 element, since finish was not executed", func() { @@ -69,28 +69,13 @@ var _ = Describe("In-Memory grouped cache", func() { Expect(cache.Contains("string2", []string{"group1", "someOtherGroup"})).Should(ConsistOf("group1")) }) }) - When("String grouped cache is used", func() { - BeforeEach(func() { - cache = stringcache.NewInMemoryGroupedStringCache() - factory = cache.Refresh("group1") - - factory.AddEntry("string1") - factory.AddEntry("/string2/") - factory.Finish() - }) - - It("should ignore regex", func() { - Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 1)) - Expect(cache.Contains("string1", []string{"group1"})).Should(ConsistOf("group1")) - }) - }) When("Regex grouped cache is used", func() { BeforeEach(func() { cache = stringcache.NewInMemoryGroupedRegexCache() factory = cache.Refresh("group1") - factory.AddEntry("string1") - factory.AddEntry("/string2/") + Expect(factory.AddEntry("string1")).Should(BeFalse()) + Expect(factory.AddEntry("/string2/")).Should(BeTrue()) factory.Finish() }) @@ -109,13 +94,13 @@ var _ = Describe("In-Memory grouped cache", func() { cache = stringcache.NewInMemoryGroupedStringCache() factory = cache.Refresh("group1") - factory.AddEntry("g1") - factory.AddEntry("both") + Expect(factory.AddEntry("g1")).Should(BeTrue()) + Expect(factory.AddEntry("both")).Should(BeTrue()) factory.Finish() factory = cache.Refresh("group2") - factory.AddEntry("g2") - factory.AddEntry("both") + Expect(factory.AddEntry("g2")).Should(BeTrue()) + Expect(factory.AddEntry("both")).Should(BeTrue()) factory.Finish() }) @@ -129,7 +114,7 @@ var _ = Describe("In-Memory grouped cache", func() { It("Should replace group content on refresh", func() { factory = cache.Refresh("group1") - factory.AddEntry("newString") + Expect(factory.AddEntry("newString")).Should(BeTrue()) factory.Finish() Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 1)) diff --git a/cache/stringcache/string_caches.go b/cache/stringcache/string_caches.go index 8a5d52721..1de6345bd 100644 --- a/cache/stringcache/string_caches.go +++ b/cache/stringcache/string_caches.go @@ -14,7 +14,7 @@ type stringCache interface { } type cacheFactory interface { - addEntry(entry string) + addEntry(entry string) bool create() stringCache count() int } @@ -86,7 +86,7 @@ func (s *stringCacheFactory) insertString(entry string) { ix := sort.SearchStrings(bucket, normalized) if !(ix < len(bucket) && bucket[ix] == normalized) { - // extent internal bucket + // extend internal bucket bucket = append(s.getBucket(entryLen), "") // move elements to make place for the insertion @@ -98,12 +98,15 @@ func (s *stringCacheFactory) insertString(entry string) { } } -func (s *stringCacheFactory) addEntry(entry string) { - // skip empty strings and regex - if len(entry) > 0 && !isRegex(entry) { - s.cnt++ - s.insertString(entry) +func (s *stringCacheFactory) addEntry(entry string) bool { + if len(entry) == 0 { + return true // invalid but handled } + + s.cnt++ + s.insertString(entry) + + return true } func (s *stringCacheFactory) create() stringCache { @@ -121,10 +124,6 @@ func (s *stringCacheFactory) create() stringCache { return cache } -func isRegex(s string) bool { - return strings.HasPrefix(s, "/") && strings.HasSuffix(s, "/") -} - type regexCache []*regexp.Regexp func (cache regexCache) elementCount() int { @@ -147,17 +146,24 @@ type regexCacheFactory struct { cache regexCache } -func (r *regexCacheFactory) addEntry(entry string) { - if isRegex(entry) { - entry = strings.TrimSpace(entry[1 : len(entry)-1]) - compile, err := regexp.Compile(entry) +func (r *regexCacheFactory) addEntry(entry string) bool { + if !strings.HasPrefix(entry, "/") || !strings.HasSuffix(entry, "/") { + return false + } - if err != nil { - log.Log().Warnf("invalid regex '%s'", entry) - } else { - r.cache = append(r.cache, compile) - } + // Trim slashes + entry = strings.TrimSpace(entry[1 : len(entry)-1]) + + compile, err := regexp.Compile(entry) + if err != nil { + log.Log().Warnf("invalid regex '%s'", entry) + + return true // invalid but handled } + + r.cache = append(r.cache, compile) + + return true } func (r *regexCacheFactory) count() int { diff --git a/cache/stringcache/string_caches_benchmark_test.go b/cache/stringcache/string_caches_benchmark_test.go index 60457ea92..8088824bc 100644 --- a/cache/stringcache/string_caches_benchmark_test.go +++ b/cache/stringcache/string_caches_benchmark_test.go @@ -14,7 +14,9 @@ func BenchmarkStringCache(b *testing.B) { factory := newStringCacheFactory() for _, s := range testdata { - factory.addEntry(s) + if !factory.addEntry(s) { + b.Fatalf("cache didn't insert value: %s", s) + } } factory.create() diff --git a/cache/stringcache/string_caches_test.go b/cache/stringcache/string_caches_test.go index 41bfb17c0..4502d4f9c 100644 --- a/cache/stringcache/string_caches_test.go +++ b/cache/stringcache/string_caches_test.go @@ -10,6 +10,7 @@ var _ = Describe("Caches", func() { cache stringCache factory cacheFactory ) + Describe("String StringCache", func() { It("should not return a cache when empty", func() { Expect(newStringCacheFactory().create()).Should(BeNil()) @@ -18,7 +19,7 @@ var _ = Describe("Caches", func() { It("should recognise the empty string", func() { factory := newStringCacheFactory() - factory.addEntry("") + Expect(factory.addEntry("")).Should(BeTrue()) Expect(factory.count()).Should(BeNumerically("==", 0)) Expect(factory.create()).Should(BeNil()) @@ -27,11 +28,11 @@ var _ = Describe("Caches", func() { When("string StringCache was created", func() { BeforeEach(func() { factory = newStringCacheFactory() - factory.addEntry("google.com") - factory.addEntry("apple.com") - factory.addEntry("") - factory.addEntry("google.com") - factory.addEntry("APPLe.com") + Expect(factory.addEntry("google.com")).Should(BeTrue()) + Expect(factory.addEntry("apple.com")).Should(BeTrue()) + Expect(factory.addEntry("")).Should(BeTrue()) // invalid, but handled + Expect(factory.addEntry("google.com")).Should(BeTrue()) + Expect(factory.addEntry("APPLe.com")).Should(BeTrue()) cache = factory.create() }) @@ -42,13 +43,16 @@ var _ = Describe("Caches", func() { Expect(cache.contains("www.google.com")).Should(BeFalse()) Expect(cache.contains("")).Should(BeFalse()) }) + It("should match case-insensitive", func() { Expect(cache.contains("aPPle.com")).Should(BeTrue()) Expect(cache.contains("google.COM")).Should(BeTrue()) Expect(cache.contains("www.google.com")).Should(BeFalse()) Expect(cache.contains("")).Should(BeFalse()) }) + It("should return correct element count", func() { + Expect(factory.count()).Should(Equal(4)) Expect(cache.elementCount()).Should(Equal(2)) }) }) @@ -62,10 +66,10 @@ var _ = Describe("Caches", func() { It("should recognise invalid regexes", func() { factory := newRegexCacheFactory() - factory.addEntry("/*/") - factory.addEntry("/?/") - factory.addEntry("/+/") - factory.addEntry("/[/") + Expect(factory.addEntry("/*/")).Should(BeTrue()) + Expect(factory.addEntry("/?/")).Should(BeTrue()) + Expect(factory.addEntry("/+/")).Should(BeTrue()) + Expect(factory.addEntry("/[/")).Should(BeTrue()) Expect(factory.count()).Should(BeNumerically("==", 0)) Expect(factory.create()).Should(BeNil()) @@ -74,14 +78,15 @@ var _ = Describe("Caches", func() { When("regex StringCache was created", func() { BeforeEach(func() { factory = newRegexCacheFactory() - factory.addEntry("/.*google.com/") - factory.addEntry("/^apple\\.(de|com)$/") - factory.addEntry("/amazon/") - // this is not a regex, will be ignored - factory.addEntry("/(wrongRegex/") - factory.addEntry("plaintext") + Expect(factory.addEntry("/.*google.com/")).Should(BeTrue()) + Expect(factory.addEntry("/^apple\\.(de|com)$/")).Should(BeTrue()) + Expect(factory.addEntry("/amazon/")).Should(BeTrue()) + Expect(factory.addEntry("/(wrongRegex/")).Should(BeTrue()) // recognized as regex but ignored because invalid + Expect(factory.addEntry("plaintext")).Should(BeFalse()) + cache = factory.create() }) + It("should match if one regex in StringCache matches string", func() { Expect(cache.contains("google.com")).Should(BeTrue()) Expect(cache.contains("google.coma")).Should(BeTrue()) @@ -96,6 +101,7 @@ var _ = Describe("Caches", func() { Expect(cache.contains("amazon.com")).Should(BeTrue()) Expect(cache.contains("myamazon.com")).Should(BeTrue()) }) + It("should return correct element count", func() { Expect(factory.count()).Should(Equal(3)) Expect(cache.elementCount()).Should(Equal(3)) diff --git a/lists/list_cache.go b/lists/list_cache.go index f731b015b..94f149209 100644 --- a/lists/list_cache.go +++ b/lists/list_cache.go @@ -62,8 +62,8 @@ func NewListCache(ctx context.Context, ) (*ListCache, error) { c := &ListCache{ groupedCache: stringcache.NewChainedGroupedCache( - stringcache.NewInMemoryGroupedStringCache(), stringcache.NewInMemoryGroupedRegexCache(), + stringcache.NewInMemoryGroupedStringCache(), // accepts all values, must be last ), cfg: cfg, @@ -168,9 +168,11 @@ func (b *ListCache) createCacheForGroup( producers.GoConsume(func(ctx context.Context, ch <-chan string) error { for host := range ch { - hasEntries = true - - groupFactory.AddEntry(host) + if groupFactory.AddEntry(host) { + hasEntries = true + } else { + logger().WithField("host", host).Warn("no list cache was able to use host") + } } return nil