Skip to content

Commit

Permalink
fix(cache): don't add entries multiple times in chains
Browse files Browse the repository at this point in the history
Also means we save a bit of time by not checking every single entry with
all caches.
  • Loading branch information
ThinkChaos committed Nov 12, 2023
1 parent 435615b commit bafa615
Show file tree
Hide file tree
Showing 9 changed files with 86 additions and 77 deletions.
8 changes: 6 additions & 2 deletions cache/stringcache/chained_grouped_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,14 @@ type chainedGroupFactory struct {
cacheFactories []GroupFactory
}

func (c *chainedGroupFactory) AddEntry(entry string) {
func (c *chainedGroupFactory) AddEntry(entry string) bool {
for _, factory := range c.cacheFactories {
factory.AddEntry(entry)
if factory.AddEntry(entry) {
return true
}
}

return false
}

func (c *chainedGroupFactory) Count() int {
Expand Down
18 changes: 11 additions & 7 deletions cache/stringcache/chained_grouped_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ var _ = Describe("Chained grouped cache", func() {
It("should not find any string", func() {
Expect(cache.Contains("searchString", []string{"someGroup"})).Should(BeEmpty())
})

It("should not add entries", func() {
Expect(cache.Refresh("group").AddEntry("test")).Should(BeFalse())
})
})
})
Describe("Delegation", func() {
Expand All @@ -44,12 +48,12 @@ var _ = Describe("Chained grouped cache", func() {
})

It("factory has 4 elements (both caches)", func() {
Expect(factory.Count()).Should(BeNumerically("==", 4))
Expect(factory.Count()).Should(BeNumerically("==", 2))
})

It("should have element count of 4", func() {
factory.Finish()
Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 4))
Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 2))
})

It("should find strings", func() {
Expand Down Expand Up @@ -80,8 +84,8 @@ var _ = Describe("Chained grouped cache", func() {
})

It("should contain 4 elements in 2 groups", func() {
Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 4))
Expect(cache.ElementCount("group2")).Should(BeNumerically("==", 4))
Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 2))
Expect(cache.ElementCount("group2")).Should(BeNumerically("==", 2))
Expect(cache.Contains("g1", []string{"group1", "group2"})).Should(ConsistOf("group1"))
Expect(cache.Contains("g2", []string{"group1", "group2"})).Should(ConsistOf("group2"))
Expect(cache.Contains("both", []string{"group1", "group2"})).Should(ConsistOf("group1", "group2"))
Expand All @@ -92,8 +96,8 @@ var _ = Describe("Chained grouped cache", func() {
factory.AddEntry("newString")
factory.Finish()

Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 2))
Expect(cache.ElementCount("group2")).Should(BeNumerically("==", 4))
Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 1))
Expect(cache.ElementCount("group2")).Should(BeNumerically("==", 2))
Expect(cache.Contains("g1", []string{"group1", "group2"})).Should(BeEmpty())
Expect(cache.Contains("newString", []string{"group1", "group2"})).Should(ConsistOf("group1"))
Expect(cache.Contains("g2", []string{"group1", "group2"})).Should(ConsistOf("group2"))
Expand All @@ -105,7 +109,7 @@ var _ = Describe("Chained grouped cache", func() {
factory.AddEntry("begone")
factory.Finish()

Expect(cache.ElementCount("group")).Should(BeNumerically("==", 2))
Expect(cache.ElementCount("group")).Should(BeNumerically("==", 1))

factory = cache.Refresh("group")
factory.Finish()
Expand Down
2 changes: 1 addition & 1 deletion cache/stringcache/grouped_cache_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type GroupedStringCache interface {

type GroupFactory interface {
// AddEntry adds a new string to the factory to be added later to the cache groups.
AddEntry(entry string)
AddEntry(entry string) bool

// Count returns amount of processed string in the factory
Count() int
Expand Down
4 changes: 2 additions & 2 deletions cache/stringcache/in_memory_grouped_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ type inMemoryGroupFactory struct {
finishFn func(stringCache)
}

func (c *inMemoryGroupFactory) AddEntry(entry string) {
c.factory.addEntry(entry)
func (c *inMemoryGroupFactory) AddEntry(entry string) bool {
return c.factory.addEntry(entry)
}

func (c *inMemoryGroupFactory) Count() int {
Expand Down
33 changes: 9 additions & 24 deletions cache/stringcache/in_memory_grouped_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ var _ = Describe("In-Memory grouped cache", func() {
cache = stringcache.NewInMemoryGroupedStringCache()
factory = cache.Refresh("group1")

factory.AddEntry("string1")
factory.AddEntry("string2")
Expect(factory.AddEntry("string1")).Should(BeTrue())
Expect(factory.AddEntry("string2")).Should(BeTrue())
})

It("cache should still have 0 element, since finish was not executed", func() {
Expand All @@ -69,28 +69,13 @@ var _ = Describe("In-Memory grouped cache", func() {
Expect(cache.Contains("string2", []string{"group1", "someOtherGroup"})).Should(ConsistOf("group1"))
})
})
When("String grouped cache is used", func() {
BeforeEach(func() {
cache = stringcache.NewInMemoryGroupedStringCache()
factory = cache.Refresh("group1")

factory.AddEntry("string1")
factory.AddEntry("/string2/")
factory.Finish()
})

It("should ignore regex", func() {
Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 1))
Expect(cache.Contains("string1", []string{"group1"})).Should(ConsistOf("group1"))
})
})
When("Regex grouped cache is used", func() {
BeforeEach(func() {
cache = stringcache.NewInMemoryGroupedRegexCache()
factory = cache.Refresh("group1")

factory.AddEntry("string1")
factory.AddEntry("/string2/")
Expect(factory.AddEntry("string1")).Should(BeFalse())
Expect(factory.AddEntry("/string2/")).Should(BeTrue())
factory.Finish()
})

Expand All @@ -109,13 +94,13 @@ var _ = Describe("In-Memory grouped cache", func() {
cache = stringcache.NewInMemoryGroupedStringCache()
factory = cache.Refresh("group1")

factory.AddEntry("g1")
factory.AddEntry("both")
Expect(factory.AddEntry("g1")).Should(BeTrue())
Expect(factory.AddEntry("both")).Should(BeTrue())
factory.Finish()

factory = cache.Refresh("group2")
factory.AddEntry("g2")
factory.AddEntry("both")
Expect(factory.AddEntry("g2")).Should(BeTrue())
Expect(factory.AddEntry("both")).Should(BeTrue())
factory.Finish()
})

Expand All @@ -129,7 +114,7 @@ var _ = Describe("In-Memory grouped cache", func() {

It("Should replace group content on refresh", func() {
factory = cache.Refresh("group1")
factory.AddEntry("newString")
Expect(factory.AddEntry("newString")).Should(BeTrue())
factory.Finish()

Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 1))
Expand Down
46 changes: 26 additions & 20 deletions cache/stringcache/string_caches.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ type stringCache interface {
}

type cacheFactory interface {
addEntry(entry string)
addEntry(entry string) bool
create() stringCache
count() int
}
Expand Down Expand Up @@ -86,7 +86,7 @@ func (s *stringCacheFactory) insertString(entry string) {
ix := sort.SearchStrings(bucket, normalized)

if !(ix < len(bucket) && bucket[ix] == normalized) {
// extent internal bucket
// extend internal bucket
bucket = append(s.getBucket(entryLen), "")

// move elements to make place for the insertion
Expand All @@ -98,12 +98,15 @@ func (s *stringCacheFactory) insertString(entry string) {
}
}

func (s *stringCacheFactory) addEntry(entry string) {
// skip empty strings and regex
if len(entry) > 0 && !isRegex(entry) {
s.cnt++
s.insertString(entry)
func (s *stringCacheFactory) addEntry(entry string) bool {
if len(entry) == 0 {
return true // invalid but handled
}

s.cnt++
s.insertString(entry)

return true
}

func (s *stringCacheFactory) create() stringCache {
Expand All @@ -121,10 +124,6 @@ func (s *stringCacheFactory) create() stringCache {
return cache
}

func isRegex(s string) bool {
return strings.HasPrefix(s, "/") && strings.HasSuffix(s, "/")
}

type regexCache []*regexp.Regexp

func (cache regexCache) elementCount() int {
Expand All @@ -147,17 +146,24 @@ type regexCacheFactory struct {
cache regexCache
}

func (r *regexCacheFactory) addEntry(entry string) {
if isRegex(entry) {
entry = strings.TrimSpace(entry[1 : len(entry)-1])
compile, err := regexp.Compile(entry)
func (r *regexCacheFactory) addEntry(entry string) bool {
if !strings.HasPrefix(entry, "/") || !strings.HasSuffix(entry, "/") {
return false
}

if err != nil {
log.Log().Warnf("invalid regex '%s'", entry)
} else {
r.cache = append(r.cache, compile)
}
// Trim slashes
entry = strings.TrimSpace(entry[1 : len(entry)-1])

compile, err := regexp.Compile(entry)
if err != nil {
log.Log().Warnf("invalid regex '%s'", entry)

return true // invalid but handled
}

r.cache = append(r.cache, compile)

return true
}

func (r *regexCacheFactory) count() int {
Expand Down
4 changes: 3 additions & 1 deletion cache/stringcache/string_caches_benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ func BenchmarkStringCache(b *testing.B) {
factory := newStringCacheFactory()

for _, s := range testdata {
factory.addEntry(s)
if !factory.addEntry(s) {
b.Fatalf("cache didn't insert value: %s", s)
}
}

factory.create()
Expand Down
38 changes: 22 additions & 16 deletions cache/stringcache/string_caches_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ var _ = Describe("Caches", func() {
cache stringCache
factory cacheFactory
)

Describe("String StringCache", func() {
It("should not return a cache when empty", func() {
Expect(newStringCacheFactory().create()).Should(BeNil())
Expand All @@ -18,7 +19,7 @@ var _ = Describe("Caches", func() {
It("should recognise the empty string", func() {
factory := newStringCacheFactory()

factory.addEntry("")
Expect(factory.addEntry("")).Should(BeTrue())

Expect(factory.count()).Should(BeNumerically("==", 0))
Expect(factory.create()).Should(BeNil())
Expand All @@ -27,11 +28,11 @@ var _ = Describe("Caches", func() {
When("string StringCache was created", func() {
BeforeEach(func() {
factory = newStringCacheFactory()
factory.addEntry("google.com")
factory.addEntry("apple.com")
factory.addEntry("")
factory.addEntry("google.com")
factory.addEntry("APPLe.com")
Expect(factory.addEntry("google.com")).Should(BeTrue())
Expect(factory.addEntry("apple.com")).Should(BeTrue())
Expect(factory.addEntry("")).Should(BeTrue()) // invalid, but handled
Expect(factory.addEntry("google.com")).Should(BeTrue())
Expect(factory.addEntry("APPLe.com")).Should(BeTrue())

cache = factory.create()
})
Expand All @@ -42,13 +43,16 @@ var _ = Describe("Caches", func() {
Expect(cache.contains("www.google.com")).Should(BeFalse())
Expect(cache.contains("")).Should(BeFalse())
})

It("should match case-insensitive", func() {
Expect(cache.contains("aPPle.com")).Should(BeTrue())
Expect(cache.contains("google.COM")).Should(BeTrue())
Expect(cache.contains("www.google.com")).Should(BeFalse())
Expect(cache.contains("")).Should(BeFalse())
})

It("should return correct element count", func() {
Expect(factory.count()).Should(Equal(4))
Expect(cache.elementCount()).Should(Equal(2))
})
})
Expand All @@ -62,10 +66,10 @@ var _ = Describe("Caches", func() {
It("should recognise invalid regexes", func() {
factory := newRegexCacheFactory()

factory.addEntry("/*/")
factory.addEntry("/?/")
factory.addEntry("/+/")
factory.addEntry("/[/")
Expect(factory.addEntry("/*/")).Should(BeTrue())
Expect(factory.addEntry("/?/")).Should(BeTrue())
Expect(factory.addEntry("/+/")).Should(BeTrue())
Expect(factory.addEntry("/[/")).Should(BeTrue())

Expect(factory.count()).Should(BeNumerically("==", 0))
Expect(factory.create()).Should(BeNil())
Expand All @@ -74,14 +78,15 @@ var _ = Describe("Caches", func() {
When("regex StringCache was created", func() {
BeforeEach(func() {
factory = newRegexCacheFactory()
factory.addEntry("/.*google.com/")
factory.addEntry("/^apple\\.(de|com)$/")
factory.addEntry("/amazon/")
// this is not a regex, will be ignored
factory.addEntry("/(wrongRegex/")
factory.addEntry("plaintext")
Expect(factory.addEntry("/.*google.com/")).Should(BeTrue())
Expect(factory.addEntry("/^apple\\.(de|com)$/")).Should(BeTrue())
Expect(factory.addEntry("/amazon/")).Should(BeTrue())
Expect(factory.addEntry("/(wrongRegex/")).Should(BeTrue()) // recognized as regex but ignored because invalid
Expect(factory.addEntry("plaintext")).Should(BeFalse())

cache = factory.create()
})

It("should match if one regex in StringCache matches string", func() {
Expect(cache.contains("google.com")).Should(BeTrue())
Expect(cache.contains("google.coma")).Should(BeTrue())
Expand All @@ -96,6 +101,7 @@ var _ = Describe("Caches", func() {
Expect(cache.contains("amazon.com")).Should(BeTrue())
Expect(cache.contains("myamazon.com")).Should(BeTrue())
})

It("should return correct element count", func() {
Expect(factory.count()).Should(Equal(3))
Expect(cache.elementCount()).Should(Equal(3))
Expand Down
10 changes: 6 additions & 4 deletions lists/list_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ func NewListCache(ctx context.Context,
) (*ListCache, error) {
c := &ListCache{
groupedCache: stringcache.NewChainedGroupedCache(
stringcache.NewInMemoryGroupedStringCache(),
stringcache.NewInMemoryGroupedRegexCache(),
stringcache.NewInMemoryGroupedStringCache(), // accepts all values, must be last
),

cfg: cfg,
Expand Down Expand Up @@ -168,9 +168,11 @@ func (b *ListCache) createCacheForGroup(

producers.GoConsume(func(ctx context.Context, ch <-chan string) error {
for host := range ch {
hasEntries = true

groupFactory.AddEntry(host)
if groupFactory.AddEntry(host) {
hasEntries = true
} else {
logger().WithField("host", host).Warn("no list cache was able to use host")
}
}

return nil
Expand Down

0 comments on commit bafa615

Please sign in to comment.