Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions examples/bloom_filter_example_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package examples

import (
"fmt"
"testing"

"github.com/apache/datasketches-go/filters"
"github.com/stretchr/testify/assert"
)

// TestBloomFilter walks through the Bloom filter example end to end:
// building a filter sized by accuracy, inserting and querying items,
// query-and-update, union of two filters, and a serialization round trip.
//
// Every error returned by the library is asserted rather than discarded, and
// the test stops early when a constructor or deserializer fails so that later
// steps do not dereference an unusable value.
func TestBloomFilter(t *testing.T) {
	// Create a Bloom filter for 1000 items with 1% false positive rate.
	filter, err := filters.NewBloomFilterByAccuracy(1000, 0.01)
	if !assert.NoError(t, err) {
		return
	}
	assert.True(t, filter.IsEmpty())

	// Add items to the filter.
	for i := 0; i < 500; i++ {
		assert.NoError(t, filter.UpdateString(fmt.Sprintf("user_%d", i)))
	}
	assert.False(t, filter.IsEmpty())

	// Query for items in the filter.
	assert.True(t, filter.QueryString("user_0"))
	assert.True(t, filter.QueryString("user_100"))

	// Query for items not in the filter (may have false positives), so only
	// require that the large majority of the 100 probes are reported absent.
	notFoundCount := 0
	for i := 1000; i < 1100; i++ {
		if !filter.QueryString(fmt.Sprintf("user_%d", i)) {
			notFoundCount++
		}
	}
	assert.Greater(t, notFoundCount, 90)

	// Use different data types.
	assert.NoError(t, filter.UpdateInt64(12345))
	assert.True(t, filter.QueryInt64(12345))

	// QueryAndUpdate reports whether the item was already present and inserts
	// it in the same call: the first call sees it absent, the second present.
	wasPresent := filter.QueryAndUpdateString("new_item")
	assert.False(t, wasPresent)
	wasPresent = filter.QueryAndUpdateString("new_item")
	assert.True(t, wasPresent)

	// Create a second filter for union, holding user_250 .. user_749.
	filter2, err := filters.NewBloomFilterByAccuracy(1000, 0.01)
	if !assert.NoError(t, err) {
		return
	}
	for i := 250; i < 750; i++ {
		assert.NoError(t, filter2.UpdateString(fmt.Sprintf("user_%d", i)))
	}

	// Union two filters: filter3 starts with user_0 .. user_499 and absorbs
	// filter2, so items from either range must be reported present.
	filter3, err := filters.NewBloomFilterByAccuracy(1000, 0.01)
	if !assert.NoError(t, err) {
		return
	}
	for i := 0; i < 500; i++ {
		assert.NoError(t, filter3.UpdateString(fmt.Sprintf("user_%d", i)))
	}
	assert.NoError(t, filter3.Union(filter2))
	assert.True(t, filter3.QueryString("user_0"))
	assert.True(t, filter3.QueryString("user_600"))

	// Serialize and deserialize.
	data, err := filter.ToCompactSlice()
	if !assert.NoError(t, err) {
		return
	}

	restored, err := filters.NewBloomFilterFromSlice(data)
	if !assert.NoError(t, err) {
		return
	}
	assert.True(t, restored.QueryString("user_0"))
	assert.Equal(t, filter.BitsUsed(), restored.BitsUsed())
}
84 changes: 84 additions & 0 deletions examples/count_min_example_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package examples

import (
"bytes"
"fmt"
"testing"

"github.com/apache/datasketches-go/count"
"github.com/stretchr/testify/assert"
)

// TestCountMinSketch walks through the Count-Min sketch example: sizing the
// sketch from suggested parameters, updating item frequencies, reading
// estimates, merging a second sketch, and a serialization round trip.
//
// Every error returned by the library is asserted rather than discarded, and
// the test stops early when a constructor or deserializer fails so that later
// steps do not operate on an unusable sketch.
func TestCountMinSketch(t *testing.T) {
	seed := int64(12345)

	// Create a Count-Min Sketch with suggested parameters.
	numBuckets, err := count.SuggestNumBuckets(0.1)
	assert.NoError(t, err)
	numHashes, err := count.SuggestNumHashes(0.99)
	assert.NoError(t, err)

	sketch, err := count.NewCountMinSketch(numHashes, numBuckets, seed)
	if !assert.NoError(t, err) {
		return
	}

	// Update with frequency data.
	for i := 0; i < 1000; i++ {
		assert.NoError(t, sketch.UpdateString("apple", 1))
	}
	for i := 0; i < 500; i++ {
		assert.NoError(t, sketch.UpdateString("banana", 1))
	}
	for i := 0; i < 100; i++ {
		assert.NoError(t, sketch.UpdateString(fmt.Sprintf("item_%d", i), 1))
	}

	// Get frequency estimates (Count-Min never underestimates), so the true
	// counts are valid lower bounds.
	assert.GreaterOrEqual(t, sketch.GetEstimateString("apple"), int64(1000))
	assert.GreaterOrEqual(t, sketch.GetEstimateString("banana"), int64(500))

	// Update with weight.
	assert.NoError(t, sketch.UpdateString("grape", 50))
	assert.GreaterOrEqual(t, sketch.GetEstimateString("grape"), int64(50))

	// Create a second sketch for merging; it is built with the same
	// numHashes, numBuckets and seed as the first.
	sketch2, err := count.NewCountMinSketch(numHashes, numBuckets, seed)
	if !assert.NoError(t, err) {
		return
	}
	for i := 0; i < 500; i++ {
		assert.NoError(t, sketch2.UpdateString("apple", 1))
	}
	for i := 0; i < 300; i++ {
		assert.NoError(t, sketch2.UpdateString("orange", 1))
	}

	// Merge sketches: "apple" now carries 1000 + 500 updates.
	assert.NoError(t, sketch.Merge(sketch2))
	assert.GreaterOrEqual(t, sketch.GetEstimateString("apple"), int64(1500))
	assert.GreaterOrEqual(t, sketch.GetEstimateString("orange"), int64(300))

	// Serialize and deserialize.
	var buf bytes.Buffer
	if !assert.NoError(t, sketch.Serialize(&buf)) {
		return
	}

	restored, err := sketch.Deserialize(buf.Bytes(), seed)
	if !assert.NoError(t, err) {
		return
	}
	assert.Equal(t, sketch.GetTotalWeight(), restored.GetTotalWeight())
}
89 changes: 89 additions & 0 deletions examples/theta_example_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package examples

import (
"fmt"
"testing"

"github.com/apache/datasketches-go/theta"
"github.com/stretchr/testify/assert"
)

// TestThetaSketch walks through the Theta sketch example: building two
// overlapping update sketches, compacting them, computing union,
// intersection and A-not-B, and a serialization round trip.
//
// Every error returned by the library is asserted rather than discarded, and
// the test stops early when a constructor or set operation fails so that later
// steps do not dereference an unusable result.
func TestThetaSketch(t *testing.T) {
	// Create a new Theta sketch.
	sketch, err := theta.NewQuickSelectUpdateSketch()
	if !assert.NoError(t, err) {
		return
	}

	// Update the sketch with 1000 items (item_0 .. item_999).
	for i := 0; i < 1000; i++ {
		assert.NoError(t, sketch.UpdateString(fmt.Sprintf("item_%d", i)))
	}

	// Get the estimate of the number of unique items.
	estimate := sketch.Estimate()
	assert.InDelta(t, 1000, estimate, 1000*0.05)

	// Create a second sketch with overlapping items (item_500 .. item_1499),
	// sharing item_500 .. item_999 with the first sketch.
	sketch2, err := theta.NewQuickSelectUpdateSketch(theta.WithUpdateSketchLgK(14))
	if !assert.NoError(t, err) {
		return
	}
	for i := 500; i < 1500; i++ {
		assert.NoError(t, sketch2.UpdateString(fmt.Sprintf("item_%d", i)))
	}

	// Convert to compact form for set operations.
	compact1 := sketch.Compact(true)
	compact2 := sketch2.Compact(true)

	// Compute union of two sketches: 1500 distinct items overall.
	union, err := theta.NewUnion()
	if !assert.NoError(t, err) {
		return
	}
	assert.NoError(t, union.Update(compact1))
	assert.NoError(t, union.Update(compact2))

	unionResult, err := union.OrderedResult()
	if !assert.NoError(t, err) {
		return
	}
	assert.InDelta(t, 1500, unionResult.Estimate(), 1500*0.05)

	// Compute intersection of two sketches: 500 shared items.
	intersection := theta.NewIntersection()
	assert.NoError(t, intersection.Update(compact1))
	assert.NoError(t, intersection.Update(compact2))

	intersectionResult, err := intersection.OrderedResult()
	if !assert.NoError(t, err) {
		return
	}
	assert.InDelta(t, 500, intersectionResult.Estimate(), 500*0.1)

	// Compute set difference (A \ B): item_0 .. item_499 remain.
	aNotBResult, err := theta.ANotB(compact1, compact2, theta.DefaultSeed, true)
	if !assert.NoError(t, err) {
		return
	}
	assert.InDelta(t, 500, aNotBResult.Estimate(), 500*0.1)

	// Serialize and deserialize.
	bytes1, err := compact1.MarshalBinary()
	if !assert.NoError(t, err) {
		return
	}

	deserialized, err := theta.WrapCompactSketch(bytes1, theta.DefaultSeed)
	if !assert.NoError(t, err) {
		return
	}
	assert.InDelta(t, estimate, deserialized.Estimate(), 1)
}
121 changes: 121 additions & 0 deletions examples/tuple_example_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package examples

import (
"fmt"
"testing"

"github.com/apache/datasketches-go/tuple"
"github.com/stretchr/testify/assert"
)

// SumSummary is a custom Summary type that sums float64 values and keeps
// track of how many values have been folded into that sum.
type SumSummary struct {
	sum   float64 // running total of the values accumulated so far
	count int     // number of values accumulated into sum
}

// Reset returns the summary to its zero state: no accumulated sum and a
// count of zero.
func (s *SumSummary) Reset() {
	*s = SumSummary{}
}

// Clone returns an independent copy of the summary carrying the same sum and
// count; later changes to either value do not affect the other.
func (s *SumSummary) Clone() tuple.Summary {
	dup := *s
	return &dup
}

// Update folds one value into the summary: it is added to the running sum
// and the count is advanced by one.
func (s *SumSummary) Update(value float64) {
	s.sum, s.count = s.sum+value, s.count+1
}

// GetSum returns the total of all values accumulated so far.
func (s *SumSummary) GetSum() float64 {
	return s.sum
}
// GetCount returns how many values have been accumulated into the summary.
func (s *SumSummary) GetCount() int {
	return s.count
}
// String renders the summary for display, with the sum shown to two decimal
// places followed by the count.
func (s *SumSummary) String() string {
	const layout = "{sum: %.2f, count: %d}"
	return fmt.Sprintf(layout, s.sum, s.count)
}
// newSumSummary returns a fresh, empty SumSummary. TestTupleSketch passes it
// to tuple.NewUpdateSketch as the summary factory.
func newSumSummary() *SumSummary {
	return new(SumSummary)
}

// SumMergePolicy implements tuple.Policy for merging SumSummary instances.
// It carries no state; TestTupleSketch passes a pointer to it to
// tuple.NewUnion and tuple.NewIntersection as the merge policy.
type SumMergePolicy struct{}

// Apply merges incoming into internal by adding incoming's sum and count to
// internal's. Only internal is modified.
func (p *SumMergePolicy) Apply(internal, incoming *SumSummary) {
	internal.sum, internal.count = internal.sum+incoming.sum, internal.count+incoming.count
}

// TestTupleSketch walks through the Tuple sketch example with the custom
// SumSummary type: aggregating per-key values, checking the distinct-count
// estimate and the aggregated summaries, then running union and intersection
// on compacted sketches with SumMergePolicy.
//
// Every error returned by the library is asserted rather than discarded, and
// the test stops early when a constructor, compaction or result call fails so
// that later steps do not dereference an unusable value.
func TestTupleSketch(t *testing.T) {
	// Create a Tuple Sketch with custom Summary.
	sketch, err := tuple.NewUpdateSketch[*SumSummary, float64](newSumSummary)
	if !assert.NoError(t, err) {
		return
	}

	// Update with aggregated data (customer spending).
	assert.NoError(t, sketch.UpdateString("alice", 100.50))
	assert.NoError(t, sketch.UpdateString("alice", 50.25))
	assert.NoError(t, sketch.UpdateString("alice", 75.00))
	assert.NoError(t, sketch.UpdateString("bob", 200.00))
	assert.NoError(t, sketch.UpdateString("bob", 30.00))

	for i := 0; i < 100; i++ {
		assert.NoError(t, sketch.UpdateString(fmt.Sprintf("customer_%d", i), 10.0))
	}

	// Verify distinct count: alice, bob and 100 generated customers.
	assert.InDelta(t, 102, sketch.Estimate(), 10)

	// Verify aggregated values. Entries are recognized by their summary
	// contents: three updates is alice (100.50 + 50.25 + 75.00), two updates
	// totalling more than 200 is bob (200.00 + 30.00).
	for _, summary := range sketch.All() {
		if summary.GetCount() == 3 {
			assert.InDelta(t, 225.75, summary.GetSum(), 0.01)
		}
		if summary.GetCount() == 2 && summary.GetSum() > 200 {
			assert.InDelta(t, 230.00, summary.GetSum(), 0.01)
		}
	}

	// Create a second sketch for set operations; it shares only "alice" with
	// the first sketch.
	sketch2, err := tuple.NewUpdateSketch[*SumSummary, float64](newSumSummary)
	if !assert.NoError(t, err) {
		return
	}
	assert.NoError(t, sketch2.UpdateString("alice", 150.00))
	assert.NoError(t, sketch2.UpdateString("diana", 300.00))

	// Compact the sketches.
	compact1, err := sketch.Compact(true)
	if !assert.NoError(t, err) {
		return
	}
	compact2, err := sketch2.Compact(true)
	if !assert.NoError(t, err) {
		return
	}

	// Union with custom merge policy: the 102 keys of the first sketch plus
	// "diana".
	mergePolicy := &SumMergePolicy{}
	union, err := tuple.NewUnion[*SumSummary](mergePolicy)
	if !assert.NoError(t, err) {
		return
	}
	assert.NoError(t, union.Update(compact1))
	assert.NoError(t, union.Update(compact2))

	unionResult, err := union.Result(true)
	if !assert.NoError(t, err) {
		return
	}
	assert.InDelta(t, 103, unionResult.Estimate(), 10)

	// Intersection: only "alice" is present in both sketches.
	intersection := tuple.NewIntersection[*SumSummary](mergePolicy)
	assert.NoError(t, intersection.Update(compact1))
	assert.NoError(t, intersection.Update(compact2))

	intersectionResult, err := intersection.Result(true)
	if !assert.NoError(t, err) {
		return
	}
	assert.InDelta(t, 1, intersectionResult.Estimate(), 1)
}