Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ TBD: throughput on production servers.

**Input**: [dmesg](plugin/input/dmesg/README.md), [fake](plugin/input/fake/README.md), [file](plugin/input/file/README.md), [http](plugin/input/http/README.md), [journalctl](plugin/input/journalctl/README.md), [k8s](plugin/input/k8s/README.md), [kafka](plugin/input/kafka/README.md), [socket](plugin/input/socket/README.md)

**Action**: [add_file_name](plugin/action/add_file_name/README.md), [add_host](plugin/action/add_host/README.md), [convert_date](plugin/action/convert_date/README.md), [convert_log_level](plugin/action/convert_log_level/README.md), [convert_utf8_bytes](plugin/action/convert_utf8_bytes/README.md), [debug](plugin/action/debug/README.md), [decode](plugin/action/decode/README.md), [discard](plugin/action/discard/README.md), [flatten](plugin/action/flatten/README.md), [hash](plugin/action/hash/README.md), [join](plugin/action/join/README.md), [join_template](plugin/action/join_template/README.md), [json_decode](plugin/action/json_decode/README.md), [json_encode](plugin/action/json_encode/README.md), [json_extract](plugin/action/json_extract/README.md), [keep_fields](plugin/action/keep_fields/README.md), [mask](plugin/action/mask/README.md), [modify](plugin/action/modify/README.md), [move](plugin/action/move/README.md), [parse_es](plugin/action/parse_es/README.md), [parse_re2](plugin/action/parse_re2/README.md), [remove_fields](plugin/action/remove_fields/README.md), [rename](plugin/action/rename/README.md), [set_time](plugin/action/set_time/README.md), [split](plugin/action/split/README.md), [throttle](plugin/action/throttle/README.md)
**Action**: [add_file_name](plugin/action/add_file_name/README.md), [add_host](plugin/action/add_host/README.md), [cardinality](plugin/action/cardinality/README.md), [convert_date](plugin/action/convert_date/README.md), [convert_log_level](plugin/action/convert_log_level/README.md), [convert_utf8_bytes](plugin/action/convert_utf8_bytes/README.md), [debug](plugin/action/debug/README.md), [decode](plugin/action/decode/README.md), [discard](plugin/action/discard/README.md), [flatten](plugin/action/flatten/README.md), [hash](plugin/action/hash/README.md), [join](plugin/action/join/README.md), [join_template](plugin/action/join_template/README.md), [json_decode](plugin/action/json_decode/README.md), [json_encode](plugin/action/json_encode/README.md), [json_extract](plugin/action/json_extract/README.md), [keep_fields](plugin/action/keep_fields/README.md), [mask](plugin/action/mask/README.md), [modify](plugin/action/modify/README.md), [move](plugin/action/move/README.md), [parse_es](plugin/action/parse_es/README.md), [parse_re2](plugin/action/parse_re2/README.md), [remove_fields](plugin/action/remove_fields/README.md), [rename](plugin/action/rename/README.md), [set_time](plugin/action/set_time/README.md), [split](plugin/action/split/README.md), [throttle](plugin/action/throttle/README.md)

**Output**: [clickhouse](plugin/output/clickhouse/README.md), [devnull](plugin/output/devnull/README.md), [elasticsearch](plugin/output/elasticsearch/README.md), [file](plugin/output/file/README.md), [gelf](plugin/output/gelf/README.md), [kafka](plugin/output/kafka/README.md), [loki](plugin/output/loki/README.md), [postgres](plugin/output/postgres/README.md), [s3](plugin/output/s3/README.md), [splunk](plugin/output/splunk/README.md), [stdout](plugin/output/stdout/README.md)

Expand Down
1 change: 1 addition & 0 deletions _sidebar.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
- Action
- [add_file_name](plugin/action/add_file_name/README.md)
- [add_host](plugin/action/add_host/README.md)
- [cardinality](plugin/action/cardinality/README.md)
- [convert_date](plugin/action/convert_date/README.md)
- [convert_log_level](plugin/action/convert_log_level/README.md)
- [convert_utf8_bytes](plugin/action/convert_utf8_bytes/README.md)
Expand Down
1 change: 1 addition & 0 deletions cmd/file.d/file.d.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/ozontech/file.d/pipeline"
_ "github.com/ozontech/file.d/plugin/action/add_file_name"
_ "github.com/ozontech/file.d/plugin/action/add_host"
_ "github.com/ozontech/file.d/plugin/action/cardinality"
_ "github.com/ozontech/file.d/plugin/action/convert_date"
_ "github.com/ozontech/file.d/plugin/action/convert_log_level"
_ "github.com/ozontech/file.d/plugin/action/convert_utf8_bytes"
Expand Down
1 change: 1 addition & 0 deletions e2e/start_work_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/ozontech/file.d/fd"
_ "github.com/ozontech/file.d/plugin/action/add_file_name"
_ "github.com/ozontech/file.d/plugin/action/add_host"
_ "github.com/ozontech/file.d/plugin/action/cardinality"
_ "github.com/ozontech/file.d/plugin/action/convert_date"
_ "github.com/ozontech/file.d/plugin/action/convert_log_level"
_ "github.com/ozontech/file.d/plugin/action/convert_utf8_bytes"
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ require (
github.com/alecthomas/kingpin v2.2.6+incompatible
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137
github.com/alicebob/miniredis/v2 v2.30.5
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310
github.com/bitly/go-simplejson v0.5.1
github.com/bmatcuk/doublestar/v4 v4.0.2
github.com/bufbuild/protocompile v0.13.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ github.com/alicebob/miniredis/v2 v2.30.5 h1:3r6kTHdKnuP4fkS8k2IrvSfxpxUTcW1SOL0w
github.com/alicebob/miniredis/v2 v2.30.5/go.mod h1:b25qWj4fCEsBeAAR2mlb0ufImGC6uH3VlUfb/HS5zKg=
github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310 h1:BUAU3CGlLvorLI26FmByPp2eC2qla6E1Tw+scpcg/to=
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A=
github.com/benbjohnson/clock v1.3.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
Expand Down
4 changes: 4 additions & 0 deletions plugin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,10 @@ It is only applicable for input plugins k8s and file.
It adds field containing hostname to an event.

[More details...](plugin/action/add_host/README.md)
## cardinality
Limits the cardinality of fields on events, drops events or just do nothing.

[More details...](plugin/action/cardinality/README.md)
## convert_date
It converts field date/time data to different format.

Expand Down
4 changes: 4 additions & 0 deletions plugin/action/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ It is only applicable for input plugins k8s and file.
It adds field containing hostname to an event.

[More details...](plugin/action/add_host/README.md)
## cardinality
Limits the cardinality of fields on events, drops events or just do nothing.

[More details...](plugin/action/cardinality/README.md)
## convert_date
It converts field date/time data to different format.

Expand Down
8 changes: 8 additions & 0 deletions plugin/action/cardinality/README.idoc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Cardinality limit plugin
@introduction

## Examples
@examples

## Config params
@config-params|description
116 changes: 116 additions & 0 deletions plugin/action/cardinality/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Cardinality limit plugin
Limits the cardinality of fields on events, drops events or just do nothing.

## Examples
Discarding events with high cardinality field:
```yaml
pipelines:
example_pipeline:
...
- type: cardinality
limit: 2
action: discard
ttl: 1m
metric_prefix: service_client
key:
- service
fields:
- client_id
...
```
The original event:
```jsonl
{"service": "registration", "client_id": "1"}
{"service": "registration", "client_id": "1"}
{"service": "registration", "client_id": "2"}
{"service": "registration", "client_id": "3"} // will be discarded
```
The resulting event:
```json
{"service": "registration", "client_id": "1"}
{"service": "registration", "client_id": "1"}
{"service": "registration", "client_id": "2"}
```
---

Discarding events with high cardinality field:
```yaml
pipelines:
example_pipeline:
...
- type: cardinality
limit: 2
action: remove_fields
ttl: 1m
metric_prefix: service_client
key:
- service
fields:
- client_id
...
```
The original event:
```jsonl
{"service": "registration", "client_id": "1"}
{"service": "registration", "client_id": "2"}
{"service": "registration", "client_id": "3"}
```
The resulting event:
```json
{"service": "registration", "client_id": "1"}
{"service": "registration", "client_id": "2"}
{"service": "registration"}
```
---

## Config params
**`key`** *`[]cfg.FieldSelector`* *`required`*

Fields used to group events before calculating cardinality.
Events with the same key values are aggregated together.
Required for proper cardinality tracking per logical group.

<br>

**`fields`** *`[]cfg.FieldSelector`* *`required`*

Target fields whose unique values are counted within each key group.
The plugin monitors how many distinct values these fields contain.
Required to define what constitutes high cardinality.

<br>

**`action`** *`string`* *`default=nothing`* *`options=discard|remove_fields|nothing`*

Action to perform when cardinality limit is exceeded.
Determines whether to discard events, remove fields, or just monitor.
Choose based on whether you need to preserve other event data.

<br>

**`metric_prefix`** *`string`*

Prefix added to metric names for better organization.
Useful when running multiple instances to avoid metric name collisions.
Leave empty for default metric naming.

<br>

**`limit`** *`int`* *`default=10000`*

Maximum allowed number of unique values for monitored fields.
When exceeded within a key group, the configured action triggers.
Set based on expected diversity and system capacity.

<br>

**`ttl`** *`cfg.Duration`* *`default=1h`*

Time-to-live for cardinality tracking cache entries.
Prevents unbounded memory growth by forgetting old unique values.
Should align with typical patterns of field value changes.

<br>


<br>*Generated using [__insane-doc__](https://github.com/vitkovskii/insane-doc)*
69 changes: 69 additions & 0 deletions plugin/action/cardinality/cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package cardinality

import (
"sync"
"time"

radix "github.com/armon/go-radix"
"github.com/ozontech/file.d/xtime"
)

type Cache struct {
mu *sync.RWMutex
tree *radix.Tree
ttl int64
}

func NewCache(ttl time.Duration) *Cache {
return &Cache{
tree: radix.New(),
ttl: ttl.Nanoseconds(),
mu: &sync.RWMutex{},
}
}

func (c *Cache) Set(key string) bool {
c.mu.Lock()
defer c.mu.Unlock()

_, result := c.tree.Insert(key, xtime.GetInaccurateUnixNano())
return result
}

func (c *Cache) isExpire(now, value int64) bool {
diff := now - value
return diff > c.ttl
}

func (c *Cache) delete(keysToDelete ...string) {
if len(keysToDelete) == 0 {
return
}
c.mu.Lock()
defer c.mu.Unlock()

for _, key := range keysToDelete {
c.tree.Delete(key)
}
}

func (c *Cache) CountPrefix(prefix string) (count int) {
var keysToDelete []string
now := xtime.GetInaccurateUnixNano()
c.mu.RLock()
c.tree.WalkPrefix(prefix, func(s string, v any) bool {
timeValue := v.(int64)
if c.isExpire(now, timeValue) {
keysToDelete = append(keysToDelete, s)
} else {
count++
}
return false
})
c.mu.RUnlock()

if len(keysToDelete) > 0 {
go c.delete(keysToDelete...)
}
return
}
Loading