-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample-config.yaml
More file actions
68 lines (61 loc) · 2.05 KB
/
example-config.yaml
File metadata and controls
68 lines (61 loc) · 2.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Example configuration for Heidi Anonymizer
# This file demonstrates how to configure custom keywords and regex patterns
# for PII detection and redaction.
# Built-in recognizers to enable.
# If omitted, all five recognizers below are enabled with their default settings.
# Supported types: email, iban, credit_card, ip, spacy
#
# Options for a recognizer are written as extra keys of the same list item
# as its "type", indented to line up with "type".
recognizers:
  - type: email
  - type: iban
  - type: credit_card
  - type: ip
    # Set to false to also detect IPv6 (may increase false positives).
    ipv4_only: true
  - type: spacy
    # Entities listed for the spacy recognizer; PERSON is left commented out
    # in this example.
    entities:
      - ORGANIZATION
      # - PERSON
# Deny-list keywords: every entry is treated as PII wherever it is found.
# Entries are matched as whole words.
keywords:
  - 'confidential'
# Custom regex patterns for detecting sensitive information
# Each pattern creates a new entity type based on the name
#
# Quoting rules for "regex":
#   - YAML has no raw-string prefix. An unquoted r"..." value is a plain
#     scalar, so the leading r and both quote characters would become part
#     of the regex itself.
#   - Single-quoted scalars keep backslashes literal (write \b, \. as-is).
#   - Double-quoted scalars process escapes, so each backslash is doubled.
patterns:
  # Internal domain/hostname
  - name: "internal_domain"
    # Double-quoted: \\s and \\. reach the regex engine as \s and \.
    regex: "(?:^|\\s|:)([a-z0-9-]+\\.internal)(?:\\s|:|$)"
    score: 0.8
  - name: "cluster.local FQDN"
    regex: '\b[a-z0-9]([-a-z0-9.]*[a-z0-9])?\.(svc|pod)\.cluster\.local\b'
    score: 0.9
  # Kubernetes namespaces specified as command line arguments
  - name: "-n <ns>"
    regex: '(?<=-n )[a-z0-9][a-z0-9-]*'
    score: 0.7
  - name: "--namespace <ns>"
    regex: '(?<=--namespace )[a-z0-9][a-z0-9-]*'
    score: 0.7
  - name: "--namespace=<ns>"
    regex: '(?<=--namespace=)[a-z0-9][a-z0-9-]*'
    score: 0.7
# Language code used for NLP-based entity detection (default: "en")
language: 'en'
# Optional NLP engine configuration.
# If omitted, defaults to spaCy with the en_core_web_lg model.
# For "spacy", model_name is a string.
# For "transformers", model_name must be a mapping with
# both "spacy" and "transformers" keys.
nlp_engine:
  nlp_engine_name: spacy
  models:
    # lang_code and model_name are keys of the same list entry.
    - lang_code: en
      model_name: en_core_web_sm
# Example transformers NLP engine with spacy as "tokenizer".
#
# nlp_engine:
#   nlp_engine_name: transformers
#   models:
#     - lang_code: en
#       model_name:
#         spacy: en_core_web_sm
#         transformers: StanfordAIMI/stanford-deidentifier-base