diff --git a/go.mod b/go.mod
index 5539eec..f15f5ee 100644
--- a/go.mod
+++ b/go.mod
@@ -1,4 +1,4 @@
-module scrub-pii
+module scrub
go 1.19
diff --git a/main.go b/main.go
index bd1905e..5258af5 100644
--- a/main.go
+++ b/main.go
@@ -21,12 +21,12 @@ func main() {
prettyPtr := flag.Bool("pretty", true, "display pretty output; otherwise do: -pretty=false")
flag.Parse()
if len(os.Args) < 2 {
- fmt.Println("Usage: scrub-pii ")
+ fmt.Println("Usage: scrub ")
os.Exit(1)
}
- inputPath := flag.Args()[0]
- sensitiveFieldsPath := flag.Args()[1]
+ sensitiveFieldsPath := flag.Args()[0]
+ inputPath := flag.Args()[1]
// scrub the input file for the given sensitive fields
output, err := ScrubPersonalInfo(inputPath, sensitiveFieldsPath)
diff --git a/scrub b/scrub
new file mode 100755
index 0000000..1101fad
Binary files /dev/null and b/scrub differ
diff --git a/scrub-pii b/scrub-pii
deleted file mode 100755
index a9cf129..0000000
Binary files a/scrub-pii and /dev/null differ
diff --git a/scrub.go b/scrub.go
index ff2acf7..36327fd 100644
--- a/scrub.go
+++ b/scrub.go
@@ -12,15 +12,8 @@ import (
"reflect"
"regexp"
"strings"
-
- "github.com/perimeterx/marshmallow"
)
-// the only known property in the input json ahead of time
-type userID struct {
- ID int `json:"id"`
-}
-
// return a json string with scrubbed sensitive information from the provided input and sensitive fields files
func ScrubPersonalInfo(inputPath, sensitiveFieldsPath string) (string, error) {
if inputPath == "" {
@@ -33,8 +26,8 @@ func ScrubPersonalInfo(inputPath, sensitiveFieldsPath string) (string, error) {
inputFile, _ := ioutil.ReadFile(inputPath)
sensitiveFields, _ := getSensitiveFields(sensitiveFieldsPath)
- uid := userID{}
- input, err := marshmallow.Unmarshal([]byte(inputFile), &uid)
+ var input interface{}
+ err := json.Unmarshal([]byte(inputFile), &input)
if err != nil {
log.Fatal(err)
return "", err
@@ -51,7 +44,7 @@ func ScrubPersonalInfo(inputPath, sensitiveFieldsPath string) (string, error) {
b, _ = json.Marshal(input)
// reset all scrubbed values back to their original values
- scrubRecursive(input, "", sensitiveFields, &savedValues, false /* unmask */, false /* doScrub */)
+ scrubRecursive(&input, "", sensitiveFields, &savedValues, false /* unmask */, false /* doScrub */)
// return the scrubbed string
return string(b), nil
@@ -80,10 +73,6 @@ func scrubRecursive(field interface{}, fieldName string, sensitiveFields map[str
return
}
- // skip if no field names
- if fieldName == "" {
- return
- }
// skip these types
if !fieldValue.CanSet() || fieldValue.IsZero() {
return
@@ -91,8 +80,9 @@ func scrubRecursive(field interface{}, fieldName string, sensitiveFields map[str
if fieldType.Kind() == reflect.Interface {
_, doFieldScrub := sensitiveFields[strings.ToLower(fieldName)]
- // if parent field is sensitive field, scrub all children (sensitive or not), track in doScrub
- ok := doScrub || doFieldScrub
+ // never scrub a field that has no name; otherwise scrub when this field is sensitive or when a
+ // parent field was sensitive (tracked in doScrub), so all of its children are scrubbed too
+ ok := fieldName != "" && (doScrub || doFieldScrub)
// scrub leaf nodes; otherwise, continue recursing
switch fValue := fieldValue.Interface().(type) {
diff --git a/tests/00_basic_array/input.json b/tests/00_basic_array/input.json
new file mode 100644
index 0000000..e2bde86
--- /dev/null
+++ b/tests/00_basic_array/input.json
@@ -0,0 +1,12 @@
+[{
+ "name": "Kelly Doe",
+ "email": "kdoe@example.com",
+ "id": 12324,
+ "phone": "5551234567"
+},
+{
+ "name": "Kelly Doe",
+ "email": "kdoe@example.com",
+ "id": 12324,
+ "phone": "5551234567"
+}]
diff --git a/tests/00_basic_array/output.json b/tests/00_basic_array/output.json
new file mode 100644
index 0000000..020c83b
--- /dev/null
+++ b/tests/00_basic_array/output.json
@@ -0,0 +1,12 @@
+[{
+ "name": "***** ***",
+ "email": "****@*******.***",
+ "id": 12324,
+ "phone": "**********"
+},
+{
+ "name": "***** ***",
+ "email": "****@*******.***",
+ "id": 12324,
+ "phone": "**********"
+}]
\ No newline at end of file
diff --git a/tests/00_basic_array/sensitive_fields.txt b/tests/00_basic_array/sensitive_fields.txt
new file mode 100644
index 0000000..2196018
--- /dev/null
+++ b/tests/00_basic_array/sensitive_fields.txt
@@ -0,0 +1,3 @@
+name
+email
+phone
diff --git a/tests/01_alphanumeric_array/input.json b/tests/01_alphanumeric_array/input.json
new file mode 100644
index 0000000..345914e
--- /dev/null
+++ b/tests/01_alphanumeric_array/input.json
@@ -0,0 +1,12 @@
+[{
+ "name": "Kelly Doe",
+ "email": "k.doe@example.com",
+ "id": 12324,
+ "phone": "(555) 123 - 4567"
+},
+{
+ "name": "Kelly Doe",
+ "email": "k.doe@example.com",
+ "id": 12324,
+ "phone": "(555) 123 - 4567"
+}]
diff --git a/tests/01_alphanumeric_array/output.json b/tests/01_alphanumeric_array/output.json
new file mode 100644
index 0000000..b09c9a8
--- /dev/null
+++ b/tests/01_alphanumeric_array/output.json
@@ -0,0 +1,12 @@
+[{
+ "name": "***** ***",
+ "email": "*.***@*******.***",
+ "id": 12324,
+ "phone": "(***) *** - ****"
+},
+{
+ "name": "***** ***",
+ "email": "*.***@*******.***",
+ "id": 12324,
+ "phone": "(***) *** - ****"
+}]
diff --git a/tests/01_alphanumeric_array/sensitive_fields.txt b/tests/01_alphanumeric_array/sensitive_fields.txt
new file mode 100644
index 0000000..2196018
--- /dev/null
+++ b/tests/01_alphanumeric_array/sensitive_fields.txt
@@ -0,0 +1,3 @@
+name
+email
+phone
diff --git a/tests/02_array_array/input.json b/tests/02_array_array/input.json
new file mode 100644
index 0000000..2d34598
--- /dev/null
+++ b/tests/02_array_array/input.json
@@ -0,0 +1,12 @@
+[{
+ "name": "Kelly Doe",
+ "email": ["kdoe@example.com", "kelly@gmail.com", "kelly@doe.net"],
+ "id": 12324,
+ "phone": "5551234567"
+},
+{
+ "name": "Kelly Doe",
+ "email": ["kdoe@example.com", "kelly@gmail.com", "kelly@doe.net"],
+ "id": 12324,
+ "phone": "5551234567"
+}]
diff --git a/tests/02_array_array/output.json b/tests/02_array_array/output.json
new file mode 100644
index 0000000..9d1bdc9
--- /dev/null
+++ b/tests/02_array_array/output.json
@@ -0,0 +1,12 @@
+[{
+ "name": "***** ***",
+ "email": ["****@*******.***", "*****@*****.***", "*****@***.***"],
+ "id": 12324,
+ "phone": "**********"
+},
+{
+ "name": "***** ***",
+ "email": ["****@*******.***", "*****@*****.***", "*****@***.***"],
+ "id": 12324,
+ "phone": "**********"
+}]
diff --git a/tests/02_array_array/sensitive_fields.txt b/tests/02_array_array/sensitive_fields.txt
new file mode 100644
index 0000000..2196018
--- /dev/null
+++ b/tests/02_array_array/sensitive_fields.txt
@@ -0,0 +1,3 @@
+name
+email
+phone
diff --git a/tests/03_booleans_array/input.json b/tests/03_booleans_array/input.json
new file mode 100644
index 0000000..06636e7
--- /dev/null
+++ b/tests/03_booleans_array/input.json
@@ -0,0 +1,16 @@
+[{
+ "name": "Kelly Doe",
+ "email": "kdoe@example.com",
+ "id": 12324,
+ "phone": "5551234567",
+ "us_citizen": false,
+ "admin": false
+},
+{
+ "name": "Kelly Doe",
+ "email": "kdoe@example.com",
+ "id": 12324,
+ "phone": "5551234567",
+ "us_citizen": false,
+ "admin": false
+}]
diff --git a/tests/03_booleans_array/output.json b/tests/03_booleans_array/output.json
new file mode 100644
index 0000000..e35baee
--- /dev/null
+++ b/tests/03_booleans_array/output.json
@@ -0,0 +1,16 @@
+[{
+ "name": "***** ***",
+ "email": "****@*******.***",
+ "id": 12324,
+ "phone": "**********",
+ "us_citizen": "-",
+ "admin": false
+},
+{
+ "name": "***** ***",
+ "email": "****@*******.***",
+ "id": 12324,
+ "phone": "**********",
+ "us_citizen": "-",
+ "admin": false
+}]
\ No newline at end of file
diff --git a/tests/03_booleans_array/sensitive_fields.txt b/tests/03_booleans_array/sensitive_fields.txt
new file mode 100644
index 0000000..b262402
--- /dev/null
+++ b/tests/03_booleans_array/sensitive_fields.txt
@@ -0,0 +1,4 @@
+name
+email
+phone
+us_citizen
diff --git a/tests/04_numbers_array/input.json b/tests/04_numbers_array/input.json
new file mode 100644
index 0000000..8f0e503
--- /dev/null
+++ b/tests/04_numbers_array/input.json
@@ -0,0 +1,12 @@
+[{
+ "name": "Kelly Doe",
+ "email": "kdoe@example.com",
+ "id": 12324,
+ "phone": 5551234567
+},
+{
+ "name": "Kelly Doe",
+ "email": "kdoe@example.com",
+ "id": 12324,
+ "phone": 5551234567
+}]
diff --git a/tests/04_numbers_array/output.json b/tests/04_numbers_array/output.json
new file mode 100644
index 0000000..d475822
--- /dev/null
+++ b/tests/04_numbers_array/output.json
@@ -0,0 +1,12 @@
+[{
+ "name": "***** ***",
+ "email": "****@*******.***",
+ "id": 12324,
+ "phone": "**********"
+},
+{
+ "name": "***** ***",
+ "email": "****@*******.***",
+ "id": 12324,
+ "phone": "**********"
+}]
diff --git a/tests/04_numbers_array/sensitive_fields.txt b/tests/04_numbers_array/sensitive_fields.txt
new file mode 100644
index 0000000..2196018
--- /dev/null
+++ b/tests/04_numbers_array/sensitive_fields.txt
@@ -0,0 +1,3 @@
+name
+email
+phone
diff --git a/tests/05_floats_array/input.json b/tests/05_floats_array/input.json
new file mode 100644
index 0000000..f778918
--- /dev/null
+++ b/tests/05_floats_array/input.json
@@ -0,0 +1,16 @@
+[{
+ "name": "Kelly Doe",
+ "email": "kdoe@example.com",
+ "id": 12324,
+ "phone": "5551234567",
+ "account_balance": 1234.56,
+ "title": "manager"
+},
+{
+ "name": "Kelly Doe",
+ "email": "kdoe@example.com",
+ "id": 12324,
+ "phone": "5551234567",
+ "account_balance": 1234.56,
+ "title": "manager"
+}]
diff --git a/tests/05_floats_array/output.json b/tests/05_floats_array/output.json
new file mode 100644
index 0000000..c8552fa
--- /dev/null
+++ b/tests/05_floats_array/output.json
@@ -0,0 +1,16 @@
+[{
+ "name": "***** ***",
+ "email": "****@*******.***",
+ "id": 12324,
+ "phone": "**********",
+ "account_balance": "****.**",
+ "title": "manager"
+},
+{
+ "name": "***** ***",
+ "email": "****@*******.***",
+ "id": 12324,
+ "phone": "**********",
+ "account_balance": "****.**",
+ "title": "manager"
+}]
diff --git a/tests/05_floats_array/sensitive_fields.txt b/tests/05_floats_array/sensitive_fields.txt
new file mode 100644
index 0000000..12150c5
--- /dev/null
+++ b/tests/05_floats_array/sensitive_fields.txt
@@ -0,0 +1,4 @@
+name
+email
+phone
+account_balance
diff --git a/tests/06_nested_object_array/input.json b/tests/06_nested_object_array/input.json
new file mode 100644
index 0000000..1287c10
--- /dev/null
+++ b/tests/06_nested_object_array/input.json
@@ -0,0 +1,17 @@
+[{
+ "name": "Kelly Doe",
+ "id": 12324,
+ "contact": {
+ "email": "kdoe@example.com",
+ "phone": "5551234567"
+ }
+},
+{
+ "name": "Kelly Doe",
+ "id": 12324,
+ "contact": {
+ "email": "kdoe@example.com",
+ "phone": "5551234567"
+ }
+}]
+
diff --git a/tests/06_nested_object_array/output.json b/tests/06_nested_object_array/output.json
new file mode 100644
index 0000000..cbfc7cd
--- /dev/null
+++ b/tests/06_nested_object_array/output.json
@@ -0,0 +1,16 @@
+[{
+ "name": "***** ***",
+ "id": 12324,
+ "contact": {
+ "email": "****@*******.***",
+ "phone": "**********"
+ }
+},
+{
+ "name": "***** ***",
+ "id": 12324,
+ "contact": {
+ "email": "****@*******.***",
+ "phone": "**********"
+ }
+}]
diff --git a/tests/06_nested_object_array/sensitive_fields.txt b/tests/06_nested_object_array/sensitive_fields.txt
new file mode 100644
index 0000000..2196018
--- /dev/null
+++ b/tests/06_nested_object_array/sensitive_fields.txt
@@ -0,0 +1,3 @@
+name
+email
+phone
diff --git a/tests/07_mixed_type_arrays_array/input.json b/tests/07_mixed_type_arrays_array/input.json
new file mode 100644
index 0000000..90f5324
--- /dev/null
+++ b/tests/07_mixed_type_arrays_array/input.json
@@ -0,0 +1,46 @@
+[{
+ "name": "Kelly Doe",
+ "email": "kdoe@example.com",
+ "id": 12324,
+ "phone": "5551234567",
+ "contacts": [{
+ "name": "Bob Doe",
+ "us_citizen": false
+ },
+ 12345,
+ "bob@example.com",
+ {
+ "id": 2343,
+ "name": "John Smith",
+ "email": "john.smith@yahoo.com"
+ },
+ {
+ "phone": "(555) 234-2343",
+ "name": "Joe Schmoe",
+ "email": "jschmoe@aol.com"
+ }
+ ]
+},
+{
+ "name": "Kelly Doe",
+ "email": "kdoe@example.com",
+ "id": 12324,
+ "phone": "5551234567",
+ "contacts": [{
+ "name": "Bob Doe",
+ "us_citizen": false
+ },
+ 12345,
+ "bob@example.com",
+ {
+ "id": 2343,
+ "name": "John Smith",
+ "email": "john.smith@yahoo.com"
+ },
+ {
+ "phone": "(555) 234-2343",
+ "name": "Joe Schmoe",
+ "email": "jschmoe@aol.com"
+ }
+ ]
+}]
diff --git a/tests/07_mixed_type_arrays_array/output.json b/tests/07_mixed_type_arrays_array/output.json
new file mode 100644
index 0000000..473c324
--- /dev/null
+++ b/tests/07_mixed_type_arrays_array/output.json
@@ -0,0 +1,46 @@
+[{
+ "name": "***** ***",
+ "email": "****@*******.***",
+ "id": 12324,
+ "phone": "**********",
+ "contacts": [{
+ "name": "*** ***",
+ "us_citizen": "-"
+ },
+ 12345,
+ "bob@example.com",
+ {
+ "id": 2343,
+ "name": "**** *****",
+ "email": "****.*****@*****.***"
+ },
+ {
+ "phone": "(***) ***-****",
+ "name": "*** ******",
+ "email": "*******@***.***"
+ }
+ ]
+},
+{
+ "name": "***** ***",
+ "email": "****@*******.***",
+ "id": 12324,
+ "phone": "**********",
+ "contacts": [{
+ "name": "*** ***",
+ "us_citizen": "-"
+ },
+ 12345,
+ "bob@example.com",
+ {
+ "id": 2343,
+ "name": "**** *****",
+ "email": "****.*****@*****.***"
+ },
+ {
+ "phone": "(***) ***-****",
+ "name": "*** ******",
+ "email": "*******@***.***"
+ }
+ ]
+}]
diff --git a/tests/07_mixed_type_arrays_array/sensitive_fields.txt b/tests/07_mixed_type_arrays_array/sensitive_fields.txt
new file mode 100644
index 0000000..afb6ff8
--- /dev/null
+++ b/tests/07_mixed_type_arrays_array/sensitive_fields.txt
@@ -0,0 +1,5 @@
+name
+email
+phone
+us_citizen
+account_balance
diff --git a/tests/08_sensitive_nested_objects_array/input.json b/tests/08_sensitive_nested_objects_array/input.json
new file mode 100644
index 0000000..b18d6d2
--- /dev/null
+++ b/tests/08_sensitive_nested_objects_array/input.json
@@ -0,0 +1,19 @@
+[{
+ "name": {
+ "first": "Kelly",
+ "last": "Doe"
+ },
+ "id": 12324,
+ "email": "kdoe@example.com",
+ "phone": "5551234567"
+},
+{
+ "name": {
+ "first": "Kelly",
+ "last": "Doe"
+ },
+ "id": 12324,
+ "email": "kdoe@example.com",
+ "phone": "5551234567"
+}]
+
diff --git a/tests/08_sensitive_nested_objects_array/output.json b/tests/08_sensitive_nested_objects_array/output.json
new file mode 100644
index 0000000..89fa88c
--- /dev/null
+++ b/tests/08_sensitive_nested_objects_array/output.json
@@ -0,0 +1,19 @@
+[{
+ "name": {
+ "first": "*****",
+ "last": "***"
+ },
+ "id": 12324,
+ "email": "****@*******.***",
+ "phone": "**********"
+},
+{
+ "name": {
+ "first": "*****",
+ "last": "***"
+ },
+ "id": 12324,
+ "email": "****@*******.***",
+ "phone": "**********"
+}]
+
diff --git a/tests/08_sensitive_nested_objects_array/sensitive_fields.txt b/tests/08_sensitive_nested_objects_array/sensitive_fields.txt
new file mode 100644
index 0000000..2196018
--- /dev/null
+++ b/tests/08_sensitive_nested_objects_array/sensitive_fields.txt
@@ -0,0 +1,3 @@
+name
+email
+phone
diff --git a/tests/09_sensitive_nested_arrays_array/input.json b/tests/09_sensitive_nested_arrays_array/input.json
new file mode 100644
index 0000000..7da9960
--- /dev/null
+++ b/tests/09_sensitive_nested_arrays_array/input.json
@@ -0,0 +1,34 @@
+[{
+ "name": "Kelly Doe",
+ "id": 12324,
+ "email": [{
+ "id": 23432,
+ "value": "kdoe@example.com"
+ }, {
+ "id": 23432,
+ "value": "kdoe@gmail.com"
+ }
+ ],
+ "phone": [
+ ["555", "123", "4561"],
+ ["555", "989", "4444"],
+ ["555", "781", "4630"]
+ ]
+},
+{
+ "name": "Kelly Doe",
+ "id": 12324,
+ "email": [{
+ "id": 23432,
+ "value": "kdoe@example.com"
+ }, {
+ "id": 23432,
+ "value": "kdoe@gmail.com"
+ }
+ ],
+ "phone": [
+ ["555", "123", "4561"],
+ ["555", "989", "4444"],
+ ["555", "781", "4630"]
+ ]
+}]
\ No newline at end of file
diff --git a/tests/09_sensitive_nested_arrays_array/output.json b/tests/09_sensitive_nested_arrays_array/output.json
new file mode 100644
index 0000000..0c43662
--- /dev/null
+++ b/tests/09_sensitive_nested_arrays_array/output.json
@@ -0,0 +1,34 @@
+[{
+ "name": "***** ***",
+ "id": 12324,
+ "email": [{
+ "id": "*****",
+ "value": "****@*******.***"
+ }, {
+ "id": "*****",
+ "value": "****@*****.***"
+ }
+ ],
+ "phone": [
+ ["***", "***", "****"],
+ ["***", "***", "****"],
+ ["***", "***", "****"]
+ ]
+},
+{
+ "name": "***** ***",
+ "id": 12324,
+ "email": [{
+ "id": "*****",
+ "value": "****@*******.***"
+ }, {
+ "id": "*****",
+ "value": "****@*****.***"
+ }
+ ],
+ "phone": [
+ ["***", "***", "****"],
+ ["***", "***", "****"],
+ ["***", "***", "****"]
+ ]
+}]
\ No newline at end of file
diff --git a/tests/09_sensitive_nested_arrays_array/sensitive_fields.txt b/tests/09_sensitive_nested_arrays_array/sensitive_fields.txt
new file mode 100644
index 0000000..afb6ff8
--- /dev/null
+++ b/tests/09_sensitive_nested_arrays_array/sensitive_fields.txt
@@ -0,0 +1,5 @@
+name
+email
+phone
+us_citizen
+account_balance