diff --git a/go.mod b/go.mod index 5539eec..f15f5ee 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module scrub-pii +module scrub go 1.19 diff --git a/main.go b/main.go index bd1905e..5258af5 100644 --- a/main.go +++ b/main.go @@ -21,12 +21,12 @@ func main() { prettyPtr := flag.Bool("pretty", true, "display pretty output; otherwise do: -pretty=false") flag.Parse() if len(os.Args) < 2 { - fmt.Println("Usage: scrub-pii ") + fmt.Println("Usage: scrub ") os.Exit(1) } - inputPath := flag.Args()[0] - sensitiveFieldsPath := flag.Args()[1] + sensitiveFieldsPath := flag.Args()[0] + inputPath := flag.Args()[1] // scrub the input file for the given sensitive fields output, err := ScrubPersonalInfo(inputPath, sensitiveFieldsPath) diff --git a/scrub b/scrub new file mode 100755 index 0000000..1101fad Binary files /dev/null and b/scrub differ diff --git a/scrub-pii b/scrub-pii deleted file mode 100755 index a9cf129..0000000 Binary files a/scrub-pii and /dev/null differ diff --git a/scrub.go b/scrub.go index ff2acf7..36327fd 100644 --- a/scrub.go +++ b/scrub.go @@ -12,15 +12,8 @@ import ( "reflect" "regexp" "strings" - - "github.com/perimeterx/marshmallow" ) -// the only known property in the input json ahead of time -type userID struct { - ID int `json:"id"` -} - // return a json string with scrubbed sensitive information from the provided input and sensitive fields files func ScrubPersonalInfo(inputPath, sensitiveFieldsPath string) (string, error) { if inputPath == "" { @@ -33,8 +26,8 @@ func ScrubPersonalInfo(inputPath, sensitiveFieldsPath string) (string, error) { inputFile, _ := ioutil.ReadFile(inputPath) sensitiveFields, _ := getSensitiveFields(sensitiveFieldsPath) - uid := userID{} - input, err := marshmallow.Unmarshal([]byte(inputFile), &uid) + var input interface{} + err := json.Unmarshal([]byte(inputFile), &input) if err != nil { log.Fatal(err) return "", err @@ -51,7 +44,7 @@ func ScrubPersonalInfo(inputPath, sensitiveFieldsPath string) (string, error) { b, _ = json.Marshal(input) // reset all scrubbed values back to their original values - scrubRecursive(input, "", sensitiveFields, &savedValues, false /* unmask */, false /* doScrub */) + scrubRecursive(&input, "", sensitiveFields, &savedValues, false /* unmask */, false /* doScrub */) // return the scrubbed string return string(b), nil @@ -80,10 +73,6 @@ func scrubRecursive(field interface{}, fieldName string, sensitiveFields map[str return } - // skip if no field names - if fieldName == "" { - return - } // skip these types if !fieldValue.CanSet() || fieldValue.IsZero() { return @@ -91,8 +80,9 @@ func scrubRecursive(field interface{}, fieldName string, sensitiveFields map[str if fieldType.Kind() == reflect.Interface { _, doFieldScrub := sensitiveFields[strings.ToLower(fieldName)] - // if parent field is sensitive field, scrub all children (sensitive or not), track in doScrub - ok := doScrub || doFieldScrub + // skip if no field name and if parent field is a sensitive field, scrub all children + // (sensitive or not), track in doScrub + ok := fieldName != "" && (doScrub || doFieldScrub) // scrub leaf nodes; otherwise, continue recursing switch fValue := fieldValue.Interface().(type) { diff --git a/tests/00_basic_array/input.json b/tests/00_basic_array/input.json new file mode 100644 index 0000000..e2bde86 --- /dev/null +++ b/tests/00_basic_array/input.json @@ -0,0 +1,12 @@ +[{ + "name": "Kelly Doe", + "email": "kdoe@example.com", + "id": 12324, + "phone": "5551234567" +}, +{ + "name": "Kelly Doe", + "email": "kdoe@example.com", + "id": 12324, + "phone": "5551234567" +}] diff --git a/tests/00_basic_array/output.json b/tests/00_basic_array/output.json new file mode 100644 index 0000000..020c83b --- /dev/null +++ b/tests/00_basic_array/output.json @@ -0,0 +1,12 @@ +[{ + "name": "***** ***", + "email": "****@*******.***", + "id": 12324, + "phone": "**********" +}, +{ + "name": "***** ***", + "email": "****@*******.***", + "id": 12324, + "phone": "**********" +}] \ No newline at end of file diff --git a/tests/00_basic_array/sensitive_fields.txt b/tests/00_basic_array/sensitive_fields.txt new file mode 100644 index 0000000..2196018 --- /dev/null +++ b/tests/00_basic_array/sensitive_fields.txt @@ -0,0 +1,3 @@ +name +email +phone diff --git a/tests/01_alphanumeric_array/input.json b/tests/01_alphanumeric_array/input.json new file mode 100644 index 0000000..345914e --- /dev/null +++ b/tests/01_alphanumeric_array/input.json @@ -0,0 +1,12 @@ +[{ + "name": "Kelly Doe", + "email": "k.doe@example.com", + "id": 12324, + "phone": "(555) 123 - 4567" +}, +{ + "name": "Kelly Doe", + "email": "k.doe@example.com", + "id": 12324, + "phone": "(555) 123 - 4567" +}] diff --git a/tests/01_alphanumeric_array/output.json b/tests/01_alphanumeric_array/output.json new file mode 100644 index 0000000..b09c9a8 --- /dev/null +++ b/tests/01_alphanumeric_array/output.json @@ -0,0 +1,12 @@ +[{ + "name": "***** ***", + "email": "*.***@*******.***", + "id": 12324, + "phone": "(***) *** - ****" +}, +{ + "name": "***** ***", + "email": "*.***@*******.***", + "id": 12324, + "phone": "(***) *** - ****" +}] diff --git a/tests/01_alphanumeric_array/sensitive_fields.txt b/tests/01_alphanumeric_array/sensitive_fields.txt new file mode 100644 index 0000000..2196018 --- /dev/null +++ b/tests/01_alphanumeric_array/sensitive_fields.txt @@ -0,0 +1,3 @@ +name +email +phone diff --git a/tests/02_array_array/input.json b/tests/02_array_array/input.json new file mode 100644 index 0000000..2d34598 --- /dev/null +++ b/tests/02_array_array/input.json @@ -0,0 +1,12 @@ +[{ + "name": "Kelly Doe", + "email": ["kdoe@example.com", "kelly@gmail.com", "kelly@doe.net"], + "id": 12324, + "phone": "5551234567" +}, +{ + "name": "Kelly Doe", + "email": ["kdoe@example.com", "kelly@gmail.com", "kelly@doe.net"], + "id": 12324, + "phone": "5551234567" +}] diff --git a/tests/02_array_array/output.json b/tests/02_array_array/output.json new file mode 100644 index 0000000..9d1bdc9 --- /dev/null +++ b/tests/02_array_array/output.json @@ -0,0 +1,12 @@ +[{ + "name": "***** ***", + "email": ["****@*******.***", "*****@*****.***", "*****@***.***"], + "id": 12324, + "phone": "**********" +}, +{ + "name": "***** ***", + "email": ["****@*******.***", "*****@*****.***", "*****@***.***"], + "id": 12324, + "phone": "**********" +}] diff --git a/tests/02_array_array/sensitive_fields.txt b/tests/02_array_array/sensitive_fields.txt new file mode 100644 index 0000000..2196018 --- /dev/null +++ b/tests/02_array_array/sensitive_fields.txt @@ -0,0 +1,3 @@ +name +email +phone diff --git a/tests/03_booleans_array/input.json b/tests/03_booleans_array/input.json new file mode 100644 index 0000000..06636e7 --- /dev/null +++ b/tests/03_booleans_array/input.json @@ -0,0 +1,16 @@ +[{ + "name": "Kelly Doe", + "email": "kdoe@example.com", + "id": 12324, + "phone": "5551234567", + "us_citizen": false, + "admin": false +}, +{ + "name": "Kelly Doe", + "email": "kdoe@example.com", + "id": 12324, + "phone": "5551234567", + "us_citizen": false, + "admin": false +}] diff --git a/tests/03_booleans_array/output.json b/tests/03_booleans_array/output.json new file mode 100644 index 0000000..e35baee --- /dev/null +++ b/tests/03_booleans_array/output.json @@ -0,0 +1,16 @@ +[{ + "name": "***** ***", + "email": "****@*******.***", + "id": 12324, + "phone": "**********", + "us_citizen": "-", + "admin": false +}, +{ + "name": "***** ***", + "email": "****@*******.***", + "id": 12324, + "phone": "**********", + "us_citizen": "-", + "admin": false +}] \ No newline at end of file diff --git a/tests/03_booleans_array/sensitive_fields.txt b/tests/03_booleans_array/sensitive_fields.txt new file mode 100644 index 0000000..b262402 --- /dev/null +++ b/tests/03_booleans_array/sensitive_fields.txt @@ -0,0 +1,4 @@ +name +email +phone +us_citizen diff --git a/tests/04_numbers_array/input.json b/tests/04_numbers_array/input.json new file mode 100644 index 0000000..8f0e503 --- /dev/null +++ b/tests/04_numbers_array/input.json @@ -0,0 +1,12 @@ +[{ + "name": "Kelly Doe", + "email": "kdoe@example.com", + "id": 12324, + "phone": 5551234567 +}, +{ + "name": "Kelly Doe", + "email": "kdoe@example.com", + "id": 12324, + "phone": 5551234567 +}] diff --git a/tests/04_numbers_array/output.json b/tests/04_numbers_array/output.json new file mode 100644 index 0000000..d475822 --- /dev/null +++ b/tests/04_numbers_array/output.json @@ -0,0 +1,12 @@ +[{ + "name": "***** ***", + "email": "****@*******.***", + "id": 12324, + "phone": "**********" +}, +{ + "name": "***** ***", + "email": "****@*******.***", + "id": 12324, + "phone": "**********" +}] diff --git a/tests/04_numbers_array/sensitive_fields.txt b/tests/04_numbers_array/sensitive_fields.txt new file mode 100644 index 0000000..2196018 --- /dev/null +++ b/tests/04_numbers_array/sensitive_fields.txt @@ -0,0 +1,3 @@ +name +email +phone diff --git a/tests/05_floats_array/input.json b/tests/05_floats_array/input.json new file mode 100644 index 0000000..f778918 --- /dev/null +++ b/tests/05_floats_array/input.json @@ -0,0 +1,16 @@ +[{ + "name": "Kelly Doe", + "email": "kdoe@example.com", + "id": 12324, + "phone": "5551234567", + "account_balance": 1234.56, + "title": "manager" +}, +{ + "name": "Kelly Doe", + "email": "kdoe@example.com", + "id": 12324, + "phone": "5551234567", + "account_balance": 1234.56, + "title": "manager" +}] diff --git a/tests/05_floats_array/output.json b/tests/05_floats_array/output.json new file mode 100644 index 0000000..c8552fa --- /dev/null +++ b/tests/05_floats_array/output.json @@ -0,0 +1,16 @@ +[{ + "name": "***** ***", + "email": "****@*******.***", + "id": 12324, + "phone": "**********", + "account_balance": "****.**", + "title": "manager" +}, +{ + "name": "***** ***", + "email": "****@*******.***", + "id": 12324, + "phone": "**********", + "account_balance": "****.**", + "title": "manager" +}] diff --git a/tests/05_floats_array/sensitive_fields.txt b/tests/05_floats_array/sensitive_fields.txt new file mode 100644 index 0000000..12150c5 --- /dev/null +++ b/tests/05_floats_array/sensitive_fields.txt @@ -0,0 +1,4 @@ +name +email +phone +account_balance diff --git a/tests/06_nested_object_array/input.json b/tests/06_nested_object_array/input.json new file mode 100644 index 0000000..1287c10 --- /dev/null +++ b/tests/06_nested_object_array/input.json @@ -0,0 +1,17 @@ +[{ + "name": "Kelly Doe", + "id": 12324, + "contact": { + "email": "kdoe@example.com", + "phone": "5551234567" + } +}, +{ + "name": "Kelly Doe", + "id": 12324, + "contact": { + "email": "kdoe@example.com", + "phone": "5551234567" + } +}] + diff --git a/tests/06_nested_object_array/output.json b/tests/06_nested_object_array/output.json new file mode 100644 index 0000000..cbfc7cd --- /dev/null +++ b/tests/06_nested_object_array/output.json @@ -0,0 +1,16 @@ +[{ + "name": "***** ***", + "id": 12324, + "contact": { + "email": "****@*******.***", + "phone": "**********" + } +}, +{ + "name": "***** ***", + "id": 12324, + "contact": { + "email": "****@*******.***", + "phone": "**********" + } +}] diff --git a/tests/06_nested_object_array/sensitive_fields.txt b/tests/06_nested_object_array/sensitive_fields.txt new file mode 100644 index 0000000..2196018 --- /dev/null +++ b/tests/06_nested_object_array/sensitive_fields.txt @@ -0,0 +1,3 @@ +name +email +phone diff --git a/tests/07_mixed_type_arrays_array/input.json b/tests/07_mixed_type_arrays_array/input.json new file mode 100644 index 0000000..90f5324 --- /dev/null +++ b/tests/07_mixed_type_arrays_array/input.json @@ -0,0 +1,46 @@ +[{ + "name": "Kelly Doe", + "email": "kdoe@example.com", + "id": 12324, + "phone": "5551234567", + "contacts": [{ + "name": "Bob Doe", + "us_citizen": false + }, + 12345, + "bob@example.com", + { + "id": 2343, + "name": "John Smith", + "email": "john.smith@yahoo.com" + }, + { + "phone": "(555) 234-2343", + "name": "Joe Schmoe", + "email": "jschmoe@aol.com" + } + ] +}, +{ + "name": "Kelly Doe", + "email": "kdoe@example.com", + "id": 12324, + "phone": "5551234567", + "contacts": [{ + "name": "Bob Doe", + "us_citizen": false + }, + 12345, + "bob@example.com", + { + "id": 2343, + "name": "John Smith", + "email": "john.smith@yahoo.com" + }, + { + "phone": "(555) 234-2343", + "name": "Joe Schmoe", + "email": "jschmoe@aol.com" + } + ] +}] diff --git a/tests/07_mixed_type_arrays_array/output.json b/tests/07_mixed_type_arrays_array/output.json new file mode 100644 index 0000000..473c324 --- /dev/null +++ b/tests/07_mixed_type_arrays_array/output.json @@ -0,0 +1,46 @@ +[{ + "name": "***** ***", + "email": "****@*******.***", + "id": 12324, + "phone": "**********", + "contacts": [{ + "name": "*** ***", + "us_citizen": "-" + }, + 12345, + "bob@example.com", + { + "id": 2343, + "name": "**** *****", + "email": "****.*****@*****.***" + }, + { + "phone": "(***) ***-****", + "name": "*** ******", + "email": "*******@***.***" + } + ] +}, +{ + "name": "***** ***", + "email": "****@*******.***", + "id": 12324, + "phone": "**********", + "contacts": [{ + "name": "*** ***", + "us_citizen": "-" + }, + 12345, + "bob@example.com", + { + "id": 2343, + "name": "**** *****", + "email": "****.*****@*****.***" + }, + { + "phone": "(***) ***-****", + "name": "*** ******", + "email": "*******@***.***" + } + ] +}] diff --git a/tests/07_mixed_type_arrays_array/sensitive_fields.txt b/tests/07_mixed_type_arrays_array/sensitive_fields.txt new file mode 100644 index 0000000..afb6ff8 --- /dev/null +++ b/tests/07_mixed_type_arrays_array/sensitive_fields.txt @@ -0,0 +1,5 @@ +name +email +phone +us_citizen +account_balance diff --git a/tests/08_sensitive_nested_objects_array/input.json b/tests/08_sensitive_nested_objects_array/input.json new file mode 100644 index 0000000..b18d6d2 --- /dev/null +++ b/tests/08_sensitive_nested_objects_array/input.json @@ -0,0 +1,19 @@ +[{ + "name": { + "first": "Kelly", + "last": "Doe" + }, + "id": 12324, + "email": "kdoe@example.com", + "phone": "5551234567" +}, +{ + "name": { + "first": "Kelly", + "last": "Doe" + }, + "id": 12324, + "email": "kdoe@example.com", + "phone": "5551234567" +}] + diff --git a/tests/08_sensitive_nested_objects_array/output.json b/tests/08_sensitive_nested_objects_array/output.json new file mode 100644 index 0000000..89fa88c --- /dev/null +++ b/tests/08_sensitive_nested_objects_array/output.json @@ -0,0 +1,19 @@ +[{ + "name": { + "first": "*****", + "last": "***" + }, + "id": 12324, + "email": "****@*******.***", + "phone": "**********" +}, +{ + "name": { + "first": "*****", + "last": "***" + }, + "id": 12324, + "email": "****@*******.***", + "phone": "**********" +}] + diff --git a/tests/08_sensitive_nested_objects_array/sensitive_fields.txt b/tests/08_sensitive_nested_objects_array/sensitive_fields.txt new file mode 100644 index 0000000..2196018 --- /dev/null +++ b/tests/08_sensitive_nested_objects_array/sensitive_fields.txt @@ -0,0 +1,3 @@ +name +email +phone diff --git a/tests/09_sensitive_nested_arrays_array/input.json b/tests/09_sensitive_nested_arrays_array/input.json new file mode 100644 index 0000000..7da9960 --- /dev/null +++ b/tests/09_sensitive_nested_arrays_array/input.json @@ -0,0 +1,34 @@ +[{ + "name": "Kelly Doe", + "id": 12324, + "email": [{ + "id": 23432, + "value": "kdoe@example.com" + }, { + "id": 23432, + "value": "kdoe@gmail.com" + } + ], + "phone": [ + ["555", "123", "4561"], + ["555", "989", "4444"], + ["555", "781", "4630"] + ] +}, +{ + "name": "Kelly Doe", + "id": 12324, + "email": [{ + "id": 23432, + "value": "kdoe@example.com" + }, { + "id": 23432, + "value": "kdoe@gmail.com" + } + ], + "phone": [ + ["555", "123", "4561"], + ["555", "989", "4444"], + ["555", "781", "4630"] + ] +}] \ No newline at end of file diff --git a/tests/09_sensitive_nested_arrays_array/output.json b/tests/09_sensitive_nested_arrays_array/output.json new file mode 100644 index 0000000..0c43662 --- /dev/null +++ b/tests/09_sensitive_nested_arrays_array/output.json @@ -0,0 +1,34 @@ +[{ + "name": "***** ***", + "id": 12324, + "email": [{ + "id": "*****", + "value": "****@*******.***" + }, { + "id": "*****", + "value": "****@*****.***" + } + ], + "phone": [ + ["***", "***", "****"], + ["***", "***", "****"], + ["***", "***", "****"] + ] +}, +{ + "name": "***** ***", + "id": 12324, + "email": [{ + "id": "*****", + "value": "****@*******.***" + }, { + "id": "*****", + "value": "****@*****.***" + } + ], + "phone": [ + ["***", "***", "****"], + ["***", "***", "****"], + ["***", "***", "****"] + ] +}] \ No newline at end of file diff --git a/tests/09_sensitive_nested_arrays_array/sensitive_fields.txt b/tests/09_sensitive_nested_arrays_array/sensitive_fields.txt new file mode 100644 index 0000000..afb6ff8 --- /dev/null +++ b/tests/09_sensitive_nested_arrays_array/sensitive_fields.txt @@ -0,0 +1,5 @@ +name +email +phone +us_citizen +account_balance