11from skyflow .error import SkyflowError
22from skyflow import Env , Skyflow , LogLevel
33from skyflow .utils .enums import DetectEntities , MaskingMethod , DetectOutputTranscriptions
4- from skyflow .vault .detect import DeidentifyFileRequest , TokenFormat , Transformations , DateTransformation , Bleep , FileInput
4+ from skyflow .vault .detect import (
5+ DeidentifyFileRequest ,
6+ TokenFormat ,
7+ Transformations ,
8+ DateTransformation ,
9+ Bleep ,
10+ FileInput ,
11+ )
512
613"""
714 * Skyflow Deidentify File Example
1118 * spreadsheets, presentations, structured text.
1219"""
1320
21+
1422def perform_file_deidentification ():
1523 try :
1624 # Step 1: Configure Credentials
@@ -23,7 +31,7 @@ def perform_file_deidentification():
2331 'vault_id' : '<YOUR_VAULT_ID>' , # Replace with your vault ID
2432 'cluster_id' : '<YOUR_CLUSTER_ID>' , # Replace with your cluster ID
2533 'env' : Env .PROD , # Deployment environment
26- 'credentials' : credentials
34+ 'credentials' : credentials ,
2735 }
2836
2937 # Step 3: Configure & Initialize Skyflow Client
@@ -36,70 +44,66 @@ def perform_file_deidentification():
3644
3745 # Step 4: Create File Object
3846 file_path = '<FILE_PATH>' # Replace with your file path
39- file = open (file_path , 'rb' )
40- # Step 5: Configure Deidentify File Request with all options
41- deidentify_request = DeidentifyFileRequest (
42- file = FileInput (file ), # File to de-identify (can also provide a file path)
43- entities = [DetectEntities .SSN , DetectEntities .CREDIT_CARD ], # Entities to detect
44- allow_regex_list = ['<YOUR_REGEX_PATTERN>' ], # Optional: Patterns to allow
45- restrict_regex_list = ['<YOUR_REGEX_PATTERN>' ], # Optional: Patterns to restrict
46-
47- # Token format configuration
48- token_format = TokenFormat (
49- vault_token = [DetectEntities .SSN ], # Use vault tokens for these entities
50- ),
51-
52- # Optional: Custom transformations
53- # transformations=Transformations(
54- # shift_dates=DateTransformation(
55- # max_days=30,
56- # min_days=10,
57- # entities=[DetectEntities.DOB]
58- # )
59- # ),
60-
61- # Output configuration
62- output_directory = '<OUTPUT_DIRECTORY_PATH>' , # Where to save processed file
63- wait_time = 15 , # Max wait time in seconds (max 64)
64-
65- # Image-specific options
66- output_processed_image = True , # Include processed image in output
67- output_ocr_text = True , # Include OCR text in response
68- masking_method = MaskingMethod .BLACKBOX , # Masking method for images
69-
70- # PDF-specific options
71- pixel_density = 15 , # Pixel density for PDF processing
72- max_resolution = 2000 , # Max resolution for PDF
7347
74- # Audio-specific options
75- output_processed_audio = True , # Include processed audio
76- output_transcription = DetectOutputTranscriptions .PLAINTEXT_TRANSCRIPTION , # Transcription type
77-
78- # Audio bleep configuration
79-
80- # bleep=Bleep(
81- # gain=5, # Loudness in dB
82- # frequency=1000, # Pitch in Hz
83- # start_padding=0.1, # Padding at start (seconds)
84- # stop_padding=0.2 # Padding at end (seconds)
85- # )
86- )
87-
88- # Step 6: Call deidentifyFile API
89- response = skyflow_client .detect ().deidentify_file (deidentify_request )
48+ # Step 5: Configure Deidentify File Request and call API
49+ with open (file_path , 'rb' ) as file :
50+ deidentify_request = DeidentifyFileRequest (
51+ file = FileInput (file ), # File to de-identify (can also provide a file path)
52+ entities = [DetectEntities .SSN , DetectEntities .CREDIT_CARD ], # Entities to detect
53+ allow_regex_list = ['<YOUR_REGEX_PATTERN>' ], # Optional: Patterns to allow
54+ restrict_regex_list = ['<YOUR_REGEX_PATTERN>' ], # Optional: Patterns to restrict
55+ # Token format configuration
56+ token_format = TokenFormat (
57+ vault_token = [DetectEntities .SSN ], # Use vault tokens for these entities
58+ ),
59+ # Optional: Custom transformations
60+ # transformations=Transformations(
61+ # shift_dates=DateTransformation(
62+ # max_days=30,
63+ # min_days=10,
64+ # entities=[DetectEntities.DOB]
65+ # )
66+ # ),
67+ # Output configuration
68+ output_directory = '<OUTPUT_DIRECTORY_PATH>' , # Where to save processed file
69+ wait_time = 15 , # Max wait time in seconds (max 64)
70+ # Image-specific options
71+ output_processed_image = True , # Include processed image in output
72+ output_ocr_text = True , # Include OCR text in response
73+ masking_method = MaskingMethod .BLACKBOX , # Masking method for images
74+ # PDF-specific options
75+ pixel_density = 15 , # Pixel density for PDF processing
76+ max_resolution = 2000 , # Max resolution for PDF
77+ # Audio-specific options
78+ output_processed_audio = True , # Include processed audio
79+ output_transcription = DetectOutputTranscriptions .PLAINTEXT_TRANSCRIPTION , # Transcription type
80+ # Audio bleep configuration
81+ # bleep=Bleep(
82+ # gain=5, # Loudness in dB
83+ # frequency=1000, # Pitch in Hz
84+ # start_padding=0.1, # Padding at start (seconds)
85+ # stop_padding=0.2 # Padding at end (seconds)
86+ # )
87+ )
88+
89+ # Step 6: Call deidentifyFile API
90+ response = skyflow_client .detect ().deidentify_file (deidentify_request )
9091
9192 # Handle Successful Response
92- print (" \n Deidentify File Response:" , response )
93+ print (' \n Deidentify File Response:' , response )
9394
9495 except SkyflowError as error :
9596 # Handle Skyflow-specific errors
96- print ('\n Skyflow Error:' , {
97- 'http_code' : error .http_code ,
98- 'grpc_code' : error .grpc_code ,
99- 'http_status' : error .http_status ,
100- 'message' : error .message ,
101- 'details' : error .details
102- })
97+ print (
98+ '\n Skyflow Error:' ,
99+ {
100+ 'http_code' : error .http_code ,
101+ 'grpc_code' : error .grpc_code ,
102+ 'http_status' : error .http_status ,
103+ 'message' : error .message ,
104+ 'details' : error .details ,
105+ },
106+ )
103107 except Exception as error :
104108 # Handle unexpected errors
105109 print ('Unexpected Error:' , error )
0 commit comments