From 8a4947a10a97cb91ad1c38838bc4696b6e54b492 Mon Sep 17 00:00:00 2001 From: saileshwar-skyflow Date: Fri, 23 May 2025 15:05:40 +0530 Subject: [PATCH 1/3] SK-2068: Add readme and samples for detect --- README.md | 485 ++++++++++++++++++++++++++ samples/detect_api/deidentify_file.py | 105 ++++++ samples/detect_api/deidentify_text.py | 82 +++++ samples/detect_api/get_detect_run.py | 61 ++++ samples/detect_api/reidentify_text.py | 66 ++++ 5 files changed, 799 insertions(+) create mode 100644 samples/detect_api/deidentify_file.py create mode 100644 samples/detect_api/deidentify_text.py create mode 100644 samples/detect_api/get_detect_run.py create mode 100644 samples/detect_api/reidentify_text.py diff --git a/README.md b/README.md index de6ce235..011c9c9f 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,11 @@ The Skyflow Python SDK is designed to help with integrating Skyflow into a Pytho - [Delete](#delete) - [Invoke Connection](#invoke-connection) - [Query](#query) +- [Detect](#detect-apis) + - [Deidentify Text](#deidentify-text) + - [Reidentify Text](#reidentify-text) + - [Deidentify File](#deidentify-file) + - [Get Detect Run](#get-detect-run) - [Connections](#connections) - [Invoke a connection](#invoke-a-connection) - [Authenticate with bearer tokens](#authenticate-with-bearer-tokens) @@ -1669,6 +1674,486 @@ QueryResponse( ) ``` +## Detect +Skyflow Detect enables you to deidentify and reidentify sensitive data in text and files, supporting advanced privacy-preserving workflows. The Detect API supports the following operations: + +### Deidentify Text +To deidentify text, use the `deidentify_text` method. The `DeidentifyTextRequest` class creates a deidentify text request, which includes the text to be deidentified and options for controlling the deidentification process. 
+ +#### Construct a Deidentify Text request + +```python +from skyflow.error import SkyflowError +from skyflow.utils.enums import DetectEntities, TokenType +from skyflow.vault.detect import DeidentifyTextRequest, TokenFormat, Transformations +""" +This example demonstrates how to deidentify text, along with corresponding DeidentifyTextRequest schema. +""" +try: + # Initialize Skyflow client + # Step 1: Create request with text to deidentify + request = DeidentifyTextRequest( + text="", + entities=[DetectEntities.SSN, DetectEntities.CREDIT_CARD], # Entities to detect + token_format = TokenFormat( # Specify the token format for deidentified entities + default=TokenType.VAULT_TOKEN, + ), + # transformations=Transformations( # Specify custom transformations for entities + # shift_dates={ + # "max_days": 30, + # "min_days": 10, + # "entities": [DetectEntities.DOB] + # } + # ), + allow_regex_list=[""], # Optional regex patterns to allow + restrict_regex_list=[""] # Optional regex patterns to restrict + ) + + # Step 2: Call deidentify_text + deidentify_text_response = skyflow_client.detect('').deidentify_text(request) + # Replace with your actual Skyflow vault ID + + # Step 3: Print the deidentified text response + print('Response: ', deidentify_text_response) + + +except SkyflowError as error: + # Step 4: Handle any exceptions that may occur during the insert operation + print('Skyflow Specific Error: ', { + 'code': error.http_code, + 'message': error.message, + 'details': error.details + }) +except Exception as error: + print('Unexpected Error:', error) # Print the stack trace for debugging purposes +``` + +#### An example of Deidentify Text call + +```python +from skyflow.error import SkyflowError +from skyflow.utils.enums import DetectEntities, TokenType +from skyflow.vault.detect import DeidentifyTextRequest, TokenFormat, Transformations +""" + * Skyflow Text De-identification Example + * + * This example demonstrates how to: + * 1. 
Configure Skyflow client credentials + * 2. Set up vault configuration + * 3. Create a deidentify text request with all available options + * 4. Handle response and errors +""" +try: + # Initialize Skyflow Client + # Step 1: Create request with sample text containing sensitive data + request = DeidentifyTextRequest( + text="My SSN is 123-45-6789 and my card is 4111 1111 1111 1111.", + entities=[ + DetectEntities.SSN, + DetectEntities.CREDIT_CARD + ], + token_format = TokenFormat( # Specify the token format for deidentified entities + default=TokenType.VAULT_TOKEN, + ), + # transformations=Transformations( # Specify custom transformations for entities + # shift_dates={ + # "max_days": 30, + # "min_days": 30, + # "entities": [DetectEntities.DOB] + # } + # ) + ) + + # Step 2: Call deidentify_text + deidentify_text_response = skyflow_client.detect('').deidentify_text(request) + # Replace with your actual Skyflow vault ID + + # Step 3: Print the deidentified text response + print('Response: ', deidentify_text_response) + +except SkyflowError as error: + # Step 4: Handle any exceptions that may occur during the insert operation + print('Skyflow Specific Error: ', { + 'code': error.http_code, + 'message': error.message, + 'details': error.details + }) +except Exception as error: + print('Unexpected Error:', error) # Print the stack trace for debugging purposes +``` + +Sample Response: +```python +DeidentifyTextResponse( + processed_text='My SSN is [SSN_VqLazzA] and my card is [CREDIT_CARD_54lAgtk].', + entities=[ + EntityInfo( + token='SSN_VqLazzA', + value='123-45-6789', + text_index=TextIndex(start=10, end=21), + processed_index=TextIndex(start=10, end=23), + entity='SSN', + scores={'SSN': 0.9383999705314636} + ), + EntityInfo( + token='CREDIT_CARD_54lAgtk', + value='4111 1111 1111 1111', + text_index=TextIndex(start=37, end=56), + processed_index=TextIndex(start=39, end=60), + entity='CREDIT_CARD', + scores={'CREDIT_CARD': 0.9050999879837036} + ) + ], + word_count=9, + 
char_count=57 +) +``` + +### Reidentify Text + +To reidentify text, use the `reidentify_text` method. The `ReidentifyTextRequest` class creates a reidentify text request, which includes the redacted or deidentified text to be reidentified. + +#### Construct a Reidentify Text request + +```python +from skyflow.error import SkyflowError +from skyflow.vault.detect import ReidentifyTextRequest, ReidentifyFormat +""" +This example demonstrates how to reidentify text, along with corresponding ReidentifyTextRequest schema. +""" +try: + # Initialize Skyflow client + # Step 1: Create request to reidentify + request = ReidentifyTextRequest( + text="", # Text containing tokens to reidentify + format=ReidentifyFormat( + redacted=[""], # Entities to show redacted + masked=[""], # Entities to show masked + plaintext=[""] # Entities to show as plain text + ) + ) + + # Step 2: Call reidentify_text + reidentify_text_response = skyflow_client.detect('').reidentify_text(request) + # Replace with your actual Skyflow vault ID + + # Step 3: Print the reidentified text response + print('Response: ', reidentify_text_response) + +except SkyflowError as error: + # Step 4: Handle any exceptions that may occur during the insert operation + print('Skyflow Specific Error: ', { + 'code': error.http_code, + 'message': error.message, + 'details': error.details + }) +except Exception as error: + print('Unexpected Error:', error) # Print the stack trace for debugging purposes +``` + +#### An example for Reidentify Text call + +```python +from skyflow.error import SkyflowError +from skyflow.vault.detect import ReidentifyTextRequest, ReidentifyFormat +from skyflow.utils.enums import DetectEntities +""" + * Skyflow Text Re-identification Example + * + * This example demonstrates how to: + * 1. Configure credentials + * 2. Set up vault configuration + * 3. Create a reidentify text request + * 4. Use all available options for reidentification + * 5. 
Handle response and errors +""" +try: + # Initialize Skyflow Client + # Step 1: Create request with deidentified text + request = ReidentifyTextRequest( + text="My SSN is [SSN_VqLazzA] and my card is [CREDIT_CARD_54lAgtk].", + # format=ReidentifyFormat( + # redacted=[DetectEntities.SSN], # Show SSN redacted + # masked=[DetectEntities.CREDIT_CARD], # Show credit card masked + # plaintext=[DetectEntities.DOB] # Show DOB as plain text + # ) + ) + + # Step 2: Call reidentify_text + reidentify_text_response = skyflow_client.detect('').reidentify_text(request) + # Replace with your actual Skyflow vault ID + + # Step 3: Print the reidentified text response + print('Response: ', reidentify_text_response) + +except SkyflowError as error: + # Step 4: Handle any exceptions that may occur during the insert operation + print('Skyflow Specific Error: ', { + 'code': error.http_code, + 'message': error.message, + 'details': error.details + }) +except Exception as error: + print('Unexpected Error:', error) # Print the stack trace for debugging purposes +``` + +Sample Response: +```python +ReidentifyTextResponse( + processed_text='My SSN is 123-45-6789 and my card is 4111 1111 1111 1111.' +) +``` + +### Deidentify File +To deidentify files, use the `deidentify_file` method. The `DeidentifyFileRequest` class creates a deidentify file request, which includes the file to be deidentified and various configuration options. + +#### Construct a Deidentify File request +```python +from skyflow.error import SkyflowError +from skyflow.utils.enums import DetectEntities, MaskingMethod, DetectOutputTranscriptions +from skyflow.vault.detect import DeidentifyFileRequest, TokenFormat, Transformations, Bleep +""" +This example demonstrates how to deidentify file, along with corresponding DeidentifyFileRequest schema. 
+""" +try: + # Initialize Skyflow client + # Step 1: Open file for deidentification + file = open('', 'rb') # Open the file in read-binary mode + # Step 2: Create deidentify file request + request = DeidentifyFileRequest( + file=file, # File object to deidentify + entities=[DetectEntities.SSN, DetectEntities.CREDIT_CARD], # Entities to detect + + # Token format configuration + token_format=TokenFormat( + default=True, + vault_token=[DetectEntities.SSN] + ), + + # Output configuration + output_directory='', # Output directory for saving the deidentified file + wait_time=15, # Max wait time in seconds (max 64) + + # Image-specific options + # output_processed_image=True, # Include processed image + # output_ocr_text=True, # Include OCR text + # masking_method=MaskingMethod.BLACKOUT, # Masking method + + # PDF-specific options + # pixel_density=1.5, # PDF processing density + # max_resolution=2000, # Max PDF resolution + + # Audio-specific options + # output_processed_audio=True, # Include processed audio + # output_transcription=DetectOutputTranscriptions.PLAINTEXT_TRANSCRIPTION, # Transcription type + + # Audio bleep configuration + # bleep=Bleep( + # gain=5, # Loudness in dB + # frequency=1000, # Pitch in Hz + # start_padding=0.1, # Start padding in seconds + # stop_padding=0.2 # End padding in seconds + # ) + ) + + # Step 3: Call deidentify_file + deidentify_file_response = skyflow_client.detect('').deidentify_file(request) + # Replace with your actual Skyflow vault ID + + # Step 4: Print the deidentified file response + print('Response: ', deidentify_file_response) + +except SkyflowError as error: + # Step 5: Handle any exceptions that may occur during the deidentify file operation + print('Skyflow Specific Error: ', { + 'code': error.http_code, + 'message': error.message, + 'details': error.details + }) +except Exception as error: + print('Unexpected Error:', error) # Print the error for debugging purposes +``` + +#### An example for Deidentify File call + +```python +from
skyflow.error import SkyflowError +from skyflow.utils.enums import DetectEntities, MaskingMethod, DetectOutputTranscriptions +from skyflow.vault.detect import DeidentifyFileRequest, TokenFormat, Bleep +""" + * Skyflow Deidentify File Example + * + * This sample demonstrates how to use all available options for deidentifying files. + * Supported file types: images (jpg, png, etc.), pdf, audio (mp3, wav), documents, + * spreadsheets, presentations, structured text. +""" +try: + # Initialize Skyflow client + # Step 1: Open file for deidentification + file = open('sensitive_document.txt', 'rb') # Open the file in read-binary mode + # Step 2: Create deidentify file request + request = DeidentifyFileRequest( + file=file, # File object to deidentify + entities=[ + DetectEntities.SSN, + DetectEntities.CREDIT_CARD + ], + # Token format configuration + token_format=TokenFormat( + default=True, + vault_token=[DetectEntities.SSN] + ), + output_directory="/tmp/processed", # Output directory for saving the deidentified file + wait_time=30, # Max wait time in seconds (max 64) + ) + + # Step 3: Call deidentify_file + deidentify_file_response = skyflow_client.detect('').deidentify_file(request) + # Replace with your actual Skyflow vault ID + + # Step 4: Print the deidentified file response + print('Response: ', deidentify_file_response) + +except SkyflowError as error: + # Step 5: Handle any exceptions that may occur during the deidentify file operation + print('Skyflow Specific Error: ', { + 'code': error.http_code, + 'message': error.message, + 'details': error.details + }) +except Exception as error: + print('Unexpected Error:', error) # Print the error for debugging purposes +``` + +Sample Response +```python +DeidentifyFileResponse( + file='TXkgY2FyZCBudW1iZXIgaXMgW0NSRURJVF9DQVJEXQpteSBzZWNvbmQ…', # Base64 encoded file content + type='redacted_file', + extension='txt', + word_count=19, + char_count=111, + size_in_kb=0.11, + duration_in_seconds=None, + page_count=None, +
slide_count=None, + entities=[ + { + 'file': 'W3sicHJvY2Vzc2VleHQiOiJDUkVESVRfQ0FSRCIsInRleHQiOiIxMjM0NTY0Nzg5MDEyMzQ1NiIsImxvY2F0aW9uIjp7InN0dF9pZHgiOjE4LCJlbmRfaWR4IjozNSwic3R0X2lkeF9wcm9jZXNzZWR…', # Base64 encoded JSON string of entities + 'type': 'entities', + 'extension': 'json' + } + ], + run_id='83abcdef-2b61-4a83-a4e0-cbc71ffabffd', + status='SUCCESS', + errors=[] +) +``` + +### Get Detect Run +To retrieve the results of a previously started file deidentification operation, use the `get_detect_run` method. The `GetDetectRunRequest` class is initialized with the run_id returned from a prior `deidentify_file` call. + +#### Construct a Get Detect Run request + +```python +from skyflow.error import SkyflowError +from skyflow.vault.detect import GetDetectRunRequest + +""" +Example program to demonstrate get detect run using run id, along with corresponding GetDetectRunRequest schema. +""" + +try: + # Initialize Skyflow client + # Step 1: Create GetDetectRunRequest + request = GetDetectRunRequest( + run_id='' # Replace with runId from deidentify_file + ) + + # Step 2: Call get_detect_run + get_detect_run_response = skyflow_client.detect('').get_detect_run(request) + # Replace with your actual vault ID + + # Print the response from the get detect run operation + print('Response: ', get_detect_run_response) + +except SkyflowError as error: + # Step 3: Handle any exceptions that may occur during the insert operation + print('Skyflow Specific Error: ', { + 'code': error.http_code, + 'message': error.message, + 'details': error.details + }) +except Exception as error: + print('Unexpected Error:', error) # Print the stack trace for debugging purposes + +``` + +#### An example for Get Detect Run Call + +```python +from skyflow.error import SkyflowError +from skyflow.vault.detect import GetDetectRunRequest +""" + * Skyflow Get Detect Run Example + * + * This example demonstrates how to: + * 1. Configure credentials + * 2. Set up vault configuration + * 3. 
Create a get detect run request + * 4. Call get_detect_run to poll for file processing results + * 5. Handle response and errors +""" +try: + # Initialize Skyflow client + # Step 1: Create GetDetectRunRequest + request = GetDetectRunRequest( + run_id="48ec05ba-96ec-4641-a8e2-35e066afef95" + ) + + # Step 2: Call get_detect_run + get_detect_run_response = skyflow_client.detect('').get_detect_run(request) + # Replace with your actual vault ID + + # Print the response from the get detect run operation + print('Response: ', get_detect_run_response) + +except SkyflowError as error: + # Step 3: Handle any exceptions that may occur during the get detect run operation + print('Skyflow Specific Error: ', { + 'code': error.http_code, + 'message': error.message, + 'details': error.details + }) +except Exception as error: + print('Unexpected Error:', error) # Print the error for debugging purposes +``` + +Sample Response +```python +DeidentifyFileResponse( + file='TXkgY2FyZCBudW1iZXIgaXMgW0NSRURJVF9DQVJEXQpteSBzZWNvbmQ…', # Base64 encoded file content + type='redacted_file', + extension='txt', + word_count=19, + char_count=111, + size_in_kb=0.11, + duration_in_seconds=None, + page_count=None, + slide_count=None, + entities=[ + { + 'file': 'W3sicHJvY2Vzc2VleHQiOiJDUkVESVRfQ0FSRCIsInRleHQiOiIxMjM0NTY0Nzg5MDEyMzQ1NiIsImxvY2F0aW9uIjp7InN0dF9pZHgiOjE4LCJlbmRfaWR4IjozNSwic3R0X2lkeF9wcm9jZXNzZWR…', # Base64 encoded JSON string of entities + 'type': 'entities', + 'extension': 'json' + } + ], + run_id='48ec05ba-96ec-4641-a8e2-35e066afef95', + status='SUCCESS', + errors=[] +) +``` + ### Connections Skyflow Connections is a gateway service that uses tokenization to securely send and receive data between your systems and first- or third-party services. The [connections](https://github.com/skyflowapi/skyflow-python/tree/v2/skyflow/vault/connection) module invokes both inbound and/or outbound connections. 
diff --git a/samples/detect_api/deidentify_file.py b/samples/detect_api/deidentify_file.py new file mode 100644 index 00000000..34a38939 --- /dev/null +++ b/samples/detect_api/deidentify_file.py @@ -0,0 +1,105 @@ +from skyflow.error import SkyflowError +from skyflow import Env, Skyflow, LogLevel +from skyflow.utils.enums import DetectEntities, MaskingMethod, DetectOutputTranscriptions +from skyflow.vault.detect import DeidentifyFileRequest, TokenFormat, Transformations, DateTransformation, Bleep + +""" + * Skyflow Deidentify File Example + * + * This sample demonstrates how to use all available options for deidentifying files. + * Supported file types: images (jpg, png, etc.), pdf, audio (mp3, wav), documents, + * spreadsheets, presentations, structured text. +""" + +def perform_file_deidentification(): + try: + # Step 1: Configure Credentials + credentials = { + 'path': '/path/to/credentials.json' # Path to credentials file + } + + # Step 2: Configure Vault + vault_config = { + 'vault_id': '', # Replace with your vault ID + 'cluster_id': '', # Replace with your cluster ID + 'env': Env.PROD, # Deployment environment + 'credentials': credentials + } + + # Step 3: Configure & Initialize Skyflow Client + skyflow_client = ( + Skyflow.builder() + .add_vault_config(vault_config) + .set_log_level(LogLevel.INFO) # Use LogLevel.ERROR in production + .build() + ) + + # Step 4: Create File Object + file_path = '' # Replace with your file path + file = open(file_path, 'rb') + # Step 5: Configure Deidentify File Request with all options + deidentify_request = DeidentifyFileRequest( + file=file, # File object to deidentify + entities=[DetectEntities.SSN, DetectEntities.CREDIT_CARD], # Entities to detect + allow_regex_list=[''], # Optional: Patterns to allow + restrict_regex_list=[''], # Optional: Patterns to restrict + + # Token format configuration + token_format=TokenFormat( + vault_token=[DetectEntities.SSN], # Use vault tokens for these entities + ), + + # Optional: Custom 
transformations + # transformations=Transformations( + # shift_dates=DateTransformation( + # max_days=30, + # min_days=10, + # entities=[DetectEntities.DOB] + # ) + # ), + + # Output configuration + output_directory='', # Where to save processed file + wait_time=15, # Max wait time in seconds (max 64) + + # Image-specific options + output_processed_image=True, # Include processed image in output + output_ocr_text=True, # Include OCR text in response + masking_method=MaskingMethod.BLACKOUT, # Masking method for images + + # PDF-specific options + pixel_density=1.5, # Pixel density for PDF processing + max_resolution=2000, # Max resolution for PDF + + # Audio-specific options + output_processed_audio=True, # Include processed audio + output_transcription=DetectOutputTranscriptions.PLAINTEXT_TRANSCRIPTION, # Transcription type + + # Audio bleep configuration + + # bleep=Bleep( + # gain=5, # Loudness in dB + # frequency=1000, # Pitch in Hz + # start_padding=0.1, # Padding at start (seconds) + # stop_padding=0.2 # Padding at end (seconds) + # ) + ) + + # Step 6: Call deidentifyFile API + response = skyflow_client.detect().deidentify_file(deidentify_request) + + # Handle Successful Response + print("\nDeidentify File Response:", response) + + except SkyflowError as error: + # Handle Skyflow-specific errors + print('\nSkyflow Error:', { + 'http_code': error.http_code, + 'grpc_code': error.grpc_code, + 'http_status': error.http_status, + 'message': error.message, + 'details': error.details + }) + except Exception as error: + # Handle unexpected errors + print('Unexpected Error:', error) diff --git a/samples/detect_api/deidentify_text.py b/samples/detect_api/deidentify_text.py new file mode 100644 index 00000000..c543b6f8 --- /dev/null +++ b/samples/detect_api/deidentify_text.py @@ -0,0 +1,82 @@ +from skyflow.error import SkyflowError +from skyflow import Env, Skyflow, LogLevel +from skyflow.utils.enums import DetectEntities +from skyflow.vault.detect import 
DeidentifyTextRequest, TokenFormat, Transformations, DateTransformation + +""" + * Skyflow Text De-identification Example + * + * This example demonstrates how to: + * 1. Configure Skyflow client credentials + * 2. Set up vault configuration + * 3. Create a deidentify text request with all available options + * 4. Handle response and errors +""" + +def perform_text_deidentification(): + try: + # Step 1: Configure Credentials + credentials = { + 'path': '/path/to/credentials.json' # Path to credentials file + } + + # Step 2: Configure Vault + vault_config = { + 'vault_id': '', # Replace with your vault ID + 'cluster_id': '', # Replace with your cluster ID + 'env': Env.PROD, # Deployment environment + 'credentials': credentials + } + + # Step 3: Configure & Initialize Skyflow Client + skyflow_client = ( + Skyflow.builder() + .add_vault_config(vault_config) + .set_log_level(LogLevel.ERROR) + .build() + ) + + # Step 4: Prepare Sample Text + sample_text = "My SSN is 123-45-6789 and my card is 4111 1111 1111 1111." 
+ + # Step 5: Configure Token Format + token_format = TokenFormat( + vault_token=[DetectEntities.CREDIT_CARD, DetectEntities.SSN], # Use vault tokens for these entities + ) + + # Step 6: Configure Transformations + transformations = Transformations( + shift_dates=DateTransformation( + max_days=30, # Maximum days to shift + min_days=30, # Minimum days to shift + entities=[DetectEntities.DOB] # Apply shift to DOB entities + ) + ) + + # Step 7: Create Deidentify Request + deidentify_request = DeidentifyTextRequest( + text=sample_text, + entities=[DetectEntities.CREDIT_CARD, DetectEntities.SSN], # Entities to detect and deidentify + token_format=token_format, + transformations=transformations, + allow_regex_list=[''], # Optional: regex patterns to allow + restrict_regex_list=[''] # Optional: regex patterns to restrict + ) + + # Step 8: Perform Text Deidentification + response = skyflow_client.detect().deidentify_text(deidentify_request) + + # Handle Successful Response + print("\nDeidentify Text Response:", response) + + except SkyflowError as error: + # Handle Skyflow-specific errors + print('\nSkyflow Error:', { + 'http_code': error.http_code, + 'grpc_code': error.grpc_code, + 'http_status': error.http_status, + 'message': error.message, + 'details': error.details + }) + except Exception as error: + print('Unexpected Error:', error) diff --git a/samples/detect_api/get_detect_run.py b/samples/detect_api/get_detect_run.py new file mode 100644 index 00000000..c2380c27 --- /dev/null +++ b/samples/detect_api/get_detect_run.py @@ -0,0 +1,61 @@ +from skyflow.error import SkyflowError +from skyflow import Env, Skyflow, LogLevel +from skyflow.vault.detect import GetDetectRunRequest + +""" + * Skyflow Get Detect Run Example + * + * This example demonstrates how to: + * 1. Configure credentials + * 2. Set up vault configuration + * 3. Create a get detect run request + * 4. Call getDetectRun to poll for file processing results + * 5. 
Handle response and errors +""" + +def perform_get_detect_run(): + try: + # Step 1: Configure Credentials + credentials = { + 'path': '/path/to/credentials.json' # Path to credentials file + } + + # Step 2: Configure Vault + vault_config = { + 'vault_id': '', # Replace with your vault ID + 'cluster_id': '', # Replace with your cluster ID + 'env': Env.PROD, # Deployment environment + 'credentials': credentials + } + + # Step 3: Configure & Initialize Skyflow Client + skyflow_client = ( + Skyflow.builder() + .add_vault_config(vault_config) + .set_log_level(LogLevel.INFO) # Use LogLevel.ERROR in production + .build() + ) + + # Step 4: Create GetDetectRunRequest + get_detect_run_request = GetDetectRunRequest( + run_id='' # Replace with the runId from deidentifyFile call + ) + + # Step 5: Call getDetectRun API + response = skyflow_client.detect().get_detect_run(get_detect_run_request) + + # Handle Successful Response + print("\nGet Detect Run Response:", response) + + except SkyflowError as error: + # Handle Skyflow-specific errors + print('\nSkyflow Error:', { + 'http_code': error.http_code, + 'grpc_code': error.grpc_code, + 'http_status': error.http_status, + 'message': error.message, + 'details': error.details + }) + except Exception as error: + # Handle unexpected errors + print('Unexpected Error:', error) diff --git a/samples/detect_api/reidentify_text.py b/samples/detect_api/reidentify_text.py new file mode 100644 index 00000000..d158733f --- /dev/null +++ b/samples/detect_api/reidentify_text.py @@ -0,0 +1,66 @@ +from skyflow.error import SkyflowError +from skyflow import Env, Skyflow, LogLevel +from skyflow.utils.enums import DetectEntities +from skyflow.vault.detect import ReidentifyTextRequest + +""" + * Skyflow Text Re-identification Example + * + * This example demonstrates how to: + * 1. Configure credentials + * 2. Set up vault configuration + * 3. Create a reidentify text request + * 4. Use all available options for reidentification + * 5. 
Handle response and errors +""" + +def perform_text_reidentification(): + try: + # Step 1: Configure Credentials + credentials = { + 'path': '/path/to/credentials.json' # Path to credentials file + } + + # Step 2: Configure Vault + vault_config = { + 'vault_id': '', # Replace with your vault ID + 'cluster_id': '', # Replace with your cluster ID + 'env': Env.PROD, # Deployment environment + 'credentials': credentials + } + + # Step 3: Configure & Initialize Skyflow Client + skyflow_client = ( + Skyflow.builder() + .add_vault_config(vault_config) + .set_log_level(LogLevel.ERROR) + .build() + ) + + # Step 4: Prepare Sample Redacted Text + redacted_text = "" # Replace with your redacted text + + # Step 5: Create Reidentify Request + reidentify_request = ReidentifyTextRequest( + text=redacted_text, + plain_text_entities=[DetectEntities.PHONE_NUMBER] + ) + + # Step 6: Perform Text Reidentification + response = skyflow_client.detect().reidentify_text(reidentify_request) + + # Step 7: Handle Successful Response + print("\nReidentify Text Response:", response) + + except SkyflowError as error: + # Handle Skyflow-specific errors + print('\nSkyflow Error:', { + 'http_code': error.http_code, + 'grpc_code': error.grpc_code, + 'http_status': error.http_status, + 'message': error.message, + 'details': error.details + }) + except Exception as error: + # Handle unexpected errors + print('Unexpected Error:', error) From 6826c003587f4d4ab00e854e4269abd77da49043 Mon Sep 17 00:00:00 2001 From: saileshwar-skyflow Date: Fri, 23 May 2025 17:31:35 +0530 Subject: [PATCH 2/3] SK-2068: address comments --- README.md | 46 +++++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 011c9c9f..738d0cc4 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ The Skyflow Python SDK is designed to help with integrating Skyflow into a Pytho - [Authenticate](#authenticate) - [Initialize the client](#initialize-the-client) - 
[Insert data into the vault](#insert-data-into-the-vault) -- [Vault](#vault-apis) +- [Vault](#vault) - [Insert data into the vault](#insert-data-into-the-vault) - [Detokenize](#detokenize) - [Tokenize](#tokenize) @@ -30,9 +30,8 @@ The Skyflow Python SDK is designed to help with integrating Skyflow into a Pytho - [Redaction types](#redaction-types) - [Update](#update) - [Delete](#delete) - - [Invoke Connection](#invoke-connection) - [Query](#query) -- [Detect](#detect-apis) +- [Detect](#detect) - [Deidentify Text](#deidentify-text) - [Reidentify Text](#reidentify-text) - [Deidentify File](#deidentify-file) @@ -1698,13 +1697,13 @@ try: token_format = TokenFormat( # Specify the token format for deidentified entities default=TokenType.VAULT_TOKEN, ), - # transformations=Transformations( # Specify custom transformations for entities - # shift_dates={ - # "max_days": 30, - # "min_days": 10, - # "entities": [DetectEntities.DOB] - # } - # ), + transformations=Transformations( # Specify custom transformations for entities + shift_dates={ + "max_days": 30, + "min_days": 10, + "entities": [DetectEntities.DOB] + } + ), allow_regex_list=[""], # Optional regex patterns to allow restrict_regex_list=[""] # Optional regex patterns to restrict ) @@ -1755,13 +1754,13 @@ try: token_format = TokenFormat( # Specify the token format for deidentified entities default=TokenType.VAULT_TOKEN, ), - # transformations=Transformations( # Specify custom transformations for entities - # shift_dates={ - # "max_days": 30, - # "min_days": 30, - # "entities": [DetectEntities.DOB] - # } - # ) + transformations=Transformations( # Specify custom transformations for entities + shift_dates={ + "max_days": 30, + "min_days": 30, + "entities": [DetectEntities.DOB] + } + ) ) # Step 2: Call deidentify_text @@ -1826,11 +1825,9 @@ try: # Step 1: Create request to reidentify request = ReidentifyTextRequest( text="", # Text containing tokens to reidentify - format=ReidentifyFormat( - redacted=[""], # Entities to 
show redacted - masked=[""], # Entities to show masked - plaintext=[""] # Entities to show as plain text - ) + redacted_entities=[""], # Entities to show redacted + masked_entities=[""], # Entities to show masked + plain_text_entities=[""] # Entities to show as plain text ) # Step 2: Call reidentify_text @@ -1872,11 +1869,6 @@ try: # Step 1: Create request with deidentified text request = ReidentifyTextRequest( text="My SSN is [SSN_VqLazzA] and my card is [CREDIT_CARD_54lAgtk].", - # format=ReidentifyFormat( - # redacted=[DetectEntities.SSN], # Show SSN redacted - # masked=[DetectEntities.CREDIT_CARD], # Show credit card masked - # plaintext=[DetectEntities.DOB] # Show DOB as plain text - # ) ) # Step 2: Call reidentify_text From 9096b30b31676e0112fb7a8c750b7181e4c1710c Mon Sep 17 00:00:00 2001 From: saileshwar-skyflow Date: Fri, 23 May 2025 18:18:48 +0530 Subject: [PATCH 3/3] SK-2068: update deidentify file sample --- samples/detect_api/deidentify_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/detect_api/deidentify_file.py b/samples/detect_api/deidentify_file.py index 34a38939..ed13bfda 100644 --- a/samples/detect_api/deidentify_file.py +++ b/samples/detect_api/deidentify_file.py @@ -68,7 +68,7 @@ def perform_file_deidentification(): masking_method=MaskingMethod.BLACKOUT, # Masking method for images # PDF-specific options - pixel_density=1.5, # Pixel density for PDF processing + pixel_density=15, # Pixel density for PDF processing max_resolution=2000, # Max resolution for PDF # Audio-specific options