22from lib .logging_utils import init_logger
33import json
44import requests
5+ import uuid
6+ import boto3
7+ from botocore .exceptions import ClientError
58from typing import Dict , List , Any , Optional
69
710logger = init_logger (__name__ )
811logger .info ("MDO THIS SHOULD PRINT" )
912
13+ _REDACTED = "[REDACTED]"
14+
15+
16+ def _redact_option_value (key : str , value : Any ) -> Any :
17+ """
18+ Redact sensitive option values before logging.
19+
20+ This prevents leaking secrets (for example AWS credentials) into logs.
21+ """
22+ key_l = (key or "" ).lower ()
23+ if (
24+ key_l == "aws_secret_access_key"
25+ or "secret" in key_l
26+ or "password" in key_l
27+ or "token" in key_l
28+ or key_l .endswith ("_secret" )
29+ ):
30+ return _REDACTED
31+ return value
32+
1033
1134# Default options that control which elements to remove
1235default_options = {
1538 "remove_analysis" : False , # Remove all analysis data
1639 "remove_attachment_types" : [], # List of attachment types to remove (e.g., ["image/jpeg", "audio/mp3"])
1740 "remove_system_prompts" : False , # Remove system_prompt keys to prevent LLM instruction insertion
41+ # S3 storage options for dialog bodies
42+ "s3_bucket" : "" , # S3 bucket name for storing dialog bodies
43+ "s3_path" : "" , # Optional path prefix within the bucket
44+ "aws_access_key_id" : "" , # AWS access key ID
45+ "aws_secret_access_key" : "" , # AWS secret access key
46+ "aws_region" : "us-east-1" , # AWS region (default: us-east-1)
47+ "presigned_url_expiration" : None , # Presigned URL expiration in seconds (None = no expiration/default 1 hour)
1848}
1949
50+
def _get_s3_client(options: Dict[str, Any]):
    """
    Create and return a boto3 S3 client configured from ``options``.

    Args:
        options: Configuration options. Reads ``aws_access_key_id``,
            ``aws_secret_access_key`` and ``aws_region``.

    Returns:
        The client object returned by ``boto3.client("s3", ...)``.
    """
    # Blank or missing credentials are passed as None rather than "" so that
    # boto3 resolves credentials itself (environment, shared config, IAM
    # role) instead of trying to sign requests with empty keys.
    return boto3.client(
        "s3",
        aws_access_key_id=options.get("aws_access_key_id") or None,
        aws_secret_access_key=options.get("aws_secret_access_key") or None,
        # A blank region falls back to the same default as a missing one.
        region_name=options.get("aws_region") or "us-east-1",
    )
59+
60+
def _upload_to_s3_and_get_presigned_url(
    content: str,
    vcon_uuid: str,
    dialog_id: str,
    options: Dict[str, Any]
) -> Optional[str]:
    """
    Upload dialog body content to S3 and return a presigned URL.

    The object key has the form ``[<s3_path>/]<vcon_uuid>/[<dialog_id>_]<uuid4>.txt``.
    The object is always stored with content type ``text/plain``.

    Args:
        content: The dialog body content to upload. ``str`` values are
            encoded as UTF-8; any other value is handed to S3 unchanged.
        vcon_uuid: The vCon UUID, used as a directory level in the key
        dialog_id: The dialog ID; omitted from the key when falsy
        options: Configuration options including S3 credentials and bucket
            info (``s3_bucket``, optional ``s3_path``, optional
            ``presigned_url_expiration`` in seconds)

    Returns:
        Presigned URL to access the uploaded content, or None if the upload
        or URL generation fails for any reason (the error is logged).
    """
    try:
        s3 = _get_s3_client(options)

        # Generate a unique object name for this dialog body
        unique_id = str(uuid.uuid4())
        filename = f"{dialog_id}_{unique_id}.txt" if dialog_id else f"{unique_id}.txt"

        # vcon_uuid is a directory level; the optional prefix goes in front.
        key_parts = [str(vcon_uuid), filename]

        # Strip surrounding slashes so a configured prefix such as
        # "/dialogs/" cannot produce empty key segments ("//" or a leading "/").
        prefix = str(options.get("s3_path") or "").strip("/")
        if prefix:
            key_parts.insert(0, prefix)
        key = "/".join(key_parts)

        bucket = options["s3_bucket"]

        # Upload the content
        s3.put_object(
            Bucket=bucket,
            Key=key,
            Body=content.encode("utf-8") if isinstance(content, str) else content,
            ContentType="text/plain",
        )

        logger.info("Successfully uploaded dialog body to s3://%s/%s", bucket, key)

        # Generate presigned URL; default to 1 hour (3600 seconds) if not specified
        expiration = options.get("presigned_url_expiration")
        if expiration is None:
            expiration = 3600

        presigned_url = s3.generate_presigned_url(
            "get_object",
            Params={"Bucket": bucket, "Key": key},
            ExpiresIn=expiration,
        )

        # The URL itself is deliberately not logged: it grants access to the object.
        logger.info("Generated presigned URL with expiration %ss", expiration)
        return presigned_url

    except ClientError as e:
        # logger.exception keeps the traceback, which plain error() discarded.
        logger.exception("S3 client error uploading dialog body: %s", e)
        return None
    except Exception as e:
        # Best-effort boundary: callers treat None as "upload failed".
        logger.exception("Exception uploading dialog body to S3: %s", e)
        return None
126+
127+
20128def run (vcon_uuid , link_name , opts = default_options ):
21129 logger .info ("Starting diet::run" )
22130
23131 # Merge provided options with defaults
24132 options = {** default_options , ** opts }
25133
26134 for key , value in options .items ():
27- logger .info (f "diet::{ key } : { value } " )
135+ logger .info ("diet::%s: %s" , key , _redact_option_value ( key , value ) )
28136
29137 # Load vCon from Redis using JSON.GET
30138 vcon = redis .json ().get (f"vcon:{ vcon_uuid } " )
@@ -41,12 +149,27 @@ def run(vcon_uuid, link_name, opts=default_options):
41149 logger .info ("diet::got dialog" )
42150 if options ["remove_dialog_body" ] and "body" in dialog :
43151 logger .info ("diet::remove_dialog_body AND body" )
44- if options ["post_media_to_url" ] and dialog .get ("body" ):
152+ dialog_body = dialog .get ("body" )
153+ dialog_id = dialog .get ("id" , "" )
154+
155+ # Check if S3 storage is configured
156+ if options .get ("s3_bucket" ) and dialog_body :
157+ logger .info ("diet::uploading to S3" )
158+ presigned_url = _upload_to_s3_and_get_presigned_url (
159+ dialog_body , vcon_uuid , dialog_id , options
160+ )
161+ if presigned_url :
162+ dialog ["body" ] = presigned_url
163+ dialog ["body_type" ] = "url"
164+ else :
165+ logger .error ("Failed to upload to S3, removing body" )
166+ dialog ["body" ] = ""
167+ elif options ["post_media_to_url" ] and dialog_body :
45168 try :
46169 # Post the body content to the specified URL
47170 response = requests .post (
48171 options ["post_media_to_url" ],
49- json = {"content" : dialog [ "body" ] , "vcon_uuid" : vcon_uuid , "dialog_id" : dialog . get ( "id" , "" ) }
172+ json = {"content" : dialog_body , "vcon_uuid" : vcon_uuid , "dialog_id" : dialog_id }
50173 )
51174 if response .status_code == 200 :
52175 # Replace body with the URL to the stored content
0 commit comments