@@ -801,3 +801,97 @@ async def test_environment(self, staging_client: LiteralClient):
801801 persisted_run = staging_client .api .get_step (run_id )
802802 assert persisted_run is not None
803803 assert persisted_run .environment == "staging"
804+
@pytest.mark.timeout(5)
async def test_pii_removal(
    self, client: LiteralClient, async_client: AsyncLiteralClient
):
    """Test that PII is removed from step output by the preprocess function.

    Installs a redaction hook on ``client`` via
    ``set_preprocess_steps_function``, records a user and an assistant
    message containing an email address, a phone number and an SSN, then
    fetches the persisted steps and checks that the raw values were
    replaced by their ``[... REDACTED]`` placeholders.

    The hook is removed and the created steps/thread are deleted in a
    ``finally`` block, so a failing assertion cannot leave the hook
    installed on the ``client`` fixture or leave test data behind.

    Args:
        client: Synchronous client fixture the hook is installed on.
        async_client: Async client fixture; requested but not used here.
    """
    import re

    # Compiled once and applied in this order (email, phone, SSN).
    redactions = (
        (
            # TLD class is [A-Za-z]; a "|" inside a character class would
            # be matched literally.
            re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"),
            "[EMAIL REDACTED]",
        ),
        (
            # (?<!\w) instead of a leading \b: a phone number may start
            # with "+" or "(", which never sit on a word boundary after a
            # space, so \b would leave that prefix unredacted.
            re.compile(
                r"(?<!\w)(?:\+\d{1,2}\s?)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}\b"
            ),
            "[PHONE REDACTED]",
        ),
        (
            re.compile(r"\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b"),
            "[SSN REDACTED]",
        ),
    )

    def remove_pii(steps):
        """Redact PII in each step's ``output["content"]`` and return steps.

        Steps are assumed to be dict-like (TODO confirm against the
        client's preprocess contract). Steps without a textual output
        content are left untouched.
        """
        for step in steps:
            output = step.get("output")
            if not isinstance(output, dict):
                continue

            content = output.get("content")
            if not content or not isinstance(content, str):
                continue

            for pattern, placeholder in redactions:
                content = pattern.sub(placeholder, content)
            output["content"] = content

        return steps

    @client.thread
    def thread_with_pii():
        thread = client.get_current_thread()

        # User message with PII. The metadata also carries a phone number,
        # but the hook above only rewrites output content and the
        # assertions below only inspect output content.
        user_step = client.message(
            content="My email is test@example.com and my phone is (123) 456-7890. My SSN is 123-45-6789.",
            type="user_message",
            metadata={"contact_info": "Call me at 987-654-3210"},
        )

        # Assistant message with PII reference
        assistant_step = client.message(
            content="I'll contact you at test@example.com",
            type="assistant_message",
        )

        return thread.id, user_step.id, assistant_step.id

    # Set the PII removal function on the client
    client.set_preprocess_steps_function(remove_pii)

    thread_id = user_step_id = assistant_step_id = None
    try:
        # Run the thread
        thread_id, user_step_id, assistant_step_id = thread_with_pii()

        # Wait for processing to occur
        client.flush()

        # Fetch the steps and verify PII was removed
        user_step = client.api.get_step(id=user_step_id)
        assistant_step = client.api.get_step(id=assistant_step_id)

        assert user_step
        assert assistant_step

        user_step_output = user_step.output["content"]  # type: ignore

        # Check user message
        assert "test@example.com" not in user_step_output
        assert "(123) 456-7890" not in user_step_output
        assert "123-45-6789" not in user_step_output
        assert "[EMAIL REDACTED]" in user_step_output
        assert "[PHONE REDACTED]" in user_step_output
        assert "[SSN REDACTED]" in user_step_output

        assistant_step_output = assistant_step.output["content"]  # type: ignore

        # Check assistant message
        assert "test@example.com" not in assistant_step_output
        assert "[EMAIL REDACTED]" in assistant_step_output
    finally:
        # Reset the preprocess function first so other tests are never
        # affected, even if one of the delete calls below fails.
        client.set_preprocess_steps_function(None)

        # Clean up whatever was actually created
        if user_step_id is not None:
            client.api.delete_step(id=user_step_id)
        if assistant_step_id is not None:
            client.api.delete_step(id=assistant_step_id)
        if thread_id is not None:
            client.api.delete_thread(id=thread_id)
0 commit comments