# Source: FINALIT/app.py at main · Rhushya/FINALIT (GitHub)
import streamlit as st
import os
import tempfile
import base64
import io
from typing import Dict, Any, List, Optional, Tuple
import logging
import time
from langdetect import detect
from googletrans import Translator
from gtts import gTTS
import hashlib

# Import utility modules
from utils.config import settings
from utils.sarvam_api import SarvamAIService
from utils.llm_service import LLMService
from utils.loan_manager import LoanDataManager
from utils.session import (
    init_session_state, add_message_to_history, get_conversation_history,
    clear_conversation_history, update_user_context, get_user_context,
    set_language, get_language, get_language_code, set_input_mode,
    get_input_mode, set_audio_data, get_audio_data, extract_entities_from_conversation
)
from utils.audio_utils import (
    record_audio, convert_audio_format, audio_to_base64,
    base64_to_audio, play_audio, create_audio_recording_ui, chunk_audio,
    create_continuous_conversation_ui
)
# Import the new TTS service
from utils.tts_service import text_to_speech, get_voice_for_language

# Module-wide logging: INFO level, logger named after this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Service singletons shared by the functions in this module
sarvam_service = SarvamAIService()
llm_service = LLMService()
loan_manager = LoanDataManager()

# Streamlit page configuration
st.set_page_config(
    page_title="Multilingual Loan Advisor",
    page_icon="💰",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Seed the session-state entries used to prevent duplicate processing.
# An entry is only written when it is missing, so existing values are kept.
for _state_key, _initial_value in (
    ("processed_inputs", set()),
    ("processing_in_progress", False),
    ("current_input_id", None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
del _state_key, _initial_value

def setup_sidebar():
    """Build the sidebar: language, input mode, audio options, user info, reset.

    Reads and writes ``st.session_state`` and calls the session helpers
    (``get_language``/``set_language``, ``get_input_mode``/``set_input_mode``,
    ``get_user_context``, ``clear_conversation_history``). Returns nothing.
    """
    sidebar = st.sidebar
    sidebar.title("Settings")

    # --- Language -----------------------------------------------------
    available_languages = list(settings.SUPPORTED_LANGUAGES)

    # First run: fall back to the first configured language
    if "language" not in st.session_state:
        st.session_state.language = available_languages[0]

    current_index = available_languages.index(get_language())
    chosen_language = sidebar.selectbox(
        "Select Language",
        available_languages,
        index=current_index,
    )

    if chosen_language != get_language():
        set_language(chosen_language, settings.SUPPORTED_LANGUAGES[chosen_language])
        sidebar.success(f"Language set to {chosen_language}")

    # --- Input mode ---------------------------------------------------
    modes = ["Text", "Voice"]
    chosen_mode = sidebar.radio(
        "Input Mode",
        modes,
        index=modes.index(get_input_mode()),
    )

    if chosen_mode != get_input_mode():
        set_input_mode(chosen_mode)
        sidebar.success(f"Input mode set to {chosen_mode}")

    # --- Audio playback / download toggles ----------------------------
    sidebar.subheader("Audio Settings")

    audio_toggles = (
        ("autoplay_audio", "Auto-play audio responses", True),
        ("auto_download_audio", "Auto-download audio responses", False),
    )
    for state_key, label, initial in audio_toggles:
        # Seed the default once, then mirror the checkbox into session state
        if state_key not in st.session_state:
            st.session_state[state_key] = initial
        st.session_state[state_key] = sidebar.checkbox(
            label,
            value=st.session_state[state_key],
        )

    sidebar.markdown("---")

    # --- Known facts about the user -----------------------------------
    sidebar.subheader("Your Information")

    # Only these context keys are shown; any other key is silently skipped
    formatters = {
        "age": lambda v: f"Age: {v}",
        "income": lambda v: f"Income: ₹{v:,}/month",
        "credit_score": lambda v: f"Credit Score: {v}",
        "employment_type": lambda v: f"Employment: {v.replace('_', ' ').title()}",
    }

    profile = get_user_context()
    if not profile:
        sidebar.text("No user information saved yet.")
    else:
        for field, field_value in profile.items():
            render = formatters.get(field)
            if render is not None:
                sidebar.text(render(field_value))

    # --- Reset --------------------------------------------------------
    if sidebar.button("Clear Conversation"):
        clear_conversation_history()
        # Forget processed-input hashes so earlier inputs can be sent again
        st.session_state.processed_inputs = set()

def display_conversation():
    """Render the page title, a welcome banner for an empty chat, and the history.

    Each history entry is drawn as a chat message under its ``role``. When an
    entry has a non-empty ``translated_content`` that differs from ``content``,
    the translation is offered in a collapsible expander.
    """
    st.title("Multilingual Loan Advisor 💰")

    welcome_text = """
        👋 **Welcome to the Multilingual Loan Advisor!**

        I'm here to help you with loan-related queries in multiple languages. You can ask me about:

        - Loan eligibility criteria
        - Application process for different types of loans
        - Financial advice and tips

        You'll receive both **text and audio responses** for a better experience. Feel free to type your question or use voice input.

        **New Features**:
        - Improved voice recording with start/stop buttons
        - **NEW! Continuous Conversation Mode** - talk back and forth naturally without clicking buttons
        - Audio responses now automatically play (can be disabled in Settings)
        - Multiple options to download audio responses
        - Option to automatically download audio responses (enable in Settings)
        """

    # The banner is only shown while the conversation is still empty
    if not get_conversation_history():
        st.markdown(welcome_text)

    # Replay every stored message in order
    for entry in get_conversation_history():
        with st.chat_message(entry["role"]):
            body = entry["content"]
            st.write(body)

            translation = entry.get("translated_content")
            if translation and body != translation:
                with st.expander("Show translation"):
                    st.write(translation)

@st.cache_data(show_spinner=False)
def process_text(text: str, is_voice: bool = False):
    """
    Process text input and return response with audio

    Pipeline: pick the input language -> translate the input to English
    (Sarvam AI) -> generate the answer with the LLM -> translate and
    transliterate the answer back to the input language -> synthesize audio.
    Every translation/TTS step is best-effort: on failure the function logs
    the error and continues with the untranslated text or without audio.

    Args:
        text (str): The text input to process
        is_voice (bool): Whether the input came from voice recording. Voice
            input skips text-based language detection and uses the user's
            selected language (or, in continuous voice mode, the language
            stored from the previous interaction).

    Returns:
        Tuple containing:
        - response (str): The assistant's response in the detected language
        - english_response (str): The assistant's response in English
        - input_hash (str): MD5 hex digest of the input text
        - audio_response (bytes | None): The response as audio data, or
          None when speech synthesis failed
    """
    # NOTE(review): st.cache_data keys on the call arguments, while this
    # function also reads session-dependent values (selected language, user
    # context) and writes st.session_state. Confirm caching here is intended.

    # Generate hash for this input
    input_hash = hashlib.md5(text.encode()).hexdigest()

    # Bind the working language BEFORE any fallible step. Previously it was
    # only assigned inside the detection try-block, so a detection failure
    # left it undefined and the LLM call below raised NameError.
    input_language = get_language_code()  # Default to user's selected language
    translated_input = text

    # The flag is only initialized by the voice UI; default to False when absent
    continuous_voice = bool(st.session_state.get("continuous_voice_mode", False))

    try:
        if not is_voice:
            # Typed input: detect its language (voice is handled by Sarvam)
            detected_lang = detect(text)
            # Map detected language to language code format "xx-IN"
            for lang_code in settings.SUPPORTED_LANGUAGES.values():
                if lang_code.startswith(detected_lang):
                    input_language = lang_code
                    break

            logger.info(f"Detected language code: {input_language}")
        elif continuous_voice and "last_detected_language" in st.session_state:
            # Continuous voice mode: reuse the previous language so the
            # conversation stays in one language across turns
            input_language = st.session_state.last_detected_language
            logger.info(f"Using stored language from previous interaction: {input_language}")
    except Exception as e:
        # Detection is best-effort; keep the user's selected language
        logger.error(f"Language detection error: {str(e)}")

    # Save the language for future interactions in continuous mode
    if is_voice and continuous_voice:
        st.session_state.last_detected_language = input_language

    # Translate to English for LLM processing if not in English
    if not input_language.startswith("en"):
        try:
            # Use only Sarvam AI for translation
            translation_result = sarvam_service.translate_text(
                text,
                source_language=input_language,
                target_language="en-IN"
            )
            if "error" not in translation_result:
                translated_input = translation_result.get("translated_text", text)
                logger.info("Successfully translated input to English using Sarvam AI")
            else:
                # Keep the original text - no Google fallback by design
                logger.error(f"Sarvam translation error: {translation_result.get('error')}")
        except Exception as e:
            # Keep original text if translation fails
            logger.error(f"Translation error: {str(e)}")

    # Generate response using LLM
    english_response = llm_service.generate_response(
        translated_input,
        language_code=input_language,  # Use detected language instead of user's selected language
        user_context=get_user_context()
    )

    # Translate response back to the language of the input (if not English)
    response = english_response

    if not input_language.startswith("en"):
        try:
            # First, translate the English response to the detected language
            translation_result = sarvam_service.translate_text(
                english_response,
                source_language="en-IN",
                target_language=input_language
            )

            if "error" not in translation_result:
                translated_response = translation_result.get("translated_text", english_response)
                logger.info(f"Successfully translated response to {input_language} using Sarvam AI")

                # Then, ensure the response is in the same script as the input
                # language by using the transliteration API
                transliteration_result = sarvam_service.transliterate_text(
                    translated_response,
                    source_language="en-IN",
                    target_language=input_language
                )

                if "error" not in transliteration_result:
                    response = transliteration_result.get("transliterated_text", translated_response)
                    logger.info("Successfully transliterated response using Sarvam AI")
                else:
                    logger.error(f"Sarvam transliteration error: {transliteration_result.get('error')}")
                    # If transliteration fails, use the translated response
                    response = translated_response
            else:
                logger.error(f"Sarvam response translation error: {translation_result.get('error')}")
                # If Sarvam translation fails, use English response
                response = english_response
        except Exception as e:
            logger.error(f"Response translation/transliteration error: {str(e)}")
            response = english_response

    # Generate audio response
    audio_response = None
    try:
        # Get appropriate voice for the language
        voice = get_voice_for_language(input_language)

        # Convert text to speech in the input language
        audio_response = text_to_speech(
            response,  # Use the translated response
            language_code=input_language,  # Use the detected input language
            voice=voice,
            pace=1.0
        )

        # Fallback to gTTS if the TTS service returned nothing
        if not audio_response:
            logger.warning("TTS service failed, falling back to gTTS")
            # gTTS takes the bare two-letter language code ("hi", not "hi-IN")
            tts = gTTS(text=response, lang=input_language[:2])
            audio_file = io.BytesIO()
            tts.write_to_fp(audio_file)
            audio_file.seek(0)
            audio_response = audio_file.read()
            logger.info(f"Using gTTS for audio generation in {input_language}")
    except Exception as e:
        # Audio is optional; the text response is still returned
        logger.error(f"Text-to-speech error: {str(e)}")

    return response, english_response, input_hash, audio_response

def handle_user_input():
    """Handle user input based on mode"""
    # Initialize session state as needed
    if 'processed_inputs' not in st.session_state:
        st.session_state.processed_inputs = set()

    if 'processing_in_progress' not in st.session_state:
        st.session_state.processing_in_progress = False

    # Check for setting from localStorage about language continuity
    check_last_language_js = """
    <script>
        // Function to check and retrieve last detected language
        function checkAndPassLanguage() {
            var lastLanguage = localStorage.getItem('lastDetectedLanguage');
            if (lastLanguage) {
                // We use an event to pass this to Streamlit
                const event = new CustomEvent('streamlit:language', {
                    detail: { language: lastLanguage }
                });
                window.dispatchEvent(event);
                console.log("Passed language to Streamlit: " + lastLanguage);

                // Store in a hidden input element as well
                var input = document.createElement('input');
                input.type = 'hidden';
                input.id = 'detected_language_input';
                input.value = lastLanguage;
                document.body.appendChild(input);
            }
        }

        // Run on page load
        window.addEventListener('DOMContentLoaded', checkAndPassLanguage);
    </script>
    """
    st.markdown(check_last_language_js, unsafe_allow_html=True)

    # Check for input mode
    if st.session_state.input_mode == "Text":
        # Text input
        text_input = st.chat_input("Type your message here...")

        if text_input and not st.session_state.processing_in_progress:
            # Check if we've already processed this input
            input_hash = hashlib.md5(text_input.encode()).hexdigest()

            if input_hash in st.session_state.processed_inputs:
                return

            st.session_state.processing_in_progress = True
            st.session_state.current_input_id = input_hash

            # Add user message to history
            add_message_to_history("user", text_input)

            with st.spinner("Processing..."):
                # Process the text input
                response, english_response, _, audio_response = process_text(text_input)

                # Add assistant message to history
                add_message_to_history(
                    "assistant",
                    response,
                    english_response if response != english_response else None
                )

                # If audio response was generated, play it (regardless of input mode)
                if audio_response:
                    # Create a clear visual response section
                    st.markdown("---")
                    st.markdown("### 🤖 Assistant Response")

                    # Display text response in a highlighted container
                    with st.container():
                        st.markdown(f"**Text Response:**")
                        st.markdown(f"<div style='background-color: #f0f2f6; padding: 15px; border-radius: 5px;'>{response}</div>", unsafe_allow_html=True)

                        # Show translation if available and different
                        if english_response and response != english_response:
                            with st.expander("Show English translation"):
                                st.markdown(f"<div style='background-color: #e6f3ff; padding: 10px; border-radius: 5px;'>{english_response}</div>", unsafe_allow_html=True)

                    # Display audio response with a clear label
                    st.markdown("### 🔊 Audio Response")

                    # Create a unique ID for the audio element
                    audio_element_id = f"audio_element_{input_hash}"

                    # Create a visually distinct container for the audio player
                    with st.container():
                        st.markdown("""
                            <style>
                                .audio-container {
                                    background-color: #f0f2f6;
                                    padding: 20px;
                                    border-radius: 10px;
                                    margin: 10px 0;
                                    border: 1px solid #e0e0e0;
                                }
                                audio {
                                    width: 100%;
                                    margin: 10px 0;
                                }
                                .stAudio {
                                    background-color: white !important;
                                    padding: 10px !important;
                                    border-radius: 8px !important;
                                }
                            </style>
                            <div class="audio-container">
                        """, unsafe_allow_html=True)

                        if st.session_state.continuous_voice_mode:
                            # Custom audio element with autoplay for continuous mode
                            audio_element_html = f"""
                                <audio id="{audio_element_id}" controls autoplay="autoplay" style="width: 100%;">
                                    <source src="data:audio/wav;base64,{base64.b64encode(audio_response).decode()}" type="audio/wav">
                                    Your browser does not support the audio element.
                                </audio>
                            """
                            st.markdown(audio_element_html, unsafe_allow_html=True)

                            # Add JavaScript to ensure audio plays and handle continuous conversation
                            continuous_convo_js = f"""
                                <script>
                                    document.addEventListener('DOMContentLoaded', function() {{
                                        var audioElement = document.getElementById('{audio_element_id}');
                                        if (audioElement) {{
                                            console.log('Setting up audio element for continuous conversation');

                                            // Force audio play when ready
                                            audioElement.play().catch(function(error) {{
                                                console.log('Auto-play failed, waiting for user interaction');
                                            }});

                                            // When audio finishes playing, trigger next recording
                                            audioElement.addEventListener('ended', function() {{
                                                console.log('Audio playback ended, triggering new recording');
                                                localStorage.setItem('autoRecordingTriggered', 'true');
                                                window.location.reload();
                                            }});
                                        }}
                                    }});
                                </script>
                            """
                            st.markdown(continuous_convo_js, unsafe_allow_html=True)
                        else:
                            # Standard Streamlit audio player for single recording mode
                            st.audio(audio_response, format="audio/wav", start_time=0)

                        st.markdown("</div>", unsafe_allow_html=True)

                    # Display status message for continuous mode
                    if st.session_state.continuous_voice_mode:
                        st.info("🔄 Continuous conversation mode active - The next recording will start automatically after the response finishes playing.")

                    # Generate a unique ID for the download link
                    download_id = f"download_link_{input_hash}"
                    timestamp = int(time.time())

                    # Add multiple download options for better compatibility
                    audio_b64 = base64.b64encode(audio_response).decode()

                    # Create download buttons columns
                    col1, col2 = st.columns(2)

                    # Method 1: Direct href download link
                    href = f'<a id="{download_id}" href="data:audio/wav;base64,{audio_b64}" download="assistant_response_{timestamp}.wav" style="display: inline-block; padding: 0.25em 0.75em; background-color: #4CAF50; color: white; text-decoration: none; border-radius: 4px; cursor: pointer;">Download audio</a>'
                    col1.markdown(href, unsafe_allow_html=True)

                    # Method 2: Save the audio data to a temporary file
                    # and provide a download button
                    if "temp_audio_file" not in st.session_state:
                        st.session_state.temp_audio_file = {}

                    # Store the audio data with the timestamp as key
                    st.session_state.temp_audio_file[timestamp] = audio_response

                    # Create a download button
                    if col2.download_button(
                        label="Save audio",
                        data=audio_response,
                        file_name=f"assistant_response_{timestamp}.wav",
                        mime="audio/wav"
                    ):
                        st.success("Audio downloaded successfully!")

                    # Auto-download script if enabled
                    if st.session_state.auto_download_audio:
                        # Add JavaScript to auto-trigger the download
                        auto_download_js = f"""
                        <script>
                            (function() {{
                                // Function to try different download methods
                                function tryDownload() {{
                                    // Try the link click first
                                    var link = document.getElementById('{download_id}');
                                    if (link) {{
                                        console.log('Triggering download via link click');
                                        link.click();
                                        return true;
                                    }}
                                    return false;
                                }}

                                // Wait for DOM to be ready
                                if (document.readyState === 'complete') {{
                                    setTimeout(tryDownload, 1500);
                                }} else {{
                                    window.addEventListener('load', function() {{
                                        setTimeout(tryDownload, 1500);
                                    }});
                                }}
                            }})();
                        </script>
                        """
                        st.markdown(auto_download_js, unsafe_allow_html=True)

                    st.markdown("---")

                # Extract entities and update user context
                entities = extract_entities_from_conversation()
                if entities:
                    update_user_context(entities)

                # Mark this input as processed
                st.session_state.processed_inputs.add(input_hash)
                st.session_state.processing_in_progress = False
                # Trigger a refresh but only if this input hasn't been processed
                st.rerun()
    else:
        # Voice input handling with Continuous Conversation mode

        # First check if we're in continuous conversation mode
        if "continuous_voice_mode" not in st.session_state:
            st.session_state.continuous_voice_mode = False

        # Mode selection tabs
        tab1, tab2 = st.tabs(["Single Recording", "Continuous Conversation"])

        with tab1:
            # Original voice input handling
            recording_started, audio_data = create_audio_recording_ui()
            if recording_started:
                st.session_state.continuous_voice_mode = False

        with tab2:
            # Continuous conversation mode
            cont_recording_started, cont_audio_data, end_conversation = create_continuous_conversation_ui()
            if cont_recording_started:
                st.session_state.continuous_voice_mode = True
                recording_started = True
                audio_data = cont_audio_data

        # Process the audio data (from either mode)
        if audio_data and not st.session_state.processing_in_progress:
            # Generate a unique hash for this audio
            audio_hash = hashlib.md5(audio_data).hexdigest()

            if audio_hash in st.session_state.processed_inputs:
                return

            st.session_state.processing_in_progress = True
            st.session_state.current_input_id = audio_hash

            with st.spinner("Processing your voice input..."):
                try:
                    # Store audio data for potential debugging
                    set_audio_data(audio_data)

                    # Display audio player for the input
                    st.audio(audio_data, format="audio/wav")

                    # Step 1: Convert audio to text using Sarvam API
                    source_language = get_language_code()
                    logger.info(f"Using language code for speech recognition: {source_language}")

                    stt_result = sarvam_service.speech_to_text(
                        audio_data,
                        source_language=source_language
                    )

                    if "error" not in stt_result and "text" in stt_result and stt_result["text"].strip():
                        user_text = stt_result["text"].strip()

                        # Show what user said
                        st.success(f"You said: {user_text}")

                        # Add user message to history
                        add_message_to_history("user", user_text)

                        # Step 2: Detect language and translate to English if necessary
                        detected_lang_code = source_language
                        english_input = user_text

                        # If the detected language is not English, translate to English
                        if not detected_lang_code.startswith("en"):
                            try:
                                translation_result = sarvam_service.translate_text(
                                    user_text,
                                    source_language=detected_lang_code,
                                    target_language="en-IN"
                                )

                                if "error" not in translation_result:
                                    english_input = translation_result.get("translated_text", user_text)
                                    logger.info(f"Successfully translated input to English: {english_input}")
                            except Exception as e:
                                logger.error(f"Error translating input: {str(e)}")

                        # Step 3: Generate response using LLM
                        english_response = llm_service.generate_response(
                            english_input,
                            language_code=detected_lang_code,  # Use detected language code, not user selected
                            user_context=get_user_context()
                        )

                        # Step 4: Translate response back to the detected language if needed
                        response = english_response

                        if not detected_lang_code.startswith("en"):
                            try:
                                # Translate the English response to the detected language
                                translation_result = sarvam_service.translate_text(
                                    english_response,
                                    source_language="en-IN",
                                    target_language=detected_lang_code
                                )

                                if "error" not in translation_result:
                                    translated_response = translation_result.get("translated_text", english_response)

                                    # Transliterate to ensure correct script
                                    transliteration_result = sarvam_service.transliterate_text(
                                        translated_response,
                                        source_language="en-IN",
                                        target_language=detected_lang_code
                                    )

                                    if "error" not in transliteration_result:
                                        response = transliteration_result.get("transliterated_text", translated_response)
                                    else:
                                        response = translated_response
                                else:
                                    response = english_response
                            except Exception as e:
                                logger.error(f"Error in translation/transliteration: {str(e)}")
                                response = english_response

                        # Step 5: Add assistant message to history
                        add_message_to_history(
                            "assistant",
                            response,
                            english_response if response != english_response else None
                        )

                        # Step 6: Generate audio response using the new TTS service
                        # Get appropriate voice for the detected language (not user selected)
                        voice = get_voice_for_language(detected_lang_code)

                        # Convert text to speech in the detected language
                        audio_response = text_to_speech(
                            response,  # Use the translated response text
                            language_code=detected_lang_code,  # Use detected language, not user selected
                            voice=voice,
                            pace=1.0
                        )

                        # Fallback to old method if new service fails
                        if not audio_response:
                            logger.warning(f"New TTS service failed, falling back to old method with language {detected_lang_code}")
                            audio_response = sarvam_service.text_to_speech(
                                response,
                                target_language=detected_lang_code  # Use detected language, not user selected
                            )

                        # Step 7: Create a clear visual response section
                        st.markdown("---")
                        st.markdown("### 🤖 Assistant Response")

                        # Display text response in a highlighted container
                        with st.container():
                            st.markdown(f"**Text Response:**")
                            st.markdown(f"<div style='background-color: #f0f2f6; padding: 15px; border-radius: 5px;'>{response}</div>", unsafe_allow_html=True)

                            # Show translation if available and different
                            if english_response and response != english_response:
                                with st.expander("Show English translation"):
                                    st.markdown(f"<div style='background-color: #e6f3ff; padding: 10px; border-radius: 5px;'>{english_response}</div>", unsafe_allow_html=True)

                        # Display audio response with a clear label
                        st.markdown("### 🔊 Audio Response")

                        # Create a unique ID for the audio element
                        audio_element_id = f"audio_element_{audio_hash}"

                        # Create a visually distinct container for the audio player
                        with st.container():
                            st.markdown("""
                                <style>
                                    .audio-container {
                                        background-color: #f0f2f6;
                                        padding: 20px;
                                        border-radius: 10px;
                                        margin: 10px 0;
                                        border: 1px solid #e0e0e0;
                                    }
                                    audio {
                                        width: 100%;
                                        margin: 10px 0;
                                    }
                                    .stAudio {
                                        background-color: white !important;
                                        padding: 10px !important;
                                        border-radius: 8px !important;
                                    }
                                </style>
                                <div class="audio-container">
                            """, unsafe_allow_html=True)

                            if st.session_state.continuous_voice_mode:
                                # Custom audio element with autoplay for continuous mode
                                audio_element_html = f"""
                                    <audio id="{audio_element_id}" controls autoplay="autoplay" style="width: 100%;">
                                        <source src="data:audio/wav;base64,{base64.b64encode(audio_response).decode()}" type="audio/wav">
                                        Your browser does not support the audio element.
                                    </audio>
                                """
                                st.markdown(audio_element_html, unsafe_allow_html=True)

                                # Add JavaScript to ensure audio plays and handle continuous conversation
                                continuous_convo_js = f"""
                                    <script>
                                        document.addEventListener('DOMContentLoaded', function() {{
                                            var audioElement = document.getElementById('{audio_element_id}');
                                            if (audioElement) {{
                                                console.log('Setting up audio element for continuous conversation');

                                                // Store the detected language for the next recording
                                                localStorage.setItem('lastDetectedLanguage', '{detected_lang_code}');
                                                console.log('Stored detected language: {detected_lang_code}');

                                                // Force audio play when ready
                                                audioElement.play().catch(function(error) {{
                                                    console.log('Auto-play failed, waiting for user interaction');
                                                }});

                                                // When audio finishes playing, trigger next recording
                                                audioElement.addEventListener('ended', function() {{
                                                    console.log('Audio playback ended, triggering new recording');
                                                    localStorage.setItem('autoRecordingTriggered', 'true');
                                                    window.location.reload();
                                                }});
                                            }}
                                        }});
                                    </script>
                                """
                                st.markdown(continuous_convo_js, unsafe_allow_html=True)
                            else:
                                # Standard Streamlit audio player for single recording mode
                                st.audio(audio_response, format="audio/wav", start_time=0)

                            st.markdown("</div>", unsafe_allow_html=True)

                        # Display status message for continuous mode
                        if st.session_state.continuous_voice_mode:
                            st.info(f"🔄 Continuous conversation active in {detected_lang_code} - The next recording will start automatically after the response finishes playing.")

                        # Generate a unique ID for the download link
                        download_id = f"download_link_{audio_hash}"
                        timestamp = int(time.time())

                        # Add multiple download options for better compatibility
                        audio_b64 = base64.b64encode(audio_response).decode()

                        # Create download buttons columns
                        col1, col2 = st.columns(2)

                        # Method 1: Direct href download link
                        href = f'<a id="{download_id}" href="data:audio/wav;base64,{audio_b64}" download="assistant_response_{timestamp}.wav" style="display: inline-block; padding: 0.25em 0.75em; background-color: #4CAF50; color: white; text-decoration: none; border-radius: 4px; cursor: pointer;">Download audio</a>'
                        col1.markdown(href, unsafe_allow_html=True)

                        # Method 2: Save the audio data to a temporary file
                        # and provide a download button
                        if "temp_audio_file" not in st.session_state:
                            st.session_state.temp_audio_file = {}

                        # Store the audio data with the timestamp as key
                        st.session_state.temp_audio_file[timestamp] = audio_response

                        # Create a download button
                        if col2.download_button(
                            label="Save audio",
                            data=audio_response,
                            file_name=f"assistant_response_{timestamp}.wav",
                            mime="audio/wav"
                        ):
                            st.success("Audio downloaded successfully!")

                        # Auto-download script if enabled
                        if st.session_state.auto_download_audio:
                            # Add JavaScript to auto-trigger the download
                            auto_download_js = f"""
                            <script>
                                (function() {{
                                    // Function to try different download methods
                                    function tryDownload() {{
                                        // Try the link click first
                                        var link = document.getElementById('{download_id}');
                                        if (link) {{
                                            console.log('Triggering download via link click');
                                            link.click();
                                            return true;
                                        }}
                                        return false;
                                    }}

                                    // Wait for DOM to be ready
                                    if (document.readyState === 'complete') {{
                                        setTimeout(tryDownload, 1500);
                                    }} else {{
                                        window.addEventListener('load', function() {{
                                            setTimeout(tryDownload, 1500);
                                        }});
                                    }}
                                }})();
                            </script>
                            """
                            st.markdown(auto_download_js, unsafe_allow_html=True)

                        st.markdown("---")

                    # Step 8: Extract entities and update user context
                    entities = extract_entities_from_conversation()
                    if entities:
                        update_user_context(entities)

                    # Mark this input as processed
                    st.session_state.processed_inputs.add(audio_hash)
                    st.session_state.processing_in_progress = False

                    # For continuous mode, set the auto_recording flag after processing
                    if st.session_state.continuous_voice_mode:
                        # This will be set to true when the audio finishes playing
                        if 'trigger_auto_recording' not in st.session_state:
                            st.session_state.trigger_auto_recording = False

                    # Trigger a refresh
                    st.rerun()
                except Exception as e:
                    logger.error(f"Error processing voice input: {str(e)}")
                    st.error(f"Error processing voice input: {str(e)}")

                    # For continuous mode, re-enable recording even after error
                    if st.session_state.continuous_voice_mode:
                        st.session_state.auto_recording = True

                    st.session_state.processing_in_progress = False

def main():
    """Build the whole page for one Streamlit script run.

    The steps run in a fixed order: initialise session state, inject the
    client-side "resume continuous conversation" snippet, then render the
    sidebar, the conversation history and finally the input handlers.
    """
    init_session_state()

    # Client-side resume hook. When localStorage holds
    # 'continueConversation' == 'true' the snippet clears the flag, waits
    # 1000 ms, clicks the second element with role="tab", and 500 ms later
    # clicks the first button whose text contains
    # 'Start Continuous Conversation'.
    # NOTE(review): confirm that <script> tags passed through st.markdown are
    # actually executed by the Streamlit frontend, and that something sets the
    # 'continueConversation' flag — its writer is not visible near this code.
    resume_hook_html = """
    <script>
        document.addEventListener('DOMContentLoaded', function() {
            // Check if we should continue the conversation based on localStorage flag
            if (localStorage.getItem('continueConversation') === 'true') {
                console.log('Continuous conversation flag detected, clearing it');
                localStorage.removeItem('continueConversation');

                // Set timeout to give page time to load, then click the tab for continuous conversation
                setTimeout(function() {
                    console.log('Selecting continuous conversation tab');
                    // Find the tab buttons (they're usually aria-selected attributes or data-baseweb attributes)
                    var tabButtons = document.querySelectorAll('[role="tab"]');
                    if (tabButtons.length >= 2) {
                        // Second tab should be continuous conversation
                        tabButtons[1].click();

                        // After tab is shown, find and click the Start Continuous Conversation button
                        setTimeout(function() {
                            var buttons = document.querySelectorAll('button');
                            for(var i=0; i<buttons.length; i++) {
                                if(buttons[i].innerText.includes('Start Continuous Conversation')) {
                                    console.log('Found and clicking Start Continuous Conversation button');
                                    buttons[i].click();
                                    break;
                                }
                            }
                        }, 500);
                    }
                }, 1000);
            }
        });
    </script>
    """
    st.markdown(resume_hook_html, unsafe_allow_html=True)

    # Page sections, rendered strictly in this order: sidebar first, then the
    # conversation so far, and the input handlers last.
    for render_section in (setup_sidebar, display_conversation, handle_user_input):
        render_section()

# Build the page only when this module is run as the main script, not when it is imported.
if __name__ == "__main__":
    main()