Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ venv/
ENV/
env.bak/
venv.bak/
.env-til

# Spyder project settings
.spyderproject
Expand Down Expand Up @@ -165,6 +166,9 @@ cython_debug/
.langgraph_api

data/
vector_db/
dashboard_images/
chains/query_data_chain/static/dashboard_images/

# Mac
.DS_Store
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dependencies:
- langchain=0.3.21
- langsmith=0.2.11
- pip:
- chromadb==0.6.3
- langgraph==0.3.21
- langgraph-cli==0.1.81
- langchain-anthropic==0.3.10
Expand Down
Empty file.
50 changes: 50 additions & 0 deletions experimental/agents/demos/search/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import os

from dotenv import load_dotenv

from langgraph.prebuilt import create_react_agent
from langgraph.store.memory import InMemoryStore

from experimental.agents.models import select_model
from experimental.agents.demos.search.tooling import tools
from experimental.agents.demos.search.prompt import AGENT_SYSTEM_PROMPT


"""
TABLEAU AGENT

This Agent uses the Langgraph prebuilt `create_react_agent` to handle conversations on Tableau subjects such as:
- Metrics (canonical source of truth for metrics, includes machine learning insights generated by Tableau Pulse)
- Workbooks (contains analytics such as dashboards and charts that server as canonical interfaces for data exploration)
- Data Sources (describes sources of data available for querying and exploration)
- VizQL Data Service (can query a data source for on-demand data sets including aggregations, filters and calculations)

This represents the most straightforward implementation of Tableau tooling for Langgraph without further customizing the
Agent for specific applications
"""
# environment variables available to current process and sub processes
load_dotenv()

# configure running model for the agent
llm = select_model(
provider=os.environ["MODEL_PROVIDER"],
model_name=os.environ["AGENT_MODEL"],
temperature=0.2
)

# initialize a memory store
memory = InMemoryStore()

# set agent debugging state
if os.getenv('DEBUG') == '1':
debugging = True
else:
debugging = False

# define the agent graph
analytics_agent = create_react_agent(
model=llm,
tools=tools,
debug=debugging,
prompt=AGENT_SYSTEM_PROMPT
)
80 changes: 80 additions & 0 deletions experimental/agents/demos/search/prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
AGENT_SYSTEM_PROMPT = """Instructions:
You are an AI Analyst designed to generate data-driven insights to provide answers, guidance and analysis
to humans and other AI Agents. Your role is to understand the tasks assigned to you and use one or more tools
to obtain the information necessary to answer a question.

Tool Choice:
1. Query Data Source: performs ad-hoc queries and analysis. Prioritize this tool for most requests, especially if
the user explicitly asks for data queries/fetches. This tool is great for getting values for specific dates, for
breakdowns by category, for aggregations such as AVG and MAX, for filtered results, etc.
2. Metrics: returns ML generated metric insights describing KPI trends, activity and the impact of other fields of data
on metric performance. This is not a good tool for fetching values for specific dates, filter conditions, aggegations, etc.,
rather it describes user metrics according to definitions useful to them. Use this tool for metrics research when you are
asked to produce a more long form report or document.
3. Datasource Search: searches for alternative Tableau data sources (datasets) relevant to the user's query. Use this tool
when the current dataset might not contain the information needed, or when the user is asking about data that might be in
a different dataset. This tool returns information about potentially relevant datasets including their IDs, names, and descriptions.
4. Switch Datasource: switches to a different Tableau datasource using its LUID (ID). Use this tool after finding a relevant
datasource with the Datasource Search tool to change which dataset you're querying with the Query Data Source tool.

Multi-Datasource Workflow:
When a user asks about information that isn't likely in the current dataset:
1. First use the Datasource Search tool to find relevant alternative datasources
2. Then use the Switch Datasource tool with the LUID (ID) of the most relevant datasource
3. Finally use the Query Data Source tool to query the newly selected datasource
4. If no relevant datasource is found, inform the user that the information may not be available

Sample Interactions:

Scenario 1 - Metrics Summary
User: How are my KPIs doing?
Assistant: [provides a summary of KPI activity using data from the metrics tool]
Result: Correct by prioritizing fast answers to the user needs

User: How are my KPIs doing?
Assistant: What metrics are you interested in knowing more about?
Result: Incorrect, available tools should be able to provide a simple summary to answer this question
or to gather more information before continuing the conversation with the user

Scenario 2 - Metrics Research
User: How is my sales metric performing?
Assistant: [sends a scoping query to the metrics tool asking about performance and additional fields or dimensions]
Assistant: [analyzes these preliminary results and sends follow up queries]
User: Thanks, I would like to know which categories, states and customers have the greates and lowest sales
Assistant: [sends queries to the metrics tool using these follow up instructions]
Result: Correct by gathering preliminary information and additional context to answer a complex question

User: How is my sales metric performing?
Assistant: [sends the question verbatim to the metrics tool and generates a response without follow ups]
Result: Incorrect, the agent is not effectively doing metrics research by not making multiple and thorough queries

Scenario 3 - Data Querying
User: what is the value of sales for the east region in the year 2024?
Assistant: [uses the data query tool]
Result: Correct, even though this question may be related to a metric it implies that a data query
is necessary since it is requesting specific data with filtering and aggregations. Metrics cannot
produce specific values such as sales on a specific date

User: what is the value of sales for the east region in the year 2024?
Assistant: [searches for an answer with the metrics tool]
Result: Incorrect, even though this question may be related to a metric this tool is not useful for
fetching specific values involving dates, categories or other filters

Scenario 4 - Alternative Datasource Workflow
User: Do we have information about Olympic sports performance?
Assistant: [uses datasource_search to find relevant datasources]
Assistant: [uses switch_datasource to select the most relevant datasource]
Assistant: [uses Query Data Source on the new datasource to answer the question]
Result: Correct, as the query requires finding a more relevant dataset and then querying it

User: I need information about Olympic sports performance.
Assistant: [directly queries the current datasource without checking if better data exists]
Result: Incorrect, the agent should first check if there are more relevant datasources for this topic

Restrictions:
- DO NOT HALLUCINATE metrics or data sets if they are not mentioned via available tools

Output:
Your output should be structured like a report noting the source of information (metrics, data source, or datasource search)
Always answer the question first and then provide any additional details or insights
"""
51 changes: 51 additions & 0 deletions experimental/agents/demos/search/tooling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import os
from dotenv import load_dotenv
from typing import Dict, Any, Optional

from langchain_core.tools import ToolException, Tool

from experimental.agents.tools import tableau_metrics, tavily_tool
from experimental.agents.shared_state import get_datasource_luid

# Working from experimental due to environment variable dependencies in langchain_tableau
from experimental.tools.datasource_qa import initialize_datasource_qa
from experimental.tools.search_datasource import initialize_datasource_search, initialize_datasource_switch

from experimental.utilities.metadata import get_data_dictionary

# Load environment variables before accessing them
load_dotenv()
# Tableau connection settings: all are required, so a missing variable fails
# fast with a KeyError at import time rather than at first tool invocation.
tableau_domain = os.environ['TABLEAU_DOMAIN']  # Tableau server/cloud base URL
tableau_site = os.environ['TABLEAU_SITE']  # target site name
# JWT settings — presumably Tableau connected-app credentials; confirm in deployment docs
tableau_jwt_client_id = os.environ['TABLEAU_JWT_CLIENT_ID']
tableau_jwt_secret_id = os.environ['TABLEAU_JWT_SECRET_ID']
tableau_jwt_secret = os.environ['TABLEAU_JWT_SECRET']
tableau_api_version = os.environ['TABLEAU_API_VERSION']
tableau_user = os.environ['TABLEAU_USER']  # user the agent impersonates/queries as
# LUID comes from shared_state (mutable at runtime via the switch_datasource tool),
# not directly from the environment
datasource_luid = get_datasource_luid()
tooling_llm_model = os.environ['TOOLING_MODEL']  # model used inside the tools themselves

# Factory for the datasource QA tool so it can be rebuilt whenever the LUID changes
def create_datasource_qa_tool(luid: str):
    """Build a Query Data Source tool bound to the given datasource LUID.

    All connection settings come from the module-level environment
    configuration; only the target datasource varies between calls.
    """
    connection_kwargs = {
        "domain": tableau_domain,
        "site": tableau_site,
        "jwt_client_id": tableau_jwt_client_id,
        "jwt_secret_id": tableau_jwt_secret_id,
        "jwt_secret": tableau_jwt_secret,
        "tableau_api_version": tableau_api_version,
        "tableau_user": tableau_user,
        "datasource_luid": luid,
        "tooling_llm_model": tooling_llm_model,
    }
    return initialize_datasource_qa(**connection_kwargs)

# Initial creation of the datasource QA tool
# (bound to whatever LUID shared_state holds at import time)
analyze_datasource = create_datasource_qa_tool(datasource_luid)

# tool for discovering alternative Tableau datasources relevant to a query
datasource_search = initialize_datasource_search()

# tool for switching the active datasource by LUID
datasource_switch = initialize_datasource_switch()

# List of tools used to build the state graph and for binding them to nodes
tools = [tableau_metrics, analyze_datasource, datasource_search, datasource_switch]
3 changes: 2 additions & 1 deletion experimental/agents/demos/superstore/tooling.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from langchain_tableau.tools.simple_datasource_qa import initialize_simple_datasource_qa

from experimental.agents.tools import tableau_metrics, tavily_tool
from experimental.agents.shared_state import get_datasource_luid
from experimental.tools.datasource_qa import initialize_datasource_qa

# Load environment variables before accessing them
Expand All @@ -15,7 +16,7 @@
tableau_jwt_secret = os.environ['TABLEAU_JWT_SECRET']
tableau_api_version = os.environ['TABLEAU_API_VERSION']
tableau_user = os.environ['TABLEAU_USER']
datasource_luid = os.environ['DATASOURCE_LUID']
datasource_luid = get_datasource_luid()
tooling_llm_model = os.environ['TOOLING_MODEL']

# Tableau VizQL Data Service Query Tool
Expand Down
11 changes: 11 additions & 0 deletions experimental/agents/shared_state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# File to manage the Datasource LUID for VDS
# Enables the setting and switching of the LUID for search agent
import os
from typing import Optional

# Active datasource LUID; None means "not explicitly set yet".
current_datasource_luid = None


def set_datasource_luid(luid: str) -> None:
    """Set the active Tableau datasource LUID for subsequent queries."""
    global current_datasource_luid
    current_datasource_luid = luid


def get_datasource_luid() -> Optional[str]:
    """Return the active datasource LUID.

    Falls back to the DATASOURCE_LUID environment variable when no LUID has
    been set explicitly. This preserves the previous env-driven behavior for
    callers (e.g. the superstore tooling) that never call
    set_datasource_luid(), which would otherwise receive None at import time.
    """
    if current_datasource_luid is not None:
        return current_datasource_luid
    return os.environ.get('DATASOURCE_LUID')
101 changes: 101 additions & 0 deletions experimental/agents/utils/search_agent_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from typing import Dict, TypedDict, Optional
import os
import json

from IPython.display import Image, display
from experimental.agents.shared_state import get_datasource_luid

def _visualize_graph(graph):
    """
    Creates a mermaid visualization of the State Graph in .png format.

    Renders the compiled graph to PNG via Langgraph's mermaid support, saves it
    as 'graph_visualization.png' in the working directory, and displays it
    inline (intended for notebook environments). Any failure is printed rather
    than raised, since visualization is a best-effort diagnostic aid.
    """

    # Attempt to generate and save PNG
    try:
        png_data = graph.get_graph().draw_mermaid_png()
        filename = "graph_visualization.png"
        with open(filename, "wb") as f:
            f.write(png_data)

        if os.path.exists(filename):
            file_size = os.path.getsize(filename)
            # bug fix: these f-strings previously printed a literal placeholder
            # instead of interpolating the actual filename
            print(f"Agent Graph saved as '{filename}' | file size: {file_size} bytes")

            display(Image(png_data))
        else:
            print(f"Failed to create file '{filename}'")
    except Exception as e:
        print(f"Failed to generate PNG: {str(e)}")


async def stream_graph_updates(message: dict, graph):
    """
    Streams responses from Agents to clients, such as chat interfaces.

    Builds the graph input from the user's message and operating parameters,
    then iterates the graph's event stream, printing the assistant's latest
    message as each event arrives. The datasource LUID is always re-read from
    shared state so a switch performed by the switch_datasource tool takes
    effect on the very next request.

    When debugging is enabled (DEBUG env var == '1'), raw events plus the
    Tableau credentials and datasource are printed instead of the formatted
    assistant output.

    Parameters:
    - message (dict): contains 'user_message' plus 'agent_inputs' with
      'tableau_credentials', 'datasource' and any additional parameters
    - graph: a representation of the agents behavior and tool set

    Returns:
    - None. The function's primary side effect is to print the assistant's
      response to the console.
    """

    message_string = json.dumps(message['user_message'])

    tableau_credentials = message['agent_inputs']['tableau_credentials']

    # Always use the current datasource LUID (which may have been updated by
    # the switch_datasource tool); copy so the caller's dict is not mutated
    datasource = message['agent_inputs']['datasource'].copy()
    datasource['luid'] = get_datasource_luid()

    # Print the current datasource LUID for debugging purposes
    print(f"Using datasource LUID: {get_datasource_luid()}")

    # this is how client apps should format their requests to the Agent API
    input_stream = {
        "messages": [("user", message_string)],
        "tableau_credentials": tableau_credentials,
        "datasource": datasource
    }

    # Forward any additional agent inputs that might be present
    for key, value in message['agent_inputs'].items():
        if key not in ['user_message', 'tableau_credentials', 'datasource']:
            input_stream[key] = value

    if os.environ.get("DEBUG", "") == "1":
        # debug mode: display tableau credentials to prove access to the
        # environment, then dump each raw event from the graph
        print('*** tableau_credentials ***', tableau_credentials)
        print('*** datasource ***', datasource)

        async for event in graph.astream(input_stream):
            print(f"*** EVENT *** type: {type(event)}")
            print(event)
    else:
        # normal mode (bug fix: previously only DEBUG values of '' or '0'
        # streamed output — any other non-'1' value silently streamed nothing)
        async for event in graph.astream(input_stream):
            agent_output = event.get('agent')
            if agent_output:
                agent_message = agent_output["messages"][0].content
                if len(agent_message) > 0:
                    print("\nAgent:")
                    print(f"{agent_message} \n")
Loading