From 5fefd7b344c57c492bab15961cb4a95e6f79dd26 Mon Sep 17 00:00:00 2001
From: David Fridrich
Date: Fri, 29 Aug 2025 10:36:29 +0200
Subject: [PATCH] mcp with llm tools

---
 python/mcp-ollama-rag/README.md            | 111 ++++++++++
 python/mcp-ollama-rag/client/client.py     |  46 +++++
 python/mcp-ollama-rag/function/__init__.py |   1 +
 python/mcp-ollama-rag/function/func.py     | 227 +++++++++++++++++++++
 python/mcp-ollama-rag/function/parser.py   |  54 +++++
 python/mcp-ollama-rag/pyproject.toml       |  28 +++
 python/mcp-ollama-rag/tests/test_func.py   |  38 ++++
 python/mcp-ollama/README.md                | 116 +++++++++++
 python/mcp-ollama/client/client.py         |  45 ++++
 python/mcp-ollama/function/__init__.py     |   1 +
 python/mcp-ollama/function/func.py         | 148 ++++++++++++++
 python/mcp-ollama/pyproject.toml           |  26 +++
 python/mcp-ollama/tests/test_func.py       |  38 ++++
 13 files changed, 879 insertions(+)
 create mode 100644 python/mcp-ollama-rag/README.md
 create mode 100644 python/mcp-ollama-rag/client/client.py
 create mode 100644 python/mcp-ollama-rag/function/__init__.py
 create mode 100644 python/mcp-ollama-rag/function/func.py
 create mode 100644 python/mcp-ollama-rag/function/parser.py
 create mode 100644 python/mcp-ollama-rag/pyproject.toml
 create mode 100644 python/mcp-ollama-rag/tests/test_func.py
 create mode 100644 python/mcp-ollama/README.md
 create mode 100644 python/mcp-ollama/client/client.py
 create mode 100644 python/mcp-ollama/function/__init__.py
 create mode 100644 python/mcp-ollama/function/func.py
 create mode 100644 python/mcp-ollama/pyproject.toml
 create mode 100644 python/mcp-ollama/tests/test_func.py

diff --git a/python/mcp-ollama-rag/README.md b/python/mcp-ollama-rag/README.md
new file mode 100644
index 0000000..77363f8
--- /dev/null
+++ b/python/mcp-ollama-rag/README.md
@@ -0,0 +1,111 @@
+# Ollama-MCP Function with RAG
+
+A Knative Function implementing a Model Context Protocol (MCP) server that
+provides integration with Ollama for LLM interactions. This function
+exposes Ollama capabilities through standardized MCP tools, enabling
+interaction with hosted language models.
+
+The communication flow is as follows:
+`MCP client -> MCP Server (Function) -> Ollama Server`
+
+1) Set up the `ollama` server (`ollama serve`)
+2) Run your function (MCP server) (`func run`)
+3) Connect using the MCP client in the `client/` dir (`python client.py`)
+
+## Architecture
+
+This project implements an ASGI-based Knative function with the following key
+components:
+
+### Core Components
+- **Function Class**: Main ASGI application entry point (This is your base
+Function)
+- **MCPServer Class**: FastMCP-based server implementing the HTTP-streamable
+MCP protocol
+- **MCP Tools**: Four primary tools for Ollama interaction:
+  - `list_models`: Enumerate available models on the Ollama server
+  - `pull_model`: Download and install new models
+  - `call_model`: Send prompts to models and receive responses
+  - `embed_document`: Embed documents for RAG (currently accepts URLs
+    pointing to raw text)
+
+## Setup
+
+### Prerequisites
+
+- Python 3.9 or higher
+- Ollama server running locally or accessible via network
+
+### Local Development Setup
+
+1. **Install dependencies & set up env**
+   ```bash
+   # optionally set up a venv
+   python -m venv venv
+   source venv/bin/activate
+
+   # and install deps
+   pip install -e .
+   ```
+
+2. **Start Ollama server:**
+   ```bash
+   # Install Ollama (if not already installed)
+   curl -fsSL https://ollama.com/install.sh | sh
+
+   # Start Ollama service (in a different terminal / in the background)
+   ollama serve
+
+   # Pull a model (optional, can be done via MCP tool)
+   ollama pull llama3.2:3b
+   ```
+
+Now you have a running Ollama server.
+
+3. **Run the function:**
+   ```bash
+   # Using func CLI (build via host builder)
+   func run --builder=host
+   ```
+
+Now you have a running MCP server whose Ollama-backed tools let you embed
+documents, pull a model available on the Ollama server, and prompt the (now
+specialized) inference model.
+
+4. **Run MCP client**
+   ```bash
+   # In the client/ directory.
+   # MODIFY THIS FILE to change what gets embedded and asked.
+   # By default it RAGs a document and then asks a question about it.
+   python client.py
+   ```
+
+You have now connected to the running function via the MCP protocol, using an
+MCP client that embeds a document into the vector store for RAG and then
+prompts the model, which can use those embeddings to answer your question
+(hopefully) in a more informed way.
+
+### Deployment to cluster (not tested)
+
+#### Knative Function Deployment
+
+```bash
+# Deploy to Knative cluster
+func deploy
+
+# Or build and deploy with custom image
+func deploy --image your-registry/mcp-ollama-function
+```
+
+Here you would also need to ensure access to the Ollama server, e.g. by
+running it in a pod or via port-forwarding.
+
+### Troubleshooting
+
+**Connection Issues:**
+- Ensure Ollama server is running and accessible
+- Check firewall settings for port 11434 (Ollama default)
+- Verify model availability with `ollama list`
+- Confirm function is running on expected port (default: 8080)
+
diff --git a/python/mcp-ollama-rag/client/client.py b/python/mcp-ollama-rag/client/client.py
new file mode 100644
index 0000000..987963d
--- /dev/null
+++ b/python/mcp-ollama-rag/client/client.py
@@ -0,0 +1,46 @@
+import asyncio
+from mcp import ClientSession
+from mcp.client.streamable_http import streamablehttp_client
+import json
+
+from mcp.types import CallToolResult
+
+def unload_list_models(models: CallToolResult) -> list[str]:
+    return [json.loads(item.text)["model"] for item in models.content if item.text.strip().startswith('{')] #pyright: ignore
+
+async def main():
+    # Check your running Function MCP server; during initialization it prints
+    # the address where it is available.
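+    # The MCP endpoint is served under the /mcp path (port 8080 by default
+    # with `func run`); adjust the URL below if your setup differs.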
+ async with streamablehttp_client("http://localhost:8080/mcp") as streams: + read_stream,write_stream = streams[0],streams[1] + + async with ClientSession(read_stream,write_stream) as sess: + print("Initializing connection...",end="") + await sess.initialize() + print("done!\n") + + + # embed some documents + embed = await sess.call_tool( + name="embed_document", + arguments={ + "data": [ + "https://raw.githubusercontent.com/knative/func/main/docs/function-templates/python.md", + "https://context7.com/knative/docs/llms.txt?topic=functions", + ], + } + ) + print(embed.content[0].text) # pyright: ignore[reportAttributeAccessIssue] + print("-"*60) + + # prompt the inference model + resp = await sess.call_tool( + name="call_model", + arguments={ + "prompt": "What actually is a Knative Function?", + } + ) + print(resp.content[0].text) + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/mcp-ollama-rag/function/__init__.py b/python/mcp-ollama-rag/function/__init__.py new file mode 100644 index 0000000..c16dbac --- /dev/null +++ b/python/mcp-ollama-rag/function/__init__.py @@ -0,0 +1 @@ +from .func import new diff --git a/python/mcp-ollama-rag/function/func.py b/python/mcp-ollama-rag/function/func.py new file mode 100644 index 0000000..0714e41 --- /dev/null +++ b/python/mcp-ollama-rag/function/func.py @@ -0,0 +1,227 @@ +# function/func.py + +# Function as an MCP Server implementation +import logging + +from mcp.server.fastmcp import FastMCP +import ollama +import asyncio +import chromadb +import requests + +def new(): + """ New is the only method that must be implemented by a Function. + The instance returned can be of any name. + """ + return Function() + +# Accepts any url link which points to a raw data (*.md/text files etc.) +# example: https://raw.githubusercontent.com/knative/func/main/docs/function-templates/python.md +def get_raw_content(url: str) -> str: + """ retrieve contents of github raw url as a text """ + response = requests.get(url) + response.raise_for_status() # errors if bad response + print(f"fetch '{url}' - ok") + return response.text + +class MCPServer: + """ + MCP server that exposes a chat with an LLM model running on Ollama server + as one of its tools. + """ + + def __init__(self): + # Create FastMCP instance with stateless HTTP for Kubernetes deployment + self.mcp = FastMCP("MCP-Ollama server", stateless_http=True) + + # Get the ASGI app from FastMCP + self._app = self.mcp.streamable_http_app() + + self.client = ollama.Client() + + #init database stuff + self.dbClient = chromadb.Client() + self.collection = self.dbClient.create_collection(name="my_collection") + # default embedding model + self.embedding_model = "mxbai-embed-large" + # call this after self.embedding_model assignment, so its defined + self._register_tools() + + def _register_tools(self): + """Register MCP tools.""" + @self.mcp.tool() + def list_models(): + """List all models currently available on the Ollama server""" + try: + models = self.client.list() + except Exception as e: + return f"Oops, failed to list models because: {str(e)}" + #return [model['name'] for model in models['models']] + return [model for model in models] + + default_embedding_model = self.embedding_model + @self.mcp.tool() + def embed_document(data:list[str],model:str = default_embedding_model) -> str: + """ + RAG (Retrieval-augmented generation) tool. + Embeds documents provided in data. + Arguments: + - data: expected to be of type str|list. + - model: embedding model to use, examples below. 
+
+        # example embedding models:
+        # mxbai-embed-large - 334M  *default
+        # nomic-embed-text  - 137M
+        # all-minilm        -  23M
+        """
+        count = 0
+
+        ############ TODO -- import parse_data_generator from parser.py and
+        ############ use it here so plain text strings are supported as well
+        # documents generator
+        #documents_gen = parse_data_generator(data)
+        #### 1) GENERATE
+        # generate vector embeddings via embedding model
+        #for i, d in enumerate(documents_gen):
+        #    response = ollama.embed(model=model, input=d)
+        #    embeddings = response["embeddings"]
+        #    self.collection.add(
+        #        ids=[str(i)],
+        #        embeddings=embeddings,
+        #        documents=[d]
+        #    )
+        #    count += 1
+
+        # for simplicity (until the above is resolved), this accepts only URLs
+        for i, d in enumerate(data):
+            response = ollama.embed(model=model, input=get_raw_content(d))
+            embeddings = response["embeddings"]
+            self.collection.add(
+                ids=[str(i)],
+                embeddings=embeddings,
+                documents=[d]
+            )
+            count += 1
+        return f"ok - Embedded {count} documents"
+
+        @self.mcp.tool()
+        def pull_model(model: str) -> str:
+            """Download and install an Ollama model into the running server"""
+            try:
+                _ = self.client.pull(model)
+            except Exception as e:
+                return f"Error occurred during pulling of a model: {str(e)}"
+            return f"Success! model {model} is available"
+
+        @self.mcp.tool()
+        def call_model(prompt: str,
+                       model: str = "llama3.2:3b",
+                       embed_model: str = self.embedding_model) -> str:
+            """Send a prompt to a model being served on the Ollama server"""
+            #### 2) RETRIEVE
+            # we embed the prompt but don't save it into the db, then we
+            # retrieve the most relevant document (most similar vectors)
+            try:
+                response = ollama.embed(
+                    model=embed_model,
+                    input=prompt
+                )
+                results = self.collection.query(
+                    query_embeddings=response["embeddings"],
+                    n_results=1
+                )
+                data = results['documents'][0][0]
+
+                #### 3) GENERATE
+                # generate an answer from the prompt combined with the
+                # retrieved data
+                output = ollama.generate(
+                    model=model,
+                    prompt=f'Using data: {data}, respond to prompt: {prompt}'
+                )
+                print(output)  # debug output
+            except Exception as e:
+                return f"Error occurred during calling the model: {str(e)}"
+            return output['response']
+
+    async def handle(self, scope, receive, send):
+        """Handle ASGI requests - both lifespan and HTTP."""
+        await self._app(scope, receive, send)
+
+class Function:
+    def __init__(self):
+        """ The init method is an optional method where initialization can be
+        performed. See the start method for a startup hook which includes
+        configuration.
+        """
+        self.mcp_server = MCPServer()
+        self._mcp_initialized = False
+
+    async def handle(self, scope, receive, send):
+        """
+        Main entry to your Function.
+        This handles all the incoming requests.
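+        Requests whose path starts with /mcp are forwarded to the embedded
+        MCP server; any other request receives a plain 200 OK response.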
+ """ + + # Initialize MCP server on first request + if not self._mcp_initialized: + await self._initialize_mcp() + + # Route MCP requests + if scope['path'].startswith('/mcp'): + await self.mcp_server.handle(scope, receive, send) + return + + # Default response for non-MCP requests + await self._send_default_response(send) + + async def _initialize_mcp(self): + """Initialize the MCP server by sending lifespan startup event.""" + lifespan_scope = {'type': 'lifespan', 'asgi': {'version': '3.0'}} + startup_sent = False + + async def lifespan_receive(): + nonlocal startup_sent + if not startup_sent: + startup_sent = True + return {'type': 'lifespan.startup'} + await asyncio.Event().wait() # Wait forever for shutdown + + async def lifespan_send(message): + if message['type'] == 'lifespan.startup.complete': + self._mcp_initialized = True + elif message['type'] == 'lifespan.startup.failed': + logging.error(f"MCP startup failed: {message}") + + # Start lifespan in background + asyncio.create_task(self.mcp_server.handle( + lifespan_scope, lifespan_receive, lifespan_send + )) + + # Brief wait for startup completion + await asyncio.sleep(0.1) + + async def _send_default_response(self, send): + """ + Send default OK response. + This is for your non MCP requests if desired. + """ + await send({ + 'type': 'http.response.start', + 'status': 200, + 'headers': [[b'content-type', b'text/plain']], + }) + await send({ + 'type': 'http.response.body', + 'body': b'OK', + }) + + def start(self, cfg): + logging.info("Function starting") + + def stop(self): + logging.info("Function stopping") + + def alive(self): + return True, "Alive" + + def ready(self): + return True, "Ready" diff --git a/python/mcp-ollama-rag/function/parser.py b/python/mcp-ollama-rag/function/parser.py new file mode 100644 index 0000000..c888d01 --- /dev/null +++ b/python/mcp-ollama-rag/function/parser.py @@ -0,0 +1,54 @@ +import requests +from urllib.parse import urlparse + +def parse_data_generator(data): + """ + Generator that yields documents one at a time. + Handles any combination of urls and data strings. + Can be of type str|list. + example: + ["","","long data string"] etc. + """ + + # STR + if isinstance(data, str): + content = '' + if is_url(data): + content = get_raw_content(data) + else: + content = data + yield content.strip() + + # LIST + elif isinstance(data, list): + for item in data: + if isinstance(item,str): + if is_url(item): + content = get_raw_content(item) + else: + content = item + yield content.strip() + else: + print(f"warning: handling item {item} as a string") + yield str(item) + else: + print(f"Fallback: unknown type, handling {data} as a string") + yield str(data) + +def is_url(text: str): + """Check if text is a valid URL""" + try: + result = urlparse(text) + print(f"is_url: {result}") + return all([result.scheme, result.netloc]) + except: + return False + +# Accepts any url link which points to a raw data (*.md/text files etc.) 
+# example: https://raw.githubusercontent.com/knative/func/main/docs/function-templates/python.md +def get_raw_content(url: str) -> str: + """ retrieve contents of github raw url as a text """ + response = requests.get(url) + response.raise_for_status() # errors if bad response + print(f"fetch '{url}' - ok") + return response.text diff --git a/python/mcp-ollama-rag/pyproject.toml b/python/mcp-ollama-rag/pyproject.toml new file mode 100644 index 0000000..f538d71 --- /dev/null +++ b/python/mcp-ollama-rag/pyproject.toml @@ -0,0 +1,28 @@ +[project] +name = "function" +description = "" +version = "0.1.0" +requires-python = ">=3.9" +readme = "README.md" +license = "MIT" +dependencies = [ + "httpx", + "pytest", + "pytest-asyncio", + "mcp", + "ollama", + "requests", + "chromadb" +] +authors = [ + { name="Your Name", email="you@example.com"}, +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.pytest.ini_options] +asyncio_mode = "strict" +asyncio_default_fixture_loop_scope = "function" + diff --git a/python/mcp-ollama-rag/tests/test_func.py b/python/mcp-ollama-rag/tests/test_func.py new file mode 100644 index 0000000..5b37a73 --- /dev/null +++ b/python/mcp-ollama-rag/tests/test_func.py @@ -0,0 +1,38 @@ +""" +An example set of unit tests which confirm that the main handler (the +callable function) returns 200 OK for a simple HTTP GET. +""" +import pytest +from function import new + + +@pytest.mark.asyncio +async def test_function_handle(): + f = new() # Instantiate Function to Test + + sent_ok = False + sent_headers = False + sent_body = False + + # Mock Send + async def send(message): + nonlocal sent_ok + nonlocal sent_headers + nonlocal sent_body + + if message.get('status') == 200: + sent_ok = True + + if message.get('type') == 'http.response.start': + sent_headers = True + + if message.get('type') == 'http.response.body': + sent_body = True + + # Invoke the Function + await f.handle({}, {}, send) + + # Assert send was called + assert sent_ok, "Function did not send a 200 OK" + assert sent_headers, "Function did not send headers" + assert sent_body, "Function did not send a body" diff --git a/python/mcp-ollama/README.md b/python/mcp-ollama/README.md new file mode 100644 index 0000000..24e22f7 --- /dev/null +++ b/python/mcp-ollama/README.md @@ -0,0 +1,116 @@ +# Ollama-MCP Function + +A Knative Function implementing a Model Context Protocol (MCP) server that +provides integration with Ollama for local LLM interactions. This function +exposes Ollama capabilities through standardized MCP tools, enabling the +interaction with locally hosted language models. 
+
+The communication flow is as follows:
+`MCP client -> MCP Server (Function) -> Ollama Server`
+
+1) Set up the `ollama` server using `ollama serve`
+2) Run your function (MCP server)
+3) Connect using the MCP client in the `client/` dir (`python client.py`)
+
+## Architecture
+
+This project implements an ASGI-based Knative function with the following key
+components:
+
+### Core Components
+- **Function Class**: Main ASGI application entry point (This is your base
+Function)
+- **MCPServer Class**: FastMCP-based server implementing the HTTP-streamable
+MCP protocol
+- **MCP Tools**: Three primary tools for Ollama interaction:
+  - `list_models`: Enumerate available models on the Ollama server
+  - `pull_model`: Download and install new models
+  - `call_model`: Send prompts to models and receive responses
+
+## Setup
+
+### Prerequisites
+
+- Python 3.9 or higher
+- Ollama server running locally or accessible via network
+
+### Local Development Setup
+
+1. **Install dependencies & set up env**
+   ```bash
+   # (optional) set up a venv
+   python -m venv venv
+   # and activate it
+   source venv/bin/activate
+
+   # install deps
+   pip install -e .
+   ```
+
+2. **Start Ollama server:**
+   ```bash
+   # Install Ollama (if not already installed)
+   curl -fsSL https://ollama.com/install.sh | sh
+
+   # Start Ollama service (in the background or a different terminal)
+   ollama serve
+
+   # Pull a model (optional, can be done via MCP tool)
+   ollama pull llama3.2:3b
+   ```
+
+Now you have a running Ollama server.
+
+3. **Run the function locally:**
+   ```bash
+   # Using func CLI
+   func run --builder=host
+   ```
+
+Now you have a running MCP server with access to the Ollama server.
+
+4. **Run MCP client**
+   ```bash
+   # In the client/ directory
+   python client.py
+   ```
+
+The client connects to the running function over the MCP protocol and invokes
+the `call_model` tool, which sends a request to the LLM running on the Ollama
+server.
+
+Edit `client/client.py` to change the requests sent to the MCP server.
+Edit `function/func.py` to change the MCP server and its tools; a sketch of
+adding a new tool follows below.
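+
+A tool is registered inside `MCPServer._register_tools` with the
+`@self.mcp.tool()` decorator. The sketch below adds a hypothetical `summarize`
+tool (the name and prompt are illustrative, not part of this template); it
+reuses the same `self.client.chat` call that `call_model` already makes:
+
+```python
+        @self.mcp.tool()
+        def summarize(text: str, model: str = "llama3.2:3b") -> str:
+            """Summarize the given text using a model on the Ollama server."""
+            try:
+                response = self.client.chat(
+                    model=model,
+                    messages=[{"role": "user",
+                               "content": f"Summarize this text:\n\n{text}"}],
+                )
+            except Exception as e:
+                return f"Error occurred during summarization: {str(e)}"
+            return response['message']['content']
+```
+
+After restarting the function, the new tool appears in the client's
+`sess.list_tools()` output and can be invoked with `sess.call_tool`.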
+ +### Deployment to cluster (not tested) + +#### Knative Function Deployment + +```bash +# Deploy to cluster with Knative +func deploy + +# Or build and deploy with custom image +func deploy --image your-registry/mcp-ollama-function +``` + +### Troubleshooting + +**Connection Issues:** +- Ensure Ollama server is running and accessible +- Check firewall settings for port 11434 (Ollama default) +- Verify model availability with `ollama list` +- Confirm function is running on expected port (default: 8080) + +**Performance Considerations:** +- Model loading time varies by size (3B models ~2-5s, 7B+ models 10-30s) +- Consider pre-loading frequently used models +- Monitor memory usage for large models + +### Dependencies + +- **mcp**: Model Context Protocol implementation +- **ollama**: Python client for Ollama API +- **httpx**: Async HTTP client for external requests +- **pytest/pytest-asyncio**: Testing framework with async support + diff --git a/python/mcp-ollama/client/client.py b/python/mcp-ollama/client/client.py new file mode 100644 index 0000000..06a2d4a --- /dev/null +++ b/python/mcp-ollama/client/client.py @@ -0,0 +1,45 @@ +import asyncio +from mcp import ClientSession +from mcp.client.streamable_http import streamablehttp_client +import pprint +import json + +from mcp.types import CallToolResult + +def unload_list_models(models: CallToolResult) -> list[str]: + return [json.loads(item.text)["model"] for item in models.content if item.text.strip().startswith('{')] + +async def main(): + # check your running Function MCP Server, it will output where its available + # at during initialization. + async with streamablehttp_client("http://localhost:8080/mcp") as streams: + read_stream,write_stream = streams[0],streams[1] + + async with ClientSession(read_stream,write_stream) as sess: + print("Initializing connection...",end="") + await sess.initialize() + print("done!\n") + + ### List all available tools + #tools = await sess.list_tools() + + # List all available models + #models = await sess.call_tool( + # name="list_models", + # ) + # extract model names from the response + #models = unload_list_models(models) + #print(f"list of models currently available: {models}") + + # create a request for the model + response = await sess.call_tool( + name="call_model", + arguments={ + "prompt":"How to properly tie a tie?", + "model":"llama3.2:3b", + } + ) + print(response.content) + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/mcp-ollama/function/__init__.py b/python/mcp-ollama/function/__init__.py new file mode 100644 index 0000000..c16dbac --- /dev/null +++ b/python/mcp-ollama/function/__init__.py @@ -0,0 +1 @@ +from .func import new diff --git a/python/mcp-ollama/function/func.py b/python/mcp-ollama/function/func.py new file mode 100644 index 0000000..e8172fc --- /dev/null +++ b/python/mcp-ollama/function/func.py @@ -0,0 +1,148 @@ +# function/func.py + +# Function as an MCP Server implementation +import logging + +from mcp.server.fastmcp import FastMCP +import ollama +import asyncio + +def new(): + """ New is the only method that must be implemented by a Function. + The instance returned can be of any name. + """ + return Function() + +class MCPServer: + """ + MCP server that exposes a chat with an LLM model running on Ollama server + as one of its tools. 
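+
+    Registered tools: list_models, pull_model and call_model.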
+ """ + + def __init__(self): + # Create FastMCP instance with stateless HTTP for Kubernetes deployment + self.mcp = FastMCP("MCP-Ollama server", stateless_http=True) + + self._register_tools() + + # Get the ASGI app from FastMCP + self._app = self.mcp.streamable_http_app() + + self.client = ollama.Client() + + def _register_tools(self): + """Register MCP tools.""" + @self.mcp.tool() + def list_models(): + """List all models currently available on the Ollama server""" + try: + models = self.client.list() + except Exception as e: + return f"Oops, failed to list models because: {str(e)}" + #return [model['name'] for model in models['models']] + return [model for model in models] + + @self.mcp.tool() + def pull_model(model: str) -> str: + """Download and install an Ollama model into the running server""" + try: + _ = self.client.pull(model) + except Exception as e: + return f"Error occurred during pulling of a model: {str(e)}" + return f"Success! model {model} is available" + + @self.mcp.tool() + def call_model(prompt: str, model: str = "llama3.2:3b") -> str: + """Send a prompt to a model being served on ollama server""" + try: + response = self.client.chat( + model=model, + messages=[{"role": "user", "content": prompt}] + ) + except Exception as e: + return f"Error occurred during calling the model: {str(e)}" + return response['message']['content'] + + async def handle(self, scope, receive, send): + """Handle ASGI requests - both lifespan and HTTP.""" + await self._app(scope, receive, send) + +class Function: + def __init__(self): + """ The init method is an optional method where initialization can be + performed. See the start method for a startup hook which includes + configuration. + """ + self.mcp_server = MCPServer() + self._mcp_initialized = False + + async def handle(self, scope, receive, send): + """ + Main entry to your Function. + This handles all the incoming requests. + """ + + # Initialize MCP server on first request + if not self._mcp_initialized: + await self._initialize_mcp() + + # Route MCP requests + if scope['path'].startswith('/mcp'): + await self.mcp_server.handle(scope, receive, send) + return + + # Default response for non-MCP requests + await self._send_default_response(send) + + async def _initialize_mcp(self): + """Initialize the MCP server by sending lifespan startup event.""" + lifespan_scope = {'type': 'lifespan', 'asgi': {'version': '3.0'}} + startup_sent = False + + async def lifespan_receive(): + nonlocal startup_sent + if not startup_sent: + startup_sent = True + return {'type': 'lifespan.startup'} + await asyncio.Event().wait() # Wait forever for shutdown + + async def lifespan_send(message): + if message['type'] == 'lifespan.startup.complete': + self._mcp_initialized = True + elif message['type'] == 'lifespan.startup.failed': + logging.error(f"MCP startup failed: {message}") + + # Start lifespan in background + asyncio.create_task(self.mcp_server.handle( + lifespan_scope, lifespan_receive, lifespan_send + )) + + # Brief wait for startup completion + await asyncio.sleep(0.1) + + async def _send_default_response(self, send): + """ + Send default OK response. + This is for your non MCP requests if desired. 
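+        Responds with HTTP 200 and a plain-text 'OK' body.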
+ """ + await send({ + 'type': 'http.response.start', + 'status': 200, + 'headers': [[b'content-type', b'text/plain']], + }) + await send({ + 'type': 'http.response.body', + 'body': b'OK', + }) + + def start(self, cfg): + logging.info("Function starting") + + def stop(self): + logging.info("Function stopping") + + def alive(self): + return True, "Alive" + + def ready(self): + return True, "Ready" diff --git a/python/mcp-ollama/pyproject.toml b/python/mcp-ollama/pyproject.toml new file mode 100644 index 0000000..a2ef98a --- /dev/null +++ b/python/mcp-ollama/pyproject.toml @@ -0,0 +1,26 @@ +[project] +name = "function" +description = "" +version = "0.1.0" +requires-python = ">=3.9" +readme = "README.md" +license = "MIT" +dependencies = [ + "httpx", + "pytest", + "pytest-asyncio", + "mcp", + "ollama" +] +authors = [ + { name="Your Name", email="you@example.com"}, +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.pytest.ini_options] +asyncio_mode = "strict" +asyncio_default_fixture_loop_scope = "function" + diff --git a/python/mcp-ollama/tests/test_func.py b/python/mcp-ollama/tests/test_func.py new file mode 100644 index 0000000..5b37a73 --- /dev/null +++ b/python/mcp-ollama/tests/test_func.py @@ -0,0 +1,38 @@ +""" +An example set of unit tests which confirm that the main handler (the +callable function) returns 200 OK for a simple HTTP GET. +""" +import pytest +from function import new + + +@pytest.mark.asyncio +async def test_function_handle(): + f = new() # Instantiate Function to Test + + sent_ok = False + sent_headers = False + sent_body = False + + # Mock Send + async def send(message): + nonlocal sent_ok + nonlocal sent_headers + nonlocal sent_body + + if message.get('status') == 200: + sent_ok = True + + if message.get('type') == 'http.response.start': + sent_headers = True + + if message.get('type') == 'http.response.body': + sent_body = True + + # Invoke the Function + await f.handle({}, {}, send) + + # Assert send was called + assert sent_ok, "Function did not send a 200 OK" + assert sent_headers, "Function did not send headers" + assert sent_body, "Function did not send a body"