diff --git a/examples/django_example/README.md b/examples/django_example/README.md
deleted file mode 100644
index 1a0853bcb..000000000
--- a/examples/django_example/README.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# django_example
-
-This Django application demonstrates how to integrate Langfuse for event tracing and response generation within a Django project.
-
-1. **Shutdown Behavior**: Shutdown handlers registered in `myapp/__init__.py` (for SIGINT and SIGTERM) flush all pending events to Langfuse to ensure data integrity.
-
-2. **Endpoints**:
-- `"/"`: Returns a JSON message to demonstrate Langfuse integration.
-- `"/campaign/"`: Accepts a `prompt` and employs Langfuse for event tracing. (Note: OpenAI is referenced for context but not used in this example).
-
-3. **Integration**:
-- Langfuse: Utilized for event tracing with `trace`, `score`, `generation`, and `span` operations. (Note that OpenAI is not actually used here to generate an answer to the prompt. This example just shows how to use Django with the Langfuse SDK.)
-
-4. **Dependencies**:
-- Django: The primary framework for building the application.
-- Langfuse: Library for event tracing and management.
-
-5. **Usage**:
-- Preparation: Ensure `langfuse` is installed and configured in the `myapp/langfuse_integration.py` file.
-- Starting the Server: Navigate to the example directory `langfuse-python/examples/django_example`. Run `poetry run python manage.py runserver 0.0.0.0:8000` to start the server.
-- Accessing Endpoints: The application's endpoints can be accessed at `http://localhost:8000`.
-
-Refer to the Django and Langfuse documentation for more detailed information.
diff --git a/examples/django_example/db.sqlite3 b/examples/django_example/db.sqlite3
deleted file mode 100644
index 955503bb2..000000000
Binary files a/examples/django_example/db.sqlite3 and /dev/null differ
diff --git a/examples/django_example/django_example/__init__.py b/examples/django_example/django_example/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/examples/django_example/django_example/asgi.py b/examples/django_example/django_example/asgi.py
deleted file mode 100644
index d056699ed..000000000
--- a/examples/django_example/django_example/asgi.py
+++ /dev/null
@@ -1,15 +0,0 @@
-"""ASGI config for django_example project.
-
-It exposes the ASGI callable as a module-level variable named ``application``.
-
-For more information on this file, see
-https://docs.djangoproject.com/en/5.0/howto/deployment/asgi/
-"""
-
-import os
-
-from django.core.asgi import get_asgi_application
-
-os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_example.settings")
-
-application = get_asgi_application()
diff --git a/examples/django_example/django_example/settings.py b/examples/django_example/django_example/settings.py
deleted file mode 100644
index 087323b71..000000000
--- a/examples/django_example/django_example/settings.py
+++ /dev/null
@@ -1,123 +0,0 @@
-"""Django settings for django_example project.
-
-Generated by 'django-admin startproject' using Django 5.0.2.
-
-For more information on this file, see
-https://docs.djangoproject.com/en/5.0/topics/settings/
-
-For the full list of settings and their values, see
-https://docs.djangoproject.com/en/5.0/ref/settings/
-"""
-
-from pathlib import Path
-
-# Build paths inside the project like this: BASE_DIR / 'subdir'.
-BASE_DIR = Path(__file__).resolve().parent.parent
-
-
-# Quick-start development settings - unsuitable for production
-# See https://docs.djangoproject.com/en/5.0/howto/deployment/checklist/
-
-# SECURITY WARNING: keep the secret key used in production secret!
-SECRET_KEY = "django-insecure-4c6v7e7e*o&0uajrmb@7x9ti#e)!9kbdf#+1=t=qwd5fm&ui%b"
-
-# SECURITY WARNING: don't run with debug turned on in production!
-DEBUG = True
-
-ALLOWED_HOSTS = ["localhost", "0.0.0.0"]
-
-
-# Application definition
-
-INSTALLED_APPS = [
- "django.contrib.admin",
- "django.contrib.auth",
- "django.contrib.contenttypes",
- "django.contrib.sessions",
- "django.contrib.messages",
- "django.contrib.staticfiles",
- "myapp",
-]
-
-MIDDLEWARE = [
- "django.middleware.security.SecurityMiddleware",
- "django.contrib.sessions.middleware.SessionMiddleware",
- "django.middleware.common.CommonMiddleware",
- "django.middleware.csrf.CsrfViewMiddleware",
- "django.contrib.auth.middleware.AuthenticationMiddleware",
- "django.contrib.messages.middleware.MessageMiddleware",
- "django.middleware.clickjacking.XFrameOptionsMiddleware",
-]
-
-ROOT_URLCONF = "django_example.urls"
-
-TEMPLATES = [
- {
- "BACKEND": "django.template.backends.django.DjangoTemplates",
- "DIRS": [],
- "APP_DIRS": True,
- "OPTIONS": {
- "context_processors": [
- "django.template.context_processors.debug",
- "django.template.context_processors.request",
- "django.contrib.auth.context_processors.auth",
- "django.contrib.messages.context_processors.messages",
- ],
- },
- },
-]
-
-WSGI_APPLICATION = "django_example.wsgi.application"
-
-
-# Database
-# https://docs.djangoproject.com/en/5.0/ref/settings/#databases
-
-DATABASES = {
- "default": {
- "ENGINE": "django.db.backends.sqlite3",
- "NAME": BASE_DIR / "db.sqlite3",
- }
-}
-
-
-# Password validation
-# https://docs.djangoproject.com/en/5.0/ref/settings/#auth-password-validators
-
-AUTH_PASSWORD_VALIDATORS = [
- {
- "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
- },
- {
- "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
- },
- {
- "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
- },
- {
- "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
- },
-]
-
-
-# Internationalization
-# https://docs.djangoproject.com/en/5.0/topics/i18n/
-
-LANGUAGE_CODE = "en-us"
-
-TIME_ZONE = "UTC"
-
-USE_I18N = True
-
-USE_TZ = True
-
-
-# Static files (CSS, JavaScript, Images)
-# https://docs.djangoproject.com/en/5.0/howto/static-files/
-
-STATIC_URL = "static/"
-
-# Default primary key field type
-# https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field
-
-DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
diff --git a/examples/django_example/django_example/urls.py b/examples/django_example/django_example/urls.py
deleted file mode 100644
index 954bde78e..000000000
--- a/examples/django_example/django_example/urls.py
+++ /dev/null
@@ -1,24 +0,0 @@
-"""URL configuration for django_example project.
-
-The `urlpatterns` list routes URLs to views. For more information please see:
- https://docs.djangoproject.com/en/5.0/topics/http/urls/
-
-Examples:
-Function views
- 1. Add an import: from my_app import views
- 2. Add a URL to urlpatterns: path('', views.home, name='home')
-Class-based views
- 1. Add an import: from other_app.views import Home
- 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
-Including another URLconf
- 1. Import the include() function: from django.urls import include, path
- 2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
-"""
-
-from django.urls import path
-from myapp import views
-
-urlpatterns = [
- path("", views.main_route, name="main_route"),
- path("campaign/", views.campaign, name="campaign"),
-]
diff --git a/examples/django_example/django_example/wsgi.py b/examples/django_example/django_example/wsgi.py
deleted file mode 100644
index 88093747b..000000000
--- a/examples/django_example/django_example/wsgi.py
+++ /dev/null
@@ -1,15 +0,0 @@
-"""WSGI config for django_example project.
-
-It exposes the WSGI callable as a module-level variable named ``application``.
-
-For more information on this file, see
-https://docs.djangoproject.com/en/5.0/howto/deployment/wsgi/
-"""
-
-import os
-
-from django.core.wsgi import get_wsgi_application
-
-os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_example.settings")
-
-application = get_wsgi_application()
diff --git a/examples/django_example/manage.py b/examples/django_example/manage.py
deleted file mode 100755
index b3f0b0f57..000000000
--- a/examples/django_example/manage.py
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env python
-"""Django's command-line utility for administrative tasks."""
-
-import os
-import sys
-
-
-def main():
- """Run administrative tasks."""
- os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_example.settings")
- try:
- from django.core.management import execute_from_command_line
- except ImportError as exc:
- raise ImportError(
- "Couldn't import Django. Are you sure it's installed and "
- "available on your PYTHONPATH environment variable? Did you "
- "forget to activate a virtual environment?"
- ) from exc
- execute_from_command_line(sys.argv)
-
-
-if __name__ == "__main__":
- main()
diff --git a/examples/django_example/myapp/__init__.py b/examples/django_example/myapp/__init__.py
deleted file mode 100644
index 69fa667a3..000000000
--- a/examples/django_example/myapp/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import signal
-import sys
-from .langfuse_integration import langfuse_flush
-
-
-def shutdown_handler(*args):
- """This function handles the shutdown process.
-
- It calls the langfuse_flush function to flush any pending changes,
- and then exits the program with a status code of 0.
- """
- langfuse_flush()
- sys.exit(0)
-
-
-# Register the shutdown_handler for SIGINT (Ctrl+C)
-signal.signal(signal.SIGINT, shutdown_handler)
-
-# Register the same shutdown_handler for SIGTERM
-signal.signal(signal.SIGTERM, shutdown_handler)
diff --git a/examples/django_example/myapp/apps.py b/examples/django_example/myapp/apps.py
deleted file mode 100644
index da45bfa47..000000000
--- a/examples/django_example/myapp/apps.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from django.apps import AppConfig
-
-
-class MyappConfig(AppConfig):
- default_auto_field = "django.db.models.BigAutoField"
- name = "myapp"
diff --git a/examples/django_example/myapp/langfuse_integration.py b/examples/django_example/myapp/langfuse_integration.py
deleted file mode 100644
index d57b59a3e..000000000
--- a/examples/django_example/myapp/langfuse_integration.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from langfuse import Langfuse
-
-# Initialize Langfuse
-langfuse = Langfuse(public_key="pk-lf-1234567890", secret_key="sk-lf-1234567890")
-
-
-def get_response_openai(prompt):
- """This simulates the response to a prompt using the OpenAI API.
-
- Args:
- prompt (str): The prompt for generating the response.
-
- Returns:
- dict: A dictionary containing the response status and message (always "This is a test message").
- """
- try:
- trace = langfuse.trace(
- name="this-is-a-trace",
- user_id="test",
- metadata="test",
- )
-
- trace = trace.score(
- name="user-feedback",
- value=1,
- comment="Some user feedback",
- )
-
- generation = trace.generation(name="this-is-a-generation", metadata="test")
-
- sub_generation = generation.generation(
- name="this-is-a-sub-generation", metadata="test"
- )
-
- sub_sub_span = sub_generation.span(
- name="this-is-a-sub-sub-span", metadata="test"
- )
-
- sub_sub_span = sub_sub_span.score(
- name="user-feedback-o",
- value=1,
- comment="Some more user feedback",
- )
-
- response = {"status": "success", "message": "This is a test message"}
- except Exception as e:
- print("Error in creating campaigns from openAI:", str(e))
- return 503
- return response
-
-
-def langfuse_flush():
- """Called by 'myapp/__init__.py' to flush any pending changes during shutdown."""
- langfuse.flush()
diff --git a/examples/django_example/myapp/migrations/__init__.py b/examples/django_example/myapp/migrations/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/examples/django_example/myapp/views.py b/examples/django_example/myapp/views.py
deleted file mode 100644
index a4cd55475..000000000
--- a/examples/django_example/myapp/views.py
+++ /dev/null
@@ -1,14 +0,0 @@
-from django.http import JsonResponse
-from myapp.langfuse_integration import get_response_openai
-
-
-def main_route(request):
- return JsonResponse(
- {"message": "Hey, this is an example showing how to use Langfuse with Django."}
- )
-
-
-def campaign(request):
- prompt = request.GET.get("prompt", "")
- response = get_response_openai(prompt)
- return JsonResponse(response)
diff --git a/examples/django_example/poetry.lock b/examples/django_example/poetry.lock
deleted file mode 100644
index e5de2fb01..000000000
--- a/examples/django_example/poetry.lock
+++ /dev/null
@@ -1,520 +0,0 @@
-# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
-
-[[package]]
-name = "annotated-types"
-version = "0.6.0"
-description = "Reusable constraint types to use with typing.Annotated"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"},
- {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"},
-]
-
-[[package]]
-name = "anyio"
-version = "4.2.0"
-description = "High level compatibility layer for multiple asynchronous event loop implementations"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "anyio-4.2.0-py3-none-any.whl", hash = "sha256:745843b39e829e108e518c489b31dc757de7d2131d53fac32bd8df268227bfee"},
- {file = "anyio-4.2.0.tar.gz", hash = "sha256:e1875bb4b4e2de1669f4bc7869b6d3f54231cdced71605e6e64c9be77e3be50f"},
-]
-
-[package.dependencies]
-exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
-idna = ">=2.8"
-sniffio = ">=1.1"
-typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
-
-[package.extras]
-doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
-test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"]
-trio = ["trio (>=0.23)"]
-
-[[package]]
-name = "asgiref"
-version = "3.7.2"
-description = "ASGI specs, helper code, and adapters"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "asgiref-3.7.2-py3-none-any.whl", hash = "sha256:89b2ef2247e3b562a16eef663bc0e2e703ec6468e2fa8a5cd61cd449786d4f6e"},
- {file = "asgiref-3.7.2.tar.gz", hash = "sha256:9e0ce3aa93a819ba5b45120216b23878cf6e8525eb3848653452b4192b92afed"},
-]
-
-[package.dependencies]
-typing-extensions = {version = ">=4", markers = "python_version < \"3.11\""}
-
-[package.extras]
-tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"]
-
-[[package]]
-name = "backoff"
-version = "2.2.1"
-description = "Function decoration for backoff and retry"
-optional = false
-python-versions = ">=3.7,<4.0"
-files = [
- {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"},
- {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"},
-]
-
-[[package]]
-name = "certifi"
-version = "2024.7.4"
-description = "Python package for providing Mozilla's CA Bundle."
-optional = false
-python-versions = ">=3.6"
-files = [
- {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"},
- {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"},
-]
-
-[[package]]
-name = "chevron"
-version = "0.14.0"
-description = "Mustache templating language renderer"
-optional = false
-python-versions = "*"
-files = [
- {file = "chevron-0.14.0-py3-none-any.whl", hash = "sha256:fbf996a709f8da2e745ef763f482ce2d311aa817d287593a5b990d6d6e4f0443"},
- {file = "chevron-0.14.0.tar.gz", hash = "sha256:87613aafdf6d77b6a90ff073165a61ae5086e21ad49057aa0e53681601800ebf"},
-]
-
-[[package]]
-name = "colorama"
-version = "0.4.6"
-description = "Cross-platform colored terminal text."
-optional = false
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
-files = [
- {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
- {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
-]
-
-[[package]]
-name = "distro"
-version = "1.9.0"
-description = "Distro - an OS platform information API"
-optional = false
-python-versions = ">=3.6"
-files = [
- {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
- {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
-]
-
-[[package]]
-name = "django"
-version = "5.0.11"
-description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design."
-optional = false
-python-versions = ">=3.10"
-files = [
- {file = "Django-5.0.11-py3-none-any.whl", hash = "sha256:09e8128f717266bf382d82ffa4933f13da05d82579abf008ede86acb15dec88b"},
- {file = "Django-5.0.11.tar.gz", hash = "sha256:e7d98fa05ce09cb3e8d5ad6472fb602322acd1740bfdadc29c8404182d664f65"},
-]
-
-[package.dependencies]
-asgiref = ">=3.7.0,<4"
-sqlparse = ">=0.3.1"
-tzdata = {version = "*", markers = "sys_platform == \"win32\""}
-
-[package.extras]
-argon2 = ["argon2-cffi (>=19.1.0)"]
-bcrypt = ["bcrypt"]
-
-[[package]]
-name = "exceptiongroup"
-version = "1.2.0"
-description = "Backport of PEP 654 (exception groups)"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"},
- {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"},
-]
-
-[package.extras]
-test = ["pytest (>=6)"]
-
-[[package]]
-name = "h11"
-version = "0.14.0"
-description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
- {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
-]
-
-[[package]]
-name = "httpcore"
-version = "1.0.3"
-description = "A minimal low-level HTTP client."
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "httpcore-1.0.3-py3-none-any.whl", hash = "sha256:9a6a501c3099307d9fd76ac244e08503427679b1e81ceb1d922485e2f2462ad2"},
- {file = "httpcore-1.0.3.tar.gz", hash = "sha256:5c0f9546ad17dac4d0772b0808856eb616eb8b48ce94f49ed819fd6982a8a544"},
-]
-
-[package.dependencies]
-certifi = "*"
-h11 = ">=0.13,<0.15"
-
-[package.extras]
-asyncio = ["anyio (>=4.0,<5.0)"]
-http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (==1.*)"]
-trio = ["trio (>=0.22.0,<0.24.0)"]
-
-[[package]]
-name = "httpx"
-version = "0.25.2"
-description = "The next generation HTTP client."
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "httpx-0.25.2-py3-none-any.whl", hash = "sha256:a05d3d052d9b2dfce0e3896636467f8a5342fb2b902c819428e1ac65413ca118"},
- {file = "httpx-0.25.2.tar.gz", hash = "sha256:8b8fcaa0c8ea7b05edd69a094e63a2094c4efcb48129fb757361bc423c0ad9e8"},
-]
-
-[package.dependencies]
-anyio = "*"
-certifi = "*"
-httpcore = "==1.*"
-idna = "*"
-sniffio = "*"
-
-[package.extras]
-brotli = ["brotli", "brotlicffi"]
-cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
-http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (==1.*)"]
-
-[[package]]
-name = "idna"
-version = "3.7"
-description = "Internationalized Domain Names in Applications (IDNA)"
-optional = false
-python-versions = ">=3.5"
-files = [
- {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"},
- {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
-]
-
-[[package]]
-name = "langfuse"
-version = "2.13.3"
-description = "A client library for accessing langfuse"
-optional = false
-python-versions = ">=3.8.1,<4.0"
-files = [
- {file = "langfuse-2.13.3-py3-none-any.whl", hash = "sha256:7bdcf02a74366ef77d5258c2aaae07d11fabde9a90c883f9022ecaf244bfdeca"},
- {file = "langfuse-2.13.3.tar.gz", hash = "sha256:2be049382e867681eabf774d60aadad3e6c277841e2c7f06d71190379650c2d9"},
-]
-
-[package.dependencies]
-backoff = ">=2.2.1,<3.0.0"
-chevron = ">=0.14.0,<0.15.0"
-httpx = ">=0.15.4,<0.26.0"
-openai = ">=0.27.8"
-packaging = ">=23.2,<24.0"
-pydantic = ">=1.10.7,<3.0"
-wrapt = "1.14"
-
-[package.extras]
-langchain = ["langchain (>=0.0.309)"]
-
-[[package]]
-name = "openai"
-version = "1.12.0"
-description = "The official Python library for the openai API"
-optional = false
-python-versions = ">=3.7.1"
-files = [
- {file = "openai-1.12.0-py3-none-any.whl", hash = "sha256:a54002c814e05222e413664f651b5916714e4700d041d5cf5724d3ae1a3e3481"},
- {file = "openai-1.12.0.tar.gz", hash = "sha256:99c5d257d09ea6533d689d1cc77caa0ac679fa21efef8893d8b0832a86877f1b"},
-]
-
-[package.dependencies]
-anyio = ">=3.5.0,<5"
-distro = ">=1.7.0,<2"
-httpx = ">=0.23.0,<1"
-pydantic = ">=1.9.0,<3"
-sniffio = "*"
-tqdm = ">4"
-typing-extensions = ">=4.7,<5"
-
-[package.extras]
-datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
-
-[[package]]
-name = "packaging"
-version = "23.2"
-description = "Core utilities for Python packages"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"},
- {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"},
-]
-
-[[package]]
-name = "pydantic"
-version = "2.6.1"
-description = "Data validation using Python type hints"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "pydantic-2.6.1-py3-none-any.whl", hash = "sha256:0b6a909df3192245cb736509a92ff69e4fef76116feffec68e93a567347bae6f"},
- {file = "pydantic-2.6.1.tar.gz", hash = "sha256:4fd5c182a2488dc63e6d32737ff19937888001e2a6d86e94b3f233104a5d1fa9"},
-]
-
-[package.dependencies]
-annotated-types = ">=0.4.0"
-pydantic-core = "2.16.2"
-typing-extensions = ">=4.6.1"
-
-[package.extras]
-email = ["email-validator (>=2.0.0)"]
-
-[[package]]
-name = "pydantic-core"
-version = "2.16.2"
-description = ""
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "pydantic_core-2.16.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3fab4e75b8c525a4776e7630b9ee48aea50107fea6ca9f593c98da3f4d11bf7c"},
- {file = "pydantic_core-2.16.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8bde5b48c65b8e807409e6f20baee5d2cd880e0fad00b1a811ebc43e39a00ab2"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2924b89b16420712e9bb8192396026a8fbd6d8726224f918353ac19c4c043d2a"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16aa02e7a0f539098e215fc193c8926c897175d64c7926d00a36188917717a05"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:936a787f83db1f2115ee829dd615c4f684ee48ac4de5779ab4300994d8af325b"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:459d6be6134ce3b38e0ef76f8a672924460c455d45f1ad8fdade36796df1ddc8"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9ee4febb249c591d07b2d4dd36ebcad0ccd128962aaa1801508320896575ef"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40a0bd0bed96dae5712dab2aba7d334a6c67cbcac2ddfca7dbcc4a8176445990"},
- {file = "pydantic_core-2.16.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:870dbfa94de9b8866b37b867a2cb37a60c401d9deb4a9ea392abf11a1f98037b"},
- {file = "pydantic_core-2.16.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:308974fdf98046db28440eb3377abba274808bf66262e042c412eb2adf852731"},
- {file = "pydantic_core-2.16.2-cp310-none-win32.whl", hash = "sha256:a477932664d9611d7a0816cc3c0eb1f8856f8a42435488280dfbf4395e141485"},
- {file = "pydantic_core-2.16.2-cp310-none-win_amd64.whl", hash = "sha256:8f9142a6ed83d90c94a3efd7af8873bf7cefed2d3d44387bf848888482e2d25f"},
- {file = "pydantic_core-2.16.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:406fac1d09edc613020ce9cf3f2ccf1a1b2f57ab00552b4c18e3d5276c67eb11"},
- {file = "pydantic_core-2.16.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ce232a6170dd6532096cadbf6185271e4e8c70fc9217ebe105923ac105da9978"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a90fec23b4b05a09ad988e7a4f4e081711a90eb2a55b9c984d8b74597599180f"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8aafeedb6597a163a9c9727d8a8bd363a93277701b7bfd2749fbefee2396469e"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9957433c3a1b67bdd4c63717eaf174ebb749510d5ea612cd4e83f2d9142f3fc8"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0d7a9165167269758145756db43a133608a531b1e5bb6a626b9ee24bc38a8f7"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dffaf740fe2e147fedcb6b561353a16243e654f7fe8e701b1b9db148242e1272"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8ed79883b4328b7f0bd142733d99c8e6b22703e908ec63d930b06be3a0e7113"},
- {file = "pydantic_core-2.16.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:cf903310a34e14651c9de056fcc12ce090560864d5a2bb0174b971685684e1d8"},
- {file = "pydantic_core-2.16.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:46b0d5520dbcafea9a8645a8164658777686c5c524d381d983317d29687cce97"},
- {file = "pydantic_core-2.16.2-cp311-none-win32.whl", hash = "sha256:70651ff6e663428cea902dac297066d5c6e5423fda345a4ca62430575364d62b"},
- {file = "pydantic_core-2.16.2-cp311-none-win_amd64.whl", hash = "sha256:98dc6f4f2095fc7ad277782a7c2c88296badcad92316b5a6e530930b1d475ebc"},
- {file = "pydantic_core-2.16.2-cp311-none-win_arm64.whl", hash = "sha256:ef6113cd31411eaf9b39fc5a8848e71c72656fd418882488598758b2c8c6dfa0"},
- {file = "pydantic_core-2.16.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:88646cae28eb1dd5cd1e09605680c2b043b64d7481cdad7f5003ebef401a3039"},
- {file = "pydantic_core-2.16.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7b883af50eaa6bb3299780651e5be921e88050ccf00e3e583b1e92020333304b"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bf26c2e2ea59d32807081ad51968133af3025c4ba5753e6a794683d2c91bf6e"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99af961d72ac731aae2a1b55ccbdae0733d816f8bfb97b41909e143de735f522"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02906e7306cb8c5901a1feb61f9ab5e5c690dbbeaa04d84c1b9ae2a01ebe9379"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5362d099c244a2d2f9659fb3c9db7c735f0004765bbe06b99be69fbd87c3f15"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ac426704840877a285d03a445e162eb258924f014e2f074e209d9b4ff7bf380"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b94cbda27267423411c928208e89adddf2ea5dd5f74b9528513f0358bba019cb"},
- {file = "pydantic_core-2.16.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6db58c22ac6c81aeac33912fb1af0e930bc9774166cdd56eade913d5f2fff35e"},
- {file = "pydantic_core-2.16.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:396fdf88b1b503c9c59c84a08b6833ec0c3b5ad1a83230252a9e17b7dfb4cffc"},
- {file = "pydantic_core-2.16.2-cp312-none-win32.whl", hash = "sha256:7c31669e0c8cc68400ef0c730c3a1e11317ba76b892deeefaf52dcb41d56ed5d"},
- {file = "pydantic_core-2.16.2-cp312-none-win_amd64.whl", hash = "sha256:a3b7352b48fbc8b446b75f3069124e87f599d25afb8baa96a550256c031bb890"},
- {file = "pydantic_core-2.16.2-cp312-none-win_arm64.whl", hash = "sha256:a9e523474998fb33f7c1a4d55f5504c908d57add624599e095c20fa575b8d943"},
- {file = "pydantic_core-2.16.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:ae34418b6b389d601b31153b84dce480351a352e0bb763684a1b993d6be30f17"},
- {file = "pydantic_core-2.16.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:732bd062c9e5d9582a30e8751461c1917dd1ccbdd6cafb032f02c86b20d2e7ec"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b52776a2e3230f4854907a1e0946eec04d41b1fc64069ee774876bbe0eab55"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ef551c053692b1e39e3f7950ce2296536728871110e7d75c4e7753fb30ca87f4"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ebb892ed8599b23fa8f1799e13a12c87a97a6c9d0f497525ce9858564c4575a4"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aa6c8c582036275997a733427b88031a32ffa5dfc3124dc25a730658c47a572f"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ba0884a91f1aecce75202473ab138724aa4fb26d7707f2e1fa6c3e68c84fbf"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7924e54f7ce5d253d6160090ddc6df25ed2feea25bfb3339b424a9dd591688bc"},
- {file = "pydantic_core-2.16.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69a7b96b59322a81c2203be537957313b07dd333105b73db0b69212c7d867b4b"},
- {file = "pydantic_core-2.16.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7e6231aa5bdacda78e96ad7b07d0c312f34ba35d717115f4b4bff6cb87224f0f"},
- {file = "pydantic_core-2.16.2-cp38-none-win32.whl", hash = "sha256:41dac3b9fce187a25c6253ec79a3f9e2a7e761eb08690e90415069ea4a68ff7a"},
- {file = "pydantic_core-2.16.2-cp38-none-win_amd64.whl", hash = "sha256:f685dbc1fdadb1dcd5b5e51e0a378d4685a891b2ddaf8e2bba89bd3a7144e44a"},
- {file = "pydantic_core-2.16.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:55749f745ebf154c0d63d46c8c58594d8894b161928aa41adbb0709c1fe78b77"},
- {file = "pydantic_core-2.16.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b30b0dd58a4509c3bd7eefddf6338565c4905406aee0c6e4a5293841411a1286"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18de31781cdc7e7b28678df7c2d7882f9692ad060bc6ee3c94eb15a5d733f8f7"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5864b0242f74b9dd0b78fd39db1768bc3f00d1ffc14e596fd3e3f2ce43436a33"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8f9186ca45aee030dc8234118b9c0784ad91a0bb27fc4e7d9d6608a5e3d386c"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc6f6c9be0ab6da37bc77c2dda5f14b1d532d5dbef00311ee6e13357a418e646"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa057095f621dad24a1e906747179a69780ef45cc8f69e97463692adbcdae878"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ad84731a26bcfb299f9eab56c7932d46f9cad51c52768cace09e92a19e4cf55"},
- {file = "pydantic_core-2.16.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3b052c753c4babf2d1edc034c97851f867c87d6f3ea63a12e2700f159f5c41c3"},
- {file = "pydantic_core-2.16.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e0f686549e32ccdb02ae6f25eee40cc33900910085de6aa3790effd391ae10c2"},
- {file = "pydantic_core-2.16.2-cp39-none-win32.whl", hash = "sha256:7afb844041e707ac9ad9acad2188a90bffce2c770e6dc2318be0c9916aef1469"},
- {file = "pydantic_core-2.16.2-cp39-none-win_amd64.whl", hash = "sha256:9da90d393a8227d717c19f5397688a38635afec89f2e2d7af0df037f3249c39a"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5f60f920691a620b03082692c378661947d09415743e437a7478c309eb0e4f82"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:47924039e785a04d4a4fa49455e51b4eb3422d6eaacfde9fc9abf8fdef164e8a"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6294e76b0380bb7a61eb8a39273c40b20beb35e8c87ee101062834ced19c545"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe56851c3f1d6f5384b3051c536cc81b3a93a73faf931f404fef95217cf1e10d"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9d776d30cde7e541b8180103c3f294ef7c1862fd45d81738d156d00551005784"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:72f7919af5de5ecfaf1eba47bf9a5d8aa089a3340277276e5636d16ee97614d7"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:4bfcbde6e06c56b30668a0c872d75a7ef3025dc3c1823a13cf29a0e9b33f67e8"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ff7c97eb7a29aba230389a2661edf2e9e06ce616c7e35aa764879b6894a44b25"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9b5f13857da99325dcabe1cc4e9e6a3d7b2e2c726248ba5dd4be3e8e4a0b6d0e"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a7e41e3ada4cca5f22b478c08e973c930e5e6c7ba3588fb8e35f2398cdcc1545"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60eb8ceaa40a41540b9acae6ae7c1f0a67d233c40dc4359c256ad2ad85bdf5e5"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7beec26729d496a12fd23cf8da9944ee338c8b8a17035a560b585c36fe81af20"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:22c5f022799f3cd6741e24f0443ead92ef42be93ffda0d29b2597208c94c3753"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:eca58e319f4fd6df004762419612122b2c7e7d95ffafc37e890252f869f3fb2a"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ed957db4c33bc99895f3a1672eca7e80e8cda8bd1e29a80536b4ec2153fa9804"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:459c0d338cc55d099798618f714b21b7ece17eb1a87879f2da20a3ff4c7628e2"},
- {file = "pydantic_core-2.16.2.tar.gz", hash = "sha256:0ba503850d8b8dcc18391f10de896ae51d37fe5fe43dbfb6a35c5c5cad271a06"},
-]
-
-[package.dependencies]
-typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
-
-[[package]]
-name = "sniffio"
-version = "1.3.0"
-description = "Sniff out which async library your code is running under"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"},
- {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"},
-]
-
-[[package]]
-name = "sqlparse"
-version = "0.5.0"
-description = "A non-validating SQL parser."
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "sqlparse-0.5.0-py3-none-any.whl", hash = "sha256:c204494cd97479d0e39f28c93d46c0b2d5959c7b9ab904762ea6c7af211c8663"},
- {file = "sqlparse-0.5.0.tar.gz", hash = "sha256:714d0a4932c059d16189f58ef5411ec2287a4360f17cdd0edd2d09d4c5087c93"},
-]
-
-[package.extras]
-dev = ["build", "hatch"]
-doc = ["sphinx"]
-
-[[package]]
-name = "tqdm"
-version = "4.66.3"
-description = "Fast, Extensible Progress Meter"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "tqdm-4.66.3-py3-none-any.whl", hash = "sha256:4f41d54107ff9a223dca80b53efe4fb654c67efaba7f47bada3ee9d50e05bd53"},
- {file = "tqdm-4.66.3.tar.gz", hash = "sha256:23097a41eba115ba99ecae40d06444c15d1c0c698d527a01c6c8bd1c5d0647e5"},
-]
-
-[package.dependencies]
-colorama = {version = "*", markers = "platform_system == \"Windows\""}
-
-[package.extras]
-dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"]
-notebook = ["ipywidgets (>=6)"]
-slack = ["slack-sdk"]
-telegram = ["requests"]
-
-[[package]]
-name = "typing-extensions"
-version = "4.9.0"
-description = "Backported and Experimental Type Hints for Python 3.8+"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"},
- {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"},
-]
-
-[[package]]
-name = "tzdata"
-version = "2024.1"
-description = "Provider of IANA time zone data"
-optional = false
-python-versions = ">=2"
-files = [
- {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"},
- {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"},
-]
-
-[[package]]
-name = "wrapt"
-version = "1.14.0"
-description = "Module for decorators, wrappers and monkey patching."
-optional = false
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
-files = [
- {file = "wrapt-1.14.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:5a9a1889cc01ed2ed5f34574c90745fab1dd06ec2eee663e8ebeefe363e8efd7"},
- {file = "wrapt-1.14.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:9a3ff5fb015f6feb78340143584d9f8a0b91b6293d6b5cf4295b3e95d179b88c"},
- {file = "wrapt-1.14.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:4b847029e2d5e11fd536c9ac3136ddc3f54bc9488a75ef7d040a3900406a91eb"},
- {file = "wrapt-1.14.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:9a5a544861b21e0e7575b6023adebe7a8c6321127bb1d238eb40d99803a0e8bd"},
- {file = "wrapt-1.14.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:88236b90dda77f0394f878324cfbae05ae6fde8a84d548cfe73a75278d760291"},
- {file = "wrapt-1.14.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:f0408e2dbad9e82b4c960274214af533f856a199c9274bd4aff55d4634dedc33"},
- {file = "wrapt-1.14.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:9d8c68c4145041b4eeae96239802cfdfd9ef927754a5be3f50505f09f309d8c6"},
- {file = "wrapt-1.14.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:22626dca56fd7f55a0733e604f1027277eb0f4f3d95ff28f15d27ac25a45f71b"},
- {file = "wrapt-1.14.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:65bf3eb34721bf18b5a021a1ad7aa05947a1767d1aa272b725728014475ea7d5"},
- {file = "wrapt-1.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:09d16ae7a13cff43660155383a2372b4aa09109c7127aa3f24c3cf99b891c330"},
- {file = "wrapt-1.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:debaf04f813ada978d7d16c7dfa16f3c9c2ec9adf4656efdc4defdf841fc2f0c"},
- {file = "wrapt-1.14.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:748df39ed634851350efa87690c2237a678ed794fe9ede3f0d79f071ee042561"},
- {file = "wrapt-1.14.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1807054aa7b61ad8d8103b3b30c9764de2e9d0c0978e9d3fc337e4e74bf25faa"},
- {file = "wrapt-1.14.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:763a73ab377390e2af26042f685a26787c402390f682443727b847e9496e4a2a"},
- {file = "wrapt-1.14.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:8529b07b49b2d89d6917cfa157d3ea1dfb4d319d51e23030664a827fe5fd2131"},
- {file = "wrapt-1.14.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:68aeefac31c1f73949662ba8affaf9950b9938b712fb9d428fa2a07e40ee57f8"},
- {file = "wrapt-1.14.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59d7d92cee84a547d91267f0fea381c363121d70fe90b12cd88241bd9b0e1763"},
- {file = "wrapt-1.14.0-cp310-cp310-win32.whl", hash = "sha256:3a88254881e8a8c4784ecc9cb2249ff757fd94b911d5df9a5984961b96113fff"},
- {file = "wrapt-1.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:9a242871b3d8eecc56d350e5e03ea1854de47b17f040446da0e47dc3e0b9ad4d"},
- {file = "wrapt-1.14.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:a65bffd24409454b889af33b6c49d0d9bcd1a219b972fba975ac935f17bdf627"},
- {file = "wrapt-1.14.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9d9fcd06c952efa4b6b95f3d788a819b7f33d11bea377be6b8980c95e7d10775"},
- {file = "wrapt-1.14.0-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:db6a0ddc1282ceb9032e41853e659c9b638789be38e5b8ad7498caac00231c23"},
- {file = "wrapt-1.14.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:14e7e2c5f5fca67e9a6d5f753d21f138398cad2b1159913ec9e9a67745f09ba3"},
- {file = "wrapt-1.14.0-cp35-cp35m-win32.whl", hash = "sha256:6d9810d4f697d58fd66039ab959e6d37e63ab377008ef1d63904df25956c7db0"},
- {file = "wrapt-1.14.0-cp35-cp35m-win_amd64.whl", hash = "sha256:d808a5a5411982a09fef6b49aac62986274ab050e9d3e9817ad65b2791ed1425"},
- {file = "wrapt-1.14.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b77159d9862374da213f741af0c361720200ab7ad21b9f12556e0eb95912cd48"},
- {file = "wrapt-1.14.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36a76a7527df8583112b24adc01748cd51a2d14e905b337a6fefa8b96fc708fb"},
- {file = "wrapt-1.14.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a0057b5435a65b933cbf5d859cd4956624df37b8bf0917c71756e4b3d9958b9e"},
- {file = "wrapt-1.14.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a0a4ca02752ced5f37498827e49c414d694ad7cf451ee850e3ff160f2bee9d3"},
- {file = "wrapt-1.14.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:8c6be72eac3c14baa473620e04f74186c5d8f45d80f8f2b4eda6e1d18af808e8"},
- {file = "wrapt-1.14.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:21b1106bff6ece8cb203ef45b4f5778d7226c941c83aaaa1e1f0f4f32cc148cd"},
- {file = "wrapt-1.14.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:493da1f8b1bb8a623c16552fb4a1e164c0200447eb83d3f68b44315ead3f9036"},
- {file = "wrapt-1.14.0-cp36-cp36m-win32.whl", hash = "sha256:89ba3d548ee1e6291a20f3c7380c92f71e358ce8b9e48161401e087e0bc740f8"},
- {file = "wrapt-1.14.0-cp36-cp36m-win_amd64.whl", hash = "sha256:729d5e96566f44fccac6c4447ec2332636b4fe273f03da128fff8d5559782b06"},
- {file = "wrapt-1.14.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:891c353e95bb11abb548ca95c8b98050f3620a7378332eb90d6acdef35b401d4"},
- {file = "wrapt-1.14.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23f96134a3aa24cc50614920cc087e22f87439053d886e474638c68c8d15dc80"},
- {file = "wrapt-1.14.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6807bcee549a8cb2f38f73f469703a1d8d5d990815c3004f21ddb68a567385ce"},
- {file = "wrapt-1.14.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6915682f9a9bc4cf2908e83caf5895a685da1fbd20b6d485dafb8e218a338279"},
- {file = "wrapt-1.14.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f2f3bc7cd9c9fcd39143f11342eb5963317bd54ecc98e3650ca22704b69d9653"},
- {file = "wrapt-1.14.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:3a71dbd792cc7a3d772ef8cd08d3048593f13d6f40a11f3427c000cf0a5b36a0"},
- {file = "wrapt-1.14.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:5a0898a640559dec00f3614ffb11d97a2666ee9a2a6bad1259c9facd01a1d4d9"},
- {file = "wrapt-1.14.0-cp37-cp37m-win32.whl", hash = "sha256:167e4793dc987f77fd476862d32fa404d42b71f6a85d3b38cbce711dba5e6b68"},
- {file = "wrapt-1.14.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d066ffc5ed0be00cd0352c95800a519cf9e4b5dd34a028d301bdc7177c72daf3"},
- {file = "wrapt-1.14.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d9bdfa74d369256e4218000a629978590fd7cb6cf6893251dad13d051090436d"},
- {file = "wrapt-1.14.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2498762814dd7dd2a1d0248eda2afbc3dd9c11537bc8200a4b21789b6df6cd38"},
- {file = "wrapt-1.14.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f24ca7953f2643d59a9c87d6e272d8adddd4a53bb62b9208f36db408d7aafc7"},
- {file = "wrapt-1.14.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b835b86bd5a1bdbe257d610eecab07bf685b1af2a7563093e0e69180c1d4af1"},
- {file = "wrapt-1.14.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b21650fa6907e523869e0396c5bd591cc326e5c1dd594dcdccac089561cacfb8"},
- {file = "wrapt-1.14.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:354d9fc6b1e44750e2a67b4b108841f5f5ea08853453ecbf44c81fdc2e0d50bd"},
- {file = "wrapt-1.14.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1f83e9c21cd5275991076b2ba1cd35418af3504667affb4745b48937e214bafe"},
- {file = "wrapt-1.14.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:61e1a064906ccba038aa3c4a5a82f6199749efbbb3cef0804ae5c37f550eded0"},
- {file = "wrapt-1.14.0-cp38-cp38-win32.whl", hash = "sha256:28c659878f684365d53cf59dc9a1929ea2eecd7ac65da762be8b1ba193f7e84f"},
- {file = "wrapt-1.14.0-cp38-cp38-win_amd64.whl", hash = "sha256:b0ed6ad6c9640671689c2dbe6244680fe8b897c08fd1fab2228429b66c518e5e"},
- {file = "wrapt-1.14.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b3f7e671fb19734c872566e57ce7fc235fa953d7c181bb4ef138e17d607dc8a1"},
- {file = "wrapt-1.14.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:87fa943e8bbe40c8c1ba4086971a6fefbf75e9991217c55ed1bcb2f1985bd3d4"},
- {file = "wrapt-1.14.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4775a574e9d84e0212f5b18886cace049a42e13e12009bb0491562a48bb2b758"},
- {file = "wrapt-1.14.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9d57677238a0c5411c76097b8b93bdebb02eb845814c90f0b01727527a179e4d"},
- {file = "wrapt-1.14.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00108411e0f34c52ce16f81f1d308a571df7784932cc7491d1e94be2ee93374b"},
- {file = "wrapt-1.14.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d332eecf307fca852d02b63f35a7872de32d5ba8b4ec32da82f45df986b39ff6"},
- {file = "wrapt-1.14.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:01f799def9b96a8ec1ef6b9c1bbaf2bbc859b87545efbecc4a78faea13d0e3a0"},
- {file = "wrapt-1.14.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47045ed35481e857918ae78b54891fac0c1d197f22c95778e66302668309336c"},
- {file = "wrapt-1.14.0-cp39-cp39-win32.whl", hash = "sha256:2eca15d6b947cfff51ed76b2d60fd172c6ecd418ddab1c5126032d27f74bc350"},
- {file = "wrapt-1.14.0-cp39-cp39-win_amd64.whl", hash = "sha256:bb36fbb48b22985d13a6b496ea5fb9bb2a076fea943831643836c9f6febbcfdc"},
- {file = "wrapt-1.14.0.tar.gz", hash = "sha256:8323a43bd9c91f62bb7d4be74cc9ff10090e7ef820e27bfe8815c57e68261311"},
-]
-
-[metadata]
-lock-version = "2.0"
-python-versions = "^3.10"
-content-hash = "50262a5ce4770994435421458f255accba11afb55d61b73263bac19980887419"
diff --git a/examples/django_example/pyproject.toml b/examples/django_example/pyproject.toml
deleted file mode 100644
index 909f99cf0..000000000
--- a/examples/django_example/pyproject.toml
+++ /dev/null
@@ -1,16 +0,0 @@
-[tool.poetry]
-name = "django-example"
-version = "0.1.0"
-description = ""
-authors = ["ChrisTho23 "]
-readme = "README.md"
-
-[tool.poetry.dependencies]
-python = "^3.10"
-django = "^5.0.11"
-langfuse = "^2.13.3"
-
-
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
diff --git a/examples/fastapi_example/README.md b/examples/fastapi_example/README.md
deleted file mode 100644
index 6814e29ce..000000000
--- a/examples/fastapi_example/README.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# fastapi_example
-
-This is an example FastAPI application showcasing integration with Langfuse for event tracing and response generation.
-
-1. **Shutdown Behavior**: The application defines shutdown logic using FastAPI's lifespan feature. On shutdown, it flushes all events to Langfuse, ensuring data integrity and completeness.
-
-2. **Endpoints**:
- - `/`: Returns a simple message demonstrating the usage of Langfuse with FastAPI.
- - `"/campaign/"`: Accepts a `prompt` and employs Langfuse for event tracing. (Note: OpenAI is referenced for context but not used in this example).
-
-3. **Integration**:
- - Langfuse: Utilized for event tracing with `trace`, `score`, `generation`, and `span` operations. (Note that OpenAI is not actually used here to generate an answer to the prompt. This example just shows how to use FastAPI with the Langfuse SDK.)
-
-4. **Dependencies**:
- - FastAPI: Web framework for building APIs.
- - Langfuse: Library for event tracing and management.
-
-5. **Usage**:
- - Preparation: Ensure langfuse is installed and configured in the `fastapi_example/main.py` file.
- - Starting the Server: Navigate to the example directory `langfuse-python/examples/fastapi_example`. Run the application using `poetry run start`.
- - Access endpoints at `http://localhost:8000`.
-
-For more details on FastAPI and Langfuse, refer to their respective documentation.
diff --git a/examples/fastapi_example/fastapi_example/__init__.py b/examples/fastapi_example/fastapi_example/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/examples/fastapi_example/fastapi_example/main.py b/examples/fastapi_example/fastapi_example/main.py
deleted file mode 100644
index 4feac445a..000000000
--- a/examples/fastapi_example/fastapi_example/main.py
+++ /dev/null
@@ -1,89 +0,0 @@
-from contextlib import asynccontextmanager
-from fastapi import FastAPI, Query, BackgroundTasks
-from langfuse import Langfuse
-import uvicorn
-
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
- # Operation on startup
-
- yield # wait until shutdown
-
- # Flush all events to be sent to Langfuse on shutdown. This operation is blocking.
- langfuse.flush()
-
-
-app = FastAPI(lifespan=lifespan)
-
-
-@app.get("/")
-async def main_route():
- return {
- "message": "Hey, this is an example showing how to use Langfuse with FastAPI."
- }
-
-
-# Initialize Langfuse
-langfuse = Langfuse(public_key="pk-lf-1234567890", secret_key="sk-lf-1234567890")
-
-
-async def get_response_openai(prompt, background_tasks: BackgroundTasks):
- """This simulates the response to a prompt using the OpenAI API.
-
- Args:
- prompt (str): The prompt for generating the response.
- background_tasks (BackgroundTasks): An object for handling background tasks.
-
- Returns:
- dict: A dictionary containing the response status and message (always "This is a test message").
- """
- try:
- trace = langfuse.trace(
- name="this-is-a-trace",
- user_id="test",
- metadata="test",
- )
-
- trace = trace.score(
- name="user-feedback",
- value=1,
- comment="Some user feedback",
- )
-
- generation = trace.generation(name="this-is-a-generation", metadata="test")
-
- sub_generation = generation.generation(
- name="this-is-a-sub-generation", metadata="test"
- )
-
- sub_sub_span = sub_generation.span(
- name="this-is-a-sub-sub-span", metadata="test"
- )
-
- sub_sub_span = sub_sub_span.score(
- name="user-feedback-o",
- value=1,
- comment="Some more user feedback",
- )
-
- response = {"status": "success", "message": "This is a test message"}
- except Exception as e:
- print("Error in creating campaigns from openAI:", str(e))
- return 503
- return response
-
-
-@app.get(
- "/campaign/",
- tags=["APIs"],
-)
-async def campaign(
- background_tasks: BackgroundTasks, prompt: str = Query(..., max_length=20)
-):
- return await get_response_openai(prompt, background_tasks)
-
-
-def start():
- """Launched with `poetry run start` at root level"""
- uvicorn.run("fastapi_example.main:app", host="0.0.0.0", port=8000, reload=True)
diff --git a/examples/fastapi_example/poetry.lock b/examples/fastapi_example/poetry.lock
deleted file mode 100644
index 5a5781fb8..000000000
--- a/examples/fastapi_example/poetry.lock
+++ /dev/null
@@ -1,526 +0,0 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
-
-[[package]]
-name = "annotated-types"
-version = "0.6.0"
-description = "Reusable constraint types to use with typing.Annotated"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"},
- {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"},
-]
-
-[[package]]
-name = "anyio"
-version = "4.2.0"
-description = "High level compatibility layer for multiple asynchronous event loop implementations"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "anyio-4.2.0-py3-none-any.whl", hash = "sha256:745843b39e829e108e518c489b31dc757de7d2131d53fac32bd8df268227bfee"},
- {file = "anyio-4.2.0.tar.gz", hash = "sha256:e1875bb4b4e2de1669f4bc7869b6d3f54231cdced71605e6e64c9be77e3be50f"},
-]
-
-[package.dependencies]
-exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
-idna = ">=2.8"
-sniffio = ">=1.1"
-typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
-
-[package.extras]
-doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
-test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"]
-trio = ["trio (>=0.23)"]
-
-[[package]]
-name = "backoff"
-version = "2.2.1"
-description = "Function decoration for backoff and retry"
-optional = false
-python-versions = ">=3.7,<4.0"
-files = [
- {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"},
- {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"},
-]
-
-[[package]]
-name = "certifi"
-version = "2024.7.4"
-description = "Python package for providing Mozilla's CA Bundle."
-optional = false
-python-versions = ">=3.6"
-files = [
- {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"},
- {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"},
-]
-
-[[package]]
-name = "chevron"
-version = "0.14.0"
-description = "Mustache templating language renderer"
-optional = false
-python-versions = "*"
-files = [
- {file = "chevron-0.14.0-py3-none-any.whl", hash = "sha256:fbf996a709f8da2e745ef763f482ce2d311aa817d287593a5b990d6d6e4f0443"},
- {file = "chevron-0.14.0.tar.gz", hash = "sha256:87613aafdf6d77b6a90ff073165a61ae5086e21ad49057aa0e53681601800ebf"},
-]
-
-[[package]]
-name = "click"
-version = "8.1.7"
-description = "Composable command line interface toolkit"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"},
- {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"},
-]
-
-[package.dependencies]
-colorama = {version = "*", markers = "platform_system == \"Windows\""}
-
-[[package]]
-name = "colorama"
-version = "0.4.6"
-description = "Cross-platform colored terminal text."
-optional = false
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
-files = [
- {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
- {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
-]
-
-[[package]]
-name = "distro"
-version = "1.9.0"
-description = "Distro - an OS platform information API"
-optional = false
-python-versions = ">=3.6"
-files = [
- {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
- {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
-]
-
-[[package]]
-name = "exceptiongroup"
-version = "1.2.0"
-description = "Backport of PEP 654 (exception groups)"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"},
- {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"},
-]
-
-[package.extras]
-test = ["pytest (>=6)"]
-
-[[package]]
-name = "fastapi"
-version = "0.109.2"
-description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "fastapi-0.109.2-py3-none-any.whl", hash = "sha256:2c9bab24667293b501cad8dd388c05240c850b58ec5876ee3283c47d6e1e3a4d"},
- {file = "fastapi-0.109.2.tar.gz", hash = "sha256:f3817eac96fe4f65a2ebb4baa000f394e55f5fccdaf7f75250804bc58f354f73"},
-]
-
-[package.dependencies]
-pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0"
-starlette = ">=0.36.3,<0.37.0"
-typing-extensions = ">=4.8.0"
-
-[package.extras]
-all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"]
-
-[[package]]
-name = "h11"
-version = "0.14.0"
-description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
- {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
-]
-
-[[package]]
-name = "httpcore"
-version = "1.0.3"
-description = "A minimal low-level HTTP client."
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "httpcore-1.0.3-py3-none-any.whl", hash = "sha256:9a6a501c3099307d9fd76ac244e08503427679b1e81ceb1d922485e2f2462ad2"},
- {file = "httpcore-1.0.3.tar.gz", hash = "sha256:5c0f9546ad17dac4d0772b0808856eb616eb8b48ce94f49ed819fd6982a8a544"},
-]
-
-[package.dependencies]
-certifi = "*"
-h11 = ">=0.13,<0.15"
-
-[package.extras]
-asyncio = ["anyio (>=4.0,<5.0)"]
-http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (==1.*)"]
-trio = ["trio (>=0.22.0,<0.24.0)"]
-
-[[package]]
-name = "httpx"
-version = "0.25.2"
-description = "The next generation HTTP client."
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "httpx-0.25.2-py3-none-any.whl", hash = "sha256:a05d3d052d9b2dfce0e3896636467f8a5342fb2b902c819428e1ac65413ca118"},
- {file = "httpx-0.25.2.tar.gz", hash = "sha256:8b8fcaa0c8ea7b05edd69a094e63a2094c4efcb48129fb757361bc423c0ad9e8"},
-]
-
-[package.dependencies]
-anyio = "*"
-certifi = "*"
-httpcore = "==1.*"
-idna = "*"
-sniffio = "*"
-
-[package.extras]
-brotli = ["brotli", "brotlicffi"]
-cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
-http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (==1.*)"]
-
-[[package]]
-name = "idna"
-version = "3.7"
-description = "Internationalized Domain Names in Applications (IDNA)"
-optional = false
-python-versions = ">=3.5"
-files = [
- {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"},
- {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
-]
-
-[[package]]
-name = "langfuse"
-version = "2.13.3"
-description = "A client library for accessing langfuse"
-optional = false
-python-versions = ">=3.8.1,<4.0"
-files = [
- {file = "langfuse-2.13.3-py3-none-any.whl", hash = "sha256:7bdcf02a74366ef77d5258c2aaae07d11fabde9a90c883f9022ecaf244bfdeca"},
- {file = "langfuse-2.13.3.tar.gz", hash = "sha256:2be049382e867681eabf774d60aadad3e6c277841e2c7f06d71190379650c2d9"},
-]
-
-[package.dependencies]
-backoff = ">=2.2.1,<3.0.0"
-chevron = ">=0.14.0,<0.15.0"
-httpx = ">=0.15.4,<0.26.0"
-openai = ">=0.27.8"
-packaging = ">=23.2,<24.0"
-pydantic = ">=1.10.7,<3.0"
-wrapt = "1.14"
-
-[package.extras]
-langchain = ["langchain (>=0.0.309)"]
-
-[[package]]
-name = "openai"
-version = "1.12.0"
-description = "The official Python library for the openai API"
-optional = false
-python-versions = ">=3.7.1"
-files = [
- {file = "openai-1.12.0-py3-none-any.whl", hash = "sha256:a54002c814e05222e413664f651b5916714e4700d041d5cf5724d3ae1a3e3481"},
- {file = "openai-1.12.0.tar.gz", hash = "sha256:99c5d257d09ea6533d689d1cc77caa0ac679fa21efef8893d8b0832a86877f1b"},
-]
-
-[package.dependencies]
-anyio = ">=3.5.0,<5"
-distro = ">=1.7.0,<2"
-httpx = ">=0.23.0,<1"
-pydantic = ">=1.9.0,<3"
-sniffio = "*"
-tqdm = ">4"
-typing-extensions = ">=4.7,<5"
-
-[package.extras]
-datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
-
-[[package]]
-name = "packaging"
-version = "23.2"
-description = "Core utilities for Python packages"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"},
- {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"},
-]
-
-[[package]]
-name = "pydantic"
-version = "2.6.1"
-description = "Data validation using Python type hints"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "pydantic-2.6.1-py3-none-any.whl", hash = "sha256:0b6a909df3192245cb736509a92ff69e4fef76116feffec68e93a567347bae6f"},
- {file = "pydantic-2.6.1.tar.gz", hash = "sha256:4fd5c182a2488dc63e6d32737ff19937888001e2a6d86e94b3f233104a5d1fa9"},
-]
-
-[package.dependencies]
-annotated-types = ">=0.4.0"
-pydantic-core = "2.16.2"
-typing-extensions = ">=4.6.1"
-
-[package.extras]
-email = ["email-validator (>=2.0.0)"]
-
-[[package]]
-name = "pydantic-core"
-version = "2.16.2"
-description = ""
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "pydantic_core-2.16.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3fab4e75b8c525a4776e7630b9ee48aea50107fea6ca9f593c98da3f4d11bf7c"},
- {file = "pydantic_core-2.16.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8bde5b48c65b8e807409e6f20baee5d2cd880e0fad00b1a811ebc43e39a00ab2"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2924b89b16420712e9bb8192396026a8fbd6d8726224f918353ac19c4c043d2a"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16aa02e7a0f539098e215fc193c8926c897175d64c7926d00a36188917717a05"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:936a787f83db1f2115ee829dd615c4f684ee48ac4de5779ab4300994d8af325b"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:459d6be6134ce3b38e0ef76f8a672924460c455d45f1ad8fdade36796df1ddc8"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9ee4febb249c591d07b2d4dd36ebcad0ccd128962aaa1801508320896575ef"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40a0bd0bed96dae5712dab2aba7d334a6c67cbcac2ddfca7dbcc4a8176445990"},
- {file = "pydantic_core-2.16.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:870dbfa94de9b8866b37b867a2cb37a60c401d9deb4a9ea392abf11a1f98037b"},
- {file = "pydantic_core-2.16.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:308974fdf98046db28440eb3377abba274808bf66262e042c412eb2adf852731"},
- {file = "pydantic_core-2.16.2-cp310-none-win32.whl", hash = "sha256:a477932664d9611d7a0816cc3c0eb1f8856f8a42435488280dfbf4395e141485"},
- {file = "pydantic_core-2.16.2-cp310-none-win_amd64.whl", hash = "sha256:8f9142a6ed83d90c94a3efd7af8873bf7cefed2d3d44387bf848888482e2d25f"},
- {file = "pydantic_core-2.16.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:406fac1d09edc613020ce9cf3f2ccf1a1b2f57ab00552b4c18e3d5276c67eb11"},
- {file = "pydantic_core-2.16.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ce232a6170dd6532096cadbf6185271e4e8c70fc9217ebe105923ac105da9978"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a90fec23b4b05a09ad988e7a4f4e081711a90eb2a55b9c984d8b74597599180f"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8aafeedb6597a163a9c9727d8a8bd363a93277701b7bfd2749fbefee2396469e"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9957433c3a1b67bdd4c63717eaf174ebb749510d5ea612cd4e83f2d9142f3fc8"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0d7a9165167269758145756db43a133608a531b1e5bb6a626b9ee24bc38a8f7"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dffaf740fe2e147fedcb6b561353a16243e654f7fe8e701b1b9db148242e1272"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8ed79883b4328b7f0bd142733d99c8e6b22703e908ec63d930b06be3a0e7113"},
- {file = "pydantic_core-2.16.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:cf903310a34e14651c9de056fcc12ce090560864d5a2bb0174b971685684e1d8"},
- {file = "pydantic_core-2.16.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:46b0d5520dbcafea9a8645a8164658777686c5c524d381d983317d29687cce97"},
- {file = "pydantic_core-2.16.2-cp311-none-win32.whl", hash = "sha256:70651ff6e663428cea902dac297066d5c6e5423fda345a4ca62430575364d62b"},
- {file = "pydantic_core-2.16.2-cp311-none-win_amd64.whl", hash = "sha256:98dc6f4f2095fc7ad277782a7c2c88296badcad92316b5a6e530930b1d475ebc"},
- {file = "pydantic_core-2.16.2-cp311-none-win_arm64.whl", hash = "sha256:ef6113cd31411eaf9b39fc5a8848e71c72656fd418882488598758b2c8c6dfa0"},
- {file = "pydantic_core-2.16.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:88646cae28eb1dd5cd1e09605680c2b043b64d7481cdad7f5003ebef401a3039"},
- {file = "pydantic_core-2.16.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7b883af50eaa6bb3299780651e5be921e88050ccf00e3e583b1e92020333304b"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bf26c2e2ea59d32807081ad51968133af3025c4ba5753e6a794683d2c91bf6e"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99af961d72ac731aae2a1b55ccbdae0733d816f8bfb97b41909e143de735f522"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02906e7306cb8c5901a1feb61f9ab5e5c690dbbeaa04d84c1b9ae2a01ebe9379"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5362d099c244a2d2f9659fb3c9db7c735f0004765bbe06b99be69fbd87c3f15"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ac426704840877a285d03a445e162eb258924f014e2f074e209d9b4ff7bf380"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b94cbda27267423411c928208e89adddf2ea5dd5f74b9528513f0358bba019cb"},
- {file = "pydantic_core-2.16.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6db58c22ac6c81aeac33912fb1af0e930bc9774166cdd56eade913d5f2fff35e"},
- {file = "pydantic_core-2.16.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:396fdf88b1b503c9c59c84a08b6833ec0c3b5ad1a83230252a9e17b7dfb4cffc"},
- {file = "pydantic_core-2.16.2-cp312-none-win32.whl", hash = "sha256:7c31669e0c8cc68400ef0c730c3a1e11317ba76b892deeefaf52dcb41d56ed5d"},
- {file = "pydantic_core-2.16.2-cp312-none-win_amd64.whl", hash = "sha256:a3b7352b48fbc8b446b75f3069124e87f599d25afb8baa96a550256c031bb890"},
- {file = "pydantic_core-2.16.2-cp312-none-win_arm64.whl", hash = "sha256:a9e523474998fb33f7c1a4d55f5504c908d57add624599e095c20fa575b8d943"},
- {file = "pydantic_core-2.16.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:ae34418b6b389d601b31153b84dce480351a352e0bb763684a1b993d6be30f17"},
- {file = "pydantic_core-2.16.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:732bd062c9e5d9582a30e8751461c1917dd1ccbdd6cafb032f02c86b20d2e7ec"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b52776a2e3230f4854907a1e0946eec04d41b1fc64069ee774876bbe0eab55"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ef551c053692b1e39e3f7950ce2296536728871110e7d75c4e7753fb30ca87f4"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ebb892ed8599b23fa8f1799e13a12c87a97a6c9d0f497525ce9858564c4575a4"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aa6c8c582036275997a733427b88031a32ffa5dfc3124dc25a730658c47a572f"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ba0884a91f1aecce75202473ab138724aa4fb26d7707f2e1fa6c3e68c84fbf"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7924e54f7ce5d253d6160090ddc6df25ed2feea25bfb3339b424a9dd591688bc"},
- {file = "pydantic_core-2.16.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69a7b96b59322a81c2203be537957313b07dd333105b73db0b69212c7d867b4b"},
- {file = "pydantic_core-2.16.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7e6231aa5bdacda78e96ad7b07d0c312f34ba35d717115f4b4bff6cb87224f0f"},
- {file = "pydantic_core-2.16.2-cp38-none-win32.whl", hash = "sha256:41dac3b9fce187a25c6253ec79a3f9e2a7e761eb08690e90415069ea4a68ff7a"},
- {file = "pydantic_core-2.16.2-cp38-none-win_amd64.whl", hash = "sha256:f685dbc1fdadb1dcd5b5e51e0a378d4685a891b2ddaf8e2bba89bd3a7144e44a"},
- {file = "pydantic_core-2.16.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:55749f745ebf154c0d63d46c8c58594d8894b161928aa41adbb0709c1fe78b77"},
- {file = "pydantic_core-2.16.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b30b0dd58a4509c3bd7eefddf6338565c4905406aee0c6e4a5293841411a1286"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18de31781cdc7e7b28678df7c2d7882f9692ad060bc6ee3c94eb15a5d733f8f7"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5864b0242f74b9dd0b78fd39db1768bc3f00d1ffc14e596fd3e3f2ce43436a33"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8f9186ca45aee030dc8234118b9c0784ad91a0bb27fc4e7d9d6608a5e3d386c"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc6f6c9be0ab6da37bc77c2dda5f14b1d532d5dbef00311ee6e13357a418e646"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa057095f621dad24a1e906747179a69780ef45cc8f69e97463692adbcdae878"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ad84731a26bcfb299f9eab56c7932d46f9cad51c52768cace09e92a19e4cf55"},
- {file = "pydantic_core-2.16.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3b052c753c4babf2d1edc034c97851f867c87d6f3ea63a12e2700f159f5c41c3"},
- {file = "pydantic_core-2.16.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e0f686549e32ccdb02ae6f25eee40cc33900910085de6aa3790effd391ae10c2"},
- {file = "pydantic_core-2.16.2-cp39-none-win32.whl", hash = "sha256:7afb844041e707ac9ad9acad2188a90bffce2c770e6dc2318be0c9916aef1469"},
- {file = "pydantic_core-2.16.2-cp39-none-win_amd64.whl", hash = "sha256:9da90d393a8227d717c19f5397688a38635afec89f2e2d7af0df037f3249c39a"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5f60f920691a620b03082692c378661947d09415743e437a7478c309eb0e4f82"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:47924039e785a04d4a4fa49455e51b4eb3422d6eaacfde9fc9abf8fdef164e8a"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6294e76b0380bb7a61eb8a39273c40b20beb35e8c87ee101062834ced19c545"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe56851c3f1d6f5384b3051c536cc81b3a93a73faf931f404fef95217cf1e10d"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9d776d30cde7e541b8180103c3f294ef7c1862fd45d81738d156d00551005784"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:72f7919af5de5ecfaf1eba47bf9a5d8aa089a3340277276e5636d16ee97614d7"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:4bfcbde6e06c56b30668a0c872d75a7ef3025dc3c1823a13cf29a0e9b33f67e8"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ff7c97eb7a29aba230389a2661edf2e9e06ce616c7e35aa764879b6894a44b25"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9b5f13857da99325dcabe1cc4e9e6a3d7b2e2c726248ba5dd4be3e8e4a0b6d0e"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a7e41e3ada4cca5f22b478c08e973c930e5e6c7ba3588fb8e35f2398cdcc1545"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60eb8ceaa40a41540b9acae6ae7c1f0a67d233c40dc4359c256ad2ad85bdf5e5"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7beec26729d496a12fd23cf8da9944ee338c8b8a17035a560b585c36fe81af20"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:22c5f022799f3cd6741e24f0443ead92ef42be93ffda0d29b2597208c94c3753"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:eca58e319f4fd6df004762419612122b2c7e7d95ffafc37e890252f869f3fb2a"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ed957db4c33bc99895f3a1672eca7e80e8cda8bd1e29a80536b4ec2153fa9804"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:459c0d338cc55d099798618f714b21b7ece17eb1a87879f2da20a3ff4c7628e2"},
- {file = "pydantic_core-2.16.2.tar.gz", hash = "sha256:0ba503850d8b8dcc18391f10de896ae51d37fe5fe43dbfb6a35c5c5cad271a06"},
-]
-
-[package.dependencies]
-typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
-
-[[package]]
-name = "sniffio"
-version = "1.3.0"
-description = "Sniff out which async library your code is running under"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"},
- {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"},
-]
-
-[[package]]
-name = "starlette"
-version = "0.36.3"
-description = "The little ASGI library that shines."
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "starlette-0.36.3-py3-none-any.whl", hash = "sha256:13d429aa93a61dc40bf503e8c801db1f1bca3dc706b10ef2434a36123568f044"},
- {file = "starlette-0.36.3.tar.gz", hash = "sha256:90a671733cfb35771d8cc605e0b679d23b992f8dcfad48cc60b38cb29aeb7080"},
-]
-
-[package.dependencies]
-anyio = ">=3.4.0,<5"
-
-[package.extras]
-full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"]
-
-[[package]]
-name = "tqdm"
-version = "4.66.3"
-description = "Fast, Extensible Progress Meter"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "tqdm-4.66.3-py3-none-any.whl", hash = "sha256:4f41d54107ff9a223dca80b53efe4fb654c67efaba7f47bada3ee9d50e05bd53"},
- {file = "tqdm-4.66.3.tar.gz", hash = "sha256:23097a41eba115ba99ecae40d06444c15d1c0c698d527a01c6c8bd1c5d0647e5"},
-]
-
-[package.dependencies]
-colorama = {version = "*", markers = "platform_system == \"Windows\""}
-
-[package.extras]
-dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"]
-notebook = ["ipywidgets (>=6)"]
-slack = ["slack-sdk"]
-telegram = ["requests"]
-
-[[package]]
-name = "typing-extensions"
-version = "4.9.0"
-description = "Backported and Experimental Type Hints for Python 3.8+"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"},
- {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"},
-]
-
-[[package]]
-name = "uvicorn"
-version = "0.27.1"
-description = "The lightning-fast ASGI server."
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "uvicorn-0.27.1-py3-none-any.whl", hash = "sha256:5c89da2f3895767472a35556e539fd59f7edbe9b1e9c0e1c99eebeadc61838e4"},
- {file = "uvicorn-0.27.1.tar.gz", hash = "sha256:3d9a267296243532db80c83a959a3400502165ade2c1338dea4e67915fd4745a"},
-]
-
-[package.dependencies]
-click = ">=7.0"
-h11 = ">=0.8"
-typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""}
-
-[package.extras]
-standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"]
-
-[[package]]
-name = "wrapt"
-version = "1.14.0"
-description = "Module for decorators, wrappers and monkey patching."
-optional = false
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
-files = [
- {file = "wrapt-1.14.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:5a9a1889cc01ed2ed5f34574c90745fab1dd06ec2eee663e8ebeefe363e8efd7"},
- {file = "wrapt-1.14.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:9a3ff5fb015f6feb78340143584d9f8a0b91b6293d6b5cf4295b3e95d179b88c"},
- {file = "wrapt-1.14.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:4b847029e2d5e11fd536c9ac3136ddc3f54bc9488a75ef7d040a3900406a91eb"},
- {file = "wrapt-1.14.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:9a5a544861b21e0e7575b6023adebe7a8c6321127bb1d238eb40d99803a0e8bd"},
- {file = "wrapt-1.14.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:88236b90dda77f0394f878324cfbae05ae6fde8a84d548cfe73a75278d760291"},
- {file = "wrapt-1.14.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:f0408e2dbad9e82b4c960274214af533f856a199c9274bd4aff55d4634dedc33"},
- {file = "wrapt-1.14.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:9d8c68c4145041b4eeae96239802cfdfd9ef927754a5be3f50505f09f309d8c6"},
- {file = "wrapt-1.14.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:22626dca56fd7f55a0733e604f1027277eb0f4f3d95ff28f15d27ac25a45f71b"},
- {file = "wrapt-1.14.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:65bf3eb34721bf18b5a021a1ad7aa05947a1767d1aa272b725728014475ea7d5"},
- {file = "wrapt-1.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:09d16ae7a13cff43660155383a2372b4aa09109c7127aa3f24c3cf99b891c330"},
- {file = "wrapt-1.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:debaf04f813ada978d7d16c7dfa16f3c9c2ec9adf4656efdc4defdf841fc2f0c"},
- {file = "wrapt-1.14.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:748df39ed634851350efa87690c2237a678ed794fe9ede3f0d79f071ee042561"},
- {file = "wrapt-1.14.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1807054aa7b61ad8d8103b3b30c9764de2e9d0c0978e9d3fc337e4e74bf25faa"},
- {file = "wrapt-1.14.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:763a73ab377390e2af26042f685a26787c402390f682443727b847e9496e4a2a"},
- {file = "wrapt-1.14.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:8529b07b49b2d89d6917cfa157d3ea1dfb4d319d51e23030664a827fe5fd2131"},
- {file = "wrapt-1.14.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:68aeefac31c1f73949662ba8affaf9950b9938b712fb9d428fa2a07e40ee57f8"},
- {file = "wrapt-1.14.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59d7d92cee84a547d91267f0fea381c363121d70fe90b12cd88241bd9b0e1763"},
- {file = "wrapt-1.14.0-cp310-cp310-win32.whl", hash = "sha256:3a88254881e8a8c4784ecc9cb2249ff757fd94b911d5df9a5984961b96113fff"},
- {file = "wrapt-1.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:9a242871b3d8eecc56d350e5e03ea1854de47b17f040446da0e47dc3e0b9ad4d"},
- {file = "wrapt-1.14.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:a65bffd24409454b889af33b6c49d0d9bcd1a219b972fba975ac935f17bdf627"},
- {file = "wrapt-1.14.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9d9fcd06c952efa4b6b95f3d788a819b7f33d11bea377be6b8980c95e7d10775"},
- {file = "wrapt-1.14.0-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:db6a0ddc1282ceb9032e41853e659c9b638789be38e5b8ad7498caac00231c23"},
- {file = "wrapt-1.14.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:14e7e2c5f5fca67e9a6d5f753d21f138398cad2b1159913ec9e9a67745f09ba3"},
- {file = "wrapt-1.14.0-cp35-cp35m-win32.whl", hash = "sha256:6d9810d4f697d58fd66039ab959e6d37e63ab377008ef1d63904df25956c7db0"},
- {file = "wrapt-1.14.0-cp35-cp35m-win_amd64.whl", hash = "sha256:d808a5a5411982a09fef6b49aac62986274ab050e9d3e9817ad65b2791ed1425"},
- {file = "wrapt-1.14.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b77159d9862374da213f741af0c361720200ab7ad21b9f12556e0eb95912cd48"},
- {file = "wrapt-1.14.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36a76a7527df8583112b24adc01748cd51a2d14e905b337a6fefa8b96fc708fb"},
- {file = "wrapt-1.14.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a0057b5435a65b933cbf5d859cd4956624df37b8bf0917c71756e4b3d9958b9e"},
- {file = "wrapt-1.14.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a0a4ca02752ced5f37498827e49c414d694ad7cf451ee850e3ff160f2bee9d3"},
- {file = "wrapt-1.14.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:8c6be72eac3c14baa473620e04f74186c5d8f45d80f8f2b4eda6e1d18af808e8"},
- {file = "wrapt-1.14.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:21b1106bff6ece8cb203ef45b4f5778d7226c941c83aaaa1e1f0f4f32cc148cd"},
- {file = "wrapt-1.14.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:493da1f8b1bb8a623c16552fb4a1e164c0200447eb83d3f68b44315ead3f9036"},
- {file = "wrapt-1.14.0-cp36-cp36m-win32.whl", hash = "sha256:89ba3d548ee1e6291a20f3c7380c92f71e358ce8b9e48161401e087e0bc740f8"},
- {file = "wrapt-1.14.0-cp36-cp36m-win_amd64.whl", hash = "sha256:729d5e96566f44fccac6c4447ec2332636b4fe273f03da128fff8d5559782b06"},
- {file = "wrapt-1.14.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:891c353e95bb11abb548ca95c8b98050f3620a7378332eb90d6acdef35b401d4"},
- {file = "wrapt-1.14.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23f96134a3aa24cc50614920cc087e22f87439053d886e474638c68c8d15dc80"},
- {file = "wrapt-1.14.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6807bcee549a8cb2f38f73f469703a1d8d5d990815c3004f21ddb68a567385ce"},
- {file = "wrapt-1.14.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6915682f9a9bc4cf2908e83caf5895a685da1fbd20b6d485dafb8e218a338279"},
- {file = "wrapt-1.14.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f2f3bc7cd9c9fcd39143f11342eb5963317bd54ecc98e3650ca22704b69d9653"},
- {file = "wrapt-1.14.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:3a71dbd792cc7a3d772ef8cd08d3048593f13d6f40a11f3427c000cf0a5b36a0"},
- {file = "wrapt-1.14.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:5a0898a640559dec00f3614ffb11d97a2666ee9a2a6bad1259c9facd01a1d4d9"},
- {file = "wrapt-1.14.0-cp37-cp37m-win32.whl", hash = "sha256:167e4793dc987f77fd476862d32fa404d42b71f6a85d3b38cbce711dba5e6b68"},
- {file = "wrapt-1.14.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d066ffc5ed0be00cd0352c95800a519cf9e4b5dd34a028d301bdc7177c72daf3"},
- {file = "wrapt-1.14.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d9bdfa74d369256e4218000a629978590fd7cb6cf6893251dad13d051090436d"},
- {file = "wrapt-1.14.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2498762814dd7dd2a1d0248eda2afbc3dd9c11537bc8200a4b21789b6df6cd38"},
- {file = "wrapt-1.14.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f24ca7953f2643d59a9c87d6e272d8adddd4a53bb62b9208f36db408d7aafc7"},
- {file = "wrapt-1.14.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b835b86bd5a1bdbe257d610eecab07bf685b1af2a7563093e0e69180c1d4af1"},
- {file = "wrapt-1.14.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b21650fa6907e523869e0396c5bd591cc326e5c1dd594dcdccac089561cacfb8"},
- {file = "wrapt-1.14.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:354d9fc6b1e44750e2a67b4b108841f5f5ea08853453ecbf44c81fdc2e0d50bd"},
- {file = "wrapt-1.14.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1f83e9c21cd5275991076b2ba1cd35418af3504667affb4745b48937e214bafe"},
- {file = "wrapt-1.14.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:61e1a064906ccba038aa3c4a5a82f6199749efbbb3cef0804ae5c37f550eded0"},
- {file = "wrapt-1.14.0-cp38-cp38-win32.whl", hash = "sha256:28c659878f684365d53cf59dc9a1929ea2eecd7ac65da762be8b1ba193f7e84f"},
- {file = "wrapt-1.14.0-cp38-cp38-win_amd64.whl", hash = "sha256:b0ed6ad6c9640671689c2dbe6244680fe8b897c08fd1fab2228429b66c518e5e"},
- {file = "wrapt-1.14.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b3f7e671fb19734c872566e57ce7fc235fa953d7c181bb4ef138e17d607dc8a1"},
- {file = "wrapt-1.14.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:87fa943e8bbe40c8c1ba4086971a6fefbf75e9991217c55ed1bcb2f1985bd3d4"},
- {file = "wrapt-1.14.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4775a574e9d84e0212f5b18886cace049a42e13e12009bb0491562a48bb2b758"},
- {file = "wrapt-1.14.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9d57677238a0c5411c76097b8b93bdebb02eb845814c90f0b01727527a179e4d"},
- {file = "wrapt-1.14.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00108411e0f34c52ce16f81f1d308a571df7784932cc7491d1e94be2ee93374b"},
- {file = "wrapt-1.14.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d332eecf307fca852d02b63f35a7872de32d5ba8b4ec32da82f45df986b39ff6"},
- {file = "wrapt-1.14.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:01f799def9b96a8ec1ef6b9c1bbaf2bbc859b87545efbecc4a78faea13d0e3a0"},
- {file = "wrapt-1.14.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47045ed35481e857918ae78b54891fac0c1d197f22c95778e66302668309336c"},
- {file = "wrapt-1.14.0-cp39-cp39-win32.whl", hash = "sha256:2eca15d6b947cfff51ed76b2d60fd172c6ecd418ddab1c5126032d27f74bc350"},
- {file = "wrapt-1.14.0-cp39-cp39-win_amd64.whl", hash = "sha256:bb36fbb48b22985d13a6b496ea5fb9bb2a076fea943831643836c9f6febbcfdc"},
- {file = "wrapt-1.14.0.tar.gz", hash = "sha256:8323a43bd9c91f62bb7d4be74cc9ff10090e7ef820e27bfe8815c57e68261311"},
-]
-
-[metadata]
-lock-version = "2.0"
-python-versions = "^3.10"
-content-hash = "c8fb6fd6f38ed6f69651891f935f962d500e98db1586c37ab7b01271c2aa5607"
diff --git a/examples/fastapi_example/pyproject.toml b/examples/fastapi_example/pyproject.toml
deleted file mode 100644
index 9a2fc7d3a..000000000
--- a/examples/fastapi_example/pyproject.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-[tool.poetry]
-name = "fastapi-example"
-version = "0.1.0"
-description = ""
-authors = ["ChrisTho23 "]
-readme = "README.md"
-
-[tool.poetry.dependencies]
-python = "^3.10"
-fastapi = "^0.109.2"
-uvicorn = "^0.27.1"
-langfuse = "^2.13.3"
-
-[tool.poetry.scripts]
-start = "fastapi_example.main:start"
-
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
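
For context, the removed `[tool.poetry.scripts]` table points `start` at `fastapi_example.main:start`. That module is not part of this diff, so the following is only a sketch of what such an entry point conventionally looks like; the route, message, and port are assumptions.

```python
# Hypothetical sketch of the `start` entry point referenced by the removed
# [tool.poetry.scripts] section; fastapi_example/main.py is not shown in this
# diff, so the app and route below are assumptions.
import uvicorn
from fastapi import FastAPI

app = FastAPI()


@app.get("/")
def read_root():
    return {"message": "Hello from the FastAPI example"}


def start():
    # `poetry run start` resolves to this function via the scripts table above.
    uvicorn.run("fastapi_example.main:app", host="0.0.0.0", port=8000)
```
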
diff --git a/langfuse/_task_manager/ingestion_consumer.py b/langfuse/_task_manager/ingestion_consumer.py
index 9900654c2..afe315288 100644
--- a/langfuse/_task_manager/ingestion_consumer.py
+++ b/langfuse/_task_manager/ingestion_consumer.py
@@ -21,8 +21,8 @@
from .media_manager import MediaManager
-MAX_EVENT_SIZE_BYTES = int(os.environ.get("LANGFUSE_MAX_EVENT_SIZE_BYTES", 1_000_000))
-MAX_BATCH_SIZE_BYTES = int(os.environ.get("LANGFUSE_MAX_BATCH_SIZE_BYTES", 2_500_000))
+MAX_EVENT_SIZE_BYTES = int(os.environ.get("LANGFUSE_MAX_EVENT_SIZE_BYTES", 20_000_000))
+MAX_BATCH_SIZE_BYTES = int(os.environ.get("LANGFUSE_MAX_BATCH_SIZE_BYTES", 40_500_000))
class IngestionMetadata(pydantic.BaseModel):
@@ -142,6 +142,7 @@ def _next(self):
total_size += item_size
if total_size >= MAX_BATCH_SIZE_BYTES:
self._log.debug("hit batch size limit (size: %d)", total_size)
break
except Empty:
@@ -176,6 +177,9 @@ def _truncate_item_in_place(
"Item exceeds size limit (size: %s), dropping input / output / metadata of item until it fits.",
item_size,
)
if "body" in event:
drop_candidates = ["input", "output", "metadata"]
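
The two size limits in the hunk above are plain module-level constants read from environment variables, so they are fixed at import time. A minimal sketch of overriding them for a deployment, assuming only the variable names shown in the diff (the values below are illustrative):

```python
# Sketch: overriding the ingestion size limits without touching the SDK code.
# The variable names come from the hunk above; the values are illustrative.
# They must be set before langfuse is imported, because the constants are
# evaluated when the module is first loaded.
import os

os.environ["LANGFUSE_MAX_EVENT_SIZE_BYTES"] = str(5_000_000)
os.environ["LANGFUSE_MAX_BATCH_SIZE_BYTES"] = str(10_000_000)

from langfuse import Langfuse  # noqa: E402  (imported after the env vars are set)

langfuse = Langfuse()
```
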
diff --git a/tests/__init__.py b/tests/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/tests/api_wrapper.py b/tests/api_wrapper.py
deleted file mode 100644
index 42f941550..000000000
--- a/tests/api_wrapper.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import os
-from time import sleep
-
-import httpx
-
-
-class LangfuseAPI:
- def __init__(self, username=None, password=None, base_url=None):
- username = username if username else os.environ["LANGFUSE_PUBLIC_KEY"]
- password = password if password else os.environ["LANGFUSE_SECRET_KEY"]
- self.auth = (username, password)
- self.BASE_URL = base_url if base_url else os.environ["LANGFUSE_HOST"]
-
- def get_observation(self, observation_id):
- sleep(1)
- url = f"{self.BASE_URL}/api/public/observations/{observation_id}"
- response = httpx.get(url, auth=self.auth)
- return response.json()
-
- def get_scores(self, page=None, limit=None, user_id=None, name=None):
- sleep(1)
- params = {"page": page, "limit": limit, "userId": user_id, "name": name}
- url = f"{self.BASE_URL}/api/public/scores"
- response = httpx.get(url, params=params, auth=self.auth)
- return response.json()
-
- def get_traces(self, page=None, limit=None, user_id=None, name=None):
- sleep(1)
- params = {"page": page, "limit": limit, "userId": user_id, "name": name}
- url = f"{self.BASE_URL}/api/public/traces"
- response = httpx.get(url, params=params, auth=self.auth)
- return response.json()
-
- def get_trace(self, trace_id):
- sleep(1)
- url = f"{self.BASE_URL}/api/public/traces/{trace_id}"
- response = httpx.get(url, auth=self.auth)
- return response.json()
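
The removed `LangfuseAPI` helper is a thin httpx wrapper around the public REST endpoints, authenticating with the public/secret key pair via HTTP basic auth. A hedged sketch of how it was used, assuming the standard Langfuse environment variables are set:

```python
# Sketch of how the removed test helper was used; like its constructor above,
# it relies on LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY and LANGFUSE_HOST
# being set in the environment. The trace id is a placeholder.
from tests.api_wrapper import LangfuseAPI

api = LangfuseAPI()

trace = api.get_trace("some-trace-id")    # GET /api/public/traces/{id}
traces = api.get_traces(user_id="test")   # GET /api/public/traces
scores = api.get_scores(limit=10)         # GET /api/public/scores
```
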
diff --git a/tests/load_test.py b/tests/load_test.py
deleted file mode 100644
index 1be62636f..000000000
--- a/tests/load_test.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# create 5 different trace names
-from asyncio import gather
-from langfuse.client import Langfuse
-from langfuse.utils import _get_timestamp
-from tests.utils import create_uuid
-
-
-trace_names = [create_uuid() for _ in range(5)]
-
-# create 20 different generation names
-generation_names = [create_uuid() for _ in range(20)]
-
-# create 2000 different user ids
-user_ids = [create_uuid() for _ in range(2000)]
-
-
-async def execute():
- start = _get_timestamp()
-
- async def update_generation(i, langfuse: Langfuse):
- trace = langfuse.trace(name=trace_names[i % 4], user_id=user_ids[i % 1999])
- # random amount of generations, 1-10
- for _ in range(i % 10):
- generation = trace.generation(name=generation_names[i % 19])
- generation.update(metadata={"count": str(i)})
-
- langfuse = Langfuse(debug=False, threads=100)
- print("start")
- await gather(*(update_generation(i, langfuse) for i in range(100_000)))
- print("flush")
- langfuse.flush()
- diff = _get_timestamp() - start
- print(diff)
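
The removed load test drives 100,000 concurrent trace/generation writes through a single client configured with 100 consumer threads; `execute` is a coroutine, so it has to be driven by an event loop. A minimal sketch of how it was invoked, assuming the module is importable from the repository root:

```python
# Sketch: driving the removed load test's async entry point.
# `execute` is a coroutine, so it needs an event loop to run.
import asyncio

from tests.load_test import execute

if __name__ == "__main__":
    asyncio.run(execute())
```
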
diff --git a/tests/test_core_sdk.py b/tests/test_core_sdk.py
deleted file mode 100644
index a09b7c1f9..000000000
--- a/tests/test_core_sdk.py
+++ /dev/null
@@ -1,1532 +0,0 @@
-import os
-import time
-from asyncio import gather
-from datetime import datetime, timedelta, timezone
-from time import sleep
-
-import pytest
-
-from langfuse import Langfuse
-from langfuse.client import (
- FetchObservationResponse,
- FetchObservationsResponse,
- FetchSessionsResponse,
- FetchTraceResponse,
- FetchTracesResponse,
-)
-from langfuse.utils import _get_timestamp
-from tests.api_wrapper import LangfuseAPI
-from tests.utils import (
- CompletionUsage,
- LlmUsage,
- LlmUsageWithCost,
- create_uuid,
- get_api,
-)
-
-
-@pytest.mark.asyncio
-async def test_concurrency():
- start = _get_timestamp()
-
- async def update_generation(i, langfuse: Langfuse):
- trace = langfuse.trace(name=str(i))
- generation = trace.generation(name=str(i))
- generation.update(metadata={"count": str(i)})
-
- langfuse = Langfuse(debug=False, threads=5)
- print("start")
- await gather(*(update_generation(i, langfuse) for i in range(100)))
- print("flush")
- langfuse.flush()
- diff = _get_timestamp() - start
- print(diff)
-
- api = get_api()
- for i in range(100):
- observation = api.observations.get_many(name=str(i)).data[0]
- assert observation.name == str(i)
- assert observation.metadata == {"count": i}
-
-
-def test_flush():
- # set up the consumer with more requests than a single batch will allow
- langfuse = Langfuse(debug=False)
-
- for i in range(2):
- langfuse.trace(
- name=str(i),
- )
-
- langfuse.flush()
- # Make sure that the client queue is empty after flushing
- assert langfuse.task_manager._ingestion_queue.empty()
-
-
-def test_shutdown():
- langfuse = Langfuse(debug=False)
-
- for i in range(2):
- langfuse.trace(
- name=str(i),
- )
-
- langfuse.shutdown()
- # we expect two things after shutdown:
- # 1. client queue is empty
- # 2. consumer thread has stopped
- assert langfuse.task_manager._ingestion_queue.empty()
-
-
-def test_invalid_score_data_does_not_raise_exception():
- langfuse = Langfuse(debug=False)
-
- trace = langfuse.trace(
- name="this-is-so-great-new",
- user_id="test",
- metadata="test",
- )
-
- langfuse.flush()
- assert langfuse.task_manager._ingestion_queue.qsize() == 0
-
- score_id = create_uuid()
-
- langfuse.score(
- id=score_id,
- trace_id=trace.id,
- name="this-is-a-score",
- value=-1,
- data_type="BOOLEAN",
- )
-
- langfuse.flush()
- assert langfuse.task_manager._ingestion_queue.qsize() == 0
-
-
-def test_create_numeric_score():
- langfuse = Langfuse(debug=False)
- api_wrapper = LangfuseAPI()
-
- trace = langfuse.trace(
- name="this-is-so-great-new",
- user_id="test",
- metadata="test",
- )
-
- langfuse.flush()
- assert langfuse.task_manager._ingestion_queue.qsize() == 0
-
- score_id = create_uuid()
-
- langfuse.score(
- id=score_id,
- trace_id=trace.id,
- name="this-is-a-score",
- value=1,
- )
-
- trace.generation(name="yet another child", metadata="test")
-
- langfuse.flush()
-
- assert langfuse.task_manager._ingestion_queue.qsize() == 0
-
- trace = api_wrapper.get_trace(trace.id)
-
- assert trace["scores"][0]["id"] == score_id
- assert trace["scores"][0]["value"] == 1
- assert trace["scores"][0]["dataType"] == "NUMERIC"
- assert trace["scores"][0]["stringValue"] is None
-
-
-def test_create_boolean_score():
- langfuse = Langfuse(debug=False)
- api_wrapper = LangfuseAPI()
-
- trace = langfuse.trace(
- name="this-is-so-great-new",
- user_id="test",
- metadata="test",
- )
-
- langfuse.flush()
- assert langfuse.task_manager._ingestion_queue.qsize() == 0
-
- score_id = create_uuid()
-
- langfuse.score(
- id=score_id,
- trace_id=trace.id,
- name="this-is-a-score",
- value=1,
- data_type="BOOLEAN",
- )
-
- trace.generation(name="yet another child", metadata="test")
-
- langfuse.flush()
-
- assert langfuse.task_manager._ingestion_queue.qsize() == 0
-
- trace = api_wrapper.get_trace(trace.id)
-
- assert trace["scores"][0]["id"] == score_id
- assert trace["scores"][0]["dataType"] == "BOOLEAN"
- assert trace["scores"][0]["value"] == 1
- assert trace["scores"][0]["stringValue"] == "True"
-
-
-def test_create_categorical_score():
- langfuse = Langfuse(debug=False)
- api_wrapper = LangfuseAPI()
-
- trace = langfuse.trace(
- name="this-is-so-great-new",
- user_id="test",
- metadata="test",
- )
-
- langfuse.flush()
- assert langfuse.task_manager._ingestion_queue.qsize() == 0
-
- score_id = create_uuid()
-
- langfuse.score(
- id=score_id,
- trace_id=trace.id,
- name="this-is-a-score",
- value="high score",
- )
-
- trace.generation(name="yet another child", metadata="test")
-
- langfuse.flush()
-
- assert langfuse.task_manager._ingestion_queue.qsize() == 0
-
- trace = api_wrapper.get_trace(trace.id)
-
- assert trace["scores"][0]["id"] == score_id
- assert trace["scores"][0]["dataType"] == "CATEGORICAL"
- assert trace["scores"][0]["value"] == 0
- assert trace["scores"][0]["stringValue"] == "high score"
-
-
-def test_create_trace():
- langfuse = Langfuse(debug=False)
- trace_name = create_uuid()
-
- trace = langfuse.trace(
- name=trace_name,
- user_id="test",
- metadata={"key": "value"},
- tags=["tag1", "tag2"],
- public=True,
- )
-
- langfuse.flush()
- sleep(2)
-
- trace = LangfuseAPI().get_trace(trace.id)
-
- assert trace["name"] == trace_name
- assert trace["userId"] == "test"
- assert trace["metadata"] == {"key": "value"}
- assert trace["tags"] == ["tag1", "tag2"]
- assert trace["public"] is True
- assert True if not trace["externalId"] else False
-
-
-def test_create_update_trace():
- langfuse = Langfuse()
-
- trace_name = create_uuid()
-
- trace = langfuse.trace(
- name=trace_name,
- user_id="test",
- metadata={"key": "value"},
- public=True,
- )
- sleep(1)
- trace.update(metadata={"key2": "value2"}, public=False)
-
- langfuse.flush()
-
- trace = get_api().trace.get(trace.id)
-
- assert trace.name == trace_name
- assert trace.user_id == "test"
- assert trace.metadata == {"key": "value", "key2": "value2"}
- assert trace.public is False
-
-
-def test_create_generation():
- langfuse = Langfuse(debug=True)
-
- timestamp = _get_timestamp()
- generation_id = create_uuid()
- langfuse.generation(
- id=generation_id,
- name="query-generation",
- start_time=timestamp,
- end_time=timestamp,
- model="gpt-3.5-turbo-0125",
- model_parameters={
- "max_tokens": "1000",
- "temperature": "0.9",
- "stop": ["user-1", "user-2"],
- },
- input=[
- {"role": "system", "content": "You are a helpful assistant."},
- {
- "role": "user",
- "content": "Please generate the start of a company documentation that contains the answer to the questinon: Write a summary of the Q3 OKR goals",
- },
- ],
- output="This document entails the OKR goals for ACME",
- usage=LlmUsage(promptTokens=50, completionTokens=49),
- metadata={"interface": "whatsapp"},
- level="DEBUG",
- )
-
- langfuse.flush()
-
- trace_id = langfuse.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
-
- assert trace.name == "query-generation"
- assert trace.user_id is None
- assert trace.metadata == {}
-
- assert len(trace.observations) == 1
-
- generation = trace.observations[0]
-
- assert generation.id == generation_id
- assert generation.name == "query-generation"
- assert generation.start_time is not None
- assert generation.end_time is not None
- assert generation.model == "gpt-3.5-turbo-0125"
- assert generation.model_parameters == {
- "max_tokens": "1000",
- "temperature": "0.9",
- "stop": ["user-1", "user-2"],
- }
- assert generation.input == [
- {"role": "system", "content": "You are a helpful assistant."},
- {
- "role": "user",
- "content": "Please generate the start of a company documentation that contains the answer to the questinon: Write a summary of the Q3 OKR goals",
- },
- ]
- assert generation.output == "This document entails the OKR goals for ACME"
- assert generation.level == "DEBUG"
-
-
-@pytest.mark.parametrize(
- "usage, expected_usage, expected_input_cost, expected_output_cost, expected_total_cost",
- [
- (
- CompletionUsage(prompt_tokens=51, completion_tokens=0, total_tokens=100),
- "TOKENS",
- None,
- None,
- None,
- ),
- (
- LlmUsage(promptTokens=51, completionTokens=0, totalTokens=100),
- "TOKENS",
- None,
- None,
- None,
- ),
- (
- {
- "input": 51,
- "output": 0,
- "total": 100,
- "unit": "TOKENS",
- "input_cost": 100,
- "output_cost": 200,
- "total_cost": 300,
- },
- "TOKENS",
- 100,
- 200,
- 300,
- ),
- (
- {
- "input": 51,
- "output": 0,
- "total": 100,
- "unit": "CHARACTERS",
- "input_cost": 100,
- "output_cost": 200,
- "total_cost": 300,
- },
- "CHARACTERS",
- 100,
- 200,
- 300,
- ),
- (
- LlmUsageWithCost(
- promptTokens=51,
- completionTokens=0,
- totalTokens=100,
- inputCost=100,
- outputCost=200,
- totalCost=300,
- ),
- "TOKENS",
- 100,
- 200,
- 300,
- ),
- ],
-)
-def test_create_generation_complex(
- usage,
- expected_usage,
- expected_input_cost,
- expected_output_cost,
- expected_total_cost,
-):
- langfuse = Langfuse(debug=False)
-
- generation_id = create_uuid()
- langfuse.generation(
- id=generation_id,
- name="query-generation",
- input=[
- {"role": "system", "content": "You are a helpful assistant."},
- {
- "role": "user",
- "content": "Please generate the start of a company documentation that contains the answer to the questinon: Write a summary of the Q3 OKR goals",
- },
- ],
- output=[{"foo": "bar"}],
- usage=usage,
- metadata=[{"tags": ["yo"]}],
- )
-
- langfuse.flush()
-
- trace_id = langfuse.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
-
- assert trace.name == "query-generation"
- assert trace.user_id is None
- assert trace.metadata == {}
-
- assert len(trace.observations) == 1
-
- generation = trace.observations[0]
-
- assert generation.id == generation_id
- assert generation.name == "query-generation"
- assert generation.input == [
- {"role": "system", "content": "You are a helpful assistant."},
- {
- "role": "user",
- "content": "Please generate the start of a company documentation that contains the answer to the questinon: Write a summary of the Q3 OKR goals",
- },
- ]
- assert generation.output == [{"foo": "bar"}]
- assert generation.metadata["metadata"] == [{"tags": ["yo"]}]
- assert generation.start_time is not None
- assert generation.usage_details == {"input": 51, "output": 0, "total": 100}
- assert generation.cost_details == (
- {
- "input": expected_input_cost,
- "output": expected_output_cost,
- "total": expected_total_cost,
- }
- if any([expected_input_cost, expected_output_cost, expected_total_cost])
- else {}
- )
-
-
-def test_create_span():
- langfuse = Langfuse(debug=False)
-
- timestamp = _get_timestamp()
- span_id = create_uuid()
- langfuse.span(
- id=span_id,
- name="span",
- start_time=timestamp,
- end_time=timestamp,
- input={"key": "value"},
- output={"key": "value"},
- metadata={"interface": "whatsapp"},
- )
-
- langfuse.flush()
-
- trace_id = langfuse.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
-
- assert trace.name == "span"
- assert trace.user_id is None
- assert trace.metadata == {}
-
- assert len(trace.observations) == 1
-
- span = trace.observations[0]
-
- assert span.id == span_id
- assert span.name == "span"
- assert span.start_time is not None
- assert span.end_time is not None
- assert span.input == {"key": "value"}
- assert span.output == {"key": "value"}
- assert span.start_time is not None
-
-
-def test_score_trace():
- langfuse = Langfuse(debug=False)
- api_wrapper = LangfuseAPI()
-
- trace_name = create_uuid()
-
- trace = langfuse.trace(name=trace_name)
-
- langfuse.score(
- trace_id=langfuse.get_trace_id(),
- name="valuation",
- value=0.5,
- comment="This is a comment",
- )
-
- langfuse.flush()
-
- trace_id = langfuse.get_trace_id()
-
- trace = api_wrapper.get_trace(trace_id)
-
- assert trace["name"] == trace_name
-
- assert len(trace["scores"]) == 1
-
- score = trace["scores"][0]
-
- assert score["name"] == "valuation"
- assert score["value"] == 0.5
- assert score["comment"] == "This is a comment"
- assert score["observationId"] is None
- assert score["dataType"] == "NUMERIC"
-
-
-def test_score_trace_nested_trace():
- langfuse = Langfuse(debug=False)
-
- trace_name = create_uuid()
-
- trace = langfuse.trace(name=trace_name)
-
- trace.score(
- name="valuation",
- value=0.5,
- comment="This is a comment",
- )
-
- langfuse.flush()
-
- trace_id = langfuse.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
-
- assert trace.name == trace_name
-
- assert len(trace.scores) == 1
-
- score = trace.scores[0]
-
- assert score.name == "valuation"
- assert score.value == 0.5
- assert score.comment == "This is a comment"
- assert score.observation_id is None
- assert score.data_type == "NUMERIC"
-
-
-def test_score_trace_nested_observation():
- langfuse = Langfuse(debug=False)
-
- trace_name = create_uuid()
-
- trace = langfuse.trace(name=trace_name)
- span = trace.span(name="span")
-
- span.score(
- name="valuation",
- value=0.5,
- comment="This is a comment",
- )
-
- langfuse.flush()
-
- trace_id = langfuse.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
-
- assert trace.name == trace_name
-
- assert len(trace.scores) == 1
-
- score = trace.scores[0]
-
- assert score.name == "valuation"
- assert score.value == 0.5
- assert score.comment == "This is a comment"
- assert score.observation_id == span.id
- assert score.data_type == "NUMERIC"
-
-
-def test_score_span():
- langfuse = Langfuse(debug=False)
- api_wrapper = LangfuseAPI()
-
- spanId = create_uuid()
- timestamp = _get_timestamp()
- langfuse.span(
- id=spanId,
- name="span",
- start_time=timestamp,
- end_time=timestamp,
- input={"key": "value"},
- output={"key": "value"},
- metadata={"interface": "whatsapp"},
- )
-
- langfuse.score(
- trace_id=langfuse.get_trace_id(),
- observation_id=spanId,
- name="valuation",
- value=1,
- comment="This is a comment",
- )
-
- langfuse.flush()
-
- trace_id = langfuse.get_trace_id()
-
- trace = api_wrapper.get_trace(trace_id)
-
- assert len(trace["scores"]) == 1
- assert len(trace["observations"]) == 1
-
- score = trace["scores"][0]
-
- assert score["name"] == "valuation"
- assert score["value"] == 1
- assert score["comment"] == "This is a comment"
- assert score["observationId"] == spanId
- assert score["dataType"] == "NUMERIC"
-
-
-def test_create_trace_and_span():
- langfuse = Langfuse(debug=False)
-
- trace_name = create_uuid()
- spanId = create_uuid()
-
- trace = langfuse.trace(name=trace_name)
- trace.span(id=spanId, name="span")
-
- langfuse.flush()
-
- trace = get_api().trace.get(trace.id)
-
- assert trace.name == trace_name
- assert len(trace.observations) == 1
-
- span = trace.observations[0]
- assert span.name == "span"
- assert span.trace_id == trace.id
- assert span.start_time is not None
-
-
-def test_create_trace_and_generation():
- langfuse = Langfuse(debug=False)
-
- trace_name = create_uuid()
- generationId = create_uuid()
-
- trace = langfuse.trace(
- name=trace_name, input={"key": "value"}, session_id="test-session-id"
- )
- trace.generation(
- id=generationId,
- name="generation",
- start_time=datetime.now(),
- end_time=datetime.now(),
- )
-
- langfuse.flush()
-
- dbTrace = get_api().trace.get(trace.id)
- getTrace = langfuse.get_trace(trace.id)
-
- assert dbTrace.name == trace_name
- assert len(dbTrace.observations) == 1
- assert getTrace.name == trace_name
- assert len(getTrace.observations) == 1
- assert getTrace.session_id == "test-session-id"
-
- generation = getTrace.observations[0]
- assert generation.name == "generation"
- assert generation.trace_id == getTrace.id
- assert generation.start_time is not None
- assert getTrace.input == {"key": "value"}
-
-
-def backwards_compatibility_sessionId():
- langfuse = Langfuse(debug=False)
-
- trace = langfuse.trace(name="test", sessionId="test-sessionId")
-
- langfuse.flush()
-
- trace = get_api().trace.get(trace.id)
-
- assert trace.name == "test"
- assert trace.session_id == "test-sessionId"
-
-
-def test_create_trace_with_manual_timestamp():
- langfuse = Langfuse(debug=False)
- api_wrapper = LangfuseAPI()
-
- trace_name = create_uuid()
- trace_id = create_uuid()
- timestamp = _get_timestamp()
-
- langfuse.trace(id=trace_id, name=trace_name, timestamp=timestamp)
-
- langfuse.flush()
-
- trace = api_wrapper.get_trace(trace_id)
-
- assert trace["name"] == trace_name
- assert trace["id"] == trace_id
- assert str(trace["timestamp"]).find(timestamp.isoformat()[0:23]) != -1
-
-
-def test_create_generation_and_trace():
- langfuse = Langfuse(debug=False)
- api_wrapper = LangfuseAPI()
-
- trace_name = create_uuid()
- trace_id = create_uuid()
-
- langfuse.generation(trace_id=trace_id, name="generation")
- langfuse.trace(id=trace_id, name=trace_name)
-
- langfuse.flush()
- sleep(2)
-
- trace = api_wrapper.get_trace(trace_id)
-
- assert trace["name"] == trace_name
- assert len(trace["observations"]) == 1
-
- span = trace["observations"][0]
- assert span["name"] == "generation"
- assert span["traceId"] == trace["id"]
-
-
-def test_create_span_and_get_observation():
- langfuse = Langfuse(debug=False)
-
- span_id = create_uuid()
- langfuse.span(id=span_id, name="span")
- langfuse.flush()
-
- sleep(2)
- observation = langfuse.get_observation(span_id)
- assert observation.name == "span"
- assert observation.id == span_id
-
-
-def test_update_generation():
- langfuse = Langfuse(debug=False)
-
- start = _get_timestamp()
-
- generation = langfuse.generation(name="generation")
- generation.update(start_time=start, metadata={"dict": "value"})
-
- langfuse.flush()
-
- trace = get_api().trace.get(generation.trace_id)
-
- assert trace.name == "generation"
- assert len(trace.observations) == 1
- retrieved_generation = trace.observations[0]
- assert retrieved_generation.name == "generation"
- assert retrieved_generation.trace_id == generation.trace_id
- assert retrieved_generation.metadata == {"dict": "value"}
- assert start.replace(
- microsecond=0, tzinfo=timezone.utc
- ) == retrieved_generation.start_time.replace(microsecond=0)
-
-
-def test_update_span():
- langfuse = Langfuse(debug=False)
-
- span = langfuse.span(name="span")
- span.update(metadata={"dict": "value"})
-
- langfuse.flush()
-
- trace = get_api().trace.get(span.trace_id)
-
- assert trace.name == "span"
- assert len(trace.observations) == 1
-
- retrieved_span = trace.observations[0]
- assert retrieved_span.name == "span"
- assert retrieved_span.trace_id == span.trace_id
- assert retrieved_span.metadata == {"dict": "value"}
-
-
-def test_create_event():
- langfuse = Langfuse(debug=False)
-
- event = langfuse.event(name="event")
-
- langfuse.flush()
-
- observation = get_api().observations.get(event.id)
-
- assert observation.type == "EVENT"
- assert observation.name == "event"
-
-
-def test_create_trace_and_event():
- langfuse = Langfuse(debug=False)
-
- trace_name = create_uuid()
- eventId = create_uuid()
-
- trace = langfuse.trace(name=trace_name)
- trace.event(id=eventId, name="event")
-
- langfuse.flush()
-
- trace = get_api().trace.get(trace.id)
-
- assert trace.name == trace_name
- assert len(trace.observations) == 1
-
- span = trace.observations[0]
- assert span.name == "event"
- assert span.trace_id == trace.id
- assert span.start_time is not None
-
-
-def test_create_span_and_generation():
- langfuse = Langfuse(debug=False)
-
- span = langfuse.span(name="span")
- langfuse.generation(trace_id=span.trace_id, name="generation")
-
- langfuse.flush()
-
- trace = get_api().trace.get(span.trace_id)
-
- assert trace.name == "span"
- assert len(trace.observations) == 2
-
- span = trace.observations[0]
- assert span.trace_id == trace.id
-
- span = trace.observations[1]
- assert span.trace_id == trace.id
-
-
-def test_create_trace_with_id_and_generation():
- langfuse = Langfuse(debug=False)
- api_wrapper = LangfuseAPI()
-
- trace_name = create_uuid()
- trace_id = create_uuid()
-
- trace = langfuse.trace(id=trace_id, name=trace_name)
- trace.generation(name="generation")
-
- langfuse.flush()
-
- trace = api_wrapper.get_trace(trace_id)
-
- assert trace["name"] == trace_name
- assert trace["id"] == trace_id
- assert len(trace["observations"]) == 1
-
- span = trace["observations"][0]
- assert span["name"] == "generation"
- assert span["traceId"] == trace["id"]
-
-
-def test_end_generation():
- langfuse = Langfuse()
- api_wrapper = LangfuseAPI()
-
- timestamp = _get_timestamp()
- generation = langfuse.generation(
- name="query-generation",
- start_time=timestamp,
- model="gpt-3.5-turbo",
- model_parameters={"max_tokens": "1000", "temperature": "0.9"},
- input=[
- {"role": "system", "content": "You are a helpful assistant."},
- {
- "role": "user",
- "content": "Please generate the start of a company documentation that contains the answer to the questinon: Write a summary of the Q3 OKR goals",
- },
- ],
- output="This document entails the OKR goals for ACME",
- metadata={"interface": "whatsapp"},
- )
-
- generation.end()
-
- langfuse.flush()
-
- trace_id = langfuse.get_trace_id()
-
- trace = api_wrapper.get_trace(trace_id)
-
- span = trace["observations"][0]
- assert span["endTime"] is not None
-
-
-def test_end_generation_with_data():
- langfuse = Langfuse()
- trace = langfuse.trace()
-
- generation = trace.generation(
- name="query-generation",
- )
-
- generation.end(
- name="test_generation_end",
- metadata={"dict": "value"},
- level="ERROR",
- status_message="Generation ended",
- version="1.0",
- completion_start_time=datetime(2023, 1, 1, 12, 3, tzinfo=timezone.utc),
- model="test-model",
- model_parameters={"param1": "value1", "param2": "value2"},
- input=[{"test_input_key": "test_input_value"}],
- output={"test_output_key": "test_output_value"},
- usage={
- "input": 100,
- "output": 200,
- "total": 500,
- "unit": "CHARACTERS",
- "input_cost": 111,
- "output_cost": 222,
- "total_cost": 444,
- },
- )
-
- langfuse.flush()
-
- fetched_trace = get_api().trace.get(trace.id)
-
- generation = fetched_trace.observations[0]
- assert generation.completion_start_time == datetime(
- 2023, 1, 1, 12, 3, tzinfo=timezone.utc
- )
- assert generation.name == "test_generation_end"
- assert generation.metadata == {"dict": "value"}
- assert generation.level == "ERROR"
- assert generation.status_message == "Generation ended"
- assert generation.version == "1.0"
- assert generation.model == "test-model"
- assert generation.model_parameters == {"param1": "value1", "param2": "value2"}
- assert generation.input == [{"test_input_key": "test_input_value"}]
- assert generation.output == {"test_output_key": "test_output_value"}
- assert generation.usage.input == 100
- assert generation.usage.output == 200
- assert generation.usage.total == 500
- assert generation.calculated_input_cost == 111
- assert generation.calculated_output_cost == 222
- assert generation.calculated_total_cost == 444
-
-
-def test_end_generation_with_openai_token_format():
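- # OpenAI-style usage keys (prompt_tokens, completion_tokens, total_tokens) should be
- # mapped to Langfuse's generic input/output/total usage with unit TOKENS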
- langfuse = Langfuse()
-
- generation = langfuse.generation(
- name="query-generation",
- )
-
- generation.end(
- usage={
- "prompt_tokens": 100,
- "completion_tokens": 200,
- "total_tokens": 500,
- "input_cost": 111,
- "output_cost": 222,
- "total_cost": 444,
- },
- )
-
- langfuse.flush()
-
- trace_id = langfuse.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
- print(trace.observations[0])
-
- generation = trace.observations[0]
- assert generation.end_time is not None
- assert generation.usage.input == 100
- assert generation.usage.output == 200
- assert generation.usage.total == 500
- assert generation.usage.unit == "TOKENS"
- assert generation.calculated_input_cost == 111
- assert generation.calculated_output_cost == 222
- assert generation.calculated_total_cost == 444
-
-
-def test_end_span():
- langfuse = Langfuse()
- api_wrapper = LangfuseAPI()
-
- timestamp = _get_timestamp()
- span = langfuse.span(
- name="span",
- start_time=timestamp,
- input={"key": "value"},
- output={"key": "value"},
- metadata={"interface": "whatsapp"},
- )
-
- span.end()
-
- langfuse.flush()
-
- trace_id = langfuse.get_trace_id()
-
- trace = api_wrapper.get_trace(trace_id)
-
- span = trace["observations"][0]
- assert span["endTime"] is not None
-
-
-def test_end_span_with_data():
- langfuse = Langfuse()
-
- timestamp = _get_timestamp()
- span = langfuse.span(
- name="span",
- start_time=timestamp,
- input={"key": "value"},
- output={"key": "value"},
- metadata={"interface": "whatsapp"},
- )
-
- span.end(metadata={"dict": "value"})
-
- langfuse.flush()
-
- trace_id = langfuse.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
-
- span = trace.observations[0]
- assert span.end_time is not None
- assert span.metadata == {"dict": "value", "interface": "whatsapp"}
-
-
-def test_get_generations():
- langfuse = Langfuse(debug=False)
-
- timestamp = _get_timestamp()
-
- langfuse.generation(
- name=create_uuid(),
- start_time=timestamp,
- end_time=timestamp,
- )
-
- generation_name = create_uuid()
-
- langfuse.generation(
- name=generation_name,
- start_time=timestamp,
- end_time=timestamp,
- input="great-prompt",
- output="great-completion",
- )
-
- langfuse.flush()
-
- sleep(1)
- generations = langfuse.get_generations(name=generation_name, limit=10, page=1)
-
- assert len(generations.data) == 1
- assert generations.data[0].name == generation_name
- assert generations.data[0].input == "great-prompt"
- assert generations.data[0].output == "great-completion"
-
-
-def test_get_generations_by_user():
- langfuse = Langfuse(debug=False)
-
- timestamp = _get_timestamp()
-
- user_id = create_uuid()
- generation_name = create_uuid()
- trace = langfuse.trace(name="test-user", user_id=user_id)
-
- trace.generation(
- name=generation_name,
- start_time=timestamp,
- end_time=timestamp,
- input="great-prompt",
- output="great-completion",
- )
-
- langfuse.generation(
- start_time=timestamp,
- end_time=timestamp,
- )
-
- langfuse.flush()
- sleep(1)
-
- generations = langfuse.get_generations(limit=10, page=1, user_id=user_id)
-
- assert len(generations.data) == 1
- assert generations.data[0].name == generation_name
- assert generations.data[0].input == "great-prompt"
- assert generations.data[0].output == "great-completion"
-
-
-def test_kwargs():
- langfuse = Langfuse()
-
- timestamp = _get_timestamp()
-
- span_kwargs = {
- "start_time": timestamp,
- "input": {"key": "value"},
- "output": {"key": "value"},
- "metadata": {"interface": "whatsapp"},
- }
-
- span = langfuse.span(
- name="span",
- **span_kwargs,
- )
-
- langfuse.flush()
-
- observation = get_api().observations.get(span.id)
- assert observation.start_time is not None
- assert observation.input == {"key": "value"}
- assert observation.output == {"key": "value"}
- assert observation.metadata == {"interface": "whatsapp"}
-
-
-def test_timezone_awareness():
- os.environ["TZ"] = "US/Pacific"
- time.tzset()
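- # Even with the process timezone set to US/Pacific, the SDK should record
- # observation timestamps close to the current UTC time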
-
- utc_now = datetime.now(timezone.utc)
- assert utc_now.tzinfo is not None
-
- langfuse = Langfuse(debug=False)
-
- trace = langfuse.trace(name="test")
- span = trace.span(name="span")
- span.end()
- generation = trace.generation(name="generation")
- generation.end()
- trace.event(name="event")
-
- langfuse.flush()
-
- trace = get_api().trace.get(trace.id)
-
- assert len(trace.observations) == 3
- for observation in trace.observations:
- delta = observation.start_time - utc_now
- assert delta.seconds < 5
-
- if observation.type != "EVENT":
- delta = observation.end_time - utc_now
- assert delta.seconds < 5
-
- os.environ["TZ"] = "UTC"
- time.tzset()
-
-
-def test_timezone_awareness_setting_timestamps():
- os.environ["TZ"] = "US/Pacific"
- time.tzset()
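- # Naive local timestamps passed explicitly by the caller should end up close to
- # the current UTC time once the SDK has converted them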
-
- now = datetime.now()
- utc_now = datetime.now(timezone.utc)
- assert utc_now.tzinfo is not None
-
- print(now)
- print(utc_now)
-
- langfuse = Langfuse(debug=False)
-
- trace = langfuse.trace(name="test")
- trace.span(name="span", start_time=now, end_time=now)
- trace.generation(name="generation", start_time=now, end_time=now)
- trace.event(name="event", start_time=now)
-
- langfuse.flush()
-
- trace = get_api().trace.get(trace.id)
-
- assert len(trace.observations) == 3
- for observation in trace.observations:
- delta = utc_now - observation.start_time
- assert delta.seconds < 5
-
- if observation.type != "EVENT":
- delta = utc_now - observation.end_time
- assert delta.seconds < 5
-
-
-def test_get_trace_by_session_id():
- langfuse = Langfuse(debug=False)
-
- # Create a trace with a session_id
- trace_name = create_uuid()
- session_id = create_uuid()
- trace = langfuse.trace(name=trace_name, session_id=session_id)
-
- # create a trace without a session_id
- langfuse.trace(name=create_uuid())
-
- langfuse.flush()
-
- # Retrieve the trace using the session_id
- traces = get_api().trace.list(session_id=session_id)
-
- # Verify that the trace was retrieved correctly
- assert len(traces.data) == 1
- retrieved_trace = traces.data[0]
- assert retrieved_trace.name == trace_name
- assert retrieved_trace.session_id == session_id
- assert retrieved_trace.id == trace.id
-
-
-def test_fetch_trace():
- langfuse = Langfuse()
-
- # Create a trace
- name = create_uuid()
- trace = langfuse.trace(name=name)
- langfuse.flush()
-
- # Fetch the trace
- sleep(1)
- response = langfuse.fetch_trace(trace.id)
-
- # Assert the structure of the response
- assert isinstance(response, FetchTraceResponse)
- assert hasattr(response, "data")
- assert response.data.id == trace.id
- assert response.data.name == name
-
-
-def test_fetch_traces():
- langfuse = Langfuse()
-
- # unique name
- name = create_uuid()
-
- # Create 3 traces with different timestamps
- now = datetime.now()
- trace_params = [
- {"id": create_uuid(), "timestamp": now - timedelta(seconds=10)},
- {"id": create_uuid(), "timestamp": now - timedelta(seconds=5)},
- {"id": create_uuid(), "timestamp": now},
- ]
-
- for trace_param in trace_params:
- langfuse.trace(
- id=trace_param["id"],
- name=name,
- session_id="session-1",
- input={"key": "value"},
- output="output-value",
- timestamp=trace_param["timestamp"],
- )
- langfuse.flush()
- sleep(1)
-
- all_traces = langfuse.fetch_traces(limit=10, name=name)
- assert len(all_traces.data) == 3
- assert all_traces.meta.total_items == 3
-
- # Assert the structure of the response
- assert isinstance(all_traces, FetchTracesResponse)
- assert hasattr(all_traces, "data")
- assert hasattr(all_traces, "meta")
- assert isinstance(all_traces.data, list)
- assert all_traces.data[0].name == name
- assert all_traces.data[0].session_id == "session-1"
-
- # Fetch traces with a time range that should only include the middle trace
- from_timestamp = now - timedelta(seconds=7.5)
- to_timestamp = now - timedelta(seconds=2.5)
- response = langfuse.fetch_traces(
- limit=10, name=name, from_timestamp=from_timestamp, to_timestamp=to_timestamp
- )
- assert len(response.data) == 1
- assert response.meta.total_items == 1
- fetched_trace = response.data[0]
- assert fetched_trace.name == name
- assert fetched_trace.session_id == "session-1"
- assert fetched_trace.input == {"key": "value"}
- assert fetched_trace.output == "output-value"
- # compare timestamps without microseconds and in UTC
- assert fetched_trace.timestamp.replace(microsecond=0) == trace_params[1][
- "timestamp"
- ].replace(microsecond=0).astimezone(timezone.utc)
-
- # Fetch with pagination
- paginated_response = langfuse.fetch_traces(limit=1, page=2, name=name)
- assert len(paginated_response.data) == 1
- assert paginated_response.meta.total_items == 3
- assert paginated_response.meta.total_pages == 3
-
-
-def test_fetch_observation():
- langfuse = Langfuse()
-
- # Create a trace and a generation
- name = create_uuid()
- trace = langfuse.trace(name=name)
- generation = trace.generation(name=name)
- langfuse.flush()
- sleep(1)
-
- # Fetch the observation
- response = langfuse.fetch_observation(generation.id)
-
- # Assert the structure of the response
- assert isinstance(response, FetchObservationResponse)
- assert hasattr(response, "data")
- assert response.data.id == generation.id
- assert response.data.name == name
- assert response.data.type == "GENERATION"
-
-
-def test_fetch_observations():
- langfuse = Langfuse()
-
- # Create a trace with multiple generations
- name = create_uuid()
- trace = langfuse.trace(name=name)
- gen1 = trace.generation(name=name)
- gen2 = trace.generation(name=name)
- langfuse.flush()
- sleep(1)
-
- # Fetch observations
- response = langfuse.fetch_observations(limit=10, name=name)
-
- # Assert the structure of the response
- assert isinstance(response, FetchObservationsResponse)
- assert hasattr(response, "data")
- assert hasattr(response, "meta")
- assert isinstance(response.data, list)
- assert len(response.data) == 2
- assert response.meta.total_items == 2
- assert response.data[0].id in [gen1.id, gen2.id]
-
- # fetch only one
- response = langfuse.fetch_observations(limit=1, page=2, name=name)
- assert len(response.data) == 1
- assert response.meta.total_items == 2
- assert response.meta.total_pages == 2
-
-
-def test_fetch_trace_not_found():
- langfuse = Langfuse()
-
- # Attempt to fetch a non-existent trace
- with pytest.raises(Exception):
- langfuse.fetch_trace(create_uuid())
-
-
-def test_fetch_observation_not_found():
- langfuse = Langfuse()
-
- # Attempt to fetch a non-existent observation
- with pytest.raises(Exception):
- langfuse.fetch_observation(create_uuid())
-
-
-def test_fetch_traces_empty():
- langfuse = Langfuse()
-
- # Fetch traces with a filter that should return no results
- response = langfuse.fetch_traces(name=create_uuid())
-
- assert isinstance(response, FetchTracesResponse)
- assert len(response.data) == 0
- assert response.meta.total_items == 0
-
-
-def test_fetch_observations_empty():
- langfuse = Langfuse()
-
- # Fetch observations with a filter that should return no results
- response = langfuse.fetch_observations(name=create_uuid())
-
- assert isinstance(response, FetchObservationsResponse)
- assert len(response.data) == 0
- assert response.meta.total_items == 0
-
-
-def test_fetch_sessions():
- langfuse = Langfuse()
-
- # unique name
- name = create_uuid()
- session1 = create_uuid()
- session2 = create_uuid()
- session3 = create_uuid()
-
- # Create multiple traces
- langfuse.trace(name=name, session_id=session1)
- langfuse.trace(name=name, session_id=session2)
- langfuse.trace(name=name, session_id=session3)
- langfuse.flush()
-
- # Fetch traces
- sleep(3)
- response = langfuse.fetch_sessions()
-
- # Assert the structure of the response; the exact number of sessions cannot be checked because the table is not cleared between tests
- assert isinstance(response, FetchSessionsResponse)
- assert hasattr(response, "data")
- assert hasattr(response, "meta")
- assert isinstance(response.data, list)
-
- # Fetch only one session; again, the exact count cannot be checked because the table is not cleared between tests
- response = langfuse.fetch_sessions(limit=1, page=2)
- assert len(response.data) == 1
-
-
-def test_create_trace_sampling_zero():
- langfuse = Langfuse(debug=True, sample_rate=0)
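- # With sample_rate=0 no events are sent, so the trace must not exist on the server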
- api_wrapper = LangfuseAPI()
- trace_name = create_uuid()
-
- trace = langfuse.trace(
- name=trace_name,
- user_id="test",
- metadata={"key": "value"},
- tags=["tag1", "tag2"],
- public=True,
- )
-
- trace.generation(name="generation")
- trace.score(name="score", value=0.5)
-
- langfuse.flush()
-
- fetched_trace = api_wrapper.get_trace(trace.id)
- assert fetched_trace == {
- "error": "LangfuseNotFoundError",
- "message": f"Trace {trace.id} not found within authorized project",
- }
-
-
-def test_mask_function():
- def mask_func(data):
- if isinstance(data, dict):
- return {k: "MASKED" for k in data}
- elif isinstance(data, str):
- return "MASKED"
- return data
-
- langfuse = Langfuse(debug=True, mask=mask_func)
- api_wrapper = LangfuseAPI()
-
- trace = langfuse.trace(name="test_trace", input={"sensitive": "data"})
- sleep(0.1)
- trace.update(output={"more": "sensitive"})
-
- gen = trace.generation(name="test_gen", input={"prompt": "secret"})
- sleep(0.1)
- gen.update(output="new_confidential")
-
- span = trace.span(name="test_span", input={"data": "private"})
- sleep(0.1)
- span.update(output="new_classified")
-
- langfuse.flush()
- sleep(1)
-
- fetched_trace = api_wrapper.get_trace(trace.id)
- assert fetched_trace["input"] == {"sensitive": "MASKED"}
- assert fetched_trace["output"] == {"more": "MASKED"}
-
- fetched_gen = [
- o for o in fetched_trace["observations"] if o["type"] == "GENERATION"
- ][0]
- assert fetched_gen["input"] == {"prompt": "MASKED"}
- assert fetched_gen["output"] == "MASKED"
-
- fetched_span = [o for o in fetched_trace["observations"] if o["type"] == "SPAN"][0]
- assert fetched_span["input"] == {"data": "MASKED"}
- assert fetched_span["output"] == "MASKED"
-
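- # A mask function that raises must not break ingestion; input and output are
- # replaced with empty strings instead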
- def faulty_mask_func(data):
- raise Exception("Masking error")
-
- langfuse = Langfuse(debug=True, mask=faulty_mask_func)
-
- trace = langfuse.trace(name="test_trace", input={"sensitive": "data"})
- sleep(0.1)
- trace.update(output={"more": "sensitive"})
- langfuse.flush()
- sleep(1)
-
- fetched_trace = api_wrapper.get_trace(trace.id)
- assert fetched_trace["input"] == ""
- assert fetched_trace["output"] == ""
-
-
-def test_get_project_id():
- langfuse = Langfuse(debug=False)
- res = langfuse._get_project_id()
- assert res is not None
- assert res == "7a88fb47-b4e2-43b8-a06c-a5ce950dc53a"
-
-
-def test_generate_trace_id():
- langfuse = Langfuse(debug=False)
- trace_id = create_uuid()
-
- langfuse.trace(id=trace_id, name="test_trace")
- langfuse.flush()
-
- trace_url = langfuse.get_trace_url()
- assert (
- trace_url
- == f"http://localhost:3000/project/7a88fb47-b4e2-43b8-a06c-a5ce950dc53a/traces/{trace_id}"
- )
diff --git a/tests/test_core_sdk_unit.py b/tests/test_core_sdk_unit.py
deleted file mode 100644
index eb1702c11..000000000
--- a/tests/test_core_sdk_unit.py
+++ /dev/null
@@ -1,84 +0,0 @@
-from unittest.mock import Mock
-from langfuse.api.client import FernLangfuse
-from langfuse.client import (
- StatefulClient,
- StatefulGenerationClient,
- StatefulSpanClient,
- StatefulTraceClient,
-)
-import pytest
-from langfuse import Langfuse
-
-
-@pytest.fixture
-def langfuse():
- langfuse_instance = Langfuse(debug=False)
- langfuse_instance.client = Mock()
- langfuse_instance.task_manager = Mock()
- langfuse_instance.log = Mock()
-
- return langfuse_instance
-
-
-@pytest.fixture
-def stateful_client():
- stateful_client = StatefulClient(Mock(), "test_id", Mock(), "test_trace", Mock())
-
- return stateful_client
-
-
-@pytest.mark.parametrize(
- "trace_method, expected_client, kwargs",
- [
- (Langfuse.trace, StatefulTraceClient, {}),
- (Langfuse.generation, StatefulGenerationClient, {}),
- (Langfuse.span, StatefulSpanClient, {}),
- (Langfuse.score, StatefulClient, {"value": 1, "trace_id": "test_trace"}),
- ],
-)
-def test_langfuse_returning_if_taskmanager_fails(
- langfuse, trace_method, expected_client, kwargs
-):
- trace_name = "test_trace"
-
- mock_task_manager = langfuse.task_manager.add_task
- mock_task_manager.return_value = Exception("Task manager unable to process event")
-
- body = {
- "name": trace_name,
- **kwargs,
- }
-
- result = trace_method(langfuse, **body)
- mock_task_manager.assert_called()
-
- assert isinstance(result, expected_client)
-
-
-@pytest.mark.parametrize(
- "trace_method, expected_client, kwargs",
- [
- (StatefulClient.generation, StatefulGenerationClient, {}),
- (StatefulClient.span, StatefulSpanClient, {}),
- (StatefulClient.score, StatefulClient, {"value": 1}),
- ],
-)
-def test_stateful_client_returning_if_taskmanager_fails(
- stateful_client, trace_method, expected_client, kwargs
-):
- trace_name = "test_trace"
-
- mock_task_manager = stateful_client.task_manager.add_task
- mock_task_manager.return_value = Exception("Task manager unable to process event")
- mock_client = stateful_client.client
- mock_client.return_value = FernLangfuse(base_url="http://localhost:8000")
-
- body = {
- "name": trace_name,
- **kwargs,
- }
-
- result = trace_method(stateful_client, **body)
- mock_task_manager.assert_called()
-
- assert isinstance(result, expected_client)
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
deleted file mode 100644
index 7ef65417b..000000000
--- a/tests/test_datasets.py
+++ /dev/null
@@ -1,562 +0,0 @@
-import json
-import os
-import time
-from concurrent.futures import ThreadPoolExecutor
-from typing import List
-
-import pytest
-from langchain import LLMChain, OpenAI, PromptTemplate
-
-from langfuse import Langfuse
-from langfuse.api.resources.commons.types.observation import Observation
-from langfuse.decorators import langfuse_context, observe
-from tests.utils import create_uuid, get_api, get_llama_index_index
-
-
-def test_create_and_get_dataset():
- langfuse = Langfuse(debug=False)
-
- name = "Text with spaces " + create_uuid()[:5]
- langfuse.create_dataset(name=name)
- dataset = langfuse.get_dataset(name)
- assert dataset.name == name
-
- name = create_uuid()
- langfuse.create_dataset(
- name=name, description="This is a test dataset", metadata={"key": "value"}
- )
- dataset = langfuse.get_dataset(name)
- assert dataset.name == name
- assert dataset.description == "This is a test dataset"
- assert dataset.metadata == {"key": "value"}
-
-
-def test_create_dataset_item():
- langfuse = Langfuse(debug=False)
- name = create_uuid()
- langfuse.create_dataset(name=name)
-
- generation = langfuse.generation(name="test")
- langfuse.flush()
-
- input = {"input": "Hello World"}
- # first item created; will be dataset.items[2] because items are returned newest first
- langfuse.create_dataset_item(dataset_name=name, input=input)
- # second item created; will be dataset.items[1]
- langfuse.create_dataset_item(
- dataset_name=name,
- input=input,
- expected_output="Output",
- metadata={"key": "value"},
- source_observation_id=generation.id,
- source_trace_id=generation.trace_id,
- )
- # third item created without any data; will be dataset.items[0]
- langfuse.create_dataset_item(
- dataset_name=name,
- )
-
- dataset = langfuse.get_dataset(name)
-
- assert len(dataset.items) == 3
- assert dataset.items[2].input == input
- assert dataset.items[2].expected_output is None
- assert dataset.items[2].dataset_name == name
-
- assert dataset.items[1].input == input
- assert dataset.items[1].expected_output == "Output"
- assert dataset.items[1].metadata == {"key": "value"}
- assert dataset.items[1].source_observation_id == generation.id
- assert dataset.items[1].source_trace_id == generation.trace_id
- assert dataset.items[1].dataset_name == name
-
- assert dataset.items[0].input is None
- assert dataset.items[0].expected_output is None
- assert dataset.items[0].metadata is None
- assert dataset.items[0].source_observation_id is None
- assert dataset.items[0].source_trace_id is None
- assert dataset.items[0].dataset_name == name
-
-
-def test_get_all_items():
- langfuse = Langfuse(debug=False)
- name = create_uuid()
- langfuse.create_dataset(name=name)
-
- input = {"input": "Hello World"}
- for _ in range(99):
- langfuse.create_dataset_item(dataset_name=name, input=input)
-
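- # All 99 items should come back regardless of the page size used when fetching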
- dataset = langfuse.get_dataset(name)
- assert len(dataset.items) == 99
-
- dataset_2 = langfuse.get_dataset(name, fetch_items_page_size=9)
- assert len(dataset_2.items) == 99
-
- dataset_3 = langfuse.get_dataset(name, fetch_items_page_size=2)
- assert len(dataset_3.items) == 99
-
-
-def test_upsert_and_get_dataset_item():
- langfuse = Langfuse(debug=False)
- name = create_uuid()
- langfuse.create_dataset(name=name)
- input = {"input": "Hello World"}
- item = langfuse.create_dataset_item(
- dataset_name=name, input=input, expected_output=input
- )
-
- get_item = langfuse.get_dataset_item(item.id)
- assert get_item.input == input
- assert get_item.id == item.id
- assert get_item.expected_output == input
-
- new_input = {"input": "Hello World 2"}
- langfuse.create_dataset_item(
- dataset_name=name,
- input=new_input,
- id=item.id,
- expected_output=new_input,
- status="ARCHIVED",
- )
- get_new_item = langfuse.get_dataset_item(item.id)
- assert get_new_item.input == new_input
- assert get_new_item.id == item.id
- assert get_new_item.expected_output == new_input
- assert get_new_item.status == "ARCHIVED"
-
-
-def test_linking_observation():
- langfuse = Langfuse(debug=False)
-
- dataset_name = create_uuid()
- langfuse.create_dataset(name=dataset_name)
-
- input = json.dumps({"input": "Hello World"})
- langfuse.create_dataset_item(dataset_name=dataset_name, input=input)
-
- dataset = langfuse.get_dataset(dataset_name)
- assert len(dataset.items) == 1
- assert dataset.items[0].input == input
-
- run_name = create_uuid()
- generation_id = create_uuid()
- trace_id = None
-
- for item in dataset.items:
- generation = langfuse.generation(id=generation_id)
- trace_id = generation.trace_id
-
- item.link(generation, run_name)
-
- run = langfuse.get_dataset_run(dataset_name, run_name)
-
- assert run.name == run_name
- assert len(run.dataset_run_items) == 1
- assert run.dataset_run_items[0].observation_id == generation_id
- assert run.dataset_run_items[0].trace_id == trace_id
-
-
-def test_linking_trace_and_run_metadata_and_description():
- langfuse = Langfuse(debug=False)
-
- dataset_name = create_uuid()
- langfuse.create_dataset(name=dataset_name)
-
- input = json.dumps({"input": "Hello World"})
- langfuse.create_dataset_item(dataset_name=dataset_name, input=input)
-
- dataset = langfuse.get_dataset(dataset_name)
- assert len(dataset.items) == 1
- assert dataset.items[0].input == input
-
- run_name = create_uuid()
- trace_id = create_uuid()
-
- for item in dataset.items:
- trace = langfuse.trace(id=trace_id)
-
- item.link(
- trace,
- run_name,
- run_metadata={"key": "value"},
- run_description="This is a test run",
- )
-
- run = langfuse.get_dataset_run(dataset_name, run_name)
-
- assert run.name == run_name
- assert run.metadata == {"key": "value"}
- assert run.description == "This is a test run"
- assert len(run.dataset_run_items) == 1
- assert run.dataset_run_items[0].trace_id == trace_id
- assert run.dataset_run_items[0].observation_id is None
-
-
-def test_get_runs():
- langfuse = Langfuse(debug=False)
-
- dataset_name = create_uuid()
- langfuse.create_dataset(name=dataset_name)
-
- input = json.dumps({"input": "Hello World"})
- langfuse.create_dataset_item(dataset_name=dataset_name, input=input)
-
- dataset = langfuse.get_dataset(dataset_name)
- assert len(dataset.items) == 1
- assert dataset.items[0].input == input
-
- run_name_1 = create_uuid()
- trace_id_1 = create_uuid()
-
- for item in dataset.items:
- trace = langfuse.trace(id=trace_id_1)
-
- item.link(
- trace,
- run_name_1,
- run_metadata={"key": "value"},
- run_description="This is a test run",
- )
-
- run_name_2 = create_uuid()
- trace_id_2 = create_uuid()
-
- for item in dataset.items:
- trace = langfuse.trace(id=trace_id_2)
-
- item.link(
- trace,
- run_name_2,
- run_metadata={"key": "value"},
- run_description="This is a test run",
- )
-
- runs = langfuse.get_dataset_runs(dataset_name)
-
- assert len(runs.data) == 2
- assert runs.data[0].name == run_name_2
- assert runs.data[0].metadata == {"key": "value"}
- assert runs.data[0].description == "This is a test run"
- assert runs.data[1].name == run_name_1
- assert runs.meta.total_items == 2
- assert runs.meta.total_pages == 1
- assert runs.meta.page == 1
- assert runs.meta.limit == 50
-
-
-def test_linking_via_id_observation_arg_legacy():
- langfuse = Langfuse(debug=False)
-
- dataset_name = create_uuid()
- langfuse.create_dataset(name=dataset_name)
-
- input = json.dumps({"input": "Hello World"})
- langfuse.create_dataset_item(dataset_name=dataset_name, input=input)
-
- dataset = langfuse.get_dataset(dataset_name)
- assert len(dataset.items) == 1
- assert dataset.items[0].input == input
-
- run_name = create_uuid()
- generation_id = create_uuid()
- trace_id = None
-
- for item in dataset.items:
- generation = langfuse.generation(id=generation_id)
- trace_id = generation.trace_id
- langfuse.flush()
- time.sleep(1)
-
- item.link(generation_id, run_name)
-
- langfuse.flush()
-
- time.sleep(1)
-
- run = langfuse.get_dataset_run(dataset_name, run_name)
-
- assert run.name == run_name
- assert len(run.dataset_run_items) == 1
- assert run.dataset_run_items[0].observation_id == generation_id
- assert run.dataset_run_items[0].trace_id == trace_id
-
-
-def test_linking_via_id_trace_kwarg():
- langfuse = Langfuse(debug=False)
-
- dataset_name = create_uuid()
- langfuse.create_dataset(name=dataset_name)
-
- input = json.dumps({"input": "Hello World"})
- langfuse.create_dataset_item(dataset_name=dataset_name, input=input)
-
- dataset = langfuse.get_dataset(dataset_name)
- assert len(dataset.items) == 1
- assert dataset.items[0].input == input
-
- run_name = create_uuid()
- trace_id = create_uuid()
-
- for item in dataset.items:
- langfuse.trace(id=trace_id)
- langfuse.flush()
-
- item.link(None, run_name, trace_id=trace_id)
-
- run = langfuse.get_dataset_run(dataset_name, run_name)
-
- assert run.name == run_name
- assert len(run.dataset_run_items) == 1
- assert run.dataset_run_items[0].observation_id is None
- assert run.dataset_run_items[0].trace_id == trace_id
-
-
-def test_linking_via_id_generation_kwarg():
- langfuse = Langfuse(debug=False)
-
- dataset_name = create_uuid()
- langfuse.create_dataset(name=dataset_name)
-
- input = json.dumps({"input": "Hello World"})
- langfuse.create_dataset_item(dataset_name=dataset_name, input=input)
-
- dataset = langfuse.get_dataset(dataset_name)
- assert len(dataset.items) == 1
- assert dataset.items[0].input == input
-
- run_name = create_uuid()
- generation_id = create_uuid()
- trace_id = None
-
- for item in dataset.items:
- generation = langfuse.generation(id=generation_id)
- trace_id = generation.trace_id
- langfuse.flush()
-
- item.link(None, run_name, trace_id=trace_id, observation_id=generation_id)
-
- run = langfuse.get_dataset_run(dataset_name, run_name)
-
- assert run.name == run_name
- assert len(run.dataset_run_items) == 1
- assert run.dataset_run_items[0].observation_id == generation_id
- assert run.dataset_run_items[0].trace_id == trace_id
-
-
-def test_langchain_dataset():
- langfuse = Langfuse(debug=False)
- dataset_name = create_uuid()
- langfuse.create_dataset(name=dataset_name)
-
- input = json.dumps({"input": "Hello World"})
- langfuse.create_dataset_item(dataset_name=dataset_name, input=input)
-
- dataset = langfuse.get_dataset(dataset_name)
-
- run_name = create_uuid()
-
- dataset_item_id = None
-
- for item in dataset.items:
- handler = item.get_langchain_handler(run_name=run_name)
- dataset_item_id = item.id
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
- template = """You are a playwright. Given the title of play, it is your job to write a synopsis for that title.
- Title: {title}
- Playwright: This is a synopsis for the above play:"""
-
- prompt_template = PromptTemplate(input_variables=["title"], template=template)
- synopsis_chain = LLMChain(llm=llm, prompt=prompt_template)
-
- synopsis_chain.run("Tragedy at sunset on the beach", callbacks=[handler])
-
- langfuse.flush()
- run = langfuse.get_dataset_run(dataset_name, run_name)
-
- assert run.name == run_name
- assert len(run.dataset_run_items) == 1
- assert run.dataset_run_items[0].dataset_run_id == run.id
-
- api = get_api()
-
- trace = api.trace.get(handler.get_trace_id())
-
- assert len(trace.observations) == 2
-
- sorted_observations = sorted_dependencies(trace.observations)
-
- assert sorted_observations[0].id == sorted_observations[1].parent_observation_id
- assert sorted_observations[0].parent_observation_id is None
-
- assert trace.name == "LLMChain" # Overwritten by the Langchain run
- assert trace.metadata == {
- "dataset_item_id": dataset_item_id,
- "run_name": run_name,
- "dataset_id": dataset.id,
- }
-
- assert sorted_observations[0].name == "LLMChain"
-
- assert sorted_observations[1].name == "OpenAI"
- assert sorted_observations[1].type == "GENERATION"
- assert sorted_observations[1].input is not None
- assert sorted_observations[1].output is not None
- assert sorted_observations[1].input != ""
- assert sorted_observations[1].output != ""
- assert sorted_observations[1].usage.total is not None
- assert sorted_observations[1].usage.input is not None
- assert sorted_observations[1].usage.output is not None
-
-
-@pytest.mark.skip(reason="flaky on V3 pipeline")
-def test_llama_index_dataset():
- langfuse = Langfuse(debug=False)
- dataset_name = create_uuid()
- langfuse.create_dataset(name=dataset_name)
-
- langfuse.create_dataset_item(
- dataset_name=dataset_name, input={"input": "Hello World"}
- )
-
- dataset = langfuse.get_dataset(dataset_name)
-
- run_name = create_uuid()
-
- dataset_item_id = None
-
- for item in dataset.items:
- with item.observe_llama_index(run_name=run_name) as handler:
- dataset_item_id = item.id
-
- index = get_llama_index_index(handler)
- index.as_query_engine().query(
- "What did the speaker achieve in the past twelve months?"
- )
-
- langfuse.flush()
- handler.flush()
-
- run = langfuse.get_dataset_run(dataset_name, run_name)
-
- assert run.name == run_name
- assert len(run.dataset_run_items) == 1
- assert run.dataset_run_items[0].dataset_run_id == run.id
- time.sleep(3)
-
- trace_id = run.dataset_run_items[0].trace_id
- trace = get_api().trace.get(trace_id)
-
- sorted_observations = sorted_dependencies(trace.observations)
-
- assert sorted_observations[0].id == sorted_observations[1].parent_observation_id
- assert sorted_observations[0].parent_observation_id is None
-
- assert trace.name == "LlamaIndex_query" # Overwritten by the Langchain run
- assert trace.metadata == {
- "dataset_item_id": dataset_item_id,
- "run_name": run_name,
- "dataset_id": dataset.id,
- }
-
-
-def sorted_dependencies(
- observations: List[Observation],
-):
- # observations have an id and a parent_observation_id. Return a sorted list starting with the root observation where the parent_observation_id is None
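- # Note: this assumes a simple chain in which each observation has at most one child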
- parent_to_observation = {obs.parent_observation_id: obs for obs in observations}
-
- # Start with the root observation (parent_observation_id is None)
- current_observation = parent_to_observation[None]
- dependencies = [current_observation]
-
- while current_observation.id in parent_to_observation:
- current_observation = parent_to_observation[current_observation.id]
- dependencies.append(current_observation)
-
- return dependencies
-
-
-def test_observe_dataset_run():
- # Create dataset
- langfuse = Langfuse(debug=True)
- dataset_name = create_uuid()
- langfuse.create_dataset(name=dataset_name)
-
- items_data = []
- num_items = 3
-
- for i in range(num_items):
- trace_id = create_uuid()
- dataset_item_input = "Hello World " + str(i)
- langfuse.create_dataset_item(
- dataset_name=dataset_name, input=dataset_item_input
- )
-
- items_data.append((dataset_item_input, trace_id))
-
- dataset = langfuse.get_dataset(dataset_name)
- assert len(dataset.items) == num_items
-
- run_name = create_uuid()
-
- @observe()
- def run_llm_app_on_dataset_item(input):
- return input
-
- def wrapper_func(input):
- return run_llm_app_on_dataset_item(input)
-
- def execute_dataset_item(item, run_name, trace_id):
- with item.observe(run_name=run_name, trace_id=trace_id):
- wrapper_func(item.input)
-
- items = zip(dataset.items[::-1], items_data) # Reverse order to reflect input order
-
- with ThreadPoolExecutor() as executor:
- for item, (_, trace_id) in items:
- result = executor.submit(
- execute_dataset_item,
- item,
- run_name=run_name,
- trace_id=trace_id,
- )
-
- result.result()
-
- langfuse_context.flush()
-
- # Check dataset run
- run = langfuse.get_dataset_run(dataset_name, run_name)
-
- assert run.name == run_name
- assert len(run.dataset_run_items) == num_items
- assert run.dataset_run_items[0].dataset_run_id == run.id
-
- for _, trace_id in items_data:
- assert any(
- item.trace_id == trace_id for item in run.dataset_run_items
- ), f"Trace {trace_id} not found in run"
-
- for dataset_item_input, trace_id in items_data:
- trace = get_api().trace.get(trace_id)
-
- assert trace.name == "run_llm_app_on_dataset_item"
- assert len(trace.observations) == 0
- assert trace.input["args"][0] == dataset_item_input
- assert trace.output == dataset_item_input
-
- # Check that the decorator context is not polluted
- new_trace_id = create_uuid()
- run_llm_app_on_dataset_item(
- "non-dataset-run-afterwards", langfuse_observation_id=new_trace_id
- )
-
- langfuse_context.flush()
-
- next_trace = get_api().trace.get(new_trace_id)
- assert next_trace.name == "run_llm_app_on_dataset_item"
- assert next_trace.input["args"][0] == "non-dataset-run-afterwards"
- assert next_trace.output == "non-dataset-run-afterwards"
- assert len(next_trace.observations) == 0
- assert next_trace.id != trace_id
diff --git a/tests/test_decorators.py b/tests/test_decorators.py
deleted file mode 100644
index df6d2d4cf..000000000
--- a/tests/test_decorators.py
+++ /dev/null
@@ -1,1571 +0,0 @@
-import asyncio
-from collections import defaultdict
-from concurrent.futures import ThreadPoolExecutor
-from contextvars import ContextVar
-from time import sleep
-from typing import Optional
-
-import pytest
-from langchain.prompts import ChatPromptTemplate
-from langchain_openai import ChatOpenAI
-
-from langfuse.decorators import langfuse_context, observe
-from langfuse.media import LangfuseMedia
-from langfuse.openai import AsyncOpenAI
-from tests.utils import create_uuid, get_api, get_llama_index_index
-
-mock_metadata = {"key": "metadata"}
-mock_deep_metadata = {"key": "mock_deep_metadata"}
-mock_session_id = "session-id-1"
-mock_args = (1, 2, 3)
-mock_kwargs = {"a": 1, "b": 2, "c": 3}
-
-
-def test_nested_observations():
- mock_name = "test_nested_observations"
- mock_trace_id = create_uuid()
-
- @observe(as_type="generation", name="level_3_to_be_overwritten")
- def level_3_function():
- langfuse_context.update_current_observation(metadata=mock_metadata)
- langfuse_context.update_current_observation(
- metadata=mock_deep_metadata,
- usage={"input": 150, "output": 50, "total": 300},
- model="gpt-3.5-turbo",
- output="mock_output",
- )
- langfuse_context.update_current_observation(
- version="version-1", name="overwritten_level_3"
- )
-
- langfuse_context.update_current_trace(
- session_id=mock_session_id, name=mock_name
- )
-
- langfuse_context.update_current_trace(
- user_id="user_id",
- )
-
- return "level_3"
-
- @observe(name="level_2_manually_set")
- def level_2_function():
- level_3_function()
- langfuse_context.update_current_observation(metadata=mock_metadata)
-
- return "level_2"
-
- @observe()
- def level_1_function(*args, **kwargs):
- level_2_function()
-
- return "level_1"
-
- result = level_1_function(
- *mock_args, **mock_kwargs, langfuse_observation_id=mock_trace_id
- )
- langfuse_context.flush()
-
- assert result == "level_1" # Wrapped function returns correctly
-
- # ID setting for span or trace
-
- trace_data = get_api().trace.get(mock_trace_id)
- assert (
- len(trace_data.observations) == 2
- ) # Top-most function is the trace, so it's not an observation
-
- assert trace_data.input == {"args": list(mock_args), "kwargs": mock_kwargs}
- assert trace_data.output == "level_1"
-
- # trace parameters if set anywhere in the call stack
- assert trace_data.session_id == mock_session_id
- assert trace_data.user_id == "user_id"
- assert trace_data.name == mock_name
-
- # Check correct nesting
- adjacencies = defaultdict(list)
- for o in trace_data.observations:
- adjacencies[o.parent_observation_id or o.trace_id].append(o)
-
- assert len(adjacencies[mock_trace_id]) == 1 # Trace has only one child
- assert len(adjacencies) == 2 # Only trace and one observation have children
-
- level_2_observation = adjacencies[mock_trace_id][0]
- level_3_observation = adjacencies[level_2_observation.id][0]
-
- assert level_2_observation.name == "level_2_manually_set"
- assert level_2_observation.metadata == mock_metadata
-
- assert level_3_observation.name == "overwritten_level_3"
- assert level_3_observation.metadata == mock_deep_metadata
- assert level_3_observation.type == "GENERATION"
- assert level_3_observation.calculated_total_cost > 0
- assert level_3_observation.output == "mock_output"
- assert level_3_observation.version == "version-1"
-
-
-def test_nested_observations_with_non_parentheses_decorator():
- mock_name = "test_nested_observations"
- mock_trace_id = create_uuid()
-
- @observe(as_type="generation", name="level_3_to_be_overwritten")
- def level_3_function():
- langfuse_context.update_current_observation(metadata=mock_metadata)
- langfuse_context.update_current_observation(
- metadata=mock_deep_metadata,
- usage={"input": 150, "output": 50, "total": 300},
- model="gpt-3.5-turbo",
- output="mock_output",
- )
- langfuse_context.update_current_observation(
- version="version-1", name="overwritten_level_3"
- )
-
- langfuse_context.update_current_trace(
- session_id=mock_session_id, name=mock_name
- )
-
- langfuse_context.update_current_trace(
- user_id="user_id",
- )
-
- return "level_3"
-
- @observe
- def level_2_function():
- level_3_function()
- langfuse_context.update_current_observation(metadata=mock_metadata)
-
- return "level_2"
-
- @observe
- def level_1_function(*args, **kwargs):
- level_2_function()
-
- return "level_1"
-
- result = level_1_function(
- *mock_args, **mock_kwargs, langfuse_observation_id=mock_trace_id
- )
- langfuse_context.flush()
-
- assert result == "level_1" # Wrapped function returns correctly
-
- # ID setting for span or trace
-
- trace_data = get_api().trace.get(mock_trace_id)
- assert (
- len(trace_data.observations) == 2
- ) # Top-most function is the trace, so it's not an observation
-
- assert trace_data.input == {"args": list(mock_args), "kwargs": mock_kwargs}
- assert trace_data.output == "level_1"
-
- # trace parameters if set anywhere in the call stack
- assert trace_data.session_id == mock_session_id
- assert trace_data.user_id == "user_id"
- assert trace_data.name == mock_name
-
- # Check correct nesting
- adjacencies = defaultdict(list)
- for o in trace_data.observations:
- adjacencies[o.parent_observation_id or o.trace_id].append(o)
-
- assert len(adjacencies[mock_trace_id]) == 1 # Trace has only one child
- assert len(adjacencies) == 2 # Only trace and one observation have children
-
- level_2_observation = adjacencies[mock_trace_id][0]
- level_3_observation = adjacencies[level_2_observation.id][0]
-
- assert level_2_observation.name == "level_2_function"
- assert level_2_observation.metadata == mock_metadata
-
- assert level_3_observation.name == "overwritten_level_3"
- assert level_3_observation.metadata == mock_deep_metadata
- assert level_3_observation.type == "GENERATION"
- assert level_3_observation.calculated_total_cost > 0
- assert level_3_observation.output == "mock_output"
- assert level_3_observation.version == "version-1"
-
-
-# behavior on exceptions
-def test_exception_in_wrapped_function():
- mock_name = "test_exception_in_wrapped_function"
- mock_trace_id = create_uuid()
-
- @observe(as_type="generation")
- def level_3_function():
- langfuse_context.update_current_observation(metadata=mock_metadata)
- langfuse_context.update_current_observation(
- metadata=mock_deep_metadata,
- usage={"input": 150, "output": 50, "total": 300},
- model="gpt-3.5-turbo",
- )
- langfuse_context.update_current_trace(
- session_id=mock_session_id, name=mock_name
- )
-
- raise ValueError("Mock exception")
-
- @observe()
- def level_2_function():
- level_3_function()
- langfuse_context.update_current_observation(metadata=mock_metadata)
-
- return "level_2"
-
- @observe()
- def level_1_function(*args, **kwargs):
- sleep(1)
- level_2_function()
-
- return "level_1"
-
- # Check that the exception is raised
- with pytest.raises(ValueError):
- level_1_function(
- *mock_args, **mock_kwargs, langfuse_observation_id=mock_trace_id
- )
-
- langfuse_context.flush()
-
- trace_data = get_api().trace.get(mock_trace_id)
-
- assert trace_data.input == {"args": list(mock_args), "kwargs": mock_kwargs}
- assert trace_data.output is None # Output is None if exception is raised
-
- # trace parameters if set anywhere in the call stack
- assert trace_data.session_id == mock_session_id
- assert trace_data.name == mock_name
-
- # Check correct nesting
- adjacencies = defaultdict(list)
- for o in trace_data.observations:
- adjacencies[o.parent_observation_id or o.trace_id].append(o)
-
- assert len(adjacencies[mock_trace_id]) == 1 # Trace has only one child
- assert len(adjacencies) == 2 # Only trace and one observation have children
-
- level_2_observation = adjacencies[mock_trace_id][0]
- level_3_observation = adjacencies[level_2_observation.id][0]
-
- assert (
- level_2_observation.metadata == {}
- ) # Exception is raised before metadata is set
- assert level_3_observation.metadata == mock_deep_metadata
- assert level_3_observation.status_message == "Mock exception"
- assert level_3_observation.level == "ERROR"
-
-
-# behavior on concurrency
-def test_concurrent_decorator_executions():
- mock_name = "test_concurrent_decorator_executions"
- mock_trace_id_1 = create_uuid()
- mock_trace_id_2 = create_uuid()
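- # Two traces are created from concurrent threads; the decorator context must not
- # leak between them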
-
- @observe(as_type="generation")
- def level_3_function():
- langfuse_context.update_current_observation(metadata=mock_metadata)
- langfuse_context.update_current_observation(metadata=mock_deep_metadata)
- langfuse_context.update_current_observation(
- metadata=mock_deep_metadata,
- usage={"input": 150, "output": 50, "total": 300},
- model="gpt-3.5-turbo",
- )
- langfuse_context.update_current_trace(session_id=mock_session_id)
-
- return "level_3"
-
- @observe()
- def level_2_function():
- level_3_function()
- langfuse_context.update_current_observation(metadata=mock_metadata)
-
- return "level_2"
-
- @observe(name=mock_name)
- def level_1_function(*args, **kwargs):
- sleep(1)
- level_2_function()
-
- return "level_1"
-
- with ThreadPoolExecutor(max_workers=2) as executor:
- future1 = executor.submit(
- level_1_function,
- *mock_args,
- mock_trace_id_1,
- **mock_kwargs,
- langfuse_observation_id=mock_trace_id_1,
- )
- future2 = executor.submit(
- level_1_function,
- *mock_args,
- mock_trace_id_2,
- **mock_kwargs,
- langfuse_observation_id=mock_trace_id_2,
- )
-
- future1.result()
- future2.result()
-
- langfuse_context.flush()
-
- for mock_id in [mock_trace_id_1, mock_trace_id_2]:
- trace_data = get_api().trace.get(mock_id)
- assert (
- len(trace_data.observations) == 2
- ) # Top-most function is the trace, so it's not an observation
-
- assert trace_data.input == {
- "args": list(mock_args) + [mock_id],
- "kwargs": mock_kwargs,
- }
- assert trace_data.output == "level_1"
-
- # trace parameters if set anywhere in the call stack
- assert trace_data.session_id == mock_session_id
- assert trace_data.name == mock_name
-
- # Check correct nesting
- adjacencies = defaultdict(list)
- for o in trace_data.observations:
- adjacencies[o.parent_observation_id or o.trace_id].append(o)
-
- assert len(adjacencies[mock_id]) == 1 # Trace has only one child
- assert len(adjacencies) == 2 # Only trace and one observation have children
-
- level_2_observation = adjacencies[mock_id][0]
- level_3_observation = adjacencies[level_2_observation.id][0]
-
- assert level_2_observation.metadata == mock_metadata
- assert level_3_observation.metadata == mock_deep_metadata
- assert level_3_observation.type == "GENERATION"
- assert level_3_observation.calculated_total_cost > 0
-
-
-def test_decorators_llama_index():
- mock_name = "test_decorators_llama_index"
- mock_trace_id = create_uuid()
-
- @observe()
- def llama_index_operations(*args, **kwargs):
- callback = langfuse_context.get_current_llama_index_handler()
- index = get_llama_index_index(callback, force_rebuild=True)
-
- return index.as_query_engine().query(kwargs["query"])
-
- @observe()
- def level_3_function(*args, **kwargs):
- langfuse_context.update_current_observation(metadata=mock_metadata)
- langfuse_context.update_current_observation(metadata=mock_deep_metadata)
- langfuse_context.update_current_trace(
- session_id=mock_session_id, name=mock_name
- )
-
- return llama_index_operations(*args, **kwargs)
-
- @observe()
- def level_2_function(*args, **kwargs):
- langfuse_context.update_current_observation(metadata=mock_metadata)
-
- return level_3_function(*args, **kwargs)
-
- @observe()
- def level_1_function(*args, **kwargs):
- return level_2_function(*args, **kwargs)
-
- level_1_function(
- query="What is the authors ambition?", langfuse_observation_id=mock_trace_id
- )
-
- langfuse_context.flush()
-
- trace_data = get_api().trace.get(mock_trace_id)
- assert len(trace_data.observations) > 2
-
- # Check correct nesting
- adjacencies = defaultdict(list)
- for o in trace_data.observations:
- adjacencies[o.parent_observation_id or o.trace_id].append(o)
-
- assert len(adjacencies[mock_trace_id]) == 1 # Trace has only one child
-
- # Check that the llama_index_operations is at the correct level
- lvl = 1
- curr_id = mock_trace_id
- llama_index_root_span = None
-
- while len(adjacencies[curr_id]) > 0:
- o = adjacencies[curr_id][0]
- if o.name == "llama_index_operations":
- llama_index_root_span = o
- break
-
- curr_id = adjacencies[curr_id][0].id
- lvl += 1
-
- assert lvl == 3
-
- assert llama_index_root_span is not None
- assert any([o.name == "OpenAIEmbedding" for o in trace_data.observations])
-
-
-def test_decorators_langchain():
- mock_name = "test_decorators_langchain"
- mock_trace_id = create_uuid()
-
- @observe()
- def langchain_operations(*args, **kwargs):
- handler = langfuse_context.get_current_langchain_handler()
- prompt = ChatPromptTemplate.from_template("tell me a short joke about {topic}")
- model = ChatOpenAI(temperature=0)
-
- chain = prompt | model
-
- return chain.invoke(
- {"topic": kwargs["topic"]},
- config={
- "callbacks": [handler],
- },
- )
-
- @observe()
- def level_3_function(*args, **kwargs):
- langfuse_context.update_current_observation(metadata=mock_metadata)
- langfuse_context.update_current_observation(metadata=mock_deep_metadata)
- langfuse_context.update_current_trace(
- session_id=mock_session_id, name=mock_name
- )
-
- return langchain_operations(*args, **kwargs)
-
- @observe()
- def level_2_function(*args, **kwargs):
- langfuse_context.update_current_observation(metadata=mock_metadata)
-
- return level_3_function(*args, **kwargs)
-
- @observe()
- def level_1_function(*args, **kwargs):
- return level_2_function(*args, **kwargs)
-
- level_1_function(topic="socks", langfuse_observation_id=mock_trace_id)
-
- langfuse_context.flush()
-
- trace_data = get_api().trace.get(mock_trace_id)
- assert len(trace_data.observations) > 2
-
- # Check correct nesting
- adjacencies = defaultdict(list)
- for o in trace_data.observations:
- adjacencies[o.parent_observation_id or o.trace_id].append(o)
-
- assert len(adjacencies[mock_trace_id]) == 1 # Trace has only one child
-
- # Check that the langchain_operations is at the correct level
- lvl = 1
- curr_id = mock_trace_id
- langchain_root_span = None
-
- while len(adjacencies[curr_id]) > 0:
- o = adjacencies[curr_id][0]
- if o.name == "langchain_operations":
- langchain_root_span = o
- break
-
- curr_id = adjacencies[curr_id][0].id
- lvl += 1
-
- assert lvl == 3
-
- assert langchain_root_span is not None
- assert any([o.name == "ChatPromptTemplate" for o in trace_data.observations])
-
-
-@pytest.mark.asyncio
-async def test_asyncio_concurrency_inside_nested_span():
- mock_name = "test_asyncio_concurrency_inside_nested_span"
- mock_trace_id = create_uuid()
- mock_observation_id_1 = create_uuid()
- mock_observation_id_2 = create_uuid()
-
- @observe(as_type="generation")
- async def level_3_function():
- langfuse_context.update_current_observation(metadata=mock_metadata)
- langfuse_context.update_current_observation(
- metadata=mock_deep_metadata,
- usage={"input": 150, "output": 50, "total": 300},
- model="gpt-3.5-turbo",
- )
- langfuse_context.update_current_trace(
- session_id=mock_session_id, name=mock_name
- )
-
- return "level_3"
-
- @observe()
- async def level_2_function(*args, **kwargs):
- await level_3_function()
- langfuse_context.update_current_observation(metadata=mock_metadata)
-
- return "level_2"
-
- @observe()
- async def level_1_function(*args, **kwargs):
- print("Executing level 1")
- await asyncio.gather(
- level_2_function(
- *mock_args,
- mock_observation_id_1,
- **mock_kwargs,
- langfuse_observation_id=mock_observation_id_1,
- ),
- level_2_function(
- *mock_args,
- mock_observation_id_2,
- **mock_kwargs,
- langfuse_observation_id=mock_observation_id_2,
- ),
- )
-
- return "level_1"
-
- await level_1_function(langfuse_observation_id=mock_trace_id)
- langfuse_context.flush()
-
- trace_data = get_api().trace.get(mock_trace_id)
- assert (
- len(trace_data.observations) == 4
- ) # Top-most function is the trace, so it's not an observation
-
- # trace parameters if set anywhere in the call stack
- assert trace_data.name == mock_name
- assert trace_data.session_id == mock_session_id
- assert trace_data.output == "level_1"
-
- # Check correct nesting
- adjacencies = defaultdict(list)
- for o in trace_data.observations:
- adjacencies[o.parent_observation_id or o.trace_id].append(o)
-
- # Trace has two children
- assert len(adjacencies[mock_trace_id]) == 2
-
- # Each async call has one child
- for mock_id in [mock_observation_id_1, mock_observation_id_2]:
- assert len(adjacencies[mock_id]) == 1
-
- assert (
- len(adjacencies) == 3
- ) # Only the trace and the two level-2 observations have children
-
-
-def test_get_current_ids():
- mock_trace_id = create_uuid()
- mock_deep_observation_id = create_uuid()
-
- retrieved_trace_id: ContextVar[Optional[str]] = ContextVar(
- "retrieved_trace_id", default=None
- )
- retrieved_observation_id: ContextVar[Optional[str]] = ContextVar(
- "retrieved_observation_id", default=None
- )
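- # ContextVars capture the IDs seen inside the decorated functions so they can be
- # asserted on after the call stack has unwound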
-
- @observe()
- def level_3_function(*args, **kwargs):
- retrieved_trace_id.set(langfuse_context.get_current_trace_id())
- retrieved_observation_id.set(langfuse_context.get_current_observation_id())
-
- return "level_3"
-
- @observe()
- def level_2_function():
- return level_3_function(langfuse_observation_id=mock_deep_observation_id)
-
- @observe()
- def level_1_function(*args, **kwargs):
- level_2_function()
-
- return "level_1"
-
- result = level_1_function(
- *mock_args, **mock_kwargs, langfuse_observation_id=mock_trace_id
- )
- langfuse_context.flush()
-
- assert result == "level_1" # Wrapped function returns correctly
-
- # ID setting for span or trace
- trace_data = get_api().trace.get(mock_trace_id)
-
- assert retrieved_trace_id.get() == mock_trace_id
- assert retrieved_observation_id.get() == mock_deep_observation_id
- assert any(
- [o.id == retrieved_observation_id.get() for o in trace_data.observations]
- )
-
-
-def test_get_current_trace_url():
- mock_trace_id = create_uuid()
-
- @observe()
- def level_3_function():
- return langfuse_context.get_current_trace_url()
-
- @observe()
- def level_2_function():
- return level_3_function()
-
- @observe()
- def level_1_function(*args, **kwargs):
- return level_2_function()
-
- result = level_1_function(
- *mock_args, **mock_kwargs, langfuse_observation_id=mock_trace_id
- )
- langfuse_context.flush()
-
- expected_url = f"http://localhost:3000/project/7a88fb47-b4e2-43b8-a06c-a5ce950dc53a/traces/{mock_trace_id}"
- assert result == expected_url
-
-
-def test_scoring_observations():
- mock_name = "test_scoring_observations"
- mock_trace_id = create_uuid()
-
- @observe(as_type="generation")
- def level_3_function():
- langfuse_context.score_current_observation(
- name="test-observation-score", value=1
- )
- langfuse_context.score_current_trace(
- name="another-test-trace-score", value="my_value"
- )
- return "level_3"
-
- @observe()
- def level_2_function():
- return level_3_function()
-
- @observe()
- def level_1_function(*args, **kwargs):
- langfuse_context.score_current_observation(name="test-trace-score", value=3)
- langfuse_context.update_current_trace(name=mock_name)
- return level_2_function()
-
- result = level_1_function(
- *mock_args, **mock_kwargs, langfuse_observation_id=mock_trace_id
- )
- langfuse_context.flush()
-
- assert result == "level_3" # Wrapped function returns correctly
-
- # ID setting for span or trace
- trace_data = get_api().trace.get(mock_trace_id)
- assert (
- len(trace_data.observations) == 2
- ) # Top-most function is the trace, so it's not an observation
- assert trace_data.name == mock_name
-
- # Check for correct scoring
- scores = trace_data.scores
-
- assert len(scores) == 3
-
- trace_scores = [
- s for s in scores if s.trace_id == mock_trace_id and s.observation_id is None
- ]
- observation_score = [s for s in scores if s.observation_id is not None][0]
-
- assert any(
- [
- score.name == "another-test-trace-score"
- and score.string_value == "my_value"
- and score.data_type == "CATEGORICAL"
- for score in trace_scores
- ]
- )
- assert any(
- [
- score.name == "test-trace-score"
- and score.value == 3
- and score.data_type == "NUMERIC"
- for score in trace_scores
- ]
- )
-
- assert observation_score.name == "test-observation-score"
- assert observation_score.value == 1
- assert observation_score.data_type == "NUMERIC"
-
-
-def test_circular_reference_handling():
- mock_trace_id = create_uuid()
-
- # Define a class that will contain a circular reference
- class CircularRefObject:
- def __init__(self):
- self.reference: Optional[CircularRefObject] = None
-
- @observe()
- def function_with_circular_arg(circular_obj, *args, **kwargs):
- # This function doesn't need to do anything with circular_obj,
- # the test is simply to see if it can be called without error.
- return "function response"
-
- # Create an instance of the object and establish a circular reference
- circular_obj = CircularRefObject()
- circular_obj.reference = circular_obj
-
- # Call the decorated function, passing the circularly-referenced object
- result = function_with_circular_arg(
- circular_obj, langfuse_observation_id=mock_trace_id
- )
-
- langfuse_context.flush()
-
- # Validate that the function executed as expected
- assert result == "function response"
-
- trace_data = get_api().trace.get(mock_trace_id)
-
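-    # The serializer replaces the circular reference with the class name instead of recursing infinitely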
- assert trace_data.input["args"][0]["reference"] == "CircularRefObject"
-
-
-def test_disabled_io_capture():
- mock_trace_id = create_uuid()
-
- class Node:
- def __init__(self, value: tuple):
- self.value = value
-
- @observe(capture_input=False, capture_output=False)
- def nested(*args, **kwargs):
- langfuse_context.update_current_observation(
- input=Node(("manually set tuple", 1)), output="manually set output"
- )
- return "nested response"
-
- @observe(capture_output=False)
- def main(*args, **kwargs):
- nested(*args, **kwargs)
- return "function response"
-
- result = main("Hello, World!", name="John", langfuse_observation_id=mock_trace_id)
-
- langfuse_context.flush()
-
- assert result == "function response"
-
- trace_data = get_api().trace.get(mock_trace_id)
-
- assert trace_data.input == {"args": ["Hello, World!"], "kwargs": {"name": "John"}}
- assert trace_data.output is None
-
- # Check that disabled capture_io doesn't capture manually set input/output
- assert len(trace_data.observations) == 1
- assert trace_data.observations[0].input["value"] == ["manually set tuple", 1]
- assert trace_data.observations[0].output == "manually set output"
-
-
-def test_decorated_class_and_instance_methods():
- mock_name = "test_decorated_class_and_instance_methods"
- mock_trace_id = create_uuid()
-
- class TestClass:
- @classmethod
- @observe()
- def class_method(cls, *args, **kwargs):
- langfuse_context.update_current_observation(name="class_method")
- return "class_method"
-
- @observe(as_type="generation")
- def level_3_function(self):
- langfuse_context.update_current_observation(metadata=mock_metadata)
- langfuse_context.update_current_observation(
- metadata=mock_deep_metadata,
- usage={"input": 150, "output": 50, "total": 300},
- model="gpt-3.5-turbo",
- output="mock_output",
- )
-
- langfuse_context.update_current_trace(
- session_id=mock_session_id, name=mock_name
- )
-
- return "level_3"
-
- @observe()
- def level_2_function(self):
- TestClass.class_method()
-
- self.level_3_function()
- langfuse_context.update_current_observation(metadata=mock_metadata)
-
- return "level_2"
-
- @observe()
- def level_1_function(self, *args, **kwargs):
- self.level_2_function()
-
- return "level_1"
-
- result = TestClass().level_1_function(
- *mock_args, **mock_kwargs, langfuse_observation_id=mock_trace_id
- )
-
- langfuse_context.flush()
-
- assert result == "level_1" # Wrapped function returns correctly
-
- # ID setting for span or trace
-
- trace_data = get_api().trace.get(mock_trace_id)
- assert (
- len(trace_data.observations) == 3
-    )  # Top-most function is the trace, so it's not counted as an observation
-
- assert trace_data.input == {"args": list(mock_args), "kwargs": mock_kwargs}
- assert trace_data.output == "level_1"
-
- # trace parameters if set anywhere in the call stack
- assert trace_data.session_id == mock_session_id
- assert trace_data.name == mock_name
-
- # Check correct nesting
- adjacencies = defaultdict(list)
- for o in trace_data.observations:
- adjacencies[o.parent_observation_id or o.trace_id].append(o)
-
- assert len(adjacencies[mock_trace_id]) == 1 # Trace has only one child
- assert len(adjacencies) == 2 # Only trace and one observation have children
-
- level_2_observation = adjacencies[mock_trace_id][0]
- class_method_observation = [
- o for o in adjacencies[level_2_observation.id] if o.name == "class_method"
- ][0]
- level_3_observation = [
- o for o in adjacencies[level_2_observation.id] if o.name != "class_method"
- ][0]
-
- assert class_method_observation.input == {"args": [], "kwargs": {}}
- assert class_method_observation.output == "class_method"
-
- assert level_2_observation.metadata == mock_metadata
- assert level_3_observation.metadata == mock_deep_metadata
- assert level_3_observation.type == "GENERATION"
- assert level_3_observation.calculated_total_cost > 0
- assert level_3_observation.output == "mock_output"
-
-
-def test_generator_as_return_value():
- mock_trace_id = create_uuid()
- mock_output = "Hello, World!"
-
- def custom_transform_to_string(x):
- return "--".join(x)
-
- def generator_function():
- yield "Hello"
- yield ", "
- yield "World!"
-
- @observe(transform_to_string=custom_transform_to_string)
- def nested():
- return generator_function()
-
- @observe()
- def main(**kwargs):
- gen = nested()
-
- result = ""
- for item in gen:
- result += item
-
- return result
-
- result = main(langfuse_observation_id=mock_trace_id)
- langfuse_context.flush()
-
- assert result == mock_output
-
- trace_data = get_api().trace.get(mock_trace_id)
- assert trace_data.output == mock_output
-
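-    # transform_to_string joins the yielded chunks for the captured observation output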
- assert trace_data.observations[0].output == "Hello--, --World!"
-
-
-@pytest.mark.asyncio
-async def test_async_generator_as_return_value():
- mock_trace_id = create_uuid()
- mock_output = "Hello, async World!"
-
- def custom_transform_to_string(x):
- return "--".join(x)
-
- @observe(transform_to_string=custom_transform_to_string)
- async def async_generator_function():
- await asyncio.sleep(0.1) # Simulate async operation
- yield "Hello"
- await asyncio.sleep(0.1)
- yield ", async "
- await asyncio.sleep(0.1)
- yield "World!"
-
- @observe(transform_to_string=custom_transform_to_string)
- async def nested_async():
- gen = async_generator_function()
- print(type(gen))
-
- async for item in gen:
- yield item
-
- @observe()
- async def main_async(**kwargs):
- gen = nested_async()
-
- result = ""
- async for item in gen:
- result += item
-
- return result
-
- result = await main_async(langfuse_observation_id=mock_trace_id)
- langfuse_context.flush()
-
- assert result == mock_output
-
- trace_data = get_api().trace.get(mock_trace_id)
- assert trace_data.output == result
-
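-    # Both decorated async generators capture the joined chunks as their output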
- assert trace_data.observations[0].output == "Hello--, async --World!"
- assert trace_data.observations[1].output == "Hello--, async --World!"
-
-
-@pytest.mark.asyncio
-async def test_async_nested_openai_chat_stream():
- mock_name = "test_async_nested_openai_chat_stream"
- mock_trace_id = create_uuid()
- mock_tags = ["tag1", "tag2"]
- mock_session_id = "session-id-1"
- mock_user_id = "user-id-1"
- mock_generation_name = "openai generation"
-
- @observe()
- async def level_2_function():
- gen = await AsyncOpenAI().chat.completions.create(
- name=mock_generation_name,
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- session_id=mock_session_id,
- user_id=mock_user_id,
- tags=mock_tags,
- stream=True,
- )
-
- async for c in gen:
- print(c)
-
- langfuse_context.update_current_observation(metadata=mock_metadata)
- langfuse_context.update_current_trace(name=mock_name)
-
- return "level_2"
-
- @observe()
- async def level_1_function(*args, **kwargs):
- await level_2_function()
-
- return "level_1"
-
- result = await level_1_function(
- *mock_args, **mock_kwargs, langfuse_observation_id=mock_trace_id
- )
- langfuse_context.flush()
-
- assert result == "level_1" # Wrapped function returns correctly
-
- # ID setting for span or trace
- trace_data = get_api().trace.get(mock_trace_id)
- assert (
- len(trace_data.observations) == 2
-    )  # Top-most function is the trace, so it's not counted as an observation
-
- assert trace_data.input == {"args": list(mock_args), "kwargs": mock_kwargs}
- assert trace_data.output == "level_1"
-
- # trace parameters if set anywhere in the call stack
- assert trace_data.session_id == mock_session_id
- assert trace_data.name == mock_name
-
- # Check correct nesting
- adjacencies = defaultdict(list)
- for o in trace_data.observations:
- adjacencies[o.parent_observation_id or o.trace_id].append(o)
-
- assert len(adjacencies[mock_trace_id]) == 1 # Trace has only one child
- assert len(adjacencies) == 2 # Only trace and one observation have children
-
- level_2_observation = adjacencies[mock_trace_id][0]
- level_3_observation = adjacencies[level_2_observation.id][0]
-
- assert level_2_observation.metadata == mock_metadata
-
- generation = level_3_observation
-
- assert generation.name == mock_generation_name
- assert generation.metadata == {"someKey": "someResponse"}
- assert generation.input == [{"content": "1 + 1 = ", "role": "user"}]
- assert generation.type == "GENERATION"
- assert "gpt-3.5-turbo" in generation.model
- assert generation.start_time is not None
- assert generation.end_time is not None
- assert generation.start_time < generation.end_time
- assert generation.model_parameters == {
- "temperature": 0,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.usage.input is not None
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- print(generation)
- assert generation.output == 2
-
-
-def test_generation_at_highest_level():
- mock_trace_id = create_uuid()
- mock_result = "Hello, World!"
-
- @observe(as_type="generation")
- def main():
- return mock_result
-
- result = main(langfuse_observation_id=mock_trace_id)
- langfuse_context.flush()
-
- assert result == mock_result
-
- trace_data = get_api().trace.get(mock_trace_id)
- assert (
- trace_data.output is None
- ) # output will be attributed to generation observation
-
- # Check that the generation is wrapped inside a trace
- assert len(trace_data.observations) == 1
-
- generation = trace_data.observations[0]
- assert generation.type == "GENERATION"
- assert generation.output == result
-
-
-def test_generator_as_function_input():
- mock_trace_id = create_uuid()
- mock_output = "Hello, World!"
-
- def generator_function():
- yield "Hello"
- yield ", "
- yield "World!"
-
- @observe()
- def nested(gen):
- result = ""
- for item in gen:
- result += item
-
- return result
-
- @observe()
- def main(**kwargs):
- gen = generator_function()
-
- return nested(gen)
-
- result = main(langfuse_observation_id=mock_trace_id)
- langfuse_context.flush()
-
- assert result == mock_output
-
- trace_data = get_api().trace.get(mock_trace_id)
- assert trace_data.output == mock_output
-
- assert trace_data.observations[0].input["args"][0] == ""
- assert trace_data.observations[0].output == "Hello, World!"
-
- observation_start_time = trace_data.observations[0].start_time
- observation_end_time = trace_data.observations[0].end_time
-
- assert observation_start_time is not None
- assert observation_end_time is not None
- assert observation_start_time <= observation_end_time
-
-
-def test_nest_list_of_generator_as_function_IO():
- mock_trace_id = create_uuid()
-
- def generator_function():
- yield "Hello"
- yield ", "
- yield "World!"
-
- @observe()
- def nested(list_of_gens):
- return list_of_gens
-
- @observe()
- def main(**kwargs):
- gen = generator_function()
-
- return nested([(gen, gen)])
-
- main(langfuse_observation_id=mock_trace_id)
- langfuse_context.flush()
-
- trace_data = get_api().trace.get(mock_trace_id)
-
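-    # Generators nested inside containers are not consumed; they are serialized as placeholder values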
-    assert trace_data.observations[0].input["args"] == [[["", ""]]]
-
- assert all(
- ["generator" in arg for arg in trace_data.observations[0].output[0]],
- )
-
- observation_start_time = trace_data.observations[0].start_time
- observation_end_time = trace_data.observations[0].end_time
-
- assert observation_start_time is not None
- assert observation_end_time is not None
- assert observation_start_time <= observation_end_time
-
-
-def test_return_dict_for_output():
- mock_trace_id = create_uuid()
- mock_output = {"key": "value"}
-
- @observe()
- def function():
- return mock_output
-
- result = function(langfuse_observation_id=mock_trace_id)
- langfuse_context.flush()
-
- assert result == mock_output
-
- trace_data = get_api().trace.get(mock_trace_id)
- assert trace_data.output == mock_output
-
-
-def test_manual_context_copy_in_threadpoolexecutor():
- from concurrent.futures import ThreadPoolExecutor, as_completed
- from contextvars import copy_context
-
- mock_trace_id = create_uuid()
-
- @observe()
- def execute_task(*args):
- return args
-
- task_args = [["a", "b"], ["c", "d"]]
-
- @observe()
- def execute_groups(task_args):
- with ThreadPoolExecutor(3) as executor:
- futures = []
-
- for task_arg in task_args:
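-                # copy_context() snapshots the current contextvars so each worker thread runs inside the active Langfuse trace context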
- ctx = copy_context()
-
- # Using a lambda to capture the current 'task_arg' and context 'ctx' to ensure each task uses its specific arguments and isolated context when executed.
- task = lambda p=task_arg: ctx.run(execute_task, *p) # noqa
-
- futures.append(executor.submit(task))
-
- # Ensure all futures complete
- for future in as_completed(futures):
- future.result()
-
- return [f.result() for f in futures]
-
- execute_groups(task_args, langfuse_observation_id=mock_trace_id)
-
- langfuse_context.flush()
-
- trace_data = get_api().trace.get(mock_trace_id)
-
- assert len(trace_data.observations) == 2
-
- for observation in trace_data.observations:
- assert observation.input["args"] in [["a", "b"], ["c", "d"]]
- assert observation.output in [["a", "b"], ["c", "d"]]
-
- assert (
- observation.parent_observation_id is None
- ) # Ensure that the observations are not nested
-
-
-def test_update_trace_io():
- mock_name = "test_update_trace_io"
- mock_trace_id = create_uuid()
-
- @observe(as_type="generation", name="level_3_to_be_overwritten")
- def level_3_function():
- langfuse_context.update_current_observation(metadata=mock_metadata)
- langfuse_context.update_current_observation(
- metadata=mock_deep_metadata,
- usage={"input": 150, "output": 50, "total": 300},
- model="gpt-3.5-turbo",
- output="mock_output",
- )
- langfuse_context.update_current_observation(
- version="version-1", name="overwritten_level_3"
- )
-
- langfuse_context.update_current_trace(
- session_id=mock_session_id, name=mock_name, input="nested_input"
- )
-
- langfuse_context.update_current_trace(
- user_id="user_id",
- )
-
- return "level_3"
-
- @observe(name="level_2_manually_set")
- def level_2_function():
- level_3_function()
- langfuse_context.update_current_observation(metadata=mock_metadata)
-
- return "level_2"
-
- @observe()
- def level_1_function(*args, **kwargs):
- level_2_function()
- langfuse_context.update_current_trace(output="nested_output")
-
- return "level_1"
-
- result = level_1_function(
- *mock_args, **mock_kwargs, langfuse_observation_id=mock_trace_id
- )
- langfuse_context.flush()
-
- assert result == "level_1" # Wrapped function returns correctly
-
- # ID setting for span or trace
-
- trace_data = get_api().trace.get(mock_trace_id)
- assert (
- len(trace_data.observations) == 2
-    )  # Top-most function is the trace, so it's not counted as an observation
-
- assert trace_data.input == "nested_input"
- assert trace_data.output == "nested_output"
-
- # trace parameters if set anywhere in the call stack
- assert trace_data.session_id == mock_session_id
- assert trace_data.user_id == "user_id"
- assert trace_data.name == mock_name
-
- # Check correct nesting
- adjacencies = defaultdict(list)
- for o in trace_data.observations:
- adjacencies[o.parent_observation_id or o.trace_id].append(o)
-
- assert len(adjacencies[mock_trace_id]) == 1 # Trace has only one child
- assert len(adjacencies) == 2 # Only trace and one observation have children
-
- level_2_observation = adjacencies[mock_trace_id][0]
- level_3_observation = adjacencies[level_2_observation.id][0]
-
- assert level_2_observation.name == "level_2_manually_set"
- assert level_2_observation.metadata == mock_metadata
-
- assert level_3_observation.name == "overwritten_level_3"
- assert level_3_observation.metadata == mock_deep_metadata
- assert level_3_observation.type == "GENERATION"
- assert level_3_observation.calculated_total_cost > 0
- assert level_3_observation.output == "mock_output"
- assert level_3_observation.version == "version-1"
-
-
-def test_parent_trace_id():
- # Create a parent trace
- parent_trace_id = create_uuid()
- observation_id = create_uuid()
- trace_name = "test_parent_trace_id"
-
- langfuse = langfuse_context.client_instance
- langfuse.trace(id=parent_trace_id, name=trace_name)
-
- @observe()
- def decorated_function():
- return "decorated_function"
-
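-    # Passing langfuse_parent_trace_id nests the decorated call under the existing trace instead of creating a new one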
- decorated_function(
- langfuse_parent_trace_id=parent_trace_id, langfuse_observation_id=observation_id
- )
-
- langfuse_context.flush()
-
- trace_data = get_api().trace.get(parent_trace_id)
-
- assert trace_data.id == parent_trace_id
- assert trace_data.name == trace_name
-
- assert len(trace_data.observations) == 1
- assert trace_data.observations[0].id == observation_id
-
-
-def test_parent_observation_id():
- parent_trace_id = create_uuid()
- parent_span_id = create_uuid()
- observation_id = create_uuid()
- trace_name = "test_parent_observation_id"
- mock_metadata = {"key": "value"}
-
- langfuse = langfuse_context.client_instance
- trace = langfuse.trace(id=parent_trace_id, name=trace_name)
- trace.span(id=parent_span_id, name="parent_span")
-
- @observe()
- def decorated_function():
- langfuse_context.update_current_trace(metadata=mock_metadata)
- langfuse_context.score_current_trace(value=1, name="score_name")
-
- return "decorated_function"
-
- decorated_function(
- langfuse_parent_trace_id=parent_trace_id,
- langfuse_parent_observation_id=parent_span_id,
- langfuse_observation_id=observation_id,
- )
-
- langfuse_context.flush()
-
- trace_data = get_api().trace.get(parent_trace_id)
-
- assert trace_data.id == parent_trace_id
- assert trace_data.name == trace_name
- assert trace_data.metadata == mock_metadata
- assert trace_data.scores[0].name == "score_name"
- assert trace_data.scores[0].value == 1
-
- assert len(trace_data.observations) == 2
-
- parent_span = next(
- (o for o in trace_data.observations if o.id == parent_span_id), None
- )
- assert parent_span is not None
- assert parent_span.parent_observation_id is None
-
- execution_span = next(
- (o for o in trace_data.observations if o.id == observation_id), None
- )
- assert execution_span is not None
- assert execution_span.parent_observation_id == parent_span_id
-
-
-def test_ignore_parent_observation_id_if_parent_trace_id_is_not_set():
- parent_trace_id = create_uuid()
- parent_span_id = create_uuid()
- observation_id = create_uuid()
- trace_name = "test_parent_observation_id"
-
- langfuse = langfuse_context.client_instance
- trace = langfuse.trace(id=parent_trace_id, name=trace_name)
- trace.span(id=parent_span_id, name="parent_span")
-
- @observe()
- def decorated_function():
- return "decorated_function"
-
- decorated_function(
- langfuse_parent_observation_id=parent_span_id,
- langfuse_observation_id=observation_id,
- # No parent trace id set
- )
-
- langfuse_context.flush()
-
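-    # Without a parent trace id, the parent observation id is ignored and a fresh trace is created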
- trace_data = get_api().trace.get(observation_id)
-
- assert trace_data.id == observation_id
- assert trace_data.name == "decorated_function"
-
- assert len(trace_data.observations) == 0
-
-
-def test_top_level_generation():
- mock_trace_id = create_uuid()
- mock_output = "Hello, World!"
-
- @observe(as_type="generation")
- def main():
- sleep(1)
- langfuse_context.update_current_trace(name="updated_name")
-
- return mock_output
-
- main(langfuse_observation_id=mock_trace_id)
-
- langfuse_context.flush()
-
- trace_data = get_api().trace.get(mock_trace_id)
- assert trace_data.name == "updated_name"
-
- assert len(trace_data.observations) == 1
- assert trace_data.observations[0].name == "main"
- assert trace_data.observations[0].type == "GENERATION"
- assert trace_data.observations[0].output == mock_output
-
-
-def test_threadpool_executor():
- mock_trace_id = create_uuid()
- mock_parent_observation_id = create_uuid()
-
- from concurrent.futures import ThreadPoolExecutor, as_completed
-
- from langfuse.decorators import langfuse_context, observe
-
- @observe()
- def execute_task(*args):
- return args
-
- @observe()
- def execute_groups(task_args):
- trace_id = langfuse_context.get_current_trace_id()
- observation_id = langfuse_context.get_current_observation_id()
-
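-        # Parent IDs are passed explicitly because contextvars do not propagate into ThreadPoolExecutor workers by default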
- with ThreadPoolExecutor(3) as executor:
- futures = [
- executor.submit(
- execute_task,
- *task_arg,
- langfuse_parent_trace_id=trace_id,
- langfuse_parent_observation_id=observation_id,
- )
- for task_arg in task_args
- ]
-
- for future in as_completed(futures):
- future.result()
-
- return [f.result() for f in futures]
-
- @observe()
- def main():
- task_args = [["a", "b"], ["c", "d"]]
-
- execute_groups(task_args, langfuse_observation_id=mock_parent_observation_id)
-
- main(langfuse_observation_id=mock_trace_id)
-
- langfuse_context.flush()
-
- trace_data = get_api().trace.get(mock_trace_id)
-
- assert len(trace_data.observations) == 3
-
- parent_observation = next(
- (o for o in trace_data.observations if o.id == mock_parent_observation_id), None
- )
-
- assert parent_observation is not None
-
- child_observations = [
- o
- for o in trace_data.observations
- if o.parent_observation_id == mock_parent_observation_id
- ]
- assert len(child_observations) == 2
-
-
-def test_media():
- mock_trace_id = create_uuid()
-
- with open("static/bitcoin.pdf", "rb") as pdf_file:
- pdf_bytes = pdf_file.read()
-
- media = LangfuseMedia(content_bytes=pdf_bytes, content_type="application/pdf")
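-    # On ingestion, LangfuseMedia objects are replaced by reference strings of the form @@@langfuseMedia:type=...|id=...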
-
- @observe()
- def main():
- sleep(1)
- langfuse_context.update_current_trace(
- input={
- "context": {
- "nested": media,
- },
- },
- output={
- "context": {
- "nested": media,
- },
- },
- metadata={
- "context": {
- "nested": media,
- },
- },
- )
-
- main(langfuse_observation_id=mock_trace_id)
-
- langfuse_context.flush()
-
- trace_data = get_api().trace.get(mock_trace_id)
-
- assert (
- "@@@langfuseMedia:type=application/pdf|id="
- in trace_data.input["context"]["nested"]
- )
- assert (
- "@@@langfuseMedia:type=application/pdf|id="
- in trace_data.output["context"]["nested"]
- )
- assert (
- "@@@langfuseMedia:type=application/pdf|id="
- in trace_data.metadata["context"]["nested"]
- )
- parsed_reference_string = LangfuseMedia.parse_reference_string(
- trace_data.metadata["context"]["nested"]
- )
- assert parsed_reference_string["content_type"] == "application/pdf"
- assert parsed_reference_string["media_id"] is not None
- assert parsed_reference_string["source"] == "bytes"
-
-
-def test_merge_metadata_and_tags():
- mock_trace_id = create_uuid()
-
- @observe
- def nested():
- langfuse_context.update_current_trace(
- metadata={"key2": "value2"}, tags=["tag2"]
- )
-
- @observe
- def main():
- langfuse_context.update_current_trace(
- metadata={"key1": "value1"}, tags=["tag1"]
- )
-
- nested()
-
- main(langfuse_observation_id=mock_trace_id)
-
- langfuse_context.flush()
-
- trace_data = get_api().trace.get(mock_trace_id)
-
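-    # Metadata dicts are merged and tags are concatenated across nested update_current_trace calls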
- assert trace_data.metadata == {"key1": "value1", "key2": "value2"}
-
- assert trace_data.tags == ["tag1", "tag2"]
diff --git a/tests/test_error_logging.py b/tests/test_error_logging.py
deleted file mode 100644
index 8927e1323..000000000
--- a/tests/test_error_logging.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import logging
-import pytest
-
-from langfuse.utils.error_logging import (
- catch_and_log_errors,
- auto_decorate_methods_with,
-)
-
-
-# Test for the catch_and_log_errors decorator applied to a standalone function
-@catch_and_log_errors
-def function_that_raises():
- raise ValueError("This is a test error.")
-
-
-def test_catch_and_log_errors_logs_error_silently(caplog):
- function_that_raises()
-
- assert len(caplog.records) == 1
- assert caplog.records[0].levelno == logging.ERROR
- assert (
- "An error occurred in function_that_raises: This is a test error."
- in caplog.text
- )
- caplog.clear()
-
-
-# Test for the auto_decorate_methods_with decorator applied to a class
-@auto_decorate_methods_with(catch_and_log_errors, exclude=["excluded_instance_method"])
-class TestClass:
- def instance_method(self):
- raise ValueError("Error in instance method.")
-
- def excluded_instance_method(self):
- raise ValueError("Error in instance method.")
-
- @classmethod
- def class_method(cls):
- raise ValueError("Error in class method.")
-
- @staticmethod
- def static_method():
- raise ValueError("Error in static method.")
-
-
-def test_auto_decorate_class_methods(caplog):
- test_obj = TestClass()
-
- # Test the instance method
- test_obj.instance_method()
- assert "Error in instance method." in caplog.text
- caplog.clear()
-
- # Test the class method
- TestClass.class_method()
- assert "Error in class method." in caplog.text
- caplog.clear()
-
- # Test the static method
- TestClass.static_method()
- assert "Error in static method." in caplog.text
- caplog.clear()
-
- # Test the excluded instance method that should raise an error
- with pytest.raises(ValueError, match="Error in instance method."):
- test_obj.excluded_instance_method()
diff --git a/tests/test_error_parsing.py b/tests/test_error_parsing.py
deleted file mode 100644
index a92cc8b43..000000000
--- a/tests/test_error_parsing.py
+++ /dev/null
@@ -1,77 +0,0 @@
-"""@private"""
-
-from langfuse.request import APIErrors, APIError
-from langfuse.parse_error import (
- generate_error_message,
- generate_error_message_fern,
-)
-from langfuse.api.resources.commons.errors import (
- AccessDeniedError,
- MethodNotAllowedError,
- NotFoundError,
- UnauthorizedError,
-)
-from langfuse.api.core import ApiError
-from langfuse.api.resources.health.errors import ServiceUnavailableError
-
-
-def test_generate_error_message_api_error():
- exception = APIError(message="Test API error", status="500")
- expected_message = "API error occurred: Internal server error occurred. For help, please contact support: https://langfuse.com/support"
- assert expected_message in generate_error_message(exception)
-
-
-def test_generate_error_message_api_errors():
- errors = [
- APIError(status=400, message="Bad request", details="Invalid input"),
- APIError(status=401, message="Unauthorized", details="Invalid credentials"),
- ]
- exception = APIErrors(errors)
- expected_message = (
- "API errors occurred: "
- "Bad request. Please check your request for any missing or incorrect parameters. Refer to our API docs: https://api.reference.langfuse.com for details.\n"
- "Unauthorized. Please check your public/private host settings. Refer to our installation and setup guide: https://langfuse.com/docs/sdk/typescript/guide for details on SDK configuration."
- )
- assert expected_message in generate_error_message(exception)
-
-
-def test_generate_error_message_generic_exception():
- exception = Exception("Generic error")
- expected_message = "Unexpected error occurred. Please check your request and contact support: https://langfuse.com/support."
- assert generate_error_message(exception) == expected_message
-
-
-def test_generate_error_message_access_denied_error():
- exception = AccessDeniedError(body={})
- expected_message = "Forbidden. Please check your access control settings. Refer to our RBAC docs: https://langfuse.com/docs/rbac for details."
- assert generate_error_message_fern(exception) == expected_message
-
-
-def test_generate_error_message_method_not_allowed_error():
- exception = MethodNotAllowedError(body={})
- expected_message = "Unexpected error occurred. Please check your request and contact support: https://langfuse.com/support."
- assert generate_error_message_fern(exception) == expected_message
-
-
-def test_generate_error_message_not_found_error():
- exception = NotFoundError(body={})
- expected_message = "Internal error occurred. This is an unusual occurrence and we are monitoring it closely. For help, please contact support: https://langfuse.com/support."
- assert generate_error_message_fern(exception) == expected_message
-
-
-def test_generate_error_message_unauthorized_error():
- exception = UnauthorizedError(body={})
- expected_message = "Unauthorized. Please check your public/private host settings. Refer to our installation and setup guide: https://langfuse.com/docs/sdk/typescript/guide for details on SDK configuration."
- assert generate_error_message_fern(exception) == expected_message
-
-
-def test_generate_error_message_service_unavailable_error():
- exception = ServiceUnavailableError()
- expected_message = "Service unavailable. This is an unusual occurrence and we are monitoring it closely. For help, please contact support: https://langfuse.com/support."
- assert generate_error_message_fern(exception) == expected_message
-
-
-def test_generate_error_message_generic():
- exception = ApiError(status_code=503)
- expected_message = "Service unavailable. This is an unusual occurrence and we are monitoring it closely. For help, please contact support: https://langfuse.com/support."
- assert generate_error_message_fern(exception) == expected_message
diff --git a/tests/test_extract_model.py b/tests/test_extract_model.py
deleted file mode 100644
index 01990da92..000000000
--- a/tests/test_extract_model.py
+++ /dev/null
@@ -1,153 +0,0 @@
-from typing import Any
-from unittest.mock import MagicMock
-
-import pytest
-from langchain.schema.messages import HumanMessage
-from langchain_anthropic import Anthropic, ChatAnthropic
-from langchain_aws import BedrockLLM, ChatBedrock
-from langchain_community.chat_models import (
- ChatCohere,
- ChatTongyi,
-)
-from langchain_community.chat_models.fake import FakeMessagesListChatModel
-
-# from langchain_huggingface.llms import HuggingFacePipeline
-from langchain_community.llms.textgen import TextGen
-from langchain_core.load.dump import default
-from langchain_google_vertexai import ChatVertexAI
-from langchain_groq import ChatGroq
-from langchain_mistralai.chat_models import ChatMistralAI
-from langchain_ollama import ChatOllama, OllamaLLM
-from langchain_openai import (
- AzureChatOpenAI,
- ChatOpenAI,
- OpenAI,
-)
-
-from langfuse.callback import CallbackHandler
-from langfuse.extract_model import _extract_model_name
-from tests.utils import get_api
-
-
-@pytest.mark.parametrize(
- "expected_model,model",
- [
- (
- "mixtral-8x7b-32768",
- ChatGroq(
- temperature=0, model_name="mixtral-8x7b-32768", groq_api_key="something"
- ),
- ),
- ("llama3", OllamaLLM(model="llama3")),
- ("llama3", ChatOllama(model="llama3")),
- (
- None,
- FakeMessagesListChatModel(responses=[HumanMessage("Hello, how are you?")]),
- ),
- (
- "mistralai",
- ChatMistralAI(mistral_api_key="mistral_api_key", model="mistralai"),
- ),
- (
- "text-gen",
- TextGen(model_url="some-url"),
-        ),  # local deployment, does not have a model name
- ("claude-2", ChatAnthropic(model_name="claude-2")),
- (
- "claude-3-sonnet-20240229",
- ChatAnthropic(model="claude-3-sonnet-20240229"),
- ),
- ("claude-2", Anthropic()),
- ("claude-2", Anthropic()),
- ("command", ChatCohere(model="command", cohere_api_key="command")),
- (None, ChatTongyi(dashscope_api_key="dash")),
- (
- "amazon.titan-tg1-large",
- BedrockLLM(
- model="amazon.titan-tg1-large",
- region="us-east-1",
- client=MagicMock(),
- ),
- ),
- (
- "anthropic.claude-3-sonnet-20240229-v1:0",
- ChatBedrock(
- model_id="anthropic.claude-3-sonnet-20240229-v1:0",
- region_name="us-east-1",
- client=MagicMock(),
- ),
- ),
- (
- "claude-1",
- BedrockLLM(
- model="claude-1",
- region="us-east-1",
- client=MagicMock(),
- ),
- ),
- ],
-)
-def test_models(expected_model: str, model: Any):
- serialized = default(model)
- model_name = _extract_model_name(serialized)
- assert model_name == expected_model
-
-
-# These models are tested via an actual call because the model name is taken from the kwargs / invocation_params, or because setup requires a real call
-@pytest.mark.parametrize(
- "expected_model,model",
- [
- ("gpt-3.5-turbo-0125", ChatOpenAI()),
- ("gpt-3.5-turbo-instruct", OpenAI()),
- (
- "gpt-3.5-turbo",
- AzureChatOpenAI(
- openai_api_version="2023-05-15",
- model="gpt-3.5-turbo",
- azure_deployment="your-deployment-name",
- azure_endpoint="https://your-endpoint-name.azurewebsites.net",
- ),
- ),
- # (
- # "gpt2",
- # HuggingFacePipeline(
- # model_id="gpt2",
- # model_kwargs={
- # "max_new_tokens": 512,
- # "top_k": 30,
- # "temperature": 0.1,
- # "repetition_penalty": 1.03,
- # },
- # ),
- # ),
- (
- "qwen-72b-chat",
- ChatTongyi(model="qwen-72b-chat", dashscope_api_key="dashscope"),
- ),
- (
- "gemini",
- ChatVertexAI(
- model_name="gemini", credentials=MagicMock(), project="some-project"
- ),
- ),
- ],
-)
-def test_entire_llm_call(expected_model, model):
- callback = CallbackHandler()
- try:
- # LLM calls are failing, because of missing API keys etc.
- # However, we are still able to extract the model names beforehand.
- model.invoke("Hello, how are you?", config={"callbacks": [callback]})
- except Exception as e:
- print(e)
- pass
-
- callback.flush()
- api = get_api()
-
- trace = api.trace.get(callback.get_trace_id())
-
- assert len(trace.observations) == 1
-
- generation = list(filter(lambda o: o.type == "GENERATION", trace.observations))[0]
- assert generation.model == expected_model
diff --git a/tests/test_extract_model_langchain_openai.py b/tests/test_extract_model_langchain_openai.py
deleted file mode 100644
index cf9c8ba25..000000000
--- a/tests/test_extract_model_langchain_openai.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from langchain_openai import AzureChatOpenAI, ChatOpenAI, OpenAI
-import pytest
-
-from langfuse.callback import CallbackHandler
-from tests.utils import get_api
-
-
-@pytest.mark.parametrize( # noqa: F821
- "expected_model,model",
- [
- ("gpt-3.5-turbo", ChatOpenAI()),
- ("gpt-3.5-turbo-instruct", OpenAI()),
- (
- "gpt-3.5-turbo",
- AzureChatOpenAI(
- openai_api_version="2023-05-15",
- model="gpt-3.5-turbo",
- azure_deployment="your-deployment-name",
- azure_endpoint="https://your-endpoint-name.azurewebsites.net",
- ),
- ),
-        # # default model is now set as azure-deployment since langchain > 0.3.0
- # (
- # "gpt-3.5-turbo-instruct",
- # AzureOpenAI(
- # openai_api_version="2023-05-15",
- # azure_deployment="your-deployment-name",
- # azure_endpoint="https://your-endpoint-name.azurewebsites.net",
- # ),
- # ),
- ],
-)
-def test_entire_llm_call_using_langchain_openai(expected_model, model):
- callback = CallbackHandler()
- try:
- # LLM calls are failing, because of missing API keys etc.
- # However, we are still able to extract the model names beforehand.
- model.invoke("Hello, how are you?", config={"callbacks": [callback]})
- except Exception as e:
- print(e)
- pass
-
- callback.flush()
- api = get_api()
-
- trace = api.trace.get(callback.get_trace_id())
-
- assert len(trace.observations) == 1
-
- generation = list(filter(lambda o: o.type == "GENERATION", trace.observations))[0]
- assert expected_model in generation.model
diff --git a/tests/test_json.py b/tests/test_json.py
deleted file mode 100644
index e9bd887d3..000000000
--- a/tests/test_json.py
+++ /dev/null
@@ -1,139 +0,0 @@
-import builtins
-from dataclasses import dataclass
-import importlib
-import json
-from datetime import datetime, timezone, date
-from unittest.mock import patch
-import uuid
-from bson import ObjectId
-
-import pytest
-from langchain.schema.messages import HumanMessage
-from pydantic import BaseModel
-
-import langfuse
-from langfuse.api.resources.commons.types.observation_level import ObservationLevel
-from langfuse.serializer import EventSerializer
-
-
-class TestModel(BaseModel):
- foo: str
- bar: datetime
-
-
-def test_json_encoder():
- """Test that the JSON encoder encodes datetimes correctly."""
- message = HumanMessage(content="I love programming!")
- obj = {
- "foo": "bar",
- "bar": datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
- "date": date(2024, 1, 1),
- "messages": [message],
- }
-
- result = json.dumps(obj, cls=EventSerializer)
- assert (
- '{"foo": "bar", "bar": "2021-01-01T00:00:00Z", "date": "2024-01-01", "messages": [{"content": "I love programming!", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}]}'
- in result
- )
-
-
-def test_json_decoder_pydantic():
- obj = TestModel(foo="bar", bar=datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone.utc))
- assert (
- json.dumps(obj, cls=EventSerializer)
- == '{"foo": "bar", "bar": "2021-01-01T00:00:00Z"}'
- )
-
-
-@pytest.fixture
-def event_serializer():
- return EventSerializer()
-
-
-def test_json_decoder_without_langchain_serializer():
- with patch.dict("sys.modules", {"langchain.load.serializable": None}):
- model = TestModel(
- foo="John", bar=datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone.utc)
- )
- result = json.dumps(model, cls=EventSerializer)
- assert result == '{"foo": "John", "bar": "2021-01-01T00:00:00Z"}'
-
-
-@pytest.fixture
-def hide_available_langchain(monkeypatch):
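-    """Simulate an environment without langchain installed by forcing its import to fail."""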
- import_orig = builtins.__import__
-
- def mocked_import(name, *args, **kwargs):
- if name == "langchain" or name == "langchain.load.serializable":
- raise ImportError()
- return import_orig(name, *args, **kwargs)
-
- monkeypatch.setattr(builtins, "__import__", mocked_import)
-
-
-@pytest.mark.usefixtures("hide_available_langchain")
-def test_json_decoder_without_langchain_serializer_with_langchain_message():
- with pytest.raises(ImportError):
- import langchain # noqa
-
- with pytest.raises(ImportError):
- from langchain.load.serializable import Serializable # noqa
-
- importlib.reload(langfuse)
- obj = TestModel(foo="bar", bar=datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone.utc))
- result = json.dumps(obj, cls=EventSerializer)
- assert result == '{"foo": "bar", "bar": "2021-01-01T00:00:00Z"}'
-
-
-@pytest.mark.usefixtures("hide_available_langchain")
-def test_json_decoder_without_langchain_serializer_with_none():
- with pytest.raises(ImportError):
- import langchain # noqa
-
- with pytest.raises(ImportError):
- from langchain.load.serializable import Serializable # noqa
-
- importlib.reload(langfuse)
- result = json.dumps(None, cls=EventSerializer)
- default = json.dumps(None)
- assert result == "null"
- assert result == default
-
-
-def test_data_class():
- @dataclass
- class InventoryItem:
- """Class for keeping track of an item in inventory."""
-
- name: str
- unit_price: float
- quantity_on_hand: int = 0
-
- item = InventoryItem("widget", 3.0, 10)
-
- result = json.dumps(item, cls=EventSerializer)
-
- assert result == '{"name": "widget", "unit_price": 3.0, "quantity_on_hand": 10}'
-
-
-def test_data_uuid():
- test_id = uuid.uuid4()
-
- result = json.dumps(test_id, cls=EventSerializer)
-
- assert result == f'"{str(test_id)}"'
-
-
-def test_observation_level():
- result = json.dumps(ObservationLevel.ERROR, cls=EventSerializer)
-
- assert result == '"ERROR"'
-
-
-def test_mongo_cursor():
- test_id = ObjectId("5f3e3e3e3e3e3e3e3e3e3e3e")
-
- result = json.dumps(test_id, cls=EventSerializer)
-
- assert isinstance(result, str)
diff --git a/tests/test_langchain.py b/tests/test_langchain.py
deleted file mode 100644
index 86e49b970..000000000
--- a/tests/test_langchain.py
+++ /dev/null
@@ -1,2320 +0,0 @@
-import os
-import random
-import string
-import time
-from time import sleep
-from typing import Any, Dict, List, Literal, Mapping, Optional
-
-import pytest
-from langchain.agents import AgentType, initialize_agent
-from langchain.chains import (
- ConversationalRetrievalChain,
- ConversationChain,
- LLMChain,
- RetrievalQA,
- SimpleSequentialChain,
-)
-from langchain.chains.openai_functions import create_openai_fn_chain
-from langchain.chains.summarize import load_summarize_chain
-from langchain.memory import ConversationBufferMemory
-from langchain.prompts import ChatPromptTemplate, PromptTemplate
-from langchain.schema import Document, HumanMessage, SystemMessage
-from langchain.text_splitter import CharacterTextSplitter
-from langchain_anthropic import Anthropic
-from langchain_community.agent_toolkits.load_tools import load_tools
-from langchain_community.document_loaders import TextLoader
-from langchain_community.embeddings import OpenAIEmbeddings
-from langchain_community.llms.huggingface_hub import HuggingFaceHub
-from langchain_community.vectorstores import Chroma
-from langchain_core.callbacks.manager import CallbackManagerForLLMRun
-from langchain_core.language_models.llms import LLM
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.runnables.base import RunnableLambda
-from langchain_core.tools import StructuredTool, tool
-from langchain_openai import AzureChatOpenAI, ChatOpenAI, OpenAI
-from langgraph.checkpoint.memory import MemorySaver
-from langgraph.graph import END, START, MessagesState, StateGraph
-from langgraph.prebuilt import ToolNode
-from pydantic.v1 import BaseModel, Field
-
-from langfuse.callback import CallbackHandler
-from langfuse.callback.langchain import LANGSMITH_TAG_HIDDEN
-from langfuse.client import Langfuse
-from tests.api_wrapper import LangfuseAPI
-from tests.utils import create_uuid, encode_file_to_base64, get_api
-
-
-def test_callback_init():
- callback = CallbackHandler(release="something", session_id="session-id")
- assert callback.trace is None
- assert not callback.runs
- assert callback.langfuse.release == "something"
- assert callback.session_id == "session-id"
- assert callback._task_manager is not None
-
-
-def test_callback_kwargs():
- callback = CallbackHandler(
- trace_name="trace-name",
- release="release",
- version="version",
- session_id="session-id",
- user_id="user-id",
- metadata={"key": "value"},
- tags=["tag1", "tag2"],
- )
-
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"), max_tokens=5)
- prompt_template = PromptTemplate(input_variables=["input"], template="""{input}""")
- test_chain = LLMChain(llm=llm, prompt=prompt_template)
- test_chain.run("Hi", callbacks=[callback])
- callback.flush()
-
- trace_id = callback.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
- assert trace.input is not None
- assert trace.output is not None
- assert trace.metadata == {"key": "value"}
- assert trace.tags == ["tag1", "tag2"]
- assert trace.release == "release"
- assert trace.version == "version"
- assert trace.session_id == "session-id"
- assert trace.user_id == "user-id"
-
-
-def test_langfuse_span():
- trace_id = create_uuid()
- span_id = create_uuid()
- langfuse = Langfuse(debug=False)
- trace = langfuse.trace(id=trace_id)
- span = trace.span(id=span_id)
-
- handler = span.get_langchain_handler()
-
- assert handler.get_trace_id() == trace_id
- assert handler.root_span.id == span_id
- assert handler._task_manager is not None
-
-
-def test_callback_generated_from_trace_chain():
- langfuse = Langfuse(debug=True)
-
- trace_id = create_uuid()
-
- trace = langfuse.trace(id=trace_id, name=trace_id)
-
- handler = trace.get_langchain_handler()
-
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
- template = """You are a playwright. Given the title of play, it is your job to write a synopsis for that title.
- Title: {title}
- Playwright: This is a synopsis for the above play:"""
-
- prompt_template = PromptTemplate(input_variables=["title"], template=template)
- synopsis_chain = LLMChain(llm=llm, prompt=prompt_template)
-
- synopsis_chain.run("Tragedy at sunset on the beach", callbacks=[handler])
-
- langfuse.flush()
-
- trace = get_api().trace.get(trace_id)
-
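-    # A handler created from an existing trace does not write input/output onto that trace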
- assert trace.input is None
- assert trace.output is None
- assert handler.get_trace_id() == trace_id
-
- assert len(trace.observations) == 2
- assert trace.id == trace_id
-
- langchain_span = list(
- filter(
- lambda o: o.type == "SPAN" and o.name == "LLMChain",
- trace.observations,
- )
- )[0]
-
- assert langchain_span.parent_observation_id is None
- assert langchain_span.input is not None
- assert langchain_span.output is not None
-
- langchain_generation_span = list(
- filter(
- lambda o: o.type == "GENERATION" and o.name == "OpenAI",
- trace.observations,
- )
- )[0]
-
- assert langchain_generation_span.parent_observation_id == langchain_span.id
- assert langchain_generation_span.usage_details["input"] > 0
- assert langchain_generation_span.usage_details["output"] > 0
- assert langchain_generation_span.usage_details["total"] > 0
- assert langchain_generation_span.input is not None
- assert langchain_generation_span.input != ""
- assert langchain_generation_span.output is not None
- assert langchain_generation_span.output != ""
-
-
-def test_callback_generated_from_trace_chat():
- langfuse = Langfuse(debug=False)
-
- trace_id = create_uuid()
-
- trace = langfuse.trace(id=trace_id, name=trace_id)
- handler = trace.get_langchain_handler()
-
- chat = ChatOpenAI(temperature=0)
-
- messages = [
- SystemMessage(
- content="You are a helpful assistant that translates English to French."
- ),
- HumanMessage(
- content="Translate this sentence from English to French. I love programming."
- ),
- ]
-
- chat(messages, callbacks=[handler])
-
- langfuse.flush()
-
- trace = get_api().trace.get(trace_id)
-
- assert trace.input is None
- assert trace.output is None
-
- assert handler.get_trace_id() == trace_id
- assert trace.id == trace_id
-
- assert len(trace.observations) == 1
-
- langchain_generation_span = list(
- filter(
- lambda o: o.type == "GENERATION" and o.name == "ChatOpenAI",
- trace.observations,
- )
- )[0]
-
- assert langchain_generation_span.parent_observation_id is None
- assert langchain_generation_span.usage_details["input"] > 0
- assert langchain_generation_span.usage_details["output"] > 0
- assert langchain_generation_span.usage_details["total"] > 0
- assert langchain_generation_span.input is not None
- assert langchain_generation_span.input != ""
- assert langchain_generation_span.output is not None
- assert langchain_generation_span.output != ""
-
-
-def test_callback_generated_from_lcel_chain():
- langfuse = Langfuse(debug=False)
-
- run_name_override = "This is a custom Run Name"
- handler = CallbackHandler(debug=False)
-
- prompt = ChatPromptTemplate.from_template("tell me a short joke about {topic}")
- model = ChatOpenAI(temperature=0)
-
- chain = prompt | model
-
- chain.invoke(
- {"topic": "ice cream"},
- config={
- "callbacks": [handler],
- "run_name": run_name_override,
- },
- )
-
- langfuse.flush()
- handler.flush()
- trace_id = handler.get_trace_id()
- trace = get_api().trace.get(trace_id)
-
- assert trace.name == run_name_override
-
-
-def test_callback_generated_from_span_chain():
- langfuse = Langfuse(debug=False)
-
- trace_id = create_uuid()
- span_id = create_uuid()
-
- trace = langfuse.trace(id=trace_id, name=trace_id)
- span = trace.span(id=span_id, name=span_id)
-
- handler = span.get_langchain_handler()
-
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
- template = """You are a playwright. Given the title of play, it is your job to write a synopsis for that title.
- Title: {title}
- Playwright: This is a synopsis for the above play:"""
-
- prompt_template = PromptTemplate(input_variables=["title"], template=template)
- synopsis_chain = LLMChain(llm=llm, prompt=prompt_template)
-
- synopsis_chain.run("Tragedy at sunset on the beach", callbacks=[handler])
-
- langfuse.flush()
-
- trace = get_api().trace.get(trace_id)
-
- assert trace.input is None
- assert trace.output is None
- assert handler.get_trace_id() == trace_id
-
- assert len(trace.observations) == 3
- assert trace.id == trace_id
-
- user_span = list(
- filter(
- lambda o: o.id == span_id,
- trace.observations,
- )
- )[0]
-
- assert user_span.input is None
- assert user_span.output is None
-
- langchain_span = list(
- filter(
- lambda o: o.type == "SPAN" and o.name == "LLMChain",
- trace.observations,
- )
- )[0]
-
- assert langchain_span.parent_observation_id == user_span.id
-
- langchain_generation_span = list(
- filter(
- lambda o: o.type == "GENERATION" and o.name == "OpenAI",
- trace.observations,
- )
- )[0]
-
- assert langchain_generation_span.parent_observation_id == langchain_span.id
- assert langchain_generation_span.usage_details["input"] > 0
- assert langchain_generation_span.usage_details["output"] > 0
- assert langchain_generation_span.usage_details["total"] > 0
- assert langchain_generation_span.input is not None
- assert langchain_generation_span.input != ""
- assert langchain_generation_span.output is not None
- assert langchain_generation_span.output != ""
-
-
-def test_callback_generated_from_span_chat():
- langfuse = Langfuse(debug=False)
-
- trace_id = create_uuid()
- span_id = create_uuid()
-
- trace = langfuse.trace(id=trace_id, name=trace_id)
- span = trace.span(id=span_id, name=span_id)
-
- handler = span.get_langchain_handler()
-
- chat = ChatOpenAI(temperature=0)
-
- messages = [
- SystemMessage(
- content="You are a helpful assistant that translates English to French."
- ),
- HumanMessage(
- content="Translate this sentence from English to French. I love programming."
- ),
- ]
-
- chat(messages, callbacks=[handler])
-
- langfuse.flush()
-
- trace = get_api().trace.get(trace_id)
-
- assert trace.input is None
- assert trace.output is None
-
- assert handler.get_trace_id() == trace_id
- assert trace.id == trace_id
-
- assert len(trace.observations) == 2
-
- user_span = list(
- filter(
- lambda o: o.id == span_id,
- trace.observations,
- )
- )[0]
-
- assert user_span.input is None
- assert user_span.output is None
-
- langchain_generation_span = list(
- filter(
- lambda o: o.type == "GENERATION" and o.name == "ChatOpenAI",
- trace.observations,
- )
- )[0]
-
- assert langchain_generation_span.parent_observation_id == user_span.id
- assert langchain_generation_span.usage_details["input"] > 0
- assert langchain_generation_span.usage_details["output"] > 0
- assert langchain_generation_span.usage_details["total"] > 0
- assert langchain_generation_span.input is not None
- assert langchain_generation_span.input != ""
- assert langchain_generation_span.output is not None
- assert langchain_generation_span.output != ""
-
-
-@pytest.mark.skip(reason="missing api key")
-def test_callback_generated_from_trace_azure_chat():
- api_wrapper = LangfuseAPI()
- langfuse = Langfuse(debug=False)
-
- trace_id = create_uuid()
- trace = langfuse.trace(id=trace_id)
-
- handler = trace.getNewHandler()
-
- llm = AzureChatOpenAI(
- openai_api_base="AZURE_OPENAI_ENDPOINT",
- openai_api_version="2023-05-15",
- deployment_name="gpt-4",
- openai_api_key="AZURE_OPENAI_API_KEY",
- openai_api_type="azure",
- model_version="0613",
- temperature=0,
- )
- template = """You are a playwright. Given the title of play, it is your job to write a synopsis for that title.
- Title: {title}
- Playwright: This is a synopsis for the above play:"""
-
- prompt_template = PromptTemplate(input_variables=["title"], template=template)
- synopsis_chain = LLMChain(llm=llm, prompt=prompt_template)
-
- synopsis_chain.run("Tragedy at sunset on the beach", callbacks=[handler])
-
- langfuse.flush()
-
- trace = api_wrapper.get_trace(trace_id)
-
- assert handler.get_trace_id() == trace_id
- assert len(trace["observations"]) == 2
- assert trace["id"] == trace_id
-
-
-@pytest.mark.skip(reason="missing api key")
-def test_mistral():
- from langchain_core.messages import HumanMessage
- from langchain_mistralai.chat_models import ChatMistralAI
-
- callback = CallbackHandler(debug=False)
-
- chat = ChatMistralAI(model="mistral-small", callbacks=[callback])
- messages = [HumanMessage(content="say a brief hello")]
- chat.invoke(messages)
-
- callback.flush()
-
- trace_id = callback.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
-
- assert trace.id == trace_id
- assert len(trace.observations) == 2
-
- generation = list(filter(lambda o: o.type == "GENERATION", trace.observations))[0]
- assert generation.model == "mistral-small"
-
-
-@pytest.mark.skip(reason="missing api key")
-def test_vertex():
- from langchain.llms import VertexAI
-
- callback = CallbackHandler(debug=False)
-
- llm = VertexAI(callbacks=[callback])
- llm.predict("say a brief hello", callbacks=[callback])
-
- callback.flush()
-
- trace_id = callback.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
-
- assert trace.id == trace_id
- assert len(trace.observations) == 2
-
- generation = list(filter(lambda o: o.type == "GENERATION", trace.observations))[0]
- assert generation.model == "text-bison"
-
-
-@pytest.mark.skip(reason="rate limits")
-def test_callback_generated_from_trace_anthropic():
- langfuse = Langfuse(debug=False)
-
- trace_id = create_uuid()
- trace = langfuse.trace(id=trace_id)
-
- handler = trace.getNewHandler()
-
- llm = Anthropic(
- model="claude-instant-1.2",
- )
- template = """You are a playwright. Given the title of play, it is your job to write a synopsis for that title.
- Title: {title}
- Playwright: This is a synopsis for the above play:"""
-
- prompt_template = PromptTemplate(input_variables=["title"], template=template)
- synopsis_chain = LLMChain(llm=llm, prompt=prompt_template)
-
- synopsis_chain.run("Tragedy at sunset on the beach", callbacks=[handler])
-
- langfuse.flush()
-
- trace = get_api().trace.get(trace_id)
-
- assert handler.get_trace_id() == trace_id
- assert len(trace.observations) == 2
- assert trace.id == trace_id
- for observation in trace.observations:
- if observation.type == "GENERATION":
- assert observation.usage_details["input"] > 0
- assert observation.usage_details["output"] > 0
- assert observation.usage_details["total"] > 0
- assert observation.output is not None
- assert observation.output != ""
- assert isinstance(observation.input, str) is True
- assert isinstance(observation.output, str) is True
- assert observation.input != ""
- assert observation.model == "claude-instant-1.2"
-
-
-def test_basic_chat_openai():
- callback = CallbackHandler(debug=False)
-
- chat = ChatOpenAI(temperature=0)
-
- messages = [
- SystemMessage(
- content="You are a helpful assistant that translates English to French."
- ),
- HumanMessage(
- content="Translate this sentence from English to French. I love programming."
- ),
- ]
-
- chat(messages, callbacks=[callback])
- callback.flush()
-
- trace_id = callback.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
-
- assert trace.id == trace_id
- assert len(trace.observations) == 1
-
- assert trace.output == trace.observations[0].output
- assert trace.input == trace.observations[0].input
-
- assert trace.observations[0].input == [
- {
- "role": "system",
- "content": "You are a helpful assistant that translates English to French.",
- },
- {
- "role": "user",
- "content": "Translate this sentence from English to French. I love programming.",
- },
- ]
- assert trace.observations[0].output["role"] == "assistant"
-
-
-def test_basic_chat_openai_based_on_trace():
- from langchain.schema import HumanMessage, SystemMessage
-
- trace_id = create_uuid()
-
- langfuse = Langfuse(debug=False)
- trace = langfuse.trace(id=trace_id)
-
- callback = trace.get_langchain_handler()
-
- chat = ChatOpenAI(temperature=0)
-
- messages = [
- SystemMessage(
- content="You are a helpful assistant that translates English to French."
- ),
- HumanMessage(
- content="Translate this sentence from English to French. I love programming."
- ),
- ]
-
- chat(messages, callbacks=[callback])
- callback.flush()
-
- trace_id = callback.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
-
- assert trace.id == trace_id
- assert len(trace.observations) == 1
-
-
-def test_callback_from_trace_with_trace_update():
- langfuse = Langfuse(debug=False)
-
- trace_id = create_uuid()
- trace = langfuse.trace(id=trace_id)
-
- handler = trace.get_langchain_handler(update_parent=True)
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
- template = """You are a playwright. Given the title of play, it is your job to write a synopsis for that title.
- Title: {title}
- Playwright: This is a synopsis for the above play:"""
-
- prompt_template = PromptTemplate(input_variables=["title"], template=template)
- synopsis_chain = LLMChain(llm=llm, prompt=prompt_template)
-
- synopsis_chain.run("Tragedy at sunset on the beach", callbacks=[handler])
-
- langfuse.flush()
-
- trace_id = handler.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
-
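-    # With update_parent=True the handler also writes input/output onto the existing trace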
- assert trace.input is not None
- assert trace.output is not None
-
- assert len(trace.observations) == 2
- assert handler.get_trace_id() == trace_id
- assert trace.id == trace_id
-
- generations = list(filter(lambda x: x.type == "GENERATION", trace.observations))
- assert len(generations) > 0
- for generation in generations:
- assert generation.input is not None
- assert generation.output is not None
- assert generation.usage_details["total"] is not None
- assert generation.usage_details["input"] is not None
- assert generation.usage_details["output"] is not None
-
-
-def test_callback_from_span_with_span_update():
- langfuse = Langfuse(debug=False)
-
- trace_id = create_uuid()
- span_id = create_uuid()
- trace = langfuse.trace(id=trace_id)
- span = trace.span(id=span_id)
-
- handler = span.get_langchain_handler(update_parent=True)
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
- template = """You are a playwright. Given the title of play, it is your job to write a synopsis for that title.
- Title: {title}
- Playwright: This is a synopsis for the above play:"""
-
- prompt_template = PromptTemplate(input_variables=["title"], template=template)
- synopsis_chain = LLMChain(llm=llm, prompt=prompt_template)
-
- synopsis_chain.run("Tragedy at sunset on the beach", callbacks=[handler])
-
- langfuse.flush()
-
- trace_id = handler.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
-
- assert trace.input is None
- assert trace.output is None
- assert trace.metadata == {}
-
- assert len(trace.observations) == 3
- assert handler.get_trace_id() == trace_id
- assert trace.id == trace_id
- assert handler.root_span.id == span_id
-
- root_span_observation = [o for o in trace.observations if o.id == span_id][0]
- assert root_span_observation.input is not None
- assert root_span_observation.output is not None
-
- generations = list(filter(lambda x: x.type == "GENERATION", trace.observations))
- assert len(generations) > 0
- for generation in generations:
- assert generation.input is not None
- assert generation.output is not None
- assert generation.usage_details["total"] is not None
- assert generation.usage_details["input"] is not None
- assert generation.usage_details["output"] is not None
-
-
-def test_callback_from_trace_simple_chain():
- langfuse = Langfuse(debug=False)
-
- trace_id = create_uuid()
- trace = langfuse.trace(id=trace_id)
-
- handler = trace.getNewHandler()
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
- template = """You are a playwright. Given the title of play, it is your job to write a synopsis for that title.
- Title: {title}
- Playwright: This is a synopsis for the above play:"""
-
- prompt_template = PromptTemplate(input_variables=["title"], template=template)
- synopsis_chain = LLMChain(llm=llm, prompt=prompt_template)
-
- synopsis_chain.run("Tragedy at sunset on the beach", callbacks=[handler])
-
- langfuse.flush()
-
- trace_id = handler.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
- assert trace.input is None
- assert trace.output is None
-
- assert len(trace.observations) == 2
- assert handler.get_trace_id() == trace_id
- assert trace.id == trace_id
-
- generations = list(filter(lambda x: x.type == "GENERATION", trace.observations))
- assert len(generations) > 0
- for generation in generations:
- assert generation.input is not None
- assert generation.output is not None
- assert generation.usage_details["total"] is not None
- assert generation.usage_details["input"] is not None
- assert generation.usage_details["output"] is not None
-
-
-def test_next_span_id_from_trace_simple_chain():
- api_wrapper = LangfuseAPI()
- langfuse = Langfuse()
-
- trace_id = create_uuid()
- trace = langfuse.trace(id=trace_id)
-
- handler = trace.getNewHandler()
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
- template = """You are a playwright. Given the title of play, it is your job to write a synopsis for that title.
- Title: {title}
- Playwright: This is a synopsis for the above play:"""
-
- prompt_template = PromptTemplate(input_variables=["title"], template=template)
- synopsis_chain = LLMChain(llm=llm, prompt=prompt_template)
-
- synopsis_chain.run("Tragedy at sunset on the beach", callbacks=[handler])
-
- next_span_id = create_uuid()
- handler.setNextSpan(next_span_id)
-
- synopsis_chain.run("Comedy at sunset on the beach", callbacks=[handler])
-
- langfuse.flush()
-
- trace_id = handler.get_trace_id()
-
- trace = api_wrapper.get_trace(trace_id)
-
- assert len(trace["observations"]) == 4
- assert handler.get_trace_id() == trace_id
- assert trace["id"] == trace_id
-
- assert any(
- observation["id"] == next_span_id for observation in trace["observations"]
- )
- for observation in trace["observations"]:
- if observation["type"] == "GENERATION":
- assert observation["promptTokens"] > 0
- assert observation["completionTokens"] > 0
- assert observation["totalTokens"] > 0
- assert observation["input"] is not None
- assert observation["input"] != ""
- assert observation["output"] is not None
- assert observation["output"] != ""
-
-
-def test_callback_sequential_chain():
- handler = CallbackHandler(debug=False)
-
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
- template = """You are a playwright. Given the title of play, it is your job to write a synopsis for that title.
- Title: {title}
- Playwright: This is a synopsis for the above play:"""
-
- prompt_template = PromptTemplate(input_variables=["title"], template=template)
- synopsis_chain = LLMChain(llm=llm, prompt=prompt_template)
-
- template = """You are a play critic from the New York Times.
- Given the synopsis of play, it is your job to write a review for that play.
-
- Play Synopsis:
- {synopsis}
- Review from a New York Times play critic of the above play:"""
- prompt_template = PromptTemplate(input_variables=["synopsis"], template=template)
- review_chain = LLMChain(llm=llm, prompt=prompt_template)
-
- overall_chain = SimpleSequentialChain(
- chains=[synopsis_chain, review_chain],
- )
- overall_chain.run("Tragedy at sunset on the beach", callbacks=[handler])
-
- handler.flush()
-
- trace_id = handler.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
-
- assert len(trace.observations) == 5
- assert trace.id == trace_id
-
- for observation in trace.observations:
- if observation.type == "GENERATION":
- assert observation.usage_details["input"] > 0
- assert observation.usage_details["output"] > 0
- assert observation.usage_details["total"] > 0
- assert observation.input is not None
- assert observation.input != ""
- assert observation.output is not None
- assert observation.output != ""
-
-
-def test_stuffed_chain():
- with open("./static/state_of_the_union_short.txt", encoding="utf-8") as f:
- api_wrapper = LangfuseAPI()
- handler = CallbackHandler(debug=False)
-
- text = f.read()
- docs = [Document(page_content=text)]
- llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
-
- template = """
- Compose a concise and brief summary of the following text:
- TEXT: `{text}`
- """
-
- prompt = PromptTemplate(input_variables=["text"], template=template)
-
- chain = load_summarize_chain(
- llm, chain_type="stuff", prompt=prompt, verbose=False
- )
-
- chain.run(docs, callbacks=[handler])
-
- handler.flush()
-
- trace_id = handler.get_trace_id()
-
- trace = api_wrapper.get_trace(trace_id)
-
- assert len(trace["observations"]) == 3
- for observation in trace["observations"]:
- if observation["type"] == "GENERATION":
- assert observation["promptTokens"] > 0
- assert observation["completionTokens"] > 0
- assert observation["totalTokens"] > 0
- assert observation["input"] is not None
- assert observation["input"] != ""
- assert observation["output"] is not None
- assert observation["output"] != ""
-
-
-def test_callback_retriever():
- api_wrapper = LangfuseAPI()
- handler = CallbackHandler(debug=False)
-
- loader = TextLoader("./static/state_of_the_union.txt", encoding="utf8")
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
-
- documents = loader.load()
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
- texts = text_splitter.split_documents(documents)
-
- embeddings = OpenAIEmbeddings(openai_api_key=os.environ.get("OPENAI_API_KEY"))
- docsearch = Chroma.from_documents(texts, embeddings)
-
- query = "What did the president say about Ketanji Brown Jackson"
-
- chain = RetrievalQA.from_chain_type(
- llm,
- retriever=docsearch.as_retriever(),
- )
-
- chain.run(query, callbacks=[handler])
- handler.flush()
-
- trace_id = handler.get_trace_id()
-
- trace = api_wrapper.get_trace(trace_id)
-
- assert len(trace["observations"]) == 5
- for observation in trace["observations"]:
- if observation["type"] == "GENERATION":
- assert observation["promptTokens"] > 0
- assert observation["completionTokens"] > 0
- assert observation["totalTokens"] > 0
- assert observation["input"] is not None
- assert observation["input"] != ""
- assert observation["output"] is not None
- assert observation["output"] != ""
-
-
-def test_callback_retriever_with_sources():
- api_wrapper = LangfuseAPI()
- handler = CallbackHandler(debug=False)
-
- loader = TextLoader("./static/state_of_the_union.txt", encoding="utf8")
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
-
- documents = loader.load()
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
- texts = text_splitter.split_documents(documents)
-
- embeddings = OpenAIEmbeddings(openai_api_key=os.environ.get("OPENAI_API_KEY"))
- docsearch = Chroma.from_documents(texts, embeddings)
-
- query = "What did the president say about Ketanji Brown Jackson"
-
- chain = RetrievalQA.from_chain_type(
- llm, retriever=docsearch.as_retriever(), return_source_documents=True
- )
-
- chain(query, callbacks=[handler])
- handler.flush()
-
- trace_id = handler.get_trace_id()
-
- trace = api_wrapper.get_trace(trace_id)
-
- assert len(trace["observations"]) == 5
- for observation in trace["observations"]:
- if observation["type"] == "GENERATION":
- assert observation["promptTokens"] > 0
- assert observation["completionTokens"] > 0
- assert observation["totalTokens"] > 0
- assert observation["input"] is not None
- assert observation["input"] != ""
- assert observation["output"] is not None
- assert observation["output"] != ""
-
-
-def test_callback_retriever_conversational_with_memory():
- handler = CallbackHandler(debug=False)
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
- conversation = ConversationChain(
- llm=llm, verbose=True, memory=ConversationBufferMemory(), callbacks=[handler]
- )
- conversation.predict(input="Hi there!", callbacks=[handler])
- handler.flush()
-
- trace = get_api().trace.get(handler.get_trace_id())
-
- generations = list(filter(lambda x: x.type == "GENERATION", trace.observations))
- assert len(generations) == 1
-
- for generation in generations:
- assert generation.input is not None
- assert generation.output is not None
- assert generation.input != ""
- assert generation.output != ""
- assert generation.usage_details["total"] is not None
- assert generation.usage_details["input"] is not None
- assert generation.usage_details["output"] is not None
-
-
-def test_callback_retriever_conversational():
- api_wrapper = LangfuseAPI()
- handler = CallbackHandler(debug=False)
-
- loader = TextLoader("./static/state_of_the_union.txt", encoding="utf8")
-
- documents = loader.load()
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
- texts = text_splitter.split_documents(documents)
-
- embeddings = OpenAIEmbeddings(openai_api_key=os.environ.get("OPENAI_API_KEY"))
- docsearch = Chroma.from_documents(texts, embeddings)
-
- query = "What did the president say about Ketanji Brown Jackson"
-
- chain = ConversationalRetrievalChain.from_llm(
- ChatOpenAI(
- openai_api_key=os.environ.get("OPENAI_API_KEY"),
- temperature=0.5,
- model="gpt-3.5-turbo-16k",
- ),
- docsearch.as_retriever(search_kwargs={"k": 6}),
- return_source_documents=True,
- )
-
- chain({"question": query, "chat_history": []}, callbacks=[handler])
- handler.flush()
-
- trace_id = handler.get_trace_id()
-
- trace = api_wrapper.get_trace(trace_id)
-
- assert len(trace["observations"]) == 5
- for observation in trace["observations"]:
- if observation["type"] == "GENERATION":
- assert observation["promptTokens"] > 0
- assert observation["completionTokens"] > 0
- assert observation["totalTokens"] > 0
- assert observation["input"] is not None
- assert observation["input"] != ""
- assert observation["output"] is not None
- assert observation["output"] != ""
-
-
-def test_callback_simple_openai():
- handler = CallbackHandler()
-
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
-
- text = "What would be a good company name for a company that makes colorful socks?"
-
- llm.predict(text, callbacks=[handler])
-
- handler.flush()
-
- trace_id = handler.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
-
- assert len(trace.observations) == 1
-
- for observation in trace.observations:
- if observation.type == "GENERATION":
- print(observation.usage_details)
- assert observation.usage_details["input"] > 0
- assert observation.usage_details["output"] > 0
- assert observation.usage_details["total"] > 0
- assert observation.input is not None
- assert observation.input != ""
- assert observation.output is not None
- assert observation.output != ""
-
-
-def test_callback_multiple_invocations_on_different_traces():
- handler = CallbackHandler(debug=False)
-
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
-
- text = "What would be a good company name for a company that makes colorful socks?"
-
- llm.predict(text, callbacks=[handler])
-
- trace_id_one = handler.get_trace_id()
-
- llm.predict(text, callbacks=[handler])
-
- trace_id_two = handler.get_trace_id()
-
- handler.flush()
-
- assert trace_id_one != trace_id_two
-
- trace_one = get_api().trace.get(trace_id_one)
- trace_two = get_api().trace.get(trace_id_two)
-
- for test_data in [
- {"trace": trace_one, "expected_trace_id": trace_id_one},
- {"trace": trace_two, "expected_trace_id": trace_id_two},
- ]:
- assert len(test_data["trace"].observations) == 1
- assert test_data["trace"].id == test_data["expected_trace_id"]
- for observation in test_data["trace"].observations:
- if observation.type == "GENERATION":
- assert observation.usage_details["input"] > 0
- assert observation.usage_details["output"] > 0
- assert observation.usage_details["total"] > 0
- assert observation.input is not None
- assert observation.input != ""
- assert observation.output is not None
- assert observation.output != ""
-
-
-@pytest.mark.skip(reason="inference cost")
-def test_callback_simple_openai_streaming():
- api_wrapper = LangfuseAPI()
- handler = CallbackHandler(debug=False)
-
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"), streaming=False)
-
- text = "What would be a good company name for a company that makes laptops?"
-
- llm.predict(text, callbacks=[handler])
-
- handler.flush()
-
- trace_id = handler.get_trace_id()
-
- trace = api_wrapper.get_trace(trace_id)
-
- generation = trace["observations"][1]
-
- assert generation["promptTokens"] is not None
- assert generation["completionTokens"] is not None
- assert generation["totalTokens"] is not None
-
- assert len(trace["observations"]) == 2
- for observation in trace["observations"]:
- if observation["type"] == "GENERATION":
- assert observation["promptTokens"] > 0
- assert observation["completionTokens"] > 0
- assert observation["totalTokens"] > 0
- assert observation["input"] is not None
- assert observation["input"] != ""
- assert observation["output"] is not None
- assert observation["output"] != ""
-
-
-@pytest.mark.skip(reason="no serpapi setup in CI")
-def test_tools():
- handler = CallbackHandler(debug=False)
-
- llm = ChatOpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
-
- tools = load_tools(["serpapi", "llm-math"], llm=llm)
-
- agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)
-
- agent.run(
- "Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?",
- callbacks=[handler],
- )
-
- handler.flush()
-
- trace_id = handler.get_trace_id()
-
- trace = get_api().trace.get(trace_id)
- assert trace.id == trace_id
- assert len(trace.observations) > 2
-
- generations = list(filter(lambda x: x.type == "GENERATION", trace.observations))
- assert len(generations) > 0
-
- for generation in generations:
- assert generation.input is not None
- assert generation.output is not None
- assert generation.input != ""
- assert generation.output != ""
- assert generation.total_tokens is not None
- assert generation.prompt_tokens is not None
- assert generation.completion_tokens is not None
-
-
-@pytest.mark.skip(reason="inference cost")
-def test_callback_huggingface_hub():
- api_wrapper = LangfuseAPI()
- handler = CallbackHandler(debug=False)
-
- def initialize_huggingface_llm(prompt: PromptTemplate) -> LLMChain:
- repo_id = "google/flan-t5-small"
- # Experiment with the max_length parameter and temperature
- llm = HuggingFaceHub(
- repo_id=repo_id, model_kwargs={"temperature": 0.1, "max_length": 500}
- )
- return LLMChain(prompt=prompt, llm=llm)
-
- hugging_chain = initialize_huggingface_llm(
- prompt=PromptTemplate(
- input_variables=["title"],
- template="""
-You are a playwright. Given the title of play, it is your job to write a synopsis for that title.
-Title: {title}
- """,
- )
- )
-
- hugging_chain.run(title="Mission to Mars", callbacks=[handler])
-
- handler.langfuse.flush()
-
- trace_id = handler.get_trace_id()
-
- trace = api_wrapper.get_trace(trace_id)
-
- assert len(trace["observations"]) == 2
- for observation in trace["observations"]:
- if observation["type"] == "GENERATION":
- assert observation["promptTokens"] > 0
- assert observation["completionTokens"] > 0
- assert observation["totalTokens"] > 0
- assert observation["input"] is not None
- assert observation["input"] != ""
- assert observation["output"] is not None
- assert observation["output"] != ""
-
-
-def test_callback_openai_functions_python():
- handler = CallbackHandler(debug=False)
- assert handler.langfuse.base_url == "http://localhost:3000"
-
- llm = ChatOpenAI(model="gpt-4", temperature=0)
- prompt = ChatPromptTemplate.from_messages(
- [
- (
- "system",
- "You are a world class algorithm for extracting information in structured formats.",
- ),
- (
- "human",
- "Use the given format to extract information from the following input: {input}",
- ),
- ("human", "Tip: Make sure to answer in the correct format"),
- ]
- )
-
- class OptionalFavFood(BaseModel):
- """Either a food or null."""
-
- food: Optional[str] = Field(
- None,
- description="Either the name of a food or null. Should be null if the food isn't known.",
- )
-
- def record_person(name: str, age: int, fav_food: OptionalFavFood) -> str:
- """Record some basic identifying information about a person.
-
- Args:
- name: The person's name.
- age: The person's age in years.
- fav_food: An OptionalFavFood object that either contains the person's favorite food or a null value.
- Food should be null if it's not known.
- """
- return (
- f"Recording person {name} of age {age} with favorite food {fav_food.food}!"
- )
-
- def record_dog(name: str, color: str, fav_food: OptionalFavFood) -> str:
- """Record some basic identifying information about a dog.
-
- Args:
- name: The dog's name.
- color: The dog's color.
- fav_food: An OptionalFavFood object that either contains the dog's favorite food or a null value.
- Food should be null if it's not known.
- """
- return f"Recording dog {name} of color {color} with favorite food {fav_food}!"
-
- chain = create_openai_fn_chain(
- [record_person, record_dog], llm, prompt, callbacks=[handler]
- )
- chain.run(
- "I can't find my dog Henry anywhere, he's a small brown beagle. Could you send a message about him?",
- callbacks=[handler],
- )
-
- handler.langfuse.flush()
-
- trace = get_api().trace.get(handler.get_trace_id())
-
- assert len(trace.observations) == 2
-
- generations = list(filter(lambda x: x.type == "GENERATION", trace.observations))
- assert len(generations) > 0
-
- for generation in generations:
- assert generation.input is not None
- assert generation.output is not None
- assert generation.input == [
- {
- "role": "system",
- "content": "You are a world class algorithm for extracting information in structured formats.",
- },
- {
- "role": "user",
- "content": "Use the given format to extract information from the following input: I can't find my dog Henry anywhere, he's a small brown beagle. Could you send a message about him?",
- },
- {
- "role": "user",
- "content": "Tip: Make sure to answer in the correct format",
- },
- ]
- assert generation.output == {
- "role": "assistant",
- "content": "",
- "additional_kwargs": {
- "function_call": {
- "arguments": '{\n "name": "Henry",\n "color": "brown",\n "fav_food": {\n "food": null\n }\n}',
- "name": "record_dog",
- },
- "refusal": None,
- },
- }
- assert generation.usage_details["total"] is not None
- assert generation.usage_details["input"] is not None
- assert generation.usage_details["output"] is not None
-
-
-def test_agent_executor_chain():
- from langchain.agents import AgentExecutor, create_react_agent
- from langchain.tools import tool
-
- prompt = PromptTemplate.from_template("""
- Answer the following questions as best you can. You have access to the following tools:
-
- {tools}
-
- Use the following format:
-
- Question: the input question you must answer
- Thought: you should always think about what to do
- Action: the action to take, should be one of [{tool_names}]
- Action Input: the input to the action
- Observation: the result of the action
- ... (this Thought/Action/Action Input/Observation can repeat N times)
- Thought: I now know the final answer
- Final Answer: the final answer to the original input question
-
- Begin!
-
- Question: {input}
- Thought:{agent_scratchpad}
- """)
-
- callback = CallbackHandler(debug=True)
- llm = OpenAI(temperature=0)
-
- @tool
- def get_word_length(word: str) -> int:
- """Returns the length of a word."""
- return len(word)
-
- tools = [get_word_length]
- agent = create_react_agent(llm, tools, prompt)
- agent_executor = AgentExecutor(agent=agent, tools=tools, handle_parsing_errors=True)
-
- agent_executor.invoke(
- {"input": "what is the length of the word LangFuse?"},
- config={"callbacks": [callback]},
- )
-
- callback.flush()
-
- trace = get_api().trace.get(callback.get_trace_id())
-
- generations = list(filter(lambda x: x.type == "GENERATION", trace.observations))
- assert len(generations) > 0
-
- for generation in generations:
- assert generation.input is not None
- assert generation.output is not None
- assert generation.input != ""
- assert generation.output != ""
- assert generation.usage_details["total"] is not None
- assert generation.usage_details["input"] is not None
- assert generation.usage_details["output"] is not None
-
-
-# def test_create_extraction_chain():
-# import os
-# from uuid import uuid4
-
-# from langchain.chains import create_extraction_chain
-# from langchain.chat_models import ChatOpenAI
-# from langchain.document_loaders import TextLoader
-# from langchain.embeddings.openai import OpenAIEmbeddings
-# from langchain.text_splitter import CharacterTextSplitter
-# from langchain.vectorstores import Chroma
-
-# from langfuse.client import Langfuse
-
-# def create_uuid():
-# return str(uuid4())
-
-# langfuse = Langfuse(debug=False, host="http://localhost:3000")
-
-# trace_id = create_uuid()
-
-# trace = langfuse.trace(id=trace_id)
-# handler = trace.getNewHandler()
-
-# loader = TextLoader("./static/state_of_the_union.txt", encoding="utf8")
-
-# documents = loader.load()
-# text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-# texts = text_splitter.split_documents(documents)
-
-# embeddings = OpenAIEmbeddings(openai_api_key=os.environ.get("OPENAI_API_KEY"))
-# vector_search = Chroma.from_documents(texts, embeddings)
-
-# main_character = vector_search.similarity_search(
-# "Who is the main character and what is the summary of the text?"
-# )
-
-# llm = ChatOpenAI(
-# openai_api_key=os.getenv("OPENAI_API_KEY"),
-# temperature=0,
-# streaming=False,
-# model="gpt-3.5-turbo-16k-0613",
-# )
-
-# schema = {
-# "properties": {
-# "Main character": {"type": "string"},
-# "Summary": {"type": "string"},
-# },
-# "required": [
-# "Main character",
-# "Summary",
-# ],
-# }
-# chain = create_extraction_chain(schema, llm)
-
-# chain.run(main_character, callbacks=[handler])
-
-# handler.flush()
-
-# trace = get_api().trace.get(handler.get_trace_id())
-
-# generations = list(filter(lambda x: x.type == "GENERATION", trace.observations))
-# assert len(generations) > 0
-
-# for generation in generations:
-# assert generation.input is not None
-# assert generation.output is not None
-# assert generation.input != ""
-# assert generation.output != ""
-# assert generation.usage_details["total"] is not None
-# assert generation.usage_details["input"] is not None
-# assert generation.usage_details["output"] is not None
-
-
-@pytest.mark.skip(reason="inference cost")
-def test_aws_bedrock_chain():
- import os
-
- import boto3
- from langchain.llms.bedrock import Bedrock
-
- api_wrapper = LangfuseAPI()
- handler = CallbackHandler(debug=False)
-
- bedrock_client = boto3.client(
- "bedrock-runtime",
- region_name="us-east-1",
- aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
- aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
- aws_session_token=os.environ.get("AWS_SESSION_TOKEN"),
- )
-
- llm = Bedrock(
- model_id="anthropic.claude-instant-v1",
- client=bedrock_client,
- model_kwargs={
- "max_tokens_to_sample": 1000,
- "temperature": 0.0,
- },
- )
-
- text = "What would be a good company name for a company that makes colorful socks?"
-
- llm.predict(text, callbacks=[handler])
-
- handler.flush()
-
- trace_id = handler.get_trace_id()
-
- trace = api_wrapper.get_trace(trace_id)
-
- generation = trace["observations"][1]
-
- assert generation["promptTokens"] is not None
- assert generation["completionTokens"] is not None
- assert generation["totalTokens"] is not None
-
- assert len(trace["observations"]) == 2
- for observation in trace["observations"]:
- if observation["type"] == "GENERATION":
- assert observation["promptTokens"] > 0
- assert observation["completionTokens"] > 0
- assert observation["totalTokens"] > 0
- assert observation["input"] is not None
- assert observation["input"] != ""
- assert observation["output"] is not None
- assert observation["output"] != ""
- assert observation["name"] == "Bedrock"
- assert observation["model"] == "claude"
-
-
-def test_unimplemented_model():
- callback = CallbackHandler(debug=False)
-
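- # A minimal custom LLM without model metadata, used to check that tracing still records its generations.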
- class CustomLLM(LLM):
- n: int
-
- @property
- def _llm_type(self) -> str:
- return "custom"
-
- def _call(
- self,
- prompt: str,
- stop: Optional[List[str]] = None,
- run_manager: Optional[CallbackManagerForLLMRun] = None,
- **kwargs: Any,
- ) -> str:
- if stop is not None:
- raise ValueError("stop kwargs are not permitted.")
- return "This is a great text, which i can take characters from "[: self.n]
-
- @property
- def _identifying_params(self) -> Mapping[str, Any]:
- """Get the identifying parameters."""
- return {"n": self.n}
-
- custom_llm = CustomLLM(n=10)
-
- llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
- template = """You are a playwright. Given the title of play, it is your job to write a synopsis for that title.
- Title: {title}
- Playwright: This is a synopsis for the above play:"""
-
- prompt_template = PromptTemplate(input_variables=["title"], template=template)
- synopsis_chain = LLMChain(llm=llm, prompt=prompt_template)
-
- template = """You are a play critic from the New York Times.
- Given the synopsis of play, it is your job to write a review for that play.
-
- Play Synopsis:
- {synopsis}
- Review from a New York Times play critic of the above play:"""
- prompt_template = PromptTemplate(input_variables=["synopsis"], template=template)
- custom_llm_chain = LLMChain(llm=custom_llm, prompt=prompt_template)
-
- sequential_chain = SimpleSequentialChain(chains=[custom_llm_chain, synopsis_chain])
- sequential_chain.run("This is a foobar thing", callbacks=[callback])
-
- callback.flush()
-
- trace = get_api().trace.get(callback.get_trace_id())
-
- assert len(trace.observations) == 5
-
- custom_generation = list(
- filter(
- lambda x: x.type == "GENERATION" and x.name == "CustomLLM",
- trace.observations,
- )
- )[0]
-
- assert custom_generation.output == "This is a"
- assert custom_generation.model is None
-
-
-def test_names_on_spans_lcel():
- from langchain_core.output_parsers import StrOutputParser
- from langchain_core.runnables import RunnablePassthrough
- from langchain_openai import OpenAIEmbeddings
-
- callback = CallbackHandler(debug=False)
- model = ChatOpenAI(temperature=0)
-
- template = """Answer the question based only on the following context:
- {context}
-
- Question: {question}
- """
- prompt = ChatPromptTemplate.from_template(template)
-
- loader = TextLoader("./static/state_of_the_union.txt", encoding="utf8")
-
- documents = loader.load()
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
- texts = text_splitter.split_documents(documents)
-
- embeddings = OpenAIEmbeddings(openai_api_key=os.environ.get("OPENAI_API_KEY"))
- docsearch = Chroma.from_documents(texts, embeddings)
-
- retriever = docsearch.as_retriever()
-
- retrieval_chain = (
- {
- "context": retriever.with_config(run_name="Docs"),
- "question": RunnablePassthrough(),
- }
- | prompt
- | model.with_config(run_name="my_llm")
- | StrOutputParser()
- )
-
- retrieval_chain.invoke(
- "What did the president say about Ketanji Brown Jackson?",
- config={
- "callbacks": [callback],
- },
- )
-
- callback.flush()
-
- trace = get_api().trace.get(callback.get_trace_id())
-
- assert len(trace.observations) == 7
-
- assert (
- len(
- list(
- filter(
- lambda x: x.type == "GENERATION" and x.name == "my_llm",
- trace.observations,
- )
- )
- )
- == 1
- )
-
- assert (
- len(
- list(
- filter(
- lambda x: x.type == "SPAN" and x.name == "Docs",
- trace.observations,
- )
- )
- )
- == 1
- )
-
-
-def test_openai_instruct_usage():
- from langchain_core.output_parsers.string import StrOutputParser
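- # update_parent=True lets the handler write the chain's input and output onto the trace itself.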
- from langchain_core.runnables import Runnable
- from langchain_openai import OpenAI
-
- lf_handler = CallbackHandler(debug=True)
-
- runnable_chain: Runnable = (
- PromptTemplate.from_template(
- """Answer the question based only on the following context:
-
- Question: {question}
-
- Answer in the following language: {language}
- """
- )
- | OpenAI(
- model="gpt-3.5-turbo-instruct",
- temperature=0,
- callbacks=[lf_handler],
- max_retries=3,
- timeout=30,
- )
- | StrOutputParser()
- )
- input_list = [
- {"question": "where did harrison work", "language": "english"},
- {"question": "how is your day", "language": "english"},
- ]
- runnable_chain.batch(input_list)
-
- lf_handler.flush()
-
- observations = get_api().trace.get(lf_handler.get_trace_id()).observations
-
- assert len(observations) == 2
-
- for observation in observations:
- assert observation.type == "GENERATION"
- assert observation.output is not None
- assert observation.output != ""
- assert observation.input is not None
- assert observation.input != ""
- assert observation.usage is not None
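- # With a span-level handler, update_parent should update the root span rather than the trace, so trace-level fields stay empty.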
- assert observation.usage_details["input"] is not None
- assert observation.usage_details["output"] is not None
- assert observation.usage_details["total"] is not None
-
-
-def test_get_langchain_prompt_with_jinja2():
- langfuse = Langfuse()
-
- prompt = 'this is a {{ template }} template that should remain unchanged: {{ handle_text(payload["Name"], "Name is") }}'
- langfuse.create_prompt(
- name="test_jinja2",
- prompt=prompt,
- labels=["production"],
- )
-
- langfuse_prompt = langfuse.get_prompt(
- "test_jinja2", fetch_timeout_seconds=1, max_retries=3
- )
-
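- # Simple {{var}} placeholders should become single-brace LangChain variables, while the Jinja2 expression is left untouched.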
- assert (
- langfuse_prompt.get_langchain_prompt()
- == 'this is a {template} template that should remain unchanged: {{ handle_text(payload["Name"], "Name is") }}'
- )
-
-
-def test_get_langchain_prompt():
- langfuse = Langfuse()
-
- test_prompts = ["This is a {{test}}", "This is a {{test}}. And this is a {{test2}}"]
-
- for i, test_prompt in enumerate(test_prompts):
- langfuse.create_prompt(
- name=f"test_{i}",
- prompt=test_prompt,
- config={
- "model": "gpt-3.5-turbo-1106",
- "temperature": 0,
- },
- labels=["production"],
- )
-
- langfuse_prompt = langfuse.get_prompt(f"test_{i}")
-
- langchain_prompt = ChatPromptTemplate.from_template(
- langfuse_prompt.get_langchain_prompt()
- )
-
- if i == 0:
- assert langchain_prompt.format(test="test") == "Human: This is a test"
- else:
- assert (
- langchain_prompt.format(test="test", test2="test2")
- == "Human: This is a test. And this is a test2"
- )
-
-
-def test_get_langchain_chat_prompt():
- langfuse = Langfuse()
-
- test_prompts = [
- [{"role": "system", "content": "This is a {{test}} with a {{test}}"}],
- [
- {"role": "system", "content": "This is a {{test}}."},
- {"role": "user", "content": "And this is a {{test2}}"},
- ],
- ]
-
- for i, test_prompt in enumerate(test_prompts):
- langfuse.create_prompt(
- name=f"test_chat_{i}",
- prompt=test_prompt,
- type="chat",
- config={
- "model": "gpt-3.5-turbo-1106",
- "temperature": 0,
- },
- labels=["production"],
- )
-
- langfuse_prompt = langfuse.get_prompt(f"test_chat_{i}", type="chat")
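- # Preset the id of the next root span so the second chain run is recorded under it.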
- langchain_prompt = ChatPromptTemplate.from_messages(
- langfuse_prompt.get_langchain_prompt()
- )
-
- if i == 0:
- assert (
- langchain_prompt.format(test="test")
- == "System: This is a test with a test"
- )
- else:
- assert (
- langchain_prompt.format(test="test", test2="test2")
- == "System: This is a test.\nHuman: And this is a test2"
- )
-
-
-def test_disabled_langfuse():
- run_name_override = "This is a custom Run Name"
- handler = CallbackHandler(enabled=False, debug=False)
-
- prompt = ChatPromptTemplate.from_template("tell me a short joke about {topic}")
- model = ChatOpenAI(temperature=0)
-
- chain = prompt | model
-
- chain.invoke(
- {"topic": "ice cream"},
- config={
- "callbacks": [handler],
- "run_name": run_name_override,
- },
- )
-
- assert handler.langfuse.task_manager._ingestion_queue.empty()
-
- handler.flush()
-
- trace_id = handler.get_trace_id()
-
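- # With the handler disabled nothing is ingested, so fetching the trace is expected to fail.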
- with pytest.raises(Exception):
- get_api().trace.get(trace_id)
-
-
-def test_link_langfuse_prompts_invoke():
- langfuse = Langfuse()
- trace_name = "test_link_langfuse_prompts_invoke"
- session_id = "session_" + create_uuid()[:8]
- user_id = "user_" + create_uuid()[:8]
-
- # Create prompts
- joke_prompt_name = "joke_prompt_" + create_uuid()[:8]
- joke_prompt_string = "Tell me a joke involving the animal {{animal}}"
-
- explain_prompt_name = "explain_prompt_" + create_uuid()[:8]
- explain_prompt_string = "Explain the joke to me like I'm a 5 year old {{joke}}"
-
- langfuse.create_prompt(
- name=joke_prompt_name,
- prompt=joke_prompt_string,
- labels=["production"],
- )
-
- langfuse.create_prompt(
- name=explain_prompt_name,
- prompt=explain_prompt_string,
- labels=["production"],
- )
-
- # Get prompts
- langfuse_joke_prompt = langfuse.get_prompt(joke_prompt_name)
- langfuse_explain_prompt = langfuse.get_prompt(explain_prompt_name)
-
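- # Attaching the Langfuse prompt object via metadata links the resulting generations to the prompt name and version.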
- langchain_joke_prompt = PromptTemplate.from_template(
- langfuse_joke_prompt.get_langchain_prompt(),
- metadata={"langfuse_prompt": langfuse_joke_prompt},
- )
-
- langchain_explain_prompt = PromptTemplate.from_template(
- langfuse_explain_prompt.get_langchain_prompt(),
- metadata={"langfuse_prompt": langfuse_explain_prompt},
- )
-
- # Create chain
- parser = StrOutputParser()
- model = OpenAI()
- chain = (
- {"joke": langchain_joke_prompt | model | parser}
- | langchain_explain_prompt
- | model
- | parser
- )
-
- # Run chain
- langfuse_handler = CallbackHandler(debug=True)
-
- output = chain.invoke(
- {"animal": "dog"},
- config={
- "callbacks": [langfuse_handler],
- "run_name": trace_name,
- "tags": ["langchain-tag"],
- "metadata": {
- "langfuse_session_id": session_id,
- "langfuse_user_id": user_id,
- },
- },
- )
-
- langfuse_handler.flush()
- sleep(2)
-
- trace = get_api().trace.get(langfuse_handler.get_trace_id())
-
- assert trace.tags == ["langchain-tag"]
- assert trace.session_id == session_id
- assert trace.user_id == user_id
-
- observations = trace.observations
-
- generations = sorted(
- list(filter(lambda x: x.type == "GENERATION", observations)),
- key=lambda x: x.start_time,
- )
-
- assert len(generations) == 2
- assert generations[0].input == "Tell me a joke involving the animal dog"
- assert "Explain the joke to me like I'm a 5 year old" in generations[1].input
-
- assert generations[0].prompt_name == joke_prompt_name
- assert generations[1].prompt_name == explain_prompt_name
-
- assert generations[0].prompt_version == langfuse_joke_prompt.version
- assert generations[1].prompt_version == langfuse_explain_prompt.version
-
- assert generations[1].output == (output.strip() if output else None)
-
-
-def test_link_langfuse_prompts_stream():
- langfuse = Langfuse(debug=True)
- trace_name = "test_link_langfuse_prompts_stream"
- session_id = "session_" + create_uuid()[:8]
- user_id = "user_" + create_uuid()[:8]
-
- # Create prompts
- joke_prompt_name = "joke_prompt_" + create_uuid()[:8]
- joke_prompt_string = "Tell me a joke involving the animal {{animal}}"
-
- explain_prompt_name = "explain_prompt_" + create_uuid()[:8]
- explain_prompt_string = "Explain the joke to me like I'm a 5 year old {{joke}}"
-
- langfuse.create_prompt(
- name=joke_prompt_name,
- prompt=joke_prompt_string,
- labels=["production"],
- )
-
- langfuse.create_prompt(
- name=explain_prompt_name,
- prompt=explain_prompt_string,
- labels=["production"],
- )
-
- # Get prompts
- langfuse_joke_prompt = langfuse.get_prompt(joke_prompt_name)
- langfuse_explain_prompt = langfuse.get_prompt(explain_prompt_name)
-
- langchain_joke_prompt = PromptTemplate.from_template(
- langfuse_joke_prompt.get_langchain_prompt(),
- metadata={"langfuse_prompt": langfuse_joke_prompt},
- )
-
- langchain_explain_prompt = PromptTemplate.from_template(
- langfuse_explain_prompt.get_langchain_prompt(),
- metadata={"langfuse_prompt": langfuse_explain_prompt},
- )
-
- # Create chain
- parser = StrOutputParser()
- model = OpenAI()
- chain = (
- {"joke": langchain_joke_prompt | model | parser}
- | langchain_explain_prompt
- | model
- | parser
- )
-
- # Run chain
- langfuse_handler = CallbackHandler()
-
- stream = chain.stream(
- {"animal": "dog"},
- config={
- "callbacks": [langfuse_handler],
- "run_name": trace_name,
- "tags": ["langchain-tag"],
- "metadata": {
- "langfuse_session_id": session_id,
- "langfuse_user_id": user_id,
- },
- },
- )
-
- output = ""
- for chunk in stream:
- output += chunk
-
- langfuse_handler.flush()
- sleep(2)
-
- trace = get_api().trace.get(langfuse_handler.get_trace_id())
-
- assert trace.tags == ["langchain-tag"]
- assert trace.session_id == session_id
- assert trace.user_id == user_id
-
- observations = trace.observations
-
- generations = sorted(
- list(filter(lambda x: x.type == "GENERATION", observations)),
- key=lambda x: x.start_time,
- )
-
- assert len(generations) == 2
- assert generations[0].input == "Tell me a joke involving the animal dog"
- assert "Explain the joke to me like I'm a 5 year old" in generations[1].input
-
- assert generations[0].prompt_name == joke_prompt_name
- assert generations[1].prompt_name == explain_prompt_name
-
- assert generations[0].prompt_version == langfuse_joke_prompt.version
- assert generations[1].prompt_version == langfuse_explain_prompt.version
-
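- # Streaming should populate time-to-first-token on the generations.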
- assert generations[0].time_to_first_token is not None
- assert generations[1].time_to_first_token is not None
-
- assert generations[1].output == (output.strip() if output else None)
-
-
-def test_link_langfuse_prompts_batch():
- langfuse = Langfuse()
- trace_name = "test_link_langfuse_prompts_batch_" + create_uuid()[:8]
-
- # Create prompts
- joke_prompt_name = "joke_prompt_" + create_uuid()[:8]
- joke_prompt_string = "Tell me a joke involving the animal {{animal}}"
-
- explain_prompt_name = "explain_prompt_" + create_uuid()[:8]
- explain_prompt_string = "Explain the joke to me like I'm a 5 year old {{joke}}"
-
- langfuse.create_prompt(
- name=joke_prompt_name,
- prompt=joke_prompt_string,
- labels=["production"],
- )
-
- langfuse.create_prompt(
- name=explain_prompt_name,
- prompt=explain_prompt_string,
- labels=["production"],
- )
-
- # Get prompts
- langfuse_joke_prompt = langfuse.get_prompt(joke_prompt_name)
- langfuse_explain_prompt = langfuse.get_prompt(explain_prompt_name)
-
- langchain_joke_prompt = PromptTemplate.from_template(
- langfuse_joke_prompt.get_langchain_prompt(),
- metadata={"langfuse_prompt": langfuse_joke_prompt},
- )
-
- langchain_explain_prompt = PromptTemplate.from_template(
- langfuse_explain_prompt.get_langchain_prompt(),
- metadata={"langfuse_prompt": langfuse_explain_prompt},
- )
-
- # Create chain
- parser = StrOutputParser()
- model = OpenAI()
- chain = (
- {"joke": langchain_joke_prompt | model | parser}
- | langchain_explain_prompt
- | model
- | parser
- )
-
- # Run chain
- langfuse_handler = CallbackHandler(debug=True)
-
- chain.batch(
- [{"animal": "dog"}, {"animal": "cat"}, {"animal": "elephant"}],
- config={
- "callbacks": [langfuse_handler],
- "run_name": trace_name,
- "tags": ["langchain-tag"],
- },
- )
-
- langfuse_handler.flush()
-
- traces = get_api().trace.list(name=trace_name).data
-
- assert len(traces) == 3
-
- for trace in traces:
- trace = get_api().trace.get(trace.id)
-
- assert trace.tags == ["langchain-tag"]
-
- observations = trace.observations
-
- generations = sorted(
- list(filter(lambda x: x.type == "GENERATION", observations)),
- key=lambda x: x.start_time,
- )
-
- assert len(generations) == 2
-
- assert generations[0].prompt_name == joke_prompt_name
- assert generations[1].prompt_name == explain_prompt_name
-
- assert generations[0].prompt_version == langfuse_joke_prompt.version
- assert generations[1].prompt_version == langfuse_explain_prompt.version
-
-
-def test_get_langchain_text_prompt_with_precompiled_prompt():
- langfuse = Langfuse()
-
- prompt_name = "test_precompiled_langchain_prompt"
- test_prompt = (
- "This is a {{pre_compiled_var}}. This is a langchain {{langchain_var}}"
- )
-
- langfuse.create_prompt(
- name=prompt_name,
- prompt=test_prompt,
- labels=["production"],
- )
-
- langfuse_prompt = langfuse.get_prompt(prompt_name)
- langchain_prompt = PromptTemplate.from_template(
- langfuse_prompt.get_langchain_prompt(pre_compiled_var="dog")
- )
-
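- # Variables passed to get_langchain_prompt are compiled immediately; the remaining ones become LangChain placeholders.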
- assert (
- langchain_prompt.format(langchain_var="chain")
- == "This is a dog. This is a langchain chain"
- )
-
-
-def test_get_langchain_chat_prompt_with_precompiled_prompt():
- langfuse = Langfuse()
-
- prompt_name = "test_precompiled_langchain_chat_prompt"
- test_prompt = [
- {"role": "system", "content": "This is a {{pre_compiled_var}}."},
- {"role": "user", "content": "This is a langchain {{langchain_var}}."},
- ]
-
- langfuse.create_prompt(
- name=prompt_name,
- prompt=test_prompt,
- type="chat",
- labels=["production"],
- )
-
- langfuse_prompt = langfuse.get_prompt(prompt_name, type="chat")
- langchain_prompt = ChatPromptTemplate.from_messages(
- langfuse_prompt.get_langchain_prompt(pre_compiled_var="dog")
- )
-
- system_message, user_message = langchain_prompt.format_messages(
- langchain_var="chain"
- )
-
- assert system_message.content == "This is a dog."
- assert user_message.content == "This is a langchain chain."
-
-
-def test_callback_openai_functions_with_tools():
- handler = CallbackHandler()
-
- llm = ChatOpenAI(model="gpt-4", temperature=0, callbacks=[handler])
-
- class StandardizedAddress(BaseModel):
- street: str = Field(description="The street name and number")
- city: str = Field(description="The city name")
- state: str = Field(description="The state or province")
- zip_code: str = Field(description="The postal code")
-
- class GetWeather(BaseModel):
- city: str = Field(description="The city name")
- state: str = Field(description="The state or province")
- zip_code: str = Field(description="The postal code")
-
- address_tool = StructuredTool.from_function(
- func=lambda **kwargs: StandardizedAddress(**kwargs),
- name="standardize_address",
- description="Standardize the given address",
- args_schema=StandardizedAddress,
- )
-
- weather_tool = StructuredTool.from_function(
- func=lambda **kwargs: GetWeather(**kwargs),
- name="get_weather",
- description="Get the weather for the given city",
- args_schema=GetWeather,
- )
-
- messages = [
- {
- "role": "user",
- "content": "Please standardize this address: 123 Main St, Springfield, IL 62701",
- }
- ]
-
- llm.bind_tools([address_tool, weather_tool]).invoke(messages)
-
- handler.flush()
-
- trace = get_api().trace.get(handler.get_trace_id())
-
- generations = list(filter(lambda x: x.type == "GENERATION", trace.observations))
- assert len(generations) > 0
-
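- # The bound tool definitions are expected to appear as "tool" role entries in the logged generation input.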
- for generation in generations:
- assert generation.input is not None
- tool_messages = [msg for msg in generation.input if msg["role"] == "tool"]
- assert len(tool_messages) == 2
- assert any(
- "standardize_address" == msg["content"]["function"]["name"]
- for msg in tool_messages
- )
- assert any(
- "get_weather" == msg["content"]["function"]["name"] for msg in tool_messages
- )
-
- assert generation.output is not None
-
-
-def test_langfuse_overhead():
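- # Helper: build a dict of n random keys with mixed-type random values to use as a large chain input.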
- def _generate_random_dict(n: int, key_length: int = 8) -> Dict[str, Any]:
- result = {}
- value_generators = [
- lambda: "".join(
- random.choices(string.ascii_letters, k=random.randint(3, 15))
- ),
- lambda: random.randint(0, 1000),
- lambda: round(random.uniform(0, 100), 2),
- lambda: [random.randint(0, 100) for _ in range(random.randint(1, 5))],
- lambda: random.choice([True, False]),
- ]
- while len(result) < n:
- key = "".join(
- random.choices(string.ascii_letters + string.digits, k=key_length)
- )
- if key in result:
- continue
- value = random.choice(value_generators)()
- result[key] = value
- return result
-
- # Test performance overhead of langfuse tracing
- inputs = _generate_random_dict(10000, 20000)
- test_chain = RunnableLambda(lambda x: None)
-
- start = time.monotonic()
- test_chain.invoke(inputs)
- duration_without_langfuse = (time.monotonic() - start) * 1000
-
- start = time.monotonic()
- handler = CallbackHandler()
- test_chain.invoke(inputs, config={"callbacks": [handler]})
- duration_with_langfuse = (time.monotonic() - start) * 1000
-
- overhead = duration_with_langfuse - duration_without_langfuse
- print(f"Langfuse overhead: {overhead}ms")
-
- assert (
- overhead < 100
- ), f"Langfuse tracing overhead of {overhead}ms exceeds threshold"
-
- handler.flush()
-
- duration_full = (time.monotonic() - start) * 1000
- print(f"Full execution took {duration_full}ms")
-
- assert duration_full > 1000, "Full execution should take longer than 1 second"
-
-
-def test_multimodal():
- handler = CallbackHandler()
- model = ChatOpenAI(model="gpt-4o-mini")
-
- image_data = encode_file_to_base64("static/puton.jpg")
-
- message = HumanMessage(
- content=[
- {"type": "text", "text": "What's in this image?"},
- {
- "type": "image_url",
- "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
- },
- ],
- )
-
- response = model.invoke([message], config={"callbacks": [handler]})
-
- print(response.content)
-
- handler.flush()
-
- trace = get_api().trace.get(handler.get_trace_id())
-
- assert len(trace.observations) == 1
- assert trace.observations[0].type == "GENERATION"
-
- print(trace.observations[0].input)
-
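- # The base64 image data should be replaced by a Langfuse media reference token in the stored input.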
- assert (
- "@@@langfuseMedia:type=image/jpeg|id="
- in trace.observations[0].input[0]["content"][1]["image_url"]["url"]
- )
-
-
-def test_langgraph():
- # Define the tools for the agent to use
- @tool
- def search(query: str):
- """Call to surf the web."""
- # This is a placeholder, but don't tell the LLM that...
- if "sf" in query.lower() or "san francisco" in query.lower():
- return "It's 60 degrees and foggy."
- return "It's 90 degrees and sunny."
-
- tools = [search]
- tool_node = ToolNode(tools)
- model = ChatOpenAI(model="gpt-4o-mini").bind_tools(tools)
-
- # Define the function that determines whether to continue or not
- def should_continue(state: MessagesState) -> Literal["tools", END]:
- messages = state["messages"]
- last_message = messages[-1]
- # If the LLM makes a tool call, then we route to the "tools" node
- if last_message.tool_calls:
- return "tools"
- # Otherwise, we stop (reply to the user)
- return END
-
- # Define the function that calls the model
- def call_model(state: MessagesState):
- messages = state["messages"]
- response = model.invoke(messages)
- # We return a list, because this will get added to the existing list
- return {"messages": [response]}
-
- # Define a new graph
- workflow = StateGraph(MessagesState)
-
- # Define the two nodes we will cycle between
- workflow.add_node("agent", call_model)
- workflow.add_node("tools", tool_node)
-
- # Set the entrypoint as `agent`
- # This means that this node is the first one called
- workflow.add_edge(START, "agent")
-
- # We now add a conditional edge
- workflow.add_conditional_edges(
- # First, we define the start node. We use `agent`.
- # This means these are the edges taken after the `agent` node is called.
- "agent",
- # Next, we pass in the function that will determine which node is called next.
- should_continue,
- )
-
- # We now add a normal edge from `tools` to `agent`.
- # This means that after `tools` is called, `agent` node is called next.
- workflow.add_edge("tools", "agent")
-
- # Initialize memory to persist state between graph runs
- checkpointer = MemorySaver()
-
- # Finally, we compile it!
- # This compiles it into a LangChain Runnable,
- # meaning you can use it as you would any other runnable.
- # Note that we're (optionally) passing the memory when compiling the graph
- app = workflow.compile(checkpointer=checkpointer)
-
- handler = CallbackHandler()
-
- # Use the Runnable
- final_state = app.invoke(
- {"messages": [HumanMessage(content="what is the weather in sf")]},
- config={"configurable": {"thread_id": 42}, "callbacks": [handler]},
- )
- print(final_state["messages"][-1].content)
- handler.flush()
-
- trace = get_api().trace.get(handler.get_trace_id())
-
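- # Observations tagged as hidden by LangChain/LangGraph should be demoted to DEBUG level; all others stay DEFAULT.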
- hidden_count = 0
-
- for observation in trace.observations:
- if LANGSMITH_TAG_HIDDEN in observation.metadata.get("tags", []):
- hidden_count += 1
- assert observation.level == "DEBUG"
-
- else:
- assert observation.level == "DEFAULT"
-
- assert hidden_count > 0
diff --git a/tests/test_langchain_integration.py b/tests/test_langchain_integration.py
deleted file mode 100644
index f3d7b6980..000000000
--- a/tests/test_langchain_integration.py
+++ /dev/null
@@ -1,822 +0,0 @@
-from langchain_openai import ChatOpenAI, OpenAI
-from langchain.prompts import ChatPromptTemplate, PromptTemplate
-from langchain.schema import StrOutputParser
-import pytest
-import types
-from langfuse.callback import CallbackHandler
-from tests.utils import get_api
-from .utils import create_uuid
-
-
-# Defined locally to avoid instantiating Langfuse inside langfuse.openai.
-def _is_streaming_response(response):
- return isinstance(response, (types.GeneratorType, types.AsyncGeneratorType))
-
-
-# Streaming in chat models
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo", "gpt-4"])
-def test_stream_chat_models(model_name):
- name = f"test_stream_chat_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = ChatOpenAI(
- streaming=True, max_completion_tokens=300, tags=tags, model=model_name
- )
- callback = CallbackHandler(trace_name=name)
- res = model.stream(
- [{"role": "user", "content": "return the exact phrase - This is a test!"}],
- config={"callbacks": [callback]},
- )
- response_str = []
- assert _is_streaming_response(res)
- for chunk in res:
- response_str.append(chunk.content)
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- generation = generationList[0]
-
- assert len(response_str) > 1 # Check that more than one chunk was streamed.
- assert len(trace.observations) == 1
- assert trace.name == name
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_completion_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert generation.metadata["tags"] == tags
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert generation.output["content"] is not None
- assert generation.output["role"] is not None
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-# Streaming in completions models
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo-instruct"])
-def test_stream_completions_models(model_name):
- name = f"test_stream_completions_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = OpenAI(streaming=True, max_tokens=300, tags=tags, model=model_name)
- callback = CallbackHandler(trace_name=name)
- res = model.stream(
- "return the exact phrase - This is a test!",
- config={"callbacks": [callback]},
- )
- response_str = []
- assert _is_streaming_response(res)
- for chunk in res:
- response_str.append(chunk)
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- generation = generationList[0]
-
- assert len(response_str) > 1 # Check that more than one chunk was streamed.
- assert len(trace.observations) == 1
- assert trace.name == name
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert generation.metadata["tags"] == tags
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert generation.output is not None
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-# Invoke in chat models
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo", "gpt-4"])
-def test_invoke_chat_models(model_name):
- name = f"test_invoke_chat_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = ChatOpenAI(max_completion_tokens=300, tags=tags, model=model_name)
- callback = CallbackHandler(trace_name=name)
- _ = model.invoke(
- [{"role": "user", "content": "return the exact phrase - This is a test!"}],
- config={"callbacks": [callback]},
- )
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- generation = generationList[0]
-
- assert len(trace.observations) == 1
- assert trace.name == name
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_completion_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert generation.metadata["tags"] == tags
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert generation.output["content"] is not None
- assert generation.output["role"] is not None
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-# Invoke in completions models
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo-instruct"])
-def test_invoke_in_completions_models(model_name):
- name = f"test_invoke_in_completions_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = OpenAI(max_tokens=300, tags=tags, model=model_name)
- callback = CallbackHandler(trace_name=name)
- test_phrase = "This is a test!"
- _ = model.invoke(
- f"return the exact phrase - {test_phrase}",
- config={"callbacks": [callback]},
- )
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- generation = generationList[0]
-
- assert len(trace.observations) == 1
- assert trace.name == name
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert generation.metadata["tags"] == tags
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert test_phrase in generation.output
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo-instruct"])
-def test_batch_in_completions_models(model_name):
- name = f"test_batch_in_completions_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = OpenAI(max_tokens=300, tags=tags, model=model_name)
- callback = CallbackHandler(trace_name=name)
- input1 = "Who is the first president of America ?"
- input2 = "Who is the first president of Ireland ?"
- _ = model.batch(
- [input1, input2],
- config={"callbacks": [callback]},
- )
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- generation = generationList[0]
-
- assert len(trace.observations) == 1
- assert trace.name == name
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert generation.metadata["tags"] == tags
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo", "gpt-4"])
-def test_batch_in_chat_models(model_name):
- name = f"test_batch_in_chat_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = ChatOpenAI(max_completion_tokens=300, tags=tags, model=model_name)
- callback = CallbackHandler(trace_name=name)
- input1 = "Who is the first president of America ?"
- input2 = "Who is the first president of Ireland ?"
- _ = model.batch(
- [input1, input2],
- config={"callbacks": [callback]},
- )
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- assert len(trace.observations) == 1
- assert trace.name == name
- for generation in generationList:
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_completion_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert generation.metadata["tags"] == tags
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-# Async stream in chat models
-@pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo", "gpt-4"])
-async def test_astream_chat_models(model_name):
- name = f"test_astream_chat_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = ChatOpenAI(
- streaming=True, max_completion_tokens=300, tags=tags, model=model_name
- )
- callback = CallbackHandler(trace_name=name)
- res = model.astream(
- [{"role": "user", "content": "Who was the first American president "}],
- config={"callbacks": [callback]},
- )
- response_str = []
- assert _is_streaming_response(res)
- async for chunk in res:
- response_str.append(chunk.content)
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- generation = generationList[0]
-
-    assert len(response_str) > 1  # Check that more than one chunk was streamed.
- assert len(trace.observations) == 1
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_completion_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert generation.metadata["tags"] == tags
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert generation.output["content"] is not None
- assert generation.output["role"] is not None
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-# Async stream in completions models
-@pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo-instruct"])
-async def test_astream_completions_models(model_name):
- name = f"test_astream_completions_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = OpenAI(streaming=True, max_tokens=300, tags=tags, model=model_name)
- callback = CallbackHandler(trace_name=name)
- test_phrase = "This is a test!"
- res = model.astream(
- f"return the exact phrase - {test_phrase}",
- config={"callbacks": [callback]},
- )
- response_str = []
- assert _is_streaming_response(res)
- async for chunk in res:
- response_str.append(chunk)
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- generation = generationList[0]
-
-    assert len(response_str) > 1  # Check that more than one chunk was streamed.
- assert len(trace.observations) == 1
- assert test_phrase in "".join(response_str)
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert generation.metadata["tags"] == tags
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert test_phrase in generation.output
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-# Async invoke in chat models
-@pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo", "gpt-4"])
-async def test_ainvoke_chat_models(model_name):
- name = f"test_ainvoke_chat_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = ChatOpenAI(max_completion_tokens=300, tags=tags, model=model_name)
- callback = CallbackHandler(trace_name=name)
- test_phrase = "This is a test!"
- _ = await model.ainvoke(
- [{"role": "user", "content": f"return the exact phrase - {test_phrase} "}],
- config={"callbacks": [callback]},
- )
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- generation = generationList[0]
-
- assert len(trace.observations) == 1
- assert trace.name == name
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_completion_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert generation.metadata["tags"] == tags
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert generation.output["content"] is not None
- assert generation.output["role"] is not None
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo-instruct"])
-async def test_ainvoke_in_completions_models(model_name):
- name = f"test_ainvoke_in_completions_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = OpenAI(max_tokens=300, tags=tags, model=model_name)
- callback = CallbackHandler(trace_name=name)
- test_phrase = "This is a test!"
- _ = await model.ainvoke(
- f"return the exact phrase - {test_phrase}",
- config={"callbacks": [callback]},
- )
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- generation = generationList[0]
-
- assert len(trace.observations) == 1
- assert trace.name == name
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert generation.metadata["tags"] == tags
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert test_phrase in generation.output
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-# Chains
-
-
-# Sync batch in chains and chat models
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo", "gpt-4"])
-def test_chains_batch_in_chat_models(model_name):
- name = f"test_chains_batch_in_chat_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = ChatOpenAI(max_completion_tokens=300, tags=tags, model=model_name)
- callback = CallbackHandler(trace_name=name)
-
- prompt = ChatPromptTemplate.from_template("tell me a joke about {foo} in 300 words")
- inputs = [{"foo": "bears"}, {"foo": "cats"}]
- chain = prompt | model | StrOutputParser()
- _ = chain.batch(
- inputs,
- config={"callbacks": [callback]},
- )
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- assert len(trace.observations) == 4
- for generation in generationList:
- assert trace.name == name
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_completion_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert all(x in generation.metadata["tags"] for x in tags)
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo-instruct"])
-def test_chains_batch_in_completions_models(model_name):
- name = f"test_chains_batch_in_completions_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = OpenAI(max_tokens=300, tags=tags, model=model_name)
- callback = CallbackHandler(trace_name=name)
-
- prompt = ChatPromptTemplate.from_template("tell me a joke about {foo} in 300 words")
- inputs = [{"foo": "bears"}, {"foo": "cats"}]
- chain = prompt | model | StrOutputParser()
- _ = chain.batch(
- inputs,
- config={"callbacks": [callback]},
- )
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- assert len(trace.observations) == 4
- for generation in generationList:
- assert trace.name == name
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert all(x in generation.metadata["tags"] for x in tags)
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-# Async batch call with chains and chat models
-@pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo", "gpt-4"])
-async def test_chains_abatch_in_chat_models(model_name):
- name = f"test_chains_abatch_in_chat_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = ChatOpenAI(max_completion_tokens=300, tags=tags, model=model_name)
- callback = CallbackHandler(trace_name=name)
-
- prompt = ChatPromptTemplate.from_template("tell me a joke about {foo} in 300 words")
- inputs = [{"foo": "bears"}, {"foo": "cats"}]
- chain = prompt | model | StrOutputParser()
- _ = await chain.abatch(
- inputs,
- config={"callbacks": [callback]},
- )
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- assert len(trace.observations) == 4
- for generation in generationList:
- assert trace.name == name
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_completion_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert all(x in generation.metadata["tags"] for x in tags)
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-# Async batch call with chains and completions models
-@pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo-instruct"])
-async def test_chains_abatch_in_completions_models(model_name):
- name = f"test_chains_abatch_in_completions_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = OpenAI(max_tokens=300, tags=tags, model=model_name)
- callback = CallbackHandler(trace_name=name)
-
- prompt = ChatPromptTemplate.from_template("tell me a joke about {foo} in 300 words")
- inputs = [{"foo": "bears"}, {"foo": "cats"}]
- chain = prompt | model | StrOutputParser()
- _ = await chain.abatch(inputs, config={"callbacks": [callback]})
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
- assert len(trace.observations) == 4
- for generation in generationList:
- assert trace.name == name
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert all(x in generation.metadata["tags"] for x in tags)
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-# Async invoke in chains and chat models
-@pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo"])
-async def test_chains_ainvoke_chat_models(model_name):
- name = f"test_chains_ainvoke_chat_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = ChatOpenAI(max_completion_tokens=300, tags=tags, model=model_name)
- callback = CallbackHandler(trace_name=name)
- prompt1 = ChatPromptTemplate.from_template(
- """You are a skilled writer tasked with crafting an engaging introduction for a blog post on the following topic:
- Topic: {topic}
- Introduction: This is an engaging introduction for the blog post on the topic above:"""
- )
- chain = prompt1 | model | StrOutputParser()
- res = await chain.ainvoke(
- {"topic": "The Impact of Climate Change"},
- config={"callbacks": [callback]},
- )
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- assert len(trace.observations) == 4
- assert trace.name == name
- assert trace.input == {"topic": "The Impact of Climate Change"}
- assert trace.output == res
- for generation in generationList:
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_completion_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert all(x in generation.metadata["tags"] for x in tags)
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert generation.output["content"] is not None
- assert generation.output["role"] is not None
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-# Async invoke in chains and completions models
-@pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo-instruct"])
-async def test_chains_ainvoke_completions_models(model_name):
- name = f"test_chains_ainvoke_completions_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = OpenAI(max_tokens=300, tags=tags, model=model_name)
- callback = CallbackHandler(trace_name=name)
- prompt1 = PromptTemplate.from_template(
- """You are a skilled writer tasked with crafting an engaging introduction for a blog post on the following topic:
- Topic: {topic}
- Introduction: This is an engaging introduction for the blog post on the topic above:"""
- )
- chain = prompt1 | model | StrOutputParser()
- res = await chain.ainvoke(
- {"topic": "The Impact of Climate Change"},
- config={"callbacks": [callback]},
- )
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- generation = generationList[0]
- assert trace.input == {"topic": "The Impact of Climate Change"}
- assert trace.output == res
- assert len(trace.observations) == 4
- assert trace.name == name
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert all(x in generation.metadata["tags"] for x in tags)
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-# Async streaming in chat models
-@pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo", "gpt-4"])
-async def test_chains_astream_chat_models(model_name):
- name = f"test_chains_astream_chat_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = ChatOpenAI(
- streaming=True, max_completion_tokens=300, tags=tags, model=model_name
- )
- callback = CallbackHandler(trace_name=name)
- prompt1 = PromptTemplate.from_template(
- """You are a skilled writer tasked with crafting an engaging introduction for a blog post on the following topic:
- Topic: {topic}
- Introduction: This is an engaging introduction for the blog post on the topic above:"""
- )
- chain = prompt1 | model | StrOutputParser()
- res = chain.astream(
- {"topic": "The Impact of Climate Change"},
- config={"callbacks": [callback]},
- )
- response_str = []
- assert _is_streaming_response(res)
- async for chunk in res:
- response_str.append(chunk)
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- generation = generationList[0]
-
- assert trace.input == {"topic": "The Impact of Climate Change"}
- assert trace.output == "".join(response_str)
-    assert len(response_str) > 1  # Check that more than one chunk was streamed.
- assert len(trace.observations) == 4
- assert trace.name == name
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_completion_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert all(x in generation.metadata["tags"] for x in tags)
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert generation.output["content"] is not None
- assert generation.output["role"] is not None
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
-
-
-# Async streaming in completions models
-@pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", ["gpt-3.5-turbo-instruct"])
-async def test_chains_astream_completions_models(model_name):
- name = f"test_chains_astream_completions_models-{create_uuid()}"
- tags = ["Hello", "world"]
- model = OpenAI(streaming=True, max_tokens=300, tags=tags, model=model_name)
- callback = CallbackHandler(trace_name=name)
- prompt1 = PromptTemplate.from_template(
- """You are a skilled writer tasked with crafting an engaging introduction for a blog post on the following topic:
- Topic: {topic}
- Introduction: This is an engaging introduction for the blog post on the topic above:"""
- )
- chain = prompt1 | model | StrOutputParser()
- res = chain.astream(
- {"topic": "The Impact of Climate Change"},
- config={"callbacks": [callback]},
- )
- response_str = []
- assert _is_streaming_response(res)
- async for chunk in res:
- response_str.append(chunk)
-
- callback.flush()
- assert callback.runs == {}
- api = get_api()
- trace = api.trace.get(callback.get_trace_id())
- generationList = list(filter(lambda o: o.type == "GENERATION", trace.observations))
- assert len(generationList) != 0
-
- generation = generationList[0]
-
- assert trace.input == {"topic": "The Impact of Climate Change"}
- assert trace.output == "".join(response_str)
-    assert len(response_str) > 1  # Check that more than one chunk was streamed.
- assert len(trace.observations) == 4
- assert trace.name == name
- assert model_name in generation.model
- assert generation.input is not None
- assert generation.output is not None
- assert generation.model_parameters.get("max_tokens") is not None
- assert generation.model_parameters.get("temperature") is not None
- assert all(x in generation.metadata["tags"] for x in tags)
- assert generation.usage.output is not None
- assert generation.usage.total is not None
- assert generation.input_price is not None
- assert generation.output_price is not None
- assert generation.calculated_input_cost is not None
- assert generation.calculated_output_cost is not None
- assert generation.calculated_total_cost is not None
- assert generation.latency is not None
diff --git a/tests/test_llama_index.py b/tests/test_llama_index.py
deleted file mode 100644
index f3ccadc37..000000000
--- a/tests/test_llama_index.py
+++ /dev/null
@@ -1,544 +0,0 @@
-import pytest
-from llama_index.core import PromptTemplate, Settings
-from llama_index.core.callbacks import CallbackManager
-from llama_index.core.query_pipeline import QueryPipeline
-from llama_index.llms.anthropic import Anthropic
-from llama_index.llms.openai import OpenAI
-
-from langfuse.client import Langfuse
-from langfuse.llama_index import LlamaIndexCallbackHandler
-from tests.utils import create_uuid, get_api, get_llama_index_index
-
-
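-# Helper: an embedding generation is considered valid if it carries the expected name,
-# reports only total token usage, and has non-empty input/output payloads.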
-def validate_embedding_generation(generation):
- return all(
- [
- generation.name == "OpenAIEmbedding",
- generation.usage.input == 0,
- generation.usage.output == 0,
- generation.usage.total > 0, # For embeddings, only total tokens are logged
- bool(generation.input),
- bool(generation.output),
- ]
- )
-
-
-def validate_llm_generation(generation, model_name="openai_llm"):
- return all(
- [
- generation.name == model_name,
- generation.usage.input > 0,
- # generation.usage.output > 0, todo: enable when streaming output tokens are working
- generation.usage.total > 0,
- bool(generation.input),
- bool(generation.output),
- ]
- )
-
-
-def test_callback_init():
- callback = LlamaIndexCallbackHandler(
- release="release",
- version="version",
- session_id="session-id",
- user_id="user-id",
- metadata={"key": "value"},
- tags=["tag1", "tag2"],
- )
-
- assert callback.trace is None
-
- assert callback.langfuse.release == "release"
- assert callback.session_id == "session-id"
- assert callback.user_id == "user-id"
- assert callback.metadata == {"key": "value"}
- assert callback.tags == ["tag1", "tag2"]
- assert callback.version == "version"
- assert callback._task_manager is not None
-
-
-def test_constructor_kwargs():
- callback = LlamaIndexCallbackHandler(
- release="release",
- version="version",
- session_id="session-id",
- user_id="user-id",
- metadata={"key": "value"},
- tags=["tag1", "tag2"],
- )
- get_llama_index_index(callback, force_rebuild=True)
- assert callback.trace is not None
-
- trace_id = callback.trace.id
- assert trace_id is not None
-
- callback.flush()
- trace_data = get_api().trace.get(trace_id)
- assert trace_data is not None
-
- assert trace_data.release == "release"
- assert trace_data.version == "version"
- assert trace_data.session_id == "session-id"
- assert trace_data.user_id == "user-id"
- assert trace_data.metadata == {"key": "value"}
- assert trace_data.tags == ["tag1", "tag2"]
-
-
-def test_callback_from_index_construction():
- callback = LlamaIndexCallbackHandler()
- get_llama_index_index(callback, force_rebuild=True)
-
- assert callback.trace is not None
-
- trace_id = callback.trace.id
- assert trace_id is not None
-
- callback.flush()
- trace_data = get_api().trace.get(trace_id)
- assert trace_data is not None
-
- observations = trace_data.observations
-
- assert any(o.name == "OpenAIEmbedding" for o in observations)
-
- # Test embedding generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- assert len(generations) == 1 # Only one generation event for all embedded chunks
-
- generation = generations[0]
- assert validate_embedding_generation(generation)
-
-
-def test_callback_from_query_engine():
- callback = LlamaIndexCallbackHandler()
- index = get_llama_index_index(callback)
- index.as_query_engine().query(
- "What did the speaker achieve in the past twelve months?"
- )
-
- callback.flush()
- trace_data = get_api().trace.get(callback.trace.id)
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- assert (
- len(generations) == 2
- ) # One generation event for embedding call of query, one for LLM call
-
- embedding_generation, llm_generation = generations
- assert validate_embedding_generation(embedding_generation)
- assert validate_llm_generation(llm_generation)
-
-
-def test_callback_from_chat_engine():
- callback = LlamaIndexCallbackHandler()
- index = get_llama_index_index(callback)
- index.as_chat_engine().chat(
- "What did the speaker achieve in the past twelve months?"
- )
-
- callback.flush()
- trace_data = get_api().trace.get(callback.trace.id)
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- embedding_generations = [g for g in generations if g.name == "OpenAIEmbedding"]
- llm_generations = [g for g in generations if g.name == "openai_llm"]
-
- assert len(embedding_generations) == 1
- assert len(llm_generations) > 0
-
- assert all([validate_embedding_generation(g) for g in embedding_generations])
- assert all([validate_llm_generation(g) for g in llm_generations])
-
-
-def test_callback_from_query_engine_stream():
- callback = LlamaIndexCallbackHandler()
- index = get_llama_index_index(callback)
- stream_response = index.as_query_engine(streaming=True).query(
- "What did the speaker achieve in the past twelve months?"
- )
-
- for token in stream_response.response_gen:
- print(token, end="")
-
- callback.flush()
- trace_data = get_api().trace.get(callback.trace.id)
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- embedding_generations = [g for g in generations if g.name == "OpenAIEmbedding"]
- llm_generations = [g for g in generations if g.name == "openai_llm"]
-
- assert len(embedding_generations) == 1
- assert len(llm_generations) > 0
-
- assert all([validate_embedding_generation(g) for g in embedding_generations])
-
-
-def test_callback_from_chat_stream():
- callback = LlamaIndexCallbackHandler()
- index = get_llama_index_index(callback)
- stream_response = index.as_chat_engine().stream_chat(
- "What did the speaker achieve in the past twelve months?"
- )
-
- for token in stream_response.response_gen:
- print(token, end="")
-
- callback.flush()
- trace_data = get_api().trace.get(callback.trace.id)
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- embedding_generations = [g for g in generations if g.name == "OpenAIEmbedding"]
- llm_generations = [g for g in generations if g.name == "openai_llm"]
-
- assert len(embedding_generations) == 1
- assert len(llm_generations) > 0
-
- assert all([validate_embedding_generation(g) for g in embedding_generations])
- assert all([validate_llm_generation(g) for g in llm_generations])
-
-
-def test_callback_from_query_pipeline():
- callback = LlamaIndexCallbackHandler()
- Settings.callback_manager = CallbackManager([callback])
-
- prompt_str = "Please generate related movies to {movie_name}"
- prompt_tmpl = PromptTemplate(prompt_str)
- models = [
- ("openai_llm", OpenAI(model="gpt-3.5-turbo")),
- ("Anthropic_LLM", Anthropic()),
- ]
-
- for model_name, llm in models:
- pipeline = QueryPipeline(
- chain=[prompt_tmpl, llm],
- verbose=True,
- callback_manager=Settings.callback_manager,
- )
- pipeline.run(movie_name="The Matrix")
-
- callback.flush()
- trace_data = get_api().trace.get(callback.trace.id)
- observations = trace_data.observations
- llm_generations = list(
- filter(
- lambda o: o.type == "GENERATION" and o.name == model_name,
- observations,
- )
- )
-
- assert len(llm_generations) == 1
- assert validate_llm_generation(llm_generations[0], model_name=model_name)
-
-
-def test_callback_with_root_trace():
- callback = LlamaIndexCallbackHandler()
- index = get_llama_index_index(callback)
-
- langfuse = Langfuse(debug=False)
- trace_id = create_uuid()
- root_trace = langfuse.trace(id=trace_id, name=trace_id)
-
- callback.set_root(root_trace)
- index.as_query_engine().query(
- "What did the speaker achieve in the past twelve months?"
- )
-
- assert callback.get_trace_id() == trace_id
-
- callback.flush()
- trace_data = get_api().trace.get(callback.trace.id)
- assert trace_data is not None
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- assert (
- len(generations) == 2
- ) # One generation event for embedding call of query, one for LLM call
-
- embedding_generation, llm_generation = generations
- assert validate_embedding_generation(embedding_generation)
- assert validate_llm_generation(llm_generation)
-
- # Test that further observations are also appended to the root trace
- index.as_query_engine().query("How did the speaker achieve those goals?")
-
- callback.flush()
- trace_data = get_api().trace.get(callback.trace.id)
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- assert len(generations) == 4 # Two more generations are appended
-
- second_embedding_generation, second_llm_generation = generations[-2:]
- assert validate_embedding_generation(second_embedding_generation)
- assert validate_llm_generation(second_llm_generation)
-
- # Reset the root trace
- callback.set_root(None)
-
- index.as_query_engine().query("How did the speaker achieve those goals?")
- new_trace_id = callback.get_trace_id()
-    assert new_trace_id != trace_id
-
- callback.flush()
-
- trace_data = get_api().trace.get(new_trace_id)
- assert trace_data is not None
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- assert (
- len(generations) == 2
- ) # One generation event for embedding call of query, one for LLM call
-
- embedding_generation, llm_generation = generations
- assert validate_embedding_generation(embedding_generation)
- assert validate_llm_generation(llm_generation)
-
-
-def test_callback_with_root_trace_and_trace_update():
- callback = LlamaIndexCallbackHandler()
- index = get_llama_index_index(callback)
-
- langfuse = Langfuse(debug=False)
- trace_id = create_uuid()
- root_trace = langfuse.trace(id=trace_id, name=trace_id)
-
- callback.set_root(root_trace, update_root=True)
- index.as_query_engine().query(
- "What did the speaker achieve in the past twelve months?"
- )
-
- assert callback.get_trace_id() == trace_id
-
- callback.flush()
- trace_data = get_api().trace.get(callback.trace.id)
- assert trace_data is not None
- assert "LlamaIndex" in trace_data.name
- assert trace_data.input is not None
- assert trace_data.output is not None
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- assert (
- len(generations) == 2
- ) # One generation event for embedding call of query, one for LLM call
-
- embedding_generation, llm_generation = generations
- assert validate_embedding_generation(embedding_generation)
- assert validate_llm_generation(llm_generation)
-
-
-def test_callback_with_root_span():
- callback = LlamaIndexCallbackHandler()
- index = get_llama_index_index(callback)
-
- langfuse = Langfuse(debug=False)
- trace_id = create_uuid()
- span_id = create_uuid()
- trace = langfuse.trace(id=trace_id, name=trace_id)
- span = trace.span(id=span_id, name=span_id)
-
- callback.set_root(span)
- index.as_query_engine().query(
- "What did the speaker achieve in the past twelve months?"
- )
-
- assert callback.get_trace_id() == trace_id
- callback.flush()
- trace_data = get_api().trace.get(trace_id)
-
- assert trace_data is not None
- assert any([o.id == span_id for o in trace_data.observations])
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- assert (
- len(generations) == 2
- ) # One generation event for embedding call of query, one for LLM call
-
- embedding_generation, llm_generation = generations
- assert validate_embedding_generation(embedding_generation)
- assert validate_llm_generation(llm_generation)
-
- # Test that more observations are also appended to the root span
- index.as_query_engine().query("How did the speaker achieve those goals?")
-
- callback.flush()
- trace_data = get_api().trace.get(trace_id)
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- assert len(generations) == 4 # Two more generations are appended
-
- second_embedding_generation, second_llm_generation = generations[-2:]
- assert validate_embedding_generation(second_embedding_generation)
- assert validate_llm_generation(second_llm_generation)
-
- # Reset the root span
- callback.set_root(None)
- index.as_query_engine().query("How did the speaker achieve those goals?")
-
- new_trace_id = callback.get_trace_id()
- assert new_trace_id != trace_id
- callback.flush()
-
- trace_data = get_api().trace.get(new_trace_id)
-
- assert trace_data is not None
- assert not any([o.id == span_id for o in trace_data.observations])
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- assert (
- len(generations) == 2
- ) # One generation event for embedding call of query, one for LLM call
-
- embedding_generation, llm_generation = generations
- assert validate_embedding_generation(embedding_generation)
- assert validate_llm_generation(llm_generation)
-
-
-def test_callback_with_root_span_and_root_update():
- callback = LlamaIndexCallbackHandler()
- index = get_llama_index_index(callback)
-
- langfuse = Langfuse(debug=False)
- trace_id = create_uuid()
- span_id = create_uuid()
- trace = langfuse.trace(id=trace_id, name=trace_id)
- span = trace.span(id=span_id, name=span_id)
-
- callback.set_root(span, update_root=True)
- index.as_query_engine().query(
- "What did the speaker achieve in the past twelve months?"
- )
-
- assert callback.get_trace_id() == trace_id
- callback.flush()
- trace_data = get_api().trace.get(trace_id)
-
- assert trace_data is not None
-
- root_span_data = [o for o in trace_data.observations if o.id == span_id][0]
- assert root_span_data is not None
- assert "LlamaIndex" in root_span_data.name
- assert root_span_data.input is not None
- assert root_span_data.output is not None
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- assert (
- len(generations) == 2
- ) # One generation event for embedding call of query, one for LLM call
-
- embedding_generation, llm_generation = generations
- assert validate_embedding_generation(embedding_generation)
- assert validate_llm_generation(llm_generation)
-
-
-def test_callback_with_custom_trace_metadata():
- initial_name = "initial-name"
- initial_user_id = "initial-user-id"
- initial_session_id = "initial-session-id"
- initial_tags = ["initial_value1", "initial_value2"]
-
- callback = LlamaIndexCallbackHandler(
- trace_name=initial_name,
- user_id=initial_user_id,
- session_id=initial_session_id,
- tags=initial_tags,
- )
-
- index = get_llama_index_index(callback)
- index.as_query_engine().query(
- "What did the speaker achieve in the past twelve months?"
- )
-
- callback.flush()
- trace_data = get_api().trace.get(callback.trace.id)
-
- assert trace_data.name == initial_name
- assert trace_data.user_id == initial_user_id
- assert trace_data.session_id == initial_session_id
- assert trace_data.tags == initial_tags
-
- # Update trace metadata on existing handler
- updated_name = "updated-name"
- updated_user_id = "updated-user-id"
- updated_session_id = "updated-session-id"
- updated_tags = ["updated_value1", "updated_value2"]
-
- callback.set_trace_params(
- name=updated_name,
- user_id=updated_user_id,
- session_id=updated_session_id,
- tags=updated_tags,
- )
-
- index.as_query_engine().query(
- "What did the speaker achieve in the past twelve months?"
- )
- callback.flush()
- trace_data = get_api().trace.get(callback.trace.id)
-
- assert trace_data.name == updated_name
- assert trace_data.user_id == updated_user_id
- assert trace_data.session_id == updated_session_id
- assert trace_data.tags == updated_tags
-
-
-def test_disabled_langfuse():
- callback = LlamaIndexCallbackHandler(enabled=False)
- get_llama_index_index(callback, force_rebuild=True)
-
- assert callback.trace is not None
-
- trace_id = callback.trace.id
- assert trace_id is not None
-
- assert callback.langfuse.task_manager._ingestion_queue.empty()
-
- callback.flush()
-
- with pytest.raises(Exception):
- get_api().trace.get(trace_id)
diff --git a/tests/test_llama_index_instrumentation.py b/tests/test_llama_index_instrumentation.py
deleted file mode 100644
index 1b179024c..000000000
--- a/tests/test_llama_index_instrumentation.py
+++ /dev/null
@@ -1,349 +0,0 @@
-from typing import Optional
-from langfuse.client import Langfuse
-from langfuse.llama_index import LlamaIndexInstrumentor
-from llama_index.llms import openai, anthropic
-from llama_index.core.prompts import PromptTemplate
-from llama_index.core.query_pipeline import QueryPipeline
-
-from tests.utils import get_api, get_llama_index_index, create_uuid
-
-
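-# The instrumentation derives generation names from the emitting class, so these helpers
-# match on the class name as a substring rather than comparing exact names.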
-def is_embedding_generation_name(name: Optional[str]) -> bool:
- return name is not None and any(
- embedding_class in name
- for embedding_class in ("OpenAIEmbedding.", "BaseEmbedding")
- )
-
-
-def is_llm_generation_name(name: Optional[str], model_name: str = "OpenAI") -> bool:
- return name is not None and f"{model_name}." in name
-
-
-def validate_embedding_generation(generation):
- return all(
- [
- is_embedding_generation_name(generation.name),
- # generation.usage.input == 0,
- # generation.usage.output == 0,
- # generation.usage.total > 0, # For embeddings, only total tokens are logged
- bool(generation.input),
- bool(generation.output),
- ]
- )
-
-
-def validate_llm_generation(generation, model_name="OpenAI"):
- return all(
- [
- is_llm_generation_name(generation.name, model_name),
- generation.usage.input > 0,
- # generation.usage.output > 0, # streamed generations currently broken with no output
- generation.usage.total > 0,
- bool(generation.input),
- # bool(generation.output), # streamed generations currently broken with no output
- ]
- )
-
-
-def test_instrumentor_from_index_construction():
- trace_id = create_uuid()
- instrumentor = LlamaIndexInstrumentor()
- instrumentor.start()
-
- with instrumentor.observe(trace_id=trace_id):
- get_llama_index_index(None, force_rebuild=True)
-
- instrumentor.flush()
-
- trace_data = get_api().trace.get(trace_id)
- assert trace_data is not None
-
- observations = trace_data.observations
- assert any(
- is_embedding_generation_name(o.name) for o in observations if o.name is not None
- )
-
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- assert len(generations) == 1 # Only one generation event for all embedded chunks
-
- generation = generations[0]
- assert validate_embedding_generation(generation)
-
-
-def test_instrumentor_from_query_engine():
- trace_id = create_uuid()
- instrumentor = LlamaIndexInstrumentor()
- instrumentor.start()
-
- with instrumentor.observe(
- trace_id=trace_id,
- user_id="test_user_id",
- session_id="test_session_id",
- version="test_version",
- release="test_release",
- metadata={"test_metadata": "test_metadata"},
- tags=["test_tag"],
- public=True,
- ):
- index = get_llama_index_index(None, force_rebuild=True)
- index.as_query_engine().query(
- "What did the speaker achieve in the past twelve months?"
- )
-
- instrumentor.flush()
-
- trace_data = get_api().trace.get(trace_id)
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
-    assert (
-        len(generations) == 3
-    )  # One embedding generation from the index rebuild, plus the query embedding and the LLM call
-
- embedding_generations = [
- g for g in generations if is_embedding_generation_name(g.name)
- ]
- llm_generations = [g for g in generations if is_llm_generation_name(g.name)]
-
- assert all([validate_embedding_generation(g) for g in embedding_generations])
- assert all([validate_llm_generation(g) for g in llm_generations])
-
-
-def test_instrumentor_from_chat_engine():
- trace_id = create_uuid()
- instrumentor = LlamaIndexInstrumentor()
- instrumentor.start()
-
- with instrumentor.observe(trace_id=trace_id):
- index = get_llama_index_index(None)
- index.as_chat_engine().chat(
- "What did the speaker achieve in the past twelve months?"
- )
-
- instrumentor.flush()
- trace_data = get_api().trace.get(trace_id)
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
-
- embedding_generations = [
- g for g in generations if is_embedding_generation_name(g.name)
- ]
- llm_generations = [g for g in generations if is_llm_generation_name(g.name)]
-
- assert len(embedding_generations) == 1
- assert len(llm_generations) > 0
-
- assert all([validate_embedding_generation(g) for g in embedding_generations])
- assert all([validate_llm_generation(g) for g in llm_generations])
-
-
-def test_instrumentor_from_query_engine_stream():
- trace_id = create_uuid()
-
- instrumentor = LlamaIndexInstrumentor()
- instrumentor.start()
-
- with instrumentor.observe(trace_id=trace_id):
- index = get_llama_index_index(None)
- stream_response = index.as_query_engine(streaming=True).query(
- "What did the speaker achieve in the past twelve months?"
- )
-
- for token in stream_response.response_gen:
- print(token, end="")
-
- instrumentor.flush()
- trace_data = get_api().trace.get(trace_id)
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- embedding_generations = [
- g for g in generations if is_embedding_generation_name(g.name)
- ]
- llm_generations = [g for g in generations if is_llm_generation_name(g.name)]
-
- assert len(embedding_generations) == 1
- assert len(llm_generations) > 0
-
- assert all([validate_embedding_generation(g) for g in embedding_generations])
-
-
-def test_instrumentor_from_chat_stream():
- trace_id = create_uuid()
- instrumentor = LlamaIndexInstrumentor()
-
- with instrumentor.observe(trace_id=trace_id):
- index = get_llama_index_index(None)
- stream_response = index.as_chat_engine().stream_chat(
- "What did the speaker achieve in the past twelve months?"
- )
-
- for token in stream_response.response_gen:
- print(token, end="")
-
- instrumentor.flush()
- trace_data = get_api().trace.get(trace_id)
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- embedding_generations = [
- g for g in generations if is_embedding_generation_name(g.name)
- ]
- llm_generations = [g for g in generations if is_llm_generation_name(g.name)]
-
- assert len(embedding_generations) == 1
- assert len(llm_generations) > 0
-
- assert all([validate_embedding_generation(g) for g in embedding_generations])
- assert all([validate_llm_generation(g) for g in llm_generations])
-
-
-def test_instrumentor_from_query_pipeline():
- instrumentor = LlamaIndexInstrumentor()
-
- # index = get_llama_index_index(None)
-
- prompt_str = "Please generate related movies to {movie_name}"
- prompt_tmpl = PromptTemplate(prompt_str)
- models = [
- ("OpenAI", openai.OpenAI(model="gpt-3.5-turbo")),
- ("Anthropic", anthropic.Anthropic()),
- ]
-
- for model_name, llm in models:
- trace_id = create_uuid()
- pipeline = QueryPipeline(
- chain=[prompt_tmpl, llm],
- verbose=True,
- )
-
- with instrumentor.observe(trace_id=trace_id):
- pipeline.run(movie_name="The Matrix")
-
- instrumentor.flush()
-
- trace_data = get_api().trace.get(trace_id)
- observations = trace_data.observations
- llm_generations = [
- o
- for o in observations
- if is_llm_generation_name(o.name, model_name) and o.type == "GENERATION"
- ]
-
- assert len(llm_generations) == 1
- assert validate_llm_generation(llm_generations[0], model_name=model_name)
-
-
-def test_instrumentor_with_root_trace():
- instrumentor = LlamaIndexInstrumentor()
-
- index = get_llama_index_index(None)
-
- langfuse = Langfuse()
-
- trace_id = create_uuid()
- langfuse.trace(id=trace_id, name=trace_id)
-
- with instrumentor.observe(trace_id=trace_id):
- index.as_query_engine().query(
- "What did the speaker achieve in the past twelve months?"
- )
-
- instrumentor.flush()
- trace_data = get_api().trace.get(trace_id)
-
- assert trace_data is not None
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- assert (
- len(generations) == 2
- ) # One generation event for embedding call of query, one for LLM call
-
- embedding_generation, llm_generation = generations
- assert validate_embedding_generation(embedding_generation)
- assert validate_llm_generation(llm_generation)
-
-
-def test_instrumentor_with_root_span():
- instrumentor = LlamaIndexInstrumentor()
- index = get_llama_index_index(None)
-
- langfuse = Langfuse(debug=False)
- trace_id = create_uuid()
- span_id = create_uuid()
- trace = langfuse.trace(id=trace_id, name=trace_id)
- trace.span(id=span_id, name=span_id)
-
- with instrumentor.observe(trace_id=trace_id, parent_observation_id=span_id):
- index.as_query_engine().query(
- "What did the speaker achieve in the past twelve months?"
- )
-
- instrumentor.flush()
- trace_data = get_api().trace.get(trace_id)
-
- assert trace_data is not None
- assert any([o.id == span_id for o in trace_data.observations])
-
- # Test LLM generation
- generations = sorted(
- [o for o in trace_data.observations if o.type == "GENERATION"],
- key=lambda o: o.start_time,
- )
- assert (
- len(generations) == 2
- ) # One generation event for embedding call of query, one for LLM call
-
- embedding_generation, llm_generation = generations
- assert validate_embedding_generation(embedding_generation)
- assert validate_llm_generation(llm_generation)
-
-
-def test_instrumentor_with_custom_trace_metadata():
- initial_name = "initial-name"
- initial_user_id = "initial-user-id"
- initial_session_id = "initial-session-id"
- initial_tags = ["initial_value1", "initial_value2"]
-
- instrumentor = LlamaIndexInstrumentor()
-
- trace = Langfuse().trace(
- name=initial_name,
- user_id=initial_user_id,
- session_id=initial_session_id,
- tags=initial_tags,
- )
-
- with instrumentor.observe(trace_id=trace.id, update_parent=False):
- index = get_llama_index_index(None)
- index.as_query_engine().query(
- "What did the speaker achieve in the past twelve months?"
- )
-
- instrumentor.flush()
- trace_data = get_api().trace.get(trace.id)
-
- assert trace_data.name == initial_name
- assert trace_data.user_id == initial_user_id
- assert trace_data.session_id == initial_session_id
- assert trace_data.tags == initial_tags
diff --git a/tests/test_logger.py b/tests/test_logger.py
deleted file mode 100644
index 0c5d78b24..000000000
--- a/tests/test_logger.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import os
-
-from langfuse import Langfuse
-from langfuse.callback import CallbackHandler
-
-"""
-Level Numeric value
-logging.DEBUG 10
-logging.INFO 20
-logging.WARNING 30
-logging.ERROR 40
-"""
-
-
-def test_via_env():
- os.environ["LANGFUSE_DEBUG"] = "True"
-
- langfuse = Langfuse()
-
- assert langfuse.log.level == 10
-
- os.environ.pop("LANGFUSE_DEBUG")
-
-
-def test_via_env_callback():
- os.environ["LANGFUSE_DEBUG"] = "True"
-
- callback = CallbackHandler()
-
- assert callback.log.level == 10
- assert callback.langfuse.log.level == 10
- os.environ.pop("LANGFUSE_DEBUG")
-
-
-def test_debug_langfuse():
- langfuse = Langfuse(debug=True)
- assert langfuse.log.level == 10
-
-
-def test_default_langfuse():
- langfuse = Langfuse()
- assert langfuse.log.level == 30
-
-
-def test_default_langfuse_callback():
- callback = CallbackHandler()
- assert callback.log.level == 30
- assert callback.langfuse.log.level == 30
-
-
-def test_debug_langfuse_callback():
- callback = CallbackHandler(debug=True)
- assert callback.log.level == 10
- assert callback.langfuse.log.level == 10
-
-
-def test_default_langfuse_trace_callback():
- langfuse = Langfuse()
- trace = langfuse.trace(name="test")
- callback = trace.getNewHandler()
-
- assert callback.log.level == 30
- assert callback.trace.log.level == 30
-
-
-def test_debug_langfuse_trace_callback():
- langfuse = Langfuse(debug=True)
- trace = langfuse.trace(name="test")
- callback = trace.getNewHandler()
-
- assert callback.log.level == 10
- assert callback.trace.log.level == 10
diff --git a/tests/test_media.py b/tests/test_media.py
deleted file mode 100644
index 82211a37e..000000000
--- a/tests/test_media.py
+++ /dev/null
@@ -1,172 +0,0 @@
-import base64
-import re
-from uuid import uuid4
-
-import pytest
-
-from langfuse.client import Langfuse
-from langfuse.media import LangfuseMedia
-from tests.utils import get_api
-
-# Test data
-SAMPLE_JPEG_BYTES = b"\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00"
-SAMPLE_BASE64_DATA_URI = (
- "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/4QBARXhpZgAA"
-)
-
-
-def test_init_with_base64_data_uri():
- media = LangfuseMedia(base64_data_uri=SAMPLE_BASE64_DATA_URI)
- assert media._source == "base64_data_uri"
- assert media._content_type == "image/jpeg"
- assert media._content_bytes is not None
-
-
-def test_init_with_content_bytes():
- media = LangfuseMedia(content_bytes=SAMPLE_JPEG_BYTES, content_type="image/jpeg")
- assert media._source == "bytes"
- assert media._content_type == "image/jpeg"
- assert media._content_bytes == SAMPLE_JPEG_BYTES
-
-
-def test_init_with_invalid_input():
- # LangfuseMedia logs error but doesn't raise ValueError when initialized without required params
- media = LangfuseMedia()
- assert media._source is None
- assert media._content_type is None
- assert media._content_bytes is None
-
- media = LangfuseMedia(content_bytes=SAMPLE_JPEG_BYTES) # Missing content_type
- assert media._source is None
- assert media._content_type is None
- assert media._content_bytes is None
-
- media = LangfuseMedia(content_type="image/jpeg") # Missing content_bytes
- assert media._source is None
- assert media._content_type is None
- assert media._content_bytes is None
-
-
-def test_content_length():
- media = LangfuseMedia(content_bytes=SAMPLE_JPEG_BYTES, content_type="image/jpeg")
- assert media._content_length == len(SAMPLE_JPEG_BYTES)
-
-
-def test_content_sha256_hash():
- media = LangfuseMedia(content_bytes=SAMPLE_JPEG_BYTES, content_type="image/jpeg")
- assert media._content_sha256_hash is not None
- # Hash should be base64 encoded
- assert base64.b64decode(media._content_sha256_hash)
-
-
-def test_reference_string():
- media = LangfuseMedia(content_bytes=SAMPLE_JPEG_BYTES, content_type="image/jpeg")
- # Reference string should be None initially as media_id is not set
- assert media._reference_string is None
-
- # Set media_id
- media._media_id = "test-id"
- reference = media._reference_string
- assert reference is not None
- assert "test-id" in reference
- assert "image/jpeg" in reference
- assert "bytes" in reference
-
-
-def test_parse_reference_string():
- valid_ref = "@@@langfuseMedia:type=image/jpeg|id=test-id|source=base64_data_uri@@@"
- result = LangfuseMedia.parse_reference_string(valid_ref)
-
- assert result["media_id"] == "test-id"
- assert result["content_type"] == "image/jpeg"
- assert result["source"] == "base64_data_uri"
-
-
-def test_parse_invalid_reference_string():
- with pytest.raises(ValueError):
- LangfuseMedia.parse_reference_string("")
-
- with pytest.raises(ValueError):
- LangfuseMedia.parse_reference_string("invalid")
-
- with pytest.raises(ValueError):
- LangfuseMedia.parse_reference_string(
- "@@@langfuseMedia:type=image/jpeg@@@"
- ) # Missing fields
-
-
-def test_file_handling():
- file_path = "static/puton.jpg"
-
- media = LangfuseMedia(file_path=file_path, content_type="image/jpeg")
- assert media._source == "file"
- assert media._content_bytes is not None
- assert media._content_type == "image/jpeg"
-
-
-def test_nonexistent_file():
- media = LangfuseMedia(file_path="nonexistent.jpg")
-
- assert media._source is None
- assert media._content_bytes is None
- assert media._content_type is None
-
-
-def test_replace_media_reference_string_in_object():
-    # Load test audio file
- audio_file = "static/joke_prompt.wav"
- with open(audio_file, "rb") as f:
- mock_audio_bytes = f.read()
-
- # Create Langfuse client and trace with media
- langfuse = Langfuse()
-
- mock_trace_name = f"test-trace-with-audio-{uuid4()}"
- base64_audio = base64.b64encode(mock_audio_bytes).decode()
-
- trace = langfuse.trace(
- name=mock_trace_name,
- metadata={
- "context": {
- "nested": LangfuseMedia(
- base64_data_uri=f"data:audio/wav;base64,{base64_audio}"
- )
- }
- },
- )
-
- langfuse.flush()
-
- # Verify media reference string format
- fetched_trace = get_api().trace.get(trace.id)
- media_ref = fetched_trace.metadata["context"]["nested"]
- assert re.match(
- r"^@@@langfuseMedia:type=audio/wav\|id=.+\|source=base64_data_uri@@@$",
- media_ref,
- )
-
- # Resolve media references back to base64
- resolved_trace = langfuse.resolve_media_references(
- obj=fetched_trace, resolve_with="base64_data_uri"
- )
-
- # Verify resolved base64 matches original
- expected_base64 = f"data:audio/wav;base64,{base64_audio}"
- assert resolved_trace["metadata"]["context"]["nested"] == expected_base64
-
- # Create second trace reusing the media reference
- trace2 = langfuse.trace(
- name=f"2-{mock_trace_name}",
- metadata={
- "context": {"nested": resolved_trace["metadata"]["context"]["nested"]}
- },
- )
-
- langfuse.flush()
-
- # Verify second trace has same media reference
- fetched_trace2 = get_api().trace.get(trace2.id)
- assert (
- fetched_trace2.metadata["context"]["nested"]
- == fetched_trace.metadata["context"]["nested"]
- )
diff --git a/tests/test_openai.py b/tests/test_openai.py
deleted file mode 100644
index 31176bfae..000000000
--- a/tests/test_openai.py
+++ /dev/null
@@ -1,1614 +0,0 @@
-import os
-
-import pytest
-from openai import APIConnectionError
-from openai.types.chat.chat_completion_message import ChatCompletionMessage
-from pydantic import BaseModel
-
-from langfuse.client import Langfuse
-from langfuse.openai import (
- AsyncAzureOpenAI,
- AsyncOpenAI,
- AzureOpenAI,
- _is_openai_v1,
- openai,
-)
-from tests.utils import create_uuid, encode_file_to_base64, get_api
-
-chat_func = (
- openai.chat.completions.create if _is_openai_v1() else openai.ChatCompletion.create
-)
-completion_func = (
- openai.completions.create if _is_openai_v1() else openai.Completion.create
-)
-expected_err = openai.APIError if _is_openai_v1() else openai.error.AuthenticationError
-expected_err_msg = (
- "Connection error." if _is_openai_v1() else "You didn't provide an API key."
-)
-
-
-def test_auth_check():
- auth_check = openai.langfuse_auth_check()
-
- assert auth_check is True
-
-
-def test_openai_chat_completion():
- generation_name = create_uuid()
- completion = chat_func(
- name=generation_name,
- model="gpt-3.5-turbo",
- messages=[
- ChatCompletionMessage(
- role="assistant", content="You are an expert mathematician"
- ),
- {"role": "user", "content": "1 + 1 = "},
- ],
- temperature=0,
- metadata={"someKey": "someResponse"},
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].metadata == {"someKey": "someResponse"}
- assert len(completion.choices) != 0
- assert generation.data[0].input == [
- {
- "content": "You are an expert mathematician",
- "audio": None,
- "function_call": None,
- "refusal": None,
- "role": "assistant",
- "tool_calls": None,
- },
- {"content": "1 + 1 = ", "role": "user"},
- ]
- assert generation.data[0].type == "GENERATION"
- assert "gpt-3.5-turbo-0125" in generation.data[0].model
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 0,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- assert "2" in generation.data[0].output["content"]
- assert generation.data[0].output["role"] == "assistant"
-
- trace = get_api().trace.get(generation.data[0].trace_id)
- assert trace.input == [
- {
- "content": "You are an expert mathematician",
- "audio": None,
- "function_call": None,
- "refusal": None,
- "role": "assistant",
- "tool_calls": None,
- },
- {"role": "user", "content": "1 + 1 = "},
- ]
- assert trace.output["content"] == completion.choices[0].message.content
- assert trace.output["role"] == completion.choices[0].message.role
-
-
-def test_openai_chat_completion_stream():
- generation_name = create_uuid()
- completion = chat_func(
- name=generation_name,
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- stream=True,
- )
-
- assert iter(completion)
-
- chat_content = ""
- for i in completion:
- print("\n", i)
- chat_content += (i.choices[0].delta.content or "") if i.choices else ""
-
- assert len(chat_content) > 0
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].metadata == {"someKey": "someResponse"}
-
- assert generation.data[0].input == [{"content": "1 + 1 = ", "role": "user"}]
- assert generation.data[0].type == "GENERATION"
- assert "gpt-3.5-turbo-0125" in generation.data[0].model
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 0,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- assert generation.data[0].output == 2
-
- # Completion start time for time-to-first-token
- assert generation.data[0].completion_start_time is not None
- assert generation.data[0].completion_start_time >= generation.data[0].start_time
- assert generation.data[0].completion_start_time <= generation.data[0].end_time
-
- trace = get_api().trace.get(generation.data[0].trace_id)
- assert trace.input == [{"role": "user", "content": "1 + 1 = "}]
- assert str(trace.output) == chat_content
-
-
-def test_openai_chat_completion_stream_with_next_iteration():
- generation_name = create_uuid()
- completion = chat_func(
- name=generation_name,
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- stream=True,
- )
-
- assert iter(completion)
-
- chat_content = ""
-
- while True:
- try:
- c = next(completion)
- chat_content += (c.choices[0].delta.content or "") if c.choices else ""
-
- except StopIteration:
- break
-
- assert len(chat_content) > 0
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].metadata == {"someKey": "someResponse"}
-
- assert generation.data[0].input == [{"content": "1 + 1 = ", "role": "user"}]
- assert generation.data[0].type == "GENERATION"
- assert generation.data[0].model == "gpt-3.5-turbo-0125"
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 0,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- assert generation.data[0].output == 2
-
- # Completion start time for time-to-first-token
- assert generation.data[0].completion_start_time is not None
- assert generation.data[0].completion_start_time >= generation.data[0].start_time
- assert generation.data[0].completion_start_time <= generation.data[0].end_time
-
- trace = get_api().trace.get(generation.data[0].trace_id)
- assert trace.input == [{"role": "user", "content": "1 + 1 = "}]
- assert str(trace.output) == chat_content
-
-
-def test_openai_chat_completion_stream_fail():
- generation_name = create_uuid()
- openai.api_key = ""
-
- with pytest.raises(expected_err, match=expected_err_msg):
- chat_func(
- name=generation_name,
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- stream=True,
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].metadata == {"someKey": "someResponse"}
-
- assert generation.data[0].input == [{"content": "1 + 1 = ", "role": "user"}]
- assert generation.data[0].type == "GENERATION"
- assert generation.data[0].model == "gpt-3.5-turbo"
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 0,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- assert generation.data[0].level == "ERROR"
- assert expected_err_msg in generation.data[0].status_message
- assert generation.data[0].output is None
-
- openai.api_key = os.environ["OPENAI_API_KEY"]
-
- trace = get_api().trace.get(generation.data[0].trace_id)
- assert trace.input == [{"role": "user", "content": "1 + 1 = "}]
- assert trace.output is None
-
-
-def test_openai_chat_completion_with_trace():
- generation_name = create_uuid()
- trace_id = create_uuid()
- langfuse = Langfuse()
-
- langfuse.trace(id=trace_id)
-
- chat_func(
- name=generation_name,
- model="gpt-3.5-turbo",
- trace_id=trace_id,
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].trace_id == trace_id
-
-
-def test_openai_chat_completion_with_langfuse_prompt():
- generation_name = create_uuid()
- langfuse = Langfuse()
- prompt_name = create_uuid()
- langfuse.create_prompt(name=prompt_name, prompt="test prompt", is_active=True)
-
- prompt_client = langfuse.get_prompt(name=prompt_name)
-
- chat_func(
- name=generation_name,
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "Make me laugh"}],
- langfuse_prompt=prompt_client,
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert isinstance(generation.data[0].prompt_id, str)
-
-
-def test_openai_chat_completion_with_parent_observation_id():
- generation_name = create_uuid()
- trace_id = create_uuid()
- span_id = create_uuid()
- langfuse = Langfuse()
-
- trace = langfuse.trace(id=trace_id)
- trace.span(id=span_id)
-
- chat_func(
- name=generation_name,
- model="gpt-3.5-turbo",
- trace_id=trace_id,
- parent_observation_id=span_id,
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].trace_id == trace_id
- assert generation.data[0].parent_observation_id == span_id
-
-
-def test_openai_chat_completion_fail():
- generation_name = create_uuid()
-
- openai.api_key = ""
-
- with pytest.raises(expected_err, match=expected_err_msg):
- chat_func(
- name=generation_name,
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].metadata == {"someKey": "someResponse"}
- assert generation.data[0].input == [{"content": "1 + 1 = ", "role": "user"}]
- assert generation.data[0].type == "GENERATION"
- assert generation.data[0].model == "gpt-3.5-turbo"
- assert generation.data[0].level == "ERROR"
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert expected_err_msg in generation.data[0].status_message
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 0,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].output is None
-
- openai.api_key = os.environ["OPENAI_API_KEY"]
-
-
-def test_openai_chat_completion_with_additional_params():
- user_id = create_uuid()
- session_id = create_uuid()
- tags = ["tag1", "tag2"]
- trace_id = create_uuid()
- completion = chat_func(
- name="user-creation",
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- user_id=user_id,
- trace_id=trace_id,
- session_id=session_id,
- tags=tags,
- )
-
- openai.flush_langfuse()
-
- assert len(completion.choices) != 0
- trace = get_api().trace.get(trace_id)
-
- assert trace.user_id == user_id
- assert trace.session_id == session_id
- assert trace.tags == tags
-
-
-def test_openai_chat_completion_without_extra_param():
- completion = chat_func(
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- )
-
- assert len(completion.choices) != 0
-
-
-def test_openai_chat_completion_two_calls():
- generation_name = create_uuid()
- completion = chat_func(
- name=generation_name,
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- )
-
- generation_name_2 = create_uuid()
-
- completion_2 = chat_func(
- name=generation_name_2,
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "2 + 2 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert len(completion.choices) != 0
-
- assert generation.data[0].input == [{"content": "1 + 1 = ", "role": "user"}]
-
- generation_2 = get_api().observations.get_many(
- name=generation_name_2, type="GENERATION"
- )
-
- assert len(generation_2.data) != 0
- assert generation_2.data[0].name == generation_name_2
- assert len(completion_2.choices) != 0
-
- assert generation_2.data[0].input == [{"content": "2 + 2 = ", "role": "user"}]
-
-
-def test_openai_chat_completion_with_seed():
- generation_name = create_uuid()
- completion = chat_func(
- name=generation_name,
- model="gpt-4o-mini",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- seed=123,
- metadata={"someKey": "someResponse"},
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert generation.data[0].model_parameters == {
- "temperature": 0,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- "seed": 123,
- }
- assert len(completion.choices) != 0
-
-
-def test_openai_completion():
- generation_name = create_uuid()
- completion = completion_func(
- name=generation_name,
- model="gpt-3.5-turbo-instruct",
- prompt="1 + 1 = ",
- temperature=0,
- metadata={"someKey": "someResponse"},
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].metadata == {"someKey": "someResponse"}
- assert len(completion.choices) != 0
- assert completion.choices[0].text == generation.data[0].output
- assert generation.data[0].input == "1 + 1 = "
- assert generation.data[0].type == "GENERATION"
- assert "gpt-3.5-turbo-instruct" in generation.data[0].model
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 0,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- assert generation.data[0].output == "2\n\n1 + 2 = 3\n\n2 + 3 = "
-
- trace = get_api().trace.get(generation.data[0].trace_id)
- assert trace.input == "1 + 1 = "
- assert trace.output == completion.choices[0].text
-
-
-def test_openai_completion_stream():
- generation_name = create_uuid()
- completion = completion_func(
- name=generation_name,
- model="gpt-3.5-turbo-instruct",
- prompt="1 + 1 = ",
- temperature=0,
- metadata={"someKey": "someResponse"},
- stream=True,
- )
-
- assert iter(completion)
- content = ""
- for i in completion:
- content += (i.choices[0].text or "") if i.choices else ""
-
- openai.flush_langfuse()
-
- assert len(content) > 0
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].metadata == {"someKey": "someResponse"}
-
- assert generation.data[0].input == "1 + 1 = "
- assert generation.data[0].type == "GENERATION"
- assert "gpt-3.5-turbo-instruct" in generation.data[0].model
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 0,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- assert generation.data[0].output == "2\n\n1 + 2 = 3\n\n2 + 3 = "
-
- # Completion start time for time-to-first-token
- assert generation.data[0].completion_start_time is not None
- assert generation.data[0].completion_start_time >= generation.data[0].start_time
- assert generation.data[0].completion_start_time <= generation.data[0].end_time
-
- trace = get_api().trace.get(generation.data[0].trace_id)
- assert trace.input == "1 + 1 = "
- assert trace.output == content
-
-
-def test_openai_completion_fail():
- generation_name = create_uuid()
-
- openai.api_key = ""
-
- with pytest.raises(expected_err, match=expected_err_msg):
- completion_func(
- name=generation_name,
- model="gpt-3.5-turbo-instruct",
- prompt="1 + 1 = ",
- temperature=0,
- metadata={"someKey": "someResponse"},
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].metadata == {"someKey": "someResponse"}
- assert generation.data[0].input == "1 + 1 = "
- assert generation.data[0].type == "GENERATION"
- assert generation.data[0].model == "gpt-3.5-turbo-instruct"
- assert generation.data[0].level == "ERROR"
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert expected_err_msg in generation.data[0].status_message
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 0,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].output is None
-
- openai.api_key = os.environ["OPENAI_API_KEY"]
-
-
-def test_openai_completion_stream_fail():
- generation_name = create_uuid()
- openai.api_key = ""
-
- with pytest.raises(expected_err, match=expected_err_msg):
- completion_func(
- name=generation_name,
- model="gpt-3.5-turbo",
- prompt="1 + 1 = ",
- temperature=0,
- metadata={"someKey": "someResponse"},
- stream=True,
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].metadata == {"someKey": "someResponse"}
-
- assert generation.data[0].input == "1 + 1 = "
- assert generation.data[0].type == "GENERATION"
- assert generation.data[0].model == "gpt-3.5-turbo"
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 0,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- assert generation.data[0].level == "ERROR"
- assert expected_err_msg in generation.data[0].status_message
- assert generation.data[0].output is None
-
- openai.api_key = os.environ["OPENAI_API_KEY"]
-
-
-def test_openai_completion_with_langfuse_prompt():
- generation_name = create_uuid()
- langfuse = Langfuse()
- prompt_name = create_uuid()
- prompt_client = langfuse.create_prompt(
- name=prompt_name, prompt="test prompt", is_active=True
- )
- completion_func(
- name=generation_name,
- model="gpt-3.5-turbo-instruct",
- prompt="1 + 1 = ",
- temperature=0,
- metadata={"someKey": "someResponse"},
- langfuse_prompt=prompt_client,
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert isinstance(generation.data[0].prompt_id, str)
-
-
-def test_fails_wrong_name():
- with pytest.raises(TypeError, match="name must be a string"):
- completion_func(
- name={"key": "generation_name"},
- model="gpt-3.5-turbo-instruct",
- prompt="1 + 1 = ",
- temperature=0,
- )
-
-
-def test_fails_wrong_metadata():
- with pytest.raises(TypeError, match="metadata must be a dictionary"):
- completion_func(
- metadata="metadata",
- model="gpt-3.5-turbo-instruct",
- prompt="1 + 1 = ",
- temperature=0,
- )
-
-
-def test_fails_wrong_trace_id():
- with pytest.raises(TypeError, match="trace_id must be a string"):
- completion_func(
- trace_id={"trace_id": "metadata"},
- model="gpt-3.5-turbo-instruct",
- prompt="1 + 1 = ",
- temperature=0,
- )
-
-
-@pytest.mark.asyncio
-async def test_async_chat():
- client = AsyncOpenAI()
- generation_name = create_uuid()
-
- completion = await client.chat.completions.create(
- messages=[{"role": "user", "content": "1 + 1 = "}],
- model="gpt-3.5-turbo",
- name=generation_name,
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert len(completion.choices) != 0
-
- assert generation.data[0].input == [{"content": "1 + 1 = ", "role": "user"}]
- assert generation.data[0].type == "GENERATION"
- assert generation.data[0].model == "gpt-3.5-turbo-0125"
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 1,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- assert "2" in generation.data[0].output["content"]
- assert generation.data[0].output["role"] == "assistant"
-
-
-@pytest.mark.asyncio
-async def test_async_chat_stream():
- client = AsyncOpenAI()
-
- generation_name = create_uuid()
-
- completion = await client.chat.completions.create(
- messages=[{"role": "user", "content": "1 + 1 = "}],
- model="gpt-3.5-turbo",
- name=generation_name,
- stream=True,
- )
-
- async for c in completion:
- print(c)
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].input == [{"content": "1 + 1 = ", "role": "user"}]
- assert generation.data[0].type == "GENERATION"
- assert generation.data[0].model == "gpt-3.5-turbo-0125"
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 1,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- assert "2" in str(generation.data[0].output)
-
- # Completion start time for time-to-first-token
- assert generation.data[0].completion_start_time is not None
- assert generation.data[0].completion_start_time >= generation.data[0].start_time
- assert generation.data[0].completion_start_time <= generation.data[0].end_time
-
-
-@pytest.mark.asyncio
-async def test_async_chat_stream_with_anext():
- client = AsyncOpenAI()
-
- generation_name = create_uuid()
-
- completion = await client.chat.completions.create(
- messages=[{"role": "user", "content": "Give me a one-liner joke"}],
- model="gpt-3.5-turbo",
- name=generation_name,
- stream=True,
- )
-
- result = ""
-
- while True:
- try:
- c = await completion.__anext__()
-
- result += (c.choices[0].delta.content or "") if c.choices else ""
-
- except StopAsyncIteration:
- break
-
- openai.flush_langfuse()
-
- print(result)
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].input == [
- {"content": "Give me a one-liner joke", "role": "user"}
- ]
- assert generation.data[0].type == "GENERATION"
- assert generation.data[0].model == "gpt-3.5-turbo-0125"
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 1,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
-
- # Completion start time for time-to-first-token
- assert generation.data[0].completion_start_time is not None
- assert generation.data[0].completion_start_time >= generation.data[0].start_time
- assert generation.data[0].completion_start_time <= generation.data[0].end_time
-
-
-def test_openai_function_call():
- from typing import List
-
- from pydantic import BaseModel
-
- generation_name = create_uuid()
-
- class StepByStepAIResponse(BaseModel):
- title: str
- steps: List[str]
-
- import json
-
- response = openai.chat.completions.create(
- name=generation_name,
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "Explain how to assemble a PC"}],
- functions=[
- {
- "name": "get_answer_for_user_query",
- "description": "Get user answer in series of steps",
- "parameters": StepByStepAIResponse.schema(),
- }
- ],
- function_call={"name": "get_answer_for_user_query"},
- )
-
- output = json.loads(response.choices[0].message.function_call.arguments)
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].output is not None
- assert "function_call" in generation.data[0].output
-
- assert output["title"] is not None
-
-
-def test_openai_function_call_streamed():
- from typing import List
-
- from pydantic import BaseModel
-
- generation_name = create_uuid()
-
- class StepByStepAIResponse(BaseModel):
- title: str
- steps: List[str]
-
- response = openai.chat.completions.create(
- name=generation_name,
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "Explain how to assemble a PC"}],
- functions=[
- {
- "name": "get_answer_for_user_query",
- "description": "Get user answer in series of steps",
- "parameters": StepByStepAIResponse.schema(),
- }
- ],
- function_call={"name": "get_answer_for_user_query"},
- stream=True,
- )
-
- # Consume the stream
- for _ in response:
- pass
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].output is not None
- assert "function_call" in generation.data[0].output
-
-
-def test_openai_tool_call():
- generation_name = create_uuid()
-
- tools = [
- {
- "type": "function",
- "function": {
- "name": "get_current_weather",
- "description": "Get the current weather in a given location",
- "parameters": {
- "type": "object",
- "properties": {
- "location": {
- "type": "string",
- "description": "The city and state, e.g. San Francisco, CA",
- },
- "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
- },
- "required": ["location"],
- },
- },
- }
- ]
- messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
- openai.chat.completions.create(
- model="gpt-3.5-turbo",
- messages=messages,
- tools=tools,
- tool_choice="auto",
- name=generation_name,
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert (
- generation.data[0].output["tool_calls"][0]["function"]["name"]
- == "get_current_weather"
- )
- assert (
- generation.data[0].output["tool_calls"][0]["function"]["arguments"] is not None
- )
- assert generation.data[0].input["tools"] == tools
- assert generation.data[0].input["messages"] == messages
-
-
-def test_openai_tool_call_streamed():
- generation_name = create_uuid()
-
- tools = [
- {
- "type": "function",
- "function": {
- "name": "get_current_weather",
- "description": "Get the current weather in a given location",
- "parameters": {
- "type": "object",
- "properties": {
- "location": {
- "type": "string",
- "description": "The city and state, e.g. San Francisco, CA",
- },
- "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
- },
- "required": ["location"],
- },
- },
- }
- ]
- messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
- response = openai.chat.completions.create(
- model="gpt-3.5-turbo",
- messages=messages,
- tools=tools,
- tool_choice="required",
- name=generation_name,
- stream=True,
- )
-
- # Consume the stream
- for _ in response:
- pass
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
-
- assert (
- generation.data[0].output["tool_calls"][0]["function"]["name"]
- == "get_current_weather"
- )
- assert (
- generation.data[0].output["tool_calls"][0]["function"]["arguments"] is not None
- )
- assert generation.data[0].input["tools"] == tools
- assert generation.data[0].input["messages"] == messages
-
-
-def test_azure():
- generation_name = create_uuid()
- azure = AzureOpenAI(
- api_key="missing",
- api_version="2020-07-01-preview",
- base_url="https://api.labs.azure.com",
- )
-
- with pytest.raises(APIConnectionError):
- azure.chat.completions.create(
- name=generation_name,
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].metadata == {"someKey": "someResponse"}
- assert generation.data[0].input == [{"content": "1 + 1 = ", "role": "user"}]
- assert generation.data[0].type == "GENERATION"
- assert generation.data[0].model == "gpt-3.5-turbo"
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 0,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- assert generation.data[0].level == "ERROR"
-
-
-@pytest.mark.asyncio
-async def test_async_azure():
- generation_name = create_uuid()
- azure = AsyncAzureOpenAI(
- api_key="missing",
- api_version="2020-07-01-preview",
- base_url="https://api.labs.azure.com",
- )
-
- with pytest.raises(APIConnectionError):
- await azure.chat.completions.create(
- name=generation_name,
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].metadata == {"someKey": "someResponse"}
- assert generation.data[0].input == [{"content": "1 + 1 = ", "role": "user"}]
- assert generation.data[0].type == "GENERATION"
- assert generation.data[0].model == "gpt-3.5-turbo"
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 0,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- assert generation.data[0].level == "ERROR"
-
-
-def test_openai_with_existing_trace_id():
- langfuse = Langfuse()
- trace = langfuse.trace(
- name="docs-retrieval",
- user_id="user__935d7d1d-8625-4ef4-8651-544613e7bd22",
- metadata={
- "email": "user@langfuse.com",
- },
- tags=["production"],
- output="This is a standard output",
- input="My custom input",
- )
-
- langfuse.flush()
-
- generation_name = create_uuid()
- completion = chat_func(
- name=generation_name,
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- trace_id=trace.id,
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].metadata == {"someKey": "someResponse"}
- assert len(completion.choices) != 0
- assert generation.data[0].input == [{"content": "1 + 1 = ", "role": "user"}]
- assert generation.data[0].type == "GENERATION"
- assert generation.data[0].model == "gpt-3.5-turbo-0125"
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 0,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- assert "2" in generation.data[0].output["content"]
- assert generation.data[0].output["role"] == "assistant"
-
- trace = get_api().trace.get(generation.data[0].trace_id)
- assert trace.output == "This is a standard output"
- assert trace.input == "My custom input"
-
-
-def test_disabled_langfuse():
- # Reimport to reset the state
- from langfuse.openai import openai
- from langfuse.utils.langfuse_singleton import LangfuseSingleton
-
- LangfuseSingleton().reset()
-
- openai.langfuse_enabled = False
-
- generation_name = create_uuid()
- openai.chat.completions.create(
- name=generation_name,
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- )
-
- openai.flush_langfuse()
-
- generations = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generations.data) == 0
-
- # Reimport to reset the state
- LangfuseSingleton().reset()
- openai.langfuse_enabled = True
-
- import importlib
-
- from langfuse.openai import openai
-
- importlib.reload(openai)
-
-
-def test_langchain_integration():
- from langchain_openai import ChatOpenAI
-
- chat = ChatOpenAI(model="gpt-4o")
-
- result = ""
-
- for chunk in chat.stream("Hello, how are you?"):
- result += chunk.content
-
- print(result)
- assert result != ""
-
-
-def test_structured_output_response_format_kwarg():
- generation_name = (
- "test_structured_output_response_format_kwarg" + create_uuid()[0:10]
- )
-
- json_schema = {
- "name": "math_response",
- "strict": True,
- "schema": {
- "type": "object",
- "properties": {
- "steps": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "explanation": {"type": "string"},
- "output": {"type": "string"},
- },
- "required": ["explanation", "output"],
- "additionalProperties": False,
- },
- },
- "final_answer": {"type": "string"},
- },
- "required": ["steps", "final_answer"],
- "additionalProperties": False,
- },
- }
-
- openai.chat.completions.create(
- name=generation_name,
- model="gpt-4o-2024-08-06",
- messages=[
- {"role": "system", "content": "You are a helpful math tutor."},
- {"role": "user", "content": "solve 8x + 31 = 2"},
- ],
- response_format={
- "type": "json_schema",
- "json_schema": json_schema,
- },
- metadata={"someKey": "someResponse"},
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].metadata == {
- "someKey": "someResponse",
- "response_format": {"type": "json_schema", "json_schema": json_schema},
- }
-
- assert generation.data[0].input == [
- {"role": "system", "content": "You are a helpful math tutor."},
- {"content": "solve 8x + 31 = 2", "role": "user"},
- ]
- assert generation.data[0].type == "GENERATION"
- assert generation.data[0].model == "gpt-4o-2024-08-06"
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 1,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- assert generation.data[0].output["role"] == "assistant"
-
- trace = get_api().trace.get(generation.data[0].trace_id)
- assert trace.output is not None
- assert trace.input is not None
-
-
-def test_structured_output_beta_completions_parse():
- from typing import List
-
- from packaging.version import Version
-
- class CalendarEvent(BaseModel):
- name: str
- date: str
- participants: List[str]
-
- generation_name = create_uuid()
-
- params = {
- "model": "gpt-4o-2024-08-06",
- "messages": [
- {"role": "system", "content": "Extract the event information."},
- {
- "role": "user",
- "content": "Alice and Bob are going to a science fair on Friday.",
- },
- ],
- "response_format": CalendarEvent,
- "name": generation_name,
- }
-
-    # The beta API is only wrapped for this version range. Prior to that, another already-wrapped method was called implicitly.
- if Version(openai.__version__) < Version("1.50.0"):
- params.pop("name")
-
- openai.beta.chat.completions.parse(**params)
-
- openai.flush_langfuse()
-
- if Version(openai.__version__) >= Version("1.50.0"):
- # Check the trace and observation properties
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) == 1
- assert generation.data[0].name == generation_name
- assert generation.data[0].type == "GENERATION"
- assert generation.data[0].model == "gpt-4o-2024-08-06"
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
-
- # Check input and output
- assert len(generation.data[0].input) == 2
- assert generation.data[0].input[0]["role"] == "system"
- assert generation.data[0].input[1]["role"] == "user"
- assert isinstance(generation.data[0].output, dict)
- assert "name" in generation.data[0].output["content"]
- assert "date" in generation.data[0].output["content"]
- assert "participants" in generation.data[0].output["content"]
-
- # Check usage
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
-
- # Check trace
- trace = get_api().trace.get(generation.data[0].trace_id)
-
- assert trace.input is not None
- assert trace.output is not None
-
-
-@pytest.mark.asyncio
-async def test_close_async_stream():
- client = AsyncOpenAI()
- generation_name = create_uuid()
-
- stream = await client.chat.completions.create(
- messages=[{"role": "user", "content": "1 + 1 = "}],
- model="gpt-3.5-turbo",
- name=generation_name,
- stream=True,
- )
-
- async for token in stream:
- print(token)
-
- await stream.close()
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].input == [{"content": "1 + 1 = ", "role": "user"}]
- assert generation.data[0].type == "GENERATION"
- assert generation.data[0].model == "gpt-3.5-turbo-0125"
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].model_parameters == {
- "temperature": 1,
- "top_p": 1,
- "frequency_penalty": 0,
- "max_tokens": "inf",
- "presence_penalty": 0,
- }
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- assert "2" in str(generation.data[0].output)
-
- # Completion start time for time-to-first-token
- assert generation.data[0].completion_start_time is not None
- assert generation.data[0].completion_start_time >= generation.data[0].start_time
- assert generation.data[0].completion_start_time <= generation.data[0].end_time
-
-
-def test_base_64_image_input():
- client = openai.OpenAI()
- generation_name = "test_base_64_image_input" + create_uuid()[:8]
-
- content_path = "static/puton.jpg"
- content_type = "image/jpeg"
-
- base64_image = encode_file_to_base64(content_path)
-
- client.chat.completions.create(
- name=generation_name,
- model="gpt-4o-mini",
- messages=[
- {
- "role": "user",
- "content": [
- {"type": "text", "text": "What’s in this image?"},
- {
- "type": "image_url",
- "image_url": {
- "url": f"data:{content_type};base64,{base64_image}"
- },
- },
- ],
- }
- ],
- max_tokens=300,
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert generation.data[0].input[0]["content"][0]["text"] == "What’s in this image?"
- assert (
- f"@@@langfuseMedia:type={content_type}|id="
- in generation.data[0].input[0]["content"][1]["image_url"]["url"]
- )
- assert generation.data[0].type == "GENERATION"
- assert "gpt-4o-mini" in generation.data[0].model
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- assert "dog" in generation.data[0].output["content"]
-
-
-def test_audio_input_and_output():
- client = openai.OpenAI()
- openai.langfuse_debug = True
- generation_name = "test_audio_input_and_output" + create_uuid()[:8]
-
- content_path = "static/joke_prompt.wav"
- base64_string = encode_file_to_base64(content_path)
-
- client.chat.completions.create(
- name=generation_name,
- model="gpt-4o-audio-preview",
- modalities=["text", "audio"],
- audio={"voice": "alloy", "format": "wav"},
- messages=[
- {
- "role": "user",
- "content": [
- {"type": "text", "text": "Do what this recording says."},
- {
- "type": "input_audio",
- "input_audio": {"data": base64_string, "format": "wav"},
- },
- ],
- },
- ],
- )
-
- openai.flush_langfuse()
-
- generation = get_api().observations.get_many(
- name=generation_name, type="GENERATION"
- )
-
- assert len(generation.data) != 0
- assert generation.data[0].name == generation_name
- assert (
- generation.data[0].input[0]["content"][0]["text"]
- == "Do what this recording says."
- )
- assert (
- "@@@langfuseMedia:type=audio/wav|id="
- in generation.data[0].input[0]["content"][1]["input_audio"]["data"]
- )
- assert generation.data[0].type == "GENERATION"
- assert "gpt-4o-audio-preview" in generation.data[0].model
- assert generation.data[0].start_time is not None
- assert generation.data[0].end_time is not None
- assert generation.data[0].start_time < generation.data[0].end_time
- assert generation.data[0].usage.input is not None
- assert generation.data[0].usage.output is not None
- assert generation.data[0].usage.total is not None
- print(generation.data[0].output)
- assert (
- "@@@langfuseMedia:type=audio/wav|id="
- in generation.data[0].output["audio"]["data"]
- )
diff --git a/tests/test_prompt.py b/tests/test_prompt.py
deleted file mode 100644
index 8c6660f57..000000000
--- a/tests/test_prompt.py
+++ /dev/null
@@ -1,1098 +0,0 @@
-from time import sleep
-from unittest.mock import Mock, patch
-
-import openai
-import pytest
-
-from langfuse.api.resources.prompts import Prompt_Chat, Prompt_Text
-from langfuse.client import Langfuse
-from langfuse.model import ChatPromptClient, TextPromptClient
-from langfuse.prompt_cache import DEFAULT_PROMPT_CACHE_TTL_SECONDS, PromptCacheItem
-from tests.utils import create_uuid, get_api
-
-
-def test_create_prompt():
- langfuse = Langfuse()
- prompt_name = create_uuid()
- prompt_client = langfuse.create_prompt(
- name=prompt_name,
- prompt="test prompt",
- labels=["production"],
- commit_message="initial commit",
- )
-
- second_prompt_client = langfuse.get_prompt(prompt_name)
-
- assert prompt_client.name == second_prompt_client.name
- assert prompt_client.version == second_prompt_client.version
- assert prompt_client.prompt == second_prompt_client.prompt
- assert prompt_client.config == second_prompt_client.config
- assert prompt_client.commit_message == second_prompt_client.commit_message
- assert prompt_client.config == {}
-
-
-def test_create_prompt_with_is_active():
- # Backward compatibility test for is_active
- langfuse = Langfuse()
- prompt_name = create_uuid()
- prompt_client = langfuse.create_prompt(
- name=prompt_name, prompt="test prompt", is_active=True
- )
-
- second_prompt_client = langfuse.get_prompt(prompt_name)
-
- assert prompt_client.name == second_prompt_client.name
- assert prompt_client.version == second_prompt_client.version
- assert prompt_client.prompt == second_prompt_client.prompt
- assert prompt_client.config == second_prompt_client.config
- assert prompt_client.labels == ["production", "latest"]
- assert prompt_client.config == {}
-
-
-def test_create_prompt_with_special_chars_in_name():
- langfuse = Langfuse()
- prompt_name = create_uuid() + "special chars !@#$%^&*() +"
- prompt_client = langfuse.create_prompt(
- name=prompt_name,
- prompt="test prompt",
- labels=["production"],
- tags=["test"],
- )
-
- second_prompt_client = langfuse.get_prompt(prompt_name)
-
- assert prompt_client.name == second_prompt_client.name
- assert prompt_client.version == second_prompt_client.version
- assert prompt_client.prompt == second_prompt_client.prompt
- assert prompt_client.tags == second_prompt_client.tags
- assert prompt_client.config == second_prompt_client.config
- assert prompt_client.config == {}
-
-
-def test_create_chat_prompt():
- langfuse = Langfuse()
- prompt_name = create_uuid()
-
- prompt_client = langfuse.create_prompt(
- name=prompt_name,
- prompt=[
- {"role": "system", "content": "test prompt 1 with {{animal}}"},
- {"role": "user", "content": "test prompt 2 with {{occupation}}"},
- ],
- labels=["production"],
- tags=["test"],
- type="chat",
- commit_message="initial commit",
- )
-
- second_prompt_client = langfuse.get_prompt(prompt_name, type="chat")
-
- # Create a test generation
- completion = openai.chat.completions.create(
- model="gpt-3.5-turbo",
- messages=prompt_client.compile(animal="dog", occupation="doctor"),
- )
-
- assert len(completion.choices) > 0
-
- assert prompt_client.name == second_prompt_client.name
- assert prompt_client.version == second_prompt_client.version
- assert prompt_client.prompt == second_prompt_client.prompt
- assert prompt_client.config == second_prompt_client.config
- assert prompt_client.labels == ["production", "latest"]
- assert prompt_client.tags == second_prompt_client.tags
- assert prompt_client.commit_message == second_prompt_client.commit_message
- assert prompt_client.config == {}
-
-
-def test_compiling_chat_prompt():
- langfuse = Langfuse()
- prompt_name = create_uuid()
-
- prompt_client = langfuse.create_prompt(
- name=prompt_name,
- prompt=[
- {
- "role": "system",
- "content": "test prompt 1 with {{state}} {{target}} {{state}}",
- },
- {"role": "user", "content": "test prompt 2 with {{state}}"},
- ],
- labels=["production"],
- type="chat",
- )
-
- second_prompt_client = langfuse.get_prompt(prompt_name, type="chat")
-
- assert prompt_client.name == second_prompt_client.name
- assert prompt_client.version == second_prompt_client.version
- assert prompt_client.prompt == second_prompt_client.prompt
- assert prompt_client.labels == ["production", "latest"]
-
- assert second_prompt_client.compile(target="world", state="great") == [
- {"role": "system", "content": "test prompt 1 with great world great"},
- {"role": "user", "content": "test prompt 2 with great"},
- ]
-
-
-def test_compiling_prompt():
- langfuse = Langfuse()
-
- prompt_client = langfuse.create_prompt(
- name="test",
- prompt='Hello, {{target}}! I hope you are {{state}}. {{undefined_variable}}. And here is some JSON that should not be compiled: {{ "key": "value" }} \
- Here is a custom var for users using str.format instead of the mustache-style double curly braces: {custom_var}',
- is_active=True,
- )
-
- second_prompt_client = langfuse.get_prompt("test")
-
- assert prompt_client.name == second_prompt_client.name
- assert prompt_client.version == second_prompt_client.version
- assert prompt_client.prompt == second_prompt_client.prompt
- assert prompt_client.labels == ["production", "latest"]
-
- compiled = second_prompt_client.compile(target="world", state="great")
-
- assert (
- compiled
- == 'Hello, world! I hope you are great. {{undefined_variable}}. And here is some JSON that should not be compiled: {{ "key": "value" }} \
- Here is a custom var for users using str.format instead of the mustache-style double curly braces: {custom_var}'
- )
-
-
-def test_compiling_prompt_without_character_escaping():
- langfuse = Langfuse()
-
- prompt_client = langfuse.create_prompt(
- name="test",
- prompt="Hello, {{ some_json }}",
- is_active=True,
- )
-
- second_prompt_client = langfuse.get_prompt("test")
-
- assert prompt_client.name == second_prompt_client.name
- assert prompt_client.version == second_prompt_client.version
- assert prompt_client.prompt == second_prompt_client.prompt
- assert prompt_client.labels == ["production", "latest"]
-
- some_json = '{"key": "value"}'
- compiled = second_prompt_client.compile(some_json=some_json)
-
- assert compiled == 'Hello, {"key": "value"}'
-
-
-def test_compiling_prompt_with_content_as_variable_name():
- langfuse = Langfuse()
-
- prompt_client = langfuse.create_prompt(
- name="test",
- prompt="Hello, {{ content }}!",
- is_active=True,
- )
-
- second_prompt_client = langfuse.get_prompt("test")
-
- assert prompt_client.name == second_prompt_client.name
- assert prompt_client.version == second_prompt_client.version
- assert prompt_client.prompt == second_prompt_client.prompt
- assert prompt_client.labels == ["production", "latest"]
-
- compiled = second_prompt_client.compile(content="Jane")
-
- assert compiled == "Hello, Jane!"
-
-
-def test_create_prompt_with_null_config():
- langfuse = Langfuse(debug=False)
-
- langfuse.create_prompt(
- name="test_null_config",
- prompt="Hello, world! I hope you are great",
- is_active=True,
- config=None,
- )
-
- prompt = langfuse.get_prompt("test_null_config")
-
- assert prompt.config == {}
-
-
-def test_create_prompt_with_tags():
- langfuse = Langfuse(debug=False)
- prompt_name = create_uuid()
-
- langfuse.create_prompt(
- name=prompt_name,
- prompt="Hello, world! I hope you are great",
- tags=["tag1", "tag2"],
- )
-
- prompt = langfuse.get_prompt(prompt_name, version=1)
-
- assert prompt.tags == ["tag1", "tag2"]
-
-
-def test_create_prompt_with_empty_tags():
- langfuse = Langfuse(debug=False)
- prompt_name = create_uuid()
-
- langfuse.create_prompt(
- name=prompt_name,
- prompt="Hello, world! I hope you are great",
- tags=[],
- )
-
- prompt = langfuse.get_prompt(prompt_name, version=1)
-
- assert prompt.tags == []
-
-
-def test_create_prompt_with_previous_tags():
- langfuse = Langfuse(debug=False)
- prompt_name = create_uuid()
-
- langfuse.create_prompt(
- name=prompt_name,
- prompt="Hello, world! I hope you are great",
- )
-
- prompt = langfuse.get_prompt(prompt_name, version=1)
-
- assert prompt.tags == []
-
- langfuse.create_prompt(
- name=prompt_name,
- prompt="Hello, world! I hope you are great",
- tags=["tag1", "tag2"],
- )
-
- prompt_v2 = langfuse.get_prompt(prompt_name, version=2)
-
- assert prompt_v2.tags == ["tag1", "tag2"]
-
- langfuse.create_prompt(
- name=prompt_name,
- prompt="Hello, world! I hope you are great",
- )
-
- prompt_v3 = langfuse.get_prompt(prompt_name, version=3)
-
- assert prompt_v3.tags == ["tag1", "tag2"]
-
-
-def test_remove_prompt_tags():
- langfuse = Langfuse(debug=False)
- prompt_name = create_uuid()
-
- langfuse.create_prompt(
- name=prompt_name,
- prompt="Hello, world! I hope you are great",
- tags=["tag1", "tag2"],
- )
-
- langfuse.create_prompt(
- name=prompt_name,
- prompt="Hello, world! I hope you are great",
- tags=[],
- )
-
- prompt_v1 = langfuse.get_prompt(prompt_name, version=1)
- prompt_v2 = langfuse.get_prompt(prompt_name, version=2)
-
- assert prompt_v1.tags == []
- assert prompt_v2.tags == []
-
-
-def test_update_prompt_tags():
- langfuse = Langfuse(debug=False)
- prompt_name = create_uuid()
-
- langfuse.create_prompt(
- name=prompt_name,
- prompt="Hello, world! I hope you are great",
- tags=["tag1", "tag2"],
- )
-
- prompt_v1 = langfuse.get_prompt(prompt_name, version=1)
-
- assert prompt_v1.tags == ["tag1", "tag2"]
-
- langfuse.create_prompt(
- name=prompt_name,
- prompt="Hello, world! I hope you are great",
- tags=["tag3", "tag4"],
- )
-
- prompt_v2 = langfuse.get_prompt(prompt_name, version=2)
-
- assert prompt_v2.tags == ["tag3", "tag4"]
-
-
-def test_get_prompt_by_version_or_label():
- langfuse = Langfuse()
- prompt_name = create_uuid()
-
- for i in range(3):
- langfuse.create_prompt(
- name=prompt_name,
- prompt="test prompt " + str(i + 1),
- labels=["production"] if i == 1 else [],
- )
-
- default_prompt_client = langfuse.get_prompt(prompt_name)
- assert default_prompt_client.version == 2
- assert default_prompt_client.prompt == "test prompt 2"
- assert default_prompt_client.labels == ["production"]
-
- first_prompt_client = langfuse.get_prompt(prompt_name, 1)
- assert first_prompt_client.version == 1
- assert first_prompt_client.prompt == "test prompt 1"
- assert first_prompt_client.labels == []
-
- second_prompt_client = langfuse.get_prompt(prompt_name, version=2)
- assert second_prompt_client.version == 2
- assert second_prompt_client.prompt == "test prompt 2"
- assert second_prompt_client.labels == ["production"]
-
- third_prompt_client = langfuse.get_prompt(prompt_name, label="latest")
- assert third_prompt_client.version == 3
- assert third_prompt_client.prompt == "test prompt 3"
- assert third_prompt_client.labels == ["latest"]
-
-
-def test_prompt_end_to_end():
- langfuse = Langfuse(debug=False)
-
- langfuse.create_prompt(
- name="test",
- prompt="Hello, {{target}}! I hope you are {{state}}.",
- is_active=True,
- config={"temperature": 0.5},
- )
-
- prompt = langfuse.get_prompt("test")
-
- prompt_str = prompt.compile(target="world", state="great")
- assert prompt_str == "Hello, world! I hope you are great."
- assert prompt.config == {"temperature": 0.5}
-
- generation = langfuse.generation(input=prompt_str, prompt=prompt)
-
- # to check that these do not error
- generation.update(prompt=prompt)
- generation.end(prompt=prompt)
-
- langfuse.flush()
-
- api = get_api()
-
- trace_id = langfuse.get_trace_id()
-
- trace = api.trace.get(trace_id)
-
- assert len(trace.observations) == 1
-
- generation = trace.observations[0]
- assert generation.prompt_id is not None
-
- observation = api.observations.get(generation.id)
-
- assert observation.prompt_id is not None
-
-
-@pytest.fixture
-def langfuse():
- langfuse_instance = Langfuse()
- langfuse_instance.client = Mock()
- langfuse_instance.log = Mock()
-
- return langfuse_instance
-
-
-# Fetching a new prompt when nothing in cache
-def test_get_fresh_prompt(langfuse):
- prompt_name = "test"
- prompt = Prompt_Text(
- name=prompt_name,
- version=1,
- prompt="Make me laugh",
- type="text",
- labels=[],
- config={},
- tags=[],
- )
-
- mock_server_call = langfuse.client.prompts.get
- mock_server_call.return_value = prompt
-
- result = langfuse.get_prompt(prompt_name, fallback="fallback")
- mock_server_call.assert_called_once_with(
- prompt_name,
- version=None,
- label=None,
- request_options=None,
- )
-
- assert result == TextPromptClient(prompt)
-
-
-# Should throw an error if prompt name is unspecified
-def test_throw_if_name_unspecified(langfuse):
- prompt_name = ""
-
- with pytest.raises(ValueError) as exc_info:
- langfuse.get_prompt(prompt_name)
-
- assert "Prompt name cannot be empty" in str(exc_info.value)
-
-
-# Should throw an error if nothing in cache and fetch fails
-def test_throw_when_failing_fetch_and_no_cache(langfuse):
- prompt_name = "test"
-
- mock_server_call = langfuse.client.prompts.get
- mock_server_call.side_effect = Exception("Prompt not found")
-
- with pytest.raises(Exception) as exc_info:
- langfuse.get_prompt(prompt_name)
-
- assert "Prompt not found" in str(exc_info.value)
-
-
-def test_using_custom_prompt_timeouts(langfuse):
- prompt_name = "test"
- prompt = Prompt_Text(
- name=prompt_name,
- version=1,
- prompt="Make me laugh",
- type="text",
- labels=[],
- config={},
- tags=[],
- )
-
- mock_server_call = langfuse.client.prompts.get
- mock_server_call.return_value = prompt
-
- result = langfuse.get_prompt(
- prompt_name, fallback="fallback", fetch_timeout_seconds=1000
- )
- mock_server_call.assert_called_once_with(
- prompt_name,
- version=None,
- label=None,
- request_options={"timeout_in_seconds": 1000},
- )
-
- assert result == TextPromptClient(prompt)
-
-
-# Should throw an error if cache_ttl_seconds is passed as positional rather than keyword argument
-def test_throw_if_cache_ttl_seconds_positional_argument(langfuse):
- prompt_name = "test"
- version = 1
- ttl_seconds = 20
-
- with pytest.raises(TypeError) as exc_info:
- langfuse.get_prompt(prompt_name, version, ttl_seconds)
-
- assert "positional arguments" in str(exc_info.value)
-
-
-# Should return cached prompt if not expired
-def test_get_valid_cached_prompt(langfuse):
- prompt_name = "test"
- prompt = Prompt_Text(
- name=prompt_name,
- version=1,
- prompt="Make me laugh",
- type="text",
- labels=[],
- config={},
- tags=[],
- )
- prompt_client = TextPromptClient(prompt)
-
- mock_server_call = langfuse.client.prompts.get
- mock_server_call.return_value = prompt
-
- result_call_1 = langfuse.get_prompt(prompt_name, fallback="fallback")
- assert mock_server_call.call_count == 1
- assert result_call_1 == prompt_client
-
- result_call_2 = langfuse.get_prompt(prompt_name)
- assert mock_server_call.call_count == 1
- assert result_call_2 == prompt_client
-
-
-# Should return cached chat prompt if not expired when fetching by label
-def test_get_valid_cached_chat_prompt_by_label(langfuse):
- prompt_name = "test"
- prompt = Prompt_Chat(
- name=prompt_name,
- version=1,
- prompt=[{"role": "system", "content": "Make me laugh"}],
- labels=["test"],
- type="chat",
- config={},
- tags=[],
- )
- prompt_client = ChatPromptClient(prompt)
-
- mock_server_call = langfuse.client.prompts.get
- mock_server_call.return_value = prompt
-
- result_call_1 = langfuse.get_prompt(prompt_name, label="test")
- assert mock_server_call.call_count == 1
- assert result_call_1 == prompt_client
-
- result_call_2 = langfuse.get_prompt(prompt_name, label="test")
- assert mock_server_call.call_count == 1
- assert result_call_2 == prompt_client
-
-
-# Should return cached chat prompt if not expired when fetching by version
-def test_get_valid_cached_chat_prompt_by_version(langfuse):
- prompt_name = "test"
- prompt = Prompt_Chat(
- name=prompt_name,
- version=1,
- prompt=[{"role": "system", "content": "Make me laugh"}],
- labels=["test"],
- type="chat",
- config={},
- tags=[],
- )
- prompt_client = ChatPromptClient(prompt)
-
- mock_server_call = langfuse.client.prompts.get
- mock_server_call.return_value = prompt
-
- result_call_1 = langfuse.get_prompt(prompt_name, version=1)
- assert mock_server_call.call_count == 1
- assert result_call_1 == prompt_client
-
- result_call_2 = langfuse.get_prompt(prompt_name, version=1)
- assert mock_server_call.call_count == 1
- assert result_call_2 == prompt_client
-
-
-# Should return cached chat prompt if fetching the default prompt or the 'production' labeled one
-def test_get_valid_cached_production_chat_prompt(langfuse):
- prompt_name = "test"
- prompt = Prompt_Chat(
- name=prompt_name,
- version=1,
- prompt=[{"role": "system", "content": "Make me laugh"}],
- labels=["test"],
- type="chat",
- config={},
- tags=[],
- )
- prompt_client = ChatPromptClient(prompt)
-
- mock_server_call = langfuse.client.prompts.get
- mock_server_call.return_value = prompt
-
- result_call_1 = langfuse.get_prompt(prompt_name)
- assert mock_server_call.call_count == 1
- assert result_call_1 == prompt_client
-
- result_call_2 = langfuse.get_prompt(prompt_name, label="production")
- assert mock_server_call.call_count == 1
- assert result_call_2 == prompt_client
-
-
-# Should return cached chat prompt if not expired
-def test_get_valid_cached_chat_prompt(langfuse):
- prompt_name = "test"
- prompt = Prompt_Chat(
- name=prompt_name,
- version=1,
- prompt=[{"role": "system", "content": "Make me laugh"}],
- labels=[],
- type="chat",
- config={},
- tags=[],
- )
- prompt_client = ChatPromptClient(prompt)
-
- mock_server_call = langfuse.client.prompts.get
- mock_server_call.return_value = prompt
-
- result_call_1 = langfuse.get_prompt(prompt_name)
- assert mock_server_call.call_count == 1
- assert result_call_1 == prompt_client
-
- result_call_2 = langfuse.get_prompt(prompt_name)
- assert mock_server_call.call_count == 1
- assert result_call_2 == prompt_client
-
-
-# Should refetch and return new prompt if cached one is expired according to custom TTL
-@patch.object(PromptCacheItem, "get_epoch_seconds")
-def test_get_fresh_prompt_when_expired_cache_custom_ttl(mock_time, langfuse: Langfuse):
- mock_time.return_value = 0
- ttl_seconds = 20
-
- prompt_name = "test"
- prompt = Prompt_Text(
- name=prompt_name,
- version=1,
- prompt="Make me laugh",
- config={"temperature": 0.9},
- labels=[],
- type="text",
- tags=[],
- )
- prompt_client = TextPromptClient(prompt)
-
- mock_server_call = langfuse.client.prompts.get
- mock_server_call.return_value = prompt
-
- result_call_1 = langfuse.get_prompt(prompt_name, cache_ttl_seconds=ttl_seconds)
- assert mock_server_call.call_count == 1
- assert result_call_1 == prompt_client
-
- # Set time to just BEFORE cache expiry
- mock_time.return_value = ttl_seconds - 1
-
- result_call_2 = langfuse.get_prompt(prompt_name)
- assert mock_server_call.call_count == 1 # No new call
- assert result_call_2 == prompt_client
-
- # Set time to just AFTER cache expiry
- mock_time.return_value = ttl_seconds + 1
-
- result_call_3 = langfuse.get_prompt(prompt_name)
-
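-    # Expiry triggers a background refresh; wait until the refresh tasks drain so the call-count assertion below is deterministic.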
- while True:
- if langfuse.prompt_cache._task_manager.active_tasks() == 0:
- break
- sleep(0.1)
-
- assert mock_server_call.call_count == 2 # New call
- assert result_call_3 == prompt_client
-
-
-# Should disable caching when cache_ttl_seconds is set to 0
-@patch.object(PromptCacheItem, "get_epoch_seconds")
-def test_disable_caching_when_ttl_zero(mock_time, langfuse: Langfuse):
- mock_time.return_value = 0
- prompt_name = "test"
-
- # Initial prompt
- prompt1 = Prompt_Text(
- name=prompt_name,
- version=1,
- prompt="Make me laugh",
- labels=[],
- type="text",
- config={},
- tags=[],
- )
-
- # Updated prompts
- prompt2 = Prompt_Text(
- name=prompt_name,
- version=2,
- prompt="Tell me a joke",
- labels=[],
- type="text",
- config={},
- tags=[],
- )
- prompt3 = Prompt_Text(
- name=prompt_name,
- version=3,
- prompt="Share a funny story",
- labels=[],
- type="text",
- config={},
- tags=[],
- )
-
- mock_server_call = langfuse.client.prompts.get
- mock_server_call.side_effect = [prompt1, prompt2, prompt3]
-
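-    # With cache_ttl_seconds=0, caching is disabled, so every call hits the mocked server and receives the next prompt version.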
- # First call
- result1 = langfuse.get_prompt(prompt_name, cache_ttl_seconds=0)
- assert mock_server_call.call_count == 1
- assert result1 == TextPromptClient(prompt1)
-
- # Second call
- result2 = langfuse.get_prompt(prompt_name, cache_ttl_seconds=0)
- assert mock_server_call.call_count == 2
- assert result2 == TextPromptClient(prompt2)
-
- # Third call
- result3 = langfuse.get_prompt(prompt_name, cache_ttl_seconds=0)
- assert mock_server_call.call_count == 3
- assert result3 == TextPromptClient(prompt3)
-
- # Verify that all results are different
- assert result1 != result2 != result3
-
-
-# Should return stale prompt immediately if cached one is expired according to default TTL and schedule a single background refresh
-@patch.object(PromptCacheItem, "get_epoch_seconds")
-def test_get_stale_prompt_when_expired_cache_default_ttl(mock_time, langfuse: Langfuse):
- import logging
-
- logging.basicConfig(level=logging.DEBUG)
- mock_time.return_value = 0
-
- prompt_name = "test"
- prompt = Prompt_Text(
- name=prompt_name,
- version=1,
- prompt="Make me laugh",
- labels=[],
- type="text",
- config={},
- tags=[],
- )
- prompt_client = TextPromptClient(prompt)
-
- mock_server_call = langfuse.client.prompts.get
- mock_server_call.return_value = prompt
-
- result_call_1 = langfuse.get_prompt(prompt_name)
- assert mock_server_call.call_count == 1
- assert result_call_1 == prompt_client
-
- # Update the version of the returned mocked prompt
- updated_prompt = Prompt_Text(
- name=prompt_name,
- version=2,
- prompt="Make me laugh",
- labels=[],
- type="text",
- config={},
- tags=[],
- )
- mock_server_call.return_value = updated_prompt
-
- # Set time to just AFTER cache expiry
- mock_time.return_value = DEFAULT_PROMPT_CACHE_TTL_SECONDS + 1
-
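-    # The expired prompt is served stale immediately while a refresh is queued in the background.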
- stale_result = langfuse.get_prompt(prompt_name)
- assert stale_result == prompt_client
-
- # Ensure that only one refresh is triggered despite multiple calls
- # Cannot check for value as the prompt might have already been updated
- langfuse.get_prompt(prompt_name)
- langfuse.get_prompt(prompt_name)
- langfuse.get_prompt(prompt_name)
- langfuse.get_prompt(prompt_name)
-
- while True:
- if langfuse.prompt_cache._task_manager.active_tasks() == 0:
- break
- sleep(0.1)
-
- assert mock_server_call.call_count == 2 # Only one new call to server
-
- # Check that the prompt has been updated after refresh
- updated_result = langfuse.get_prompt(prompt_name)
- assert updated_result.version == 2
- assert updated_result == TextPromptClient(updated_prompt)
-
-
-# Should refetch and return new prompt if cached one is expired according to default TTL
-@patch.object(PromptCacheItem, "get_epoch_seconds")
-def test_get_fresh_prompt_when_expired_cache_default_ttl(mock_time, langfuse: Langfuse):
- mock_time.return_value = 0
-
- prompt_name = "test"
- prompt = Prompt_Text(
- name=prompt_name,
- version=1,
- prompt="Make me laugh",
- labels=[],
- type="text",
- config={},
- tags=[],
- )
- prompt_client = TextPromptClient(prompt)
-
- mock_server_call = langfuse.client.prompts.get
- mock_server_call.return_value = prompt
-
- result_call_1 = langfuse.get_prompt(prompt_name)
- assert mock_server_call.call_count == 1
- assert result_call_1 == prompt_client
-
- # Set time to just BEFORE cache expiry
- mock_time.return_value = DEFAULT_PROMPT_CACHE_TTL_SECONDS - 1
-
- result_call_2 = langfuse.get_prompt(prompt_name)
- assert mock_server_call.call_count == 1 # No new call
- assert result_call_2 == prompt_client
-
- # Set time to just AFTER cache expiry
- mock_time.return_value = DEFAULT_PROMPT_CACHE_TTL_SECONDS + 1
-
- result_call_3 = langfuse.get_prompt(prompt_name)
- while True:
- if langfuse.prompt_cache._task_manager.active_tasks() == 0:
- break
- sleep(0.1)
-
- assert mock_server_call.call_count == 2 # New call
- assert result_call_3 == prompt_client
-
-
-# Should return expired prompt if refetch fails
-@patch.object(PromptCacheItem, "get_epoch_seconds")
-def test_get_expired_prompt_when_failing_fetch(mock_time, langfuse: Langfuse):
- mock_time.return_value = 0
-
- prompt_name = "test"
- prompt = Prompt_Text(
- name=prompt_name,
- version=1,
- prompt="Make me laugh",
- labels=[],
- type="text",
- config={},
- tags=[],
- )
- prompt_client = TextPromptClient(prompt)
-
- mock_server_call = langfuse.client.prompts.get
- mock_server_call.return_value = prompt
-
- result_call_1 = langfuse.get_prompt(prompt_name)
- assert mock_server_call.call_count == 1
- assert result_call_1 == prompt_client
-
- # Set time to just AFTER cache expiry
- mock_time.return_value = DEFAULT_PROMPT_CACHE_TTL_SECONDS + 1
-
- mock_server_call.side_effect = Exception("Server error")
-
- result_call_2 = langfuse.get_prompt(prompt_name, max_retries=1)
- while True:
- if langfuse.prompt_cache._task_manager.active_tasks() == 0:
- break
- sleep(0.1)
-
- assert mock_server_call.call_count == 2
- assert result_call_2 == prompt_client
-
-
-# Should fetch new prompt if version changes
-def test_get_fresh_prompt_when_version_changes(langfuse: Langfuse):
- prompt_name = "test"
- prompt = Prompt_Text(
- name=prompt_name,
- version=1,
- prompt="Make me laugh",
- labels=[],
- type="text",
- config={},
- tags=[],
- )
- prompt_client = TextPromptClient(prompt)
-
- mock_server_call = langfuse.client.prompts.get
- mock_server_call.return_value = prompt
-
- result_call_1 = langfuse.get_prompt(prompt_name, version=1)
- assert mock_server_call.call_count == 1
- assert result_call_1 == prompt_client
-
- version_changed_prompt = Prompt_Text(
- name=prompt_name,
- version=2,
- labels=[],
- prompt="Make me laugh",
- type="text",
- config={},
- tags=[],
- )
- version_changed_prompt_client = TextPromptClient(version_changed_prompt)
- mock_server_call.return_value = version_changed_prompt
-
- result_call_2 = langfuse.get_prompt(prompt_name, version=2)
- assert mock_server_call.call_count == 2
- assert result_call_2 == version_changed_prompt_client
-
-
-def test_do_not_return_fallback_if_fetch_success():
- langfuse = Langfuse()
- prompt_name = create_uuid()
- prompt_client = langfuse.create_prompt(
- name=prompt_name,
- prompt="test prompt",
- labels=["production"],
- )
-
- second_prompt_client = langfuse.get_prompt(prompt_name, fallback="fallback")
-
- assert prompt_client.name == second_prompt_client.name
- assert prompt_client.version == second_prompt_client.version
- assert prompt_client.prompt == second_prompt_client.prompt
- assert prompt_client.config == second_prompt_client.config
- assert prompt_client.config == {}
-
-
-def test_fallback_text_prompt():
- langfuse = Langfuse()
-
- fallback_text_prompt = "this is a fallback text prompt with {{variable}}"
-
- # Should throw an error if prompt not found and no fallback provided
- with pytest.raises(Exception):
- langfuse.get_prompt("nonexistent_prompt")
-
- prompt = langfuse.get_prompt("nonexistent_prompt", fallback=fallback_text_prompt)
-
- assert prompt.prompt == fallback_text_prompt
- assert (
- prompt.compile(variable="value") == "this is a fallback text prompt with value"
- )
-
-
-def test_fallback_chat_prompt():
- langfuse = Langfuse()
- fallback_chat_prompt = [
- {"role": "system", "content": "fallback system"},
- {"role": "user", "content": "fallback user name {{name}}"},
- ]
-
- # Should throw an error if prompt not found and no fallback provided
- with pytest.raises(Exception):
- langfuse.get_prompt("nonexistent_chat_prompt", type="chat")
-
- prompt = langfuse.get_prompt(
- "nonexistent_chat_prompt", type="chat", fallback=fallback_chat_prompt
- )
-
- assert prompt.prompt == fallback_chat_prompt
- assert prompt.compile(name="Jane") == [
- {"role": "system", "content": "fallback system"},
- {"role": "user", "content": "fallback user name Jane"},
- ]
-
-
-def test_do_not_link_observation_if_fallback():
- langfuse = Langfuse()
- trace_id = create_uuid()
-
- fallback_text_prompt = "this is a fallback text prompt with {{variable}}"
-
- # Should throw an error if prompt not found and no fallback provided
- with pytest.raises(Exception):
- langfuse.get_prompt("nonexistent_prompt")
-
- prompt = langfuse.get_prompt("nonexistent_prompt", fallback=fallback_text_prompt)
-
- langfuse.trace(id=trace_id).generation(prompt=prompt, input="this is a test input")
- langfuse.flush()
-
- api = get_api()
- trace = api.trace.get(trace_id)
-
- assert len(trace.observations) == 1
- assert trace.observations[0].prompt_id is None
-
-
-def test_variable_names_on_content_with_variable_names():
- langfuse = Langfuse()
-
- prompt_client = langfuse.create_prompt(
- name="test_variable_names_1",
- prompt="test prompt with var names {{ var1 }} {{ var2 }}",
- is_active=True,
- type="text",
- )
-
- second_prompt_client = langfuse.get_prompt("test_variable_names_1")
-
- assert prompt_client.name == second_prompt_client.name
- assert prompt_client.version == second_prompt_client.version
- assert prompt_client.prompt == second_prompt_client.prompt
- assert prompt_client.labels == ["production", "latest"]
-
- var_names = second_prompt_client.variables
-
- assert var_names == ["var1", "var2"]
-
-
-def test_variable_names_on_content_with_no_variable_names():
- langfuse = Langfuse()
-
- prompt_client = langfuse.create_prompt(
- name="test_variable_names_2",
- prompt="test prompt with no var names",
- is_active=True,
- type="text",
- )
-
- second_prompt_client = langfuse.get_prompt("test_variable_names_2")
-
- assert prompt_client.name == second_prompt_client.name
- assert prompt_client.version == second_prompt_client.version
- assert prompt_client.prompt == second_prompt_client.prompt
- assert prompt_client.labels == ["production", "latest"]
-
- var_names = second_prompt_client.variables
-
- assert var_names == []
-
-
-def test_variable_names_on_content_with_variable_names_chat_messages():
- langfuse = Langfuse()
-
- prompt_client = langfuse.create_prompt(
- name="test_variable_names_3",
- prompt=[
- {
- "role": "system",
- "content": "test prompt with template vars {{ var1 }} {{ var2 }}",
- },
- {"role": "user", "content": "test prompt 2 with template vars {{ var3 }}"},
- ],
- is_active=True,
- type="chat",
- )
-
- second_prompt_client = langfuse.get_prompt("test_variable_names_3")
-
- assert prompt_client.name == second_prompt_client.name
- assert prompt_client.version == second_prompt_client.version
- assert prompt_client.prompt == second_prompt_client.prompt
- assert prompt_client.labels == ["production", "latest"]
-
- var_names = second_prompt_client.variables
-
- assert var_names == ["var1", "var2", "var3"]
-
-
-def test_variable_names_on_content_with_no_variable_names_chat_messages():
- langfuse = Langfuse()
-
- prompt_client = langfuse.create_prompt(
- name="test_variable_names_4",
- prompt=[
- {"role": "system", "content": "test prompt with no template vars"},
- {"role": "user", "content": "test prompt 2 with no template vars"},
- ],
- is_active=True,
- type="chat",
- )
-
- second_prompt_client = langfuse.get_prompt("test_variable_names_4")
-
- assert prompt_client.name == second_prompt_client.name
- assert prompt_client.version == second_prompt_client.version
- assert prompt_client.prompt == second_prompt_client.prompt
- assert prompt_client.labels == ["production", "latest"]
-
- var_names = second_prompt_client.variables
-
- assert var_names == []
diff --git a/tests/test_prompt_atexit.py b/tests/test_prompt_atexit.py
deleted file mode 100644
index 87ba396e9..000000000
--- a/tests/test_prompt_atexit.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import pytest
-import subprocess
-
-
-@pytest.mark.timeout(10)
-def test_prompts_atexit():
- python_code = """
-import time
-import logging
-from langfuse.prompt_cache import PromptCache
-
-logging.basicConfig(
- level=logging.DEBUG,
- format="%(asctime)s [%(levelname)s] %(message)s",
- handlers=[
- logging.StreamHandler()
- ]
-)
-
-print("Adding prompt cache", PromptCache)
-prompt_cache = PromptCache(max_prompt_refresh_workers=10)
-
-# example task that takes 2 seconds but we will force it to exit earlier
-def wait_2_sec():
- time.sleep(2)
-
-# 8 times
-for i in range(8):
- prompt_cache.add_refresh_prompt_task(f"key_wait_2_sec_i_{i}", lambda: wait_2_sec())
-"""
-
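-    # Run the snippet in a separate interpreter so the atexit shutdown hook fires when that process exits.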
- process = subprocess.Popen(
- ["python", "-c", python_code], stderr=subprocess.PIPE, text=True
- )
-
- logs = ""
-
- try:
- for line in process.stderr:
- logs += line.strip()
- print(line.strip())
- except subprocess.TimeoutExpired:
- pytest.fail("The process took too long to execute")
- process.communicate()
-
- returncode = process.returncode
- if returncode != 0:
- pytest.fail("Process returned with error code")
-
- print(process.stderr)
-
- shutdown_count = logs.count("Shutdown of prompt refresh task manager completed.")
- assert (
- shutdown_count == 1
- ), f"Expected 1 shutdown messages, but found {shutdown_count}"
-
-
-@pytest.mark.timeout(10)
-def test_prompts_atexit_async():
- python_code = """
-import time
-import asyncio
-import logging
-from langfuse.prompt_cache import PromptCache
-
-logging.basicConfig(
- level=logging.DEBUG,
- format="%(asctime)s [%(levelname)s] %(message)s",
- handlers=[
- logging.StreamHandler()
- ]
-)
-
-async def main():
- print("Adding prompt cache", PromptCache)
- prompt_cache = PromptCache(max_prompt_refresh_workers=10)
-
- # example task that takes 2 seconds but we will force it to exit earlier
- def wait_2_sec():
- time.sleep(2)
-
- async def add_new_prompt_refresh(i: int):
- prompt_cache.add_refresh_prompt_task(f"key_wait_2_sec_i_{i}", lambda: wait_2_sec())
-
- # 8 times
- tasks = [add_new_prompt_refresh(i) for i in range(8)]
- await asyncio.gather(*tasks)
-
-async def run_multiple_mains():
- main_tasks = [main() for _ in range(3)]
- await asyncio.gather(*main_tasks)
-
-if __name__ == "__main__":
- asyncio.run(run_multiple_mains())
-"""
-
- process = subprocess.Popen(
- ["python", "-c", python_code], stderr=subprocess.PIPE, text=True
- )
-
- logs = ""
-
- try:
- for line in process.stderr:
- logs += line.strip()
- print(line.strip())
- except subprocess.TimeoutExpired:
- pytest.fail("The process took too long to execute")
- process.communicate()
-
- returncode = process.returncode
- if returncode != 0:
- pytest.fail("Process returned with error code")
-
- print(process.stderr)
-
- shutdown_count = logs.count("Shutdown of prompt refresh task manager completed.")
- assert (
- shutdown_count == 3
- ), f"Expected 3 shutdown messages, but found {shutdown_count}"
diff --git a/tests/test_prompt_compilation.py b/tests/test_prompt_compilation.py
deleted file mode 100644
index 856025717..000000000
--- a/tests/test_prompt_compilation.py
+++ /dev/null
@@ -1,183 +0,0 @@
-import pytest
-
-from langfuse.model import TemplateParser
-
-
-def test_basic_replacement():
- template = "Hello, {{ name }}!"
- expected = "Hello, John!"
-
- assert TemplateParser.compile_template(template, {"name": "John"}) == expected
-
-
-def test_multiple_replacements():
- template = "{{greeting}}, {{name}}! Your balance is {{balance}}."
- expected = "Hello, John! Your balance is $100."
-
- assert (
- TemplateParser.compile_template(
- template, {"greeting": "Hello", "name": "John", "balance": "$100"}
- )
- == expected
- )
-
-
-def test_no_replacements():
- template = "This is a test."
- expected = "This is a test."
-
- assert TemplateParser.compile_template(template) == expected
-
-
-def test_content_as_variable_name():
- template = "This is a {{content}}."
- expected = "This is a dog."
-
- assert TemplateParser.compile_template(template, {"content": "dog"}) == expected
-
-
-def test_unmatched_opening_tag():
- template = "Hello, {{name! Your balance is $100."
- expected = "Hello, {{name! Your balance is $100."
-
- assert TemplateParser.compile_template(template, {"name": "John"}) == expected
-
-
-def test_unmatched_closing_tag():
- template = "Hello, {{name}}! Your balance is $100}}"
- expected = "Hello, John! Your balance is $100}}"
-
- assert TemplateParser.compile_template(template, {"name": "John"}) == expected
-
-
-def test_missing_variable():
- template = "Hello, {{name}}!"
- expected = "Hello, {{name}}!"
-
- assert TemplateParser.compile_template(template) == expected
-
-
-def test_none_variable():
- template = "Hello, {{name}}!"
- expected = "Hello, !"
-
- assert TemplateParser.compile_template(template, {"name": None}) == expected
-
-
-def test_strip_whitespace():
- template = "Hello, {{ name }}!"
- expected = "Hello, John!"
-
- assert TemplateParser.compile_template(template, {"name": "John"}) == expected
-
-
-def test_special_characters():
- template = "Symbols: {{symbol}}."
- expected = "Symbols: @$%^&*."
-
- assert TemplateParser.compile_template(template, {"symbol": "@$%^&*"}) == expected
-
-
-def test_multiple_templates_one_var():
- template = "{{a}} + {{a}} = {{b}}"
- expected = "1 + 1 = 2"
-
- assert TemplateParser.compile_template(template, {"a": 1, "b": 2}) == expected
-
-
-def test_unused_variable():
- template = "{{a}} + {{a}}"
- expected = "1 + 1"
-
- assert TemplateParser.compile_template(template, {"a": 1, "b": 2}) == expected
-
-
-def test_single_curly_braces():
- template = "{{a}} + {a} = {{b}"
- expected = "1 + {a} = {{b}"
-
- assert TemplateParser.compile_template(template, {"a": 1, "b": 2}) == expected
-
-
-def test_complex_json():
- template = """{{a}} + {{
- "key1": "val1",
- "key2": "val2",
- }}"""
- expected = """1 + {{
- "key1": "val1",
- "key2": "val2",
- }}"""
-
- assert TemplateParser.compile_template(template, {"a": 1, "b": 2}) == expected
-
-
-def test_replacement_with_empty_string():
- template = "Hello, {{name}}!"
- expected = "Hello, !"
-
- assert TemplateParser.compile_template(template, {"name": ""}) == expected
-
-
-def test_variable_case_sensitivity():
- template = "{{Name}} != {{name}}"
- expected = "John != john"
-
- assert (
- TemplateParser.compile_template(template, {"Name": "John", "name": "john"})
- == expected
- )
-
-
-def test_start_with_closing_braces():
- template = "}}"
- expected = "}}"
-
- assert TemplateParser.compile_template(template, {"name": "john"}) == expected
-
-
-def test_unescaped_JSON_variable_value():
- template = "{{some_json}}"
- some_json = """
-{
- "user": {
- "id": 12345,
- "name": "John Doe",
- "email": "john.doe@example.com",
- "isActive": true,
- "accountCreated": "2024-01-15T08:00:00Z",
- "roles": [
- "user",
- "admin"
- ],
- "preferences": {
- "language": "en",
- "notifications": {
- "email": true,
- "sms": false
- }
- },
- "address": {
- "street": "123 Elm Street",
- "city": "Anytown",
- "state": "Anystate",
- "zipCode": "12345",
- "country": "USA"
- }
- }
-}"""
-
- compiled = TemplateParser.compile_template(template, {"some_json": some_json})
- assert compiled == some_json
-
-
-@pytest.mark.parametrize(
- "template,data,expected",
- [
- ("{{a}} + {{b}} = {{result}}", {"a": 1, "b": 2, "result": 3}, "1 + 2 = 3"),
- ("{{x}}, {{y}}", {"x": "X", "y": "Y"}, "X, Y"),
- ("No variables", {}, "No variables"),
- ],
-)
-def test_various_templates(template, data, expected):
- assert TemplateParser.compile_template(template, data) == expected
diff --git a/tests/test_sampler.py b/tests/test_sampler.py
deleted file mode 100644
index eb67f1e36..000000000
--- a/tests/test_sampler.py
+++ /dev/null
@@ -1,88 +0,0 @@
-import unittest
-from langfuse.Sampler import Sampler
-
-
-class TestSampler(unittest.TestCase):
- def setUp(self):
- self.sampler = Sampler(sample_rate=0.5)
-
- def test_sample_event_trace_create(self):
- event = {"type": "trace-create", "body": {"id": "trace_123"}}
- result = self.sampler.sample_event(event)
- self.assertIsInstance(result, bool)
-
- event = {
- "type": "trace-create",
- "body": {"id": "trace_123", "something": "else"},
- }
- result_two = self.sampler.sample_event(event)
-        self.assertIsInstance(result_two, bool)
-
- assert result == result_two
-
- def test_multiple_events_of_different_types(self):
- event = {"type": "trace-create", "body": {"id": "trace_123"}}
-
- result = self.sampler.sample_event(event)
- self.assertIsInstance(result, bool)
-
- event = {"type": "generation-create", "body": {"trace_id": "trace_123"}}
- result_two = self.sampler.sample_event(event)
-        self.assertIsInstance(result_two, bool)
-
-        event = {"type": "score-create", "body": {"trace_id": "trace_123"}}
-        result_three = self.sampler.sample_event(event)
-        self.assertIsInstance(result_three, bool)
-
-        event = {"type": "generation-update", "body": {"traceId": "trace_123"}}
-        result_four = self.sampler.sample_event(event)
-        self.assertIsInstance(result_four, bool)
-
- assert result == result_two == result_three == result_four
-
- def test_sample_event_trace_id(self):
- event = {"type": "some-other-type", "body": {"trace_id": "trace_456"}}
- result = self.sampler.sample_event(event)
- self.assertIsInstance(result, bool)
-
- def test_sample_event_unexpected_properties(self):
- event = {"type": "some-type", "body": {}}
- result = self.sampler.sample_event(event)
- self.assertTrue(result)
-
- def test_deterministic_sample(self):
- trace_id = "trace_789"
- result = self.sampler.deterministic_sample(trace_id, 0.5)
- self.assertIsInstance(result, bool)
-
- def test_deterministic_sample_high_rate(self):
- trace_id = "trace_789"
- result = self.sampler.deterministic_sample(trace_id, 1.0)
- self.assertTrue(result)
-
- def test_deterministic_sample_low_rate(self):
- trace_id = "trace_789"
- result = self.sampler.deterministic_sample(trace_id, 0.0)
- self.assertFalse(result)
-
- def test_deterministic_sample_50_percent_rate(self):
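-        # Sampling hashes the trace id deterministically, so roughly half of 1000 distinct ids should pass at rate 0.5.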
- trace_ids = [f"trace_{i}" for i in range(1000)]
- sampled_count = sum(
- self.sampler.deterministic_sample(trace_id, 0.5) for trace_id in trace_ids
- )
- print(sampled_count)
- self.assertTrue(
- 450 <= sampled_count <= 550,
- f"Sampled count {sampled_count} is not within the expected range",
- )
-
- def test_deterministic_sample_10_percent_rate(self):
- trace_ids = [f"trace_{i}" for i in range(1000)]
- sampled_count = sum(
- self.sampler.deterministic_sample(trace_id, 0.1) for trace_id in trace_ids
- )
- print(sampled_count)
- self.assertTrue(
- 90 <= sampled_count <= 110,
- f"Sampled count {sampled_count} is not within the expected range",
- )
diff --git a/tests/test_sdk_setup.py b/tests/test_sdk_setup.py
deleted file mode 100644
index cca83929e..000000000
--- a/tests/test_sdk_setup.py
+++ /dev/null
@@ -1,516 +0,0 @@
-import importlib
-import logging
-import os
-
-import httpx
-import pytest
-from pytest_httpserver import HTTPServer
-from werkzeug import Response
-
-import langfuse
-from langfuse.api.resources.commons.errors.unauthorized_error import UnauthorizedError
-from langfuse.callback import CallbackHandler
-from langfuse.client import Langfuse
-from langfuse.openai import _is_openai_v1, auth_check, openai
-from langfuse.utils.langfuse_singleton import LangfuseSingleton
-from tests.test_task_manager import get_host
-
-chat_func = (
- openai.chat.completions.create if _is_openai_v1() else openai.ChatCompletion.create
-)
-
-
-def test_langfuse_release():
- # Backup environment variables to restore them later
- backup_environ = os.environ.copy()
-
- # Clearing the environment variables
- os.environ.clear()
-
-    # These keys are required
- client = Langfuse(public_key="test", secret_key="test")
- assert client.release is None
-
- # If neither the LANGFUSE_RELEASE env var nor the release parameter is given,
- # it should fall back to get_common_release_envs
- os.environ["CIRCLE_SHA1"] = "mock-sha1"
- client = Langfuse(public_key="test", secret_key="test")
- assert client.release == "mock-sha1"
-
- # If LANGFUSE_RELEASE env var is set, it should take precedence
- os.environ["LANGFUSE_RELEASE"] = "mock-langfuse-release"
- client = Langfuse(public_key="test", secret_key="test")
- assert client.release == "mock-langfuse-release"
-
- # If the release parameter is given during initialization, it should take the highest precedence
- client = Langfuse(public_key="test", secret_key="test", release="parameter-release")
- assert client.release == "parameter-release"
-
- # Restoring the environment variables
- os.environ.update(backup_environ)
-
-
-# langfuse sdk
-def test_setup_without_any_keys(caplog):
- public_key, secret_key, host = (
- os.environ["LANGFUSE_PUBLIC_KEY"],
- os.environ["LANGFUSE_SECRET_KEY"],
- os.environ["LANGFUSE_HOST"],
- )
- os.environ.pop("LANGFUSE_PUBLIC_KEY")
- os.environ.pop("LANGFUSE_SECRET_KEY")
- os.environ.pop("LANGFUSE_HOST")
-
- with caplog.at_level(logging.WARNING):
- Langfuse()
-
- assert "Langfuse client is disabled" in caplog.text
-
- os.environ["LANGFUSE_PUBLIC_KEY"] = public_key
- os.environ["LANGFUSE_SECRET_KEY"] = secret_key
- os.environ["LANGFUSE_HOST"] = host
-
-
-def test_setup_without_pk(caplog):
- public_key = os.environ["LANGFUSE_PUBLIC_KEY"]
- os.environ.pop("LANGFUSE_PUBLIC_KEY")
- with caplog.at_level(logging.WARNING):
- Langfuse()
-
- assert "Langfuse client is disabled" in caplog.text
- os.environ["LANGFUSE_PUBLIC_KEY"] = public_key
-
-
-def test_setup_without_sk(caplog):
- secret_key = os.environ["LANGFUSE_SECRET_KEY"]
- os.environ.pop("LANGFUSE_SECRET_KEY")
- with caplog.at_level(logging.WARNING):
- Langfuse()
-
- assert "Langfuse client is disabled" in caplog.text
- os.environ["LANGFUSE_SECRET_KEY"] = secret_key
-
-
-def test_init_precedence_pk():
- langfuse = Langfuse(public_key="test_LANGFUSE_PUBLIC_KEY")
- assert (
- langfuse.client._client_wrapper._x_langfuse_public_key
- == "test_LANGFUSE_PUBLIC_KEY"
- )
- assert langfuse.client._client_wrapper._username == "test_LANGFUSE_PUBLIC_KEY"
-
-
-def test_init_precedence_sk():
- langfuse = Langfuse(secret_key="test_LANGFUSE_SECRET_KEY")
- assert langfuse.client._client_wrapper._password == "test_LANGFUSE_SECRET_KEY"
-
-
-def test_init_precedence_env():
- langfuse = Langfuse(host="http://localhost:8000/")
- assert langfuse.client._client_wrapper._base_url == "http://localhost:8000/"
-
-
-def test_sdk_default_host():
- _, _, host = get_env_variables()
- os.environ.pop("LANGFUSE_HOST")
-
- langfuse = Langfuse()
- assert langfuse.base_url == "https://cloud.langfuse.com"
- os.environ["LANGFUSE_HOST"] = host
-
-
-def test_sdk_default():
- public_key, secret_key, host = get_env_variables()
-
- langfuse = Langfuse()
-
- assert langfuse.client._client_wrapper._username == public_key
- assert langfuse.client._client_wrapper._password == secret_key
- assert langfuse.client._client_wrapper._base_url == host
- assert langfuse.task_manager._threads == 1
- assert langfuse.task_manager._flush_at == 15
- assert langfuse.task_manager._flush_interval == 0.5
- assert langfuse.task_manager._max_retries == 3
- assert langfuse.task_manager._client._timeout == 20
-
-
-def test_sdk_custom_configs():
- public_key, secret_key, host = get_env_variables()
-
- langfuse = Langfuse(
- threads=3,
- flush_at=3,
- flush_interval=3,
- max_retries=3,
- timeout=3,
- )
-
- assert langfuse.client._client_wrapper._username == public_key
- assert langfuse.client._client_wrapper._password == secret_key
- assert langfuse.client._client_wrapper._base_url == host
- assert langfuse.task_manager._threads == 3
- assert langfuse.task_manager._flush_at == 3
- assert langfuse.task_manager._flush_interval == 3
- assert langfuse.task_manager._max_retries == 3
- assert langfuse.task_manager._client._timeout == 3
-
-
-def test_sdk_custom_httpx_client():
- public_key, secret_key, host = get_env_variables()
-
- client = httpx.Client(timeout=9999)
-
- langfuse = Langfuse(httpx_client=client)
-
- langfuse.auth_check()
-
- assert langfuse.client._client_wrapper._username == public_key
- assert langfuse.client._client_wrapper._password == secret_key
- assert langfuse.client._client_wrapper._base_url == host
- assert langfuse.task_manager._client._session._timeout.as_dict() == {
- "connect": 9999,
- "pool": 9999,
- "read": 9999,
- "write": 9999,
- }
- assert (
- langfuse.client._client_wrapper.httpx_client.httpx_client._timeout.as_dict()
- == {
- "connect": 9999,
- "pool": 9999,
- "read": 9999,
- "write": 9999,
- }
- )
-
-
-# callback
-def test_callback_setup_without_keys(caplog):
- public_key, secret_key, host = get_env_variables()
- os.environ.pop("LANGFUSE_PUBLIC_KEY")
- os.environ.pop("LANGFUSE_SECRET_KEY")
- os.environ.pop("LANGFUSE_HOST")
-
- with caplog.at_level(logging.WARNING):
- CallbackHandler()
-
- assert "Langfuse client is disabled" in caplog.text
-
- os.environ["LANGFUSE_PUBLIC_KEY"] = public_key
- os.environ["LANGFUSE_SECRET_KEY"] = secret_key
- os.environ["LANGFUSE_HOST"] = host
-
-
-def test_callback_default_host():
- _, _, host = get_env_variables()
- os.environ.pop("LANGFUSE_HOST")
-
- handler = CallbackHandler(debug=False)
- assert (
- handler.langfuse.client._client_wrapper._base_url
- == "https://cloud.langfuse.com"
- )
- os.environ["LANGFUSE_HOST"] = host
-
-
-def test_callback_sampling():
- os.environ["LANGFUSE_SAMPLE_RATE"] = "0.2"
-
- handler = CallbackHandler()
- assert handler.langfuse.task_manager._sample_rate == 0.2
-
- os.environ.pop("LANGFUSE_SAMPLE_RATE")
-
-
-def test_callback_setup():
- public_key, secret_key, host = get_env_variables()
-
- callback_handler = CallbackHandler()
-
- assert callback_handler.langfuse.client._client_wrapper._username == public_key
- assert callback_handler.langfuse.client._client_wrapper._base_url == host
- assert callback_handler.langfuse.client._client_wrapper._password == secret_key
-
-
-def test_callback_setup_without_pk(caplog):
- public_key = os.environ["LANGFUSE_PUBLIC_KEY"]
- os.environ.pop("LANGFUSE_PUBLIC_KEY")
-
- with caplog.at_level(logging.WARNING):
- CallbackHandler()
-
- assert "Langfuse client is disabled" in caplog.text
-
- os.environ["LANGFUSE_PUBLIC_KEY"] = public_key
-
-
-def test_callback_setup_without_sk(caplog):
- secret_key = os.environ["LANGFUSE_SECRET_KEY"]
- os.environ.pop("LANGFUSE_SECRET_KEY")
-
- with caplog.at_level(logging.WARNING):
- CallbackHandler()
-
- assert "Langfuse client is disabled" in caplog.text
-
- os.environ["LANGFUSE_SECRET_KEY"] = secret_key
-
-
-def test_callback_init_precedence_pk():
- handler = CallbackHandler(public_key="test_LANGFUSE_PUBLIC_KEY")
- assert (
- handler.langfuse.client._client_wrapper._x_langfuse_public_key
- == "test_LANGFUSE_PUBLIC_KEY"
- )
- assert (
- handler.langfuse.client._client_wrapper._username == "test_LANGFUSE_PUBLIC_KEY"
- )
-
-
-def test_callback_init_precedence_sk():
- handler = CallbackHandler(secret_key="test_LANGFUSE_SECRET_KEY")
- assert (
- handler.langfuse.client._client_wrapper._password == "test_LANGFUSE_SECRET_KEY"
- )
-
-
-def test_callback_init_precedence_host():
- handler = CallbackHandler(host="http://localhost:8000/")
- assert handler.langfuse.client._client_wrapper._base_url == "http://localhost:8000/"
-
-
-def test_callback_init_workers():
- handler = CallbackHandler()
- assert handler.langfuse.task_manager._threads == 1
-
-
-def test_callback_init_workers_5():
- handler = CallbackHandler(threads=5)
- assert handler.langfuse.task_manager._threads == 5
-
-
-def test_client_init_workers():
- langfuse = Langfuse()
- assert langfuse.task_manager._threads == 1
-
-
-def test_openai_default():
- from langfuse.openai import modifier, openai
-
- importlib.reload(langfuse)
- importlib.reload(langfuse.openai)
-
- chat_func = (
- openai.chat.completions.create
- if _is_openai_v1()
- else openai.ChatCompletion.create
- )
-
- public_key, secret_key, host = (
- os.environ["LANGFUSE_PUBLIC_KEY"],
- os.environ["LANGFUSE_SECRET_KEY"],
- os.environ["LANGFUSE_HOST"],
- )
-
- chat_func(
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- )
-
- openai.flush_langfuse()
- assert modifier._langfuse.client._client_wrapper._username == public_key
- assert modifier._langfuse.client._client_wrapper._password == secret_key
- assert modifier._langfuse.client._client_wrapper._base_url == host
-
- os.environ["LANGFUSE_PUBLIC_KEY"] = public_key
- os.environ["LANGFUSE_SECRET_KEY"] = secret_key
- os.environ["LANGFUSE_HOST"] = host
-
-
-def test_openai_configs():
- from langfuse.openai import modifier, openai
-
- importlib.reload(langfuse)
- importlib.reload(langfuse.openai)
-
- chat_func = (
- openai.chat.completions.create
- if _is_openai_v1()
- else openai.ChatCompletion.create
- )
-
- openai.base_url = "http://localhost:8000/"
-
- public_key, secret_key, host = (
- os.environ["LANGFUSE_PUBLIC_KEY"],
- os.environ["LANGFUSE_SECRET_KEY"],
- os.environ["LANGFUSE_HOST"],
- )
-
- with pytest.raises(openai.APIConnectionError):
- chat_func(
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- )
-
- openai.flush_langfuse()
- assert modifier._langfuse.client._client_wrapper._username == public_key
- assert modifier._langfuse.client._client_wrapper._password == secret_key
- assert modifier._langfuse.client._client_wrapper._base_url == host
-
- os.environ["LANGFUSE_PUBLIC_KEY"] = public_key
- os.environ["LANGFUSE_SECRET_KEY"] = secret_key
- os.environ["LANGFUSE_HOST"] = host
- openai.base_url = None
-
-
-def test_openai_auth_check():
- assert auth_check() is True
-
-
-def test_openai_auth_check_failing_key():
- LangfuseSingleton().reset()
-
- secret_key = os.environ["LANGFUSE_SECRET_KEY"]
- os.environ.pop("LANGFUSE_SECRET_KEY")
-
- importlib.reload(langfuse)
- importlib.reload(langfuse.openai)
-
- from langfuse.openai import openai
-
- openai.langfuse_secret_key = "test"
-
- with pytest.raises(UnauthorizedError):
- auth_check()
-
- os.environ["LANGFUSE_SECRET_KEY"] = secret_key
-
-
-def test_openai_configured(httpserver: HTTPServer):
- LangfuseSingleton().reset()
-
- httpserver.expect_request(
- "/api/public/ingestion", method="POST"
- ).respond_with_response(Response(status=200))
- host = get_host(httpserver.url_for("/api/public/ingestion"))
-
- importlib.reload(langfuse)
- importlib.reload(langfuse.openai)
- from langfuse.openai import modifier, openai
-
- chat_func = (
- openai.chat.completions.create
- if _is_openai_v1()
- else openai.ChatCompletion.create
- )
-
- public_key, secret_key, original_host = (
- os.environ["LANGFUSE_PUBLIC_KEY"],
- os.environ["LANGFUSE_SECRET_KEY"],
- os.environ["LANGFUSE_HOST"],
- )
-
- os.environ.pop("LANGFUSE_PUBLIC_KEY")
- os.environ.pop("LANGFUSE_SECRET_KEY")
- os.environ.pop("LANGFUSE_HOST")
-
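-    # Credentials set as module-level attributes on the patched openai client configure the Langfuse instance used by the integration.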
- openai.langfuse_public_key = "pk-lf-asdfghjkl"
- openai.langfuse_secret_key = "sk-lf-asdfghjkl"
- openai.langfuse_host = host
- openai.langfuse_sample_rate = 0.2
-
- chat_func(
- model="gpt-3.5-turbo",
- messages=[{"role": "user", "content": "1 + 1 = "}],
- temperature=0,
- metadata={"someKey": "someResponse"},
- )
- openai.flush_langfuse()
-
- assert modifier._langfuse.client._client_wrapper._username == "pk-lf-asdfghjkl"
- assert modifier._langfuse.client._client_wrapper._password == "sk-lf-asdfghjkl"
- assert modifier._langfuse.client._client_wrapper._base_url == host
- assert modifier._langfuse.task_manager._client._base_url == host
- assert modifier._langfuse.task_manager._sample_rate == 0.2
-
- os.environ["LANGFUSE_PUBLIC_KEY"] = public_key
- os.environ["LANGFUSE_SECRET_KEY"] = secret_key
- os.environ["LANGFUSE_HOST"] = original_host
-
-
-def test_client_init_workers_5():
- langfuse = Langfuse(threads=5)
- langfuse.flush()
-
- assert langfuse.task_manager._threads == 5
-
-
-def get_env_variables():
- return (
- os.environ["LANGFUSE_PUBLIC_KEY"],
- os.environ["LANGFUSE_SECRET_KEY"],
- os.environ["LANGFUSE_HOST"],
- )
-
-
-def test_auth_check():
- langfuse = Langfuse(debug=False)
-
- assert langfuse.auth_check() is True
-
- langfuse.flush()
-
-
-def test_wrong_key_auth_check():
- langfuse = Langfuse(debug=False, secret_key="test")
-
- with pytest.raises(UnauthorizedError):
- langfuse.auth_check()
-
- langfuse.flush()
-
-
-def test_auth_check_callback():
- langfuse = CallbackHandler(debug=False)
-
- assert langfuse.auth_check() is True
- langfuse.flush()
-
-
-def test_auth_check_callback_stateful():
- langfuse = Langfuse(debug=False)
- trace = langfuse.trace(name="name")
- handler = trace.get_langchain_handler()
-
- assert handler.auth_check() is True
- handler.flush()
-
-
-def test_wrong_key_auth_check_callback():
- langfuse = CallbackHandler(debug=False, secret_key="test")
-
- with pytest.raises(UnauthorizedError):
- langfuse.auth_check()
- langfuse.flush()
-
-
-def test_wrong_url_auth_check():
- langfuse = Langfuse(debug=False, host="http://localhost:4000/")
-
- with pytest.raises(httpx.ConnectError):
- langfuse.auth_check()
-
- langfuse.flush()
-
-
-def test_wrong_url_auth_check_callback():
- langfuse = CallbackHandler(debug=False, host="http://localhost:4000/")
-
- with pytest.raises(httpx.ConnectError):
- langfuse.auth_check()
- langfuse.flush()
diff --git a/tests/test_serializer.py b/tests/test_serializer.py
deleted file mode 100644
index e01561530..000000000
--- a/tests/test_serializer.py
+++ /dev/null
@@ -1,191 +0,0 @@
-from datetime import datetime, date, timezone
-from uuid import UUID
-from enum import Enum
-from dataclasses import dataclass
-from pathlib import Path
-from pydantic import BaseModel
-import json
-import pytest
-import threading
-import langfuse.serializer
-from langfuse.serializer import (
- EventSerializer,
-)
-
-
-class TestEnum(Enum):
- A = 1
- B = 2
-
-
-@dataclass
-class TestDataclass:
- field: str
-
-
-class TestBaseModel(BaseModel):
- field: str
-
-
-def test_datetime():
- dt = datetime(2023, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
- serializer = EventSerializer()
-
- assert serializer.encode(dt) == '"2023-01-01T12:00:00Z"'
-
-
-def test_date():
- d = date(2023, 1, 1)
- serializer = EventSerializer()
- assert serializer.encode(d) == '"2023-01-01"'
-
-
-def test_enum():
- serializer = EventSerializer()
- assert serializer.encode(TestEnum.A) == "1"
-
-
-def test_uuid():
- uuid = UUID("123e4567-e89b-12d3-a456-426614174000")
- serializer = EventSerializer()
- assert serializer.encode(uuid) == '"123e4567-e89b-12d3-a456-426614174000"'
-
-
-def test_bytes():
- b = b"hello"
- serializer = EventSerializer()
- assert serializer.encode(b) == '"hello"'
-
-
-def test_dataclass():
- dc = TestDataclass(field="test")
- serializer = EventSerializer()
- assert json.loads(serializer.encode(dc)) == {"field": "test"}
-
-
-def test_pydantic_model():
- model = TestBaseModel(field="test")
- serializer = EventSerializer()
- assert json.loads(serializer.encode(model)) == {"field": "test"}
-
-
-def test_path():
- path = Path("/tmp/test.txt")
- serializer = EventSerializer()
- assert serializer.encode(path) == '"/tmp/test.txt"'
-
-
-def test_tuple_set_frozenset():
- data = (1, 2, 3)
- serializer = EventSerializer()
- assert serializer.encode(data) == "[1, 2, 3]"
-
- data = {1, 2, 3}
- assert serializer.encode(data) == "[1, 2, 3]"
-
- data = frozenset([1, 2, 3])
- assert json.loads(serializer.encode(data)) == [1, 2, 3]
-
-
-def test_dict():
- data = {"a": 1, "b": "two"}
- serializer = EventSerializer()
-
- assert json.loads(serializer.encode(data)) == data
-
-
-def test_list():
- data = [1, "two", 3.0]
- serializer = EventSerializer()
-
- assert json.loads(serializer.encode(data)) == data
-
-
-def test_nested_structures():
- data = {"list": [1, 2, 3], "dict": {"a": 1, "b": 2}, "tuple": (4, 5, 6)}
- serializer = EventSerializer()
-
- assert json.loads(serializer.encode(data)) == {
- "list": [1, 2, 3],
- "dict": {"a": 1, "b": 2},
- "tuple": [4, 5, 6],
- }
-
-
-def test_custom_object():
- class CustomObject:
- def __init__(self):
- self.field = "value"
-
- obj = CustomObject()
- serializer = EventSerializer()
-
- assert json.loads(serializer.encode(obj)) == {"field": "value"}
-
-
-def test_circular_reference():
- class Node:
- def __init__(self):
- self.next = None
-
- node1 = Node()
- node2 = Node()
- node1.next = node2
- node2.next = node1
-
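-    # The encoder must terminate on cycles; per the assertion below, the revisited node is rendered as its class name.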
- serializer = EventSerializer()
- result = json.loads(serializer.encode(node1))
-
- assert result == {"next": {"next": "Node"}}
-
-
-def test_not_serializable():
- class NotSerializable:
- def __init__(self):
- self.lock = threading.Lock()
-
- def __repr__(self):
- raise Exception("Cannot represent")
-
- obj = NotSerializable()
- serializer = EventSerializer()
-
- assert serializer.encode(obj) == '{"lock": ""}'
-
-
-def test_exception():
- ex = ValueError("Test exception")
- serializer = EventSerializer()
- assert serializer.encode(ex) == '"ValueError: Test exception"'
-
-
-def test_none():
- serializer = EventSerializer()
- assert serializer.encode(None) == "null"
-
-
-def test_none_without_langchain(monkeypatch: pytest.MonkeyPatch):
- monkeypatch.setattr(langfuse.serializer, "Serializable", type(None), raising=True)
- serializer = EventSerializer()
- assert serializer.encode(None) == "null"
-
-
-def test_slots():
- class SlotClass:
- __slots__ = ["field"]
-
- def __init__(self):
- self.field = "value"
-
- obj = SlotClass()
- serializer = EventSerializer()
- assert json.loads(serializer.encode(obj)) == {"field": "value"}
-
-
-def test_numpy_float32():
- import numpy as np
-
- data = np.float32(1.0)
- serializer = EventSerializer()
-
- assert serializer.encode(data) == "1.0"
diff --git a/tests/test_singleton.py b/tests/test_singleton.py
deleted file mode 100644
index c54c86f79..000000000
--- a/tests/test_singleton.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import threading
-from unittest.mock import patch
-
-import pytest
-
-from langfuse.utils.langfuse_singleton import LangfuseSingleton
-
-
-@pytest.fixture(autouse=True)
-def reset_singleton():
- LangfuseSingleton._instance = None
- LangfuseSingleton._langfuse = None
- yield
- LangfuseSingleton._instance = None
- LangfuseSingleton._langfuse = None
-
-
-def test_singleton_instance():
- """Test that the LangfuseSingleton class truly implements singleton behavior."""
- instance1 = LangfuseSingleton()
- instance2 = LangfuseSingleton()
-
- assert instance1 is instance2
-
-
-def test_singleton_thread_safety():
- """Test the thread safety of the LangfuseSingleton class."""
-
- def get_instance(results):
- instance = LangfuseSingleton()
- results.append(instance)
-
- results = []
- threads = [
- threading.Thread(target=get_instance, args=(results,)) for _ in range(10)
- ]
-
- for thread in threads:
- thread.start()
- for thread in threads:
- thread.join()
-
- for instance in results:
- assert instance is results[0]
-
-
-@patch("langfuse.utils.langfuse_singleton.Langfuse")
-def test_langfuse_initialization(mock_langfuse):
- instance = LangfuseSingleton()
- created = instance.get(public_key="key123", secret_key="secret", debug=True)
- mock_langfuse.assert_called_once_with(
- public_key="key123",
- secret_key="secret",
- debug=True,
- )
-
- assert created is mock_langfuse.return_value
-
-
-@patch("langfuse.utils.langfuse_singleton.Langfuse")
-def test_reset_functionality(mock_langfuse):
- """Test the reset functionality of the LangfuseSingleton."""
- instance = LangfuseSingleton()
- instance.get(public_key="key123")
- instance.reset()
-
- assert instance._langfuse is None
-
- mock_langfuse.return_value.shutdown.assert_called_once()
diff --git a/tests/test_task_manager.py b/tests/test_task_manager.py
deleted file mode 100644
index 373493670..000000000
--- a/tests/test_task_manager.py
+++ /dev/null
@@ -1,639 +0,0 @@
-import logging
-import subprocess
-import threading
-from urllib.parse import urlparse, urlunparse
-
-import httpx
-import pytest
-from pytest_httpserver import HTTPServer
-from werkzeug.wrappers import Request, Response
-
-from langfuse._task_manager.task_manager import TaskManager
-from langfuse.request import LangfuseClient
-
-logging.basicConfig()
-log = logging.getLogger("langfuse")
-log.setLevel(logging.DEBUG)
-
-
-def setup_server(httpserver, expected_body: dict):
- httpserver.expect_request(
- "/api/public/ingestion", method="POST", json=expected_body
- ).respond_with_data("success")
-
-
-def setup_langfuse_client(server: str):
- return LangfuseClient(
- "public_key", "secret_key", server, "1.0.0", 15, httpx.Client()
- )
-
-
-def get_host(url):
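-    # Strip path, query and fragment from the test server URL, keeping only scheme://host:port.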
- parsed_url = urlparse(url)
- new_url = urlunparse((parsed_url.scheme, parsed_url.netloc, "", "", "", ""))
- return new_url
-
-
-@pytest.mark.timeout(10)
-def test_multiple_tasks_without_predecessor(httpserver: HTTPServer):
- failed = False
-
- def handler(request: Request):
- try:
- if request.json["batch"][0]["foo"] == "bar":
- return Response(status=200)
- return Response(status=500)
- except Exception as e:
- print(e)
- logging.error(e)
- nonlocal failed
- failed = True
-
- httpserver.expect_request(
- "/api/public/ingestion", method="POST"
- ).respond_with_handler(handler)
-
- langfuse_client = setup_langfuse_client(
- get_host(httpserver.url_for("/api/public/ingestion"))
- )
-
- tm = TaskManager(
- client=langfuse_client,
- api_client=None,
- public_key="pk",
- flush_at=10,
- flush_interval=0.1,
- max_retries=3,
- threads=1,
- max_task_queue_size=10_000,
- sdk_name="test-sdk",
- sdk_version="1.0.0",
- sdk_integration="default",
- )
-
- tm.add_task({"foo": "bar"})
- tm.add_task({"foo": "bar"})
- tm.add_task({"foo": "bar"})
-
- tm.flush()
- assert not failed
-
-
-@pytest.mark.timeout(10)
-def test_disabled_task_manager(httpserver: HTTPServer):
- request_fired = False
-
- def handler(request: Request):
- nonlocal request_fired
- request_fired = True
- try:
- if request.json["batch"][0]["foo"] == "bar":
- return Response(status=200)
- return Response(status=500)
- except Exception as e:
- print(e)
- logging.error(e)
-
- httpserver.expect_request(
- "/api/public/ingestion", method="POST"
- ).respond_with_handler(handler)
-
- langfuse_client = setup_langfuse_client(
- get_host(httpserver.url_for("/api/public/ingestion"))
- )
-
- tm = TaskManager(
- client=langfuse_client,
- api_client=None,
- public_key="pk",
- flush_at=10,
- flush_interval=0.1,
- max_retries=3,
- threads=1,
- max_task_queue_size=10_000,
- sdk_name="test-sdk",
- sdk_version="1.0.0",
- sdk_integration="default",
- enabled=False,
- )
-
- tm.add_task({"foo": "bar"})
- tm.add_task({"foo": "bar"})
- tm.add_task({"foo": "bar"})
-
- assert tm._ingestion_queue.empty()
-
- tm.flush()
- assert not request_fired
-
-
-@pytest.mark.timeout(10)
-def test_task_manager_fail(httpserver: HTTPServer):
- count = 0
-
- def handler(request: Request):
- nonlocal count
- count = count + 1
- return Response(status=500)
-
- httpserver.expect_request(
- "/api/public/ingestion", method="POST"
- ).respond_with_handler(handler)
-
- langfuse_client = setup_langfuse_client(
- get_host(httpserver.url_for("/api/public/ingestion"))
- )
-
- tm = TaskManager(
- client=langfuse_client,
- api_client=None,
- public_key="pk",
- flush_at=10,
- flush_interval=0.1,
- max_retries=3,
- threads=1,
- max_task_queue_size=10_000,
- sdk_name="test-sdk",
- sdk_version="1.0.0",
- sdk_integration="default",
- )
-
- tm.add_task({"type": "bar", "body": {"trace_id": "trace_123"}})
- tm.flush()
-
- assert count == 3
-
-
-@pytest.mark.timeout(20)
-def test_consumer_restart(httpserver: HTTPServer):
- failed = False
-
- def handler(request: Request):
- try:
- if request.json["batch"][0]["foo"] == "bar":
- return Response(status=200)
- return Response(status=500)
- except Exception as e:
- print(e)
- logging.error(e)
- nonlocal failed
- failed = True
-
- httpserver.expect_request(
- "/api/public/ingestion", method="POST"
- ).respond_with_handler(handler)
-
- langfuse_client = setup_langfuse_client(
- get_host(httpserver.url_for("/api/public/ingestion"))
- )
-
- tm = TaskManager(
- client=langfuse_client,
- api_client=None,
- public_key="pk",
- flush_at=10,
- flush_interval=0.1,
- max_retries=3,
- threads=1,
- max_task_queue_size=10_000,
- sdk_name="test-sdk",
- sdk_version="1.0.0",
- sdk_integration="default",
- )
-
- tm.add_task({"foo": "bar"})
- tm.flush()
-
- tm.add_task({"foo": "bar"})
- tm.flush()
- assert not failed
-
-
-@pytest.mark.timeout(10)
-def test_concurrent_task_additions(httpserver: HTTPServer):
- counter = 0
-
- def handler(request: Request):
- nonlocal counter
- counter = counter + 1
- return Response(status=200)
-
- def add_task_concurrently(tm, event):
- tm.add_task(event)
-
- httpserver.expect_request(
- "/api/public/ingestion", method="POST"
- ).respond_with_handler(handler)
-
- langfuse_client = setup_langfuse_client(
- get_host(httpserver.url_for("/api/public/ingestion"))
- )
-
- tm = TaskManager(
- client=langfuse_client,
- api_client=None,
- public_key="pk",
- flush_at=1,
- flush_interval=0.1,
- max_retries=3,
- threads=1,
- max_task_queue_size=10_000,
- sdk_name="test-sdk",
- sdk_version="1.0.0",
- sdk_integration="default",
- )
- threads = [
- threading.Thread(
- target=add_task_concurrently,
- args=(tm, {"type": "bar", "body": {"trace_id": "trace_123"}}),
- )
- for _ in range(10)
- ]
- for t in threads:
- t.start()
- for t in threads:
- t.join()
-
- tm.shutdown()
-
- assert counter == 10
-
-
-@pytest.mark.timeout(10)
-def test_atexit():
- python_code = """
-import time
-import logging
-from langfuse._task_manager.task_manager import TaskManager
-from langfuse.request import LangfuseClient
-import httpx
-
-langfuse_client = LangfuseClient("public_key", "secret_key", "http://localhost:3000", "1.0.0", 15, httpx.Client())
-
-logging.basicConfig(
- level=logging.DEBUG,
- format="%(asctime)s [%(levelname)s] %(message)s",
- handlers=[
- logging.StreamHandler()
- ]
-)
-print("Adding task manager", TaskManager)
-manager = TaskManager(client=langfuse_client, api_client=None, public_key='pk', flush_at=10, flush_interval=0.1, max_retries=3, threads=1, max_task_queue_size=10_000, sdk_name="test-sdk", sdk_version="1.0.0", sdk_integration="default")
-
-"""
-
- process = subprocess.Popen(
- ["python", "-c", python_code], stderr=subprocess.PIPE, text=True
- )
-
- logs = ""
-
- try:
- for line in process.stderr:
- logs += line.strip()
- print(line.strip())
- except subprocess.TimeoutExpired:
- pytest.fail("The process took too long to execute")
- process.communicate()
-
- returncode = process.returncode
- if returncode != 0:
- pytest.fail("Process returned with error code")
-
- print(process.stderr)
-
- assert "MediaUploadConsumer thread 0 joined" in logs
- assert "IngestionConsumer thread 0 joined" in logs
-
-
-def test_flush(httpserver: HTTPServer):
- # set up the consumer with more requests than a single batch will allow
-
- failed = False
-
- def handler(request: Request):
- try:
- if request.json["batch"][0]["foo"] == "bar":
- return Response(status=200)
- return Response(status=500)
- except Exception as e:
- print(e)
- logging.error(e)
- nonlocal failed
- failed = True
-
- httpserver.expect_request(
- "/api/public/ingestion",
- method="POST",
- ).respond_with_handler(handler)
-
- langfuse_client = setup_langfuse_client(
- get_host(httpserver.url_for("/api/public/ingestion"))
- )
-
- tm = TaskManager(
- client=langfuse_client,
- api_client=None,
- public_key="pk",
- flush_at=1,
- flush_interval=0.1,
- max_retries=3,
- threads=1,
- max_task_queue_size=10_000,
- sdk_name="test-sdk",
- sdk_version="1.0.0",
- sdk_integration="default",
- )
-
- for _ in range(100):
- tm.add_task({"foo": "bar"})
- # We can't reliably assert that the queue is non-empty here; that's
- # a race condition. We do our best to load it up though.
- tm.flush()
- # Make sure that the client queue is empty after flushing
- assert tm._ingestion_queue.empty()
- assert not failed
-
-
-def test_shutdown(httpserver: HTTPServer):
- # set up the consumer with more requests than a single batch will allow
-
- failed = False
-
- def handler(request: Request):
- try:
- if request.json["batch"][0]["foo"] == "bar":
- return Response(status=200)
- return Response(status=500)
- except Exception as e:
- print(e)
- logging.error(e)
- nonlocal failed
- failed = True
-
- httpserver.expect_request(
- "/api/public/ingestion",
- method="POST",
- ).respond_with_handler(handler)
-
- langfuse_client = setup_langfuse_client(
- get_host(httpserver.url_for("/api/public/ingestion"))
- )
-
- tm = TaskManager(
- client=langfuse_client,
- api_client=None,
- public_key="pk",
- flush_at=1,
- flush_interval=0.1,
- max_retries=3,
- threads=5,
- max_task_queue_size=10_000,
- sdk_name="test-sdk",
- sdk_version="1.0.0",
- sdk_integration="default",
- )
-
- for _ in range(100):
- tm.add_task({"foo": "bar"})
-
- tm.shutdown()
- # we expect two things after shutdown:
- # 1. client queue is empty
- # 2. consumer thread has stopped
- assert tm._ingestion_queue.empty()
-
- assert len(tm._ingestion_consumers) == 5
- for c in tm._ingestion_consumers:
- assert not c.is_alive()
- assert tm._ingestion_queue.empty()
- assert not failed
-
-
-def test_large_events_dropped_if_random(httpserver: HTTPServer):
- failed = False
-
- def handler(request: Request):
- try:
- if request.json["batch"][0]["foo"] == "bar":
- return Response(status=200)
- return Response(status=500)
- except Exception as e:
- print(e)
- logging.error(e)
- nonlocal failed
- failed = True
-
- httpserver.expect_request(
- "/api/public/ingestion",
- method="POST",
- ).respond_with_handler(handler)
- langfuse_client = setup_langfuse_client(
- get_host(httpserver.url_for("/api/public/ingestion"))
- )
-
- tm = TaskManager(
- client=langfuse_client,
- api_client=None,
- public_key="pk",
- flush_at=1,
- flush_interval=0.1,
- max_retries=3,
- threads=1,
- max_task_queue_size=10_000,
- sdk_name="test-sdk",
- sdk_version="1.0.0",
- sdk_integration="default",
- )
-
- tm.add_task({"foo": "bar"})
- # create a task where the value of "foo" is an extremely long string instead of "bar"
- long_string = "a" * 100_000 # 100,000 characters of 'a'
- tm.add_task({"foo": long_string})
-
- # We can't reliably assert that the queue is non-empty here; that's
- # a race condition. We do our best to load it up though.
- tm.flush()
- # Make sure that the client queue is empty after flushing
- assert tm._ingestion_queue.empty()
- assert not failed
-
-
-def test_large_events_i_o_dropped(httpserver: HTTPServer):
- failed = False
- count = 0
-
- def handler(request: Request):
- try:
- nonlocal count
- count += 1
- log.info(f"count {count}")
- return Response(status=200)
- except Exception as e:
- print(e)
- logging.error(e)
- nonlocal failed
- failed = True
-
- httpserver.expect_request(
- "/api/public/ingestion",
- method="POST",
- ).respond_with_handler(handler)
- langfuse_client = setup_langfuse_client(
- get_host(httpserver.url_for("/api/public/ingestion"))
- )
-
- tm = TaskManager(
- client=langfuse_client,
- api_client=None,
- public_key="pk",
- flush_at=1,
- flush_interval=0.1,
- max_retries=3,
- threads=1,
- max_task_queue_size=10_000,
- sdk_name="test-sdk",
- sdk_version="1.0.0",
- sdk_integration="default",
- )
-
- tm.add_task({"type": "bar", "body": {"trace_id": "trace_123"}})
- # create a task whose body "input" is an extremely long string
- long_string = "a" * 1_000_000
- tm.add_task(
- {
- "body": {"input": long_string, "trace_id": "trace_123"},
- "type": "bar",
- }
- )
-
- # We can't reliably assert that the queue is non-empty here; that's
- # a race condition. We do our best to load it up though.
- tm.flush()
- # Make sure that the client queue is empty after flushing
- assert tm._ingestion_queue.empty()
- assert not failed
- assert count == 2
-
-
-def test_truncate_item_in_place(httpserver: HTTPServer):
- langfuse_client = setup_langfuse_client(
- get_host(httpserver.url_for("/api/public/ingestion"))
- )
-
- tm = TaskManager(
- client=langfuse_client,
- api_client=None,
- public_key="pk",
- flush_at=10,
- flush_interval=0.1,
- max_retries=3,
- threads=1,
- max_task_queue_size=100,
- sdk_name="test-sdk",
- sdk_version="1.0.0",
- sdk_integration="default",
- )
-
- consumer = tm._ingestion_consumers[0]
-
- # Item size within limit
- MAX_MSG_SIZE = 100
-
- small_item = {"body": {"input": "small"}}
- assert (
- consumer._truncate_item_in_place(event=small_item, max_size=MAX_MSG_SIZE)
- <= MAX_MSG_SIZE
- )
- assert small_item["body"]["input"] == "small" # unchanged
-
- # Item size exceeding limit
- large_item = {"body": {"input": "a" * (MAX_MSG_SIZE + 10)}}
- truncated_size = consumer._truncate_item_in_place(
- event=large_item, max_size=MAX_MSG_SIZE
- )
-
- assert truncated_size <= MAX_MSG_SIZE
- assert large_item["body"]["input"] is None # truncated
-
- # Logs message if item is truncated
- large_item = {"body": {"input": "a" * (MAX_MSG_SIZE + 10)}}
- truncated_size = consumer._truncate_item_in_place(
- event=large_item, max_size=MAX_MSG_SIZE, log_message="truncated"
- )
-
- assert truncated_size <= MAX_MSG_SIZE
- assert large_item["body"]["input"] == "truncated" # replaced with the log message
-
- # Multiple fields
- full_item = {
- "body": {
- "input": "a" * 300,
- "output": "b" * 300,
- "metadata": "c" * 300,
- }
- }
- truncated_size = consumer._truncate_item_in_place(
- event=full_item, max_size=MAX_MSG_SIZE
- )
-
- assert truncated_size <= MAX_MSG_SIZE
- assert any(
- full_item["body"][field] is None for field in ["input", "output", "metadata"]
- ) # at least one field truncated
-
- # Field sizes
- input_largest = {
- "body": {
- "input": "a" * 500,
- "output": "b" * 10,
- "metadata": "c" * 10,
- }
- }
- consumer._truncate_item_in_place(event=input_largest, max_size=MAX_MSG_SIZE)
- assert input_largest["body"]["input"] is None
- assert input_largest["body"]["output"] is not None
- assert input_largest["body"]["metadata"] is not None
-
- # Truncation order
- mixed_size = {
- "body": {
- "input": "a" * 20,
- "output": "b" * 200,
- "metadata": "c" * 20,
- }
- }
- consumer._truncate_item_in_place(event=mixed_size, max_size=MAX_MSG_SIZE)
- assert mixed_size["body"]["input"] is not None
- assert mixed_size["body"]["output"] is None
- assert mixed_size["body"]["metadata"] is not None
-
- # Multiple field drops
- very_large = {
- "body": {
- "input": "a" * 100,
- "output": "b" * 120,
- "metadata": "c" * 50,
- }
- }
- consumer._truncate_item_in_place(event=very_large, max_size=MAX_MSG_SIZE)
- assert very_large["body"]["input"] is None
- assert very_large["body"]["output"] is None
- assert very_large["body"]["metadata"] is not None
-
- # Return value
- assert isinstance(
- consumer._truncate_item_in_place(event=small_item, max_size=MAX_MSG_SIZE), int
- )
-
- # JSON serialization
- complex_item = {
- "body": {
- "input": {"nested": ["complex", {"structure": "a" * (MAX_MSG_SIZE + 1)}]}
- }
- }
- assert (
- consumer._truncate_item_in_place(event=complex_item, max_size=MAX_MSG_SIZE)
- <= MAX_MSG_SIZE
- )
- assert complex_item["body"]["input"] is None
diff --git a/tests/test_updating_prompt.py b/tests/test_updating_prompt.py
deleted file mode 100644
index addcd4528..000000000
--- a/tests/test_updating_prompt.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from langfuse.client import Langfuse
-from tests.utils import create_uuid
-
-
-def test_update_prompt():
- langfuse = Langfuse()
- prompt_name = create_uuid()
-
- # Create initial prompt
- langfuse.create_prompt(
- name=prompt_name,
- prompt="test prompt",
- labels=["production"],
- )
-
- # Update prompt labels
- updated_prompt = langfuse.update_prompt(
- name=prompt_name,
- version=1,
- new_labels=["john", "doe"],
- )
-
- # Fetch prompt after update (the cached prompt should be invalidated)
- fetched_prompt = langfuse.get_prompt(prompt_name)
-
- # Verify the fetched prompt matches the updated values
- assert fetched_prompt.name == prompt_name
- assert fetched_prompt.version == 1
- print(f"Fetched prompt labels: {fetched_prompt.labels}")
- print(f"Updated prompt labels: {updated_prompt.labels}")
-
- # "production" was set on creation; "latest" is managed and set automatically by Langfuse
- expected_labels = sorted(["latest", "doe", "production", "john"])
- assert sorted(fetched_prompt.labels) == expected_labels
- assert sorted(updated_prompt.labels) == expected_labels
diff --git a/tests/utils.py b/tests/utils.py
deleted file mode 100644
index 6b6849a6a..000000000
--- a/tests/utils.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import base64
-import os
-import typing
-from time import sleep
-from uuid import uuid4
-
-try:
- import pydantic.v1 as pydantic # type: ignore
-except ImportError:
- import pydantic # type: ignore
-
-from llama_index.core import (
- Settings,
- SimpleDirectoryReader,
- StorageContext,
- VectorStoreIndex,
- load_index_from_storage,
-)
-from llama_index.core.callbacks import CallbackManager
-
-from langfuse.api.client import FernLangfuse
-
-
-def create_uuid():
- return str(uuid4())
-
-
-def get_api():
- sleep(2)
-
- return FernLangfuse(
- username=os.environ.get("LANGFUSE_PUBLIC_KEY"),
- password=os.environ.get("LANGFUSE_SECRET_KEY"),
- base_url=os.environ.get("LANGFUSE_HOST"),
- )
-
-
-class LlmUsageWithCost(pydantic.BaseModel):
- prompt_tokens: typing.Optional[int] = pydantic.Field(
- alias="promptTokens", default=None
- )
- completion_tokens: typing.Optional[int] = pydantic.Field(
- alias="completionTokens", default=None
- )
- total_tokens: typing.Optional[int] = pydantic.Field(
- alias="totalTokens", default=None
- )
- input_cost: typing.Optional[float] = pydantic.Field(alias="inputCost", default=None)
- output_cost: typing.Optional[float] = pydantic.Field(
- alias="outputCost", default=None
- )
- total_cost: typing.Optional[float] = pydantic.Field(alias="totalCost", default=None)
-
-
-class CompletionUsage(pydantic.BaseModel):
- completion_tokens: int
- """Number of tokens in the generated completion."""
-
- prompt_tokens: int
- """Number of tokens in the prompt."""
-
- total_tokens: int
- """Total number of tokens used in the request (prompt + completion)."""
-
-
-class LlmUsage(pydantic.BaseModel):
- prompt_tokens: typing.Optional[int] = pydantic.Field(
- alias="promptTokens", default=None
- )
- completion_tokens: typing.Optional[int] = pydantic.Field(
- alias="completionTokens", default=None
- )
- total_tokens: typing.Optional[int] = pydantic.Field(
- alias="totalTokens", default=None
- )
-
- def json(self, **kwargs: typing.Any) -> str:
- kwargs_with_defaults: typing.Any = {
- "by_alias": True,
- "exclude_unset": True,
- **kwargs,
- }
- return super().json(**kwargs_with_defaults)
-
- def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
- kwargs_with_defaults: typing.Any = {
- "by_alias": True,
- "exclude_unset": True,
- **kwargs,
- }
- return super().dict(**kwargs_with_defaults)
-
-
-def get_llama_index_index(callback, force_rebuild: bool = False):
- if callback:
- Settings.callback_manager = CallbackManager([callback])
- PERSIST_DIR = "tests/mocks/llama-index-storage"
-
- if not os.path.exists(PERSIST_DIR) or force_rebuild:
- print("Building RAG index...")
- documents = SimpleDirectoryReader(
- "static", ["static/state_of_the_union_short.txt"]
- ).load_data()
- index = VectorStoreIndex.from_documents(documents)
- index.storage_context.persist(persist_dir=PERSIST_DIR)
- else:
- print("Using pre-built index from storage...")
- storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
- index = load_index_from_storage(storage_context)
-
- return index
-
-
-def encode_file_to_base64(image_path) -> str:
- with open(image_path, "rb") as file:
- return base64.b64encode(file.read()).decode("utf-8")