Skip to content
Open
14 changes: 12 additions & 2 deletions sdk/harambe/contrib/soup/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@
# noinspection PyProtectedMember
from curl_cffi.requests import AsyncSession, HeaderTypes
from harambe.contrib.soup.tracing import Tracer
from harambe.contrib.types import AbstractElementHandle, AbstractPage, Selectable
from harambe.contrib.types import ResponseWithStatus
from harambe.contrib.types import (
AbstractElementHandle,
AbstractPage,
ResponseWithStatus,
Selectable,
)


class SoupElementHandle(AbstractElementHandle, Selectable["SoupElementHandle"]):
Expand Down Expand Up @@ -139,3 +143,9 @@ def __init__(self, selector: str, page: SoupPage) -> None:

async def all(self) -> list[SoupElementHandle]:
    """Return every element on the page matching this locator's selector."""
    return await self._page.query_selector_all(self._selector)

async def text_content(self) -> str | None:
    """Return the text content of the first element matching this locator's
    selector, or ``None`` when no element matches.
    """
    if el := await self._page.query_selector(self._selector):
        return await el.text_content()

    return None
84 changes: 80 additions & 4 deletions sdk/harambe/core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import base64
import inspect
import tempfile
import uuid
Expand All @@ -22,10 +23,8 @@
from harambe.contrib.soup.impl import SoupPage
from harambe.contrib.types import AbstractPage
from harambe.cookie_utils import fix_cookie
from harambe.handlers import (
ResourceRequestHandler,
ResourceType,
)
from harambe.handlers import ResourceRequestHandler, ResourceType
from harambe.llm import LLM_AGENTS, LLMManager
from harambe.observer import (
DownloadMeta,
HTMLMetadata,
Expand All @@ -52,6 +51,7 @@
from harambe_core.errors import GotoError
from harambe_core.normalize_url import normalize_url
from harambe_core.parser.expression import ExpressionEvaluator
from harambe_core.types import SchemaFieldType
from playwright.async_api import (
ElementHandle,
Page,
Expand Down Expand Up @@ -608,5 +608,81 @@ async def wrapper(sdk: "SDK", url: URL, context: Context) -> None:

return decorator

@staticmethod
async def llm(
    to_evaluate: Optional[ElementHandle | str] = None,
    is_image_url: bool = False,
    prompt: str = "You are a superintelligent artificial intelligence designed for helping software developers. Help evaluate the given text.",
    data_type: SchemaFieldType = "string",
    include_screenshot: bool = False,
    agent: Optional[LLM_AGENTS] = None,
    model: Optional[str] = None,
    return_object_format: Optional[object] = None,
) -> Optional[str]:
    """
    Use a LLM agent to evaluate any prompt for a string or ElementHandle or image URL.

    Parameters:
        to_evaluate (Optional[ElementHandle | str]): The ElementHandle or string or image URL to evaluate.
        is_image_url (bool): Whether the to_evaluate is an image or not.
        prompt (str): The prompt to use for the evaluation.
        data_type (SchemaFieldType): The type of data to return.
        include_screenshot (bool): Whether to include the screenshot of the element in the response (Playwright only)
        agent (Optional[LLM_AGENTS]): The LLM agent to use.
        model (Optional[str]): The model to use.
        return_object_format (Optional[object]): The format to return the data in.

    Returns:
        The LLM response string, or None when the model reports that no
        information was found.
    """
    # Normalize the format argument; a mutable default ({}) would be shared
    # across calls, so None is the real default and is treated as "no format".
    object_format = return_object_format or {}

    # Use a distinct name so the `agent` parameter is not shadowed.
    llm_manager = LLMManager(agent=agent, model=model)

    stringify = to_evaluate
    if not is_image_url:
        if isinstance(to_evaluate, str):
            stringify = to_evaluate.strip()

        elif hasattr(to_evaluate, "text_content") and callable(
            to_evaluate.text_content
        ):
            # text_content() may return None for an empty/missing element;
            # guard before stripping to avoid an AttributeError.
            text = await to_evaluate.text_content()
            stringify = text.strip() if text is not None else None

    # Instruct the model to answer with a sentinel when nothing is found,
    # so "no information" is distinguishable from a real answer.
    prompt += '. Just return the requested data without any additional text. If no information is found, return "NONE"'

    # Return the response in the requested format
    if object_format:
        prompt += f". Return response in the following format: {{{str(object_format)}}}"

    prompts = [
        {"type": "text", "content": prompt},
        {"type": "image" if is_image_url else "text", "content": stringify},
    ]

    # Optionally attach a screenshot of the element (Playwright only).
    if include_screenshot and not is_image_url:
        screenshot = await to_evaluate.screenshot()
        screenshot_b64 = base64.b64encode(screenshot).decode()

        prompts.append(
            {
                "type": "image",
                "content": f"data:image/jpeg;base64,{screenshot_b64}",
            },
        )

    response = llm_manager.query(prompts)

    # The sentinel means the model found no information.
    if response == "NONE":
        return None

    if not object_format:
        # Validate the bare response against the requested data_type.
        schema = {"data": {"type": data_type}}
        validator = SchemaParser(schema)
        validator.validate({"data": response}, base_url="https://example.com")

    return response


PAGE_PDF_FILENAME = "reworkd_page_pdf.pdf"
49 changes: 49 additions & 0 deletions sdk/harambe/llm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from typing import Dict, List, Literal, Optional

from openai import OpenAI

from harambe.settings import get_settings

LLM_AGENTS = Literal["openai"]


class LLMManager:
    """Thin wrapper around the supported LLM providers (currently OpenAI only)."""

    def __init__(self, agent: Optional[LLM_AGENTS] = None, model: Optional[str] = None):
        """
        Parameters:
            agent: The LLM provider to use; defaults to "openai".
            model: The model name to use; defaults to "gpt-4o-mini".

        Raises:
            ValueError: If an unsupported agent name is supplied. Failing here
                is clearer than the UnboundLocalError `query` would otherwise
                raise when `self.agent` was never assigned.
        """
        self.agent_name = "openai" if agent is None else agent
        self.model = "gpt-4o-mini" if model is None else model

        if self.agent_name == "openai":
            self.agent = OpenAI(api_key=get_settings().openai_api_key)
        else:
            raise ValueError(f"Unsupported LLM agent: {self.agent_name}")

    def query(self, prompts: List[Dict[str, str]]) -> str:
        """
        Query the LLM agent with the given prompts.

        Parameters:
            prompts: List[{ type, content }]
                type: one of ["text", "image"]
                content: "string"

        Returns:
            response: The response from the LLM agent.
        """
        # Translate the provider-agnostic prompt list into OpenAI's
        # chat-completions content format.
        content: List[Dict[str, object]] = []
        for prompt in prompts:
            if prompt["type"] == "text":
                content.append({"type": "text", "text": prompt["content"]})
            elif prompt["type"] == "image":
                content.append(
                    {
                        "type": "image_url",
                        "image_url": {"url": f"{prompt['content']}"},
                    }
                )

        response = self.agent.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": content}],
        )

        return response.choices[0].message.content
14 changes: 14 additions & 0 deletions sdk/harambe/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from functools import lru_cache

# from pydantic import SecretStr
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: remove

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I was planning to remove this.
Actually, it's recommended to use SecretStr, but it masks the value even when we actually need it, so I was still trying to figure that out.

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Runtime configuration, sourced from the environment or a local .env file."""

    model_config = SettingsConfigDict(env_file=".env")

    # presumably populated from the OPENAI_API_KEY environment variable — confirm
    openai_api_key: str


@lru_cache
def get_settings() -> "Settings":
    """Return the process-wide Settings instance, constructed lazily on first
    use and cached for every subsequent call."""
    return Settings()
Comment on lines +12 to +14
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this from their examples?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, I got this from a blog.

3 changes: 3 additions & 0 deletions sdk/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ dependencies = [
"curl-cffi==0.7.3",
"ua-generator==1.0.5",
"python-slugify>=8.0.4",
"pydantic-settings>=2.6.1",
"openai>=1.56.2",
]

[tool.uv]
Expand All @@ -32,6 +34,7 @@ dev-dependencies = [
"pytest==7.4.4",
"pytest-cov==4.1.0",
"pytest-asyncio==0.21.2",
"rich>=13.9.4",
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what's this for?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used it locally for pretty-printing on the console; I can remove it — it's just a dev dependency.

]

[tool.uv.sources]
Expand Down
79 changes: 76 additions & 3 deletions sdk/test/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@
import pytest
from aiohttp import web
from bs4 import BeautifulSoup
from harambe import SDK
from harambe.contrib import playwright_harness, soup_harness
from harambe.observer import InMemoryObserver
from harambe.types import BrowserType
from harambe_core.errors import GotoError

from harambe import SDK
from harambe.contrib import playwright_harness, soup_harness


@pytest.fixture(scope="module")
def mock_html_folder():
Expand Down Expand Up @@ -588,3 +587,77 @@ async def scrape(sdk: SDK, current_url, context) -> None:
)

assert len(observer.data) == 0


@pytest.mark.parametrize("harness", [playwright_harness, soup_harness])
async def test_core_llm_method(server, observer, harness):
    """Exercise SDK.llm end to end on both harnesses.

    Covers three paths: evaluating a page element, evaluating a large raw
    string, and the no-information case, which must yield None.
    """
    url = f"{server}/solicitation"

    async def scrape(sdk: SDK, url, context) -> None:
        page = sdk.page
        body = page.locator("body")

        date_prepared = await sdk.llm(
            to_evaluate=body,
            prompt="Find the date prepared in the general information",
            data_type="datetime",
        )
        solicitation_id = await sdk.llm(
            to_evaluate=body,
            prompt="Find the solicitation id in the general information",
            data_type="int",
        )

        large_text = """
        <p> Here is a big lorem ipsum element Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc imperdiet,
        libero ac vestibulum tristique, massa orci viverra augue, eu congue elit urna a justo. Suspendisse ac nisi
        dolor. Sed turpis ante, tincidunt in nibh quis, pellentesque euismod dolor. Sed at mauris maximus, tempus
        dolor eu, tristique sem. Fusce in dolor egestas, The product launch date was 12/04/2024 vulputate lacus eget, pharetra felis. Morbi ac lorem at
        lorem aliquam blandit. Quisque eget vulputate felis. Suspendisse bibendum mauris vel ex dignissim tincidunt.
        Integer porttitor libero ligula, ut convallis lorem rhoncus et. Ut ac nisl a mauris malesuada aliquet.
        In vitae pharetra tellus. Suspendisse vel varius tellus. Ut pellentesque sem at gravida volutpat. </p>
        """
        product_launch_date = await sdk.llm(
            to_evaluate=large_text,
            prompt="Find the product launch date on page",
            data_type="datetime",
        )

        # Same text but with the date removed: the LLM must report NONE.
        large_text_none = """
        <p> Here is a big lorem ipsum element Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc imperdiet,
        libero ac vestibulum tristique, massa orci viverra augue, eu congue elit urna a justo. Suspendisse ac nisi
        dolor. Sed turpis ante, tincidunt in nibh quis, pellentesque euismod dolor. Sed at mauris maximus, tempus
        dolor eu, tristique sem. Fusce in dolor egestas, The product launch date was vulputate lacus eget, pharetra felis. Morbi ac lorem at
        lorem aliquam blandit. Quisque eget vulputate felis. Suspendisse bibendum mauris vel ex dignissim tincidunt.
        Integer porttitor libero ligula, ut convallis lorem rhoncus et. Ut ac nisl a mauris malesuada aliquet.
        In vitae pharetra tellus. Suspendisse vel varius tellus. Ut pellentesque sem at gravida volutpat. </p>
        """
        product_launch_date_none = await sdk.llm(
            to_evaluate=large_text_none,
            prompt="Find the product launch date on page",
            data_type="datetime",
        )

        await sdk.save_data(
            {
                "date_prepared": date_prepared,
                "solicitation_id": solicitation_id,
                "product_launch_date": product_launch_date,
                "product_launch_date_none": product_launch_date_none,
            }
        )

    await SDK.run(
        scrape,
        url,
        schema={},
        harness=harness,
        context={"status": "Open"},
        observer=observer,
    )
    assert len(observer.data) == 1
    assert observer.data[0]["date_prepared"] == "10/31/24"
    assert observer.data[0]["solicitation_id"] == "6100062375"
    assert observer.data[0]["product_launch_date"] == "12/04/2024"
    assert observer.data[0]["product_launch_date_none"] is None
Loading