From 606d2faf57112ddecd8bc9ff5e63e0dd51c88b7e Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 7 Feb 2025 14:45:38 -0500 Subject: [PATCH 001/195] logic for create user manual --- .env.example | 3 +- app/configurations/config.py | 2 + app/configurations/pdf_manual_config.py | 17 +++++ app/controllers/handle_controller.py | 10 +++ .../s3_upload/requests/s3_upload_request.py | 7 ++ .../s3_upload/responses/s3_upload_response.py | 5 ++ app/externals/s3_upload/s3_upload_client.py | 20 ++++++ app/pdf/helpers.py | 20 ++++++ app/pdf/pdf_generator.py | 64 +++++++++++++++++++ app/pdf/pdf_manual_generator.py | 30 +++++++++ app/providers/deepseek_provider.py | 2 +- app/providers/openai_provider.py | 2 +- app/requests/generate_pdf_request.py | 7 ++ app/services/message_service.py | 56 ++++++++++++++++ app/services/message_service_interface.py | 3 + requirements.txt | 3 +- 16 files changed, 247 insertions(+), 4 deletions(-) create mode 100644 app/configurations/pdf_manual_config.py create mode 100644 app/externals/s3_upload/requests/s3_upload_request.py create mode 100644 app/externals/s3_upload/responses/s3_upload_response.py create mode 100644 app/externals/s3_upload/s3_upload_client.py create mode 100644 app/pdf/helpers.py create mode 100644 app/pdf/pdf_generator.py create mode 100644 app/pdf/pdf_manual_generator.py create mode 100644 app/requests/generate_pdf_request.py diff --git a/.env.example b/.env.example index 3bf38a6..383ddaa 100644 --- a/.env.example +++ b/.env.example @@ -2,6 +2,7 @@ HOST_AGENT_CONFIG=https://services.fluxi.com OPENAI_API_KEY=dasdasdasdasdas ANTHROPIC_API_KEY=dasdasdasdas HOST_DEEP_SEEK=https://55.188.190.83:11434/v1 +RAPIDAPI_HOST=https://aliexpress-datahub.p.rapidapi.com AGENT_RECOMMEND_PRODUCTS_ID=recommend_agent RAPIDAPI_KEY=dsadasdasdasda -RAPIDAPI_HOST=https://aliexpress-datahub.p.rapidapi.com \ No newline at end of file +S3_UPLOAD_API=http://lambdahost \ No newline at end of file diff --git a/app/configurations/config.py 
b/app/configurations/config.py index 5a09e87..970c0ff 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -13,3 +13,5 @@ RAPIDAPI_KEY = os.getenv('RAPIDAPI_KEY') RAPIDAPI_HOST = os.getenv('RAPIDAPI_HOST') + +S3_UPLOAD_API = os.getenv('S3_UPLOAD_API') diff --git a/app/configurations/pdf_manual_config.py b/app/configurations/pdf_manual_config.py new file mode 100644 index 0000000..baedcb0 --- /dev/null +++ b/app/configurations/pdf_manual_config.py @@ -0,0 +1,17 @@ +PDF_MANUAL_SECTIONS = { + "introduction": "Introduction", + "main_features": "Main Features", + "usage_instructions": "Usage Instructions", + "troubleshooting": "Troubleshooting", + "faq": "FAQ", + "conclusion": "Conclusion" +} + +PDF_MANUAL_SECTION_ORDER = [ + "introduction", + "main_features", + "usage_instructions", + "troubleshooting", + "faq", + "conclusion" +] \ No newline at end of file diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index c819b7b..0875a15 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -1,3 +1,4 @@ +from app.requests.generate_pdf_request import GeneratePdfRequest from app.requests.recommend_product_request import RecommendProductRequest from fastapi import APIRouter, Depends, Request @@ -28,6 +29,15 @@ async def recommend_products( return response +@router.post("/generate-pdf") +async def generate_pdf( + request: GeneratePdfRequest, + message_service: MessageServiceInterface = Depends() +): + response = await message_service.generate_pdf(request) + return response + + @router.get("/health") async def health_check(): return {"status": "OK"} diff --git a/app/externals/s3_upload/requests/s3_upload_request.py b/app/externals/s3_upload/requests/s3_upload_request.py new file mode 100644 index 0000000..1df1f47 --- /dev/null +++ b/app/externals/s3_upload/requests/s3_upload_request.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel + + +class S3UploadRequest(BaseModel): + 
file: str + folder: str + filename: str diff --git a/app/externals/s3_upload/responses/s3_upload_response.py b/app/externals/s3_upload/responses/s3_upload_response.py new file mode 100644 index 0000000..0d2ba00 --- /dev/null +++ b/app/externals/s3_upload/responses/s3_upload_response.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel + + +class S3UploadResponse(BaseModel): + s3_url: str diff --git a/app/externals/s3_upload/s3_upload_client.py b/app/externals/s3_upload/s3_upload_client.py new file mode 100644 index 0000000..15dbfe6 --- /dev/null +++ b/app/externals/s3_upload/s3_upload_client.py @@ -0,0 +1,20 @@ +import httpx +from app.configurations.config import S3_UPLOAD_API +from app.externals.s3_upload.requests.s3_upload_request import S3UploadRequest +from app.externals.s3_upload.responses.s3_upload_response import S3UploadResponse + + +async def upload_file(request: S3UploadRequest) -> S3UploadResponse: + headers = { + 'Content-Type': 'application/json', + } + + async with httpx.AsyncClient() as client: + response = await client.post( + S3_UPLOAD_API, + headers=headers, + json=request.dict() + ) + response.raise_for_status() + + return S3UploadResponse(**response.json()) diff --git a/app/pdf/helpers.py b/app/pdf/helpers.py new file mode 100644 index 0000000..eeffe3c --- /dev/null +++ b/app/pdf/helpers.py @@ -0,0 +1,20 @@ +def clean_text(text): + text = text.replace("\u2019", "'") + text = text.replace("\u2018", "'") + text = text.replace("\u201C", '"') + text = text.replace("\u201D", '"') + text = text.replace("\u2014", "-") + text = text.replace("\u2013", "-") + text = text.replace("\u2026", "...") + return text + + +def clean_json(text): + text = text.strip() + if text.startswith("```json"): + text = text[len("```json"):].strip() + elif text.startswith("```"): + text = text[len("```"):].strip() + if text.endswith("```"): + text = text[:-len("```")].strip() + return text diff --git a/app/pdf/pdf_generator.py b/app/pdf/pdf_generator.py new file mode 100644 
index 0000000..39ae17c --- /dev/null +++ b/app/pdf/pdf_generator.py @@ -0,0 +1,64 @@ +from fpdf import FPDF + + +class PDFGenerator(FPDF): + def __init__(self, product_name): + super().__init__() + self.product_name = product_name + + def header(self): + self.set_font("Helvetica", "B", 18) + self.set_text_color(0, 51, 102) # Azul oscuro + self.cell(0, 10, f"User Manual for {self.product_name}", ln=True, align="C") + self.ln(5) + self.set_line_width(0.5) + self.set_draw_color(0, 51, 102) + self.line(10, 25, self.w - 10, 25) + self.ln(10) + + def footer(self): + self.set_y(-20) + self.set_font("Helvetica", "I", 10) + self.set_text_color(128, 128, 128) + self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C") + + def add_cover_page(self, title, subtitle=""): + self.add_page() + self.set_font("Helvetica", "B", 24) + self.set_text_color(0, 51, 102) + self.ln(40) # Espacio superior para la portada + self.cell(0, 20, title, ln=True, align="C") + if subtitle: + self.ln(10) + self.set_font("Helvetica", "", 16) + self.cell(0, 10, subtitle, ln=True, align="C") + self.ln(20) + self.add_page() + + def add_section(self, title, content): + if self.get_y() > self.h * 0.6: + self.add_page() + + self.set_font("Helvetica", "B", 14) + self.set_text_color(255, 255, 255) + self.set_fill_color(0, 102, 204) # Azul + self.cell(0, 12, title, ln=True, fill=True, align="C", border=1) + self.ln(6) + + # Contenido de la sección + self.set_text_color(0, 0, 0) + self.set_font("Helvetica", "", 12) + + if isinstance(content, list): + formatted_text = "\n".join(str(item) for item in content) + else: + formatted_text = content.replace("\\n", "\n") + + self.multi_cell(0, 8, formatted_text) + + self.ln(8) + self.set_draw_color(200, 200, 200) # Línea gris claro + self.set_line_width(0.3) + current_y = self.get_y() + self.line(10, current_y, self.w - 10, current_y) + self.ln(10) diff --git a/app/pdf/pdf_manual_generator.py b/app/pdf/pdf_manual_generator.py new file mode 100644 index 0000000..2f2e9bb --- 
/dev/null +++ b/app/pdf/pdf_manual_generator.py @@ -0,0 +1,30 @@ +import base64 +import os +from app.pdf.pdf_generator import PDFGenerator +from app.configurations.pdf_manual_config import PDF_MANUAL_SECTIONS, PDF_MANUAL_SECTION_ORDER + + +class PDFManualGenerator: + def __init__(self, product_name: str): + self.product_name = product_name + self.pdf = PDFGenerator(product_name) + + async def create_manual(self, data: dict, file_name: str) -> str: + self.pdf.add_cover_page( + f"User Manual for {self.product_name}", + "Everything You Need to Know to Get Started" + ) + self.pdf.set_auto_page_break(auto=True, margin=20) + + for key in PDF_MANUAL_SECTION_ORDER: + self.pdf.add_section(PDF_MANUAL_SECTIONS[key], data.get(key, "")) + + self.pdf.output(file_name) + + with open(file_name, "rb") as f: + pdf_bytes = f.read() + + base64_str = base64.b64encode(pdf_bytes).decode("utf-8") + os.remove(file_name) + + return base64_str diff --git a/app/providers/deepseek_provider.py b/app/providers/deepseek_provider.py index 8d100f2..1637586 100644 --- a/app/providers/deepseek_provider.py +++ b/app/providers/deepseek_provider.py @@ -14,5 +14,5 @@ def get_llm(self, model: str, temperature: float, max_tokens: int, top_p: float) return Ollama( model=model, base_url=DEEP_SEEK_HOST - ##**model_kwargs + **model_kwargs ) diff --git a/app/providers/openai_provider.py b/app/providers/openai_provider.py index 1e2e2f5..d2e6ed4 100644 --- a/app/providers/openai_provider.py +++ b/app/providers/openai_provider.py @@ -12,5 +12,5 @@ def get_llm(self, model: str, temperature: float, max_tokens: int, top_p: float) return ChatOpenAI( model=model, - ##**model_kwargs + **model_kwargs ) diff --git a/app/requests/generate_pdf_request.py b/app/requests/generate_pdf_request.py new file mode 100644 index 0000000..a41620d --- /dev/null +++ b/app/requests/generate_pdf_request.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel + + +class GeneratePdfRequest(BaseModel): + product_name: str + product_description: 
str + owner_id: str \ No newline at end of file diff --git a/app/services/message_service.py b/app/services/message_service.py index 432da36..39bf430 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -1,8 +1,14 @@ import json +import uuid +import asyncio from app.configurations.config import AGENT_RECOMMEND_PRODUCTS_ID from app.externals.agent_config.agent_config_client import get_agent from app.externals.aliexpress.requests.aliexpress_search_request import AliexpressSearchRequest +from app.externals.s3_upload.requests.s3_upload_request import S3UploadRequest +from app.externals.s3_upload.s3_upload_client import upload_file +from app.pdf.helpers import clean_text, clean_json +from app.requests.generate_pdf_request import GeneratePdfRequest from app.requests.message_request import MessageRequest from app.externals.agent_config.requests.agent_config_request import AgentConfigRequest from app.requests.recommend_product_request import RecommendProductRequest @@ -11,6 +17,8 @@ from app.managers.conversation_manager_interface import ConversationManagerInterface from fastapi import Depends from app.externals.aliexpress.aliexpress_client import search_products +from app.configurations.pdf_manual_config import PDF_MANUAL_SECTIONS +from app.pdf.pdf_manual_generator import PDFManualGenerator class MessageService(MessageServiceInterface): @@ -43,3 +51,51 @@ async def recommend_products(self, request: RecommendProductRequest): aliexpress_data = await search_products(AliexpressSearchRequest(q=json_data['recommended_product'])) return RecommendProductResponse(ai_response=json_data, products=aliexpress_data.get_products()) + + async def process_multiple_agents(self, agent_queries: list[dict]) -> dict: + tasks = [ + self.handle_message(MessageRequest( + agent_id=item['agent'], + conversation_id="", + query=item['query'] + )) for item in agent_queries + ] + + try: + responses = await asyncio.gather(*tasks, return_exceptions=True) + + combined_data 
= {} + for response in responses: + if isinstance(response, Exception): + continue + data_clean = clean_text(clean_json(response['text'])) + data = json.loads(data_clean) + combined_data.update(data) + + if not combined_data: + raise ValueError("No se pudo obtener respuesta válida de ningún agente") + + return combined_data + + except Exception as e: + raise ValueError(f"Error procesando respuestas de agentes: {str(e)}") + + async def generate_pdf(self, request: GeneratePdfRequest): + base_query = f"Product Name: {request.product_name} Description: {request.product_description}" + + agent_queries = [ + {'agent': "agent_copies_pdf", 'query': f"section: {section}. {base_query} "} + for section, _ in PDF_MANUAL_SECTIONS.items() + ] + + combined_data = await self.process_multiple_agents(agent_queries) + + unique_id = uuid.uuid4().hex[:8] + file_name = f"{request.product_name.replace(' ', '_').lower()}_{unique_id}" + + pdf_generator = PDFManualGenerator(request.product_name) + pdf = await pdf_generator.create_manual(combined_data, f"{file_name}.pdf") + + return await upload_file( + S3UploadRequest(file=pdf, folder=f"{request.owner_id}/pdfs", + filename=file_name)) diff --git a/app/services/message_service_interface.py b/app/services/message_service_interface.py index 4423d33..cec22d8 100644 --- a/app/services/message_service_interface.py +++ b/app/services/message_service_interface.py @@ -11,4 +11,7 @@ async def handle_message(self, request: MessageRequest): @abstractmethod async def recommend_products(self, request: RecommendProductRequest): + pass + + async def generate_pdf(self, request): pass \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index f3c1f62..1e84f4a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,5 @@ openai langgraph>=0.0.10 langchain-core>=0.1.17 langchain-anthropic -langchain-ollama \ No newline at end of file +langchain-ollama +fpdf \ No newline at end of file From 
c69629a7b1d1c958abb03b5a8190cc2d80c87358 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 10 Feb 2025 11:04:03 -0500 Subject: [PATCH 002/195] eliminate file change logic for not create file unnecessary --- app/pdf/pdf_manual_generator.py | 11 ++++------- app/services/message_service.py | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/app/pdf/pdf_manual_generator.py b/app/pdf/pdf_manual_generator.py index 2f2e9bb..b834283 100644 --- a/app/pdf/pdf_manual_generator.py +++ b/app/pdf/pdf_manual_generator.py @@ -9,7 +9,7 @@ def __init__(self, product_name: str): self.product_name = product_name self.pdf = PDFGenerator(product_name) - async def create_manual(self, data: dict, file_name: str) -> str: + async def create_manual(self, data: dict) -> str: self.pdf.add_cover_page( f"User Manual for {self.product_name}", "Everything You Need to Know to Get Started" @@ -19,12 +19,9 @@ async def create_manual(self, data: dict, file_name: str) -> str: for key in PDF_MANUAL_SECTION_ORDER: self.pdf.add_section(PDF_MANUAL_SECTIONS[key], data.get(key, "")) - self.pdf.output(file_name) - - with open(file_name, "rb") as f: - pdf_bytes = f.read() + pdf_str = self.pdf.output(dest="S") + pdf_bytes = pdf_str.encode("latin1") base64_str = base64.b64encode(pdf_bytes).decode("utf-8") - os.remove(file_name) - return base64_str + return base64_str \ No newline at end of file diff --git a/app/services/message_service.py b/app/services/message_service.py index 39bf430..020f3ee 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -94,7 +94,7 @@ async def generate_pdf(self, request: GeneratePdfRequest): file_name = f"{request.product_name.replace(' ', '_').lower()}_{unique_id}" pdf_generator = PDFManualGenerator(request.product_name) - pdf = await pdf_generator.create_manual(combined_data, f"{file_name}.pdf") + pdf = await pdf_generator.create_manual(combined_data) return await upload_file( S3UploadRequest(file=pdf, 
folder=f"{request.owner_id}/pdfs", From ed4fc546b6cfe2b5ec1d0f8870c6c718e4ef192e Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Tue, 11 Feb 2025 21:44:53 -0500 Subject: [PATCH 003/195] add amazon integration. --- app/externals/amazon/amazon_client.py | 34 ++++++++++++++++ .../amazon/requests/amazon_search_request.py | 8 ++++ .../responses/amazon_search_response.py | 40 +++++++++++++++++++ app/services/message_service.py | 8 ++-- 4 files changed, 86 insertions(+), 4 deletions(-) create mode 100644 app/externals/amazon/amazon_client.py create mode 100644 app/externals/amazon/requests/amazon_search_request.py create mode 100644 app/externals/amazon/responses/amazon_search_response.py diff --git a/app/externals/amazon/amazon_client.py b/app/externals/amazon/amazon_client.py new file mode 100644 index 0000000..5e2a08e --- /dev/null +++ b/app/externals/amazon/amazon_client.py @@ -0,0 +1,34 @@ +import httpx +from app.configurations.config import RAPIDAPI_KEY +from app.externals.amazon.requests.amazon_search_request import AmazonSearchRequest +from app.externals.amazon.responses.amazon_search_response import AmazonSearchResponse + + +async def search_products(request: AmazonSearchRequest) -> AmazonSearchResponse: + headers = { + 'x-rapidapi-host': 'real-time-amazon-data.p.rapidapi.com', + 'x-rapidapi-key': RAPIDAPI_KEY + } + + params = { + 'query': request.query, + 'page': '1', + 'country': 'US', + 'sort_by': 'RELEVANCE', + 'product_condition': 'ALL', + 'is_prime': 'false', + 'deals_and_discounts': 'NONE' + } + + async with httpx.AsyncClient() as client: + response = await client.get( + 'https://real-time-amazon-data.p.rapidapi.com/search', + headers=headers, + params=params + ) + + if response.status_code != 200: + raise Exception(f"Error en la llamada a Amazon API: {response.status_code}") + + raw_response = response.json() + return AmazonSearchResponse(raw_response) diff --git a/app/externals/amazon/requests/amazon_search_request.py 
b/app/externals/amazon/requests/amazon_search_request.py new file mode 100644 index 0000000..6a44190 --- /dev/null +++ b/app/externals/amazon/requests/amazon_search_request.py @@ -0,0 +1,8 @@ + + +class AmazonSearchRequest: + def __init__( + self, + query: str, + ): + self.query = query diff --git a/app/externals/amazon/responses/amazon_search_response.py b/app/externals/amazon/responses/amazon_search_response.py new file mode 100644 index 0000000..b7a09bb --- /dev/null +++ b/app/externals/amazon/responses/amazon_search_response.py @@ -0,0 +1,40 @@ +from dataclasses import dataclass +from typing import List, Optional + + +@dataclass +class AmazonProduct: + asin: str + title: str + price: float + image_url: str + product_url: str + + +class AmazonSearchResponse: + def __init__(self, raw_response: dict): + self.raw_response = raw_response + + def get_products(self) -> List[dict]: + products = [] + + for item in self.raw_response.get('data', {}).get('products', []): + product = { + "source": "amazon", + "external_id": item.get('asin', ''), + "name": item.get('product_title', ''), + "url_website": item.get('product_url', ''), + "url_image": item.get('product_photo', ''), + "price": self._format_price(item.get('product_price')) + } + products.append(product) + + return products + + def _format_price(self, price) -> Optional[float]: + if not price: + return None + try: + return float(str(price).replace('$', '').replace(',', '')) + except (ValueError, TypeError): + return None diff --git a/app/services/message_service.py b/app/services/message_service.py index 020f3ee..3f837e7 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -4,7 +4,6 @@ from app.configurations.config import AGENT_RECOMMEND_PRODUCTS_ID from app.externals.agent_config.agent_config_client import get_agent -from app.externals.aliexpress.requests.aliexpress_search_request import AliexpressSearchRequest from app.externals.s3_upload.requests.s3_upload_request import 
S3UploadRequest from app.externals.s3_upload.s3_upload_client import upload_file from app.pdf.helpers import clean_text, clean_json @@ -16,9 +15,10 @@ from app.services.message_service_interface import MessageServiceInterface from app.managers.conversation_manager_interface import ConversationManagerInterface from fastapi import Depends -from app.externals.aliexpress.aliexpress_client import search_products from app.configurations.pdf_manual_config import PDF_MANUAL_SECTIONS from app.pdf.pdf_manual_generator import PDFManualGenerator +from app.externals.amazon.requests.amazon_search_request import AmazonSearchRequest +from app.externals.amazon.amazon_client import search_products class MessageService(MessageServiceInterface): @@ -48,9 +48,9 @@ async def recommend_products(self, request: RecommendProductRequest): )) json_data = json.loads(data['text']) - aliexpress_data = await search_products(AliexpressSearchRequest(q=json_data['recommended_product'])) + amazon_data = await search_products(AmazonSearchRequest(query=json_data['recommended_product'])) - return RecommendProductResponse(ai_response=json_data, products=aliexpress_data.get_products()) + return RecommendProductResponse(ai_response=json_data, products=amazon_data.get_products()) async def process_multiple_agents(self, agent_queries: list[dict]) -> dict: tasks = [ From 749206b0aabd0f5f99ac1eae0a8fd37835759c04 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 13 Feb 2025 01:45:25 -0500 Subject: [PATCH 004/195] add logic product similar --- .env.example | 3 ++- app/configurations/config.py | 2 +- app/requests/recommend_product_request.py | 3 ++- app/services/message_service.py | 6 ++++-- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.env.example b/.env.example index 383ddaa..ea2d709 100644 --- a/.env.example +++ b/.env.example @@ -4,5 +4,6 @@ ANTHROPIC_API_KEY=dasdasdasdas HOST_DEEP_SEEK=https://55.188.190.83:11434/v1 RAPIDAPI_HOST=https://aliexpress-datahub.p.rapidapi.com 
AGENT_RECOMMEND_PRODUCTS_ID=recommend_agent +AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID=recommend_agent_similar RAPIDAPI_KEY=dsadasdasdasda -S3_UPLOAD_API=http://lambdahost \ No newline at end of file +S3_UPLOAD_API=http://lambdahost diff --git a/app/configurations/config.py b/app/configurations/config.py index 970c0ff..7a29fc8 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -9,7 +9,7 @@ DEEP_SEEK_HOST = os.getenv('HOST_DEEP_SEEK') AGENT_RECOMMEND_PRODUCTS_ID = os.getenv('AGENT_RECOMMEND_PRODUCTS_ID') - +AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID = os.getenv('AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID') RAPIDAPI_KEY = os.getenv('RAPIDAPI_KEY') RAPIDAPI_HOST = os.getenv('RAPIDAPI_HOST') diff --git a/app/requests/recommend_product_request.py b/app/requests/recommend_product_request.py index 5b3ec3d..9712013 100644 --- a/app/requests/recommend_product_request.py +++ b/app/requests/recommend_product_request.py @@ -4,4 +4,5 @@ class RecommendProductRequest(BaseModel): product_name: str - product_description: str \ No newline at end of file + product_description: str + similar: Optional[bool] = False \ No newline at end of file diff --git a/app/services/message_service.py b/app/services/message_service.py index 3f837e7..acab5ec 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -2,7 +2,7 @@ import uuid import asyncio -from app.configurations.config import AGENT_RECOMMEND_PRODUCTS_ID +from app.configurations.config import AGENT_RECOMMEND_PRODUCTS_ID, AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID from app.externals.agent_config.agent_config_client import get_agent from app.externals.s3_upload.requests.s3_upload_request import S3UploadRequest from app.externals.s3_upload.s3_upload_client import upload_file @@ -41,8 +41,10 @@ async def handle_message(self, request: MessageRequest): ) async def recommend_products(self, request: RecommendProductRequest): + agent_id = AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID if request.similar else 
AGENT_RECOMMEND_PRODUCTS_ID + data = await self.handle_message(MessageRequest( - agent_id=AGENT_RECOMMEND_PRODUCTS_ID, + agent_id=agent_id, conversation_id="", query=f"Product Name: {request.product_name} Description: {request.product_description}", )) From e7a10c44d65752c363838cbbee9510fef58a142f Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 20 Feb 2025 00:48:01 -0500 Subject: [PATCH 005/195] add logic for files in agents, add logic for generate images add middleware security for endpoints (only new endpoint), add logic for accept files in agents, add logic for generate variations from images. --- .env.example | 6 ++ app/configurations/config.py | 9 +++ app/controllers/handle_controller.py | 16 +++- app/managers/conversation_manager.py | 3 +- app/middlewares/auth_middleware.py | 80 ++++++++++++++++++++ app/processors/agent_processor.py | 4 +- app/processors/conversation_processor.py | 6 +- app/processors/simple_processor.py | 22 ++++-- app/requests/message_request.py | 1 + app/requests/variation_image_request.py | 12 +++ app/services/image_service.py | 93 ++++++++++++++++++++++++ app/services/image_service_interface.py | 9 +++ app/services/message_service.py | 2 +- main.py | 5 +- 14 files changed, 252 insertions(+), 16 deletions(-) create mode 100644 app/middlewares/auth_middleware.py create mode 100644 app/requests/variation_image_request.py create mode 100644 app/services/image_service.py create mode 100644 app/services/image_service_interface.py diff --git a/.env.example b/.env.example index ea2d709..0738eb3 100644 --- a/.env.example +++ b/.env.example @@ -7,3 +7,9 @@ AGENT_RECOMMEND_PRODUCTS_ID=recommend_agent AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID=recommend_agent_similar RAPIDAPI_KEY=dsadasdasdasda S3_UPLOAD_API=http://lambdahost + +API_KEY=tu_clave_api_secreta_aqui +AUTH_SERVICE_URL=https://develop.api.fluxi.com.co/api/v1/users/user-info + +STABILITY_API_KEY=your-stability-api-key-here 
+STABILITY_API_URL=https://api.stability.ai/v2beta/stable-image/control/style diff --git a/app/configurations/config.py b/app/configurations/config.py index 7a29fc8..34e569f 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -15,3 +15,12 @@ RAPIDAPI_HOST = os.getenv('RAPIDAPI_HOST') S3_UPLOAD_API = os.getenv('S3_UPLOAD_API') + +AGENT_IMAGE_VARIATIONS = "agent_image_variations" + +AUTH_SERVICE_URL: str = os.getenv('AUTH_SERVICE_URL') + +STABILITY_API_KEY: str = os.getenv('STABILITY_API_KEY') +STABILITY_API_URL: str = os.getenv('STABILITY_API_URL') + +API_KEY: str = os.getenv('API_KEY') diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index 0875a15..6a83511 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -1,9 +1,11 @@ from app.requests.generate_pdf_request import GeneratePdfRequest from app.requests.recommend_product_request import RecommendProductRequest from fastapi import APIRouter, Depends, Request - from app.requests.message_request import MessageRequest +from app.requests.variation_image_request import VariationImageRequest +from app.services.image_service_interface import ImageServiceInterface from app.services.message_service_interface import MessageServiceInterface +from app.middlewares.auth_middleware import require_auth, require_api_key router = APIRouter( prefix="/api/ms/conversational-engine", @@ -38,6 +40,18 @@ async def generate_pdf( return response +@router.post("/generate-variation-images") +@require_auth +async def generate_variation_images( + request: Request, + variation_request: VariationImageRequest, + service: ImageServiceInterface = Depends() +): + user_info = request.state.user_info + response = await service.generate_variation_images(variation_request, user_info.get("data", {}).get("_id")) + return response + + @router.get("/health") async def health_check(): return {"status": "OK"} diff --git 
a/app/managers/conversation_manager.py b/app/managers/conversation_manager.py index b7e4ab8..5c2fb4e 100644 --- a/app/managers/conversation_manager.py +++ b/app/managers/conversation_manager.py @@ -15,7 +15,6 @@ def get_conversation_history(self, conversation_id: str) -> List[str]: async def process_conversation(self, request: MessageRequest, agent_config: AgentConfigResponse) -> dict[str, Any]: ai_provider = AIProviderFactory.get_provider(agent_config.provider_ai) - llm = ai_provider.get_llm( model=agent_config.model_ai, temperature=agent_config.preferences.temperature, @@ -32,4 +31,4 @@ async def process_conversation(self, request: MessageRequest, agent_config: Agen else SimpleProcessor(llm, agent_config.prompt, history) ) - return await processor.process(request.query) + return await processor.process(request.query, request.files) diff --git a/app/middlewares/auth_middleware.py b/app/middlewares/auth_middleware.py new file mode 100644 index 0000000..a7bfc7b --- /dev/null +++ b/app/middlewares/auth_middleware.py @@ -0,0 +1,80 @@ +from functools import wraps +from fastapi import HTTPException, Header, Request +from typing import Optional +import httpx + +from app.configurations.config import AUTH_SERVICE_URL, API_KEY + + +async def verify_api_key(api_key: Optional[str]) -> bool: + if not api_key: + raise HTTPException( + status_code=401, + detail="API Key not provided" + ) + + if api_key != API_KEY: + raise HTTPException( + status_code=401, + detail="Invalid API Key" + ) + + return True + + +def require_api_key(func): + @wraps(func) + async def wrapper(request: Request, *args, **kwargs): + if request is None: + raise HTTPException( + status_code=500, + detail="Request not found" + ) + await verify_api_key(request.headers.get("x-api-key")) + return await func(request, *args, **kwargs) + + return wrapper + + +async def verify_user_token(authorization: Optional[str]) -> dict: + if not authorization: + raise HTTPException( + status_code=401, + detail="Authorization 
token not provided" + ) + + try: + async with httpx.AsyncClient() as client: + response = await client.get( + AUTH_SERVICE_URL, + headers={"Authorization": authorization}, + timeout=3.0 + ) + + if response.status_code != 200: + raise HTTPException( + status_code=401, + detail="Invalid token" + ) + + return response.json() + except httpx.RequestError: + raise HTTPException( + status_code=500, + detail="Error verifying token" + ) + + +def require_auth(func): + @wraps(func) + async def wrapper(request: Request, *args, **kwargs): + if request is None: + raise HTTPException( + status_code=500, + detail="Request not found" + ) + user_info = await verify_user_token(request.headers.get("authorization")) + request.state.user_info = user_info + return await func(request ,*args, **kwargs) + + return wrapper diff --git a/app/processors/agent_processor.py b/app/processors/agent_processor.py index 217f065..6debfd5 100644 --- a/app/processors/agent_processor.py +++ b/app/processors/agent_processor.py @@ -1,4 +1,4 @@ -from typing import Dict, Any, List +from typing import Dict, Any, List, Optional from langchain.agents import AgentExecutor, create_tool_calling_agent from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder from app.processors.conversation_processor import ConversationProcessor @@ -11,7 +11,7 @@ def __init__(self, llm: BaseChatModel, context: str, history: List[str], tools: super().__init__(llm, context, history) self.tools = tools - async def process(self, query: str) -> Dict[str, Any]: + async def process(self, query: str, files: Optional[List[Dict[str, str]]] = None) -> Dict[str, Any]: prompt_template = ChatPromptTemplate.from_messages([ ("system", "{context}"), MessagesPlaceholder(variable_name="chat_history"), diff --git a/app/processors/conversation_processor.py b/app/processors/conversation_processor.py index 1ad75eb..2cce354 100644 --- a/app/processors/conversation_processor.py +++ b/app/processors/conversation_processor.py @@ -1,4 +1,4 @@ 
-from typing import Dict, Any, List +from typing import Dict, Any, List, Optional from langchain_core.language_models import BaseChatModel @@ -8,5 +8,5 @@ def __init__(self, llm: BaseChatModel, context: str, history: List[str]): self.context = context self.history = history - async def process(self, query: str) -> Dict[str, Any]: - raise NotImplementedError \ No newline at end of file + async def process(self, query: str, files: Optional[List[Dict[str, str]]] = None) -> Dict[str, Any]: + raise NotImplementedError diff --git a/app/processors/simple_processor.py b/app/processors/simple_processor.py index 6e39123..48e27b0 100644 --- a/app/processors/simple_processor.py +++ b/app/processors/simple_processor.py @@ -1,16 +1,26 @@ -from typing import Dict, Any +from typing import Dict, Any, Optional, List, Union from langchain.chains import LLMChain from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder from app.processors.conversation_processor import ConversationProcessor class SimpleProcessor(ConversationProcessor): - async def process(self, query: str) -> Dict[str, Any]: - prompt = ChatPromptTemplate.from_messages([ + async def process(self, query: str, files: Optional[List[Dict[str, str]]] = None) -> Dict[str, Any]: + messages = [ ("system", "{context}"), - MessagesPlaceholder(variable_name="chat_history"), - ("human", "{input}") - ]) + MessagesPlaceholder(variable_name="chat_history") + ] + + if files: + for file in files: + if file.get('type') == 'image': + messages.append(("system", f"{file['content']}")) + else: + messages.append(("system", f"\n{file['path']}\n```{file['content']}```\n")) + + messages.append(("human", "{input}")) + + prompt = ChatPromptTemplate.from_messages(messages) chain = LLMChain( llm=self.llm, diff --git a/app/requests/message_request.py b/app/requests/message_request.py index 370d434..0cefc15 100644 --- a/app/requests/message_request.py +++ b/app/requests/message_request.py @@ -8,3 +8,4 @@ class 
MessageRequest(BaseModel): conversation_id: str metadata_filter: Optional[dict] = None parameter_prompt: Optional[dict] = None + files: Optional[List[Dict[str, str]]] = None diff --git a/app/requests/variation_image_request.py b/app/requests/variation_image_request.py new file mode 100644 index 0000000..33d6376 --- /dev/null +++ b/app/requests/variation_image_request.py @@ -0,0 +1,12 @@ +from pydantic import BaseModel, Field, validator + + +class VariationImageRequest(BaseModel): + file: str + num_variations: int = Field(default=3, ge=1, le=10) # mínimo 1, máximo 10 variaciones + + @validator('num_variations') + def validate_variations(cls, v): + if v > 10: + raise ValueError("El número máximo de variaciones permitidas es 10") + return v diff --git a/app/services/image_service.py b/app/services/image_service.py new file mode 100644 index 0000000..4365ad9 --- /dev/null +++ b/app/services/image_service.py @@ -0,0 +1,93 @@ +from app.configurations.config import ( + AGENT_IMAGE_VARIATIONS, + STABILITY_API_KEY, + STABILITY_API_URL +) +from app.externals.s3_upload.responses.s3_upload_response import S3UploadResponse +from app.requests.message_request import MessageRequest +from app.requests.variation_image_request import VariationImageRequest +from app.externals.s3_upload.requests.s3_upload_request import S3UploadRequest +from app.services.image_service_interface import ImageServiceInterface +from app.services.message_service_interface import MessageServiceInterface +from app.externals.s3_upload.s3_upload_client import upload_file +from fastapi import Depends +import asyncio +import aiohttp +import base64 +import uuid +from dotenv import load_dotenv + +load_dotenv() + + +class ImageService(ImageServiceInterface): + def __init__(self, message_service: MessageServiceInterface = Depends()): + self.message_service = message_service + self.stability_api_key = STABILITY_API_KEY + self.stability_api_url = STABILITY_API_URL + + async def _upload_to_s3(self, image_base64: str, 
index: int, owner_id: str) -> S3UploadResponse: + unique_id = uuid.uuid4().hex[:8] + file_name = f"variation_{index}_{unique_id}" + + return await upload_file( + S3UploadRequest( + file=image_base64, + folder=f"{owner_id}/products/variations", + filename=file_name + ) + ) + + async def _generate_single_variation(self, image_base64: str, prompt: str, negative_prompt: str, index: int, + owner_id: str) -> str: + image_bytes = base64.b64decode(image_base64) + form_data = aiohttp.FormData() + form_data.add_field('image', + image_bytes, + filename='image.jpg', + content_type='image/jpeg') + form_data.add_field('prompt', prompt) + form_data.add_field('negative_prompt', negative_prompt) + form_data.add_field('fidelity', '1.0') + form_data.add_field('control_strength', '1.0') + form_data.add_field('output_format', 'webp') + + async with aiohttp.ClientSession() as session: + async with session.post( + self.stability_api_url, + headers={ + "Authorization": f"Bearer {self.stability_api_key}", + "accept": "image/*" + }, + data=form_data + ) as response: + if response.status == 200: + content = await response.read() + content_base64 = base64.b64encode(content).decode('utf-8') + response = await self._upload_to_s3(content_base64, index, owner_id) + return response.s3_url + else: + raise Exception(f"Error {response.status}: {await response.text()}") + + async def generate_variation_images(self, request: VariationImageRequest, owner_id: str): + message_request = MessageRequest( + query="Attached is the product image.", + agent_id=AGENT_IMAGE_VARIATIONS, + conversation_id="", + files=[{ + "type": "image", + "path": "without.png", + "content": request.file + }] + ) + + response = await self.message_service.handle_message(message_request) + prompt = response["text"] + negative_prompt = "text, letters, brand logos, brand names, symbols" + tasks = [ + self._generate_single_variation(request.file, prompt, negative_prompt, i, owner_id) + for i in range(request.num_variations) + ] + 
generated_urls = await asyncio.gather(*tasks) + + return {"urls": generated_urls} diff --git a/app/services/image_service_interface.py b/app/services/image_service_interface.py new file mode 100644 index 0000000..d2dfdff --- /dev/null +++ b/app/services/image_service_interface.py @@ -0,0 +1,9 @@ +from abc import abstractmethod, ABC + +from app.requests.variation_image_request import VariationImageRequest + + +class ImageServiceInterface(ABC): + @abstractmethod + async def generate_variation_images(self, request: VariationImageRequest, owner_id: str): + pass diff --git a/app/services/message_service.py b/app/services/message_service.py index acab5ec..6df4b7e 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -42,7 +42,7 @@ async def handle_message(self, request: MessageRequest): async def recommend_products(self, request: RecommendProductRequest): agent_id = AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID if request.similar else AGENT_RECOMMEND_PRODUCTS_ID - + data = await self.handle_message(MessageRequest( agent_id=agent_id, conversation_id="", diff --git a/main.py b/main.py index 4ad61d6..c63e90d 100644 --- a/main.py +++ b/main.py @@ -3,6 +3,8 @@ from app.controllers.handle_controller import router from app.managers.conversation_manager import ConversationManager from app.managers.conversation_manager_interface import ConversationManagerInterface +from app.services.image_service import ImageService +from app.services.image_service_interface import ImageServiceInterface from app.services.message_service import MessageService from app.services.message_service_interface import MessageServiceInterface @@ -14,8 +16,9 @@ app.include_router(router) app.dependency_overrides[MessageServiceInterface] = MessageService app.dependency_overrides[ConversationManagerInterface] = ConversationManager +app.dependency_overrides[ImageServiceInterface] = ImageService if __name__ == "__main__": import uvicorn - uvicorn.run(app, host="0.0.0.0", port=8000) \ No 
newline at end of file + uvicorn.run(app, host="0.0.0.0", port=8000) From 5c02762d4cbf5b7c95ddf30190433300a4292273 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 20 Feb 2025 12:43:31 -0500 Subject: [PATCH 006/195] fix for use url use url s3 upload. --- app/processors/simple_processor.py | 4 ++-- app/services/image_service.py | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/app/processors/simple_processor.py b/app/processors/simple_processor.py index 48e27b0..22faee9 100644 --- a/app/processors/simple_processor.py +++ b/app/processors/simple_processor.py @@ -14,9 +14,9 @@ async def process(self, query: str, files: Optional[List[Dict[str, str]]] = None if files: for file in files: if file.get('type') == 'image': - messages.append(("system", f"{file['content']}")) + messages.append(("system", f"{file['url']}")) else: - messages.append(("system", f"\n{file['path']}\n```{file['content']}```\n")) + messages.append(("system", f"")) messages.append(("human", "{input}")) diff --git a/app/services/image_service.py b/app/services/image_service.py index 4365ad9..85eb607 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -70,13 +70,15 @@ async def _generate_single_variation(self, image_base64: str, prompt: str, negat raise Exception(f"Error {response.status}: {await response.text()}") async def generate_variation_images(self, request: VariationImageRequest, owner_id: str): + original_image_response = await self._upload_to_s3(request.file, 0, owner_id) + message_request = MessageRequest( query="Attached is the product image.", agent_id=AGENT_IMAGE_VARIATIONS, conversation_id="", files=[{ "type": "image", - "path": "without.png", + "url": original_image_response.s3_url, "content": request.file }] ) @@ -90,4 +92,7 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ ] generated_urls = await asyncio.gather(*tasks) - return {"urls": generated_urls} + # Agregamos la URL de la imagen 
original al principio de la lista + all_urls = [original_image_response.s3_url] + generated_urls + + return {"urls": all_urls} From d1f50bf8cafc703f7f8d3dc199458c89baae9437 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 20 Feb 2025 16:30:26 -0500 Subject: [PATCH 007/195] add response add parameters to response. --- app/responses/generate_image_response.py | 9 +++++++++ app/services/image_service.py | 9 ++++----- 2 files changed, 13 insertions(+), 5 deletions(-) create mode 100644 app/responses/generate_image_response.py diff --git a/app/responses/generate_image_response.py b/app/responses/generate_image_response.py new file mode 100644 index 0000000..d82b603 --- /dev/null +++ b/app/responses/generate_image_response.py @@ -0,0 +1,9 @@ +from typing import List + +from pydantic import BaseModel + + +class GenerateImageResponse(BaseModel): + original_url: str + generated_urls: List[str] + generated_prompt: str diff --git a/app/services/image_service.py b/app/services/image_service.py index 85eb607..ce3f64d 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -7,6 +7,7 @@ from app.requests.message_request import MessageRequest from app.requests.variation_image_request import VariationImageRequest from app.externals.s3_upload.requests.s3_upload_request import S3UploadRequest +from app.responses.generate_image_response import GenerateImageResponse from app.services.image_service_interface import ImageServiceInterface from app.services.message_service_interface import MessageServiceInterface from app.externals.s3_upload.s3_upload_client import upload_file @@ -71,7 +72,7 @@ async def _generate_single_variation(self, image_base64: str, prompt: str, negat async def generate_variation_images(self, request: VariationImageRequest, owner_id: str): original_image_response = await self._upload_to_s3(request.file, 0, owner_id) - + message_request = MessageRequest( query="Attached is the product image.", agent_id=AGENT_IMAGE_VARIATIONS, @@ -92,7 
+93,5 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ ] generated_urls = await asyncio.gather(*tasks) - # Agregamos la URL de la imagen original al principio de la lista - all_urls = [original_image_response.s3_url] + generated_urls - - return {"urls": all_urls} + return GenerateImageResponse(generated_urls=generated_urls, original_url=original_image_response.s3_url, + generated_prompt=prompt) From 1bbfdc9370fa7e7ec713ae65a1e7e50526c1cd0c Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 20 Feb 2025 16:48:12 -0500 Subject: [PATCH 008/195] modify structure folders --- app/services/image_service.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/app/services/image_service.py b/app/services/image_service.py index ce3f64d..2f51f24 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -27,20 +27,21 @@ def __init__(self, message_service: MessageServiceInterface = Depends()): self.stability_api_key = STABILITY_API_KEY self.stability_api_url = STABILITY_API_URL - async def _upload_to_s3(self, image_base64: str, index: int, owner_id: str) -> S3UploadResponse: + async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, + prefix_name: str) -> S3UploadResponse: unique_id = uuid.uuid4().hex[:8] - file_name = f"variation_{index}_{unique_id}" + file_name = f"{prefix_name}_{unique_id}" return await upload_file( S3UploadRequest( file=image_base64, - folder=f"{owner_id}/products/variations", + folder=f"{owner_id}/products/variations/{folder_id}", filename=file_name ) ) - async def _generate_single_variation(self, image_base64: str, prompt: str, negative_prompt: str, index: int, - owner_id: str) -> str: + async def _generate_single_variation(self, image_base64: str, prompt: str, negative_prompt: str, owner_id: str, + folder_id: str) -> str: image_bytes = base64.b64decode(image_base64) form_data = aiohttp.FormData() form_data.add_field('image', @@ -65,13 
+66,14 @@ async def _generate_single_variation(self, image_base64: str, prompt: str, negat if response.status == 200: content = await response.read() content_base64 = base64.b64encode(content).decode('utf-8') - response = await self._upload_to_s3(content_base64, index, owner_id) + response = await self._upload_to_s3(content_base64, owner_id, folder_id, "variation") return response.s3_url else: raise Exception(f"Error {response.status}: {await response.text()}") async def generate_variation_images(self, request: VariationImageRequest, owner_id: str): - original_image_response = await self._upload_to_s3(request.file, 0, owner_id) + folder_id = uuid.uuid4().hex[:8] + original_image_response = await self._upload_to_s3(request.file, owner_id, folder_id, "original") message_request = MessageRequest( query="Attached is the product image.", @@ -88,7 +90,7 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ prompt = response["text"] negative_prompt = "text, letters, brand logos, brand names, symbols" tasks = [ - self._generate_single_variation(request.file, prompt, negative_prompt, i, owner_id) + self._generate_single_variation(request.file, prompt, negative_prompt, owner_id, folder_id) for i in range(request.num_variations) ] generated_urls = await asyncio.gather(*tasks) From 5e6f46b127604cd70f34042124fbea44247e83ec Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 20 Feb 2025 23:24:30 -0500 Subject: [PATCH 009/195] add logic for images support antrhopic and deepseek --- app/managers/conversation_manager.py | 2 +- app/processors/agent_processor.py | 3 ++- app/processors/conversation_processor.py | 2 +- app/processors/simple_processor.py | 28 ++++++++++++++++-------- app/providers/ai_provider_interface.py | 4 ++++ app/providers/anthropic_provider.py | 9 +++++--- app/providers/deepseek_provider.py | 5 ++++- app/providers/openai_provider.py | 3 +++ 8 files changed, 40 insertions(+), 16 deletions(-) diff --git 
a/app/managers/conversation_manager.py b/app/managers/conversation_manager.py index 5c2fb4e..8e554a9 100644 --- a/app/managers/conversation_manager.py +++ b/app/managers/conversation_manager.py @@ -31,4 +31,4 @@ async def process_conversation(self, request: MessageRequest, agent_config: Agen else SimpleProcessor(llm, agent_config.prompt, history) ) - return await processor.process(request.query, request.files) + return await processor.process(request.query, request.files, ai_provider.supports_interleaved_files()) diff --git a/app/processors/agent_processor.py b/app/processors/agent_processor.py index 6debfd5..7c0588c 100644 --- a/app/processors/agent_processor.py +++ b/app/processors/agent_processor.py @@ -11,7 +11,8 @@ def __init__(self, llm: BaseChatModel, context: str, history: List[str], tools: super().__init__(llm, context, history) self.tools = tools - async def process(self, query: str, files: Optional[List[Dict[str, str]]] = None) -> Dict[str, Any]: + async def process(self, query: str, files: Optional[List[Dict[str, str]]] = None, + supports_interleaved_files: bool = False) -> Dict[str, Any]: prompt_template = ChatPromptTemplate.from_messages([ ("system", "{context}"), MessagesPlaceholder(variable_name="chat_history"), diff --git a/app/processors/conversation_processor.py b/app/processors/conversation_processor.py index 2cce354..aadc8dd 100644 --- a/app/processors/conversation_processor.py +++ b/app/processors/conversation_processor.py @@ -8,5 +8,5 @@ def __init__(self, llm: BaseChatModel, context: str, history: List[str]): self.context = context self.history = history - async def process(self, query: str, files: Optional[List[Dict[str, str]]] = None) -> Dict[str, Any]: + async def process(self, query: str, files: Optional[List[Dict[str, str]]], supports_interleaved_files: bool) -> Dict[str, Any]: raise NotImplementedError diff --git a/app/processors/simple_processor.py b/app/processors/simple_processor.py index 22faee9..38301e9 100644 --- 
a/app/processors/simple_processor.py +++ b/app/processors/simple_processor.py @@ -5,21 +5,31 @@ class SimpleProcessor(ConversationProcessor): - async def process(self, query: str, files: Optional[List[Dict[str, str]]] = None) -> Dict[str, Any]: - messages = [ - ("system", "{context}"), - MessagesPlaceholder(variable_name="chat_history") - ] + async def process(self, query: str, files: Optional[List[Dict[str, str]]] = None, supports_interleaved_files: bool = False) -> Dict[str, Any]: + messages = [] + system_message = self.context or "" - if files: + if files and not supports_interleaved_files: + file_references = [] + for file in files: + if file.get('type') == 'image': + file_references.append(f"{file['url']}") + else: + file_references.append(f"") + + system_message += "\n\n" + "\n".join(file_references) + + messages.append(("system", system_message)) + messages.append(MessagesPlaceholder(variable_name="chat_history")) + + if files and supports_interleaved_files: for file in files: if file.get('type') == 'image': messages.append(("system", f"{file['url']}")) else: messages.append(("system", f"")) - messages.append(("human", "{input}")) - + messages.append(("human", query)) prompt = ChatPromptTemplate.from_messages(messages) chain = LLMChain( @@ -32,4 +42,4 @@ async def process(self, query: str, files: Optional[List[Dict[str, str]]] = None "context": self.context or "", "chat_history": self.history, "input": query - }) \ No newline at end of file + }) diff --git a/app/providers/ai_provider_interface.py b/app/providers/ai_provider_interface.py index b3028f8..f61d99c 100644 --- a/app/providers/ai_provider_interface.py +++ b/app/providers/ai_provider_interface.py @@ -15,3 +15,7 @@ def get_llm(self, model: str, temperature: float, max_tokens: int, top_p: float) Retorna el modelo de lenguaje configurado """ pass + + @abstractmethod + def supports_interleaved_files(self) -> bool: + pass diff --git a/app/providers/anthropic_provider.py 
b/app/providers/anthropic_provider.py index a3a11d2..95df258 100644 --- a/app/providers/anthropic_provider.py +++ b/app/providers/anthropic_provider.py @@ -6,7 +6,10 @@ class AnthropicProvider(AIProviderInterface): def get_llm(self, model: str, temperature: float, max_tokens: int, top_p: int) -> ChatAnthropic: return ChatAnthropic( model=model, - #temperature=temperature, - #max_tokens=max_tokens, - #top_p=top_p + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p ) + + def supports_interleaved_files(self) -> bool: + return False \ No newline at end of file diff --git a/app/providers/deepseek_provider.py b/app/providers/deepseek_provider.py index 1637586..19cde42 100644 --- a/app/providers/deepseek_provider.py +++ b/app/providers/deepseek_provider.py @@ -14,5 +14,8 @@ def get_llm(self, model: str, temperature: float, max_tokens: int, top_p: float) return Ollama( model=model, base_url=DEEP_SEEK_HOST - **model_kwargs + ** model_kwargs ) + + def supports_interleaved_files(self) -> bool: + return False diff --git a/app/providers/openai_provider.py b/app/providers/openai_provider.py index d2e6ed4..7dd23a4 100644 --- a/app/providers/openai_provider.py +++ b/app/providers/openai_provider.py @@ -14,3 +14,6 @@ def get_llm(self, model: str, temperature: float, max_tokens: int, top_p: float) model=model, **model_kwargs ) + + def supports_interleaved_files(self) -> bool: + return True From 3eb1698b8d85997ac8a0570d4a3b9b8e31dbbe6f Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 21 Feb 2025 00:39:36 -0500 Subject: [PATCH 010/195] add logic for vision analysis. 
--- .env.example | 2 + app/configurations/config.py | 1 + .../google_vision/google_vision_client.py | 57 +++++++++++++++++++ .../responses/vision_analysis_response.py | 18 ++++++ app/responses/generate_image_response.py | 3 + app/services/image_service.py | 11 ++-- 6 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 app/externals/google_vision/google_vision_client.py create mode 100644 app/externals/google_vision/responses/vision_analysis_response.py diff --git a/.env.example b/.env.example index 0738eb3..8ff7563 100644 --- a/.env.example +++ b/.env.example @@ -13,3 +13,5 @@ AUTH_SERVICE_URL=https://develop.api.fluxi.com.co/api/v1/users/user-info STABILITY_API_KEY=your-stability-api-key-here STABILITY_API_URL=https://api.stability.ai/v2beta/stable-image/control/style + +GOOGLE_VISION_API_KEY=dsadadasda \ No newline at end of file diff --git a/app/configurations/config.py b/app/configurations/config.py index 34e569f..15845d2 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -22,5 +22,6 @@ STABILITY_API_KEY: str = os.getenv('STABILITY_API_KEY') STABILITY_API_URL: str = os.getenv('STABILITY_API_URL') +GOOGLE_VISION_API_KEY: str = os.getenv('GOOGLE_VISION_API_KEY') API_KEY: str = os.getenv('API_KEY') diff --git a/app/externals/google_vision/google_vision_client.py b/app/externals/google_vision/google_vision_client.py new file mode 100644 index 0000000..21112d3 --- /dev/null +++ b/app/externals/google_vision/google_vision_client.py @@ -0,0 +1,57 @@ +import aiohttp +from app.configurations.config import GOOGLE_VISION_API_KEY +from app.externals.google_vision.responses.vision_analysis_response import VisionAnalysisResponse + + +async def analyze_image(image_base64: str) -> VisionAnalysisResponse: + vision_api_url = f"https://vision.googleapis.com/v1/images:annotate?key={GOOGLE_VISION_API_KEY}" + + payload = { + "requests": [{ + "image": { + "content": image_base64 + }, + "features": [ + { + "type": "LABEL_DETECTION", + 
"maxResults": 3 + }, + { + "type": "LOGO_DETECTION", + "maxResults": 1 + } + ] + }] + } + + async with aiohttp.ClientSession() as session: + async with session.post( + vision_api_url, + json=payload, + headers={"Content-Type": "application/json"} + ) as response: + if response.status != 200: + raise Exception(f"Error en Google Vision API: {await response.text()}") + + data = await response.json() + + logo_description = "" + if data["responses"][0].get("logoAnnotations"): + logo = data["responses"][0]["logoAnnotations"][0] + if logo.get("score", 0) > 0.65: + logo_description = logo["description"] + + labels = [] + if data["responses"][0].get("labelAnnotations"): + labels = [ + label["description"] + for label in data["responses"][0]["labelAnnotations"] + if label.get("score", 0) > 0.65 + ] + + label_description = ", ".join(labels) + + return VisionAnalysisResponse( + logo_description=logo_description, + label_description=label_description + ) diff --git a/app/externals/google_vision/responses/vision_analysis_response.py b/app/externals/google_vision/responses/vision_analysis_response.py new file mode 100644 index 0000000..96dc5f4 --- /dev/null +++ b/app/externals/google_vision/responses/vision_analysis_response.py @@ -0,0 +1,18 @@ +from dataclasses import dataclass + + +@dataclass +class VisionAnalysisResponse: + logo_description: str + label_description: str + + def get_analysis_text(self) -> str: + analysis_parts = [] + + if self.logo_description: + analysis_parts.append(f"Detected logos: {self.logo_description}") + + if self.label_description: + analysis_parts.append(f"Detected category: {self.label_description}") + + return ". ".join(analysis_parts) + ("." 
if analysis_parts else "") diff --git a/app/responses/generate_image_response.py b/app/responses/generate_image_response.py index d82b603..27a94af 100644 --- a/app/responses/generate_image_response.py +++ b/app/responses/generate_image_response.py @@ -2,8 +2,11 @@ from pydantic import BaseModel +from app.externals.google_vision.responses.vision_analysis_response import VisionAnalysisResponse + class GenerateImageResponse(BaseModel): original_url: str generated_urls: List[str] generated_prompt: str + vision_analysis: VisionAnalysisResponse diff --git a/app/services/image_service.py b/app/services/image_service.py index 2f51f24..89f5262 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -1,7 +1,8 @@ from app.configurations.config import ( AGENT_IMAGE_VARIATIONS, STABILITY_API_KEY, - STABILITY_API_URL + STABILITY_API_URL, + GOOGLE_VISION_API_KEY ) from app.externals.s3_upload.responses.s3_upload_response import S3UploadResponse from app.requests.message_request import MessageRequest @@ -17,6 +18,7 @@ import base64 import uuid from dotenv import load_dotenv +from app.externals.google_vision.google_vision_client import analyze_image load_dotenv() @@ -74,9 +76,10 @@ async def _generate_single_variation(self, image_base64: str, prompt: str, negat async def generate_variation_images(self, request: VariationImageRequest, owner_id: str): folder_id = uuid.uuid4().hex[:8] original_image_response = await self._upload_to_s3(request.file, owner_id, folder_id, "original") - + vision_analysis = await analyze_image(request.file) + message_request = MessageRequest( - query="Attached is the product image.", + query=f"Attached is the product image. 
{vision_analysis.get_analysis_text()}", agent_id=AGENT_IMAGE_VARIATIONS, conversation_id="", files=[{ @@ -96,4 +99,4 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ generated_urls = await asyncio.gather(*tasks) return GenerateImageResponse(generated_urls=generated_urls, original_url=original_image_response.s3_url, - generated_prompt=prompt) + generated_prompt=prompt, vision_analysis=vision_analysis) From ab9983fd6fb5441db30bc526ebf223e2dc799177 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 21 Feb 2025 12:37:39 -0500 Subject: [PATCH 011/195] filter price --- .../responses/amazon_search_response.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/app/externals/amazon/responses/amazon_search_response.py b/app/externals/amazon/responses/amazon_search_response.py index b7a09bb..f976b9c 100644 --- a/app/externals/amazon/responses/amazon_search_response.py +++ b/app/externals/amazon/responses/amazon_search_response.py @@ -19,15 +19,17 @@ def get_products(self) -> List[dict]: products = [] for item in self.raw_response.get('data', {}).get('products', []): - product = { - "source": "amazon", - "external_id": item.get('asin', ''), - "name": item.get('product_title', ''), - "url_website": item.get('product_url', ''), - "url_image": item.get('product_photo', ''), - "price": self._format_price(item.get('product_price')) - } - products.append(product) + price = self._format_price(item.get('product_price')) + if price is not None: + product = { + "source": "amazon", + "external_id": item.get('asin', ''), + "name": item.get('product_title', ''), + "url_website": item.get('product_url', ''), + "url_image": item.get('product_photo', ''), + "price": price + } + products.append(product) return products From adaefe425f10bdf92d5cfeeceb820b061b2c2783 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 21 Feb 2025 12:53:23 -0500 Subject: [PATCH 012/195] fix > price 0 --- 
app/externals/amazon/responses/amazon_search_response.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/externals/amazon/responses/amazon_search_response.py b/app/externals/amazon/responses/amazon_search_response.py index f976b9c..36509ff 100644 --- a/app/externals/amazon/responses/amazon_search_response.py +++ b/app/externals/amazon/responses/amazon_search_response.py @@ -20,7 +20,7 @@ def get_products(self) -> List[dict]: for item in self.raw_response.get('data', {}).get('products', []): price = self._format_price(item.get('product_price')) - if price is not None: + if price is not None and price > 0: product = { "source": "amazon", "external_id": item.get('asin', ''), From bf145be0cadca903a445cc1a50bf5b2b23cc0cab Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Sat, 22 Feb 2025 01:26:10 -0500 Subject: [PATCH 013/195] change logic for replicate. --- .env.example | 7 +-- app/configurations/config.py | 3 +- app/externals/replicate/replicate_client.py | 57 +++++++++++++++++++++ app/services/image_service.py | 53 +++++-------------- 4 files changed, 74 insertions(+), 46 deletions(-) create mode 100644 app/externals/replicate/replicate_client.py diff --git a/.env.example b/.env.example index 8ff7563..c7d59c6 100644 --- a/.env.example +++ b/.env.example @@ -10,8 +10,5 @@ S3_UPLOAD_API=http://lambdahost API_KEY=tu_clave_api_secreta_aqui AUTH_SERVICE_URL=https://develop.api.fluxi.com.co/api/v1/users/user-info - -STABILITY_API_KEY=your-stability-api-key-here -STABILITY_API_URL=https://api.stability.ai/v2beta/stable-image/control/style - -GOOGLE_VISION_API_KEY=dsadadasda \ No newline at end of file +GOOGLE_VISION_API_KEY=dsadadasda +REPLICATE_API_KEY=dsadadasda \ No newline at end of file diff --git a/app/configurations/config.py b/app/configurations/config.py index 15845d2..c3e1d57 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -20,8 +20,7 @@ AUTH_SERVICE_URL: str = os.getenv('AUTH_SERVICE_URL') 
-STABILITY_API_KEY: str = os.getenv('STABILITY_API_KEY') -STABILITY_API_URL: str = os.getenv('STABILITY_API_URL') GOOGLE_VISION_API_KEY: str = os.getenv('GOOGLE_VISION_API_KEY') +REPLICATE_API_KEY: str = os.getenv('REPLICATE_API_KEY') API_KEY: str = os.getenv('API_KEY') diff --git a/app/externals/replicate/replicate_client.py b/app/externals/replicate/replicate_client.py new file mode 100644 index 0000000..c12bbda --- /dev/null +++ b/app/externals/replicate/replicate_client.py @@ -0,0 +1,57 @@ +import aiohttp +import asyncio +from app.configurations.config import REPLICATE_API_KEY + + +async def generate_image_variation( + image_url: str, + prompt: str, + aspect_ratio: str = "1:1", + output_format: str = "webp", + output_quality: int = 80, + prompt_upsampling: bool = False, + safety_tolerance: int = 2 +) -> bytes: + payload = { + "input": { + "aspect_ratio": aspect_ratio, + "image_prompt": image_url, + "output_format": output_format, + "output_quality": output_quality, + "prompt": prompt, + "prompt_upsampling": prompt_upsampling, + "safety_tolerance": safety_tolerance + } + } + + async with aiohttp.ClientSession() as session: + async with session.post( + "https://api.replicate.com/v1/models/black-forest-labs/flux-1.1-pro/predictions", + headers={ + "Authorization": f"Bearer {REPLICATE_API_KEY}", + "Content-Type": "application/json" + }, + json=payload + ) as response: + if response.status == 200 or response.status == 201: + prediction_data = await response.json() + + while True: + async with session.get( + prediction_data["urls"]["get"], + headers={"Authorization": f"Bearer {REPLICATE_API_KEY}"} + ) as status_response: + status_data = await status_response.json() + if status_data["status"] == "succeeded": + image_url = status_data["output"] + async with session.get(image_url) as img_response: + if img_response.status == 200: + return await img_response.read() + else: + raise Exception(f"Error downloading image: {img_response.status}") + elif status_data["status"] 
== "failed": + raise Exception("Image Generation Failed") + + await asyncio.sleep(1) + else: + raise Exception(f"Error {response.status}: {await response.text()}") diff --git a/app/services/image_service.py b/app/services/image_service.py index 89f5262..9359d7c 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -1,8 +1,5 @@ from app.configurations.config import ( AGENT_IMAGE_VARIATIONS, - STABILITY_API_KEY, - STABILITY_API_URL, - GOOGLE_VISION_API_KEY ) from app.externals.s3_upload.responses.s3_upload_response import S3UploadResponse from app.requests.message_request import MessageRequest @@ -14,11 +11,11 @@ from app.externals.s3_upload.s3_upload_client import upload_file from fastapi import Depends import asyncio -import aiohttp import base64 import uuid from dotenv import load_dotenv from app.externals.google_vision.google_vision_client import analyze_image +from app.externals.replicate.replicate_client import generate_image_variation load_dotenv() @@ -26,8 +23,6 @@ class ImageService(ImageServiceInterface): def __init__(self, message_service: MessageServiceInterface = Depends()): self.message_service = message_service - self.stability_api_key = STABILITY_API_KEY - self.stability_api_url = STABILITY_API_URL async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, prefix_name: str) -> S3UploadResponse: @@ -42,42 +37,23 @@ async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, ) ) - async def _generate_single_variation(self, image_base64: str, prompt: str, negative_prompt: str, owner_id: str, + async def _generate_single_variation(self, url_image: str, prompt: str, owner_id: str, folder_id: str) -> str: - image_bytes = base64.b64decode(image_base64) - form_data = aiohttp.FormData() - form_data.add_field('image', - image_bytes, - filename='image.jpg', - content_type='image/jpeg') - form_data.add_field('prompt', prompt) - form_data.add_field('negative_prompt', negative_prompt) - 
form_data.add_field('fidelity', '1.0') - form_data.add_field('control_strength', '1.0') - form_data.add_field('output_format', 'webp') - - async with aiohttp.ClientSession() as session: - async with session.post( - self.stability_api_url, - headers={ - "Authorization": f"Bearer {self.stability_api_key}", - "accept": "image/*" - }, - data=form_data - ) as response: - if response.status == 200: - content = await response.read() - content_base64 = base64.b64encode(content).decode('utf-8') - response = await self._upload_to_s3(content_base64, owner_id, folder_id, "variation") - return response.s3_url - else: - raise Exception(f"Error {response.status}: {await response.text()}") + image_content = await generate_image_variation(image_url=url_image, prompt=prompt) + content_base64 = base64.b64encode(image_content).decode('utf-8') + final_upload = await self._upload_to_s3( + content_base64, + owner_id, + folder_id, + "variation" + ) + return final_upload.s3_url async def generate_variation_images(self, request: VariationImageRequest, owner_id: str): folder_id = uuid.uuid4().hex[:8] original_image_response = await self._upload_to_s3(request.file, owner_id, folder_id, "original") vision_analysis = await analyze_image(request.file) - + message_request = MessageRequest( query=f"Attached is the product image. {vision_analysis.get_analysis_text()}", agent_id=AGENT_IMAGE_VARIATIONS, @@ -90,10 +66,9 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ ) response = await self.message_service.handle_message(message_request) - prompt = response["text"] - negative_prompt = "text, letters, brand logos, brand names, symbols" + prompt = response["text"] + " Do not modify any text, letters, brand logos, brand names, or symbols." 
tasks = [ - self._generate_single_variation(request.file, prompt, negative_prompt, owner_id, folder_id) + self._generate_single_variation(original_image_response.s3_url, prompt, owner_id, folder_id) for i in range(request.num_variations) ] generated_urls = await asyncio.gather(*tasks) From 211518bf22f174c1e4e8133ab3f54c426d91a0b4 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Sun, 2 Mar 2025 23:15:15 -0500 Subject: [PATCH 014/195] add message in result --- app/processors/agent_processor.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/app/processors/agent_processor.py b/app/processors/agent_processor.py index 7c0588c..a65028c 100644 --- a/app/processors/agent_processor.py +++ b/app/processors/agent_processor.py @@ -42,6 +42,10 @@ async def process(self, query: str, files: Optional[List[Dict[str, str]]] = None "input": query, "agent_scratchpad": "" }) + + if "message" not in result and "output" in result: + result["message"] = result["output"] + return result except Exception as e: print(f"Error durante la ejecución del agente: {str(e)}") From bf00f6c2724993543f4f77c73dcd336164cd26e4 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Sun, 2 Mar 2025 23:15:51 -0500 Subject: [PATCH 015/195] change by text. 
--- app/processors/agent_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/processors/agent_processor.py b/app/processors/agent_processor.py index a65028c..6843369 100644 --- a/app/processors/agent_processor.py +++ b/app/processors/agent_processor.py @@ -43,8 +43,8 @@ async def process(self, query: str, files: Optional[List[Dict[str, str]]] = None "agent_scratchpad": "" }) - if "message" not in result and "output" in result: - result["message"] = result["output"] + if "text" not in result and "output" in result: + result["text"] = result["output"] return result except Exception as e: From 5bf34bcc19cbb809e5387368b2ef813aea156113 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Tue, 4 Mar 2025 03:02:06 -0500 Subject: [PATCH 016/195] add logic scrapper amazon, aliexpress, cj and ai --- .env.example | 3 +- app/configurations/config.py | 2 + app/controllers/handle_controller.py | 13 + app/externals/aliexpress/aliexpress_client.py | 25 ++ app/externals/amazon/amazon_client.py | 25 ++ app/externals/scraperapi/__init__.py | 1 + app/externals/scraperapi/scraperapi_client.py | 30 ++ app/factories/scraping_factory.py | 27 ++ app/requests/product_scraping_request.py | 5 + app/scrapers/aliexpress_scraper.py | 267 ++++++++++++++++++ app/scrapers/amazon_scraper.py | 165 +++++++++++ app/scrapers/cj_scraper.py | 37 +++ app/scrapers/helper_price.py | 16 ++ app/scrapers/ia_scraper.py | 46 +++ app/scrapers/scraper_interface.py | 8 + app/services/product_scraping_service.py | 18 ++ .../product_scraping_service_interface.py | 8 + main.py | 3 + requirements.txt | 4 +- 19 files changed, 701 insertions(+), 2 deletions(-) create mode 100644 app/externals/scraperapi/__init__.py create mode 100644 app/externals/scraperapi/scraperapi_client.py create mode 100644 app/factories/scraping_factory.py create mode 100644 app/requests/product_scraping_request.py create mode 100644 app/scrapers/aliexpress_scraper.py create mode 100644 
app/scrapers/amazon_scraper.py create mode 100644 app/scrapers/cj_scraper.py create mode 100644 app/scrapers/helper_price.py create mode 100644 app/scrapers/ia_scraper.py create mode 100644 app/scrapers/scraper_interface.py create mode 100644 app/services/product_scraping_service.py create mode 100644 app/services/product_scraping_service_interface.py diff --git a/.env.example b/.env.example index c7d59c6..6f1c18f 100644 --- a/.env.example +++ b/.env.example @@ -11,4 +11,5 @@ S3_UPLOAD_API=http://lambdahost API_KEY=tu_clave_api_secreta_aqui AUTH_SERVICE_URL=https://develop.api.fluxi.com.co/api/v1/users/user-info GOOGLE_VISION_API_KEY=dsadadasda -REPLICATE_API_KEY=dsadadasda \ No newline at end of file +REPLICATE_API_KEY=dsadadasda +SCRAPERAPI_KEY=dsadsadsadasdsadas \ No newline at end of file diff --git a/app/configurations/config.py b/app/configurations/config.py index c3e1d57..93aa946 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -17,10 +17,12 @@ S3_UPLOAD_API = os.getenv('S3_UPLOAD_API') AGENT_IMAGE_VARIATIONS = "agent_image_variations" +SCRAPER_AGENT = "scraper_agent" AUTH_SERVICE_URL: str = os.getenv('AUTH_SERVICE_URL') GOOGLE_VISION_API_KEY: str = os.getenv('GOOGLE_VISION_API_KEY') REPLICATE_API_KEY: str = os.getenv('REPLICATE_API_KEY') +SCRAPERAPI_KEY: str = os.getenv('SCRAPERAPI_KEY') API_KEY: str = os.getenv('API_KEY') diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index 6a83511..7616546 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -3,8 +3,10 @@ from fastapi import APIRouter, Depends, Request from app.requests.message_request import MessageRequest from app.requests.variation_image_request import VariationImageRequest +from app.requests.product_scraping_request import ProductScrapingRequest from app.services.image_service_interface import ImageServiceInterface from app.services.message_service_interface import 
MessageServiceInterface +from app.services.product_scraping_service_interface import ProductScrapingServiceInterface from app.middlewares.auth_middleware import require_auth, require_api_key router = APIRouter( @@ -52,6 +54,17 @@ async def generate_variation_images( return response +@router.post("/scrape-product") +@require_auth +async def scrape_product( + request: Request, + scraping_request: ProductScrapingRequest, + service: ProductScrapingServiceInterface = Depends() +): + response = await service.scrape_product(scraping_request) + return response + + @router.get("/health") async def health_check(): return {"status": "OK"} diff --git a/app/externals/aliexpress/aliexpress_client.py b/app/externals/aliexpress/aliexpress_client.py index eaeb8d4..19e5aad 100644 --- a/app/externals/aliexpress/aliexpress_client.py +++ b/app/externals/aliexpress/aliexpress_client.py @@ -28,3 +28,28 @@ async def search_products(data: AliexpressSearchRequest) -> AliexpressSearchResp response.raise_for_status() return AliexpressSearchResponse(**response.json()) + + +async def get_item_detail(item_id: str): + endpoint = '/item_detail_7' + url = f"{RAPIDAPI_HOST}{endpoint}" + + headers = { + 'Content-Type': 'application/json', + 'x-rapidapi-host': 'aliexpress-datahub.p.rapidapi.com', + 'x-rapidapi-key': RAPIDAPI_KEY + } + + params = { + 'itemId': item_id + } + + async with httpx.AsyncClient() as client: + response = await client.get( + url, + params=params, + headers=headers + ) + response.raise_for_status() + + return response.json() diff --git a/app/externals/amazon/amazon_client.py b/app/externals/amazon/amazon_client.py index 5e2a08e..b44d76b 100644 --- a/app/externals/amazon/amazon_client.py +++ b/app/externals/amazon/amazon_client.py @@ -2,6 +2,7 @@ from app.configurations.config import RAPIDAPI_KEY from app.externals.amazon.requests.amazon_search_request import AmazonSearchRequest from app.externals.amazon.responses.amazon_search_response import AmazonSearchResponse +from typing 
import Dict, Any async def search_products(request: AmazonSearchRequest) -> AmazonSearchResponse: @@ -32,3 +33,27 @@ async def search_products(request: AmazonSearchRequest) -> AmazonSearchResponse: raw_response = response.json() return AmazonSearchResponse(raw_response) + + +async def get_product_details(asin: str, country: str = "US") -> Dict[str, Any]: + headers = { + 'x-rapidapi-host': 'real-time-amazon-data.p.rapidapi.com', + 'x-rapidapi-key': RAPIDAPI_KEY + } + + params = { + 'asin': asin, + 'country': country + } + + async with httpx.AsyncClient() as client: + response = await client.get( + 'https://real-time-amazon-data.p.rapidapi.com/product-details', + headers=headers, + params=params + ) + + if response.status_code != 200: + raise Exception(f"Error with call Amazon RapidApi: {response.status_code}") + + return response.json() diff --git a/app/externals/scraperapi/__init__.py b/app/externals/scraperapi/__init__.py new file mode 100644 index 0000000..fdb4450 --- /dev/null +++ b/app/externals/scraperapi/__init__.py @@ -0,0 +1 @@ +# Inicialización del paquete scraperapi \ No newline at end of file diff --git a/app/externals/scraperapi/scraperapi_client.py b/app/externals/scraperapi/scraperapi_client.py new file mode 100644 index 0000000..6fa40e5 --- /dev/null +++ b/app/externals/scraperapi/scraperapi_client.py @@ -0,0 +1,30 @@ +import aiohttp +from typing import Dict, Any + +from fastapi import HTTPException + +from app.configurations.config import SCRAPERAPI_KEY + + +class ScraperAPIClient: + def __init__(self): + self.api_key = SCRAPERAPI_KEY + self.base_url = "http://api.scraperapi.com" + + async def get_html(self, url: str, params: Dict[str, Any] = None) -> str: + default_params = { + "api_key": self.api_key, + "url": url, + "render": "true" + } + + if params: + default_params.update(params) + + async with aiohttp.ClientSession() as session: + async with session.get(self.base_url, params=default_params) as response: + if response.status != 200: + 
error_text = await response.text() + raise HTTPException(status_code=400, detail=error_text) + + return await response.text() diff --git a/app/factories/scraping_factory.py b/app/factories/scraping_factory.py new file mode 100644 index 0000000..e770dd8 --- /dev/null +++ b/app/factories/scraping_factory.py @@ -0,0 +1,27 @@ +from urllib.parse import urlparse + +from fastapi import Depends + +from app.scrapers.scraper_interface import ScraperInterface +from app.scrapers.amazon_scraper import AmazonScraper +from app.scrapers.aliexpress_scraper import AliexpressScraper +from app.scrapers.cj_scraper import CJScraper +from app.scrapers.ia_scraper import IAScraper +from app.services.message_service_interface import MessageServiceInterface + + +class ScrapingFactory: + def __init__(self, message_service: MessageServiceInterface = Depends()): + self.message_service = message_service + + def get_scraper(self, url: str) -> ScraperInterface: + domain = urlparse(url).netloc.lower() + + if "amazon" in domain: + return AmazonScraper() + elif "aliexpress" in domain: + return AliexpressScraper() + elif "cjdropshipping" in domain: + return CJScraper() + else: + return IAScraper(message_service=self.message_service) diff --git a/app/requests/product_scraping_request.py b/app/requests/product_scraping_request.py new file mode 100644 index 0000000..4587410 --- /dev/null +++ b/app/requests/product_scraping_request.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel, HttpUrl + + +class ProductScrapingRequest(BaseModel): + product_url: HttpUrl diff --git a/app/scrapers/aliexpress_scraper.py b/app/scrapers/aliexpress_scraper.py new file mode 100644 index 0000000..31d7ce0 --- /dev/null +++ b/app/scrapers/aliexpress_scraper.py @@ -0,0 +1,267 @@ +from app.scrapers.scraper_interface import ScraperInterface +from typing import Dict, Any, List, Optional, Tuple +from app.externals.aliexpress.aliexpress_client import get_item_detail +import re +from fastapi import HTTPException +from decimal import 
Decimal + + +class AliexpressScraper(ScraperInterface): + async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: + item_id = self._extract_item_id(url) + product_details = await get_item_detail(item_id) + + try: + item_data = self._get_item_data(product_details) + + result = { + "name": self._get_name(item_data), + "description": self._get_description(item_data), + "external_sell_price": self._get_price(item_data), + "images": self._get_images(item_data) + } + + variants = self._extract_variants(item_data) + if variants: + result["variants"] = variants + + response = { + "provider_id": "aliexpress", + "external_id": item_id, + **result + } + + return {"data": response} + + except Exception as e: + raise HTTPException( + status_code=400, + detail=f"Error procesando datos del producto: {str(e)}" + ) + + def _extract_item_id(self, url: str) -> str: + pattern = r'item/(\d+)\.html' + match = re.search(pattern, url) + if match: + return match.group(1) + + pattern = r'itemId=(\d+)' + match = re.search(pattern, url) + if match: + return match.group(1) + + raise HTTPException(status_code=400, detail=f"No se pudo extraer el ID del producto de la URL: {url}") + + def _get_item_data(self, response: Dict[str, Any]) -> Dict[str, Any]: + result = response.get("result", {}) + item_data = result.get("item", {}) + if not item_data: + raise ValueError("No se encontraron datos del producto en la respuesta") + return item_data + + def _get_name(self, item_data: Dict[str, Any]) -> str: + return item_data.get("title", "") + + def _get_description(self, item_data: Dict[str, Any]) -> str: + description = "" + description_data = item_data.get("description", {}) + if description_data: + # Intentamos extraer el texto de la descripción HTML + html_content = description_data.get("html", "") + if html_content: + # Simplificación básica - podría mejorarse con una biblioteca HTML + description = re.sub(r'<[^>]+>', ' ', html_content) + description = re.sub(r'\s+', ' ', 
description).strip() + + # Si no hay descripción, intentamos usar las propiedades + if not description and "properties" in item_data: + properties = item_data.get("properties", {}).get("list", []) + if properties: + description = "\n".join([f"{prop.get('name')}: {prop.get('value')}" for prop in properties]) + + return description + + def _get_price(self, item_data: Dict[str, Any]) -> Optional[Decimal]: + sku_data = item_data.get("sku", {}) + if not sku_data: + return None + + # Intentar obtener el precio de promoción primero + def_data = sku_data.get("def", {}) + if def_data: + promotion_price = def_data.get("promotionPrice") + if promotion_price: + return self._parse_price(promotion_price) + + price = def_data.get("price") + if price: + # Si el precio es un rango (ej: "3.55 - 3.87"), tomamos el valor más bajo + if isinstance(price, str) and " - " in price: + price = price.split(" - ")[0] + return self._parse_price(price) + + # Si no hay precio en def, intentamos con la primera variante + base_variants = sku_data.get("base", []) + if base_variants and len(base_variants) > 0: + first_variant = base_variants[0] + promotion_price = first_variant.get("promotionPrice") + if promotion_price: + return self._parse_price(promotion_price) + + price = first_variant.get("price") + if price: + return self._parse_price(price) + + return None + + def _parse_price(self, price_str: Any) -> Optional[Decimal]: + if isinstance(price_str, (int, float)): + return Decimal(str(price_str)) + + if isinstance(price_str, str): + clean_price = price_str.replace("$", "").replace(",", "").strip() + try: + return Decimal(clean_price) + except: + pass + + return None + + def _get_images(self, item_data: Dict[str, Any]) -> List[str]: + images = [] + + # Obtener imágenes principales + main_images = item_data.get("images", []) + if main_images: + # Asegurarse de que las URLs sean absolutas + images = [self._ensure_absolute_url(img) for img in main_images] + + # Si no hay imágenes principales, 
intentar con imágenes de descripción + if not images and "description" in item_data: + desc_images = item_data.get("description", {}).get("images", []) + if desc_images: + images = [self._ensure_absolute_url(img) for img in desc_images] + + return images + + def _ensure_absolute_url(self, url: str) -> str: + """Asegura que la URL sea absoluta agregando el protocolo si es necesario.""" + if url.startswith("//"): + return f"https:{url}" + return url + + def _extract_variants(self, item_data: Dict[str, Any]) -> List[Dict[str, Any]]: + variants = [] + sku_data = item_data.get("sku", {}) + + if not sku_data or "base" not in sku_data or "props" not in sku_data: + return [] + + base_variants = sku_data.get("base", []) + props = sku_data.get("props", []) + product_title = item_data.get("title", "") + + # Crear mapeo de propiedades + prop_map = self._create_property_map(props) + + # Procesar cada variante + for variant in base_variants: + sku_id = variant.get("skuId") + sku_attr = variant.get("skuAttr", "") + + # Extraer atributos y imágenes de la variante + attributes, variant_images = self._process_variant_attributes(sku_attr, prop_map) + + # Si no hay imágenes específicas de la variante, usar las imágenes principales + if not variant_images: + main_images = self._get_images(item_data) + if main_images: + variant_images = [main_images[0]] + + # Crear clave de variante + variant_key = "-".join([attr["value"] for attr in attributes]) + + variant_info = { + "provider_id": "aliexpress", + "external_id": sku_id, + "name": product_title, + "images": variant_images, + "variant_key": variant_key, + "attributes": attributes + } + + variants.append(variant_info) + + return variants + + def _create_property_map(self, props: List[Dict[str, Any]]) -> Dict[int, Dict[str, Any]]: + """Crea un mapa de propiedades para facilitar la búsqueda de atributos de variantes.""" + prop_map = {} + for prop in props: + prop_id = prop.get("pid") + prop_name = prop.get("name") + values = {} + for val 
in prop.get("values", []): + values[val.get("vid")] = { + "name": val.get("name"), + "image": val.get("image", "") + } + prop_map[prop_id] = { + "name": prop_name, + "values": values + } + return prop_map + + def _process_variant_attributes(self, sku_attr: str, prop_map: Dict[int, Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], List[str]]: + """Procesa los atributos de una variante y extrae imágenes relacionadas.""" + attributes = [] + variant_images = [] + + # Atributos a ignorar + ignored_attributes = ["Ships From", "ship from"] + + if not sku_attr: + return attributes, variant_images + + # Parsear skuAttr (formato: "pid:vid;pid:vid") + attr_parts = sku_attr.split(";") + for part in attr_parts: + if ":" not in part: + continue + + pid_vid = part.split(":") + if len(pid_vid) != 2: + continue + + try: + pid = int(pid_vid[0]) + vid_raw = pid_vid[1] + + # Extraer el vid (puede tener formato "vid#name") + vid = vid_raw + if "#" in vid_raw: + vid = vid_raw.split("#")[0] + + try: + vid = int(vid) + except: + pass + + if pid in prop_map and vid in prop_map[pid]["values"]: + prop_info = prop_map[pid] + value_info = prop_info["values"][vid] + + # Ignorar atributos de envío + if prop_info["name"] not in ignored_attributes: + attributes.append({ + "category_name": prop_info["name"], + "value": value_info["name"] + }) + + # Agregar imagen de la variante si existe + if value_info["image"]: + variant_images.append(self._ensure_absolute_url(value_info["image"])) + except: + continue + + return attributes, variant_images diff --git a/app/scrapers/amazon_scraper.py b/app/scrapers/amazon_scraper.py new file mode 100644 index 0000000..40f32d2 --- /dev/null +++ b/app/scrapers/amazon_scraper.py @@ -0,0 +1,165 @@ +from fastapi import HTTPException + +from app.scrapers.helper_price import parse_price +from app.scrapers.scraper_interface import ScraperInterface +from typing import Dict, Any, List, Optional +import re +from app.externals.amazon.amazon_client import get_product_details 
+from decimal import Decimal + + +class AmazonScraper(ScraperInterface): + async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: + asin = self._extract_asin(url) + + try: + data = await get_product_details(asin) + product_data = self._get_product_data(data) + + result = { + "name": self._get_name(product_data), + "description": self._get_description(product_data), + "external_sell_price": self._get_price(product_data), + "images": self._get_images(product_data) + } + + variants = self._extract_variants(product_data) + if variants: + result["variants"] = variants + + response = { + "provider_id": "amazon", + "external_id": asin, + **result + } + + return {"data": response} + + except Exception as e: + raise HTTPException( + status_code=400, + detail=f"Error processing product data: {str(e)}" + ) + + def _get_product_data(self, response: Dict[str, Any]) -> Dict[str, Any]: + product_data = response.get("data", {}) + if not product_data: + raise ValueError("No product data found in response") + return product_data + + def _get_name(self, product_data: Dict[str, Any]) -> str: + return product_data.get("product_title", product_data.get("title", "")) + + def _get_description(self, product_data: Dict[str, Any]) -> str: + description = product_data.get("product_description", "") + + if not description: + about_product = product_data.get("about_product", []) + if about_product: + description = "\n".join(about_product) + + return description + + def _get_price(self, product_data: Dict[str, Any]) -> Optional[Decimal]: + price_str = product_data.get("product_price", "") + if not price_str: + price_info = product_data.get("pricing", {}) + price_str = price_info.get("current_price", "") + + if not price_str: + return None + + return parse_price(price_str) + + def _get_images(self, product_data: Dict[str, Any]) -> List[str]: + images = [] + + product_photos = product_data.get("product_photos", []) + if product_photos: + return product_photos + + main_image = 
product_data.get("product_photo", product_data.get("main_image", "")) + if main_image: + images.append(main_image) + + additional_images = product_data.get("images", []) + if additional_images: + images.extend(additional_images) + + return images + + def _extract_asin(self, url: str) -> str: + patterns = [ + r'/dp/([A-Z0-9]{10})', + r'/gp/product/([A-Z0-9]{10})', + r'/ASIN/([A-Z0-9]{10})', + r'asin=([A-Z0-9]{10})', + r'asin%3D([A-Z0-9]{10})' + ] + + for pattern in patterns: + match = re.search(pattern, url) + if match: + return match.group(1) + + raise HTTPException( + status_code=400, + detail="Product not found - Invalid Amazon URL" + ) + + def _extract_variants(self, product_data: Dict[str, Any]) -> List[Dict[str, Any]]: + dimensions = product_data.get("product_variations_dimensions", []) + variations = product_data.get("product_variations", {}) + all_variations = product_data.get("all_product_variations", {}) + + if not dimensions or not variations or not all_variations: + return [] + + variants = [] + product_title = product_data.get("product_title", "") + + for asin, variant_data in all_variations.items(): + variant_attributes = self._get_variant_attributes(dimensions, variant_data) + variant_key = "-".join([attr["value"] for attr in variant_attributes]) + + variant_info = { + "provider_id": "amazon", + "external_id": asin, + "name": product_title, + "images": self._get_variant_images(dimensions, variations, variant_data, product_data), + "variant_key": variant_key, + "attributes": variant_attributes + } + + variants.append(variant_info) + + return variants + + def _get_variant_attributes(self, dimensions: List[str], variant_data: Dict[str, str]) -> List[Dict[str, str]]: + attributes = [] + + for dim in dimensions: + if dim in variant_data: + attributes.append({ + "category_name": dim.capitalize(), + "value": variant_data[dim] + }) + + return attributes + + def _get_variant_images(self, dimensions: List[str], variations: Dict[str, List], + variant_data: 
Dict[str, str], product_data: Dict[str, Any]) -> List[str]: + images = [] + for dim in dimensions: + if dim in variations and dim in variant_data: + for var in variations[dim]: + if var.get("value") == variant_data.get(dim) and "photo" in var: + images.append(var["photo"]) + break + + if not images: + main_image = product_data.get("product_photo") + if main_image: + images.append(main_image) + + return images diff --git a/app/scrapers/cj_scraper.py b/app/scrapers/cj_scraper.py new file mode 100644 index 0000000..a328530 --- /dev/null +++ b/app/scrapers/cj_scraper.py @@ -0,0 +1,37 @@ +import httpx +from app.scrapers.scraper_interface import ScraperInterface +from fastapi import HTTPException + + +class CJScraper(ScraperInterface): + def __init__(self): + self.webhook_url = "https://n8n.fluxi.co/webhook/cj-search" + + async def scrape(self, url: str, domain: str = None) -> dict: + payload = { + "url_cj": url + } + + headers = { + "Content-Type": "application/json" + } + + try: + async with httpx.AsyncClient(timeout=20.0) as client: + response = await client.post( + self.webhook_url, + headers=headers, + json=payload + ) + + if response.status_code == 200: + return response.json() + else: + error_message = f"Failed to get data from CJ Dropshipping: {response.status_code}" + raise HTTPException(status_code=response.status_code, detail=error_message) + + except HTTPException as he: + raise he + except Exception as e: + error_message = f"Request error to CJ Dropshipping: {str(e)}" + raise HTTPException(status_code=500, detail=error_message) diff --git a/app/scrapers/helper_price.py b/app/scrapers/helper_price.py new file mode 100644 index 0000000..455cc7e --- /dev/null +++ b/app/scrapers/helper_price.py @@ -0,0 +1,16 @@ +from decimal import Decimal +from typing import Optional, Any + + +def parse_price(price_str: Any) -> Optional[Decimal]: + if isinstance(price_str, (int, float)): + return Decimal(str(price_str)) + + if isinstance(price_str, str): + clean_price = 
price_str.replace("$", "").replace(",", "").strip() + try: + return Decimal(clean_price) + except: + pass + + return None diff --git a/app/scrapers/ia_scraper.py b/app/scrapers/ia_scraper.py new file mode 100644 index 0000000..d69c2c9 --- /dev/null +++ b/app/scrapers/ia_scraper.py @@ -0,0 +1,46 @@ +from app.configurations.config import SCRAPER_AGENT +from app.pdf.helpers import clean_text, clean_json +from app.requests.message_request import MessageRequest +from app.scrapers.helper_price import parse_price +from app.scrapers.scraper_interface import ScraperInterface +from typing import Dict, Any +from app.externals.scraperapi.scraperapi_client import ScraperAPIClient +from bs4 import BeautifulSoup +from app.services.message_service_interface import MessageServiceInterface +import json + + +class IAScraper(ScraperInterface): + def __init__(self, message_service: MessageServiceInterface): + self.message_service = message_service + + async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: + client = ScraperAPIClient() + html_content = await client.get_html(url) + soup = BeautifulSoup(html_content, 'html.parser') + for script in soup(["script", "style"]): + script.extract() + simplified_html = str(soup) + + message_request = MessageRequest( + query=f"provider_id={domain} . 
Product content: {simplified_html} ", + agent_id=SCRAPER_AGENT, + conversation_id="", + ) + + result = await self.message_service.handle_message(message_request) + data_clean = clean_text(clean_json(result['text'])) + data = json.loads(data_clean) + data['data']['external_sell_price'] = parse_price(data['data']['external_sell_price']) + + if 'variants' in data['data']: + filtered_variants = [] + for variant in data['data']['variants']: + if not (variant.get('name') == 'unknown' and + variant.get('variant_key') == 'unknown' and + len(variant.get('images', [])) == 0): + filtered_variants.append(variant) + + data['data']['variants'] = filtered_variants + + return data \ No newline at end of file diff --git a/app/scrapers/scraper_interface.py b/app/scrapers/scraper_interface.py new file mode 100644 index 0000000..44904b1 --- /dev/null +++ b/app/scrapers/scraper_interface.py @@ -0,0 +1,8 @@ +from abc import ABC, abstractmethod +from typing import Dict, Any + + +class ScraperInterface(ABC): + @abstractmethod + async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: + pass diff --git a/app/services/product_scraping_service.py b/app/services/product_scraping_service.py new file mode 100644 index 0000000..a9540e9 --- /dev/null +++ b/app/services/product_scraping_service.py @@ -0,0 +1,18 @@ +from fastapi import Depends + +from app.requests.product_scraping_request import ProductScrapingRequest +from app.services.product_scraping_service_interface import ProductScrapingServiceInterface +from app.factories.scraping_factory import ScrapingFactory +from urllib.parse import urlparse + + +class ProductScrapingService(ProductScrapingServiceInterface): + def __init__(self, scraping_factory: ScrapingFactory = Depends()): + self.scraping_factory = scraping_factory + + async def scrape_product(self, request: ProductScrapingRequest): + url = str(request.product_url) + domain = urlparse(url).netloc.lower() + + scraper = self.scraping_factory.get_scraper(url) + return 
await scraper.scrape(url, domain) diff --git a/app/services/product_scraping_service_interface.py b/app/services/product_scraping_service_interface.py new file mode 100644 index 0000000..e9015ba --- /dev/null +++ b/app/services/product_scraping_service_interface.py @@ -0,0 +1,8 @@ +from abc import ABC, abstractmethod +from app.requests.product_scraping_request import ProductScrapingRequest + + +class ProductScrapingServiceInterface(ABC): + @abstractmethod + async def scrape_product(self, request: ProductScrapingRequest): + pass diff --git a/main.py b/main.py index c63e90d..f170616 100644 --- a/main.py +++ b/main.py @@ -7,6 +7,8 @@ from app.services.image_service_interface import ImageServiceInterface from app.services.message_service import MessageService from app.services.message_service_interface import MessageServiceInterface +from app.services.product_scraping_service import ProductScrapingService +from app.services.product_scraping_service_interface import ProductScrapingServiceInterface app = FastAPI( title="Conversational Agent API", @@ -17,6 +19,7 @@ app.dependency_overrides[MessageServiceInterface] = MessageService app.dependency_overrides[ConversationManagerInterface] = ConversationManager app.dependency_overrides[ImageServiceInterface] = ImageService +app.dependency_overrides[ProductScrapingServiceInterface] = ProductScrapingService if __name__ == "__main__": import uvicorn diff --git a/requirements.txt b/requirements.txt index 1e84f4a..8a228f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,4 +11,6 @@ langgraph>=0.0.10 langchain-core>=0.1.17 langchain-anthropic langchain-ollama -fpdf \ No newline at end of file +fpdf +beautifulsoup4 +lxml \ No newline at end of file From bb3db81394ba58c4b21fa660c145d5be5c2e0ef6 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Tue, 4 Mar 2025 03:17:14 -0500 Subject: [PATCH 017/195] fix variants --- app/scrapers/ia_scraper.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git 
a/app/scrapers/ia_scraper.py b/app/scrapers/ia_scraper.py index d69c2c9..bfcb8e9 100644 --- a/app/scrapers/ia_scraper.py +++ b/app/scrapers/ia_scraper.py @@ -34,13 +34,9 @@ async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: data['data']['external_sell_price'] = parse_price(data['data']['external_sell_price']) if 'variants' in data['data']: - filtered_variants = [] - for variant in data['data']['variants']: - if not (variant.get('name') == 'unknown' and - variant.get('variant_key') == 'unknown' and - len(variant.get('images', [])) == 0): - filtered_variants.append(variant) - - data['data']['variants'] = filtered_variants + data['data']['variants'] = [ + variant for variant in data['data']['variants'] + if variant.get('variant_key') != 'unknown' + ] return data \ No newline at end of file From d3c4ba9dd9ea4a5c8fba1b439dc6e810619b9fc4 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Wed, 5 Mar 2025 00:19:52 -0500 Subject: [PATCH 018/195] new logic scraper. --- .env.example | 3 +- app/configurations/config.py | 1 + app/externals/scraperapi/scraperapi_client.py | 21 +++++++++++- app/helpers/escape_helper.py | 32 +++++++++++++++++++ app/scrapers/ia_scraper.py | 22 +++++++------ 5 files changed, 67 insertions(+), 12 deletions(-) create mode 100644 app/helpers/escape_helper.py diff --git a/.env.example b/.env.example index 6f1c18f..74605bf 100644 --- a/.env.example +++ b/.env.example @@ -12,4 +12,5 @@ API_KEY=tu_clave_api_secreta_aqui AUTH_SERVICE_URL=https://develop.api.fluxi.com.co/api/v1/users/user-info GOOGLE_VISION_API_KEY=dsadadasda REPLICATE_API_KEY=dsadadasda -SCRAPERAPI_KEY=dsadsadsadasdsadas \ No newline at end of file +SCRAPERAPI_KEY=dsadsadsadasdsadas +URL_SCRAPER_LAMBDA=https://localhost:8000/ \ No newline at end of file diff --git a/app/configurations/config.py b/app/configurations/config.py index 93aa946..a321696 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -24,5 +24,6 @@ GOOGLE_VISION_API_KEY: str = 
os.getenv('GOOGLE_VISION_API_KEY') REPLICATE_API_KEY: str = os.getenv('REPLICATE_API_KEY') SCRAPERAPI_KEY: str = os.getenv('SCRAPERAPI_KEY') +URL_SCRAPER_LAMBDA: str = os.getenv('URL_SCRAPER_LAMBDA') API_KEY: str = os.getenv('API_KEY') diff --git a/app/externals/scraperapi/scraperapi_client.py b/app/externals/scraperapi/scraperapi_client.py index 6fa40e5..325bc08 100644 --- a/app/externals/scraperapi/scraperapi_client.py +++ b/app/externals/scraperapi/scraperapi_client.py @@ -3,13 +3,14 @@ from fastapi import HTTPException -from app.configurations.config import SCRAPERAPI_KEY +from app.configurations.config import SCRAPERAPI_KEY, URL_SCRAPER_LAMBDA class ScraperAPIClient: def __init__(self): self.api_key = SCRAPERAPI_KEY self.base_url = "http://api.scraperapi.com" + self.lambda_url = URL_SCRAPER_LAMBDA async def get_html(self, url: str, params: Dict[str, Any] = None) -> str: default_params = { @@ -28,3 +29,21 @@ async def get_html(self, url: str, params: Dict[str, Any] = None) -> str: raise HTTPException(status_code=400, detail=error_text) return await response.text() + + async def get_html_lambda(self, url: str) -> str: + payload = { + "url": url + } + + async with aiohttp.ClientSession() as session: + async with session.post( + self.lambda_url, + headers={"Content-Type": "application/json"}, + json=payload + ) as response: + if response.status != 200: + error_text = await response.text() + raise HTTPException(status_code=400, detail=f"Error lambda API scraper: {error_text}") + + response_data = await response.json() + return response_data.get("content", "") diff --git a/app/helpers/escape_helper.py b/app/helpers/escape_helper.py new file mode 100644 index 0000000..40b04ad --- /dev/null +++ b/app/helpers/escape_helper.py @@ -0,0 +1,32 @@ +import re +from bs4 import BeautifulSoup + + +def clean_placeholders(text: str, allowed_keys: list = None) -> str: + if allowed_keys is None: + allowed_keys = [] + + def replace_placeholder(match): + key = 
match.group(1).strip('"\' ') # Remueve comillas internas + return match.group(0) if key in allowed_keys else "" + + pattern = re.compile(r"\{\s*[\"']?([^\"'\{\}]+)[\"']?\s*\}") + return pattern.sub(replace_placeholder, text) + + +def clean_html_deeply(html_content): + soup = BeautifulSoup(html_content, 'html.parser') + + for tag in soup(['script', 'style', 'noscript', 'svg', 'link', 'meta', 'head']): + tag.decompose() + + for tag in soup.find_all(True): + if tag.name == 'img': + tag.attrs = {key: tag.attrs[key] for key in ['src', 'alt'] if key in tag.attrs} + else: + tag.attrs = {} + + simplified_html = str(soup) + simplified_html_clean = re.sub(r'\s+', ' ', simplified_html).strip() + + return simplified_html_clean diff --git a/app/scrapers/ia_scraper.py b/app/scrapers/ia_scraper.py index bfcb8e9..6a63ae2 100644 --- a/app/scrapers/ia_scraper.py +++ b/app/scrapers/ia_scraper.py @@ -1,11 +1,11 @@ from app.configurations.config import SCRAPER_AGENT +from app.helpers.escape_helper import clean_html_deeply from app.pdf.helpers import clean_text, clean_json from app.requests.message_request import MessageRequest from app.scrapers.helper_price import parse_price from app.scrapers.scraper_interface import ScraperInterface from typing import Dict, Any from app.externals.scraperapi.scraperapi_client import ScraperAPIClient -from bs4 import BeautifulSoup from app.services.message_service_interface import MessageServiceInterface import json @@ -16,14 +16,11 @@ def __init__(self, message_service: MessageServiceInterface): async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: client = ScraperAPIClient() - html_content = await client.get_html(url) - soup = BeautifulSoup(html_content, 'html.parser') - for script in soup(["script", "style"]): - script.extract() - simplified_html = str(soup) + html_content = await client.get_html_lambda(url) + simplified_html_clean = clean_html_deeply(html_content) message_request = MessageRequest( - query=f"provider_id={domain} . 
Product content: {simplified_html} ", + query=f"provider_id={domain} . product_url={url} Product content: {simplified_html_clean} ", agent_id=SCRAPER_AGENT, conversation_id="", ) @@ -32,11 +29,16 @@ async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: data_clean = clean_text(clean_json(result['text'])) data = json.loads(data_clean) data['data']['external_sell_price'] = parse_price(data['data']['external_sell_price']) - + images = data['data'].get('images', []) + cleaned_images = [ + f"https:{img}" if img.startswith("//") else img for img in images + ] + data['data']['images'] = cleaned_images + if 'variants' in data['data']: data['data']['variants'] = [ - variant for variant in data['data']['variants'] + variant for variant in data['data']['variants'] if variant.get('variant_key') != 'unknown' ] - return data \ No newline at end of file + return data From 91c1584b9bcd2c1ba2b324e4c0b4ea7169e626f6 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 7 Mar 2025 17:12:28 -0500 Subject: [PATCH 019/195] modify aliexpress factory --- app/factories/scraping_factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/factories/scraping_factory.py b/app/factories/scraping_factory.py index e770dd8..9741a27 100644 --- a/app/factories/scraping_factory.py +++ b/app/factories/scraping_factory.py @@ -20,7 +20,7 @@ def get_scraper(self, url: str) -> ScraperInterface: if "amazon" in domain: return AmazonScraper() elif "aliexpress" in domain: - return AliexpressScraper() + return IAScraper(message_service=self.message_service) elif "cjdropshipping" in domain: return CJScraper() else: From 9aeeec549ac16268309e8ed7a428d316fd727cee Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 10 Mar 2025 13:39:20 -0500 Subject: [PATCH 020/195] change endpoint aliexpress --- app/externals/aliexpress/aliexpress_client.py | 2 +- app/factories/scraping_factory.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/app/externals/aliexpress/aliexpress_client.py b/app/externals/aliexpress/aliexpress_client.py index 19e5aad..8a758ce 100644 --- a/app/externals/aliexpress/aliexpress_client.py +++ b/app/externals/aliexpress/aliexpress_client.py @@ -31,7 +31,7 @@ async def search_products(data: AliexpressSearchRequest) -> AliexpressSearchResp async def get_item_detail(item_id: str): - endpoint = '/item_detail_7' + endpoint = '/item_detail_6' url = f"{RAPIDAPI_HOST}{endpoint}" headers = { diff --git a/app/factories/scraping_factory.py b/app/factories/scraping_factory.py index 9741a27..e770dd8 100644 --- a/app/factories/scraping_factory.py +++ b/app/factories/scraping_factory.py @@ -20,7 +20,7 @@ def get_scraper(self, url: str) -> ScraperInterface: if "amazon" in domain: return AmazonScraper() elif "aliexpress" in domain: - return IAScraper(message_service=self.message_service) + return AliexpressScraper() elif "cjdropshipping" in domain: return CJScraper() else: From cacdede9f255a3a81b2466e06a4db12dfa686e8e Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Tue, 11 Mar 2025 18:20:16 -0500 Subject: [PATCH 021/195] logic copies --- app/configurations/copies_config.py | 8 ++++++++ app/configurations/pdf_manual_config.py | 2 +- app/controllers/handle_controller.py | 10 ++++++++++ app/requests/copy_request.py | 5 +++++ app/services/message_service.py | 12 ++++++++++++ app/services/message_service_interface.py | 5 +++++ 6 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 app/configurations/copies_config.py create mode 100644 app/requests/copy_request.py diff --git a/app/configurations/copies_config.py b/app/configurations/copies_config.py new file mode 100644 index 0000000..7c94b76 --- /dev/null +++ b/app/configurations/copies_config.py @@ -0,0 +1,8 @@ +AGENT_COPIES = [ + "agent_prompt_copies_use_cases_v1", + "agent_prompt_copies_pain_points_v1", + "agent_prompt_copies_benefits_v1", + "agent_prompt_copies_features_v1", + "agent_prompt_copies_testimonials_v1", + 
"agent_prompt_copies_faqs_v1" +] \ No newline at end of file diff --git a/app/configurations/pdf_manual_config.py b/app/configurations/pdf_manual_config.py index baedcb0..9580698 100644 --- a/app/configurations/pdf_manual_config.py +++ b/app/configurations/pdf_manual_config.py @@ -14,4 +14,4 @@ "troubleshooting", "faq", "conclusion" -] \ No newline at end of file +] diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index 7616546..66aa530 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -1,3 +1,4 @@ +from app.requests.copy_request import CopyRequest from app.requests.generate_pdf_request import GeneratePdfRequest from app.requests.recommend_product_request import RecommendProductRequest from fastapi import APIRouter, Depends, Request @@ -54,6 +55,15 @@ async def generate_variation_images( return response +@router.post("/generate-copies") +async def generate_copies( + copy_request: CopyRequest, + message_service: MessageServiceInterface = Depends() +): + response = await message_service.generate_copies(copy_request) + return response + + @router.post("/scrape-product") @require_auth async def scrape_product( diff --git a/app/requests/copy_request.py b/app/requests/copy_request.py new file mode 100644 index 0000000..fa0e0b5 --- /dev/null +++ b/app/requests/copy_request.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel, Field, validator + + +class CopyRequest(BaseModel): + prompt: str diff --git a/app/services/message_service.py b/app/services/message_service.py index 6df4b7e..864b841 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -3,10 +3,12 @@ import asyncio from app.configurations.config import AGENT_RECOMMEND_PRODUCTS_ID, AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID +from app.configurations.copies_config import AGENT_COPIES from app.externals.agent_config.agent_config_client import get_agent from app.externals.s3_upload.requests.s3_upload_request 
import S3UploadRequest from app.externals.s3_upload.s3_upload_client import upload_file from app.pdf.helpers import clean_text, clean_json +from app.requests.copy_request import CopyRequest from app.requests.generate_pdf_request import GeneratePdfRequest from app.requests.message_request import MessageRequest from app.externals.agent_config.requests.agent_config_request import AgentConfigRequest @@ -82,6 +84,16 @@ async def process_multiple_agents(self, agent_queries: list[dict]) -> dict: except Exception as e: raise ValueError(f"Error procesando respuestas de agentes: {str(e)}") + async def generate_copies(self, request: CopyRequest): + agent_queries = [ + {'agent': agent, 'query': request.prompt} + for agent in AGENT_COPIES + ] + + combined_data = await self.process_multiple_agents(agent_queries) + + return {"copies": combined_data} + async def generate_pdf(self, request: GeneratePdfRequest): base_query = f"Product Name: {request.product_name} Description: {request.product_description}" diff --git a/app/services/message_service_interface.py b/app/services/message_service_interface.py index cec22d8..5b0cd04 100644 --- a/app/services/message_service_interface.py +++ b/app/services/message_service_interface.py @@ -1,5 +1,6 @@ from abc import abstractmethod, ABC +from app.requests.copy_request import CopyRequest from app.requests.message_request import MessageRequest from app.requests.recommend_product_request import RecommendProductRequest @@ -9,6 +10,10 @@ class MessageServiceInterface(ABC): async def handle_message(self, request: MessageRequest): pass + @abstractmethod + async def generate_copies(self, request: CopyRequest): + pass + @abstractmethod async def recommend_products(self, request: RecommendProductRequest): pass From 94931dd60ee6dae19ee7163afc0397152798d3d0 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 13 Mar 2025 00:34:00 -0500 Subject: [PATCH 022/195] fix id new service --- app/controllers/handle_controller.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index 66aa530..c91d6c7 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -51,7 +51,7 @@ async def generate_variation_images( service: ImageServiceInterface = Depends() ): user_info = request.state.user_info - response = await service.generate_variation_images(variation_request, user_info.get("data", {}).get("_id")) + response = await service.generate_variation_images(variation_request, user_info.get("data", {}).get("id")) return response From ccc77c3d8aba8b6bb274deb6315bf100c606d547 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 17 Mar 2025 14:03:32 -0500 Subject: [PATCH 023/195] new service image. --- app/externals/replicate/replicate_client.py | 54 +++++++++++++++++++++ app/externals/s3_upload/s3_upload_client.py | 28 ++++++----- app/services/image_service.py | 8 +-- 3 files changed, 75 insertions(+), 15 deletions(-) diff --git a/app/externals/replicate/replicate_client.py b/app/externals/replicate/replicate_client.py index c12bbda..5315461 100644 --- a/app/externals/replicate/replicate_client.py +++ b/app/externals/replicate/replicate_client.py @@ -1,5 +1,11 @@ +import base64 + import aiohttp import asyncio +import httpx +import base64 + + from app.configurations.config import REPLICATE_API_KEY @@ -55,3 +61,51 @@ async def generate_image_variation( await asyncio.sleep(1) else: raise Exception(f"Error {response.status}: {await response.text()}") + + + + +async def google_image(file: str, prompt: str) -> bytes: + API_KEY = "AIzaSyByxC4IH1klvxH4Rgb_q9z-bG7cVBJSb4Y" + url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?key={API_KEY}" + + payload = { + "contents": [ + { + "parts": [ + {"text": prompt}, + {"inlineData": { + "mimeType": "image/png", + "data": file + }} + ] + } + ], + "generationConfig": { + "responseModalities": ["Text", "Image"] + } 
+ } + + headers = {'Content-Type': 'application/json'} + + try: + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers, json=payload) as response: + if response.status == 200: + data = await response.json() + parts = data["candidates"][0]["content"]["parts"] + + for part in parts: + if "inlineData" in part: + img_data_base64 = part["inlineData"]["data"] + img_bytes = base64.b64decode(img_data_base64) + return img_bytes + return None + else: + error_text = await response.text() + print(f"Error {response.status}: {error_text}") + response.raise_for_status() + except Exception as e: + print(f"Error al generar imagen: {str(e)}") + raise Exception(f"Error al generar imagen: {str(e)}") + diff --git a/app/externals/s3_upload/s3_upload_client.py b/app/externals/s3_upload/s3_upload_client.py index 15dbfe6..b0cf5c0 100644 --- a/app/externals/s3_upload/s3_upload_client.py +++ b/app/externals/s3_upload/s3_upload_client.py @@ -6,15 +6,21 @@ async def upload_file(request: S3UploadRequest) -> S3UploadResponse: headers = { - 'Content-Type': 'application/json', + "Content-Type": "application/json" } - - async with httpx.AsyncClient() as client: - response = await client.post( - S3_UPLOAD_API, - headers=headers, - json=request.dict() - ) - response.raise_for_status() - - return S3UploadResponse(**response.json()) + + # Configuración de tiempos de espera más largos (3 minutos) + timeout = httpx.Timeout(timeout=180.0, connect=60.0) + + try: + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.post( + S3_UPLOAD_API, + headers=headers, + json=request.dict() + ) + response.raise_for_status() + return S3UploadResponse(**response.json()) + except Exception as e: + print(f"Error al cargar archivo a S3: {str(e)}") + raise Exception(f"Error al cargar archivo a S3: {str(e)}") diff --git a/app/services/image_service.py b/app/services/image_service.py index 9359d7c..ca57aca 100644 --- a/app/services/image_service.py +++ 
b/app/services/image_service.py @@ -15,7 +15,7 @@ import uuid from dotenv import load_dotenv from app.externals.google_vision.google_vision_client import analyze_image -from app.externals.replicate.replicate_client import generate_image_variation +from app.externals.replicate.replicate_client import generate_image_variation, google_image load_dotenv() @@ -38,8 +38,8 @@ async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, ) async def _generate_single_variation(self, url_image: str, prompt: str, owner_id: str, - folder_id: str) -> str: - image_content = await generate_image_variation(image_url=url_image, prompt=prompt) + folder_id: str, file: str) -> str: + image_content = await google_image(file=file, prompt=prompt) content_base64 = base64.b64encode(image_content).decode('utf-8') final_upload = await self._upload_to_s3( content_base64, @@ -68,7 +68,7 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ response = await self.message_service.handle_message(message_request) prompt = response["text"] + " Do not modify any text, letters, brand logos, brand names, or symbols." 
tasks = [ - self._generate_single_variation(original_image_response.s3_url, prompt, owner_id, folder_id) + self._generate_single_variation(original_image_response.s3_url, prompt, owner_id, folder_id, request.file) for i in range(request.num_variations) ] generated_urls = await asyncio.gather(*tasks) From 9b94eb2d65f1bce1152268919fe7609b4fe08037 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 17 Mar 2025 14:06:41 -0500 Subject: [PATCH 024/195] to env google gemini --- .env.example | 3 ++- app/configurations/config.py | 1 + app/externals/replicate/replicate_client.py | 11 +++-------- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/.env.example b/.env.example index 74605bf..559dcbd 100644 --- a/.env.example +++ b/.env.example @@ -13,4 +13,5 @@ AUTH_SERVICE_URL=https://develop.api.fluxi.com.co/api/v1/users/user-info GOOGLE_VISION_API_KEY=dsadadasda REPLICATE_API_KEY=dsadadasda SCRAPERAPI_KEY=dsadsadsadasdsadas -URL_SCRAPER_LAMBDA=https://localhost:8000/ \ No newline at end of file +URL_SCRAPER_LAMBDA=https://localhost:8000/ +GOOGLE_GEMINI_API_KEY=sadasadasdasd \ No newline at end of file diff --git a/app/configurations/config.py b/app/configurations/config.py index a321696..23aeea9 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -27,3 +27,4 @@ URL_SCRAPER_LAMBDA: str = os.getenv('URL_SCRAPER_LAMBDA') API_KEY: str = os.getenv('API_KEY') +GOOGLE_GEMINI_API_KEY: str = os.getenv('GOOGLE_GEMINI_API_KEY') diff --git a/app/externals/replicate/replicate_client.py b/app/externals/replicate/replicate_client.py index 5315461..58b7a07 100644 --- a/app/externals/replicate/replicate_client.py +++ b/app/externals/replicate/replicate_client.py @@ -5,8 +5,7 @@ import httpx import base64 - -from app.configurations.config import REPLICATE_API_KEY +from app.configurations.config import REPLICATE_API_KEY, GOOGLE_GEMINI_API_KEY async def generate_image_variation( @@ -63,11 +62,8 @@ async def generate_image_variation( raise 
Exception(f"Error {response.status}: {await response.text()}") - - async def google_image(file: str, prompt: str) -> bytes: - API_KEY = "AIzaSyByxC4IH1klvxH4Rgb_q9z-bG7cVBJSb4Y" - url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?key={API_KEY}" + url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?key={GOOGLE_GEMINI_API_KEY}" payload = { "contents": [ @@ -87,7 +83,7 @@ async def google_image(file: str, prompt: str) -> bytes: } headers = {'Content-Type': 'application/json'} - + try: async with aiohttp.ClientSession() as session: async with session.post(url, headers=headers, json=payload) as response: @@ -108,4 +104,3 @@ async def google_image(file: str, prompt: str) -> bytes: except Exception as e: print(f"Error al generar imagen: {str(e)}") raise Exception(f"Error al generar imagen: {str(e)}") - From fbe13826bc0ba22a7452bdb47b067de86c37dfbf Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 28 Mar 2025 15:56:46 -0500 Subject: [PATCH 025/195] generate images from generate images from --- app/controllers/handle_controller.py | 13 ++++++++ app/externals/replicate/replicate_client.py | 21 ++++++++----- app/requests/generate_image_request.py | 8 +++++ app/responses/generate_image_response.py | 6 ++-- app/services/image_service.py | 33 +++++++++++++++++++-- app/services/image_service_interface.py | 5 ++++ 6 files changed, 73 insertions(+), 13 deletions(-) create mode 100644 app/requests/generate_image_request.py diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index c91d6c7..26cfa70 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -1,4 +1,5 @@ from app.requests.copy_request import CopyRequest +from app.requests.generate_image_request import GenerateImageRequest from app.requests.generate_pdf_request import GeneratePdfRequest from app.requests.recommend_product_request import 
RecommendProductRequest from fastapi import APIRouter, Depends, Request @@ -55,6 +56,18 @@ async def generate_variation_images( return response +@router.post("/generate-images-from") +@require_auth +async def generate_images_from_image( + request: Request, + generate_image_request: GenerateImageRequest, + service: ImageServiceInterface = Depends() +): + user_info = request.state.user_info + response = await service.generate_images_from(generate_image_request, user_info.get("data", {}).get("id")) + return response + + @router.post("/generate-copies") async def generate_copies( copy_request: CopyRequest, diff --git a/app/externals/replicate/replicate_client.py b/app/externals/replicate/replicate_client.py index 58b7a07..ab72e4c 100644 --- a/app/externals/replicate/replicate_client.py +++ b/app/externals/replicate/replicate_client.py @@ -1,4 +1,5 @@ import base64 +from typing import Optional import aiohttp import asyncio @@ -62,19 +63,23 @@ async def generate_image_variation( raise Exception(f"Error {response.status}: {await response.text()}") -async def google_image(file: str, prompt: str) -> bytes: +async def google_image(prompt: str, file: Optional[str] = None) -> bytes: url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?key={GOOGLE_GEMINI_API_KEY}" + parts = [{"text": prompt}] + + if file: + parts.append({ + "inlineData": { + "mimeType": "image/png", + "data": file + } + }) + payload = { "contents": [ { - "parts": [ - {"text": prompt}, - {"inlineData": { - "mimeType": "image/png", - "data": file - }} - ] + "parts": parts } ], "generationConfig": { diff --git a/app/requests/generate_image_request.py b/app/requests/generate_image_request.py new file mode 100644 index 0000000..1edecf3 --- /dev/null +++ b/app/requests/generate_image_request.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel +from typing import Optional + + +class GenerateImageRequest(BaseModel): + file: Optional[str] = None + prompt: str + 
num_variations: int = 4 diff --git a/app/responses/generate_image_response.py b/app/responses/generate_image_response.py index 27a94af..a5129c3 100644 --- a/app/responses/generate_image_response.py +++ b/app/responses/generate_image_response.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Optional from pydantic import BaseModel @@ -6,7 +6,7 @@ class GenerateImageResponse(BaseModel): - original_url: str + original_url: Optional[str] generated_urls: List[str] generated_prompt: str - vision_analysis: VisionAnalysisResponse + vision_analysis: Optional[VisionAnalysisResponse] = None diff --git a/app/services/image_service.py b/app/services/image_service.py index ca57aca..fc905f6 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -2,6 +2,7 @@ AGENT_IMAGE_VARIATIONS, ) from app.externals.s3_upload.responses.s3_upload_response import S3UploadResponse +from app.requests.generate_image_request import GenerateImageRequest from app.requests.message_request import MessageRequest from app.requests.variation_image_request import VariationImageRequest from app.externals.s3_upload.requests.s3_upload_request import S3UploadRequest @@ -16,6 +17,7 @@ from dotenv import load_dotenv from app.externals.google_vision.google_vision_client import analyze_image from app.externals.replicate.replicate_client import generate_image_variation, google_image +from typing import Optional load_dotenv() @@ -38,8 +40,9 @@ async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, ) async def _generate_single_variation(self, url_image: str, prompt: str, owner_id: str, - folder_id: str, file: str) -> str: - image_content = await google_image(file=file, prompt=prompt) + folder_id: str, file: Optional[str] = None) -> str: + image_content = await google_image(prompt=prompt, file=file) + content_base64 = base64.b64encode(image_content).decode('utf-8') final_upload = await self._upload_to_s3( content_base64, @@ -75,3 +78,29 @@ async def 
generate_variation_images(self, request: VariationImageRequest, owner_ return GenerateImageResponse(generated_urls=generated_urls, original_url=original_image_response.s3_url, generated_prompt=prompt, vision_analysis=vision_analysis) + + async def generate_images_from(self, request: GenerateImageRequest, owner_id: str): + folder_id = uuid.uuid4().hex[:8] + original_url = None + + if request.file: + original_image_response = await self._upload_to_s3(request.file, owner_id, folder_id, "original") + original_url = original_image_response.s3_url + + tasks = [ + self._generate_single_variation( + original_url, + request.prompt, + owner_id, + folder_id, + request.file + ) + for i in range(request.num_variations) + ] + generated_urls = await asyncio.gather(*tasks) + + return GenerateImageResponse( + generated_urls=generated_urls, + original_url=original_url, + generated_prompt=request.prompt + ) diff --git a/app/services/image_service_interface.py b/app/services/image_service_interface.py index d2dfdff..f081a45 100644 --- a/app/services/image_service_interface.py +++ b/app/services/image_service_interface.py @@ -1,5 +1,6 @@ from abc import abstractmethod, ABC +from app.requests.generate_image_request import GenerateImageRequest from app.requests.variation_image_request import VariationImageRequest @@ -7,3 +8,7 @@ class ImageServiceInterface(ABC): @abstractmethod async def generate_variation_images(self, request: VariationImageRequest, owner_id: str): pass + + @abstractmethod + async def generate_images_from(self, request: GenerateImageRequest, owner_id: str): + pass From 8555b7669f5eb78d230decda9ce9b60f4942eb0e Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 28 Mar 2025 16:20:26 -0500 Subject: [PATCH 026/195] change name method. 
--- app/controllers/handle_controller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index 26cfa70..914c210 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -58,7 +58,7 @@ async def generate_variation_images( @router.post("/generate-images-from") @require_auth -async def generate_images_from_image( +async def generate_images_from( request: Request, generate_image_request: GenerateImageRequest, service: ImageServiceInterface = Depends() From c5c3767129f46f7230ef8f6f6066ed7cacdeca59 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 28 Mar 2025 18:30:00 -0500 Subject: [PATCH 027/195] add logic json parser structure, copywriter generic. --- app/controllers/handle_controller.py | 9 +++ app/managers/conversation_manager.py | 4 +- app/processors/agent_processor.py | 6 +- app/processors/simple_processor.py | 86 ++++++++++++++++------- app/requests/message_request.py | 1 + app/services/message_service.py | 5 ++ app/services/message_service_interface.py | 4 ++ 7 files changed, 85 insertions(+), 30 deletions(-) diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index 914c210..c245ab6 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -25,6 +25,15 @@ async def handle_message( response = await message_service.handle_message(request) return response +@router.post("/handle-message-json") +async def handle_message( + request: MessageRequest, + message_service: MessageServiceInterface = Depends() +): + response = await message_service.handle_message_json(request) + return response + + @router.post("/recommend-product") async def recommend_products( diff --git a/app/managers/conversation_manager.py b/app/managers/conversation_manager.py index 8e554a9..1545ad6 100644 --- a/app/managers/conversation_manager.py +++ b/app/managers/conversation_manager.py 
@@ -10,7 +10,7 @@ class ConversationManager(ConversationManagerInterface): # TODO HISTORY - def get_conversation_history(self, conversation_id: str) -> List[str]: + def get_conversation_history(self, conversation_id: str) -> List: return [] async def process_conversation(self, request: MessageRequest, agent_config: AgentConfigResponse) -> dict[str, Any]: @@ -31,4 +31,4 @@ async def process_conversation(self, request: MessageRequest, agent_config: Agen else SimpleProcessor(llm, agent_config.prompt, history) ) - return await processor.process(request.query, request.files, ai_provider.supports_interleaved_files()) + return await processor.process(request, request.files, ai_provider.supports_interleaved_files()) diff --git a/app/processors/agent_processor.py b/app/processors/agent_processor.py index 6843369..1e8079f 100644 --- a/app/processors/agent_processor.py +++ b/app/processors/agent_processor.py @@ -5,13 +5,15 @@ from langchain_core.language_models import BaseChatModel import traceback +from app.requests.message_request import MessageRequest + class AgentProcessor(ConversationProcessor): def __init__(self, llm: BaseChatModel, context: str, history: List[str], tools: List[Any]): super().__init__(llm, context, history) self.tools = tools - async def process(self, query: str, files: Optional[List[Dict[str, str]]] = None, + async def process(self, request: MessageRequest, files: Optional[List[Dict[str, str]]] = None, supports_interleaved_files: bool = False) -> Dict[str, Any]: prompt_template = ChatPromptTemplate.from_messages([ ("system", "{context}"), @@ -39,7 +41,7 @@ async def process(self, query: str, files: Optional[List[Dict[str, str]]] = None result = await agent_executor.ainvoke({ "context": self.context or "", "chat_history": self.history, - "input": query, + "input": request.query, "agent_scratchpad": "" }) diff --git a/app/processors/simple_processor.py b/app/processors/simple_processor.py index 38301e9..4564b09 100644 --- 
a/app/processors/simple_processor.py +++ b/app/processors/simple_processor.py @@ -1,45 +1,79 @@ -from typing import Dict, Any, Optional, List, Union -from langchain.chains import LLMChain +import json +from typing import Dict, Any, Optional, List +from langchain_core.messages import SystemMessage, HumanMessage from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder + from app.processors.conversation_processor import ConversationProcessor +from app.requests.message_request import MessageRequest +import re class SimpleProcessor(ConversationProcessor): - async def process(self, query: str, files: Optional[List[Dict[str, str]]] = None, supports_interleaved_files: bool = False) -> Dict[str, Any]: + async def generate_response(self, context: str, chat_history: list, query: str, prompt: ChatPromptTemplate) -> Dict[ + str, Any]: + chain = ( + { + "context": lambda x: x["context"], + "chat_history": lambda x: x["chat_history"], + "input": lambda x: x["input"], + } + | prompt + | self.llm + ) + + raw_response = await chain.ainvoke({ + "context": context, + "chat_history": chat_history, + "input": query + }) + + content = raw_response.content + + match = re.search(r'```json\n(.*?)\n```', content, re.DOTALL) + if match: + json_content = match.group(1) + response_content = json_content + else: + response_content = content + + return { + "context": context, + "chat_history": chat_history, + "input": query, + "text": response_content + } + + async def process(self, request: MessageRequest, files: Optional[List[Dict[str, str]]] = None, + supports_interleaved_files: bool = False) -> Dict[str, Any]: messages = [] system_message = self.context or "" if files and not supports_interleaved_files: file_references = [] for file in files: - if file.get('type') == 'image': - file_references.append(f"{file['url']}") - else: - file_references.append(f"") + tag = 'image' if file.get('type') == 'image' else 'file' + file_references.append(f"<{tag} url='{file['url']}'>") 
system_message += "\n\n" + "\n".join(file_references) - messages.append(("system", system_message)) - messages.append(MessagesPlaceholder(variable_name="chat_history")) + if request.json_parser: + format_instructions = json.dumps(request.json_parser, indent=2) + system_message += ( + "\n\nIMPORTANT: Respond exclusively in JSON format following exactly this structure:\n\n" + f"{format_instructions}\n\n" + "Do NOT include markdown, explanations, or anything else besides the JSON." + ) if files and supports_interleaved_files: + interleaved_references = [] for file in files: - if file.get('type') == 'image': - messages.append(("system", f"{file['url']}")) - else: - messages.append(("system", f"")) - - messages.append(("human", query)) - prompt = ChatPromptTemplate.from_messages(messages) + tag = 'image' if file.get('type') == 'image' else 'file' + interleaved_references.append(f"<{tag} url='{file['url']}'>") + system_message += "\n\n" + "\n".join(interleaved_references) - chain = LLMChain( - llm=self.llm, - prompt=prompt, - verbose=False - ) + messages.append(SystemMessage(content=system_message)) + messages.append(MessagesPlaceholder(variable_name="chat_history")) + messages.append(HumanMessage(content=request.query)) - return await chain.ainvoke({ - "context": self.context or "", - "chat_history": self.history, - "input": query - }) + prompt = ChatPromptTemplate.from_messages(messages) + return await self.generate_response(self.context, self.history, request.query, prompt) diff --git a/app/requests/message_request.py b/app/requests/message_request.py index 0cefc15..eebf14f 100644 --- a/app/requests/message_request.py +++ b/app/requests/message_request.py @@ -7,5 +7,6 @@ class MessageRequest(BaseModel): query: str conversation_id: str metadata_filter: Optional[dict] = None + json_parser: Optional[dict] = None parameter_prompt: Optional[dict] = None files: Optional[List[Dict[str, str]]] = None diff --git a/app/services/message_service.py 
b/app/services/message_service.py index 864b841..7e06ca7 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -42,6 +42,11 @@ async def handle_message(self, request: MessageRequest): agent_config=agent_config ) + async def handle_message_json(self, request: MessageRequest): + response = await self.handle_message(request) + + return json.loads(response['text']) + async def recommend_products(self, request: RecommendProductRequest): agent_id = AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID if request.similar else AGENT_RECOMMEND_PRODUCTS_ID diff --git a/app/services/message_service_interface.py b/app/services/message_service_interface.py index 5b0cd04..e225acb 100644 --- a/app/services/message_service_interface.py +++ b/app/services/message_service_interface.py @@ -10,6 +10,10 @@ class MessageServiceInterface(ABC): async def handle_message(self, request: MessageRequest): pass + @abstractmethod + async def handle_message_json(self, request: MessageRequest): + pass + @abstractmethod async def generate_copies(self, request: CopyRequest): pass From b9d5ee0533628b9416ae2abe5056bf1e94023ce3 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Sat, 29 Mar 2025 23:09:03 -0500 Subject: [PATCH 028/195] add logic for receive a url image. 
--- app/controllers/handle_controller.py | 17 +++++++++++++++-- app/requests/generate_image_request.py | 3 ++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index c245ab6..574d0fb 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -1,8 +1,12 @@ +import base64 + +import httpx + from app.requests.copy_request import CopyRequest from app.requests.generate_image_request import GenerateImageRequest from app.requests.generate_pdf_request import GeneratePdfRequest from app.requests.recommend_product_request import RecommendProductRequest -from fastapi import APIRouter, Depends, Request +from fastapi import APIRouter, Depends, Request, HTTPException from app.requests.message_request import MessageRequest from app.requests.variation_image_request import VariationImageRequest from app.requests.product_scraping_request import ProductScrapingRequest @@ -25,6 +29,7 @@ async def handle_message( response = await message_service.handle_message(request) return response + @router.post("/handle-message-json") async def handle_message( request: MessageRequest, @@ -34,7 +39,6 @@ async def handle_message( return response - @router.post("/recommend-product") async def recommend_products( request: RecommendProductRequest, @@ -72,6 +76,15 @@ async def generate_images_from( generate_image_request: GenerateImageRequest, service: ImageServiceInterface = Depends() ): + if not generate_image_request.file and generate_image_request.file_url: + async with httpx.AsyncClient() as client: + try: + response = await client.get(generate_image_request.file_url) + response.raise_for_status() + generate_image_request.file = base64.b64encode(response.content).decode() + except Exception as e: + raise HTTPException(status_code=400, detail=f"Error for get file: {str(e)}") + user_info = request.state.user_info response = await 
service.generate_images_from(generate_image_request, user_info.get("data", {}).get("id")) return response diff --git a/app/requests/generate_image_request.py b/app/requests/generate_image_request.py index 1edecf3..beab880 100644 --- a/app/requests/generate_image_request.py +++ b/app/requests/generate_image_request.py @@ -4,5 +4,6 @@ class GenerateImageRequest(BaseModel): file: Optional[str] = None + file_url: Optional[str] = None prompt: str - num_variations: int = 4 + num_variations: int = 4 \ No newline at end of file From f0069f5604b814629cdc08347319c09840ae5802 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 31 Mar 2025 13:38:54 -0500 Subject: [PATCH 029/195] add endpoint for generate images from api-key --- app/controllers/handle_controller.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index 574d0fb..9963fec 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -90,6 +90,25 @@ async def generate_images_from( return response +@router.post("/generate-images-from/api-key") +@require_api_key +async def generate_images_from_api_key( + request: Request, + generate_image_request: GenerateImageRequest, + service: ImageServiceInterface = Depends() +): + if not generate_image_request.file and generate_image_request.file_url: + async with httpx.AsyncClient() as client: + try: + response = await client.get(generate_image_request.file_url) + response.raise_for_status() + generate_image_request.file = base64.b64encode(response.content).decode() + except Exception as e: + raise HTTPException(status_code=400, detail=f"Error for get file: {str(e)}") + response = await service.generate_images_from(generate_image_request, generate_image_request.owner_id) + return response + + @router.post("/generate-copies") async def generate_copies( copy_request: CopyRequest, From 31b315279e018a53782457b0e1a62a089ef29d9b Mon Sep 17 00:00:00 
2001 From: Oscar Arellano Date: Mon, 31 Mar 2025 13:51:02 -0500 Subject: [PATCH 030/195] fix develop --- app/requests/generate_image_request.py | 1 + 1 file changed, 1 insertion(+) diff --git a/app/requests/generate_image_request.py b/app/requests/generate_image_request.py index beab880..c106957 100644 --- a/app/requests/generate_image_request.py +++ b/app/requests/generate_image_request.py @@ -5,5 +5,6 @@ class GenerateImageRequest(BaseModel): file: Optional[str] = None file_url: Optional[str] = None + owner_id: Optional[str] = None prompt: str num_variations: int = 4 \ No newline at end of file From b925be761a987b37eb65860cd596b80625f705e7 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 10 Apr 2025 17:52:45 -0500 Subject: [PATCH 031/195] add fallback generate image. --- app/services/image_service.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/app/services/image_service.py b/app/services/image_service.py index fc905f6..6d80871 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -41,8 +41,12 @@ async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, async def _generate_single_variation(self, url_image: str, prompt: str, owner_id: str, folder_id: str, file: Optional[str] = None) -> str: - image_content = await google_image(prompt=prompt, file=file) - + + try: + image_content = await google_image(prompt=prompt, file=file) + except Exception as e: + image_content = await generate_image_variation(image_url=url_image, prompt=prompt) + content_base64 = base64.b64encode(image_content).decode('utf-8') final_upload = await self._upload_to_s3( content_base64, From 6535422868b1aceb6a4f576e82c6138cfc66e98d Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Sun, 13 Apr 2025 12:52:22 -0500 Subject: [PATCH 032/195] fix scrapper when prices has more elements in string --- app/scrapers/aliexpress_scraper.py | 107 +++++++++++++++-------------- 1 file changed, 54 insertions(+), 53 
deletions(-) diff --git a/app/scrapers/aliexpress_scraper.py b/app/scrapers/aliexpress_scraper.py index 31d7ce0..9b17914 100644 --- a/app/scrapers/aliexpress_scraper.py +++ b/app/scrapers/aliexpress_scraper.py @@ -3,7 +3,7 @@ from app.externals.aliexpress.aliexpress_client import get_item_detail import re from fastapi import HTTPException -from decimal import Decimal +from decimal import Decimal, InvalidOperation class AliexpressScraper(ScraperInterface): @@ -13,26 +13,26 @@ async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: try: item_data = self._get_item_data(product_details) - + result = { "name": self._get_name(item_data), "description": self._get_description(item_data), "external_sell_price": self._get_price(item_data), "images": self._get_images(item_data) } - + variants = self._extract_variants(item_data) if variants: result["variants"] = variants - + response = { "provider_id": "aliexpress", "external_id": item_id, **result } - + return {"data": response} - + except Exception as e: raise HTTPException( status_code=400, @@ -51,17 +51,17 @@ def _extract_item_id(self, url: str) -> str: return match.group(1) raise HTTPException(status_code=400, detail=f"No se pudo extraer el ID del producto de la URL: {url}") - + def _get_item_data(self, response: Dict[str, Any]) -> Dict[str, Any]: result = response.get("result", {}) item_data = result.get("item", {}) if not item_data: raise ValueError("No se encontraron datos del producto en la respuesta") return item_data - + def _get_name(self, item_data: Dict[str, Any]) -> str: return item_data.get("title", "") - + def _get_description(self, item_data: Dict[str, Any]) -> str: description = "" description_data = item_data.get("description", {}) @@ -72,34 +72,34 @@ def _get_description(self, item_data: Dict[str, Any]) -> str: # Simplificación básica - podría mejorarse con una biblioteca HTML description = re.sub(r'<[^>]+>', ' ', html_content) description = re.sub(r'\s+', ' ', description).strip() - + # Si 
no hay descripción, intentamos usar las propiedades if not description and "properties" in item_data: properties = item_data.get("properties", {}).get("list", []) if properties: description = "\n".join([f"{prop.get('name')}: {prop.get('value')}" for prop in properties]) - + return description - + def _get_price(self, item_data: Dict[str, Any]) -> Optional[Decimal]: sku_data = item_data.get("sku", {}) if not sku_data: return None - + # Intentar obtener el precio de promoción primero def_data = sku_data.get("def", {}) if def_data: promotion_price = def_data.get("promotionPrice") if promotion_price: return self._parse_price(promotion_price) - + price = def_data.get("price") if price: # Si el precio es un rango (ej: "3.55 - 3.87"), tomamos el valor más bajo if isinstance(price, str) and " - " in price: price = price.split(" - ")[0] return self._parse_price(price) - + # Si no hay precio en def, intentamos con la primera variante base_variants = sku_data.get("base", []) if base_variants and len(base_variants) > 0: @@ -107,80 +107,80 @@ def _get_price(self, item_data: Dict[str, Any]) -> Optional[Decimal]: promotion_price = first_variant.get("promotionPrice") if promotion_price: return self._parse_price(promotion_price) - + price = first_variant.get("price") if price: return self._parse_price(price) - + return None - + def _parse_price(self, price_str: Any) -> Optional[Decimal]: if isinstance(price_str, (int, float)): return Decimal(str(price_str)) - + if isinstance(price_str, str): - clean_price = price_str.replace("$", "").replace(",", "").strip() - try: - return Decimal(clean_price) - except: - pass - + match = re.search(r'(\d+(?:\.\d+)?)', price_str.replace(",", "")) + if match: + try: + return Decimal(match.group(1)) + except InvalidOperation: + return None return None - + def _get_images(self, item_data: Dict[str, Any]) -> List[str]: images = [] - + # Obtener imágenes principales main_images = item_data.get("images", []) if main_images: # Asegurarse de que las URLs 
sean absolutas images = [self._ensure_absolute_url(img) for img in main_images] - + # Si no hay imágenes principales, intentar con imágenes de descripción if not images and "description" in item_data: desc_images = item_data.get("description", {}).get("images", []) if desc_images: images = [self._ensure_absolute_url(img) for img in desc_images] - + return images - + def _ensure_absolute_url(self, url: str) -> str: """Asegura que la URL sea absoluta agregando el protocolo si es necesario.""" if url.startswith("//"): return f"https:{url}" return url - + def _extract_variants(self, item_data: Dict[str, Any]) -> List[Dict[str, Any]]: variants = [] sku_data = item_data.get("sku", {}) - + if not sku_data or "base" not in sku_data or "props" not in sku_data: return [] - + base_variants = sku_data.get("base", []) props = sku_data.get("props", []) product_title = item_data.get("title", "") - + # Crear mapeo de propiedades prop_map = self._create_property_map(props) - + # Procesar cada variante for variant in base_variants: sku_id = variant.get("skuId") sku_attr = variant.get("skuAttr", "") - + # Extraer atributos y imágenes de la variante attributes, variant_images = self._process_variant_attributes(sku_attr, prop_map) - + # Si no hay imágenes específicas de la variante, usar las imágenes principales if not variant_images: main_images = self._get_images(item_data) if main_images: variant_images = [main_images[0]] - + # Crear clave de variante variant_key = "-".join([attr["value"] for attr in attributes]) - + variant_info = { "provider_id": "aliexpress", "external_id": sku_id, @@ -189,9 +189,9 @@ def _extract_variants(self, item_data: Dict[str, Any]) -> List[Dict[str, Any]]: "variant_key": variant_key, "attributes": attributes } - + variants.append(variant_info) - + return variants def _create_property_map(self, props: List[Dict[str, Any]]) -> Dict[int, Dict[str, Any]]: @@ -212,56 +212,57 @@ def _create_property_map(self, props: List[Dict[str, Any]]) -> Dict[int, Dict[st } 
return prop_map - def _process_variant_attributes(self, sku_attr: str, prop_map: Dict[int, Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], List[str]]: + def _process_variant_attributes(self, sku_attr: str, prop_map: Dict[int, Dict[str, Any]]) -> Tuple[ + List[Dict[str, Any]], List[str]]: """Procesa los atributos de una variante y extrae imágenes relacionadas.""" attributes = [] variant_images = [] - + # Atributos a ignorar ignored_attributes = ["Ships From", "ship from"] - + if not sku_attr: return attributes, variant_images - + # Parsear skuAttr (formato: "pid:vid;pid:vid") attr_parts = sku_attr.split(";") for part in attr_parts: if ":" not in part: continue - + pid_vid = part.split(":") if len(pid_vid) != 2: continue - + try: pid = int(pid_vid[0]) vid_raw = pid_vid[1] - + # Extraer el vid (puede tener formato "vid#name") vid = vid_raw if "#" in vid_raw: vid = vid_raw.split("#")[0] - + try: vid = int(vid) except: pass - + if pid in prop_map and vid in prop_map[pid]["values"]: prop_info = prop_map[pid] value_info = prop_info["values"][vid] - + # Ignorar atributos de envío if prop_info["name"] not in ignored_attributes: attributes.append({ "category_name": prop_info["name"], "value": value_info["name"] }) - + # Agregar imagen de la variante si existe if value_info["image"]: variant_images.append(self._ensure_absolute_url(value_info["image"])) except: continue - + return attributes, variant_images From 17d1a60a6504e811cdc67175c5a8d1e44dde3240 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Wed, 16 Apr 2025 02:35:00 -0500 Subject: [PATCH 033/195] add logic mcp processor. 
--- .gitignore | 1 + app/controllers/handle_controller.py | 3 +- .../agent_config/agent_config_client.py | 2 +- .../responses/agent_config_response.py | 13 ++-- app/managers/conversation_manager.py | 16 +++-- app/processors/mcp_processor.py | 61 +++++++++++++++++++ app/requests/message_request.py | 16 ++--- requirements.txt | 12 ++-- 8 files changed, 96 insertions(+), 28 deletions(-) create mode 100644 app/processors/mcp_processor.py diff --git a/.gitignore b/.gitignore index 626aa8c..a1e59b0 100644 --- a/.gitignore +++ b/.gitignore @@ -34,6 +34,7 @@ logs/ # Environment variables file .env +.venv # Pinecone-related cache pinecone.cache \ No newline at end of file diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index 9963fec..fc60a02 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -1,5 +1,4 @@ import base64 - import httpx from app.requests.copy_request import CopyRequest @@ -14,13 +13,13 @@ from app.services.message_service_interface import MessageServiceInterface from app.services.product_scraping_service_interface import ProductScrapingServiceInterface from app.middlewares.auth_middleware import require_auth, require_api_key +from pydantic import BaseModel router = APIRouter( prefix="/api/ms/conversational-engine", tags=["conversational-agent"] ) - @router.post("/handle-message") async def handle_message( request: MessageRequest, diff --git a/app/externals/agent_config/agent_config_client.py b/app/externals/agent_config/agent_config_client.py index bfa44b5..e1a177d 100644 --- a/app/externals/agent_config/agent_config_client.py +++ b/app/externals/agent_config/agent_config_client.py @@ -11,7 +11,7 @@ async def get_agent(data: AgentConfigRequest) -> AgentConfigResponse: headers = {'Content-Type': 'application/json'} async with httpx.AsyncClient() as client: - response = await client.post(url, json=data.dict(), headers=headers) + response = await client.post(url, 
json=data.model_dump(), headers=headers) response.raise_for_status() return AgentConfigResponse(**response.json()) diff --git a/app/externals/agent_config/responses/agent_config_response.py b/app/externals/agent_config/responses/agent_config_response.py index 63a8da5..202f7fc 100644 --- a/app/externals/agent_config/responses/agent_config_response.py +++ b/app/externals/agent_config/responses/agent_config_response.py @@ -1,11 +1,11 @@ -from typing import Optional, Dict, List -from pydantic import BaseModel +from typing import Optional, Dict, List, Any +from pydantic import BaseModel, Field class AgentPreferences(BaseModel): - temperature: float - max_tokens: int - top_p: float + temperature: float = 0.7 + max_tokens: int = 1000 + top_p: float = 1.0 class Property(BaseModel): @@ -43,4 +43,5 @@ class AgentConfigResponse(BaseModel): provider_ai: str model_ai: str preferences: AgentPreferences - tools: Optional[List[dict]] + tools: Optional[List[Dict[str, Any]]] = Field(default_factory=list) + mcp_config: Optional[Dict[str, Any]] = None diff --git a/app/managers/conversation_manager.py b/app/managers/conversation_manager.py index 1545ad6..13d422f 100644 --- a/app/managers/conversation_manager.py +++ b/app/managers/conversation_manager.py @@ -6,6 +6,7 @@ from app.externals.agent_config.responses.agent_config_response import AgentConfigResponse from app.factories.ai_provider_factory import AIProviderFactory from app.tools.tool_generator import ToolGenerator +from app.processors.mcp_processor import MCPProcessor class ConversationManager(ConversationManagerInterface): @@ -23,12 +24,15 @@ async def process_conversation(self, request: MessageRequest, agent_config: Agen ) history = self.get_conversation_history(request.conversation_id) or [] - tools = ToolGenerator.generate_tools(agent_config.tools) - processor = ( - AgentProcessor(llm, agent_config.prompt, history, tools) - if tools - else SimpleProcessor(llm, agent_config.prompt, history) - ) + if agent_config.mcp_config: + 
processor = MCPProcessor(llm, agent_config.prompt, history, agent_config.mcp_config) + else: + tools = ToolGenerator.generate_tools(agent_config.tools or []) + processor = ( + AgentProcessor(llm, agent_config.prompt, history, tools) + if tools + else SimpleProcessor(llm, agent_config.prompt, history) + ) return await processor.process(request, request.files, ai_provider.supports_interleaved_files()) diff --git a/app/processors/mcp_processor.py b/app/processors/mcp_processor.py new file mode 100644 index 0000000..352867e --- /dev/null +++ b/app/processors/mcp_processor.py @@ -0,0 +1,61 @@ +from typing import Dict, Any, List, Optional +from app.processors.conversation_processor import ConversationProcessor +from app.requests.message_request import MessageRequest +from langchain_core.language_models import BaseChatModel +from langchain_mcp_adapters.client import MultiServerMCPClient +from langgraph.prebuilt import create_react_agent +import json +import re + + +class MCPProcessor(ConversationProcessor): + def __init__(self, llm: BaseChatModel, context: str, history: List[str], mcp_config: Dict[str, Any]): + super().__init__(llm, context, history) + self.mcp_config = mcp_config + + async def process(self, request: MessageRequest, files: Optional[List[Dict[str, str]]] = None, + supports_interleaved_files: bool = False) -> Dict[str, Any]: + async with MultiServerMCPClient(self.mcp_config) as client: + agent = create_react_agent(self.llm, client.get_tools()) + + system_message = self.context or "" + if request.json_parser: + format_instructions = json.dumps(request.json_parser, indent=2) + system_message += ( + "\n\nIMPORTANT: Respond exclusively in JSON format following exactly this structure:\n\n" + f"{format_instructions}\n\n" + "Do NOT include markdown, explanations, or anything else besides the JSON." 
+ ) + + messages = [] + if system_message: + messages.append({"role": "system", "content": system_message}) + + if self.history: + messages.extend(self.history) + + messages.append({"role": "user", "content": request.query}) + + response = await agent.ainvoke({"messages": messages}) + + content = "" + if "messages" in response and response["messages"]: + last_message = response["messages"][-1] + if hasattr(last_message, "content"): + content = last_message.content + elif isinstance(last_message, dict) and "content" in last_message: + content = last_message["content"] + else: + content = str(last_message) + else: + content = str(response) + + match = re.search(r'```json\n(.*?)\n```', content, re.DOTALL) + result = match.group(1) if match else content + + return { + "context": self.context, + "chat_history": self.history, + "input": request.query, + "text": result + } \ No newline at end of file diff --git a/app/requests/message_request.py b/app/requests/message_request.py index eebf14f..b0f7b4f 100644 --- a/app/requests/message_request.py +++ b/app/requests/message_request.py @@ -1,12 +1,12 @@ -from pydantic import BaseModel -from typing import Optional, List, Dict +from typing import List, Dict, Any, Optional +from pydantic import BaseModel, Field class MessageRequest(BaseModel): - agent_id: Optional[str] - query: str + agent_id: str conversation_id: str - metadata_filter: Optional[dict] = None - json_parser: Optional[dict] = None - parameter_prompt: Optional[dict] = None - files: Optional[List[Dict[str, str]]] = None + query: str + metadata_filter: Optional[Dict[str, Any]] = Field(default_factory=dict) + parameter_prompt: Optional[Dict[str, Any]] = Field(default_factory=dict) + files: Optional[List[Dict[str, str]]] = Field(default_factory=list) + json_parser: Optional[Dict[str, Any]] = None diff --git a/requirements.txt b/requirements.txt index 8a228f1..626fc19 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ -fastapi==0.109.1 
-pydantic==1.10.13 +fastapi>=0.109.1 +pydantic>=2.5.0 mangum==0.17.0 python-dotenv==1.0.0 uvicorn==0.24.0 -httpx +httpx>=0.24.0 langchain-community>=0.2.0 -langchain-openai +langchain-openai>=0.0.5 openai langgraph>=0.0.10 langchain-core>=0.1.17 @@ -13,4 +13,6 @@ langchain-anthropic langchain-ollama fpdf beautifulsoup4 -lxml \ No newline at end of file +lxml +langchain_mcp +langchain-mcp-adapters>=0.1.0 \ No newline at end of file From c7603087e202dd1969e8b6a58d2cb420a1ac026a Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 21 Apr 2025 11:18:00 -0500 Subject: [PATCH 034/195] new version --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index da216c2..9174a8e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Usar una imagen base de Python -FROM python:3.9-slim +FROM python:3.10-slim # Establecer el directorio de trabajo WORKDIR /app From 6547a0d31de83f5e1b5f087c2fd1cb875b4366e5 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 21 Apr 2025 12:36:19 -0500 Subject: [PATCH 035/195] fix again version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 626fc19..e8e0c3b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,4 @@ fpdf beautifulsoup4 lxml langchain_mcp -langchain-mcp-adapters>=0.1.0 \ No newline at end of file +langchain-mcp-adapters==0.0.9 \ No newline at end of file From de1be2ed0784d97e01792e12fa943dd62f2e4a13 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 21 Apr 2025 14:44:34 -0500 Subject: [PATCH 036/195] change pydantic --- .../agent_config/requests/agent_config_request.py | 4 +++- app/requests/message_request.py | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/app/externals/agent_config/requests/agent_config_request.py b/app/externals/agent_config/requests/agent_config_request.py index a91d1ca..f892dcc 100644 --- 
a/app/externals/agent_config/requests/agent_config_request.py +++ b/app/externals/agent_config/requests/agent_config_request.py @@ -1,9 +1,11 @@ from typing import List, Dict, Optional, Any from pydantic import BaseModel +from app.requests.message_request import MetadataFilter + class AgentConfigRequest(BaseModel): agent_id: Optional[str] = None query: str - metadata_filter: Optional[List[Dict[str, str]]] = None + metadata_filter: Optional[List[MetadataFilter]] = None parameter_prompt: Optional[Dict[str, Any]] = None diff --git a/app/requests/message_request.py b/app/requests/message_request.py index b0f7b4f..1e23bfa 100644 --- a/app/requests/message_request.py +++ b/app/requests/message_request.py @@ -2,11 +2,17 @@ from pydantic import BaseModel, Field +class MetadataFilter(BaseModel): + key: str + value: str + evaluator: str = "=" + + class MessageRequest(BaseModel): agent_id: str conversation_id: str query: str - metadata_filter: Optional[Dict[str, Any]] = Field(default_factory=dict) + metadata_filter: Optional[List[MetadataFilter]] = Field(default_factory=list) parameter_prompt: Optional[Dict[str, Any]] = Field(default_factory=dict) files: Optional[List[Dict[str, str]]] = Field(default_factory=list) json_parser: Optional[Dict[str, Any]] = None From b7a945b519e2e3ecb7465e08b4b485d533d5210f Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 21 Apr 2025 21:20:20 -0500 Subject: [PATCH 037/195] add language for manual. 
--- app/requests/generate_pdf_request.py | 3 ++- app/services/message_service.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/app/requests/generate_pdf_request.py b/app/requests/generate_pdf_request.py index a41620d..fcd8be7 100644 --- a/app/requests/generate_pdf_request.py +++ b/app/requests/generate_pdf_request.py @@ -4,4 +4,5 @@ class GeneratePdfRequest(BaseModel): product_name: str product_description: str - owner_id: str \ No newline at end of file + language: str + owner_id: str diff --git a/app/services/message_service.py b/app/services/message_service.py index 7e06ca7..7fa7c86 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -100,7 +100,7 @@ async def generate_copies(self, request: CopyRequest): return {"copies": combined_data} async def generate_pdf(self, request: GeneratePdfRequest): - base_query = f"Product Name: {request.product_name} Description: {request.product_description}" + base_query = f"Product Name: {request.product_name} Description: {request.product_description}. Language: {request.language}." agent_queries = [ {'agent': "agent_copies_pdf", 'query': f"section: {section}. 
{base_query} "} From c8d81de52ec55e8e0a6b4badfc9c9ad27dcc1a59 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 21 Apr 2025 23:01:22 -0500 Subject: [PATCH 038/195] check file with version for create pdf or return --- .env.example | 3 ++- app/configurations/config.py | 2 ++ app/externals/s3_upload/s3_upload_client.py | 16 +++++++++--- app/requests/generate_pdf_request.py | 1 + app/services/message_service.py | 29 ++++++++++++++------- 5 files changed, 38 insertions(+), 13 deletions(-) diff --git a/.env.example b/.env.example index 559dcbd..18153f2 100644 --- a/.env.example +++ b/.env.example @@ -14,4 +14,5 @@ GOOGLE_VISION_API_KEY=dsadadasda REPLICATE_API_KEY=dsadadasda SCRAPERAPI_KEY=dsadsadsadasdsadas URL_SCRAPER_LAMBDA=https://localhost:8000/ -GOOGLE_GEMINI_API_KEY=sadasadasdasd \ No newline at end of file +GOOGLE_GEMINI_API_KEY=sadasadasdasd +ENVIRONMENT=dev \ No newline at end of file diff --git a/app/configurations/config.py b/app/configurations/config.py index 23aeea9..2308413 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -28,3 +28,5 @@ API_KEY: str = os.getenv('API_KEY') GOOGLE_GEMINI_API_KEY: str = os.getenv('GOOGLE_GEMINI_API_KEY') + +ENVIRONMENT: str = os.getenv('ENVIRONMENT') \ No newline at end of file diff --git a/app/externals/s3_upload/s3_upload_client.py b/app/externals/s3_upload/s3_upload_client.py index b0cf5c0..a87dd25 100644 --- a/app/externals/s3_upload/s3_upload_client.py +++ b/app/externals/s3_upload/s3_upload_client.py @@ -8,10 +8,9 @@ async def upload_file(request: S3UploadRequest) -> S3UploadResponse: headers = { "Content-Type": "application/json" } - - # Configuración de tiempos de espera más largos (3 minutos) + timeout = httpx.Timeout(timeout=180.0, connect=60.0) - + try: async with httpx.AsyncClient(timeout=timeout) as client: response = await client.post( @@ -24,3 +23,14 @@ async def upload_file(request: S3UploadRequest) -> S3UploadResponse: except Exception as e: print(f"Error al cargar 
archivo a S3: {str(e)}") raise Exception(f"Error al cargar archivo a S3: {str(e)}") + + +async def check_file_exists_direct(s3_url: str) -> bool: + timeout = httpx.Timeout(timeout=10.0) + + try: + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.head(s3_url) + return response.status_code == 200 + except Exception as e: + return False diff --git a/app/requests/generate_pdf_request.py b/app/requests/generate_pdf_request.py index fcd8be7..3b125d2 100644 --- a/app/requests/generate_pdf_request.py +++ b/app/requests/generate_pdf_request.py @@ -2,6 +2,7 @@ class GeneratePdfRequest(BaseModel): + product_id: str product_name: str product_description: str language: str diff --git a/app/services/message_service.py b/app/services/message_service.py index 7fa7c86..9f4336d 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -1,12 +1,11 @@ import json -import uuid import asyncio -from app.configurations.config import AGENT_RECOMMEND_PRODUCTS_ID, AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID +from app.configurations.config import AGENT_RECOMMEND_PRODUCTS_ID, AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID, ENVIRONMENT from app.configurations.copies_config import AGENT_COPIES from app.externals.agent_config.agent_config_client import get_agent from app.externals.s3_upload.requests.s3_upload_request import S3UploadRequest -from app.externals.s3_upload.s3_upload_client import upload_file +from app.externals.s3_upload.s3_upload_client import upload_file, check_file_exists_direct from app.pdf.helpers import clean_text, clean_json from app.requests.copy_request import CopyRequest from app.requests.generate_pdf_request import GeneratePdfRequest @@ -101,6 +100,15 @@ async def generate_copies(self, request: CopyRequest): async def generate_pdf(self, request: GeneratePdfRequest): base_query = f"Product Name: {request.product_name} Description: {request.product_description}. Language: {request.language}." 
+ base_filename = f"{request.product_id}_{request.language}" + version = "v1" + base_url = f"https://fluxi.co/{ENVIRONMENT}/assets" + folder_path = f"{request.owner_id}/pdfs/{version}" + s3_url = f"{base_url}/{folder_path}/{base_filename}.pdf" + exists = await check_file_exists_direct(s3_url) + + if exists: + return {"s3_url": s3_url} agent_queries = [ {'agent': "agent_copies_pdf", 'query': f"section: {section}. {base_query} "} @@ -109,12 +117,15 @@ async def generate_pdf(self, request: GeneratePdfRequest): combined_data = await self.process_multiple_agents(agent_queries) - unique_id = uuid.uuid4().hex[:8] - file_name = f"{request.product_name.replace(' ', '_').lower()}_{unique_id}" - pdf_generator = PDFManualGenerator(request.product_name) pdf = await pdf_generator.create_manual(combined_data) - return await upload_file( - S3UploadRequest(file=pdf, folder=f"{request.owner_id}/pdfs", - filename=file_name)) + result = await upload_file( + S3UploadRequest( + file=pdf, + folder=folder_path, + filename=base_filename + ) + ) + + return result From ede352c5b8523e074114e3e5a839fca5985a2898 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Tue, 22 Apr 2025 01:26:00 -0500 Subject: [PATCH 039/195] fix generator --- app/pdf/pdf_generator.py | 102 ++++++++++++++++++++++++++++++++------- 1 file changed, 85 insertions(+), 17 deletions(-) diff --git a/app/pdf/pdf_generator.py b/app/pdf/pdf_generator.py index 39ae17c..a1b8fc3 100644 --- a/app/pdf/pdf_generator.py +++ b/app/pdf/pdf_generator.py @@ -5,35 +5,104 @@ class PDFGenerator(FPDF): def __init__(self, product_name): super().__init__() self.product_name = product_name + self.header_height = 0 def header(self): - self.set_font("Helvetica", "B", 18) - self.set_text_color(0, 51, 102) # Azul oscuro - self.cell(0, 10, f"User Manual for {self.product_name}", ln=True, align="C") - self.ln(5) + if self.page_no() == 1: + return + + initial_y = self.get_y() + + self.set_font("Helvetica", "B", 16) + self.set_text_color(0, 51, 102) + + 
title = f"User Manual for {self.product_name}" + + self.set_y(10) + + width_available = self.w - 20 + self.x = 10 + + self.multi_cell(width_available, 8, title, align="C") + + end_y = self.get_y() + 2 self.set_line_width(0.5) self.set_draw_color(0, 51, 102) - self.line(10, 25, self.w - 10, 25) - self.ln(10) + self.line(10, end_y, self.w - 10, end_y) + + self.set_y(end_y + 10) + + self.header_height = self.get_y() - initial_y def footer(self): + # No mostrar el pie de página en la primera página (portada) + if self.page_no() == 1: + return + self.set_y(-20) self.set_font("Helvetica", "I", 10) self.set_text_color(128, 128, 128) - self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C") + self.cell(0, 10, f"Page {self.page_no()-1}", 0, 0, "C") # Restar 1 porque la portada no cuenta def add_cover_page(self, title, subtitle=""): self.add_page() - self.set_font("Helvetica", "B", 24) + + # Dimensiones y márgenes + page_width = self.w + page_height = self.h + margin = 15 + + # Borde completo alrededor de la página + self.set_draw_color(0, 51, 102) + self.set_line_width(0.7) + self.rect(margin, margin, page_width - 2*margin, page_height - 2*margin) + + # Título principal + self.set_font("Helvetica", "B", 24) # Reducir ligeramente el tamaño para evitar desbordamiento self.set_text_color(0, 51, 102) - self.ln(40) # Espacio superior para la portada - self.cell(0, 20, title, ln=True, align="C") + + # Definir el ancho efectivo del texto con márgenes seguros + text_width = page_width - 2*margin - 20 # 10px de margen adicional a cada lado + + # Posicionar para el título + self.set_y(page_height * 0.3) # Aproximadamente a 1/3 de la página + self.set_x(margin + 10) # Margen izquierdo + margen adicional + + # Dibujar el título con múltiples líneas si es necesario + self.multi_cell(text_width, 16, title, align="C") + + # Guardar posición después del título + title_end_y = self.get_y() + + # Subtítulo si existe if subtitle: - self.ln(10) - self.set_font("Helvetica", "", 16) - self.cell(0, 
10, subtitle, ln=True, align="C") - self.ln(20) + self.ln(15) # Espacio entre título y subtítulo + self.set_font("Helvetica", "", 18) + self.set_text_color(80, 80, 80) + self.set_x(margin + 10) # Asegurar margen correcto + self.multi_cell(text_width, 12, subtitle, align="C") + self.add_page() + + def get_multi_cell_height(self, w, h, txt, align="J"): + x = self.x + y = self.y + + lines = 1 + width = 0 + text = txt.split(' ') + for word in text: + word_width = self.get_string_width(word + ' ') + if width + word_width > w: + lines += 1 + width = word_width + else: + width += word_width + + self.x = x + self.y = y + + return lines * h def add_section(self, title, content): if self.get_y() > self.h * 0.6: @@ -41,11 +110,10 @@ def add_section(self, title, content): self.set_font("Helvetica", "B", 14) self.set_text_color(255, 255, 255) - self.set_fill_color(0, 102, 204) # Azul + self.set_fill_color(0, 102, 204) self.cell(0, 12, title, ln=True, fill=True, align="C", border=1) self.ln(6) - # Contenido de la sección self.set_text_color(0, 0, 0) self.set_font("Helvetica", "", 12) @@ -57,7 +125,7 @@ def add_section(self, title, content): self.multi_cell(0, 8, formatted_text) self.ln(8) - self.set_draw_color(200, 200, 200) # Línea gris claro + self.set_draw_color(200, 200, 200) self.set_line_width(0.3) current_y = self.get_y() self.line(10, current_y, self.w - 10, current_y) From 0cc795f24b9c8ec079701f203fcd45542f8d7bac Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Tue, 22 Apr 2025 01:27:36 -0500 Subject: [PATCH 040/195] add version --- app/pdf/pdf_generator.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/app/pdf/pdf_generator.py b/app/pdf/pdf_generator.py index a1b8fc3..3c8a983 100644 --- a/app/pdf/pdf_generator.py +++ b/app/pdf/pdf_generator.py @@ -6,6 +6,7 @@ def __init__(self, product_name): super().__init__() self.product_name = product_name self.header_height = 0 + self.version = "1.0" # Versión del documento def header(self): if 
self.page_no() == 1: @@ -82,8 +83,22 @@ def add_cover_page(self, title, subtitle=""): self.set_x(margin + 10) # Asegurar margen correcto self.multi_cell(text_width, 12, subtitle, align="C") + # Agregar información de la versión en la parte inferior, dentro del marco + self.set_font("Helvetica", "I", 11) + self.set_text_color(100, 100, 100) + + # Posicionar el texto de versión en la parte inferior pero dentro del marco + version_y = page_height - margin - 20 # 20 puntos arriba del borde inferior + self.set_y(version_y) + self.set_x(margin + 10) + self.multi_cell(text_width, 10, f"Document Version: {self.version}", align="C") + self.add_page() + # Método para establecer la versión del documento + def set_document_version(self, version): + self.version = version + def get_multi_cell_height(self, w, h, txt, align="J"): x = self.x y = self.y From bd156a065c3750db7967154d64b80bab063a5bb9 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 25 Apr 2025 00:37:07 -0500 Subject: [PATCH 041/195] implement gemini provider --- app/factories/ai_provider_factory.py | 3 +++ app/providers/gemini_provider.py | 18 ++++++++++++++++++ requirements.txt | 3 ++- 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 app/providers/gemini_provider.py diff --git a/app/factories/ai_provider_factory.py b/app/factories/ai_provider_factory.py index 34e3c91..74fbb20 100644 --- a/app/factories/ai_provider_factory.py +++ b/app/factories/ai_provider_factory.py @@ -1,6 +1,7 @@ from app.providers.ai_provider_interface import AIProviderInterface from app.providers.anthropic_provider import AnthropicProvider from app.providers.deepseek_provider import DeepseekProvider +from app.providers.gemini_provider import GeminiProvider from app.providers.openai_provider import OpenAIProvider @@ -13,5 +14,7 @@ def get_provider(provider_name: str) -> AIProviderInterface: return AnthropicProvider() elif provider_name == "deepseek": return DeepseekProvider() + elif provider_name == "gemini": + return 
GeminiProvider() else: raise ValueError(f"El proveedor de AI '{provider_name}' no está implementado") \ No newline at end of file diff --git a/app/providers/gemini_provider.py b/app/providers/gemini_provider.py new file mode 100644 index 0000000..1856e98 --- /dev/null +++ b/app/providers/gemini_provider.py @@ -0,0 +1,18 @@ +import os + +from langchain_google_genai import ChatGoogleGenerativeAI +from app.providers.ai_provider_interface import AIProviderInterface + + +class GeminiProvider(AIProviderInterface): + def get_llm(self, model: str, temperature: float, max_tokens: int, top_p: int) -> ChatGoogleGenerativeAI: + return ChatGoogleGenerativeAI( + model=model, + temperature=temperature, + max_output_tokens=max_tokens, + top_p=top_p, + google_api_key=os.getenv("GOOGLE_GEMINI_API_KEY") + ) + + def supports_interleaved_files(self) -> bool: + return True \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e8e0c3b..146cf8a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,5 @@ fpdf beautifulsoup4 lxml langchain_mcp -langchain-mcp-adapters==0.0.9 \ No newline at end of file +langchain-mcp-adapters==0.0.9 +langchain-google-genai \ No newline at end of file From 4c530c78940df1f8c5f88555b084d959403a9f14 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 25 Apr 2025 01:09:04 -0500 Subject: [PATCH 042/195] add logic open ai image add logic open ai image --- app/configurations/config.py | 4 +- .../image_client.py} | 46 ++++++++++++++++++- app/services/image_service.py | 10 ++-- 3 files changed, 55 insertions(+), 5 deletions(-) rename app/externals/{replicate/replicate_client.py => images/image_client.py} (68%) diff --git a/app/configurations/config.py b/app/configurations/config.py index 2308413..ae56fc4 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -29,4 +29,6 @@ API_KEY: str = os.getenv('API_KEY') GOOGLE_GEMINI_API_KEY: str = os.getenv('GOOGLE_GEMINI_API_KEY') -ENVIRONMENT: str = 
os.getenv('ENVIRONMENT') \ No newline at end of file +ENVIRONMENT: str = os.getenv('ENVIRONMENT') + +OPENAI_API_KEY: str = os.getenv('OPENAI_API_KEY') \ No newline at end of file diff --git a/app/externals/replicate/replicate_client.py b/app/externals/images/image_client.py similarity index 68% rename from app/externals/replicate/replicate_client.py rename to app/externals/images/image_client.py index ab72e4c..2f30e4e 100644 --- a/app/externals/replicate/replicate_client.py +++ b/app/externals/images/image_client.py @@ -1,12 +1,14 @@ import base64 from typing import Optional +import os import aiohttp import asyncio import httpx import base64 -from app.configurations.config import REPLICATE_API_KEY, GOOGLE_GEMINI_API_KEY +from app.configurations import config +from app.configurations.config import REPLICATE_API_KEY, GOOGLE_GEMINI_API_KEY, OPENAI_API_KEY async def generate_image_variation( @@ -109,3 +111,45 @@ async def google_image(prompt: str, file: Optional[str] = None) -> bytes: except Exception as e: print(f"Error al generar imagen: {str(e)}") raise Exception(f"Error al generar imagen: {str(e)}") + + +async def openai_image_edit(image_url: str, prompt: str) -> bytes: + url = "https://api.openai.com/v1/images/edits" + headers = { + "Authorization": f"Bearer {config.OPENAI_API_KEY}" + } + data = aiohttp.FormData() + print("VAMOOOOSSS") + + with open(image_url, 'rb') as f: + data.add_field('image', + f.read(), + filename=os.path.basename(image_url), + content_type='application/octet-stream') + + data.add_field('prompt', prompt) + data.add_field('model', 'gpt-image-1') + data.add_field('n', '1') + data.add_field('size', '1024x1024') + + try: + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers, data=data) as response: + if response.status == 200: + result = await response.json() + if "data" in result and len(result["data"]) > 0 and "b64_json" in result["data"][0]: + b64_image = result["data"][0]["b64_json"] + image_bytes = 
base64.b64decode(b64_image) + return image_bytes + else: + raise Exception(f"Respuesta inesperada de la API de OpenAI: {result}") + else: + error_text = await response.text() + print(f"Error {response.status}: {error_text}") + response.raise_for_status() + except aiohttp.ClientError as e: + print(f"Error red al generar imagen: {str(e)}") + raise Exception(f"Error de red al llamar a OpenAI: {e}") from e + except Exception as e: + print(f"Error al generar imagen: {str(e)}") + raise Exception(f"Error al editar imagen con OpenAI: {e}") from e diff --git a/app/services/image_service.py b/app/services/image_service.py index 6d80871..b280940 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -16,7 +16,7 @@ import uuid from dotenv import load_dotenv from app.externals.google_vision.google_vision_client import analyze_image -from app.externals.replicate.replicate_client import generate_image_variation, google_image +from app.externals.images.image_client import generate_image_variation, google_image, openai_image_edit from typing import Optional load_dotenv() @@ -43,9 +43,13 @@ async def _generate_single_variation(self, url_image: str, prompt: str, owner_id folder_id: str, file: Optional[str] = None) -> str: try: - image_content = await google_image(prompt=prompt, file=file) + image_content = await openai_image_edit(image_url=url_image, prompt=prompt) except Exception as e: - image_content = await generate_image_variation(image_url=url_image, prompt=prompt) + try: + image_content = await google_image(prompt=prompt, file=file) + except Exception as e: + image_content = await generate_image_variation(image_url=url_image, prompt=prompt) + content_base64 = base64.b64encode(image_content).decode('utf-8') final_upload = await self._upload_to_s3( From 38c0d8157d07da00f5352d955c62e6a77c2f2535 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Sun, 27 Apr 2025 01:54:54 -0500 Subject: [PATCH 043/195] add logic for generate image with agent. 
--- app/controllers/handle_controller.py | 19 +++++++++++++++++ app/externals/images/image_client.py | 1 - app/requests/generate_image_request.py | 8 +++++--- app/services/image_service.py | 27 +++++++++++++++++++------ app/services/image_service_interface.py | 3 +++ 5 files changed, 48 insertions(+), 10 deletions(-) diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index fc60a02..644110f 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -108,6 +108,25 @@ async def generate_images_from_api_key( return response +@router.post("/generate-images-from-agent/api-key") +@require_api_key +async def generate_images_from_agent_api_key( + request: Request, + generate_image_request: GenerateImageRequest, + service: ImageServiceInterface = Depends() +): + if not generate_image_request.file and generate_image_request.file_url: + async with httpx.AsyncClient() as client: + try: + response = await client.get(generate_image_request.file_url) + response.raise_for_status() + generate_image_request.file = base64.b64encode(response.content).decode() + except Exception as e: + raise HTTPException(status_code=400, detail=f"Error for get file: {str(e)}") + response = await service.generate_images_from_agent(generate_image_request, generate_image_request.owner_id) + return response + + @router.post("/generate-copies") async def generate_copies( copy_request: CopyRequest, diff --git a/app/externals/images/image_client.py b/app/externals/images/image_client.py index 2f30e4e..c4bf2a4 100644 --- a/app/externals/images/image_client.py +++ b/app/externals/images/image_client.py @@ -119,7 +119,6 @@ async def openai_image_edit(image_url: str, prompt: str) -> bytes: "Authorization": f"Bearer {config.OPENAI_API_KEY}" } data = aiohttp.FormData() - print("VAMOOOOSSS") with open(image_url, 'rb') as f: data.add_field('image', diff --git a/app/requests/generate_image_request.py b/app/requests/generate_image_request.py 
index c106957..6ca9031 100644 --- a/app/requests/generate_image_request.py +++ b/app/requests/generate_image_request.py @@ -1,10 +1,12 @@ from pydantic import BaseModel -from typing import Optional +from typing import Optional, Dict, Any class GenerateImageRequest(BaseModel): file: Optional[str] = None file_url: Optional[str] = None owner_id: Optional[str] = None - prompt: str - num_variations: int = 4 \ No newline at end of file + prompt: Optional[str] = None + agent_id: Optional[str] = None + num_variations: int = 4 + parameter_prompt: Optional[Dict[str, Any]] = None \ No newline at end of file diff --git a/app/services/image_service.py b/app/services/image_service.py index b280940..6154252 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -1,6 +1,8 @@ from app.configurations.config import ( AGENT_IMAGE_VARIATIONS, ) +from app.externals.agent_config.agent_config_client import get_agent +from app.externals.agent_config.requests.agent_config_request import AgentConfigRequest from app.externals.s3_upload.responses.s3_upload_response import S3UploadResponse from app.requests.generate_image_request import GenerateImageRequest from app.requests.message_request import MessageRequest @@ -90,16 +92,16 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ async def generate_images_from(self, request: GenerateImageRequest, owner_id: str): folder_id = uuid.uuid4().hex[:8] original_url = None - + if request.file: original_image_response = await self._upload_to_s3(request.file, owner_id, folder_id, "original") original_url = original_image_response.s3_url - + tasks = [ self._generate_single_variation( - original_url, - request.prompt, - owner_id, + original_url, + request.prompt, + owner_id, folder_id, request.file ) @@ -108,7 +110,20 @@ async def generate_images_from(self, request: GenerateImageRequest, owner_id: st generated_urls = await asyncio.gather(*tasks) return GenerateImageResponse( - 
generated_urls=generated_urls, + generated_urls=generated_urls, original_url=original_url, generated_prompt=request.prompt ) + + async def generate_images_from_agent(self, request: GenerateImageRequest, owner_id: str): + data = AgentConfigRequest( + agent_id=request.agent_id, + query="", + parameter_prompt=request.parameter_prompt + ) + + agent_config = await get_agent(data) + request.prompt = agent_config.prompt + response = await self.generate_images_from(request, owner_id) + + return response diff --git a/app/services/image_service_interface.py b/app/services/image_service_interface.py index f081a45..40d9816 100644 --- a/app/services/image_service_interface.py +++ b/app/services/image_service_interface.py @@ -12,3 +12,6 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ @abstractmethod async def generate_images_from(self, request: GenerateImageRequest, owner_id: str): pass + + async def generate_images_from_agent(self, generate_image_request, owner_id): + pass From 759eb74e11e1e3aacc9484010cb1990ee7b5018f Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 28 Apr 2025 18:29:55 -0500 Subject: [PATCH 044/195] create service resolve funnel. create service resolve funnel. 
--- app/controllers/handle_controller.py | 10 +++++ app/processors/mcp_processor.py | 44 ++++++++++++++----- app/requests/resolve_funnel_request.py | 6 +++ app/services/message_service.py | 53 +++++++++++++++++++++++ app/services/message_service_interface.py | 5 +++ 5 files changed, 108 insertions(+), 10 deletions(-) create mode 100644 app/requests/resolve_funnel_request.py diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index 644110f..9e01b14 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -5,6 +5,7 @@ from app.requests.generate_image_request import GenerateImageRequest from app.requests.generate_pdf_request import GeneratePdfRequest from app.requests.recommend_product_request import RecommendProductRequest +from app.requests.resolve_funnel_request import ResolveFunnelRequest from fastapi import APIRouter, Depends, Request, HTTPException from app.requests.message_request import MessageRequest from app.requests.variation_image_request import VariationImageRequest @@ -147,6 +148,15 @@ async def scrape_product( return response +@router.post("/resolve-info-funnel") +async def resolve_funnel( + request: ResolveFunnelRequest, + message_service: MessageServiceInterface = Depends() +): + response = await message_service.resolve_funnel(request) + return response + + @router.get("/health") async def health_check(): return {"status": "OK"} diff --git a/app/processors/mcp_processor.py b/app/processors/mcp_processor.py index 352867e..439d1b4 100644 --- a/app/processors/mcp_processor.py +++ b/app/processors/mcp_processor.py @@ -17,7 +17,7 @@ async def process(self, request: MessageRequest, files: Optional[List[Dict[str, supports_interleaved_files: bool = False) -> Dict[str, Any]: async with MultiServerMCPClient(self.mcp_config) as client: agent = create_react_agent(self.llm, client.get_tools()) - + system_message = self.context or "" if request.json_parser: format_instructions = 
json.dumps(request.json_parser, indent=2) @@ -26,18 +26,18 @@ async def process(self, request: MessageRequest, files: Optional[List[Dict[str, f"{format_instructions}\n\n" "Do NOT include markdown, explanations, or anything else besides the JSON." ) - + messages = [] if system_message: messages.append({"role": "system", "content": system_message}) - + if self.history: messages.extend(self.history) - + messages.append({"role": "user", "content": request.query}) - + response = await agent.ainvoke({"messages": messages}) - + content = "" if "messages" in response and response["messages"]: last_message = response["messages"][-1] @@ -49,13 +49,37 @@ async def process(self, request: MessageRequest, files: Optional[List[Dict[str, content = str(last_message) else: content = str(response) - + match = re.search(r'```json\n(.*?)\n```', content, re.DOTALL) result = match.group(1) if match else content - + + tool_info = await self.get_tool_data(response) + return { "context": self.context, "chat_history": self.history, "input": request.query, - "text": result - } \ No newline at end of file + "text": result, + "tool_result": tool_info + } + + async def get_tool_data(self, response): + tool_messages = [ + msg for msg in response.get('messages', []) + if getattr(msg, 'type', None) == 'tool' + ] + tool_info = None + if tool_messages: + last_tool = tool_messages[-1] + name = last_tool.name + tool_result = last_tool.content + try: + tool_result_json = json.loads(tool_result) + except json.JSONDecodeError: + tool_result_json = tool_result + + tool_info = { + "name": name, + "message": tool_result_json + } + return tool_info diff --git a/app/requests/resolve_funnel_request.py b/app/requests/resolve_funnel_request.py new file mode 100644 index 0000000..d2a8209 --- /dev/null +++ b/app/requests/resolve_funnel_request.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel + + +class ResolveFunnelRequest(BaseModel): + product_name: str + product_description: str \ No newline at end of file diff 
--git a/app/services/message_service.py b/app/services/message_service.py index 9f4336d..3fcf009 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -20,6 +20,7 @@ from app.pdf.pdf_manual_generator import PDFManualGenerator from app.externals.amazon.requests.amazon_search_request import AmazonSearchRequest from app.externals.amazon.amazon_client import search_products +from app.requests.resolve_funnel_request import ResolveFunnelRequest class MessageService(MessageServiceInterface): @@ -129,3 +130,55 @@ async def generate_pdf(self, request: GeneratePdfRequest): ) return result + + async def resolve_funnel(self, request: ResolveFunnelRequest): + pain_detection_response = await self.handle_message(MessageRequest( + agent_id="pain_detection", + conversation_id="", + query="pain_detection", + parameter_prompt={ + "product_name": request.product_name, + "product_description": request.product_description + } + )) + + pain_detection_message = pain_detection_response['text'] + + buyer_detection_response = await self.handle_message(MessageRequest( + agent_id="buyer_detection", + conversation_id="", + query="buyer_detection", + parameter_prompt={ + "product_name": request.product_name, + "product_description": request.product_description, + "pain_detection": pain_detection_message + } + )) + + buyer_detection_message = buyer_detection_response['text'] + + sales_angles_response = await self.handle_message_json(MessageRequest( + agent_id="sales_angles_v2", + conversation_id="", + query="sales_angles_v2", + json_parser={ + "angles": [ + { + "name": "string", + "description": "string" + } + ] + }, + parameter_prompt={ + "product_name": request.product_name, + "product_description": request.product_description, + "pain_detection": pain_detection_message, + "buyer_detection": buyer_detection_message + } + )) + + return { + "pain_detection": pain_detection_message, + "buyer_detection": buyer_detection_message, + "sales_angles": 
sales_angles_response + } diff --git a/app/services/message_service_interface.py b/app/services/message_service_interface.py index e225acb..c4163ba 100644 --- a/app/services/message_service_interface.py +++ b/app/services/message_service_interface.py @@ -3,6 +3,7 @@ from app.requests.copy_request import CopyRequest from app.requests.message_request import MessageRequest from app.requests.recommend_product_request import RecommendProductRequest +from app.requests.resolve_funnel_request import ResolveFunnelRequest class MessageServiceInterface(ABC): @@ -23,4 +24,8 @@ async def recommend_products(self, request: RecommendProductRequest): pass async def generate_pdf(self, request): + pass + + @abstractmethod + async def resolve_funnel(self, request: ResolveFunnelRequest): pass \ No newline at end of file From 43cb1ae76c470e07dbc14e9bd8d89f1135b264f3 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 28 Apr 2025 18:50:26 -0500 Subject: [PATCH 045/195] map only angles --- app/services/message_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/message_service.py b/app/services/message_service.py index 3fcf009..4246c9f 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -180,5 +180,5 @@ async def resolve_funnel(self, request: ResolveFunnelRequest): return { "pain_detection": pain_detection_message, "buyer_detection": buyer_detection_message, - "sales_angles": sales_angles_response + "sales_angles": sales_angles_response["angles"] } From b2e4029503fd161689f50de76aef5e8a406b88bc Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Wed, 30 Apr 2025 00:24:18 -0500 Subject: [PATCH 046/195] change logic iamge service --- app/externals/images/image_client.py | 23 ++++++++++---- app/requests/generate_image_request.py | 1 + app/responses/generate_image_response.py | 1 + app/services/image_service.py | 38 +++++++++++------------- 4 files changed, 37 insertions(+), 26 deletions(-) diff --git 
a/app/externals/images/image_client.py b/app/externals/images/image_client.py index c4bf2a4..b9678c7 100644 --- a/app/externals/images/image_client.py +++ b/app/externals/images/image_client.py @@ -1,4 +1,5 @@ import base64 +import mimetypes from typing import Optional import os @@ -7,6 +8,8 @@ import httpx import base64 +import requests + from app.configurations import config from app.configurations.config import REPLICATE_API_KEY, GOOGLE_GEMINI_API_KEY, OPENAI_API_KEY @@ -113,18 +116,26 @@ async def google_image(prompt: str, file: Optional[str] = None) -> bytes: raise Exception(f"Error al generar imagen: {str(e)}") -async def openai_image_edit(image_url: str, prompt: str) -> bytes: +async def openai_image_edit(image_urls: list[str], prompt: str) -> bytes: url = "https://api.openai.com/v1/images/edits" headers = { "Authorization": f"Bearer {config.OPENAI_API_KEY}" } data = aiohttp.FormData() - with open(image_url, 'rb') as f: - data.add_field('image', - f.read(), - filename=os.path.basename(image_url), - content_type='application/octet-stream') + async with aiohttp.ClientSession() as fetch_session: + for image_url in image_urls: + async with fetch_session.get(image_url) as img_response: + if img_response.status == 200: + image_bytes = await img_response.read() + filename = os.path.basename(image_url) + content_type = mimetypes.guess_type(filename)[0] or 'image/jpeg' + data.add_field( + 'image[]', + image_bytes, + filename=filename, + content_type=content_type + ) data.add_field('prompt', prompt) data.add_field('model', 'gpt-image-1') diff --git a/app/requests/generate_image_request.py b/app/requests/generate_image_request.py index 6ca9031..269889e 100644 --- a/app/requests/generate_image_request.py +++ b/app/requests/generate_image_request.py @@ -5,6 +5,7 @@ class GenerateImageRequest(BaseModel): file: Optional[str] = None file_url: Optional[str] = None + file_urls: Optional[list[str]] = None owner_id: Optional[str] = None prompt: Optional[str] = None agent_id: 
Optional[str] = None diff --git a/app/responses/generate_image_response.py b/app/responses/generate_image_response.py index a5129c3..4a9a644 100644 --- a/app/responses/generate_image_response.py +++ b/app/responses/generate_image_response.py @@ -7,6 +7,7 @@ class GenerateImageResponse(BaseModel): original_url: Optional[str] + original_urls: Optional[list[str]] generated_urls: List[str] generated_prompt: str vision_analysis: Optional[VisionAnalysisResponse] = None diff --git a/app/services/image_service.py b/app/services/image_service.py index 6154252..0ebe49e 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -1,7 +1,6 @@ from app.configurations.config import ( AGENT_IMAGE_VARIATIONS, ) -from app.externals.agent_config.agent_config_client import get_agent from app.externals.agent_config.requests.agent_config_request import AgentConfigRequest from app.externals.s3_upload.responses.s3_upload_response import S3UploadResponse from app.requests.generate_image_request import GenerateImageRequest @@ -18,7 +17,7 @@ import uuid from dotenv import load_dotenv from app.externals.google_vision.google_vision_client import analyze_image -from app.externals.images.image_client import generate_image_variation, google_image, openai_image_edit +from app.externals.images.image_client import openai_image_edit from typing import Optional load_dotenv() @@ -41,17 +40,10 @@ async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, ) ) - async def _generate_single_variation(self, url_image: str, prompt: str, owner_id: str, + async def _generate_single_variation(self, url_images: list[str], prompt: str, owner_id: str, folder_id: str, file: Optional[str] = None) -> str: - try: - image_content = await openai_image_edit(image_url=url_image, prompt=prompt) - except Exception as e: - try: - image_content = await google_image(prompt=prompt, file=file) - except Exception as e: - image_content = await generate_image_variation(image_url=url_image, 
prompt=prompt) - + image_content = await openai_image_edit(image_urls=url_images, prompt=prompt) content_base64 = base64.b64encode(image_content).decode('utf-8') final_upload = await self._upload_to_s3( @@ -81,7 +73,7 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ response = await self.message_service.handle_message(message_request) prompt = response["text"] + " Do not modify any text, letters, brand logos, brand names, or symbols." tasks = [ - self._generate_single_variation(original_image_response.s3_url, prompt, owner_id, folder_id, request.file) + self._generate_single_variation([original_image_response.s3_url], prompt, owner_id, folder_id, request.file) for i in range(request.num_variations) ] generated_urls = await asyncio.gather(*tasks) @@ -91,39 +83,45 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ async def generate_images_from(self, request: GenerateImageRequest, owner_id: str): folder_id = uuid.uuid4().hex[:8] - original_url = None + urls = request.file_urls or [] + original_url = request.file_url if request.file: original_image_response = await self._upload_to_s3(request.file, owner_id, folder_id, "original") original_url = original_image_response.s3_url + if len(urls) == 0 and original_url: + urls.append(request.file_url) + tasks = [ self._generate_single_variation( - original_url, + urls, request.prompt, owner_id, folder_id, - request.file + request.file, ) for i in range(request.num_variations) ] generated_urls = await asyncio.gather(*tasks) return GenerateImageResponse( + original_urls=urls, generated_urls=generated_urls, original_url=original_url, generated_prompt=request.prompt ) async def generate_images_from_agent(self, request: GenerateImageRequest, owner_id: str): - data = AgentConfigRequest( + data = MessageRequest( agent_id=request.agent_id, - query="", - parameter_prompt=request.parameter_prompt + query=request.agent_id, + parameter_prompt=request.parameter_prompt, + 
conversation_id="", ) - agent_config = await get_agent(data) - request.prompt = agent_config.prompt + message = await self.message_service.handle_message(data) + request.prompt = message["text"] response = await self.generate_images_from(request, owner_id) return response From e8275ae2ac7a670aea233c878bdf568214c45cf3 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 5 May 2025 21:06:36 -0500 Subject: [PATCH 047/195] add logic reduce image. --- app/services/image_service.py | 14 +++++++++++++- requirements.txt | 3 ++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/app/services/image_service.py b/app/services/image_service.py index 0ebe49e..08795c6 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -13,12 +13,14 @@ from app.externals.s3_upload.s3_upload_client import upload_file from fastapi import Depends import asyncio -import base64 import uuid from dotenv import load_dotenv from app.externals.google_vision.google_vision_client import analyze_image from app.externals.images.image_client import openai_image_edit from typing import Optional +import base64 +import io +from PIL import Image load_dotenv() @@ -31,6 +33,7 @@ async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, prefix_name: str) -> S3UploadResponse: unique_id = uuid.uuid4().hex[:8] file_name = f"{prefix_name}_{unique_id}" + image_base64 = self.__reduce_image(image_base64) return await upload_file( S3UploadRequest( @@ -40,6 +43,15 @@ async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, ) ) + def __reduce_image(self, image_bytes): + image_bytes_decode = base64.b64decode(image_bytes) + img = Image.open(io.BytesIO(image_bytes_decode)) + output_buffer = io.BytesIO() + img.save(output_buffer, format='WEBP', quality=80) + reduced_image_bytes = output_buffer.getvalue() + + return base64.b64encode(reduced_image_bytes).decode('utf-8') + async def _generate_single_variation(self, url_images: list[str], prompt: 
str, owner_id: str, folder_id: str, file: Optional[str] = None) -> str: diff --git a/requirements.txt b/requirements.txt index 146cf8a..5ad0186 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,5 @@ beautifulsoup4 lxml langchain_mcp langchain-mcp-adapters==0.0.9 -langchain-google-genai \ No newline at end of file +langchain-google-genai +Pillow==10.3.0 \ No newline at end of file From db83883c6d5332b39be8c75df1ddb7f91d724fa1 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Wed, 7 May 2025 15:06:23 -0500 Subject: [PATCH 048/195] transform webp. --- app/services/image_service.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/app/services/image_service.py b/app/services/image_service.py index 08795c6..7ab409a 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -43,13 +43,19 @@ async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, ) ) - def __reduce_image(self, image_bytes): - image_bytes_decode = base64.b64decode(image_bytes) - img = Image.open(io.BytesIO(image_bytes_decode)) + def __reduce_image(self, image_bytes_base64: str) -> str: + image_bytes = base64.b64decode(image_bytes_base64) + img = Image.open(io.BytesIO(image_bytes)) + + if img.mode in ("RGBA", "P"): + img = img.convert("RGBA") + else: + img = img.convert("RGB") + output_buffer = io.BytesIO() img.save(output_buffer, format='WEBP', quality=80) - reduced_image_bytes = output_buffer.getvalue() + reduced_image_bytes = output_buffer.getvalue() return base64.b64encode(reduced_image_bytes).decode('utf-8') async def _generate_single_variation(self, url_images: list[str], prompt: str, owner_id: str, From ba73d27671c46ce9b483fe796f288d08af2c8dd2 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 8 May 2025 19:02:44 -0500 Subject: [PATCH 049/195] without size, automatic platica. 
--- app/externals/images/image_client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/externals/images/image_client.py b/app/externals/images/image_client.py index b9678c7..f421834 100644 --- a/app/externals/images/image_client.py +++ b/app/externals/images/image_client.py @@ -140,7 +140,6 @@ async def openai_image_edit(image_urls: list[str], prompt: str) -> bytes: data.add_field('prompt', prompt) data.add_field('model', 'gpt-image-1') data.add_field('n', '1') - data.add_field('size', '1024x1024') try: async with aiohttp.ClientSession() as session: From cd8832059f69e6e331337f27e94eda83aac6729a Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 9 May 2025 00:24:43 -0500 Subject: [PATCH 050/195] resize 0.70% --- app/services/image_service.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/app/services/image_service.py b/app/services/image_service.py index 7ab409a..3a71a1c 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -52,6 +52,13 @@ def __reduce_image(self, image_bytes_base64: str) -> str: else: img = img.convert("RGB") + original_width, original_height = img.size + new_width = int(original_width * 0.70) + new_height = int(original_height * 0.70) + new_width = max(1, new_width) + new_height = max(1, new_height) + img = img.resize((new_width, new_height)) + output_buffer = io.BytesIO() img.save(output_buffer, format='WEBP', quality=80) From cb50bef52435c636f169ea010177621d1c45cdcc Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 9 May 2025 00:37:46 -0500 Subject: [PATCH 051/195] high and low --- app/services/image_service.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/app/services/image_service.py b/app/services/image_service.py index 3a71a1c..cd5f1b0 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -33,12 +33,20 @@ async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, prefix_name: str) -> 
S3UploadResponse: unique_id = uuid.uuid4().hex[:8] file_name = f"{prefix_name}_{unique_id}" - image_base64 = self.__reduce_image(image_base64) + image_base64_reduce = self.__reduce_image(image_base64) - return await upload_file( + await upload_file( S3UploadRequest( file=image_base64, - folder=f"{owner_id}/products/variations/{folder_id}", + folder=f"{owner_id}/products/variations/{folder_id}/high", + filename=file_name + ) + ) + + return await upload_file( + S3UploadRequest( + file=image_base64_reduce, + folder=f"{owner_id}/products/variations/{folder_id}/low", filename=file_name ) ) @@ -53,8 +61,8 @@ def __reduce_image(self, image_bytes_base64: str) -> str: img = img.convert("RGB") original_width, original_height = img.size - new_width = int(original_width * 0.70) - new_height = int(original_height * 0.70) + new_width = int(original_width * 0.30) + new_height = int(original_height * 0.30) new_width = max(1, new_width) new_height = max(1, new_height) img = img.resize((new_width, new_height)) From ecab6beb630d5baf92310cc28e16b8cb3c862ec4 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 9 May 2025 01:15:59 -0500 Subject: [PATCH 052/195] low and high image --- app/services/image_service.py | 48 +++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/app/services/image_service.py b/app/services/image_service.py index cd5f1b0..66c33e3 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -33,11 +33,12 @@ async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, prefix_name: str) -> S3UploadResponse: unique_id = uuid.uuid4().hex[:8] file_name = f"{prefix_name}_{unique_id}" - image_base64_reduce = self.__reduce_image(image_base64) + original_image_bytes = base64.b64decode(image_base64) + image_base64_high, image_base64_low = self._process_image_for_upload(original_image_bytes) await upload_file( S3UploadRequest( - file=image_base64, + file=image_base64_high, 
folder=f"{owner_id}/products/variations/{folder_id}/high", filename=file_name ) @@ -45,33 +46,48 @@ async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, return await upload_file( S3UploadRequest( - file=image_base64_reduce, + file=image_base64_low, folder=f"{owner_id}/products/variations/{folder_id}/low", filename=file_name ) ) - def __reduce_image(self, image_bytes_base64: str) -> str: - image_bytes = base64.b64decode(image_bytes_base64) - img = Image.open(io.BytesIO(image_bytes)) + def _process_image_for_upload(self, original_image_bytes: bytes) -> tuple[str, str]: + img = Image.open(io.BytesIO(original_image_bytes)) if img.mode in ("RGBA", "P"): - img = img.convert("RGBA") + img_converted = img.convert("RGBA") else: - img = img.convert("RGB") + img_converted = img.convert("RGB") - original_width, original_height = img.size - new_width = int(original_width * 0.30) - new_height = int(original_height * 0.30) + high_output_buffer = io.BytesIO() + img_converted.save(high_output_buffer, format='WEBP') + image_base64_high = base64.b64encode(high_output_buffer.getvalue()).decode('utf-8') + + original_width, original_height = img_converted.size + new_width = int(original_width * 0.60) + new_height = int(original_height * 0.60) new_width = max(1, new_width) new_height = max(1, new_height) - img = img.resize((new_width, new_height)) - output_buffer = io.BytesIO() - img.save(output_buffer, format='WEBP', quality=80) + resized_img = img_converted.resize((new_width, new_height)) + + temp_buffer_quality_100 = io.BytesIO() + resized_img.save(temp_buffer_quality_100, format='WEBP') + bytes_quality_100 = temp_buffer_quality_100.getvalue() + size_kb_quality_100 = len(bytes_quality_100) / 1024 + + final_low_image_bytes = bytes_quality_100 + if size_kb_quality_100 > 150: + print("al pelosdasdasdas") + final_low_buffer_quality_80 = io.BytesIO() + resized_img.save(final_low_buffer_quality_80, format='WEBP', quality=80) + final_low_image_bytes = 
final_low_buffer_quality_80.getvalue() + + image_base64_low = base64.b64encode(final_low_image_bytes).decode('utf-8') + + return image_base64_high, image_base64_low - reduced_image_bytes = output_buffer.getvalue() - return base64.b64encode(reduced_image_bytes).decode('utf-8') async def _generate_single_variation(self, url_images: list[str], prompt: str, owner_id: str, folder_id: str, file: Optional[str] = None) -> str: From 2f89672fc6512961cdd889e4fd324eb85ee5732c Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 9 May 2025 01:42:24 -0500 Subject: [PATCH 053/195] add to prompt --- app/externals/images/image_client.py | 3 ++ app/services/image_service.py | 42 ++++------------------------ 2 files changed, 9 insertions(+), 36 deletions(-) diff --git a/app/externals/images/image_client.py b/app/externals/images/image_client.py index f421834..0e2dbd2 100644 --- a/app/externals/images/image_client.py +++ b/app/externals/images/image_client.py @@ -137,6 +137,9 @@ async def openai_image_edit(image_urls: list[str], prompt: str) -> bytes: content_type=content_type ) + prompt = prompt + " **todo visible dentro del encuadre, centrado en una imagen cuadrada, composición ajustada al marco, sin elementos fuera del marco, escena contenida, sin recortes**" + + data.add_field('size', '1024x1024') data.add_field('prompt', prompt) data.add_field('model', 'gpt-image-1') data.add_field('n', '1') diff --git a/app/services/image_service.py b/app/services/image_service.py index 66c33e3..17de94c 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -34,25 +34,17 @@ async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, unique_id = uuid.uuid4().hex[:8] file_name = f"{prefix_name}_{unique_id}" original_image_bytes = base64.b64decode(image_base64) - image_base64_high, image_base64_low = self._process_image_for_upload(original_image_bytes) - - await upload_file( - S3UploadRequest( - file=image_base64_high, - 
folder=f"{owner_id}/products/variations/{folder_id}/high", - filename=file_name - ) - ) + image_base64_high = self._process_image_for_upload(original_image_bytes) return await upload_file( S3UploadRequest( - file=image_base64_low, - folder=f"{owner_id}/products/variations/{folder_id}/low", + file=image_base64_high, + folder=f"{owner_id}/products/variations/{folder_id}", filename=file_name ) ) - def _process_image_for_upload(self, original_image_bytes: bytes) -> tuple[str, str]: + def _process_image_for_upload(self, original_image_bytes: bytes) -> str: img = Image.open(io.BytesIO(original_image_bytes)) if img.mode in ("RGBA", "P"): @@ -61,32 +53,10 @@ def _process_image_for_upload(self, original_image_bytes: bytes) -> tuple[str, s img_converted = img.convert("RGB") high_output_buffer = io.BytesIO() - img_converted.save(high_output_buffer, format='WEBP') + img_converted.save(high_output_buffer, format='WEBP', quality=80) image_base64_high = base64.b64encode(high_output_buffer.getvalue()).decode('utf-8') - original_width, original_height = img_converted.size - new_width = int(original_width * 0.60) - new_height = int(original_height * 0.60) - new_width = max(1, new_width) - new_height = max(1, new_height) - - resized_img = img_converted.resize((new_width, new_height)) - - temp_buffer_quality_100 = io.BytesIO() - resized_img.save(temp_buffer_quality_100, format='WEBP') - bytes_quality_100 = temp_buffer_quality_100.getvalue() - size_kb_quality_100 = len(bytes_quality_100) / 1024 - - final_low_image_bytes = bytes_quality_100 - if size_kb_quality_100 > 150: - print("al pelosdasdasdas") - final_low_buffer_quality_80 = io.BytesIO() - resized_img.save(final_low_buffer_quality_80, format='WEBP', quality=80) - final_low_image_bytes = final_low_buffer_quality_80.getvalue() - - image_base64_low = base64.b64encode(final_low_image_bytes).decode('utf-8') - - return image_base64_high, image_base64_low + return image_base64_high async def _generate_single_variation(self, url_images: 
list[str], prompt: str, owner_id: str, From 05c139e2fd6295ef16288ca60ad949be7dedd97f Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 9 May 2025 01:48:54 -0500 Subject: [PATCH 054/195] better prompt --- app/externals/images/image_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/externals/images/image_client.py b/app/externals/images/image_client.py index 0e2dbd2..5e48451 100644 --- a/app/externals/images/image_client.py +++ b/app/externals/images/image_client.py @@ -137,7 +137,7 @@ async def openai_image_edit(image_urls: list[str], prompt: str) -> bytes: content_type=content_type ) - prompt = prompt + " **todo visible dentro del encuadre, centrado en una imagen cuadrada, composición ajustada al marco, sin elementos fuera del marco, escena contenida, sin recortes**" + prompt = prompt + ". **escena completa visible, composición centrada, todos los elementos dentro del marco cuadrado, nada recortado en los bordes, composición completa**" data.add_field('size', '1024x1024') data.add_field('prompt', prompt) From bdd1ccb779be913ff0aea17ce4a022655b72dedb Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Wed, 14 May 2025 01:09:08 -0500 Subject: [PATCH 055/195] add logic fallback antrhopic --- app/managers/conversation_manager.py | 35 ++++++++++++++++++++++------ app/providers/anthropic_provider.py | 2 +- app/providers/gemini_provider.py | 2 +- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/app/managers/conversation_manager.py b/app/managers/conversation_manager.py index 13d422f..e340d00 100644 --- a/app/managers/conversation_manager.py +++ b/app/managers/conversation_manager.py @@ -1,4 +1,4 @@ -from typing import Dict, Any, List +from typing import Dict, Any, List, Tuple from app.managers.conversation_manager_interface import ConversationManagerInterface from app.processors.agent_processor import AgentProcessor from app.processors.simple_processor import SimpleProcessor @@ -24,15 +24,36 @@ async def 
process_conversation(self, request: MessageRequest, agent_config: Agen ) history = self.get_conversation_history(request.conversation_id) or [] + is_simple = False if agent_config.mcp_config: processor = MCPProcessor(llm, agent_config.prompt, history, agent_config.mcp_config) else: tools = ToolGenerator.generate_tools(agent_config.tools or []) - processor = ( - AgentProcessor(llm, agent_config.prompt, history, tools) - if tools - else SimpleProcessor(llm, agent_config.prompt, history) - ) + if tools: + processor = AgentProcessor(llm, agent_config.prompt, history, tools) + else: + processor = SimpleProcessor(llm, agent_config.prompt, history) + is_simple = True - return await processor.process(request, request.files, ai_provider.supports_interleaved_files()) + try: + response = await processor.process(request, request.files, ai_provider.supports_interleaved_files()) + except Exception as e: + if is_simple: + response = await self._fallback_with_anthropic(request, agent_config, history) + else: + raise e + + return response + + async def _fallback_with_anthropic(self, request: MessageRequest, agent_config: AgentConfigResponse, history: list) -> dict[str, Any]: + anthropic_provider = AIProviderFactory.get_provider("claude") + anthropic_llm = anthropic_provider.get_llm( + model="claude-3-7-sonnet-20250219", + temperature=agent_config.preferences.temperature, + max_tokens=agent_config.preferences.max_tokens, + top_p=agent_config.preferences.top_p + ) + processor = SimpleProcessor(anthropic_llm, agent_config.prompt, history) + + return await processor.process(request, request.files, anthropic_provider.supports_interleaved_files()) diff --git a/app/providers/anthropic_provider.py b/app/providers/anthropic_provider.py index 95df258..b91a7f9 100644 --- a/app/providers/anthropic_provider.py +++ b/app/providers/anthropic_provider.py @@ -12,4 +12,4 @@ def get_llm(self, model: str, temperature: float, max_tokens: int, top_p: int) - ) def supports_interleaved_files(self) -> 
bool: - return False \ No newline at end of file + return True \ No newline at end of file diff --git a/app/providers/gemini_provider.py b/app/providers/gemini_provider.py index 1856e98..1adaf7b 100644 --- a/app/providers/gemini_provider.py +++ b/app/providers/gemini_provider.py @@ -15,4 +15,4 @@ def get_llm(self, model: str, temperature: float, max_tokens: int, top_p: int) - ) def supports_interleaved_files(self) -> bool: - return True \ No newline at end of file + return True From 500fc920c96df0654caf788e037c485601df6c75 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 15 May 2025 12:30:06 -0500 Subject: [PATCH 056/195] fix version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 5ad0186..0594e5b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ httpx>=0.24.0 langchain-community>=0.2.0 langchain-openai>=0.0.5 openai -langgraph>=0.0.10 +langgraph==0.3.31 langchain-core>=0.1.17 langchain-anthropic langchain-ollama From b611b642a8b66d14cd1370dad091ea60e9bdc364 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Sat, 31 May 2025 13:55:08 -0500 Subject: [PATCH 057/195] add history in memory --- app/managers/conversation_manager.py | 43 +++++++++++++++++++++++----- main.py | 6 +++- 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/app/managers/conversation_manager.py b/app/managers/conversation_manager.py index e340d00..46d0df4 100644 --- a/app/managers/conversation_manager.py +++ b/app/managers/conversation_manager.py @@ -1,4 +1,5 @@ from typing import Dict, Any, List, Tuple +from collections import defaultdict from app.managers.conversation_manager_interface import ConversationManagerInterface from app.processors.agent_processor import AgentProcessor from app.processors.simple_processor import SimpleProcessor @@ -10,9 +11,14 @@ class ConversationManager(ConversationManagerInterface): - # TODO HISTORY - def get_conversation_history(self, 
conversation_id: str) -> List: - return [] + def __init__(self): + self.history_store: Dict[str, List[Dict[str, Any]]] = defaultdict(list) + self.max_history_length: int = 10 + + def get_conversation_history(self, conversation_id: str) -> List[Dict[str, Any]]: + if conversation_id: + return self.history_store[conversation_id] + return [] async def process_conversation(self, request: MessageRequest, agent_config: AgentConfigResponse) -> dict[str, Any]: ai_provider = AIProviderFactory.get_provider(agent_config.provider_ai) @@ -23,7 +29,7 @@ async def process_conversation(self, request: MessageRequest, agent_config: Agen top_p=agent_config.preferences.top_p ) - history = self.get_conversation_history(request.conversation_id) or [] + history = self.get_conversation_history(request.conversation_id) is_simple = False if agent_config.mcp_config: @@ -37,14 +43,36 @@ async def process_conversation(self, request: MessageRequest, agent_config: Agen is_simple = True try: - response = await processor.process(request, request.files, ai_provider.supports_interleaved_files()) + response_data = await processor.process(request, request.files, ai_provider.supports_interleaved_files()) except Exception as e: if is_simple: - response = await self._fallback_with_anthropic(request, agent_config, history) + response_data = await self._fallback_with_anthropic(request, agent_config, history) else: raise e - return response + if request.conversation_id: + ai_response_content = response_data.get("text") + if ai_response_content is None: + ai_response_content = str(response_data) + + self._update_conversation_history( + conversation_id=request.conversation_id, + user_message_content=request.query, + ai_response_content=ai_response_content + ) + + return response_data + + def _update_conversation_history(self, conversation_id: str, user_message_content: str, ai_response_content: str) -> None: + if not conversation_id: + return + + self.history_store[conversation_id].append({"role": "user", 
"content": user_message_content}) + self.history_store[conversation_id].append({"role": "assistant", "content": ai_response_content}) + + current_conv_history = self.history_store[conversation_id] + if len(current_conv_history) > self.max_history_length: + self.history_store[conversation_id] = current_conv_history[-self.max_history_length:] async def _fallback_with_anthropic(self, request: MessageRequest, agent_config: AgentConfigResponse, history: list) -> dict[str, Any]: anthropic_provider = AIProviderFactory.get_provider("claude") @@ -54,6 +82,7 @@ async def _fallback_with_anthropic(self, request: MessageRequest, agent_config: max_tokens=agent_config.preferences.max_tokens, top_p=agent_config.preferences.top_p ) + processor = SimpleProcessor(anthropic_llm, agent_config.prompt, history) return await processor.process(request, request.files, anthropic_provider.supports_interleaved_files()) diff --git a/main.py b/main.py index f170616..8f89bb8 100644 --- a/main.py +++ b/main.py @@ -15,9 +15,13 @@ description="API for agent ai", version="1.0.0" ) + app.include_router(router) + +conversation_manager_singleton = ConversationManager() + app.dependency_overrides[MessageServiceInterface] = MessageService -app.dependency_overrides[ConversationManagerInterface] = ConversationManager +app.dependency_overrides[ConversationManagerInterface] = lambda: conversation_manager_singleton app.dependency_overrides[ImageServiceInterface] = ImageService app.dependency_overrides[ProductScrapingServiceInterface] = ProductScrapingService From 2f577ad8be602cea599af070ccd860afaac301e5 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Tue, 1 Jul 2025 16:23:36 -0500 Subject: [PATCH 058/195] add logic direct scrapper --- app/configurations/config.py | 1 + app/controllers/handle_controller.py | 11 +++++ app/helpers/escape_helper.py | 24 ++++++++++ app/requests/direct_scrape_request.py | 5 ++ app/scrapers/aliexpress_scraper.py | 5 +- app/scrapers/amazon_scraper.py | 4 ++ 
app/scrapers/cj_scraper.py | 5 ++ app/scrapers/ia_scraper.py | 48 ++++++++++++++++++- app/scrapers/scraper_interface.py | 8 ++++ app/services/product_scraping_service.py | 6 +++ .../product_scraping_service_interface.py | 3 ++ requirements.txt | 3 +- 12 files changed, 119 insertions(+), 4 deletions(-) create mode 100644 app/requests/direct_scrape_request.py diff --git a/app/configurations/config.py b/app/configurations/config.py index ae56fc4..704ff0b 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -18,6 +18,7 @@ AGENT_IMAGE_VARIATIONS = "agent_image_variations" SCRAPER_AGENT = "scraper_agent" +SCRAPER_AGENT_DIRECT = "scraper_agent_direct_code" AUTH_SERVICE_URL: str = os.getenv('AUTH_SERVICE_URL') diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index 9e01b14..0186950 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -2,6 +2,7 @@ import httpx from app.requests.copy_request import CopyRequest +from app.requests.direct_scrape_request import DirectScrapeRequest from app.requests.generate_image_request import GenerateImageRequest from app.requests.generate_pdf_request import GeneratePdfRequest from app.requests.recommend_product_request import RecommendProductRequest @@ -147,6 +148,16 @@ async def scrape_product( response = await service.scrape_product(scraping_request) return response +@router.post("/scrape-direct-html") +@require_auth +async def scrape_product_direct( + request: Request, + scraping_request: DirectScrapeRequest, + service: ProductScrapingServiceInterface = Depends() +): + response = await service.scrape_direct(scraping_request.html) + return response + @router.post("/resolve-info-funnel") async def resolve_funnel( diff --git a/app/helpers/escape_helper.py b/app/helpers/escape_helper.py index 40b04ad..9668555 100644 --- a/app/helpers/escape_helper.py +++ b/app/helpers/escape_helper.py @@ -30,3 +30,27 @@ def 
clean_html_deeply(html_content): simplified_html_clean = re.sub(r'\s+', ' ', simplified_html).strip() return simplified_html_clean + +def clean_html_less_deeply(html_content): + soup = BeautifulSoup(html_content, 'html5lib') + + for tag in soup(['script', 'style', 'noscript', 'svg', 'link', 'meta', 'head']): + tag.decompose() + + for tag in soup.find_all(True): + if tag.name == 'img': + tag.attrs = {key: tag.attrs[key] for key in ['src', 'alt', 'class', 'id', 'title'] if key in tag.attrs} + elif tag.name == 'a': + tag.attrs = {key: tag.attrs[key] for key in ['href', 'title', 'target', 'class', 'id'] if key in tag.attrs} + elif tag.name == 'source': + tag.attrs = {key: tag.attrs[key] for key in ['media', 'srcset', 'type'] if key in tag.attrs} + elif tag.name == 'picture': + tag.attrs = {key: tag.attrs[key] for key in ['id', 'class'] if key in tag.attrs} + else: + allowed_common_attrs = ['id', 'class'] + tag.attrs = {key: tag.attrs[key] for key in allowed_common_attrs if key in tag.attrs} + + simplified_html = str(soup) + simplified_html_clean = re.sub(r'\s+', ' ', simplified_html).strip() + + return simplified_html_clean \ No newline at end of file diff --git a/app/requests/direct_scrape_request.py b/app/requests/direct_scrape_request.py new file mode 100644 index 0000000..1d3f2bd --- /dev/null +++ b/app/requests/direct_scrape_request.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel, Field, validator + + +class DirectScrapeRequest(BaseModel): + html: str diff --git a/app/scrapers/aliexpress_scraper.py b/app/scrapers/aliexpress_scraper.py index 9b17914..deab4eb 100644 --- a/app/scrapers/aliexpress_scraper.py +++ b/app/scrapers/aliexpress_scraper.py @@ -4,9 +4,12 @@ import re from fastapi import HTTPException from decimal import Decimal, InvalidOperation - +from typing import Dict, Any class AliexpressScraper(ScraperInterface): + async def scrape_direct(self, html: str) -> Dict[str, Any]: + return {} + async def scrape(self, url: str, domain: str = None) -> 
Dict[str, Any]: item_id = self._extract_item_id(url) product_details = await get_item_detail(item_id) diff --git a/app/scrapers/amazon_scraper.py b/app/scrapers/amazon_scraper.py index 40f32d2..d2235b0 100644 --- a/app/scrapers/amazon_scraper.py +++ b/app/scrapers/amazon_scraper.py @@ -6,9 +6,13 @@ import re from app.externals.amazon.amazon_client import get_product_details from decimal import Decimal +from typing import Dict, Any class AmazonScraper(ScraperInterface): + async def scrape_direct(self, html: str) -> Dict[str, Any]: + return {} + async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: asin = self._extract_asin(url) diff --git a/app/scrapers/cj_scraper.py b/app/scrapers/cj_scraper.py index a328530..172eae5 100644 --- a/app/scrapers/cj_scraper.py +++ b/app/scrapers/cj_scraper.py @@ -1,3 +1,5 @@ +from typing import Dict, Any + import httpx from app.scrapers.scraper_interface import ScraperInterface from fastapi import HTTPException @@ -7,6 +9,9 @@ class CJScraper(ScraperInterface): def __init__(self): self.webhook_url = "https://n8n.fluxi.co/webhook/cj-search" + async def scrape_direct(self, html: str) -> Dict[str, Any]: + return {} + async def scrape(self, url: str, domain: str = None) -> dict: payload = { "url_cj": url diff --git a/app/scrapers/ia_scraper.py b/app/scrapers/ia_scraper.py index 6a63ae2..8cb884d 100644 --- a/app/scrapers/ia_scraper.py +++ b/app/scrapers/ia_scraper.py @@ -1,5 +1,5 @@ -from app.configurations.config import SCRAPER_AGENT -from app.helpers.escape_helper import clean_html_deeply +from app.configurations.config import SCRAPER_AGENT, SCRAPER_AGENT_DIRECT +from app.helpers.escape_helper import clean_html_less_deeply, clean_html_deeply from app.pdf.helpers import clean_text, clean_json from app.requests.message_request import MessageRequest from app.scrapers.helper_price import parse_price @@ -8,9 +8,53 @@ from app.externals.scraperapi.scraperapi_client import ScraperAPIClient from 
app.services.message_service_interface import MessageServiceInterface import json +import os +from datetime import datetime class IAScraper(ScraperInterface): + async def scrape_direct(self, html: str) -> Dict[str, Any]: + simplified_html_clean = clean_html_deeply(html) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + filename = f"simplified_html_{timestamp}.html" + + os.makedirs("scraped_html", exist_ok=True) + + filepath = os.path.join("scraped_html", filename) + with open(filepath, 'w', encoding='utf-8') as f: + f.write(simplified_html_clean) + + print(f"HTML simplificado guardado en: {filepath}") + + message_request = MessageRequest( + query=f"Product content: {simplified_html_clean} ", + agent_id=SCRAPER_AGENT_DIRECT, + conversation_id="", + json_parser={"code": "string"}) + + """ json_parser={ + "products": [ + { + "id": "string", + "title": "string", + "description": "string", + "price": 0, + "images": ["string"], + "product_url": "string", + "variants": [ + { + "title": "string", + "price": 0 + } + ] + } + ] + """ + + result = await self.message_service.handle_message_json(message_request) + + return result + def __init__(self, message_service: MessageServiceInterface): self.message_service = message_service diff --git a/app/scrapers/scraper_interface.py b/app/scrapers/scraper_interface.py index 44904b1..c6349fa 100644 --- a/app/scrapers/scraper_interface.py +++ b/app/scrapers/scraper_interface.py @@ -6,3 +6,11 @@ class ScraperInterface(ABC): @abstractmethod async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: pass + + @abstractmethod + async def scrape_direct(self, html: str) -> Dict[str, Any]: + """ + Optional method to scrape directly from HTML content. + This can be overridden by subclasses if needed. 
+ """ + raise NotImplementedError("This method is not implemented.") \ No newline at end of file diff --git a/app/services/product_scraping_service.py b/app/services/product_scraping_service.py index a9540e9..8e86563 100644 --- a/app/services/product_scraping_service.py +++ b/app/services/product_scraping_service.py @@ -16,3 +16,9 @@ async def scrape_product(self, request: ProductScrapingRequest): scraper = self.scraping_factory.get_scraper(url) return await scraper.scrape(url, domain) + + async def scrape_direct(self, html): + scraper = self.scraping_factory.get_scraper("https://www.macys.com/shop/womens-clothing/accessories/womens-sunglasses/Upc_bops_purchasable,Productsperpage/5376,120?id=28295&_additionalStoreLocations=5376") + + return await scraper.scrape_direct(html) + diff --git a/app/services/product_scraping_service_interface.py b/app/services/product_scraping_service_interface.py index e9015ba..864bced 100644 --- a/app/services/product_scraping_service_interface.py +++ b/app/services/product_scraping_service_interface.py @@ -6,3 +6,6 @@ class ProductScrapingServiceInterface(ABC): @abstractmethod async def scrape_product(self, request: ProductScrapingRequest): pass + + async def scrape_direct(self, html): + pass diff --git a/requirements.txt b/requirements.txt index 0594e5b..b069251 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,4 +17,5 @@ lxml langchain_mcp langchain-mcp-adapters==0.0.9 langchain-google-genai -Pillow==10.3.0 \ No newline at end of file +Pillow==10.3.0 +html5lib \ No newline at end of file From 7b9c9b469c5b7d860238ad71f2376a4f558b9279 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Tue, 1 Jul 2025 16:57:26 -0500 Subject: [PATCH 059/195] add scrapper dropi --- .env.example | 6 +- app/configurations/config.py | 6 +- app/externals/dropi/__init__.py | 1 + app/externals/dropi/dropi_client.py | 27 ++++++ app/factories/scraping_factory.py | 3 + app/scrapers/dropi_scraper.py | 123 ++++++++++++++++++++++++++++ 6 files changed, 164 
insertions(+), 2 deletions(-) create mode 100644 app/externals/dropi/__init__.py create mode 100644 app/externals/dropi/dropi_client.py create mode 100644 app/scrapers/dropi_scraper.py diff --git a/.env.example b/.env.example index 18153f2..a590dab 100644 --- a/.env.example +++ b/.env.example @@ -15,4 +15,8 @@ REPLICATE_API_KEY=dsadadasda SCRAPERAPI_KEY=dsadsadsadasdsadas URL_SCRAPER_LAMBDA=https://localhost:8000/ GOOGLE_GEMINI_API_KEY=sadasadasdasd -ENVIRONMENT=dev \ No newline at end of file +ENVIRONMENT=dev + +DROPI_S3_BASE_URL=https://d39ru7awumhhs2.cloudfront.net/ +DROPI_API_URL=https://test-api.dropi.co/integrations/products/index +DROPI_API_KEY=dasdsadadasdas \ No newline at end of file diff --git a/app/configurations/config.py b/app/configurations/config.py index 704ff0b..be2c5a0 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -32,4 +32,8 @@ ENVIRONMENT: str = os.getenv('ENVIRONMENT') -OPENAI_API_KEY: str = os.getenv('OPENAI_API_KEY') \ No newline at end of file +OPENAI_API_KEY: str = os.getenv('OPENAI_API_KEY') + +DROPI_S3_BASE_URL: str = os.getenv('DROPI_S3_BASE_URL', 'https://d39ru7awumhhs2.cloudfront.net/') +DROPI_API_URL: str = os.getenv('DROPI_API_URL', 'https://test-api.dropi.co/integrations/products/index') +DROPI_API_KEY: str = os.getenv('DROPI_API_KEY') \ No newline at end of file diff --git a/app/externals/dropi/__init__.py b/app/externals/dropi/__init__.py new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/app/externals/dropi/__init__.py @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/app/externals/dropi/dropi_client.py b/app/externals/dropi/dropi_client.py new file mode 100644 index 0000000..7b49d95 --- /dev/null +++ b/app/externals/dropi/dropi_client.py @@ -0,0 +1,27 @@ +import httpx +from typing import Dict, Any + +from app.configurations.config import DROPI_API_URL, DROPI_API_KEY + + +async def get_product_details(product_id: str) -> Dict[str, Any]: + headers = { + 
"dropi-integration-key": DROPI_API_KEY, + "Content-Type": "application/json" + } + payload = { + "pageSize": 1, + "startData": 0, + "products.id": product_id + } + + async with httpx.AsyncClient() as client: + try: + response = await client.post(DROPI_API_URL, headers=headers, json=payload) + response.raise_for_status() + return response.json() + except httpx.HTTPStatusError as e: + # You can add more specific error handling here + raise Exception(f"API request failed with status {e.response.status_code}: {e.response.text}") + except httpx.RequestError as e: + raise Exception(f"API request failed: {str(e)}") \ No newline at end of file diff --git a/app/factories/scraping_factory.py b/app/factories/scraping_factory.py index e770dd8..10578c8 100644 --- a/app/factories/scraping_factory.py +++ b/app/factories/scraping_factory.py @@ -6,6 +6,7 @@ from app.scrapers.amazon_scraper import AmazonScraper from app.scrapers.aliexpress_scraper import AliexpressScraper from app.scrapers.cj_scraper import CJScraper +from app.scrapers.dropi_scraper import DropiScraper from app.scrapers.ia_scraper import IAScraper from app.services.message_service_interface import MessageServiceInterface @@ -23,5 +24,7 @@ def get_scraper(self, url: str) -> ScraperInterface: return AliexpressScraper() elif "cjdropshipping" in domain: return CJScraper() + elif "dropi.co" in domain: + return DropiScraper() else: return IAScraper(message_service=self.message_service) diff --git a/app/scrapers/dropi_scraper.py b/app/scrapers/dropi_scraper.py new file mode 100644 index 0000000..bb57b53 --- /dev/null +++ b/app/scrapers/dropi_scraper.py @@ -0,0 +1,123 @@ +import re +from decimal import Decimal +from typing import Dict, Any, List, Optional + +from fastapi import HTTPException + +from app.externals.dropi.dropi_client import get_product_details +from app.scrapers.helper_price import parse_price +from app.scrapers.scraper_interface import ScraperInterface +from app.configurations.config import DROPI_S3_BASE_URL 
+ + +class DropiScraper(ScraperInterface): + async def scrape_direct(self, html: str) -> Dict[str, Any]: + return {} + + async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: + product_id = self._extract_product_id(url) + + try: + data = await get_product_details(product_id) + product_data = self._get_product_data(data) + + result = { + "name": self._get_name(product_data), + "description": self._get_description(product_data), + "external_sell_price": self._get_price(product_data), + "images": self._get_images(product_data), + "sku": self._get_sku(product_data), + "suggested_price": self._get_suggested_price(product_data), + "stock": self._get_stock(product_data), + } + + variants = self._extract_variants(product_data) + if variants: + result["variants"] = variants + + response = { + "provider_id": "dropi", + "external_id": product_id, + **result + } + + return {"data": response} + + except Exception as e: + raise HTTPException( + status_code=400, + detail=f"Error processing product data from Dropi: {str(e)}" + ) + + def _get_product_data(self, response: Dict[str, Any]) -> Dict[str, Any]: + if not response.get("isSuccess"): + raise ValueError("Dropi API returned an error.") + + objects = response.get("objects", []) + if not objects: + raise ValueError("No product data found in Dropi response") + return objects[0] + + def _get_name(self, product_data: Dict[str, Any]) -> str: + return product_data.get("name", "") + + def _get_description(self, product_data: Dict[str, Any]) -> str: + categories = product_data.get("categories", []) + if not categories: + return "" + + category_names = [cat.get("name") for cat in categories if cat.get("name")] + if not category_names: + return "" + + return f"Categorías: {', '.join(category_names)}" + + def _get_price(self, product_data: Dict[str, Any]) -> Optional[Decimal]: + price_str = product_data.get("sale_price") + if not price_str: + return None + return parse_price(price_str) + + def _get_sku(self, 
product_data: Dict[str, Any]) -> str: + return product_data.get("sku", "") + + def _get_suggested_price(self, product_data: Dict[str, Any]) -> Optional[Decimal]: + price_str = product_data.get("suggested_price") + if not price_str: + return None + return parse_price(price_str) + + def _get_stock(self, product_data: Dict[str, Any]) -> int: + warehouses = product_data.get("warehouse_product", []) + if not warehouses: + return 0 + + total_stock = sum(w.get("stock", 0) for w in warehouses) + return total_stock + + def _get_images(self, product_data: Dict[str, Any]) -> List[str]: + gallery = product_data.get("gallery", []) + if not gallery: + return [] + + images = [] + for item in gallery: + if item.get("urlS3"): + images.append(DROPI_S3_BASE_URL + item["urlS3"]) + return images + + def _extract_variants(self, product_data: Dict[str, Any]) -> List[Dict[str, Any]]: + variations = product_data.get("variations", []) + if not variations: + return [] + return [] + + def _extract_product_id(self, url: str) -> str: + match = re.search(r'/product-details/(\d+)', url) + if match: + return match.group(1) + + raise HTTPException( + status_code=400, + detail="Product ID not found in Dropi URL" + ) \ No newline at end of file From 8ab5f7ceb9fd35cfc82e6d387b90bc2abceab240 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Tue, 1 Jul 2025 17:00:23 -0500 Subject: [PATCH 060/195] eliminate comment --- app/externals/dropi/dropi_client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/externals/dropi/dropi_client.py b/app/externals/dropi/dropi_client.py index 7b49d95..67099de 100644 --- a/app/externals/dropi/dropi_client.py +++ b/app/externals/dropi/dropi_client.py @@ -21,7 +21,6 @@ async def get_product_details(product_id: str) -> Dict[str, Any]: response.raise_for_status() return response.json() except httpx.HTTPStatusError as e: - # You can add more specific error handling here raise Exception(f"API request failed with status {e.response.status_code}: {e.response.text}") except 
httpx.RequestError as e: raise Exception(f"API request failed: {str(e)}") \ No newline at end of file From 6f5ca077c28bfa53fb32d8b0d4db39ac5a09a463 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Tue, 1 Jul 2025 17:03:18 -0500 Subject: [PATCH 061/195] eliminate fields innecesaries --- app/scrapers/dropi_scraper.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/app/scrapers/dropi_scraper.py b/app/scrapers/dropi_scraper.py index bb57b53..cd8a2cb 100644 --- a/app/scrapers/dropi_scraper.py +++ b/app/scrapers/dropi_scraper.py @@ -26,9 +26,6 @@ async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: "description": self._get_description(product_data), "external_sell_price": self._get_price(product_data), "images": self._get_images(product_data), - "sku": self._get_sku(product_data), - "suggested_price": self._get_suggested_price(product_data), - "stock": self._get_stock(product_data), } variants = self._extract_variants(product_data) @@ -78,23 +75,6 @@ def _get_price(self, product_data: Dict[str, Any]) -> Optional[Decimal]: return None return parse_price(price_str) - def _get_sku(self, product_data: Dict[str, Any]) -> str: - return product_data.get("sku", "") - - def _get_suggested_price(self, product_data: Dict[str, Any]) -> Optional[Decimal]: - price_str = product_data.get("suggested_price") - if not price_str: - return None - return parse_price(price_str) - - def _get_stock(self, product_data: Dict[str, Any]) -> int: - warehouses = product_data.get("warehouse_product", []) - if not warehouses: - return 0 - - total_stock = sum(w.get("stock", 0) for w in warehouses) - return total_stock - def _get_images(self, product_data: Dict[str, Any]) -> List[str]: gallery = product_data.get("gallery", []) if not gallery: From fd6ab6779a82872e28ca8001700b9f1a681b1168 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Wed, 2 Jul 2025 10:24:32 -0500 Subject: [PATCH 062/195] change api --- .env.example | 2 +- app/configurations/config.py | 2 
+- app/externals/dropi/dropi_client.py | 12 ++++-------- app/scrapers/dropi_scraper.py | 27 ++++++++++++++------------- 4 files changed, 20 insertions(+), 23 deletions(-) diff --git a/.env.example b/.env.example index a590dab..947fa65 100644 --- a/.env.example +++ b/.env.example @@ -18,5 +18,5 @@ GOOGLE_GEMINI_API_KEY=sadasadasdasd ENVIRONMENT=dev DROPI_S3_BASE_URL=https://d39ru7awumhhs2.cloudfront.net/ -DROPI_API_URL=https://test-api.dropi.co/integrations/products/index +DROPI_API_URL=https://test-api.dropi.co/integrations/products/v2 DROPI_API_KEY=dasdsadadasdas \ No newline at end of file diff --git a/app/configurations/config.py b/app/configurations/config.py index be2c5a0..3fcad45 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -35,5 +35,5 @@ OPENAI_API_KEY: str = os.getenv('OPENAI_API_KEY') DROPI_S3_BASE_URL: str = os.getenv('DROPI_S3_BASE_URL', 'https://d39ru7awumhhs2.cloudfront.net/') -DROPI_API_URL: str = os.getenv('DROPI_API_URL', 'https://test-api.dropi.co/integrations/products/index') +DROPI_API_URL: str = os.getenv('DROPI_API_URL', 'https://test-api.dropi.co/integrations/products/v2') DROPI_API_KEY: str = os.getenv('DROPI_API_KEY') \ No newline at end of file diff --git a/app/externals/dropi/dropi_client.py b/app/externals/dropi/dropi_client.py index 67099de..bd50b5c 100644 --- a/app/externals/dropi/dropi_client.py +++ b/app/externals/dropi/dropi_client.py @@ -6,18 +6,14 @@ async def get_product_details(product_id: str) -> Dict[str, Any]: headers = { - "dropi-integration-key": DROPI_API_KEY, - "Content-Type": "application/json" - } - payload = { - "pageSize": 1, - "startData": 0, - "products.id": product_id + "dropi-integration-key": DROPI_API_KEY } + url = f"{DROPI_API_URL}/{product_id}" + async with httpx.AsyncClient() as client: try: - response = await client.post(DROPI_API_URL, headers=headers, json=payload) + response = await client.get(url, headers=headers) response.raise_for_status() return response.json() except 
httpx.HTTPStatusError as e: diff --git a/app/scrapers/dropi_scraper.py b/app/scrapers/dropi_scraper.py index cd8a2cb..c787e1c 100644 --- a/app/scrapers/dropi_scraper.py +++ b/app/scrapers/dropi_scraper.py @@ -50,24 +50,25 @@ def _get_product_data(self, response: Dict[str, Any]) -> Dict[str, Any]: if not response.get("isSuccess"): raise ValueError("Dropi API returned an error.") - objects = response.get("objects", []) - if not objects: + product_data = response.get("objects") + if not product_data or not isinstance(product_data, dict): raise ValueError("No product data found in Dropi response") - return objects[0] + return product_data def _get_name(self, product_data: Dict[str, Any]) -> str: return product_data.get("name", "") def _get_description(self, product_data: Dict[str, Any]) -> str: - categories = product_data.get("categories", []) - if not categories: + html_description = product_data.get("description", "") + if not html_description: return "" - category_names = [cat.get("name") for cat in categories if cat.get("name")] - if not category_names: - return "" - - return f"Categorías: {', '.join(category_names)}" + # Remove HTML tags for a cleaner description + clean_text = re.sub(r'<[^>]+>', ' ', html_description) + # Replace
<br> with newlines and clean up whitespace + clean_text = clean_text.replace('<br>
', '\n').strip() + clean_text = re.sub(r'\s+', ' ', clean_text).strip() + return clean_text def _get_price(self, product_data: Dict[str, Any]) -> Optional[Decimal]: price_str = product_data.get("sale_price") @@ -76,12 +77,12 @@ def _get_price(self, product_data: Dict[str, Any]) -> Optional[Decimal]: return parse_price(price_str) def _get_images(self, product_data: Dict[str, Any]) -> List[str]: - gallery = product_data.get("gallery", []) - if not gallery: + photos = product_data.get("photos", []) + if not photos: return [] images = [] - for item in gallery: + for item in photos: if item.get("urlS3"): images.append(DROPI_S3_BASE_URL + item["urlS3"]) return images From 8c76b6259690e0f5650cc8603f1477f6fdba7412 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Wed, 2 Jul 2025 10:52:24 -0500 Subject: [PATCH 063/195] add new services dropi --- .env.example | 2 +- app/configurations/config.py | 2 +- app/controllers/handle_controller.py | 17 ++++++++++ app/externals/dropi/dropi_client.py | 41 +++++++++++++++++++++++-- app/services/dropi_service.py | 25 +++++++++++++++ app/services/dropi_service_interface.py | 12 ++++++++ 6 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 app/services/dropi_service.py create mode 100644 app/services/dropi_service_interface.py diff --git a/.env.example b/.env.example index 947fa65..94b21c8 100644 --- a/.env.example +++ b/.env.example @@ -17,6 +17,6 @@ URL_SCRAPER_LAMBDA=https://localhost:8000/ GOOGLE_GEMINI_API_KEY=sadasadasdasd ENVIRONMENT=dev +DROPI_HOST=https://test-api.dropi.co DROPI_S3_BASE_URL=https://d39ru7awumhhs2.cloudfront.net/ -DROPI_API_URL=https://test-api.dropi.co/integrations/products/v2 DROPI_API_KEY=dasdsadadasdas \ No newline at end of file diff --git a/app/configurations/config.py b/app/configurations/config.py index 3fcad45..1b6419e 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -35,5 +35,5 @@ OPENAI_API_KEY: str = os.getenv('OPENAI_API_KEY') DROPI_S3_BASE_URL: str = 
os.getenv('DROPI_S3_BASE_URL', 'https://d39ru7awumhhs2.cloudfront.net/') -DROPI_API_URL: str = os.getenv('DROPI_API_URL', 'https://test-api.dropi.co/integrations/products/v2') +DROPI_HOST: str = os.getenv('DROPI_HOST', 'https://test-api.dropi.co') DROPI_API_KEY: str = os.getenv('DROPI_API_KEY') \ No newline at end of file diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index 0186950..960e92e 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -17,11 +17,28 @@ from app.middlewares.auth_middleware import require_auth, require_api_key from pydantic import BaseModel +# Importaciones para Dropi +from app.services.dropi_service_interface import DropiServiceInterface +from app.services.dropi_service import DropiService + router = APIRouter( prefix="/api/ms/conversational-engine", tags=["conversational-agent"] ) +@router.get("/integration/dropi/departments") +async def get_departments( + service: DropiServiceInterface = Depends(DropiService) +): + return await service.get_departments() + +@router.get("/integration/dropi/departments/{department_id}/cities") +async def get_cities_by_department( + department_id: int, + service: DropiServiceInterface = Depends(DropiService) +): + return await service.get_cities_by_department(department_id) + @router.post("/handle-message") async def handle_message( request: MessageRequest, diff --git a/app/externals/dropi/dropi_client.py b/app/externals/dropi/dropi_client.py index bd50b5c..c68286d 100644 --- a/app/externals/dropi/dropi_client.py +++ b/app/externals/dropi/dropi_client.py @@ -1,7 +1,7 @@ import httpx from typing import Dict, Any -from app.configurations.config import DROPI_API_URL, DROPI_API_KEY +from app.configurations.config import DROPI_HOST, DROPI_API_KEY async def get_product_details(product_id: str) -> Dict[str, Any]: @@ -9,7 +9,7 @@ async def get_product_details(product_id: str) -> Dict[str, Any]: "dropi-integration-key": DROPI_API_KEY } 
- url = f"{DROPI_API_URL}/{product_id}" + url = f"{DROPI_HOST}/integrations/products/v2/{product_id}" async with httpx.AsyncClient() as client: try: @@ -18,5 +18,42 @@ async def get_product_details(product_id: str) -> Dict[str, Any]: return response.json() except httpx.HTTPStatusError as e: raise Exception(f"API request failed with status {e.response.status_code}: {e.response.text}") + except httpx.RequestError as e: + raise Exception(f"API request failed: {str(e)}") + + +async def get_departments() -> Dict[str, Any]: + headers = { + "dropi-integration-key": DROPI_API_KEY + } + url = f"{DROPI_HOST}/integrations/department" + async with httpx.AsyncClient() as client: + try: + response = await client.get(url, headers=headers) + response.raise_for_status() + return response.json() + except httpx.HTTPStatusError as e: + raise Exception(f"API request failed with status {e.response.status_code}: {e.response.text}") + except httpx.RequestError as e: + raise Exception(f"API request failed: {str(e)}") + + +async def get_cities_by_department(department_id: int, rate_type: str) -> Dict[str, Any]: + headers = { + "dropi-integration-key": DROPI_API_KEY, + "Content-Type": "application/json" + } + payload = { + "department_id": department_id, + "rate_type": rate_type + } + url = f"{DROPI_HOST}/integrations/trajectory/bycity" + async with httpx.AsyncClient() as client: + try: + response = await client.post(url, headers=headers, json=payload) + response.raise_for_status() + return response.json() + except httpx.HTTPStatusError as e: + raise Exception(f"API request failed with status {e.response.status_code}: {e.response.text}") except httpx.RequestError as e: raise Exception(f"API request failed: {str(e)}") \ No newline at end of file diff --git a/app/services/dropi_service.py b/app/services/dropi_service.py new file mode 100644 index 0000000..d16768c --- /dev/null +++ b/app/services/dropi_service.py @@ -0,0 +1,25 @@ +from typing import List, Dict, Any +from fastapi import Depends, 
HTTPException + +from app.externals.dropi import dropi_client +from app.services.dropi_service_interface import DropiServiceInterface + + +class DropiService(DropiServiceInterface): + def __init__(self): + pass + + async def get_departments(self) -> List[Dict[str, Any]]: + try: + response = await dropi_client.get_departments() + return response.get("objects", []) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error fetching departments from Dropi: {str(e)}") + + async def get_cities_by_department(self, department_id: int) -> List[Dict[str, Any]]: + try: + rate_type = "CON RECAUDO" + response = await dropi_client.get_cities_by_department(department_id, rate_type) + return response.get("objects", {}).get("cities", []) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error fetching cities from Dropi: {str(e)}") \ No newline at end of file diff --git a/app/services/dropi_service_interface.py b/app/services/dropi_service_interface.py new file mode 100644 index 0000000..f60cb69 --- /dev/null +++ b/app/services/dropi_service_interface.py @@ -0,0 +1,12 @@ +from abc import ABC, abstractmethod +from typing import List, Dict, Any + + +class DropiServiceInterface(ABC): + @abstractmethod + async def get_departments(self) -> List[Dict[str, Any]]: + pass + + @abstractmethod + async def get_cities_by_department(self, department_id: int) -> List[Dict[str, Any]]: + pass \ No newline at end of file From b7a4718cc461c2e5406b19d12d8f2d6ee41c5ed3 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 17 Jul 2025 03:46:52 -0500 Subject: [PATCH 064/195] add logic for create context and brand for store --- app/controllers/handle_controller.py | 15 ++++++++-- .../brand_context_resolver_request.py | 11 ++++++++ app/services/message_service.py | 28 +++++++++++++++++++ app/services/message_service_interface.py | 5 ++++ 4 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 app/requests/brand_context_resolver_request.py diff 
--git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index 960e92e..a8cb813 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -1,6 +1,7 @@ import base64 import httpx +from app.requests.brand_context_resolver_request import BrandContextResolverRequest from app.requests.copy_request import CopyRequest from app.requests.direct_scrape_request import DirectScrapeRequest from app.requests.generate_image_request import GenerateImageRequest @@ -178,10 +179,20 @@ async def scrape_product_direct( @router.post("/resolve-info-funnel") async def resolve_funnel( - request: ResolveFunnelRequest, + requestResolve: ResolveFunnelRequest, message_service: MessageServiceInterface = Depends() ): - response = await message_service.resolve_funnel(request) + response = await message_service.resolve_funnel(requestResolve) + return response + +@router.post("/store/brand-context-resolver") +@require_auth +async def brand_context_resolver( + request: Request, + requestBrand: BrandContextResolverRequest, + message_service: MessageServiceInterface = Depends() +): + response = await message_service.resolve_brand_context(requestBrand) return response diff --git a/app/requests/brand_context_resolver_request.py b/app/requests/brand_context_resolver_request.py new file mode 100644 index 0000000..b4e37ee --- /dev/null +++ b/app/requests/brand_context_resolver_request.py @@ -0,0 +1,11 @@ +from pydantic import BaseModel +from typing import List + + +class BrandContextResolverRequest(BaseModel): + websites_info: List + + @property + def prompt(self) -> dict: + websites_info_str = ", ".join(str(item) for item in self.websites_info) + return {"websites_info": websites_info_str} diff --git a/app/services/message_service.py b/app/services/message_service.py index 4246c9f..7a3909b 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -7,6 +7,7 @@ from app.externals.s3_upload.requests.s3_upload_request 
import S3UploadRequest from app.externals.s3_upload.s3_upload_client import upload_file, check_file_exists_direct from app.pdf.helpers import clean_text, clean_json +from app.requests.brand_context_resolver_request import BrandContextResolverRequest from app.requests.copy_request import CopyRequest from app.requests.generate_pdf_request import GeneratePdfRequest from app.requests.message_request import MessageRequest @@ -182,3 +183,30 @@ async def resolve_funnel(self, request: ResolveFunnelRequest): "buyer_detection": buyer_detection_message, "sales_angles": sales_angles_response["angles"] } + + async def resolve_brand_context(self, request: BrandContextResolverRequest): + brand_agent_task = self.handle_message_json(MessageRequest( + agent_id="store_brand_agent", + conversation_id="", + query="store_brand_agent", + parameter_prompt=request.prompt, + json_parser={"brands": ["string", "string"]} + )) + + context_agent_task = self.handle_message_json(MessageRequest( + agent_id="store_context_agent", + conversation_id="", + query="store_context_agent", + parameter_prompt=request.prompt, + json_parser={"contexts": ["string", "string"]} + )) + + responses = await asyncio.gather(brand_agent_task, context_agent_task) + + brands = responses[0].get("brands", []) + contexts = responses[1].get("contexts", []) + + return { + "brands": brands, + "contexts": contexts + } diff --git a/app/services/message_service_interface.py b/app/services/message_service_interface.py index c4163ba..bef43c5 100644 --- a/app/services/message_service_interface.py +++ b/app/services/message_service_interface.py @@ -4,6 +4,7 @@ from app.requests.message_request import MessageRequest from app.requests.recommend_product_request import RecommendProductRequest from app.requests.resolve_funnel_request import ResolveFunnelRequest +from app.requests.brand_context_resolver_request import BrandContextResolverRequest class MessageServiceInterface(ABC): @@ -28,4 +29,8 @@ async def generate_pdf(self, request): 
@abstractmethod async def resolve_funnel(self, request: ResolveFunnelRequest): + pass + + @abstractmethod + async def resolve_brand_context(self, request: BrandContextResolverRequest): pass \ No newline at end of file From 9c4d82196acd9700a8343ad7d52c2c744324e284 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 17 Jul 2025 03:48:53 -0500 Subject: [PATCH 065/195] revert controller --- app/controllers/handle_controller.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index a8cb813..7f2b9ac 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -179,10 +179,10 @@ async def scrape_product_direct( @router.post("/resolve-info-funnel") async def resolve_funnel( - requestResolve: ResolveFunnelRequest, + request: ResolveFunnelRequest, message_service: MessageServiceInterface = Depends() ): - response = await message_service.resolve_funnel(requestResolve) + response = await message_service.resolve_funnel(request) return response @router.post("/store/brand-context-resolver") From 3dddc90d7228717b62b4d8f510f84f229459ed78 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Wed, 23 Jul 2025 15:39:47 -0400 Subject: [PATCH 066/195] comment variants. 
--- app/scrapers/aliexpress_scraper.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/app/scrapers/aliexpress_scraper.py b/app/scrapers/aliexpress_scraper.py index deab4eb..b23b78f 100644 --- a/app/scrapers/aliexpress_scraper.py +++ b/app/scrapers/aliexpress_scraper.py @@ -24,9 +24,11 @@ async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: "images": self._get_images(item_data) } - variants = self._extract_variants(item_data) - if variants: - result["variants"] = variants + """ + variants = self._extract_variants(item_data) + if variants: + result["variants"] = variants + """ response = { "provider_id": "aliexpress", From f1ac12b0ee5b2f5b302ac559546513296144cd1d Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 24 Jul 2025 15:29:58 -0400 Subject: [PATCH 067/195] new logic for alibaba and price --- app/scrapers/helper_price.py | 13 ++++++++----- app/scrapers/ia_scraper.py | 7 +++++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/app/scrapers/helper_price.py b/app/scrapers/helper_price.py index 455cc7e..e728944 100644 --- a/app/scrapers/helper_price.py +++ b/app/scrapers/helper_price.py @@ -1,5 +1,6 @@ from decimal import Decimal from typing import Optional, Any +import re def parse_price(price_str: Any) -> Optional[Decimal]: @@ -7,10 +8,12 @@ def parse_price(price_str: Any) -> Optional[Decimal]: return Decimal(str(price_str)) if isinstance(price_str, str): - clean_price = price_str.replace("$", "").replace(",", "").strip() - try: - return Decimal(clean_price) - except: - pass + match = re.search(r"[\d,.]+", price_str) + if match: + num_str = match.group(0).replace(",", "") + try: + return Decimal(num_str) + except: + pass return None diff --git a/app/scrapers/ia_scraper.py b/app/scrapers/ia_scraper.py index 8cb884d..c9c979f 100644 --- a/app/scrapers/ia_scraper.py +++ b/app/scrapers/ia_scraper.py @@ -17,7 +17,7 @@ async def scrape_direct(self, html: str) -> Dict[str, Any]: simplified_html_clean = 
clean_html_deeply(html) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") filename = f"simplified_html_{timestamp}.html" - + os.makedirs("scraped_html", exist_ok=True) filepath = os.path.join("scraped_html", filename) @@ -60,7 +60,10 @@ def __init__(self, message_service: MessageServiceInterface): async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: client = ScraperAPIClient() - html_content = await client.get_html_lambda(url) + if domain and 'alibaba' in domain: + html_content = await client.get_html(url) + else: + html_content = await client.get_html_lambda(url) simplified_html_clean = clean_html_deeply(html_content) message_request = MessageRequest( From bba40f32730685f349df790e41dc123549770127 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 24 Jul 2025 15:47:02 -0400 Subject: [PATCH 068/195] revert --- app/scrapers/ia_scraper.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/app/scrapers/ia_scraper.py b/app/scrapers/ia_scraper.py index c9c979f..8fe490b 100644 --- a/app/scrapers/ia_scraper.py +++ b/app/scrapers/ia_scraper.py @@ -60,10 +60,7 @@ def __init__(self, message_service: MessageServiceInterface): async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: client = ScraperAPIClient() - if domain and 'alibaba' in domain: - html_content = await client.get_html(url) - else: - html_content = await client.get_html_lambda(url) + html_content = await client.get_html_lambda(url) simplified_html_clean = clean_html_deeply(html_content) message_request = MessageRequest( From c2ac91b1aa28322e7821e486f5e3872715abcbd0 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 24 Jul 2025 17:54:20 -0400 Subject: [PATCH 069/195] again --- app/scrapers/ia_scraper.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/scrapers/ia_scraper.py b/app/scrapers/ia_scraper.py index 8fe490b..233f5fa 100644 --- a/app/scrapers/ia_scraper.py +++ b/app/scrapers/ia_scraper.py @@ -60,7 +60,10 @@ def 
__init__(self, message_service: MessageServiceInterface): async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: client = ScraperAPIClient() - html_content = await client.get_html_lambda(url) + if domain and "alibaba" in domain: + html_content = await client.get_html(url) + else: + html_content = await client.get_html_lambda(url) simplified_html_clean = clean_html_deeply(html_content) message_request = MessageRequest( From 09682a9f49b58ce9d644fbd54ee54795927e813d Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 24 Jul 2025 18:22:27 -0400 Subject: [PATCH 070/195] elminate render --- app/externals/scraperapi/scraperapi_client.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/externals/scraperapi/scraperapi_client.py b/app/externals/scraperapi/scraperapi_client.py index 325bc08..29e548e 100644 --- a/app/externals/scraperapi/scraperapi_client.py +++ b/app/externals/scraperapi/scraperapi_client.py @@ -15,8 +15,7 @@ def __init__(self): async def get_html(self, url: str, params: Dict[str, Any] = None) -> str: default_params = { "api_key": self.api_key, - "url": url, - "render": "true" + "url": url } if params: From 94cbbb18aec40887d4c91df1b7cc849f3675682d Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 1 Aug 2025 11:40:12 -0400 Subject: [PATCH 071/195] add logic resolution. 
--- .../responses/agent_config_response.py | 1 + app/externals/images/image_client.py | 8 ++++++-- app/services/image_service.py | 17 +++++++++++++---- app/services/message_service.py | 19 +++++++++++++++++++ app/services/message_service_interface.py | 4 ++++ 5 files changed, 43 insertions(+), 6 deletions(-) diff --git a/app/externals/agent_config/responses/agent_config_response.py b/app/externals/agent_config/responses/agent_config_response.py index 202f7fc..4d39d4d 100644 --- a/app/externals/agent_config/responses/agent_config_response.py +++ b/app/externals/agent_config/responses/agent_config_response.py @@ -6,6 +6,7 @@ class AgentPreferences(BaseModel): temperature: float = 0.7 max_tokens: int = 1000 top_p: float = 1.0 + extra_parameters: Optional[Dict[str, Any]] = None class Property(BaseModel): diff --git a/app/externals/images/image_client.py b/app/externals/images/image_client.py index 5e48451..db49994 100644 --- a/app/externals/images/image_client.py +++ b/app/externals/images/image_client.py @@ -116,7 +116,7 @@ async def google_image(prompt: str, file: Optional[str] = None) -> bytes: raise Exception(f"Error al generar imagen: {str(e)}") -async def openai_image_edit(image_urls: list[str], prompt: str) -> bytes: +async def openai_image_edit(image_urls: list[str], prompt: str, resolution: Optional[str] = None) -> bytes: url = "https://api.openai.com/v1/images/edits" headers = { "Authorization": f"Bearer {config.OPENAI_API_KEY}" @@ -139,7 +139,11 @@ async def openai_image_edit(image_urls: list[str], prompt: str) -> bytes: prompt = prompt + ". 
**escena completa visible, composición centrada, todos los elementos dentro del marco cuadrado, nada recortado en los bordes, composición completa**" - data.add_field('size', '1024x1024') + size = '1024x1024' + if resolution and resolution.strip(): + size = resolution + + data.add_field('size', size) data.add_field('prompt', prompt) data.add_field('model', 'gpt-image-1') data.add_field('n', '1') diff --git a/app/services/image_service.py b/app/services/image_service.py index 17de94c..302f84f 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -60,9 +60,9 @@ def _process_image_for_upload(self, original_image_bytes: bytes) -> str: async def _generate_single_variation(self, url_images: list[str], prompt: str, owner_id: str, - folder_id: str, file: Optional[str] = None) -> str: + folder_id: str, file: Optional[str] = None, resolution: Optional[str] = None) -> str: - image_content = await openai_image_edit(image_urls=url_images, prompt=prompt) + image_content = await openai_image_edit(image_urls=url_images, prompt=prompt, resolution=resolution) content_base64 = base64.b64encode(image_content).decode('utf-8') final_upload = await self._upload_to_s3( @@ -89,10 +89,19 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ }] ) - response = await self.message_service.handle_message(message_request) + response_data = await self.message_service.handle_message_with_config(message_request) + agent_config = response_data["agent_config"] + response = response_data["message"] + + resolution = None + if (agent_config.preferences.extra_parameters and + 'resolution' in agent_config.preferences.extra_parameters): + resolution = agent_config.preferences.extra_parameters['resolution'] + prompt = response["text"] + " Do not modify any text, letters, brand logos, brand names, or symbols." 
tasks = [ - self._generate_single_variation([original_image_response.s3_url], prompt, owner_id, folder_id, request.file) + self._generate_single_variation([original_image_response.s3_url], prompt, owner_id, folder_id, + request.file, resolution) for i in range(request.num_variations) ] generated_urls = await asyncio.gather(*tasks) diff --git a/app/services/message_service.py b/app/services/message_service.py index 7a3909b..501ecf9 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -43,6 +43,25 @@ async def handle_message(self, request: MessageRequest): agent_config=agent_config ) + async def handle_message_with_config(self, request: MessageRequest): + data = AgentConfigRequest( + agent_id=request.agent_id, + query=request.query, + metadata_filter=request.metadata_filter, + parameter_prompt=request.parameter_prompt + ) + + agent_config = await get_agent(data) + + message_response = await self.conversation_manager.process_conversation( + request=request, + agent_config=agent_config + ) + return { + "message": message_response, + "agent_config": agent_config + } + async def handle_message_json(self, request: MessageRequest): response = await self.handle_message(request) diff --git a/app/services/message_service_interface.py b/app/services/message_service_interface.py index bef43c5..b896250 100644 --- a/app/services/message_service_interface.py +++ b/app/services/message_service_interface.py @@ -33,4 +33,8 @@ async def resolve_funnel(self, request: ResolveFunnelRequest): @abstractmethod async def resolve_brand_context(self, request: BrandContextResolverRequest): + pass + + @abstractmethod + async def handle_message_with_config(self, request: MessageRequest): pass \ No newline at end of file From 4f4ca2a896a8f3717666a51fa2bc5dadcf35f2ba Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 1 Aug 2025 13:10:09 -0400 Subject: [PATCH 072/195] add logic from agent. 
--- app/services/image_service.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/app/services/image_service.py b/app/services/image_service.py index 302f84f..badcfa2 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -109,7 +109,7 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ return GenerateImageResponse(generated_urls=generated_urls, original_url=original_image_response.s3_url, generated_prompt=prompt, vision_analysis=vision_analysis) - async def generate_images_from(self, request: GenerateImageRequest, owner_id: str): + async def generate_images_from(self, request: GenerateImageRequest, owner_id: str, resolution: Optional[str] = None): folder_id = uuid.uuid4().hex[:8] urls = request.file_urls or [] original_url = request.file_url @@ -128,6 +128,7 @@ async def generate_images_from(self, request: GenerateImageRequest, owner_id: st owner_id, folder_id, request.file, + resolution=resolution ) for i in range(request.num_variations) ] @@ -148,8 +149,17 @@ async def generate_images_from_agent(self, request: GenerateImageRequest, owner_ conversation_id="", ) - message = await self.message_service.handle_message(data) + response_data = await self.message_service.handle_message_with_config(data) + agent_config = response_data["agent_config"] + message = response_data["message"] + request.prompt = message["text"] - response = await self.generate_images_from(request, owner_id) + + resolution = None + if (agent_config.preferences.extra_parameters and + 'resolution' in agent_config.preferences.extra_parameters): + resolution = agent_config.preferences.extra_parameters['resolution'] + + response = await self.generate_images_from(request, owner_id, resolution=resolution) return response From 31ebf40e128a634fc8a171ef60fa9254594badee Mon Sep 17 00:00:00 2001 From: stephan Date: Thu, 28 Aug 2025 15:19:26 -0500 Subject: [PATCH 073/195] fal ai integrado con eleven labs, omnihuman 
y kling --- app/configurations/config.py | 4 ++- app/controllers/handle_controller.py | 24 ++++++++++++- app/externals/fal/__init__.py | 1 + app/externals/fal/fal_client.py | 47 +++++++++++++++++++++++++ app/requests/generate_audio_request.py | 7 ++++ app/requests/generate_video_request.py | 11 ++++++ app/services/audio_service.py | 25 +++++++++++++ app/services/audio_service_interface.py | 9 +++++ app/services/video_service.py | 40 +++++++++++++++++++++ app/services/video_service_interface.py | 9 +++++ main.py | 6 ++++ 11 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 app/externals/fal/__init__.py create mode 100644 app/externals/fal/fal_client.py create mode 100644 app/requests/generate_audio_request.py create mode 100644 app/requests/generate_video_request.py create mode 100644 app/services/audio_service.py create mode 100644 app/services/audio_service_interface.py create mode 100644 app/services/video_service.py create mode 100644 app/services/video_service_interface.py diff --git a/app/configurations/config.py b/app/configurations/config.py index 1b6419e..9dd7685 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -36,4 +36,6 @@ DROPI_S3_BASE_URL: str = os.getenv('DROPI_S3_BASE_URL', 'https://d39ru7awumhhs2.cloudfront.net/') DROPI_HOST: str = os.getenv('DROPI_HOST', 'https://test-api.dropi.co') -DROPI_API_KEY: str = os.getenv('DROPI_API_KEY') \ No newline at end of file +DROPI_API_KEY: str = os.getenv('DROPI_API_KEY') + +FAL_AI_API_KEY: str = os.getenv('FAL_AI_API_KEY') \ No newline at end of file diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index 7f2b9ac..7126d9d 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -16,11 +16,16 @@ from app.services.message_service_interface import MessageServiceInterface from app.services.product_scraping_service_interface import ProductScrapingServiceInterface from 
app.middlewares.auth_middleware import require_auth, require_api_key -from pydantic import BaseModel +from app.requests.generate_video_request import GenerateVideoRequest # Importaciones para Dropi from app.services.dropi_service_interface import DropiServiceInterface from app.services.dropi_service import DropiService +from app.services.video_service_interface import VideoServiceInterface +from app.services.video_service import VideoService +from app.services.audio_service_interface import AudioServiceInterface +from app.services.audio_service import AudioService +from app.requests.generate_audio_request import GenerateAudioRequest router = APIRouter( prefix="/api/ms/conversational-engine", @@ -195,6 +200,23 @@ async def brand_context_resolver( response = await message_service.resolve_brand_context(requestBrand) return response +@router.post("/generate-video") +async def generate_video( + request: Request, + requestGenerateVideo: GenerateVideoRequest, + video_service: VideoServiceInterface = Depends(VideoService) +): + return await video_service.generate_video(requestGenerateVideo) + + +@router.post("/generate-audio") +async def generate_audio( + request: Request, + requestGenerateAudio: GenerateAudioRequest, + audio_service: AudioServiceInterface = Depends(AudioService) +): + return await audio_service.generate_audio(requestGenerateAudio) + @router.get("/health") async def health_check(): diff --git a/app/externals/fal/__init__.py b/app/externals/fal/__init__.py new file mode 100644 index 0000000..526f30b --- /dev/null +++ b/app/externals/fal/__init__.py @@ -0,0 +1 @@ +# Package initializer for FAL externals \ No newline at end of file diff --git a/app/externals/fal/fal_client.py b/app/externals/fal/fal_client.py new file mode 100644 index 0000000..36bf7a3 --- /dev/null +++ b/app/externals/fal/fal_client.py @@ -0,0 +1,47 @@ +import urllib.parse +from typing import Optional, Dict, Any + +import httpx + +from app.configurations.config import FAL_AI_API_KEY + + 
+class FalClient: + def __init__(self, api_key: Optional[str] = None): + self.api_key = api_key or FAL_AI_API_KEY + + async def _post(self, path: str, payload: Dict[str, Any], fal_webhook: Optional[str] = None) -> Dict[str, Any]: + if not self.api_key: + raise ValueError("FAL_AI_API_KEY no configurada") + + base_url = f"https://queue.fal.run/{path}" + if fal_webhook: + query = f"fal_webhook={urllib.parse.quote_plus(fal_webhook)}" + url = f"{base_url}?{query}" + else: + url = base_url + + headers = { + "Authorization": f"Key {self.api_key}", + "Content-Type": "application/json", + } + + async with httpx.AsyncClient(timeout=60) as client: + response = await client.post(url, json=payload, headers=headers) + response.raise_for_status() + return response.json() + + async def tts_multilingual_v2(self, text: str, fal_webhook: Optional[str] = None, **kwargs) -> Dict[str, Any]: + payload = {"text": text} + payload.update(kwargs) + return await self._post("fal-ai/elevenlabs/tts/multilingual-v2", payload, fal_webhook) + + async def bytedance_omnihuman(self, image_url: str, audio_url: str, fal_webhook: Optional[str] = None, **kwargs) -> Dict[str, Any]: + payload = {"image_url": image_url, "audio_url": audio_url} + payload.update(kwargs) + return await self._post("fal-ai/bytedance/omnihuman", payload, fal_webhook) + + async def kling_image_to_video(self, prompt: str, image_url: str, fal_webhook: Optional[str] = None, **kwargs) -> Dict[str, Any]: + payload = {"prompt": prompt, "image_url": image_url} + payload.update(kwargs) + return await self._post("fal-ai/kling-video/v2/master/image-to-video", payload, fal_webhook) \ No newline at end of file diff --git a/app/requests/generate_audio_request.py b/app/requests/generate_audio_request.py new file mode 100644 index 0000000..0621741 --- /dev/null +++ b/app/requests/generate_audio_request.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel +from typing import Optional, Dict, Any + + +class GenerateAudioRequest(BaseModel): + text: str 
+ content: Optional[Dict[str, Any]] = None \ No newline at end of file diff --git a/app/requests/generate_video_request.py b/app/requests/generate_video_request.py new file mode 100644 index 0000000..ce7dbc3 --- /dev/null +++ b/app/requests/generate_video_request.py @@ -0,0 +1,11 @@ +from enum import Enum +from typing import Dict, Any, Optional +from pydantic import BaseModel + +class VideoType(str, Enum): + human_scene = "human_scene" + animated_scene = "animated_scene" + +class GenerateVideoRequest(BaseModel): + type: VideoType + content: Optional[Dict[str, Any]] = None diff --git a/app/services/audio_service.py b/app/services/audio_service.py new file mode 100644 index 0000000..c8817d2 --- /dev/null +++ b/app/services/audio_service.py @@ -0,0 +1,25 @@ +from typing import Any, Dict + +from fastapi import Depends, HTTPException + +from app.requests.generate_audio_request import GenerateAudioRequest +from app.services.audio_service_interface import AudioServiceInterface +from app.externals.fal.fal_client import FalClient + + +class AudioService(AudioServiceInterface): + def __init__(self, fal_client: FalClient = Depends()): + self.fal_client = fal_client + + async def generate_audio(self, request: GenerateAudioRequest) -> Dict[str, Any]: + if not request.text: + raise HTTPException(status_code=400, detail="Falta 'text'") + + content = request.content or {} + fal_webhook = content.get("fal_webhook") + extra = {k: v for k, v in content.items() if k not in {"fal_webhook"}} + + try: + return await self.fal_client.tts_multilingual_v2(text=request.text, fal_webhook=fal_webhook, **extra) + except Exception as e: + raise HTTPException(status_code=502, detail=f"Error al llamar a FAL: {str(e)}") \ No newline at end of file diff --git a/app/services/audio_service_interface.py b/app/services/audio_service_interface.py new file mode 100644 index 0000000..250753c --- /dev/null +++ b/app/services/audio_service_interface.py @@ -0,0 +1,9 @@ +from abc import ABC, abstractmethod + 
+from app.requests.generate_audio_request import GenerateAudioRequest + + +class AudioServiceInterface(ABC): + @abstractmethod + async def generate_audio(self, request: GenerateAudioRequest): + pass \ No newline at end of file diff --git a/app/services/video_service.py b/app/services/video_service.py new file mode 100644 index 0000000..df14cba --- /dev/null +++ b/app/services/video_service.py @@ -0,0 +1,40 @@ +from typing import Any, Dict + +from fastapi import Depends, HTTPException + +from app.requests.generate_video_request import GenerateVideoRequest, VideoType +from app.services.video_service_interface import VideoServiceInterface +from app.externals.fal.fal_client import FalClient + + +class VideoService(VideoServiceInterface): + def __init__(self, fal_client: FalClient = Depends()): + self.fal_client = fal_client + + async def generate_video(self, request: GenerateVideoRequest) -> Dict[str, Any]: + content: Dict[str, Any] = request.content or {} + + try: + if request.type == VideoType.animated_scene: + prompt = content.get("prompt") + image_url = content.get("image_url") + if not prompt or not image_url: + raise HTTPException(status_code=400, detail="Se requieren 'prompt' e 'image_url' en content para animated_scene") + fal_webhook = content.get("fal_webhook") + extra = {k: v for k, v in content.items() if k not in {"prompt", "image_url", "fal_webhook"}} + return await self.fal_client.kling_image_to_video(prompt=prompt, image_url=image_url, fal_webhook=fal_webhook, **extra) + + if request.type == VideoType.human_scene: + image_url = content.get("image_url") + audio_url = content.get("audio_url") + if not image_url or not audio_url: + raise HTTPException(status_code=400, detail="Se requieren 'image_url' y 'audio_url' en content para human_scene") + fal_webhook = content.get("fal_webhook") + extra = {k: v for k, v in content.items() if k not in {"image_url", "audio_url", "fal_webhook"}} + return await self.fal_client.bytedance_omnihuman(image_url=image_url, 
audio_url=audio_url, fal_webhook=fal_webhook, **extra) + + raise HTTPException(status_code=400, detail="Tipo de video no soportado") + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=502, detail=f"Error al llamar a FAL: {str(e)}") \ No newline at end of file diff --git a/app/services/video_service_interface.py b/app/services/video_service_interface.py new file mode 100644 index 0000000..6632014 --- /dev/null +++ b/app/services/video_service_interface.py @@ -0,0 +1,9 @@ +from abc import ABC, abstractmethod + +from app.requests.generate_video_request import GenerateVideoRequest + + +class VideoServiceInterface(ABC): + @abstractmethod + async def generate_video(self, request: GenerateVideoRequest): + pass \ No newline at end of file diff --git a/main.py b/main.py index 8f89bb8..a4c32bd 100644 --- a/main.py +++ b/main.py @@ -9,6 +9,10 @@ from app.services.message_service_interface import MessageServiceInterface from app.services.product_scraping_service import ProductScrapingService from app.services.product_scraping_service_interface import ProductScrapingServiceInterface +from app.services.video_service import VideoService +from app.services.video_service_interface import VideoServiceInterface +from app.services.audio_service import AudioService +from app.services.audio_service_interface import AudioServiceInterface app = FastAPI( title="Conversational Agent API", @@ -24,6 +28,8 @@ app.dependency_overrides[ConversationManagerInterface] = lambda: conversation_manager_singleton app.dependency_overrides[ImageServiceInterface] = ImageService app.dependency_overrides[ProductScrapingServiceInterface] = ProductScrapingService +app.dependency_overrides[VideoServiceInterface] = VideoService +app.dependency_overrides[AudioServiceInterface] = AudioService if __name__ == "__main__": import uvicorn From c591f44debff4d59a24c7a54a63de8d2dcbb0be3 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Sun, 31 Aug 2025 21:48:23 -0500 Subject: 
[PATCH 074/195] add logic image flash 2.5 --- app/externals/images/image_client.py | 46 ++++++++++++++++++---------- app/services/image_service.py | 13 +++++--- 2 files changed, 39 insertions(+), 20 deletions(-) diff --git a/app/externals/images/image_client.py b/app/externals/images/image_client.py index db49994..82007e2 100644 --- a/app/externals/images/image_client.py +++ b/app/externals/images/image_client.py @@ -68,28 +68,41 @@ async def generate_image_variation( raise Exception(f"Error {response.status}: {await response.text()}") -async def google_image(prompt: str, file: Optional[str] = None) -> bytes: - url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?key={GOOGLE_GEMINI_API_KEY}" +async def google_image(image_urls: list[str], prompt: str, resolution: Optional[str] = None) -> bytes: + url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-image-preview:generateContent?key={GOOGLE_GEMINI_API_KEY}" parts = [{"text": prompt}] - if file: - parts.append({ - "inlineData": { - "mimeType": "image/png", - "data": file - } - }) - + if image_urls: + async with aiohttp.ClientSession() as fetch_session: + for image_url in image_urls: + try: + async with fetch_session.get(image_url) as img_response: + if img_response.status == 200: + image_bytes = await img_response.read() + image_base64 = base64.b64encode(image_bytes).decode('utf-8') + + parts.append({ + "inlineData": { + "mimeType": 'image/jpeg', + "data": image_base64 + } + }) + except Exception as e: + print(f"Error al procesar imagen de {image_url}: {str(e)}") + continue + + generation_config = { + "responseModalities": ["Text", "Image"] + } + payload = { "contents": [ { "parts": parts } ], - "generationConfig": { - "responseModalities": ["Text", "Image"] - } + "generationConfig": generation_config } headers = {'Content-Type': 'application/json'} @@ -106,14 +119,15 @@ async def google_image(prompt: str, file: Optional[str] = None) -> bytes: 
img_data_base64 = part["inlineData"]["data"] img_bytes = base64.b64decode(img_data_base64) return img_bytes - return None + + raise Exception("No se generó ninguna imagen en la respuesta de Google Gemini") else: error_text = await response.text() print(f"Error {response.status}: {error_text}") response.raise_for_status() except Exception as e: - print(f"Error al generar imagen: {str(e)}") - raise Exception(f"Error al generar imagen: {str(e)}") + print(f"Error al generar imagen con Google Gemini: {str(e)}") + raise Exception(f"Error al generar imagen con Google Gemini: {str(e)}") async def openai_image_edit(image_urls: list[str], prompt: str, resolution: Optional[str] = None) -> bytes: diff --git a/app/services/image_service.py b/app/services/image_service.py index badcfa2..557cdd8 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -16,7 +16,7 @@ import uuid from dotenv import load_dotenv from app.externals.google_vision.google_vision_client import analyze_image -from app.externals.images.image_client import openai_image_edit +from app.externals.images.image_client import google_image from typing import Optional import base64 import io @@ -62,7 +62,7 @@ def _process_image_for_upload(self, original_image_bytes: bytes) -> str: async def _generate_single_variation(self, url_images: list[str], prompt: str, owner_id: str, folder_id: str, file: Optional[str] = None, resolution: Optional[str] = None) -> str: - image_content = await openai_image_edit(image_urls=url_images, prompt=prompt, resolution=resolution) + image_content = await google_image(image_urls=url_images, prompt=prompt, resolution=resolution) content_base64 = base64.b64encode(image_content).decode('utf-8') final_upload = await self._upload_to_s3( @@ -106,8 +106,13 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ ] generated_urls = await asyncio.gather(*tasks) - return GenerateImageResponse(generated_urls=generated_urls, 
original_url=original_image_response.s3_url, - generated_prompt=prompt, vision_analysis=vision_analysis) + return GenerateImageResponse( + generated_urls=generated_urls, + original_url=original_image_response.s3_url, + original_urls=[original_image_response.s3_url], + generated_prompt=prompt, + vision_analysis=vision_analysis + ) async def generate_images_from(self, request: GenerateImageRequest, owner_id: str, resolution: Optional[str] = None): folder_id = uuid.uuid4().hex[:8] From 64809c80ca75470a02fde4cea0579ff6cd2697e8 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Fri, 5 Sep 2025 11:39:13 -0500 Subject: [PATCH 075/195] add logic provider --- app/requests/generate_image_request.py | 1 + app/services/image_service.py | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/app/requests/generate_image_request.py b/app/requests/generate_image_request.py index 269889e..23ae10a 100644 --- a/app/requests/generate_image_request.py +++ b/app/requests/generate_image_request.py @@ -9,5 +9,6 @@ class GenerateImageRequest(BaseModel): owner_id: Optional[str] = None prompt: Optional[str] = None agent_id: Optional[str] = None + provider: Optional[str] = None num_variations: int = 4 parameter_prompt: Optional[Dict[str, Any]] = None \ No newline at end of file diff --git a/app/services/image_service.py b/app/services/image_service.py index 557cdd8..66b9c0c 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -16,7 +16,7 @@ import uuid from dotenv import load_dotenv from app.externals.google_vision.google_vision_client import analyze_image -from app.externals.images.image_client import google_image +from app.externals.images.image_client import google_image, openai_image_edit from typing import Optional import base64 import io @@ -60,9 +60,14 @@ def _process_image_for_upload(self, original_image_bytes: bytes) -> str: async def _generate_single_variation(self, url_images: list[str], prompt: str, owner_id: str, - 
folder_id: str, file: Optional[str] = None, resolution: Optional[str] = None) -> str: + folder_id: str, file: Optional[str] = None, resolution: Optional[str] = None, + provider: Optional[str] = None) -> str: - image_content = await google_image(image_urls=url_images, prompt=prompt, resolution=resolution) + if provider and provider.lower() == "gemini": + image_content = await google_image(image_urls=url_images, prompt=prompt, resolution=resolution) + else: + + image_content = await openai_image_edit(image_urls=url_images, prompt=prompt, resolution=resolution) content_base64 = base64.b64encode(image_content).decode('utf-8') final_upload = await self._upload_to_s3( @@ -101,7 +106,7 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ prompt = response["text"] + " Do not modify any text, letters, brand logos, brand names, or symbols." tasks = [ self._generate_single_variation([original_image_response.s3_url], prompt, owner_id, folder_id, - request.file, resolution) + request.file, resolution, provider=agent_config.provider_ai) for i in range(request.num_variations) ] generated_urls = await asyncio.gather(*tasks) @@ -133,7 +138,8 @@ async def generate_images_from(self, request: GenerateImageRequest, owner_id: st owner_id, folder_id, request.file, - resolution=resolution + resolution=resolution, + provider=request.provider ) for i in range(request.num_variations) ] @@ -159,6 +165,7 @@ async def generate_images_from_agent(self, request: GenerateImageRequest, owner_ message = response_data["message"] request.prompt = message["text"] + request.provider = agent_config.provider_ai resolution = None if (agent_config.preferences.extra_parameters and From 019b148626f7af1a00c28e03b144976c86cd23c1 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 8 Sep 2025 10:59:59 -0500 Subject: [PATCH 076/195] add logic pdf generator --- app/configurations/pdf_manual_config.py | 14 +- app/pdf/pdf_generator.py | 371 +++++++++++++++++----- 
app/pdf/pdf_manual_generator.py | 14 +- app/requests/generate_pdf_request.py | 3 + app/services/message_service.py | 6 +- app/services/message_service_interface.py | 4 +- requirements.txt | 3 +- 7 files changed, 323 insertions(+), 92 deletions(-) diff --git a/app/configurations/pdf_manual_config.py b/app/configurations/pdf_manual_config.py index 9580698..fbfa290 100644 --- a/app/configurations/pdf_manual_config.py +++ b/app/configurations/pdf_manual_config.py @@ -1,10 +1,9 @@ PDF_MANUAL_SECTIONS = { - "introduction": "Introduction", - "main_features": "Main Features", - "usage_instructions": "Usage Instructions", - "troubleshooting": "Troubleshooting", - "faq": "FAQ", - "conclusion": "Conclusion" + "introduction": "Introducción", + "main_features": "Características principales", + "usage_instructions": "Instrucciones de uso", + "troubleshooting": "Solución de problemas", + "faq": "Preguntas frecuentes" } PDF_MANUAL_SECTION_ORDER = [ @@ -12,6 +11,5 @@ "main_features", "usage_instructions", "troubleshooting", - "faq", - "conclusion" + "faq" ] diff --git a/app/pdf/pdf_generator.py b/app/pdf/pdf_generator.py index 3c8a983..613f01c 100644 --- a/app/pdf/pdf_generator.py +++ b/app/pdf/pdf_generator.py @@ -1,103 +1,260 @@ from fpdf import FPDF +import requests +import io +import os +from typing import Optional, Tuple +try: + import PIL.Image as PILImage + PILLOW_AVAILABLE = True +except ImportError: + PILImage = None + PILLOW_AVAILABLE = False + +# Constantes de diseño +class PDFConstants: + # Colores + HEADER_COLOR = (0, 0, 0) # Negro para el header (título y línea) + SECTION_BG_COLOR = (64, 64, 64) # Gris oscuro más suave para el fondo del título de la sección + SECTION_BORDER_COLOR = (255, 140, 0) # Naranja/dorado para el borde + WHITE_COLOR = (255, 255, 255) + BLACK_COLOR = (0, 0, 0) + GRAY_COLOR = (128, 128, 128) + LIGHT_GRAY_COLOR = (200, 200, 200) + + # Tamaños de fuente + HEADER_FONT_SIZE = 16 + COVER_TITLE_FONT_SIZE = 28 + SECTION_TITLE_FONT_SIZE = 14 + 
CONTENT_FONT_SIZE = 12 + FOOTER_FONT_SIZE = 10 + + # Márgenes y espaciado + PAGE_MARGIN = 15 + HEADER_MARGIN = 10 + OVERLAY_HEIGHT = 80 + LINE_WIDTH_THIN = 0.3 + LINE_WIDTH_MEDIUM = 0.5 + LINE_WIDTH_THICK = 0.7 + + # Otros + IMAGE_QUALITY = 85 + TEMP_IMAGE_PATH = "/tmp/temp_cover_image.jpg" + REQUEST_TIMEOUT = 10 class PDFGenerator(FPDF): - def __init__(self, product_name): + def __init__(self, product_name: str): super().__init__() self.product_name = product_name + self.custom_title: Optional[str] = None self.header_height = 0 - self.version = "1.0" # Versión del documento + self.version = "1.0" + self.first_section = True # Para controlar la primera sección - def header(self): + def header(self) -> None: + """Genera el header de cada página (excepto la portada).""" if self.page_no() == 1: return initial_y = self.get_y() - self.set_font("Helvetica", "B", 16) - self.set_text_color(0, 51, 102) - - title = f"User Manual for {self.product_name}" + self.set_font("Helvetica", "B", PDFConstants.HEADER_FONT_SIZE) + self.set_text_color(*PDFConstants.HEADER_COLOR) - self.set_y(10) + title = self.custom_title if self.custom_title else f"User Manual for {self.product_name}" + clean_title = self._clean_text_for_latin1(title) - width_available = self.w - 20 - self.x = 10 + self.set_y(PDFConstants.HEADER_MARGIN) + width_available = self.w - (2 * PDFConstants.HEADER_MARGIN) + self.x = PDFConstants.HEADER_MARGIN - self.multi_cell(width_available, 8, title, align="C") + self.multi_cell(width_available, 8, clean_title, align="C") end_y = self.get_y() + 2 - self.set_line_width(0.5) - self.set_draw_color(0, 51, 102) - self.line(10, end_y, self.w - 10, end_y) - - self.set_y(end_y + 10) + self.set_line_width(PDFConstants.LINE_WIDTH_MEDIUM) + self.set_draw_color(*PDFConstants.HEADER_COLOR) + self.line(PDFConstants.HEADER_MARGIN, end_y, self.w - PDFConstants.HEADER_MARGIN, end_y) + self.set_y(end_y + PDFConstants.HEADER_MARGIN) self.header_height = self.get_y() - initial_y - def 
footer(self): - # No mostrar el pie de página en la primera página (portada) + def footer(self) -> None: + """Genera el footer de cada página (excepto la portada).""" if self.page_no() == 1: return self.set_y(-20) - self.set_font("Helvetica", "I", 10) - self.set_text_color(128, 128, 128) - self.cell(0, 10, f"Page {self.page_no()-1}", 0, 0, "C") # Restar 1 porque la portada no cuenta + self.set_font("Helvetica", "I", PDFConstants.FOOTER_FONT_SIZE) + self.set_text_color(*PDFConstants.GRAY_COLOR) + self.cell(0, 10, f"Page {self.page_no()-1}", 0, 0, "C") - def add_cover_page(self, title, subtitle=""): + def add_cover_page(self, title: str, subtitle: str = "", image_url: Optional[str] = None) -> None: + """ + Crea la página de portada del PDF. + + Args: + title: Título principal de la portada + subtitle: Subtítulo opcional + image_url: URL de imagen opcional para usar como fondo + """ self.add_page() - # Dimensiones y márgenes page_width = self.w page_height = self.h - margin = 15 - # Borde completo alrededor de la página - self.set_draw_color(0, 51, 102) - self.set_line_width(0.7) - self.rect(margin, margin, page_width - 2*margin, page_height - 2*margin) + if image_url and PILLOW_AVAILABLE: + # Solo mostrar la imagen sin texto si hay imagen + self._create_image_only_cover(image_url, page_width, page_height) + else: + # Portada tradicional con texto si no hay imagen + title_y_pos, title_color = self._create_cover_background(None, page_width, page_height) + self._add_cover_text(title, subtitle, title_y_pos, title_color, page_width, page_height, None) + + self.add_page() + + def _create_cover_background(self, image_url: Optional[str], page_width: float, page_height: float) -> Tuple[float, Tuple[int, int, int]]: + """Crea el fondo de la portada (imagen o borde tradicional).""" + if image_url and PILLOW_AVAILABLE: + image_result = self._download_and_process_image(image_url) + if image_result: + temp_path, img_width, img_height = image_result + + available_width = page_width 
- 2 * PDFConstants.PAGE_MARGIN + available_height = page_height - 2 * PDFConstants.PAGE_MARGIN + + x_pos, y_pos, final_width, final_height = self._calculate_image_dimensions( + img_width, img_height, available_width, available_height + ) + + self.image(temp_path, x=x_pos, y=y_pos, w=final_width, h=final_height) + self._cleanup_temp_image() + + # Crear overlay para el título + overlay_y = page_height - PDFConstants.OVERLAY_HEIGHT - PDFConstants.PAGE_MARGIN + self.set_fill_color(*PDFConstants.BLACK_COLOR) + self.rect(PDFConstants.PAGE_MARGIN, overlay_y, + page_width - 2 * PDFConstants.PAGE_MARGIN, + PDFConstants.OVERLAY_HEIGHT, 'F') + + return overlay_y + 15, PDFConstants.WHITE_COLOR - # Título principal - self.set_font("Helvetica", "B", 24) # Reducir ligeramente el tamaño para evitar desbordamiento - self.set_text_color(0, 51, 102) + # Portada tradicional con borde + self.set_draw_color(*PDFConstants.HEADER_COLOR) + self.set_line_width(PDFConstants.LINE_WIDTH_THICK) + self.rect(PDFConstants.PAGE_MARGIN, PDFConstants.PAGE_MARGIN, + page_width - 2 * PDFConstants.PAGE_MARGIN, + page_height - 2 * PDFConstants.PAGE_MARGIN) - # Definir el ancho efectivo del texto con márgenes seguros - text_width = page_width - 2*margin - 20 # 10px de margen adicional a cada lado + return page_height * 0.4, PDFConstants.HEADER_COLOR + + def _add_cover_text(self, title: str, subtitle: str, title_y_pos: float, + title_color: Tuple[int, int, int], page_width: float, + page_height: float, image_url: Optional[str]) -> None: + """Agrega el texto de la portada.""" + self.set_font("Helvetica", "B", PDFConstants.COVER_TITLE_FONT_SIZE) + self.set_text_color(*title_color) - # Posicionar para el título - self.set_y(page_height * 0.3) # Aproximadamente a 1/3 de la página - self.set_x(margin + 10) # Margen izquierdo + margen adicional + text_width = page_width - 2 * PDFConstants.PAGE_MARGIN - 20 - # Dibujar el título con múltiples líneas si es necesario - self.multi_cell(text_width, 16, title, 
align="C") + self.set_y(title_y_pos) + self.set_x(PDFConstants.PAGE_MARGIN + 10) + clean_title = self._clean_text_for_latin1(title) + self.multi_cell(text_width, 18, clean_title, align="C") - # Guardar posición después del título - title_end_y = self.get_y() + # Solo mostrar subtítulo y versión si no hay imagen + if not image_url: + if subtitle: + self.ln(15) + self.set_font("Helvetica", "", 18) + self.set_text_color(80, 80, 80) + self.set_x(PDFConstants.PAGE_MARGIN + 10) + clean_subtitle = self._clean_text_for_latin1(subtitle) + self.multi_cell(text_width, 12, clean_subtitle, align="C") + + self.set_font("Helvetica", "I", 11) + self.set_text_color(100, 100, 100) + version_y = page_height - PDFConstants.PAGE_MARGIN - 20 + self.set_y(version_y) + self.set_x(PDFConstants.PAGE_MARGIN + 10) + self.multi_cell(text_width, 10, f"Document Version: {self.version}", align="C") + + def set_document_version(self, version: str) -> None: + """Establece la versión del documento.""" + self.version = version + + def set_custom_title(self, title: str) -> None: + """Establece el título personalizado que aparecerá en el header de cada página.""" + self.custom_title = title + + def _download_and_process_image(self, image_url: str) -> Optional[Tuple[str, int, int]]: + """ + Descarga y procesa una imagen desde una URL. 
- # Subtítulo si existe - if subtitle: - self.ln(15) # Espacio entre título y subtítulo - self.set_font("Helvetica", "", 18) - self.set_text_color(80, 80, 80) - self.set_x(margin + 10) # Asegurar margen correcto - self.multi_cell(text_width, 12, subtitle, align="C") + Returns: + Tuple con (ruta_temporal, ancho, alto) o None si falla + """ + try: + response = requests.get(image_url, timeout=PDFConstants.REQUEST_TIMEOUT) + response.raise_for_status() + + image = PILImage.open(io.BytesIO(response.content)) + + if image.mode != 'RGB': + image = image.convert('RGB') + + image.save(PDFConstants.TEMP_IMAGE_PATH, "JPEG", quality=PDFConstants.IMAGE_QUALITY) + + return PDFConstants.TEMP_IMAGE_PATH, image.width, image.height + + except Exception as e: + print(f"Error al procesar imagen: {e}") + return None + + def _calculate_image_dimensions(self, img_width: int, img_height: int, + available_width: float, available_height: float) -> Tuple[float, float, float, float]: + """ + Calcula las dimensiones y posición para centrar una imagen manteniendo la proporción. 
- # Agregar información de la versión en la parte inferior, dentro del marco - self.set_font("Helvetica", "I", 11) - self.set_text_color(100, 100, 100) + Returns: + Tuple con (x_pos, y_pos, final_width, final_height) + """ + scale_width = available_width / img_width + scale_height = available_height / img_height + scale = min(scale_width, scale_height) - # Posicionar el texto de versión en la parte inferior pero dentro del marco - version_y = page_height - margin - 20 # 20 puntos arriba del borde inferior - self.set_y(version_y) - self.set_x(margin + 10) - self.multi_cell(text_width, 10, f"Document Version: {self.version}", align="C") + final_width = img_width * scale + final_height = img_height * scale - self.add_page() + x_pos = (self.w - final_width) / 2 + y_pos = (self.h - final_height) / 2 + + return x_pos, y_pos, final_width, final_height - # Método para establecer la versión del documento - def set_document_version(self, version): - self.version = version + def _cleanup_temp_image(self) -> None: + """Elimina el archivo temporal de imagen si existe.""" + if os.path.exists(PDFConstants.TEMP_IMAGE_PATH): + os.remove(PDFConstants.TEMP_IMAGE_PATH) + + def _create_image_only_cover(self, image_url: str, page_width: float, page_height: float) -> None: + """Crea una portada que muestra solo la imagen sin texto.""" + image_result = self._download_and_process_image(image_url) + if image_result: + temp_path, img_width, img_height = image_result + + # Usar toda la página disponible para la imagen + available_width = page_width + available_height = page_height + + x_pos, y_pos, final_width, final_height = self._calculate_image_dimensions( + img_width, img_height, available_width, available_height + ) + + # Centrar la imagen en toda la página + x_pos = (page_width - final_width) / 2 + y_pos = (page_height - final_height) / 2 + + self.image(temp_path, x=x_pos, y=y_pos, w=final_width, h=final_height) + self._cleanup_temp_image() def get_multi_cell_height(self, w, h, txt, 
align="J"): x = self.x @@ -119,29 +276,91 @@ def get_multi_cell_height(self, w, h, txt, align="J"): return lines * h - def add_section(self, title, content): - if self.get_y() > self.h * 0.6: + def add_section(self, title: str, content: str) -> None: + """ + Agrega una sección al PDF con título en negrita y contenido. + Cada sección inicia en una nueva página. + + Args: + title: Título de la sección + content: Contenido de la sección + """ + # Cada sección inicia en una nueva página (excepto la primera) + if not self.first_section: self.add_page() + else: + self.first_section = False - self.set_font("Helvetica", "B", 14) - self.set_text_color(255, 255, 255) - self.set_fill_color(0, 102, 204) - self.cell(0, 12, title, ln=True, fill=True, align="C", border=1) - self.ln(6) - - self.set_text_color(0, 0, 0) - self.set_font("Helvetica", "", 12) + # Crear el borde naranja exterior primero + margin = 10 + current_y = self.get_y() + + # Dibujar el rectángulo del borde naranja + self.set_draw_color(*PDFConstants.SECTION_BORDER_COLOR) + self.set_line_width(0.5) # Línea más delgada + self.rect(margin, current_y, self.w - 2*margin, 16) # Rectángulo exterior + + # Crear el título con fondo negro y texto blanco (con pequeño margen interno) + self.set_font("Helvetica", "B", PDFConstants.SECTION_TITLE_FONT_SIZE) + self.set_text_color(*PDFConstants.WHITE_COLOR) # Texto blanco + self.set_fill_color(*PDFConstants.SECTION_BG_COLOR) # Fondo negro + + # Posicionar el título con un pequeño margen interno + self.set_xy(margin + 2, current_y + 2) # 2 puntos de separación + clean_title = self._clean_text_for_latin1(title) + self.cell(self.w - 2*margin - 4, 12, clean_title, ln=False, fill=True, align="C", border=0) + + # Mover a la siguiente línea + self.set_y(current_y + 16 + 6) - if isinstance(content, list): - formatted_text = "\n".join(str(item) for item in content) - else: - formatted_text = content.replace("\\n", "\n") + # Contenido de la sección + 
self.set_text_color(*PDFConstants.BLACK_COLOR) + self.set_font("Helvetica", "", PDFConstants.CONTENT_FONT_SIZE) + formatted_text = self._format_content(content) self.multi_cell(0, 8, formatted_text) + # Separador entre secciones self.ln(8) - self.set_draw_color(200, 200, 200) - self.set_line_width(0.3) + self.set_draw_color(*PDFConstants.LIGHT_GRAY_COLOR) + self.set_line_width(PDFConstants.LINE_WIDTH_THIN) current_y = self.get_y() - self.line(10, current_y, self.w - 10, current_y) + self.line(PDFConstants.HEADER_MARGIN, current_y, self.w - PDFConstants.HEADER_MARGIN, current_y) self.ln(10) + + def _format_content(self, content) -> str: + """Formatea el contenido de una sección.""" + if isinstance(content, list): + text = "\n".join(str(item) for item in content) + else: + text = content.replace("\\n", "\n") + + # Limpiar caracteres que no son compatibles con latin-1 + return self._clean_text_for_latin1(text) + + def _clean_text_for_latin1(self, text: str) -> str: + """Limpia el texto para que sea compatible con latin-1.""" + # Reemplazos de caracteres especiales comunes + replacements = { + '\u2022': '•', # Bullet point + '\u2013': '-', # En dash + '\u2014': '-', # Em dash + '\u2018': "'", # Left single quotation mark + '\u2019': "'", # Right single quotation mark + '\u201c': '"', # Left double quotation mark + '\u201d': '"', # Right double quotation mark + '\u2026': '...', # Horizontal ellipsis + '\u00a0': ' ', # Non-breaking space + } + + # Aplicar reemplazos + for unicode_char, replacement in replacements.items(): + text = text.replace(unicode_char, replacement) + + # Intentar codificar y decodificar para detectar otros problemas + try: + text.encode('latin-1') + return text + except UnicodeEncodeError: + # Si aún hay problemas, reemplazar caracteres problemáticos + return text.encode('latin-1', errors='replace').decode('latin-1') diff --git a/app/pdf/pdf_manual_generator.py b/app/pdf/pdf_manual_generator.py index b834283..e1e66c5 100644 --- 
a/app/pdf/pdf_manual_generator.py +++ b/app/pdf/pdf_manual_generator.py @@ -9,10 +9,18 @@ def __init__(self, product_name: str): self.product_name = product_name self.pdf = PDFGenerator(product_name) - async def create_manual(self, data: dict) -> str: + async def create_manual(self, data: dict, title: str = None, image_url: str = None) -> str: + # Usar el título personalizado si se proporciona, sino usar el por defecto + cover_title = title if title else f"User Manual for {self.product_name}" + + # Establecer el título personalizado para que aparezca en el header de todas las páginas + if title: + self.pdf.set_custom_title(title) + self.pdf.add_cover_page( - f"User Manual for {self.product_name}", - "Everything You Need to Know to Get Started" + cover_title, + "Everything You Need to Know to Get Started", + image_url ) self.pdf.set_auto_page_break(auto=True, margin=20) diff --git a/app/requests/generate_pdf_request.py b/app/requests/generate_pdf_request.py index 3b125d2..53a7a74 100644 --- a/app/requests/generate_pdf_request.py +++ b/app/requests/generate_pdf_request.py @@ -7,3 +7,6 @@ class GeneratePdfRequest(BaseModel): product_description: str language: str owner_id: str + image_url: str + title: str + content: str diff --git a/app/services/message_service.py b/app/services/message_service.py index 501ecf9..de00559 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -120,9 +120,9 @@ async def generate_copies(self, request: CopyRequest): return {"copies": combined_data} async def generate_pdf(self, request: GeneratePdfRequest): - base_query = f"Product Name: {request.product_name} Description: {request.product_description}. Language: {request.language}." + base_query = f"Product Name: {request.product_name} Description: {request.product_description}. Language: {request.language}. 
Content: {request.content}" base_filename = f"{request.product_id}_{request.language}" - version = "v1" + version = "v2" base_url = f"https://fluxi.co/{ENVIRONMENT}/assets" folder_path = f"{request.owner_id}/pdfs/{version}" s3_url = f"{base_url}/{folder_path}/{base_filename}.pdf" @@ -139,7 +139,7 @@ async def generate_pdf(self, request: GeneratePdfRequest): combined_data = await self.process_multiple_agents(agent_queries) pdf_generator = PDFManualGenerator(request.product_name) - pdf = await pdf_generator.create_manual(combined_data) + pdf = await pdf_generator.create_manual(combined_data, request.title, request.image_url) result = await upload_file( S3UploadRequest( diff --git a/app/services/message_service_interface.py b/app/services/message_service_interface.py index b896250..6a940ca 100644 --- a/app/services/message_service_interface.py +++ b/app/services/message_service_interface.py @@ -5,6 +5,7 @@ from app.requests.recommend_product_request import RecommendProductRequest from app.requests.resolve_funnel_request import ResolveFunnelRequest from app.requests.brand_context_resolver_request import BrandContextResolverRequest +from app.requests.generate_pdf_request import GeneratePdfRequest class MessageServiceInterface(ABC): @@ -24,7 +25,8 @@ async def generate_copies(self, request: CopyRequest): async def recommend_products(self, request: RecommendProductRequest): pass - async def generate_pdf(self, request): + @abstractmethod + async def generate_pdf(self, request: GeneratePdfRequest): pass @abstractmethod diff --git a/requirements.txt b/requirements.txt index b069251..59780a4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,4 +18,5 @@ langchain_mcp langchain-mcp-adapters==0.0.9 langchain-google-genai Pillow==10.3.0 -html5lib \ No newline at end of file +html5lib +requests \ No newline at end of file From 05ca8c0b5fde7236ce5cb5e14252e1398c98cfe1 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 8 Sep 2025 17:38:22 -0500 Subject: [PATCH 
077/195] fix generator. --- app/pdf/pdf_generator.py | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/app/pdf/pdf_generator.py b/app/pdf/pdf_generator.py index 613f01c..ee005fe 100644 --- a/app/pdf/pdf_generator.py +++ b/app/pdf/pdf_generator.py @@ -236,20 +236,21 @@ def _cleanup_temp_image(self) -> None: os.remove(PDFConstants.TEMP_IMAGE_PATH) def _create_image_only_cover(self, image_url: str, page_width: float, page_height: float) -> None: - """Crea una portada que muestra solo la imagen sin texto.""" + """Crea una portada que muestra solo la imagen ocupando toda la página.""" image_result = self._download_and_process_image(image_url) if image_result: temp_path, img_width, img_height = image_result - # Usar toda la página disponible para la imagen - available_width = page_width - available_height = page_height + # Calcular la escala para llenar toda la página (puede recortar) + scale_width = page_width / img_width + scale_height = page_height / img_height + # Usar la escala mayor para llenar completamente (crop to fit) + scale = max(scale_width, scale_height) - x_pos, y_pos, final_width, final_height = self._calculate_image_dimensions( - img_width, img_height, available_width, available_height - ) + final_width = img_width * scale + final_height = img_height * scale - # Centrar la imagen en toda la página + # Centrar la imagen (puede quedar parcialmente fuera de los bordes) x_pos = (page_width - final_width) / 2 y_pos = (page_height - final_height) / 2 @@ -291,27 +292,15 @@ def add_section(self, title: str, content: str) -> None: else: self.first_section = False - # Crear el borde naranja exterior primero - margin = 10 - current_y = self.get_y() - - # Dibujar el rectángulo del borde naranja - self.set_draw_color(*PDFConstants.SECTION_BORDER_COLOR) - self.set_line_width(0.5) # Línea más delgada - self.rect(margin, current_y, self.w - 2*margin, 16) # Rectángulo exterior - - # Crear el título con fondo negro 
y texto blanco (con pequeño margen interno) + # Crear el título con fondo gris y texto blanco (sin borde naranja) self.set_font("Helvetica", "B", PDFConstants.SECTION_TITLE_FONT_SIZE) self.set_text_color(*PDFConstants.WHITE_COLOR) # Texto blanco - self.set_fill_color(*PDFConstants.SECTION_BG_COLOR) # Fondo negro + self.set_fill_color(*PDFConstants.SECTION_BG_COLOR) # Fondo gris - # Posicionar el título con un pequeño margen interno - self.set_xy(margin + 2, current_y + 2) # 2 puntos de separación + # Crear el título con fondo gris completo clean_title = self._clean_text_for_latin1(title) - self.cell(self.w - 2*margin - 4, 12, clean_title, ln=False, fill=True, align="C", border=0) - - # Mover a la siguiente línea - self.set_y(current_y + 16 + 6) + self.cell(0, 12, clean_title, ln=True, fill=True, align="C", border=0) + self.ln(6) # Contenido de la sección self.set_text_color(*PDFConstants.BLACK_COLOR) From 76386d688310c75e00bada16fead588cf7549300 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Wed, 10 Sep 2025 18:57:14 -0500 Subject: [PATCH 078/195] add force in request --- app/requests/generate_pdf_request.py | 2 ++ app/services/message_service.py | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/app/requests/generate_pdf_request.py b/app/requests/generate_pdf_request.py index 53a7a74..12e7e4a 100644 --- a/app/requests/generate_pdf_request.py +++ b/app/requests/generate_pdf_request.py @@ -1,4 +1,5 @@ from pydantic import BaseModel +from typing import Optional class GeneratePdfRequest(BaseModel): @@ -10,3 +11,4 @@ class GeneratePdfRequest(BaseModel): image_url: str title: str content: str + force: Optional[bool] = False diff --git a/app/services/message_service.py b/app/services/message_service.py index de00559..2283c16 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -126,10 +126,11 @@ async def generate_pdf(self, request: GeneratePdfRequest): base_url = f"https://fluxi.co/{ENVIRONMENT}/assets" 
folder_path = f"{request.owner_id}/pdfs/{version}" s3_url = f"{base_url}/{folder_path}/{base_filename}.pdf" - exists = await check_file_exists_direct(s3_url) - if exists: - return {"s3_url": s3_url} + if not request.force: + exists = await check_file_exists_direct(s3_url) + if exists: + return {"s3_url": s3_url} agent_queries = [ {'agent': "agent_copies_pdf", 'query': f"section: {section}. {base_query} "} From 909d7c39c473e8bd0cede8d3ef44f2216380f6a3 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Wed, 10 Sep 2025 19:45:00 -0500 Subject: [PATCH 079/195] change s3 direct --- app/services/message_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/message_service.py b/app/services/message_service.py index 2283c16..4b85a75 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -123,7 +123,7 @@ async def generate_pdf(self, request: GeneratePdfRequest): base_query = f"Product Name: {request.product_name} Description: {request.product_description}. Language: {request.language}. 
Content: {request.content}" base_filename = f"{request.product_id}_{request.language}" version = "v2" - base_url = f"https://fluxi.co/{ENVIRONMENT}/assets" + base_url = f"https://fluxi.s3.us-west-2.amazonaws.com/{ENVIRONMENT}/assets" folder_path = f"{request.owner_id}/pdfs/{version}" s3_url = f"{base_url}/{folder_path}/{base_filename}.pdf" From 49a644abea02c9a3b453845717f7680223b0110e Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Wed, 10 Sep 2025 20:00:47 -0500 Subject: [PATCH 080/195] replace s3 url --- app/services/message_service.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/app/services/message_service.py b/app/services/message_service.py index 4b85a75..1423b8a 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -150,6 +150,9 @@ async def generate_pdf(self, request: GeneratePdfRequest): ) ) + if hasattr(result, 's3_url') and result.s3_url: + result.s3_url = result.s3_url.replace("https://fluxi.co/", "https://fluxi.s3.us-west-2.amazonaws.com/") + return result async def resolve_funnel(self, request: ResolveFunnelRequest): From 1f50d650bc172968105c9d269f9eb3711fa831b6 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Wed, 10 Sep 2025 21:21:31 -0500 Subject: [PATCH 081/195] add nw logic --- app/requests/generate_pdf_request.py | 2 -- app/services/message_service.py | 18 +++++++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/app/requests/generate_pdf_request.py b/app/requests/generate_pdf_request.py index 12e7e4a..53a7a74 100644 --- a/app/requests/generate_pdf_request.py +++ b/app/requests/generate_pdf_request.py @@ -1,5 +1,4 @@ from pydantic import BaseModel -from typing import Optional class GeneratePdfRequest(BaseModel): @@ -11,4 +10,3 @@ class GeneratePdfRequest(BaseModel): image_url: str title: str content: str - force: Optional[bool] = False diff --git a/app/services/message_service.py b/app/services/message_service.py index 1423b8a..686ec32 100644 --- 
a/app/services/message_service.py +++ b/app/services/message_service.py @@ -1,5 +1,6 @@ import json import asyncio +import hashlib from app.configurations.config import AGENT_RECOMMEND_PRODUCTS_ID, AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID, ENVIRONMENT from app.configurations.copies_config import AGENT_COPIES @@ -121,16 +122,18 @@ async def generate_copies(self, request: CopyRequest): async def generate_pdf(self, request: GeneratePdfRequest): base_query = f"Product Name: {request.product_name} Description: {request.product_description}. Language: {request.language}. Content: {request.content}" - base_filename = f"{request.product_id}_{request.language}" + + content_hash = hashlib.md5(f"{request.title}_{request.image_url}".encode()).hexdigest()[:8] + base_filename = f"{request.product_id}_{request.language}_{content_hash}" + version = "v2" - base_url = f"https://fluxi.s3.us-west-2.amazonaws.com/{ENVIRONMENT}/assets" + base_url = f"https://fluxi.co/{ENVIRONMENT}/assets" folder_path = f"{request.owner_id}/pdfs/{version}" s3_url = f"{base_url}/{folder_path}/{base_filename}.pdf" - if not request.force: - exists = await check_file_exists_direct(s3_url) - if exists: - return {"s3_url": s3_url} + exists = await check_file_exists_direct(s3_url) + if exists: + return {"s3_url": s3_url} agent_queries = [ {'agent': "agent_copies_pdf", 'query': f"section: {section}. 
{base_query} "} @@ -150,9 +153,6 @@ async def generate_pdf(self, request: GeneratePdfRequest): ) ) - if hasattr(result, 's3_url') and result.s3_url: - result.s3_url = result.s3_url.replace("https://fluxi.co/", "https://fluxi.s3.us-west-2.amazonaws.com/") - return result async def resolve_funnel(self, request: ResolveFunnelRequest): From 51ad8a795bce568f2f26e60ffa4e43551b384098 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Sat, 11 Oct 2025 01:18:59 -0500 Subject: [PATCH 082/195] add langsmith --- .env.example | 7 ++++++- app/processors/agent_processor.py | 8 +++++++- app/processors/conversation_processor.py | 11 +++++++++++ app/processors/mcp_processor.py | 8 +++++++- app/processors/simple_processor.py | 16 ++++++++++++---- 5 files changed, 43 insertions(+), 7 deletions(-) diff --git a/.env.example b/.env.example index 94b21c8..fcfbd56 100644 --- a/.env.example +++ b/.env.example @@ -19,4 +19,9 @@ ENVIRONMENT=dev DROPI_HOST=https://test-api.dropi.co DROPI_S3_BASE_URL=https://d39ru7awumhhs2.cloudfront.net/ -DROPI_API_KEY=dasdsadadasdas \ No newline at end of file +DROPI_API_KEY=dasdsadadasdas + +LANGCHAIN_TRACING_V2=true +LANGCHAIN_ENDPOINT=https://api.smith.langchain.com +LANGCHAIN_API_KEY=tu_api_key_aqui +LANGCHAIN_PROJECT=develop \ No newline at end of file diff --git a/app/processors/agent_processor.py b/app/processors/agent_processor.py index 1e8079f..687c321 100644 --- a/app/processors/agent_processor.py +++ b/app/processors/agent_processor.py @@ -38,12 +38,18 @@ async def process(self, request: MessageRequest, files: Optional[List[Dict[str, ) try: + config = self._get_langsmith_config( + request, + "agent_processor", + has_tools=len(self.tools) > 0 + ) + result = await agent_executor.ainvoke({ "context": self.context or "", "chat_history": self.history, "input": request.query, "agent_scratchpad": "" - }) + }, config=config) if "text" not in result and "output" in result: result["text"] = result["output"] diff --git 
a/app/processors/conversation_processor.py b/app/processors/conversation_processor.py index aadc8dd..6df29c5 100644 --- a/app/processors/conversation_processor.py +++ b/app/processors/conversation_processor.py @@ -8,5 +8,16 @@ def __init__(self, llm: BaseChatModel, context: str, history: List[str]): self.context = context self.history = history + def _get_langsmith_config(self, request, processor_type: str, **extra_metadata) -> Dict[str, Any]: + config = { + "tags": [processor_type, f"agent_{request.agent_id}"], + "metadata": { + "agent_id": request.agent_id, + "conversation_id": request.conversation_id, + **extra_metadata + } + } + return config + async def process(self, query: str, files: Optional[List[Dict[str, str]]], supports_interleaved_files: bool) -> Dict[str, Any]: raise NotImplementedError diff --git a/app/processors/mcp_processor.py b/app/processors/mcp_processor.py index 439d1b4..d4343f5 100644 --- a/app/processors/mcp_processor.py +++ b/app/processors/mcp_processor.py @@ -36,7 +36,13 @@ async def process(self, request: MessageRequest, files: Optional[List[Dict[str, messages.append({"role": "user", "content": request.query}) - response = await agent.ainvoke({"messages": messages}) + config = self._get_langsmith_config( + request, + "mcp_processor", + mcp_servers=list(self.mcp_config.keys()) if isinstance(self.mcp_config, dict) else [] + ) + + response = await agent.ainvoke({"messages": messages}, config=config) content = "" if "messages" in response and response["messages"]: diff --git a/app/processors/simple_processor.py b/app/processors/simple_processor.py index 4564b09..117a032 100644 --- a/app/processors/simple_processor.py +++ b/app/processors/simple_processor.py @@ -9,8 +9,8 @@ class SimpleProcessor(ConversationProcessor): - async def generate_response(self, context: str, chat_history: list, query: str, prompt: ChatPromptTemplate) -> Dict[ - str, Any]: + async def generate_response(self, context: str, chat_history: list, query: str, prompt: 
ChatPromptTemplate, + config: dict = None) -> Dict[str, Any]: chain = ( { "context": lambda x: x["context"], @@ -25,7 +25,7 @@ async def generate_response(self, context: str, chat_history: list, query: str, "context": context, "chat_history": chat_history, "input": query - }) + }, config=config) content = raw_response.content @@ -76,4 +76,12 @@ async def process(self, request: MessageRequest, files: Optional[List[Dict[str, messages.append(HumanMessage(content=request.query)) prompt = ChatPromptTemplate.from_messages(messages) - return await self.generate_response(self.context, self.history, request.query, prompt) + + config = self._get_langsmith_config( + request, + "simple_processor", + has_json_parser=request.json_parser is not None, + has_files=files is not None and len(files) > 0 + ) + + return await self.generate_response(self.context, self.history, request.query, prompt, config) From 00ea8af342d436d18d306f12dc57d71c64a33a57 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Sat, 11 Oct 2025 01:19:06 -0500 Subject: [PATCH 083/195] add req --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 59780a4..f8d2fdb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,4 +19,5 @@ langchain-mcp-adapters==0.0.9 langchain-google-genai Pillow==10.3.0 html5lib -requests \ No newline at end of file +requests +langsmith \ No newline at end of file From 26ada674df7d8bad6d2c67869df64e7da6b8d8b8 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 27 Oct 2025 15:38:29 -0500 Subject: [PATCH 084/195] add logic for get variants dropi --- app/scrapers/dropi_scraper.py | 150 +++++++++++++++++++++++++++++++++- 1 file changed, 149 insertions(+), 1 deletion(-) diff --git a/app/scrapers/dropi_scraper.py b/app/scrapers/dropi_scraper.py index c787e1c..be65cf2 100644 --- a/app/scrapers/dropi_scraper.py +++ b/app/scrapers/dropi_scraper.py @@ -91,7 +91,155 @@ def _extract_variants(self, product_data: 
Dict[str, Any]) -> List[Dict[str, Any] variations = product_data.get("variations", []) if not variations: return [] - return [] + + product_name = product_data.get("name", "") + product_photos = product_data.get("photos", []) + + variants = [] + for variation in variations: + variant = self._build_variant(variation, product_name, product_photos) + if variant: + variants.append(variant) + + return variants + + def _build_variant(self, variation: Dict[str, Any], product_name: str, product_photos: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]: + """Construye un objeto de variante en el formato estándar""" + + # Extraer atributos + attributes = self._extract_attributes(variation) + + # Construir nombre de la variante + variant_name = self._build_variant_name(product_name, attributes) + + # Construir clave de variante + variant_key = self._build_variant_key(attributes) + + # Obtener precios + sale_price = self._parse_variant_price(variation.get("sale_price")) + suggested_price = self._parse_variant_price(variation.get("suggested_price")) + + # Determinar disponibilidad basada en stock + available = self._check_availability(variation) + + # Obtener imágenes de la variante + images = self._get_variant_images(variation, product_photos) + + return { + "name": variant_name, + "variant_key": variant_key, + "price": float(sale_price) if sale_price else None, + "available": available, + "images": images, + "attributes": attributes, + "provider_id": "dropi", + "external_id": str(variation.get("id", "")), + "external_sell_price": float(sale_price) if sale_price else None, + "external_suggested_sell_price": float(suggested_price) if suggested_price else None + } + + def _extract_attributes(self, variation: Dict[str, Any]) -> List[Dict[str, str]]: + """Extrae los atributos de una variación""" + attributes = [] + attribute_values = variation.get("attribute_values", []) + + for attr_value in attribute_values: + attribute_info = attr_value.get("attribute", {}) + attribute_name = 
attribute_info.get("description", "") + value = attr_value.get("value", "") + + # El valor puede venir en formato "COLOR-TALLA VALOR" o similar + # Intentamos limpiar y separar si es necesario + if attribute_name and value: + # Si el valor contiene el nombre del atributo, lo limpiamos + clean_value = self._clean_attribute_value(value, attribute_name) + + attributes.append({ + "name": attribute_name.title(), + "value": clean_value + }) + + return attributes + + def _clean_attribute_value(self, value: str, attribute_name: str) -> str: + """Limpia el valor del atributo removiendo prefijos redundantes""" + # Ejemplo: "NEGRO-TALLA L" cuando el atributo es "TALLA" -> "NEGRO-L" + # O mejor aún, intentar separar los componentes + parts = value.split("-") + + # Si hay múltiples partes, intentamos encontrar la relevante + if len(parts) > 1: + # Buscar la parte que no sea el nombre del atributo + cleaned_parts = [] + for part in parts: + # Remover el nombre del atributo si aparece en la parte + part_clean = part.replace(attribute_name.upper(), "").strip() + if part_clean: + cleaned_parts.append(part_clean) + + return " ".join(cleaned_parts).strip() if cleaned_parts else value + + return value + + def _build_variant_name(self, product_name: str, attributes: List[Dict[str, str]]) -> str: + """Construye el nombre de la variante combinando el nombre del producto y los atributos""" + if not attributes: + return product_name + + # Concatenar los valores de atributos + attribute_parts = [attr["value"] for attr in attributes] + attribute_string = " - ".join(attribute_parts) + + return f"{product_name} - {attribute_string}" + + def _build_variant_key(self, attributes: List[Dict[str, str]]) -> str: + """Construye una clave única para la variante basada en los atributos""" + if not attributes: + return "default" + + # Crear clave en formato "attribute1-value1-attribute2-value2" + key_parts = [] + for attr in attributes: + attr_name = attr["name"].lower().replace(" ", "-") + attr_value = 
attr["value"].lower().replace(" ", "-") + key_parts.append(f"{attr_name}-{attr_value}") + + return "-".join(key_parts) + + def _parse_variant_price(self, price_str: Any) -> Optional[Decimal]: + """Parsea el precio de una variante""" + if not price_str: + return None + return parse_price(str(price_str)) + + def _check_availability(self, variation: Dict[str, Any]) -> bool: + """Verifica si la variante está disponible basándose en el stock""" + warehouse_variations = variation.get("warehouse_product_variation", []) + + if not warehouse_variations: + return False + + # Verificar si hay stock disponible en algún almacén + total_stock = sum(wh.get("stock", 0) for wh in warehouse_variations) + return total_stock > 0 + + def _get_variant_images(self, variation: Dict[str, Any], product_photos: List[Dict[str, Any]]) -> List[str]: + """Obtiene las imágenes de la variante o del producto principal""" + variation_id = variation.get("id") + images = [] + + # Primero buscar imágenes específicas de esta variación + for photo in product_photos: + if photo.get("variation_id") == variation_id and photo.get("urlS3"): + images.append(DROPI_S3_BASE_URL + photo["urlS3"]) + + # Si no hay imágenes específicas de la variación, usar las imágenes principales del producto + if not images: + for photo in product_photos: + if not photo.get("variation_id") and photo.get("urlS3"): + images.append(DROPI_S3_BASE_URL + photo["urlS3"]) + + return images def _extract_product_id(self, url: str) -> str: match = re.search(r'/product-details/(\d+)', url) From 4b09add7cad206c3757cc05463ce38325f11951e Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Thu, 20 Nov 2025 23:47:33 -0500 Subject: [PATCH 085/195] add new logic for gemini --- app/externals/images/image_client.py | 98 ++++++++++++++++--------- app/helpers/image_compression_helper.py | 72 ++++++++++++++++++ app/requests/generate_image_request.py | 4 +- app/services/image_service.py | 58 ++++++--------- 4 files changed, 159 insertions(+), 73 
deletions(-) create mode 100644 app/helpers/image_compression_helper.py diff --git a/app/externals/images/image_client.py b/app/externals/images/image_client.py index 82007e2..c579af2 100644 --- a/app/externals/images/image_client.py +++ b/app/externals/images/image_client.py @@ -68,41 +68,68 @@ async def generate_image_variation( raise Exception(f"Error {response.status}: {await response.text()}") -async def google_image(image_urls: list[str], prompt: str, resolution: Optional[str] = None) -> bytes: - url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-image-preview:generateContent?key={GOOGLE_GEMINI_API_KEY}" +def _build_image_part(image_base64: str, is_model_25: bool) -> dict: + if is_model_25: + return { + "inlineData": { + "mimeType": 'image/jpeg', + "data": image_base64 + } + } + return { + "inline_data": { + "mime_type": 'image/jpeg', + "data": image_base64 + } + } - parts = [{"text": prompt}] - if image_urls: - async with aiohttp.ClientSession() as fetch_session: - for image_url in image_urls: - try: - async with fetch_session.get(image_url) as img_response: - if img_response.status == 200: - image_bytes = await img_response.read() - image_base64 = base64.b64encode(image_bytes).decode('utf-8') - - parts.append({ - "inlineData": { - "mimeType": 'image/jpeg', - "data": image_base64 - } - }) - except Exception as e: - print(f"Error al procesar imagen de {image_url}: {str(e)}") - continue - - generation_config = { - "responseModalities": ["Text", "Image"] - } +async def _fetch_and_encode_images(image_urls: list[str], is_model_25: bool) -> list[dict]: + parts = [] + async with aiohttp.ClientSession() as fetch_session: + for image_url in image_urls: + try: + async with fetch_session.get(image_url) as img_response: + if img_response.status == 200: + image_bytes = await img_response.read() + image_base64 = base64.b64encode(image_bytes).decode('utf-8') + parts.append(_build_image_part(image_base64, is_model_25)) + except Exception as e: 
+ print(f"Error al procesar imagen de {image_url}: {str(e)}") + continue + return parts + + +def _build_generation_config(is_model_25: bool, aspect_ratio: str, image_size: str) -> dict: + config = {"responseModalities": ["Text", "Image"]} + if not is_model_25: + config["imageConfig"] = { + "aspectRatio": aspect_ratio, + "imageSize": image_size + } + return config + + +async def google_image(image_urls: list[str], prompt: str, model_ia: Optional[str] = None, extra_params: Optional[dict] = None) -> bytes: + if extra_params is None: + extra_params = {} + + is_model_25 = model_ia and '2.5' in model_ia + aspect_ratio = extra_params.get('aspect_ratio', '1:1') + image_size = extra_params.get('image_size', '1K') + + model_name = 'gemini-2.5-flash-image-preview' if is_model_25 else 'gemini-3-pro-image-preview' + url = f"https://generativelanguage.googleapis.com/v1beta/models/{model_name}:generateContent?key={GOOGLE_GEMINI_API_KEY}" + + parts = [{"text": prompt}] + if image_urls: + image_parts = await _fetch_and_encode_images(image_urls, is_model_25) + parts.extend(image_parts) + payload = { - "contents": [ - { - "parts": parts - } - ], - "generationConfig": generation_config + "contents": [{"parts": parts}], + "generationConfig": _build_generation_config(is_model_25, aspect_ratio, image_size) } headers = {'Content-Type': 'application/json'} @@ -130,7 +157,7 @@ async def google_image(image_urls: list[str], prompt: str, resolution: Optional[ raise Exception(f"Error al generar imagen con Google Gemini: {str(e)}") -async def openai_image_edit(image_urls: list[str], prompt: str, resolution: Optional[str] = None) -> bytes: +async def openai_image_edit(image_urls: list[str], prompt: str, model_ia: Optional[str] = None, extra_params: Optional[dict] = None) -> bytes: url = "https://api.openai.com/v1/images/edits" headers = { "Authorization": f"Bearer {config.OPENAI_API_KEY}" @@ -153,9 +180,10 @@ async def openai_image_edit(image_urls: list[str], prompt: str, resolution: Opti prompt 
= prompt + ". **escena completa visible, composición centrada, todos los elementos dentro del marco cuadrado, nada recortado en los bordes, composición completa**" - size = '1024x1024' - if resolution and resolution.strip(): - size = resolution + if extra_params is None: + extra_params = {} + + size = extra_params.get('resolution', '1024x1024') or '1024x1024' data.add_field('size', size) data.add_field('prompt', prompt) diff --git a/app/helpers/image_compression_helper.py b/app/helpers/image_compression_helper.py new file mode 100644 index 0000000..922706b --- /dev/null +++ b/app/helpers/image_compression_helper.py @@ -0,0 +1,72 @@ +import io +import base64 +from PIL import Image + + +def compress_image_to_target(original_image_bytes: bytes, target_kb: int = 120) -> str: + img = Image.open(io.BytesIO(original_image_bytes)) + + if img.mode in ("RGBA", "P"): + img_converted = img.convert("RGBA") + else: + img_converted = img.convert("RGB") + + target_bytes = target_kb * 1024 + + output_buffer = io.BytesIO() + img_converted.save(output_buffer, format='WEBP', quality=80) + webp_size = len(output_buffer.getvalue()) + + if webp_size <= target_bytes: + return base64.b64encode(output_buffer.getvalue()).decode('utf-8') + + quality = _calculate_initial_quality(webp_size, target_bytes) + + for attempt in range(2): + output_buffer = io.BytesIO() + img_converted.save(output_buffer, format='WEBP', quality=quality) + compressed_size = len(output_buffer.getvalue()) + + if compressed_size <= target_bytes: + return base64.b64encode(output_buffer.getvalue()).decode('utf-8') + + quality = max(40, quality - 10) + + if compressed_size > target_bytes and max(img_converted.size) > 1024: + img_resized = _resize_image(img_converted, target_bytes, compressed_size) + output_buffer = io.BytesIO() + img_resized.save(output_buffer, format='WEBP', quality=70) + return base64.b64encode(output_buffer.getvalue()).decode('utf-8') + + return base64.b64encode(output_buffer.getvalue()).decode('utf-8') + 
+ +def _calculate_initial_quality(current_size: int, target_size: int) -> int: + ratio = target_size / current_size + + if ratio >= 0.8: + return 75 + elif ratio >= 0.5: + return 65 + elif ratio >= 0.3: + return 55 + else: + return 45 + + +def _resize_image(img: Image, target_bytes: int, current_bytes: int) -> Image: + ratio = (target_bytes / current_bytes) ** 0.5 + new_width = int(img.width * ratio) + new_height = int(img.height * ratio) + + max_dimension = 1920 + if new_width > max_dimension or new_height > max_dimension: + if new_width > new_height: + new_height = int(new_height * max_dimension / new_width) + new_width = max_dimension + else: + new_width = int(new_width * max_dimension / new_height) + new_height = max_dimension + + return img.resize((new_width, new_height), Image.Resampling.LANCZOS) + diff --git a/app/requests/generate_image_request.py b/app/requests/generate_image_request.py index 23ae10a..d1083b4 100644 --- a/app/requests/generate_image_request.py +++ b/app/requests/generate_image_request.py @@ -10,5 +10,7 @@ class GenerateImageRequest(BaseModel): prompt: Optional[str] = None agent_id: Optional[str] = None provider: Optional[str] = None + model_ai: Optional[str] = None num_variations: int = 4 - parameter_prompt: Optional[Dict[str, Any]] = None \ No newline at end of file + parameter_prompt: Optional[Dict[str, Any]] = None + extra_parameters: Optional[Dict[str, Any]] = None \ No newline at end of file diff --git a/app/services/image_service.py b/app/services/image_service.py index 66b9c0c..fbcf399 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -11,6 +11,7 @@ from app.services.image_service_interface import ImageServiceInterface from app.services.message_service_interface import MessageServiceInterface from app.externals.s3_upload.s3_upload_client import upload_file +from app.helpers.image_compression_helper import compress_image_to_target from fastapi import Depends import asyncio import uuid @@ -19,8 +20,6 @@ 
from app.externals.images.image_client import google_image, openai_image_edit from typing import Optional import base64 -import io -from PIL import Image load_dotenv() @@ -34,40 +33,25 @@ async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, unique_id = uuid.uuid4().hex[:8] file_name = f"{prefix_name}_{unique_id}" original_image_bytes = base64.b64decode(image_base64) - image_base64_high = self._process_image_for_upload(original_image_bytes) + image_base64_compressed = compress_image_to_target(original_image_bytes, target_kb=120) return await upload_file( S3UploadRequest( - file=image_base64_high, + file=image_base64_compressed, folder=f"{owner_id}/products/variations/{folder_id}", filename=file_name ) ) - def _process_image_for_upload(self, original_image_bytes: bytes) -> str: - img = Image.open(io.BytesIO(original_image_bytes)) - - if img.mode in ("RGBA", "P"): - img_converted = img.convert("RGBA") - else: - img_converted = img.convert("RGB") - - high_output_buffer = io.BytesIO() - img_converted.save(high_output_buffer, format='WEBP', quality=80) - image_base64_high = base64.b64encode(high_output_buffer.getvalue()).decode('utf-8') - - return image_base64_high - async def _generate_single_variation(self, url_images: list[str], prompt: str, owner_id: str, - folder_id: str, file: Optional[str] = None, resolution: Optional[str] = None, - provider: Optional[str] = None) -> str: + folder_id: str, file: Optional[str] = None, extra_params: Optional[dict] = None, + provider: Optional[str] = None, model_ai: Optional[str] = None) -> str: - if provider and provider.lower() == "gemini": - image_content = await google_image(image_urls=url_images, prompt=prompt, resolution=resolution) + if provider and provider.lower() == "openai": + image_content = await openai_image_edit(image_urls=url_images, prompt=prompt, model_ia=model_ai, extra_params=extra_params) else: - - image_content = await openai_image_edit(image_urls=url_images, prompt=prompt, 
resolution=resolution) + image_content = await google_image(image_urls=url_images, prompt=prompt, model_ia=model_ai, extra_params=extra_params) content_base64 = base64.b64encode(image_content).decode('utf-8') final_upload = await self._upload_to_s3( @@ -98,15 +82,15 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ agent_config = response_data["agent_config"] response = response_data["message"] - resolution = None - if (agent_config.preferences.extra_parameters and - 'resolution' in agent_config.preferences.extra_parameters): - resolution = agent_config.preferences.extra_parameters['resolution'] + extra_params = None + if agent_config.preferences.extra_parameters: + extra_params = agent_config.preferences.extra_parameters prompt = response["text"] + " Do not modify any text, letters, brand logos, brand names, or symbols." tasks = [ self._generate_single_variation([original_image_response.s3_url], prompt, owner_id, folder_id, - request.file, resolution, provider=agent_config.provider_ai) + request.file, extra_params, provider=agent_config.provider_ai, + model_ai=agent_config.model_ai) for i in range(request.num_variations) ] generated_urls = await asyncio.gather(*tasks) @@ -119,7 +103,7 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ vision_analysis=vision_analysis ) - async def generate_images_from(self, request: GenerateImageRequest, owner_id: str, resolution: Optional[str] = None): + async def generate_images_from(self, request: GenerateImageRequest, owner_id: str): folder_id = uuid.uuid4().hex[:8] urls = request.file_urls or [] original_url = request.file_url @@ -138,8 +122,9 @@ async def generate_images_from(self, request: GenerateImageRequest, owner_id: st owner_id, folder_id, request.file, - resolution=resolution, - provider=request.provider + extra_params=request.extra_parameters, + provider=request.provider, + model_ai=request.model_ai ) for i in range(request.num_variations) ] @@ -166,12 
+151,11 @@ async def generate_images_from_agent(self, request: GenerateImageRequest, owner_ request.prompt = message["text"] request.provider = agent_config.provider_ai + request.model_ai = agent_config.model_ai - resolution = None - if (agent_config.preferences.extra_parameters and - 'resolution' in agent_config.preferences.extra_parameters): - resolution = agent_config.preferences.extra_parameters['resolution'] + if agent_config.preferences.extra_parameters: + request.extra_parameters = agent_config.preferences.extra_parameters - response = await self.generate_images_from(request, owner_id, resolution=resolution) + response = await self.generate_images_from(request, owner_id) return response From 4298aecc61568c51ce28a4ef7642ebdd900d190b Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 15 Dec 2025 16:19:06 -0500 Subject: [PATCH 086/195] add language --- app/requests/resolve_funnel_request.py | 4 +++- app/services/message_service.py | 9 ++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/app/requests/resolve_funnel_request.py b/app/requests/resolve_funnel_request.py index d2a8209..40a9d24 100644 --- a/app/requests/resolve_funnel_request.py +++ b/app/requests/resolve_funnel_request.py @@ -1,6 +1,8 @@ from pydantic import BaseModel +from typing import Optional class ResolveFunnelRequest(BaseModel): product_name: str - product_description: str \ No newline at end of file + product_description: str + language: Optional[str] = "es" \ No newline at end of file diff --git a/app/services/message_service.py b/app/services/message_service.py index 686ec32..6f303f9 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -162,7 +162,8 @@ async def resolve_funnel(self, request: ResolveFunnelRequest): query="pain_detection", parameter_prompt={ "product_name": request.product_name, - "product_description": request.product_description + "product_description": request.product_description, + "language": request.language } )) 
@@ -175,7 +176,8 @@ async def resolve_funnel(self, request: ResolveFunnelRequest): parameter_prompt={ "product_name": request.product_name, "product_description": request.product_description, - "pain_detection": pain_detection_message + "pain_detection": pain_detection_message, + "language": request.language } )) @@ -197,7 +199,8 @@ async def resolve_funnel(self, request: ResolveFunnelRequest): "product_name": request.product_name, "product_description": request.product_description, "pain_detection": pain_detection_message, - "buyer_detection": buyer_detection_message + "buyer_detection": buyer_detection_message, + "language": request.language } )) From 431fac7b1a0e9a5164f246fa03cd9d07bc693170 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 15 Dec 2025 17:33:26 -0500 Subject: [PATCH 087/195] add translate manual --- app/configurations/pdf_manual_config.py | 34 ++++++++++++++++++++----- app/pdf/pdf_manual_generator.py | 8 +++--- app/services/message_service.py | 8 +++--- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/app/configurations/pdf_manual_config.py b/app/configurations/pdf_manual_config.py index fbfa290..42480f7 100644 --- a/app/configurations/pdf_manual_config.py +++ b/app/configurations/pdf_manual_config.py @@ -1,11 +1,29 @@ -PDF_MANUAL_SECTIONS = { - "introduction": "Introducción", - "main_features": "Características principales", - "usage_instructions": "Instrucciones de uso", - "troubleshooting": "Solución de problemas", - "faq": "Preguntas frecuentes" +PDF_MANUAL_SECTIONS_TRANSLATIONS = { + "es": { + "introduction": "Introducción", + "main_features": "Características principales", + "usage_instructions": "Instrucciones de uso", + "troubleshooting": "Solución de problemas", + "faq": "Preguntas frecuentes" + }, + "en": { + "introduction": "Introduction", + "main_features": "Main Features", + "usage_instructions": "Usage Instructions", + "troubleshooting": "Troubleshooting", + "faq": "Frequently Asked Questions" + }, + "pt": { + 
"introduction": "Introdução", + "main_features": "Características Principais", + "usage_instructions": "Instruções de Uso", + "troubleshooting": "Solução de Problemas", + "faq": "Perguntas Frequentes" + } } +PDF_MANUAL_SECTIONS = PDF_MANUAL_SECTIONS_TRANSLATIONS["es"] + PDF_MANUAL_SECTION_ORDER = [ "introduction", "main_features", @@ -13,3 +31,7 @@ "troubleshooting", "faq" ] + + +def get_sections_for_language(language: str = "es") -> dict: + return PDF_MANUAL_SECTIONS_TRANSLATIONS.get(language, PDF_MANUAL_SECTIONS_TRANSLATIONS["es"]) diff --git a/app/pdf/pdf_manual_generator.py b/app/pdf/pdf_manual_generator.py index e1e66c5..ccc4095 100644 --- a/app/pdf/pdf_manual_generator.py +++ b/app/pdf/pdf_manual_generator.py @@ -1,12 +1,14 @@ import base64 import os from app.pdf.pdf_generator import PDFGenerator -from app.configurations.pdf_manual_config import PDF_MANUAL_SECTIONS, PDF_MANUAL_SECTION_ORDER +from app.configurations.pdf_manual_config import PDF_MANUAL_SECTION_ORDER, get_sections_for_language class PDFManualGenerator: - def __init__(self, product_name: str): + def __init__(self, product_name: str, language: str = "es"): self.product_name = product_name + self.language = language + self.sections = get_sections_for_language(language) self.pdf = PDFGenerator(product_name) async def create_manual(self, data: dict, title: str = None, image_url: str = None) -> str: @@ -25,7 +27,7 @@ async def create_manual(self, data: dict, title: str = None, image_url: str = No self.pdf.set_auto_page_break(auto=True, margin=20) for key in PDF_MANUAL_SECTION_ORDER: - self.pdf.add_section(PDF_MANUAL_SECTIONS[key], data.get(key, "")) + self.pdf.add_section(self.sections[key], data.get(key, "")) pdf_str = self.pdf.output(dest="S") pdf_bytes = pdf_str.encode("latin1") diff --git a/app/services/message_service.py b/app/services/message_service.py index 6f303f9..1d7b088 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -18,7 +18,7 @@ from 
app.services.message_service_interface import MessageServiceInterface from app.managers.conversation_manager_interface import ConversationManagerInterface from fastapi import Depends -from app.configurations.pdf_manual_config import PDF_MANUAL_SECTIONS +from app.configurations.pdf_manual_config import PDF_MANUAL_SECTIONS, get_sections_for_language from app.pdf.pdf_manual_generator import PDFManualGenerator from app.externals.amazon.requests.amazon_search_request import AmazonSearchRequest from app.externals.amazon.amazon_client import search_products @@ -135,14 +135,16 @@ async def generate_pdf(self, request: GeneratePdfRequest): if exists: return {"s3_url": s3_url} + sections = get_sections_for_language(request.language) + agent_queries = [ {'agent': "agent_copies_pdf", 'query': f"section: {section}. {base_query} "} - for section, _ in PDF_MANUAL_SECTIONS.items() + for section in sections.keys() ] combined_data = await self.process_multiple_agents(agent_queries) - pdf_generator = PDFManualGenerator(request.product_name) + pdf_generator = PDFManualGenerator(request.product_name, language=request.language) pdf = await pdf_generator.create_manual(combined_data, request.title, request.image_url) result = await upload_file( From af760d24ae7cdf534645230591fa54566849d9d0 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Wed, 17 Dec 2025 16:26:20 -0500 Subject: [PATCH 088/195] add language generate image --- app/requests/generate_image_request.py | 3 ++- app/requests/variation_image_request.py | 4 +++- app/services/image_service.py | 6 +++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/app/requests/generate_image_request.py b/app/requests/generate_image_request.py index d1083b4..f85a855 100644 --- a/app/requests/generate_image_request.py +++ b/app/requests/generate_image_request.py @@ -13,4 +13,5 @@ class GenerateImageRequest(BaseModel): model_ai: Optional[str] = None num_variations: int = 4 parameter_prompt: Optional[Dict[str, Any]] = None - 
extra_parameters: Optional[Dict[str, Any]] = None \ No newline at end of file + extra_parameters: Optional[Dict[str, Any]] = None + language: Optional[str] = "es" \ No newline at end of file diff --git a/app/requests/variation_image_request.py b/app/requests/variation_image_request.py index 33d6376..a89edc2 100644 --- a/app/requests/variation_image_request.py +++ b/app/requests/variation_image_request.py @@ -1,9 +1,11 @@ from pydantic import BaseModel, Field, validator +from typing import Optional class VariationImageRequest(BaseModel): file: str - num_variations: int = Field(default=3, ge=1, le=10) # mínimo 1, máximo 10 variaciones + num_variations: int = Field(default=3, ge=1, le=10) + language: Optional[str] = "es" @validator('num_variations') def validate_variations(cls, v): diff --git a/app/services/image_service.py b/app/services/image_service.py index fbcf399..6555f66 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -71,6 +71,7 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ query=f"Attached is the product image. 
{vision_analysis.get_analysis_text()}", agent_id=AGENT_IMAGE_VARIATIONS, conversation_id="", + parameter_prompt={"language": request.language}, files=[{ "type": "image", "url": original_image_response.s3_url, @@ -138,10 +139,13 @@ async def generate_images_from(self, request: GenerateImageRequest, owner_id: st ) async def generate_images_from_agent(self, request: GenerateImageRequest, owner_id: str): + parameter_prompt = request.parameter_prompt or {} + parameter_prompt["language"] = request.language + data = MessageRequest( agent_id=request.agent_id, query=request.agent_id, - parameter_prompt=request.parameter_prompt, + parameter_prompt=parameter_prompt, conversation_id="", ) From 4318d04a8b78ff43347213f1822f28edc54026e8 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Sat, 27 Dec 2025 18:09:49 -0500 Subject: [PATCH 089/195] add logic dropi multiple country --- .env.example | 8 ++++++++ app/configurations/config.py | 20 +++++++++++++++++++- app/controllers/handle_controller.py | 6 ++++-- app/externals/dropi/dropi_client.py | 23 +++++++++++++---------- app/factories/scraping_factory.py | 6 +++--- app/requests/product_scraping_request.py | 2 ++ app/scrapers/dropi_scraper.py | 8 ++++---- app/services/dropi_service.py | 8 ++++---- app/services/dropi_service_interface.py | 4 ++-- app/services/product_scraping_service.py | 2 +- 10 files changed, 60 insertions(+), 27 deletions(-) diff --git a/.env.example b/.env.example index fcfbd56..755a274 100644 --- a/.env.example +++ b/.env.example @@ -21,6 +21,14 @@ DROPI_HOST=https://test-api.dropi.co DROPI_S3_BASE_URL=https://d39ru7awumhhs2.cloudfront.net/ DROPI_API_KEY=dasdsadadasdas +# Dropi - API Keys por País (opcional, si no se especifica usa DROPI_API_KEY) +DROPI_API_KEY_CO=your_dropi_api_key_colombia +DROPI_API_KEY_MX=your_dropi_api_key_mexico +DROPI_API_KEY_AR=your_dropi_api_key_argentina +DROPI_API_KEY_CL=your_dropi_api_key_chile +DROPI_API_KEY_PE=your_dropi_api_key_peru +DROPI_API_KEY_PY=your_dropi_api_key_paraguay + 
LANGCHAIN_TRACING_V2=true LANGCHAIN_ENDPOINT=https://api.smith.langchain.com LANGCHAIN_API_KEY=tu_api_key_aqui diff --git a/app/configurations/config.py b/app/configurations/config.py index 1b6419e..759ae8b 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -36,4 +36,22 @@ DROPI_S3_BASE_URL: str = os.getenv('DROPI_S3_BASE_URL', 'https://d39ru7awumhhs2.cloudfront.net/') DROPI_HOST: str = os.getenv('DROPI_HOST', 'https://test-api.dropi.co') -DROPI_API_KEY: str = os.getenv('DROPI_API_KEY') \ No newline at end of file +DROPI_API_KEY: str = os.getenv('DROPI_API_KEY') +DROPI_API_KEY_CO: str = os.getenv('DROPI_API_KEY_CO', os.getenv('DROPI_API_KEY')) +DROPI_API_KEY_MX: str = os.getenv('DROPI_API_KEY_MX', os.getenv('DROPI_API_KEY')) +DROPI_API_KEY_AR: str = os.getenv('DROPI_API_KEY_AR', os.getenv('DROPI_API_KEY')) +DROPI_API_KEY_CL: str = os.getenv('DROPI_API_KEY_CL', os.getenv('DROPI_API_KEY')) +DROPI_API_KEY_PE: str = os.getenv('DROPI_API_KEY_PE', os.getenv('DROPI_API_KEY')) +DROPI_API_KEY_PY: str = os.getenv('DROPI_API_KEY_PY', os.getenv('DROPI_API_KEY')) + + +def get_dropi_api_key(country: str = "co") -> str: + country_keys = { + "co": DROPI_API_KEY_CO, + "mx": DROPI_API_KEY_MX, + "ar": DROPI_API_KEY_AR, + "cl": DROPI_API_KEY_CL, + "pe": DROPI_API_KEY_PE, + "py": DROPI_API_KEY_PY, + } + return country_keys.get(country.lower(), DROPI_API_KEY) \ No newline at end of file diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index 7f2b9ac..c88dfb2 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -29,16 +29,18 @@ @router.get("/integration/dropi/departments") async def get_departments( + country: str = "co", service: DropiServiceInterface = Depends(DropiService) ): - return await service.get_departments() + return await service.get_departments(country) @router.get("/integration/dropi/departments/{department_id}/cities") async def get_cities_by_department( 
department_id: int, + country: str = "co", service: DropiServiceInterface = Depends(DropiService) ): - return await service.get_cities_by_department(department_id) + return await service.get_cities_by_department(department_id, country) @router.post("/handle-message") async def handle_message( diff --git a/app/externals/dropi/dropi_client.py b/app/externals/dropi/dropi_client.py index c68286d..489e818 100644 --- a/app/externals/dropi/dropi_client.py +++ b/app/externals/dropi/dropi_client.py @@ -1,15 +1,16 @@ import httpx from typing import Dict, Any -from app.configurations.config import DROPI_HOST, DROPI_API_KEY +from app.configurations.config import DROPI_HOST, get_dropi_api_key -async def get_product_details(product_id: str) -> Dict[str, Any]: +async def get_product_details(product_id: str, country: str = "co") -> Dict[str, Any]: headers = { - "dropi-integration-key": DROPI_API_KEY + "dropi-integration-key": get_dropi_api_key(country) } - url = f"{DROPI_HOST}/integrations/products/v2/{product_id}" + dropi_host = DROPI_HOST.replace(".co", f".{country}") + url = f"{dropi_host}/integrations/products/v2/{product_id}" async with httpx.AsyncClient() as client: try: @@ -22,11 +23,12 @@ async def get_product_details(product_id: str) -> Dict[str, Any]: raise Exception(f"API request failed: {str(e)}") -async def get_departments() -> Dict[str, Any]: +async def get_departments(country: str = "co") -> Dict[str, Any]: headers = { - "dropi-integration-key": DROPI_API_KEY + "dropi-integration-key": get_dropi_api_key(country) } - url = f"{DROPI_HOST}/integrations/department" + dropi_host = DROPI_HOST.replace(".co", f".{country}") + url = f"{dropi_host}/integrations/department" async with httpx.AsyncClient() as client: try: response = await client.get(url, headers=headers) @@ -38,16 +40,17 @@ async def get_departments() -> Dict[str, Any]: raise Exception(f"API request failed: {str(e)}") -async def get_cities_by_department(department_id: int, rate_type: str) -> Dict[str, Any]: 
+async def get_cities_by_department(department_id: int, rate_type: str, country: str = "co") -> Dict[str, Any]: headers = { - "dropi-integration-key": DROPI_API_KEY, + "dropi-integration-key": get_dropi_api_key(country), "Content-Type": "application/json" } payload = { "department_id": department_id, "rate_type": rate_type } - url = f"{DROPI_HOST}/integrations/trajectory/bycity" + dropi_host = DROPI_HOST.replace(".co", f".{country}") + url = f"{dropi_host}/integrations/trajectory/bycity" async with httpx.AsyncClient() as client: try: response = await client.post(url, headers=headers, json=payload) diff --git a/app/factories/scraping_factory.py b/app/factories/scraping_factory.py index 10578c8..6487233 100644 --- a/app/factories/scraping_factory.py +++ b/app/factories/scraping_factory.py @@ -15,7 +15,7 @@ class ScrapingFactory: def __init__(self, message_service: MessageServiceInterface = Depends()): self.message_service = message_service - def get_scraper(self, url: str) -> ScraperInterface: + def get_scraper(self, url: str, country: str = "co") -> ScraperInterface: domain = urlparse(url).netloc.lower() if "amazon" in domain: @@ -24,7 +24,7 @@ def get_scraper(self, url: str) -> ScraperInterface: return AliexpressScraper() elif "cjdropshipping" in domain: return CJScraper() - elif "dropi.co" in domain: - return DropiScraper() + elif "dropi" in domain: + return DropiScraper(country=country) else: return IAScraper(message_service=self.message_service) diff --git a/app/requests/product_scraping_request.py b/app/requests/product_scraping_request.py index 4587410..7ef4bab 100644 --- a/app/requests/product_scraping_request.py +++ b/app/requests/product_scraping_request.py @@ -1,5 +1,7 @@ from pydantic import BaseModel, HttpUrl +from typing import Optional class ProductScrapingRequest(BaseModel): product_url: HttpUrl + country: Optional[str] = "co" diff --git a/app/scrapers/dropi_scraper.py b/app/scrapers/dropi_scraper.py index be65cf2..29b8477 100644 --- 
a/app/scrapers/dropi_scraper.py +++ b/app/scrapers/dropi_scraper.py @@ -11,6 +11,9 @@ class DropiScraper(ScraperInterface): + def __init__(self, country: str = "co"): + self.country = country + async def scrape_direct(self, html: str) -> Dict[str, Any]: return {} @@ -18,7 +21,7 @@ async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: product_id = self._extract_product_id(url) try: - data = await get_product_details(product_id) + data = await get_product_details(product_id, self.country) product_data = self._get_product_data(data) result = { @@ -224,16 +227,13 @@ def _check_availability(self, variation: Dict[str, Any]) -> bool: return total_stock > 0 def _get_variant_images(self, variation: Dict[str, Any], product_photos: List[Dict[str, Any]]) -> List[str]: - """Obtiene las imágenes de la variante o del producto principal""" variation_id = variation.get("id") images = [] - # Primero buscar imágenes específicas de esta variación for photo in product_photos: if photo.get("variation_id") == variation_id and photo.get("urlS3"): images.append(DROPI_S3_BASE_URL + photo["urlS3"]) - # Si no hay imágenes específicas de la variación, usar las imágenes principales del producto if not images: for photo in product_photos: if not photo.get("variation_id") and photo.get("urlS3"): diff --git a/app/services/dropi_service.py b/app/services/dropi_service.py index d16768c..61ef333 100644 --- a/app/services/dropi_service.py +++ b/app/services/dropi_service.py @@ -9,17 +9,17 @@ class DropiService(DropiServiceInterface): def __init__(self): pass - async def get_departments(self) -> List[Dict[str, Any]]: + async def get_departments(self, country: str = "co") -> List[Dict[str, Any]]: try: - response = await dropi_client.get_departments() + response = await dropi_client.get_departments(country) return response.get("objects", []) except Exception as e: raise HTTPException(status_code=500, detail=f"Error fetching departments from Dropi: {str(e)}") - async def 
get_cities_by_department(self, department_id: int) -> List[Dict[str, Any]]: + async def get_cities_by_department(self, department_id: int, country: str = "co") -> List[Dict[str, Any]]: try: rate_type = "CON RECAUDO" - response = await dropi_client.get_cities_by_department(department_id, rate_type) + response = await dropi_client.get_cities_by_department(department_id, rate_type, country) return response.get("objects", {}).get("cities", []) except Exception as e: raise HTTPException(status_code=500, detail=f"Error fetching cities from Dropi: {str(e)}") \ No newline at end of file diff --git a/app/services/dropi_service_interface.py b/app/services/dropi_service_interface.py index f60cb69..3de8899 100644 --- a/app/services/dropi_service_interface.py +++ b/app/services/dropi_service_interface.py @@ -4,9 +4,9 @@ class DropiServiceInterface(ABC): @abstractmethod - async def get_departments(self) -> List[Dict[str, Any]]: + async def get_departments(self, country: str = "co") -> List[Dict[str, Any]]: pass @abstractmethod - async def get_cities_by_department(self, department_id: int) -> List[Dict[str, Any]]: + async def get_cities_by_department(self, department_id: int, country: str = "co") -> List[Dict[str, Any]]: pass \ No newline at end of file diff --git a/app/services/product_scraping_service.py b/app/services/product_scraping_service.py index 8e86563..c9c2367 100644 --- a/app/services/product_scraping_service.py +++ b/app/services/product_scraping_service.py @@ -14,7 +14,7 @@ async def scrape_product(self, request: ProductScrapingRequest): url = str(request.product_url) domain = urlparse(url).netloc.lower() - scraper = self.scraping_factory.get_scraper(url) + scraper = self.scraping_factory.get_scraper(url, country=request.country) return await scraper.scrape(url, domain) async def scrape_direct(self, html): From 823f13cad9697bcdcf0a8f72dcae372af78983a9 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Mon, 5 Jan 2026 15:41:41 -0500 Subject: [PATCH 090/195] add ec 
--- app/configurations/config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/configurations/config.py b/app/configurations/config.py index 9f9ace2..1849c2a 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -43,6 +43,7 @@ DROPI_API_KEY_CL: str = os.getenv('DROPI_API_KEY_CL', os.getenv('DROPI_API_KEY')) DROPI_API_KEY_PE: str = os.getenv('DROPI_API_KEY_PE', os.getenv('DROPI_API_KEY')) DROPI_API_KEY_PY: str = os.getenv('DROPI_API_KEY_PY', os.getenv('DROPI_API_KEY')) +DROPI_API_KEY_EC: str = os.getenv('DROPI_API_KEY_EC', os.getenv('DROPI_API_KEY')) def get_dropi_api_key(country: str = "co") -> str: @@ -53,6 +54,7 @@ def get_dropi_api_key(country: str = "co") -> str: "cl": DROPI_API_KEY_CL, "pe": DROPI_API_KEY_PE, "py": DROPI_API_KEY_PY, + "ec": DROPI_API_KEY_EC, } return country_keys.get(country.lower(), DROPI_API_KEY) From f42fa47f54a1329441fa1a766ba17918d41a45e0 Mon Sep 17 00:00:00 2001 From: Oscar Arellano Date: Wed, 14 Jan 2026 21:39:48 -0500 Subject: [PATCH 091/195] add docs, ci, format add docs, ci, format --- .flake8 | 17 + .github/workflows/ci.yml | 142 ++++++ Makefile | 73 +++ app/__init__.py | 2 +- app/configurations/config.py | 54 +-- app/configurations/copies_config.py | 4 +- app/configurations/pdf_manual_config.py | 16 +- app/controllers/__init__.py | 4 +- app/controllers/handle_controller.py | 117 ++--- .../agent_config/agent_config_client.py | 4 +- .../requests/agent_config_request.py | 3 +- .../responses/agent_config_response.py | 3 +- app/externals/aliexpress/aliexpress_client.py | 38 +- .../requests/aliexpress_search_request.py | 1 + .../responses/aliexpress_search_response.py | 30 +- app/externals/amazon/amazon_client.py | 41 +- .../amazon/requests/amazon_search_request.py | 6 +- .../responses/amazon_search_response.py | 18 +- app/externals/dropi/__init__.py | 1 - app/externals/dropi/dropi_client.py | 23 +- app/externals/fal/__init__.py | 2 +- app/externals/fal/fal_client.py | 12 +- 
.../google_vision/google_vision_client.py | 33 +- app/externals/images/image_client.py | 117 ++--- app/externals/s3_upload/s3_upload_client.py | 11 +- app/externals/scraperapi/__init__.py | 2 +- app/externals/scraperapi/scraperapi_client.py | 23 +- app/factories/ai_provider_factory.py | 8 +- app/factories/scraping_factory.py | 4 +- app/helpers/escape_helper.py | 40 +- app/helpers/image_compression_helper.py | 40 +- app/managers/conversation_manager.py | 35 +- .../conversation_manager_interface.py | 3 +- app/middlewares/auth_middleware.py | 48 +- app/pdf/helpers.py | 10 +- app/pdf/pdf_generator.py | 197 ++++---- app/pdf/pdf_manual_generator.py | 15 +- app/processors/agent_processor.py | 64 +-- app/processors/conversation_processor.py | 13 +- app/processors/mcp_processor.py | 36 +- app/processors/simple_processor.py | 61 ++- app/providers/ai_provider_interface.py | 4 +- app/providers/anthropic_provider.py | 10 +- app/providers/deepseek_provider.py | 9 +- app/providers/gemini_provider.py | 3 +- app/providers/openai_provider.py | 6 +- app/requestors/base_requestor.py | 40 +- app/requests/__init__.py | 2 +- .../brand_context_resolver_request.py | 3 +- app/requests/generate_audio_request.py | 5 +- app/requests/generate_image_request.py | 5 +- app/requests/generate_video_request.py | 5 +- app/requests/message_request.py | 3 +- app/requests/product_scraping_request.py | 3 +- app/requests/recommend_product_request.py | 5 +- app/requests/resolve_funnel_request.py | 5 +- app/requests/variation_image_request.py | 5 +- app/scrapers/aliexpress_scraper.py | 57 +-- app/scrapers/amazon_scraper.py | 54 +-- app/scrapers/cj_scraper.py | 19 +- app/scrapers/dropi_scraper.py | 109 ++--- app/scrapers/helper_price.py | 4 +- app/scrapers/ia_scraper.py | 45 +- app/scrapers/scraper_interface.py | 4 +- app/services/audio_service.py | 4 +- app/services/audio_service_interface.py | 2 +- app/services/dropi_service.py | 5 +- app/services/dropi_service_interface.py | 4 +- 
app/services/image_service.py | 102 ++-- app/services/image_service_interface.py | 2 +- app/services/message_service.py | 223 +++++---- app/services/message_service_interface.py | 8 +- app/services/product_scraping_service.py | 10 +- .../product_scraping_service_interface.py | 1 + app/services/video_service.py | 20 +- app/services/video_service_interface.py | 2 +- app/tools/tool_generator.py | 25 +- docs/README.md | 64 +++ docs/ai-providers.md | 302 ++++++++++++ docs/api-endpoints.md | 435 ++++++++++++++++++ docs/architecture.md | 165 +++++++ docs/external-clients.md | 401 ++++++++++++++++ docs/installation.md | 169 +++++++ docs/processors.md | 346 ++++++++++++++ docs/scrapers.md | 350 ++++++++++++++ docs/services.md | 376 +++++++++++++++ pyproject.toml | 62 +++ requirements.txt | 11 + tests/README.md | 185 ++++++++ tests/__init__.py | 1 + tests/conftest.py | 256 +++++++++++ tests/integration/__init__.py | 1 + tests/integration/test_api_endpoints.py | 307 ++++++++++++ tests/unit/externals/__init__.py | 1 + tests/unit/externals/test_fal_client.py | 192 ++++++++ .../externals/test_google_vision_client.py | 193 ++++++++ tests/unit/factories/__init__.py | 1 + .../factories/test_ai_provider_factory.py | 96 ++++ tests/unit/factories/test_scraping_factory.py | 139 ++++++ tests/unit/helpers/__init__.py | 1 + tests/unit/helpers/test_escape_helper.py | 170 +++++++ .../helpers/test_image_compression_helper.py | 180 ++++++++ tests/unit/managers/__init__.py | 1 + .../managers/test_conversation_manager.py | 313 +++++++++++++ tests/unit/middlewares/__init__.py | 1 + .../unit/middlewares/test_auth_middleware.py | 254 ++++++++++ tests/unit/models/__init__.py | 1 + tests/unit/models/test_models.py | 258 +++++++++++ tests/unit/processors/__init__.py | 1 + tests/unit/processors/test_agent_processor.py | 180 ++++++++ .../processors/test_conversation_processor.py | 102 ++++ .../unit/processors/test_simple_processor.py | 194 ++++++++ tests/unit/providers/__init__.py | 1 + 
tests/unit/providers/test_providers.py | 187 ++++++++ tests/unit/scrapers/__init__.py | 1 + .../unit/scrapers/test_aliexpress_scraper.py | 208 +++++++++ tests/unit/scrapers/test_amazon_scraper.py | 190 ++++++++ tests/unit/scrapers/test_helper_price.py | 94 ++++ tests/unit/services/__init__.py | 1 + tests/unit/services/test_audio_service.py | 116 +++++ tests/unit/services/test_image_service.py | 224 +++++++++ tests/unit/services/test_message_service.py | 258 +++++++++++ .../services/test_product_scraping_service.py | 142 ++++++ tests/unit/services/test_video_service.py | 181 ++++++++ tests/unit/tools/__init__.py | 1 + tests/unit/tools/test_tool_generator.py | 198 ++++++++ 126 files changed, 8641 insertions(+), 1054 deletions(-) create mode 100644 .flake8 create mode 100644 .github/workflows/ci.yml create mode 100644 Makefile create mode 100644 docs/README.md create mode 100644 docs/ai-providers.md create mode 100644 docs/api-endpoints.md create mode 100644 docs/architecture.md create mode 100644 docs/external-clients.md create mode 100644 docs/installation.md create mode 100644 docs/processors.md create mode 100644 docs/scrapers.md create mode 100644 docs/services.md create mode 100644 pyproject.toml create mode 100644 tests/README.md create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/integration/__init__.py create mode 100644 tests/integration/test_api_endpoints.py create mode 100644 tests/unit/externals/__init__.py create mode 100644 tests/unit/externals/test_fal_client.py create mode 100644 tests/unit/externals/test_google_vision_client.py create mode 100644 tests/unit/factories/__init__.py create mode 100644 tests/unit/factories/test_ai_provider_factory.py create mode 100644 tests/unit/factories/test_scraping_factory.py create mode 100644 tests/unit/helpers/__init__.py create mode 100644 tests/unit/helpers/test_escape_helper.py create mode 100644 tests/unit/helpers/test_image_compression_helper.py create mode 
100644 tests/unit/managers/__init__.py create mode 100644 tests/unit/managers/test_conversation_manager.py create mode 100644 tests/unit/middlewares/__init__.py create mode 100644 tests/unit/middlewares/test_auth_middleware.py create mode 100644 tests/unit/models/__init__.py create mode 100644 tests/unit/models/test_models.py create mode 100644 tests/unit/processors/__init__.py create mode 100644 tests/unit/processors/test_agent_processor.py create mode 100644 tests/unit/processors/test_conversation_processor.py create mode 100644 tests/unit/processors/test_simple_processor.py create mode 100644 tests/unit/providers/__init__.py create mode 100644 tests/unit/providers/test_providers.py create mode 100644 tests/unit/scrapers/__init__.py create mode 100644 tests/unit/scrapers/test_aliexpress_scraper.py create mode 100644 tests/unit/scrapers/test_amazon_scraper.py create mode 100644 tests/unit/scrapers/test_helper_price.py create mode 100644 tests/unit/services/__init__.py create mode 100644 tests/unit/services/test_audio_service.py create mode 100644 tests/unit/services/test_image_service.py create mode 100644 tests/unit/services/test_message_service.py create mode 100644 tests/unit/services/test_product_scraping_service.py create mode 100644 tests/unit/services/test_video_service.py create mode 100644 tests/unit/tools/__init__.py create mode 100644 tests/unit/tools/test_tool_generator.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..81d9b2d --- /dev/null +++ b/.flake8 @@ -0,0 +1,17 @@ +[flake8] +max-line-length = 120 +max-complexity = 10 +extend-ignore = E501,W503,E203,E266,E402 +exclude = + .git, + __pycache__, + .venv, + venv, + build, + dist, + *.egg-info, + .eggs, + .tox, +per-file-ignores = + __init__.py:F401 + tests/*:F401,F811 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..d22a0cd --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,142 @@ +name: CI Pipeline + +on: + push: + 
branches: [main, master, develop] + pull_request: + branches: [main, master, develop] + +jobs: + lint: + name: Lint & Format Check + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Cache pip dependencies + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-lint-${{ hashFiles('requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip-lint- + + - name: Install linting tools + run: | + python -m pip install --upgrade pip + pip install black flake8 isort + + - name: Check formatting with Black + run: | + black --check --line-length 120 app/ tests/ + + - name: Check import sorting with isort + run: | + isort --check-only --profile black --line-length 120 app/ tests/ + + - name: Lint with flake8 + run: | + # Stop build if there are Python syntax errors or undefined names + flake8 app/ --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all errors as warnings. 
Line length set to 120 + flake8 app/ --count --exit-zero --max-complexity=10 --max-line-length=120 --statistics --ignore=E501,W503,E203 + + test: + name: Run Tests + runs-on: ubuntu-latest + needs: lint + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Cache pip dependencies + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run tests with pytest + run: | + pytest tests/ -v --tb=short --junitxml=test-results.xml + env: + # Variables de entorno necesarias para tests + HOST_AGENT_CONFIG: http://localhost:8000 + DEEP_SEEK_HOST: http://localhost:11434 + API_KEY: test-api-key + AUTH_SERVICE_URL: http://localhost:8001/auth + + - name: Upload test results + uses: actions/upload-artifact@v4 + if: always() + with: + name: test-results + path: test-results.xml + + test-coverage: + name: Test Coverage + runs-on: ubuntu-latest + needs: lint + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Cache pip dependencies + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run tests with coverage + run: | + pytest tests/ --cov=app --cov-report=xml --cov-report=html --cov-fail-under=60 + env: + HOST_AGENT_CONFIG: http://localhost:8000 + DEEP_SEEK_HOST: http://localhost:11434 + API_KEY: test-api-key + AUTH_SERVICE_URL: http://localhost:8001/auth + + - name: Upload coverage report + uses: 
actions/upload-artifact@v4 + with: + name: coverage-report + path: htmlcov/ + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + file: ./coverage.xml + fail_ci_if_error: false + continue-on-error: true diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e8f1913 --- /dev/null +++ b/Makefile @@ -0,0 +1,73 @@ +.PHONY: help install lint format test test-cov clean + +# Default target +help: + @echo "Conversational Engine - Available commands:" + @echo "" + @echo " make install - Install all dependencies" + @echo " make lint - Run linting checks (black, isort, flake8)" + @echo " make format - Format code with black and isort" + @echo " make test - Run all tests" + @echo " make test-cov - Run tests with coverage report" + @echo " make clean - Remove cache and build files" + @echo "" + +# Install dependencies +install: + pip install -r requirements.txt + +# Run linting checks +lint: + @echo "Checking code formatting with Black..." + black --check --line-length 120 app/ tests/ + @echo "" + @echo "Checking import sorting with isort..." + isort --check-only --profile black --line-length 120 app/ tests/ + @echo "" + @echo "Running flake8..." + flake8 app/ --count --select=E9,F63,F7,F82 --show-source --statistics + flake8 app/ --count --exit-zero --max-complexity=10 --max-line-length=120 --statistics --ignore=E501,W503,E203 + @echo "" + @echo "All lint checks passed! ✓" + +# Format code +format: + @echo "Formatting code with Black..." + black --line-length 120 app/ tests/ + @echo "" + @echo "Sorting imports with isort..." + isort --profile black --line-length 120 app/ tests/ + @echo "" + @echo "Code formatted! 
✓" + +# Run tests +test: + pytest tests/ -v --tb=short + +# Run tests with coverage +test-cov: + pytest tests/ --cov=app --cov-report=html --cov-report=term-missing + @echo "" + @echo "Coverage report generated in htmlcov/index.html" + +# Run only unit tests +test-unit: + pytest tests/unit -v --tb=short + +# Run only integration tests +test-integration: + pytest tests/integration -v --tb=short + +# Clean cache files +clean: + find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true + find . -type d -name ".pytest_cache" -exec rm -rf {} + 2>/dev/null || true + find . -type d -name "htmlcov" -exec rm -rf {} + 2>/dev/null || true + find . -type f -name "*.pyc" -delete 2>/dev/null || true + find . -type f -name ".coverage" -delete 2>/dev/null || true + find . -type f -name "coverage.xml" -delete 2>/dev/null || true + @echo "Cleaned! ✓" + +# Run the application locally +run: + uvicorn main:app --reload --host 0.0.0.0 --port 8000 diff --git a/app/__init__.py b/app/__init__.py index 63788fe..7f044e3 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1 +1 @@ -# Archivo vacío \ No newline at end of file +# Archivo vacío diff --git a/app/configurations/config.py b/app/configurations/config.py index 1849c2a..50a7df3 100644 --- a/app/configurations/config.py +++ b/app/configurations/config.py @@ -4,46 +4,46 @@ load_dotenv() -HOST_AGENT_CONFIG = os.getenv('HOST_AGENT_CONFIG') +HOST_AGENT_CONFIG = os.getenv("HOST_AGENT_CONFIG") -DEEP_SEEK_HOST = os.getenv('HOST_DEEP_SEEK') +DEEP_SEEK_HOST = os.getenv("HOST_DEEP_SEEK") -AGENT_RECOMMEND_PRODUCTS_ID = os.getenv('AGENT_RECOMMEND_PRODUCTS_ID') -AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID = os.getenv('AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID') -RAPIDAPI_KEY = os.getenv('RAPIDAPI_KEY') +AGENT_RECOMMEND_PRODUCTS_ID = os.getenv("AGENT_RECOMMEND_PRODUCTS_ID") +AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID = os.getenv("AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID") +RAPIDAPI_KEY = os.getenv("RAPIDAPI_KEY") -RAPIDAPI_HOST = os.getenv('RAPIDAPI_HOST') 
+RAPIDAPI_HOST = os.getenv("RAPIDAPI_HOST") -S3_UPLOAD_API = os.getenv('S3_UPLOAD_API') +S3_UPLOAD_API = os.getenv("S3_UPLOAD_API") AGENT_IMAGE_VARIATIONS = "agent_image_variations" SCRAPER_AGENT = "scraper_agent" SCRAPER_AGENT_DIRECT = "scraper_agent_direct_code" -AUTH_SERVICE_URL: str = os.getenv('AUTH_SERVICE_URL') +AUTH_SERVICE_URL: str = os.getenv("AUTH_SERVICE_URL") -GOOGLE_VISION_API_KEY: str = os.getenv('GOOGLE_VISION_API_KEY') -REPLICATE_API_KEY: str = os.getenv('REPLICATE_API_KEY') -SCRAPERAPI_KEY: str = os.getenv('SCRAPERAPI_KEY') -URL_SCRAPER_LAMBDA: str = os.getenv('URL_SCRAPER_LAMBDA') +GOOGLE_VISION_API_KEY: str = os.getenv("GOOGLE_VISION_API_KEY") +REPLICATE_API_KEY: str = os.getenv("REPLICATE_API_KEY") +SCRAPERAPI_KEY: str = os.getenv("SCRAPERAPI_KEY") +URL_SCRAPER_LAMBDA: str = os.getenv("URL_SCRAPER_LAMBDA") -API_KEY: str = os.getenv('API_KEY') -GOOGLE_GEMINI_API_KEY: str = os.getenv('GOOGLE_GEMINI_API_KEY') +API_KEY: str = os.getenv("API_KEY") +GOOGLE_GEMINI_API_KEY: str = os.getenv("GOOGLE_GEMINI_API_KEY") -ENVIRONMENT: str = os.getenv('ENVIRONMENT') +ENVIRONMENT: str = os.getenv("ENVIRONMENT") -OPENAI_API_KEY: str = os.getenv('OPENAI_API_KEY') +OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY") -DROPI_S3_BASE_URL: str = os.getenv('DROPI_S3_BASE_URL', 'https://d39ru7awumhhs2.cloudfront.net/') -DROPI_HOST: str = os.getenv('DROPI_HOST', 'https://test-api.dropi.co') -DROPI_API_KEY: str = os.getenv('DROPI_API_KEY') -DROPI_API_KEY_CO: str = os.getenv('DROPI_API_KEY_CO', os.getenv('DROPI_API_KEY')) -DROPI_API_KEY_MX: str = os.getenv('DROPI_API_KEY_MX', os.getenv('DROPI_API_KEY')) -DROPI_API_KEY_AR: str = os.getenv('DROPI_API_KEY_AR', os.getenv('DROPI_API_KEY')) -DROPI_API_KEY_CL: str = os.getenv('DROPI_API_KEY_CL', os.getenv('DROPI_API_KEY')) -DROPI_API_KEY_PE: str = os.getenv('DROPI_API_KEY_PE', os.getenv('DROPI_API_KEY')) -DROPI_API_KEY_PY: str = os.getenv('DROPI_API_KEY_PY', os.getenv('DROPI_API_KEY')) -DROPI_API_KEY_EC: str = 
os.getenv('DROPI_API_KEY_EC', os.getenv('DROPI_API_KEY')) +DROPI_S3_BASE_URL: str = os.getenv("DROPI_S3_BASE_URL", "https://d39ru7awumhhs2.cloudfront.net/") +DROPI_HOST: str = os.getenv("DROPI_HOST", "https://test-api.dropi.co") +DROPI_API_KEY: str = os.getenv("DROPI_API_KEY") +DROPI_API_KEY_CO: str = os.getenv("DROPI_API_KEY_CO", os.getenv("DROPI_API_KEY")) +DROPI_API_KEY_MX: str = os.getenv("DROPI_API_KEY_MX", os.getenv("DROPI_API_KEY")) +DROPI_API_KEY_AR: str = os.getenv("DROPI_API_KEY_AR", os.getenv("DROPI_API_KEY")) +DROPI_API_KEY_CL: str = os.getenv("DROPI_API_KEY_CL", os.getenv("DROPI_API_KEY")) +DROPI_API_KEY_PE: str = os.getenv("DROPI_API_KEY_PE", os.getenv("DROPI_API_KEY")) +DROPI_API_KEY_PY: str = os.getenv("DROPI_API_KEY_PY", os.getenv("DROPI_API_KEY")) +DROPI_API_KEY_EC: str = os.getenv("DROPI_API_KEY_EC", os.getenv("DROPI_API_KEY")) def get_dropi_api_key(country: str = "co") -> str: @@ -59,4 +59,4 @@ def get_dropi_api_key(country: str = "co") -> str: return country_keys.get(country.lower(), DROPI_API_KEY) -FAL_AI_API_KEY: str = os.getenv('FAL_AI_API_KEY') +FAL_AI_API_KEY: str = os.getenv("FAL_AI_API_KEY") diff --git a/app/configurations/copies_config.py b/app/configurations/copies_config.py index 7c94b76..0a45be3 100644 --- a/app/configurations/copies_config.py +++ b/app/configurations/copies_config.py @@ -4,5 +4,5 @@ "agent_prompt_copies_benefits_v1", "agent_prompt_copies_features_v1", "agent_prompt_copies_testimonials_v1", - "agent_prompt_copies_faqs_v1" -] \ No newline at end of file + "agent_prompt_copies_faqs_v1", +] diff --git a/app/configurations/pdf_manual_config.py b/app/configurations/pdf_manual_config.py index 42480f7..d29fe00 100644 --- a/app/configurations/pdf_manual_config.py +++ b/app/configurations/pdf_manual_config.py @@ -4,33 +4,27 @@ "main_features": "Características principales", "usage_instructions": "Instrucciones de uso", "troubleshooting": "Solución de problemas", - "faq": "Preguntas frecuentes" + "faq": "Preguntas frecuentes", 
}, "en": { "introduction": "Introduction", "main_features": "Main Features", "usage_instructions": "Usage Instructions", "troubleshooting": "Troubleshooting", - "faq": "Frequently Asked Questions" + "faq": "Frequently Asked Questions", }, "pt": { "introduction": "Introdução", "main_features": "Características Principais", "usage_instructions": "Instruções de Uso", "troubleshooting": "Solução de Problemas", - "faq": "Perguntas Frequentes" - } + "faq": "Perguntas Frequentes", + }, } PDF_MANUAL_SECTIONS = PDF_MANUAL_SECTIONS_TRANSLATIONS["es"] -PDF_MANUAL_SECTION_ORDER = [ - "introduction", - "main_features", - "usage_instructions", - "troubleshooting", - "faq" -] +PDF_MANUAL_SECTION_ORDER = ["introduction", "main_features", "usage_instructions", "troubleshooting", "faq"] def get_sections_for_language(language: str = "es") -> dict: diff --git a/app/controllers/__init__.py b/app/controllers/__init__.py index 46aac27..9a7f1f4 100644 --- a/app/controllers/__init__.py +++ b/app/controllers/__init__.py @@ -1,7 +1,7 @@ -# Archivo vacío +# Archivo vacío # Importar el nuevo controlador from .handle_controller import router as handle_router # Registrar el router -routers = [handle_router] \ No newline at end of file +routers = [handle_router] diff --git a/app/controllers/handle_controller.py b/app/controllers/handle_controller.py index df4b513..9586d48 100644 --- a/app/controllers/handle_controller.py +++ b/app/controllers/handle_controller.py @@ -1,84 +1,68 @@ import base64 + import httpx +from fastapi import APIRouter, Depends, HTTPException, Request +from app.middlewares.auth_middleware import require_api_key, require_auth from app.requests.brand_context_resolver_request import BrandContextResolverRequest from app.requests.copy_request import CopyRequest from app.requests.direct_scrape_request import DirectScrapeRequest +from app.requests.generate_audio_request import GenerateAudioRequest from app.requests.generate_image_request import GenerateImageRequest from 
app.requests.generate_pdf_request import GeneratePdfRequest +from app.requests.generate_video_request import GenerateVideoRequest +from app.requests.message_request import MessageRequest +from app.requests.product_scraping_request import ProductScrapingRequest from app.requests.recommend_product_request import RecommendProductRequest from app.requests.resolve_funnel_request import ResolveFunnelRequest -from fastapi import APIRouter, Depends, Request, HTTPException -from app.requests.message_request import MessageRequest from app.requests.variation_image_request import VariationImageRequest -from app.requests.product_scraping_request import ProductScrapingRequest -from app.services.image_service_interface import ImageServiceInterface -from app.services.message_service_interface import MessageServiceInterface -from app.services.product_scraping_service_interface import ProductScrapingServiceInterface -from app.middlewares.auth_middleware import require_auth, require_api_key -from app.requests.generate_video_request import GenerateVideoRequest +from app.services.audio_service import AudioService +from app.services.audio_service_interface import AudioServiceInterface +from app.services.dropi_service import DropiService # Importaciones para Dropi from app.services.dropi_service_interface import DropiServiceInterface -from app.services.dropi_service import DropiService -from app.services.video_service_interface import VideoServiceInterface +from app.services.image_service_interface import ImageServiceInterface +from app.services.message_service_interface import MessageServiceInterface +from app.services.product_scraping_service_interface import ProductScrapingServiceInterface from app.services.video_service import VideoService -from app.services.audio_service_interface import AudioServiceInterface -from app.services.audio_service import AudioService -from app.requests.generate_audio_request import GenerateAudioRequest +from app.services.video_service_interface import 
VideoServiceInterface + +router = APIRouter(prefix="/api/ms/conversational-engine", tags=["conversational-agent"]) -router = APIRouter( - prefix="/api/ms/conversational-engine", - tags=["conversational-agent"] -) @router.get("/integration/dropi/departments") -async def get_departments( - country: str = "co", - service: DropiServiceInterface = Depends(DropiService) -): +async def get_departments(country: str = "co", service: DropiServiceInterface = Depends(DropiService)): return await service.get_departments(country) + @router.get("/integration/dropi/departments/{department_id}/cities") async def get_cities_by_department( - department_id: int, - country: str = "co", - service: DropiServiceInterface = Depends(DropiService) + department_id: int, country: str = "co", service: DropiServiceInterface = Depends(DropiService) ): return await service.get_cities_by_department(department_id, country) + @router.post("/handle-message") -async def handle_message( - request: MessageRequest, - message_service: MessageServiceInterface = Depends() -): +async def handle_message(request: MessageRequest, message_service: MessageServiceInterface = Depends()): response = await message_service.handle_message(request) return response @router.post("/handle-message-json") -async def handle_message( - request: MessageRequest, - message_service: MessageServiceInterface = Depends() -): +async def handle_message(request: MessageRequest, message_service: MessageServiceInterface = Depends()): response = await message_service.handle_message_json(request) return response @router.post("/recommend-product") -async def recommend_products( - request: RecommendProductRequest, - message_service: MessageServiceInterface = Depends() -): +async def recommend_products(request: RecommendProductRequest, message_service: MessageServiceInterface = Depends()): response = await message_service.recommend_products(request) return response @router.post("/generate-pdf") -async def generate_pdf( - request: 
GeneratePdfRequest, - message_service: MessageServiceInterface = Depends() -): +async def generate_pdf(request: GeneratePdfRequest, message_service: MessageServiceInterface = Depends()): response = await message_service.generate_pdf(request) return response @@ -86,9 +70,7 @@ async def generate_pdf( @router.post("/generate-variation-images") @require_auth async def generate_variation_images( - request: Request, - variation_request: VariationImageRequest, - service: ImageServiceInterface = Depends() + request: Request, variation_request: VariationImageRequest, service: ImageServiceInterface = Depends() ): user_info = request.state.user_info response = await service.generate_variation_images(variation_request, user_info.get("data", {}).get("id")) @@ -98,9 +80,7 @@ async def generate_variation_images( @router.post("/generate-images-from") @require_auth async def generate_images_from( - request: Request, - generate_image_request: GenerateImageRequest, - service: ImageServiceInterface = Depends() + request: Request, generate_image_request: GenerateImageRequest, service: ImageServiceInterface = Depends() ): if not generate_image_request.file and generate_image_request.file_url: async with httpx.AsyncClient() as client: @@ -119,9 +99,7 @@ async def generate_images_from( @router.post("/generate-images-from/api-key") @require_api_key async def generate_images_from_api_key( - request: Request, - generate_image_request: GenerateImageRequest, - service: ImageServiceInterface = Depends() + request: Request, generate_image_request: GenerateImageRequest, service: ImageServiceInterface = Depends() ): if not generate_image_request.file and generate_image_request.file_url: async with httpx.AsyncClient() as client: @@ -138,9 +116,7 @@ async def generate_images_from_api_key( @router.post("/generate-images-from-agent/api-key") @require_api_key async def generate_images_from_agent_api_key( - request: Request, - generate_image_request: GenerateImageRequest, - service: 
ImageServiceInterface = Depends() + request: Request, generate_image_request: GenerateImageRequest, service: ImageServiceInterface = Depends() ): if not generate_image_request.file and generate_image_request.file_url: async with httpx.AsyncClient() as client: @@ -155,10 +131,7 @@ async def generate_images_from_agent_api_key( @router.post("/generate-copies") -async def generate_copies( - copy_request: CopyRequest, - message_service: MessageServiceInterface = Depends() -): +async def generate_copies(copy_request: CopyRequest, message_service: MessageServiceInterface = Depends()): response = await message_service.generate_copies(copy_request) return response @@ -166,56 +139,50 @@ async def generate_copies( @router.post("/scrape-product") @require_auth async def scrape_product( - request: Request, - scraping_request: ProductScrapingRequest, - service: ProductScrapingServiceInterface = Depends() + request: Request, scraping_request: ProductScrapingRequest, service: ProductScrapingServiceInterface = Depends() ): response = await service.scrape_product(scraping_request) return response + @router.post("/scrape-direct-html") @require_auth async def scrape_product_direct( - request: Request, - scraping_request: DirectScrapeRequest, - service: ProductScrapingServiceInterface = Depends() + request: Request, scraping_request: DirectScrapeRequest, service: ProductScrapingServiceInterface = Depends() ): response = await service.scrape_direct(scraping_request.html) return response @router.post("/resolve-info-funnel") -async def resolve_funnel( - request: ResolveFunnelRequest, - message_service: MessageServiceInterface = Depends() -): +async def resolve_funnel(request: ResolveFunnelRequest, message_service: MessageServiceInterface = Depends()): response = await message_service.resolve_funnel(request) return response + @router.post("/store/brand-context-resolver") @require_auth async def brand_context_resolver( - request: Request, - requestBrand: BrandContextResolverRequest, - 
message_service: MessageServiceInterface = Depends() + request: Request, requestBrand: BrandContextResolverRequest, message_service: MessageServiceInterface = Depends() ): response = await message_service.resolve_brand_context(requestBrand) return response + @router.post("/generate-video") async def generate_video( - request: Request, - requestGenerateVideo: GenerateVideoRequest, - video_service: VideoServiceInterface = Depends(VideoService) + request: Request, + requestGenerateVideo: GenerateVideoRequest, + video_service: VideoServiceInterface = Depends(VideoService), ): return await video_service.generate_video(requestGenerateVideo) @router.post("/generate-audio") async def generate_audio( - request: Request, - requestGenerateAudio: GenerateAudioRequest, - audio_service: AudioServiceInterface = Depends(AudioService) + request: Request, + requestGenerateAudio: GenerateAudioRequest, + audio_service: AudioServiceInterface = Depends(AudioService), ): return await audio_service.generate_audio(requestGenerateAudio) diff --git a/app/externals/agent_config/agent_config_client.py b/app/externals/agent_config/agent_config_client.py index e1a177d..f7f0f7a 100644 --- a/app/externals/agent_config/agent_config_client.py +++ b/app/externals/agent_config/agent_config_client.py @@ -6,9 +6,9 @@ async def get_agent(data: AgentConfigRequest) -> AgentConfigResponse: - endpoint = '/api/ms/agent/config/search-agent' + endpoint = "/api/ms/agent/config/search-agent" url = f"{HOST_AGENT_CONFIG}{endpoint}" - headers = {'Content-Type': 'application/json'} + headers = {"Content-Type": "application/json"} async with httpx.AsyncClient() as client: response = await client.post(url, json=data.model_dump(), headers=headers) diff --git a/app/externals/agent_config/requests/agent_config_request.py b/app/externals/agent_config/requests/agent_config_request.py index f892dcc..5dc5b2d 100644 --- a/app/externals/agent_config/requests/agent_config_request.py +++ 
b/app/externals/agent_config/requests/agent_config_request.py @@ -1,4 +1,5 @@ -from typing import List, Dict, Optional, Any +from typing import Any, Dict, List, Optional + from pydantic import BaseModel from app.requests.message_request import MetadataFilter diff --git a/app/externals/agent_config/responses/agent_config_response.py b/app/externals/agent_config/responses/agent_config_response.py index 4d39d4d..11878ee 100644 --- a/app/externals/agent_config/responses/agent_config_response.py +++ b/app/externals/agent_config/responses/agent_config_response.py @@ -1,4 +1,5 @@ -from typing import Optional, Dict, List, Any +from typing import Any, Dict, List, Optional + from pydantic import BaseModel, Field diff --git a/app/externals/aliexpress/aliexpress_client.py b/app/externals/aliexpress/aliexpress_client.py index 8a758ce..6935487 100644 --- a/app/externals/aliexpress/aliexpress_client.py +++ b/app/externals/aliexpress/aliexpress_client.py @@ -1,55 +1,39 @@ import httpx + from app.configurations.config import RAPIDAPI_HOST, RAPIDAPI_KEY from app.externals.aliexpress.requests.aliexpress_search_request import AliexpressSearchRequest from app.externals.aliexpress.responses.aliexpress_search_response import AliexpressSearchResponse async def search_products(data: AliexpressSearchRequest) -> AliexpressSearchResponse: - endpoint = '/item_search_5' + endpoint = "/item_search_5" url = f"{RAPIDAPI_HOST}{endpoint}" - headers = { - 'Content-Type': 'application/json', - 'x-rapidapi-key': RAPIDAPI_KEY - } + headers = {"Content-Type": "application/json", "x-rapidapi-key": RAPIDAPI_KEY} - params = { - 'q': data.q, - 'page': str(data.page), - 'sort': data.sort - } + params = {"q": data.q, "page": str(data.page), "sort": data.sort} async with httpx.AsyncClient() as client: - response = await client.get( - url, - params=params, - headers=headers - ) + response = await client.get(url, params=params, headers=headers) response.raise_for_status() return 
AliexpressSearchResponse(**response.json()) async def get_item_detail(item_id: str): - endpoint = '/item_detail_6' + endpoint = "/item_detail_6" url = f"{RAPIDAPI_HOST}{endpoint}" headers = { - 'Content-Type': 'application/json', - 'x-rapidapi-host': 'aliexpress-datahub.p.rapidapi.com', - 'x-rapidapi-key': RAPIDAPI_KEY + "Content-Type": "application/json", + "x-rapidapi-host": "aliexpress-datahub.p.rapidapi.com", + "x-rapidapi-key": RAPIDAPI_KEY, } - params = { - 'itemId': item_id - } + params = {"itemId": item_id} async with httpx.AsyncClient() as client: - response = await client.get( - url, - params=params, - headers=headers - ) + response = await client.get(url, params=params, headers=headers) response.raise_for_status() return response.json() diff --git a/app/externals/aliexpress/requests/aliexpress_search_request.py b/app/externals/aliexpress/requests/aliexpress_search_request.py index d356093..06cb955 100644 --- a/app/externals/aliexpress/requests/aliexpress_search_request.py +++ b/app/externals/aliexpress/requests/aliexpress_search_request.py @@ -1,4 +1,5 @@ from typing import Optional + from pydantic import BaseModel diff --git a/app/externals/aliexpress/responses/aliexpress_search_response.py b/app/externals/aliexpress/responses/aliexpress_search_response.py index ca63f2d..b493abe 100644 --- a/app/externals/aliexpress/responses/aliexpress_search_response.py +++ b/app/externals/aliexpress/responses/aliexpress_search_response.py @@ -1,4 +1,5 @@ from typing import List, Optional + from pydantic import BaseModel @@ -11,9 +12,7 @@ class ItemSku(BaseModel): def_: Optional[SkuDef] = None class Config: - fields = { - 'def_': 'def' - } + fields = {"def_": "def"} class ItemData(BaseModel): @@ -48,24 +47,25 @@ def get_products(self) -> List[dict]: for result_item in self.result.resultList: price = None if result_item.item.sku and result_item.item.sku.def_: - price = (result_item.item.sku.def_.price or - result_item.item.sku.def_.promotionPrice) + price = 
result_item.item.sku.def_.price or result_item.item.sku.def_.promotionPrice item_url = result_item.item.itemUrl - if item_url.startswith('//'): + if item_url.startswith("//"): item_url = f"https:{item_url}" image_url = result_item.item.image - if image_url.startswith('//'): + if image_url.startswith("//"): image_url = f"https:{image_url}" - products.append({ - 'source': 'aliexpress', - 'external_id': result_item.item.itemId, - 'name': result_item.item.title, - 'url_website': item_url, - 'url_image': image_url, - 'price': price - }) + products.append( + { + "source": "aliexpress", + "external_id": result_item.item.itemId, + "name": result_item.item.title, + "url_website": item_url, + "url_image": image_url, + "price": price, + } + ) return products diff --git a/app/externals/amazon/amazon_client.py b/app/externals/amazon/amazon_client.py index b44d76b..30c6c8c 100644 --- a/app/externals/amazon/amazon_client.py +++ b/app/externals/amazon/amazon_client.py @@ -1,31 +1,28 @@ +from typing import Any, Dict + import httpx + from app.configurations.config import RAPIDAPI_KEY from app.externals.amazon.requests.amazon_search_request import AmazonSearchRequest from app.externals.amazon.responses.amazon_search_response import AmazonSearchResponse -from typing import Dict, Any async def search_products(request: AmazonSearchRequest) -> AmazonSearchResponse: - headers = { - 'x-rapidapi-host': 'real-time-amazon-data.p.rapidapi.com', - 'x-rapidapi-key': RAPIDAPI_KEY - } + headers = {"x-rapidapi-host": "real-time-amazon-data.p.rapidapi.com", "x-rapidapi-key": RAPIDAPI_KEY} params = { - 'query': request.query, - 'page': '1', - 'country': 'US', - 'sort_by': 'RELEVANCE', - 'product_condition': 'ALL', - 'is_prime': 'false', - 'deals_and_discounts': 'NONE' + "query": request.query, + "page": "1", + "country": "US", + "sort_by": "RELEVANCE", + "product_condition": "ALL", + "is_prime": "false", + "deals_and_discounts": "NONE", } async with httpx.AsyncClient() as client: response = await 
client.get( - 'https://real-time-amazon-data.p.rapidapi.com/search', - headers=headers, - params=params + "https://real-time-amazon-data.p.rapidapi.com/search", headers=headers, params=params ) if response.status_code != 200: @@ -36,21 +33,13 @@ async def search_products(request: AmazonSearchRequest) -> AmazonSearchResponse: async def get_product_details(asin: str, country: str = "US") -> Dict[str, Any]: - headers = { - 'x-rapidapi-host': 'real-time-amazon-data.p.rapidapi.com', - 'x-rapidapi-key': RAPIDAPI_KEY - } + headers = {"x-rapidapi-host": "real-time-amazon-data.p.rapidapi.com", "x-rapidapi-key": RAPIDAPI_KEY} - params = { - 'asin': asin, - 'country': country - } + params = {"asin": asin, "country": country} async with httpx.AsyncClient() as client: response = await client.get( - 'https://real-time-amazon-data.p.rapidapi.com/product-details', - headers=headers, - params=params + "https://real-time-amazon-data.p.rapidapi.com/product-details", headers=headers, params=params ) if response.status_code != 200: diff --git a/app/externals/amazon/requests/amazon_search_request.py b/app/externals/amazon/requests/amazon_search_request.py index 6a44190..e1b767f 100644 --- a/app/externals/amazon/requests/amazon_search_request.py +++ b/app/externals/amazon/requests/amazon_search_request.py @@ -1,8 +1,6 @@ - - class AmazonSearchRequest: def __init__( - self, - query: str, + self, + query: str, ): self.query = query diff --git a/app/externals/amazon/responses/amazon_search_response.py b/app/externals/amazon/responses/amazon_search_response.py index 36509ff..ffae33a 100644 --- a/app/externals/amazon/responses/amazon_search_response.py +++ b/app/externals/amazon/responses/amazon_search_response.py @@ -17,17 +17,17 @@ def __init__(self, raw_response: dict): def get_products(self) -> List[dict]: products = [] - - for item in self.raw_response.get('data', {}).get('products', []): - price = self._format_price(item.get('product_price')) + + for item in 
self.raw_response.get("data", {}).get("products", []): + price = self._format_price(item.get("product_price")) if price is not None and price > 0: product = { "source": "amazon", - "external_id": item.get('asin', ''), - "name": item.get('product_title', ''), - "url_website": item.get('product_url', ''), - "url_image": item.get('product_photo', ''), - "price": price + "external_id": item.get("asin", ""), + "name": item.get("product_title", ""), + "url_website": item.get("product_url", ""), + "url_image": item.get("product_photo", ""), + "price": price, } products.append(product) @@ -37,6 +37,6 @@ def _format_price(self, price) -> Optional[float]: if not price: return None try: - return float(str(price).replace('$', '').replace(',', '')) + return float(str(price).replace("$", "").replace(",", "")) except (ValueError, TypeError): return None diff --git a/app/externals/dropi/__init__.py b/app/externals/dropi/__init__.py index 0519ecb..e69de29 100644 --- a/app/externals/dropi/__init__.py +++ b/app/externals/dropi/__init__.py @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/app/externals/dropi/dropi_client.py b/app/externals/dropi/dropi_client.py index 489e818..9f09005 100644 --- a/app/externals/dropi/dropi_client.py +++ b/app/externals/dropi/dropi_client.py @@ -1,13 +1,12 @@ +from typing import Any, Dict + import httpx -from typing import Dict, Any from app.configurations.config import DROPI_HOST, get_dropi_api_key async def get_product_details(product_id: str, country: str = "co") -> Dict[str, Any]: - headers = { - "dropi-integration-key": get_dropi_api_key(country) - } + headers = {"dropi-integration-key": get_dropi_api_key(country)} dropi_host = DROPI_HOST.replace(".co", f".{country}") url = f"{dropi_host}/integrations/products/v2/{product_id}" @@ -24,9 +23,7 @@ async def get_product_details(product_id: str, country: str = "co") -> Dict[str, async def get_departments(country: str = "co") -> Dict[str, Any]: - headers = { - "dropi-integration-key": 
get_dropi_api_key(country) - } + headers = {"dropi-integration-key": get_dropi_api_key(country)} dropi_host = DROPI_HOST.replace(".co", f".{country}") url = f"{dropi_host}/integrations/department" async with httpx.AsyncClient() as client: @@ -41,14 +38,8 @@ async def get_departments(country: str = "co") -> Dict[str, Any]: async def get_cities_by_department(department_id: int, rate_type: str, country: str = "co") -> Dict[str, Any]: - headers = { - "dropi-integration-key": get_dropi_api_key(country), - "Content-Type": "application/json" - } - payload = { - "department_id": department_id, - "rate_type": rate_type - } + headers = {"dropi-integration-key": get_dropi_api_key(country), "Content-Type": "application/json"} + payload = {"department_id": department_id, "rate_type": rate_type} dropi_host = DROPI_HOST.replace(".co", f".{country}") url = f"{dropi_host}/integrations/trajectory/bycity" async with httpx.AsyncClient() as client: @@ -59,4 +50,4 @@ async def get_cities_by_department(department_id: int, rate_type: str, country: except httpx.HTTPStatusError as e: raise Exception(f"API request failed with status {e.response.status_code}: {e.response.text}") except httpx.RequestError as e: - raise Exception(f"API request failed: {str(e)}") \ No newline at end of file + raise Exception(f"API request failed: {str(e)}") diff --git a/app/externals/fal/__init__.py b/app/externals/fal/__init__.py index 526f30b..0861b7f 100644 --- a/app/externals/fal/__init__.py +++ b/app/externals/fal/__init__.py @@ -1 +1 @@ -# Package initializer for FAL externals \ No newline at end of file +# Package initializer for FAL externals diff --git a/app/externals/fal/fal_client.py b/app/externals/fal/fal_client.py index 36bf7a3..fd76dbe 100644 --- a/app/externals/fal/fal_client.py +++ b/app/externals/fal/fal_client.py @@ -1,5 +1,5 @@ import urllib.parse -from typing import Optional, Dict, Any +from typing import Any, Dict, Optional import httpx @@ -36,12 +36,16 @@ async def 
tts_multilingual_v2(self, text: str, fal_webhook: Optional[str] = None payload.update(kwargs) return await self._post("fal-ai/elevenlabs/tts/multilingual-v2", payload, fal_webhook) - async def bytedance_omnihuman(self, image_url: str, audio_url: str, fal_webhook: Optional[str] = None, **kwargs) -> Dict[str, Any]: + async def bytedance_omnihuman( + self, image_url: str, audio_url: str, fal_webhook: Optional[str] = None, **kwargs + ) -> Dict[str, Any]: payload = {"image_url": image_url, "audio_url": audio_url} payload.update(kwargs) return await self._post("fal-ai/bytedance/omnihuman", payload, fal_webhook) - async def kling_image_to_video(self, prompt: str, image_url: str, fal_webhook: Optional[str] = None, **kwargs) -> Dict[str, Any]: + async def kling_image_to_video( + self, prompt: str, image_url: str, fal_webhook: Optional[str] = None, **kwargs + ) -> Dict[str, Any]: payload = {"prompt": prompt, "image_url": image_url} payload.update(kwargs) - return await self._post("fal-ai/kling-video/v2/master/image-to-video", payload, fal_webhook) \ No newline at end of file + return await self._post("fal-ai/kling-video/v2/master/image-to-video", payload, fal_webhook) diff --git a/app/externals/google_vision/google_vision_client.py b/app/externals/google_vision/google_vision_client.py index 21112d3..8d91952 100644 --- a/app/externals/google_vision/google_vision_client.py +++ b/app/externals/google_vision/google_vision_client.py @@ -1,4 +1,5 @@ import aiohttp + from app.configurations.config import GOOGLE_VISION_API_KEY from app.externals.google_vision.responses.vision_analysis_response import VisionAnalysisResponse @@ -7,29 +8,16 @@ async def analyze_image(image_base64: str) -> VisionAnalysisResponse: vision_api_url = f"https://vision.googleapis.com/v1/images:annotate?key={GOOGLE_VISION_API_KEY}" payload = { - "requests": [{ - "image": { - "content": image_base64 - }, - "features": [ - { - "type": "LABEL_DETECTION", - "maxResults": 3 - }, - { - "type": "LOGO_DETECTION", - 
"maxResults": 1 - } - ] - }] + "requests": [ + { + "image": {"content": image_base64}, + "features": [{"type": "LABEL_DETECTION", "maxResults": 3}, {"type": "LOGO_DETECTION", "maxResults": 1}], + } + ] } async with aiohttp.ClientSession() as session: - async with session.post( - vision_api_url, - json=payload, - headers={"Content-Type": "application/json"} - ) as response: + async with session.post(vision_api_url, json=payload, headers={"Content-Type": "application/json"}) as response: if response.status != 200: raise Exception(f"Error en Google Vision API: {await response.text()}") @@ -51,7 +39,4 @@ async def analyze_image(image_base64: str) -> VisionAnalysisResponse: label_description = ", ".join(labels) - return VisionAnalysisResponse( - logo_description=logo_description, - label_description=label_description - ) + return VisionAnalysisResponse(logo_description=logo_description, label_description=label_description) diff --git a/app/externals/images/image_client.py b/app/externals/images/image_client.py index c579af2..49a4c5e 100644 --- a/app/externals/images/image_client.py +++ b/app/externals/images/image_client.py @@ -1,27 +1,25 @@ +import asyncio import base64 import mimetypes -from typing import Optional import os +from typing import Optional import aiohttp -import asyncio import httpx -import base64 - import requests from app.configurations import config -from app.configurations.config import REPLICATE_API_KEY, GOOGLE_GEMINI_API_KEY, OPENAI_API_KEY +from app.configurations.config import GOOGLE_GEMINI_API_KEY, OPENAI_API_KEY, REPLICATE_API_KEY async def generate_image_variation( - image_url: str, - prompt: str, - aspect_ratio: str = "1:1", - output_format: str = "webp", - output_quality: int = 80, - prompt_upsampling: bool = False, - safety_tolerance: int = 2 + image_url: str, + prompt: str, + aspect_ratio: str = "1:1", + output_format: str = "webp", + output_quality: int = 80, + prompt_upsampling: bool = False, + safety_tolerance: int = 2, ) -> bytes: 
payload = { "input": { @@ -31,26 +29,22 @@ async def generate_image_variation( "output_quality": output_quality, "prompt": prompt, "prompt_upsampling": prompt_upsampling, - "safety_tolerance": safety_tolerance + "safety_tolerance": safety_tolerance, } } async with aiohttp.ClientSession() as session: async with session.post( - "https://api.replicate.com/v1/models/black-forest-labs/flux-1.1-pro/predictions", - headers={ - "Authorization": f"Bearer {REPLICATE_API_KEY}", - "Content-Type": "application/json" - }, - json=payload + "https://api.replicate.com/v1/models/black-forest-labs/flux-1.1-pro/predictions", + headers={"Authorization": f"Bearer {REPLICATE_API_KEY}", "Content-Type": "application/json"}, + json=payload, ) as response: if response.status == 200 or response.status == 201: prediction_data = await response.json() while True: async with session.get( - prediction_data["urls"]["get"], - headers={"Authorization": f"Bearer {REPLICATE_API_KEY}"} + prediction_data["urls"]["get"], headers={"Authorization": f"Bearer {REPLICATE_API_KEY}"} ) as status_response: status_data = await status_response.json() if status_data["status"] == "succeeded": @@ -70,18 +64,8 @@ async def generate_image_variation( def _build_image_part(image_base64: str, is_model_25: bool) -> dict: if is_model_25: - return { - "inlineData": { - "mimeType": 'image/jpeg', - "data": image_base64 - } - } - return { - "inline_data": { - "mime_type": 'image/jpeg', - "data": image_base64 - } - } + return {"inlineData": {"mimeType": "image/jpeg", "data": image_base64}} + return {"inline_data": {"mime_type": "image/jpeg", "data": image_base64}} async def _fetch_and_encode_images(image_urls: list[str], is_model_25: bool) -> list[dict]: @@ -92,7 +76,7 @@ async def _fetch_and_encode_images(image_urls: list[str], is_model_25: bool) -> async with fetch_session.get(image_url) as img_response: if img_response.status == 200: image_bytes = await img_response.read() - image_base64 = 
base64.b64encode(image_bytes).decode('utf-8') + image_base64 = base64.b64encode(image_bytes).decode("utf-8") parts.append(_build_image_part(image_base64, is_model_25)) except Exception as e: print(f"Error al procesar imagen de {image_url}: {str(e)}") @@ -103,36 +87,35 @@ async def _fetch_and_encode_images(image_urls: list[str], is_model_25: bool) -> def _build_generation_config(is_model_25: bool, aspect_ratio: str, image_size: str) -> dict: config = {"responseModalities": ["Text", "Image"]} if not is_model_25: - config["imageConfig"] = { - "aspectRatio": aspect_ratio, - "imageSize": image_size - } + config["imageConfig"] = {"aspectRatio": aspect_ratio, "imageSize": image_size} return config -async def google_image(image_urls: list[str], prompt: str, model_ia: Optional[str] = None, extra_params: Optional[dict] = None) -> bytes: +async def google_image( + image_urls: list[str], prompt: str, model_ia: Optional[str] = None, extra_params: Optional[dict] = None +) -> bytes: if extra_params is None: extra_params = {} - - is_model_25 = model_ia and '2.5' in model_ia - aspect_ratio = extra_params.get('aspect_ratio', '1:1') - image_size = extra_params.get('image_size', '1K') - - model_name = 'gemini-2.5-flash-image-preview' if is_model_25 else 'gemini-3-pro-image-preview' + + is_model_25 = model_ia and "2.5" in model_ia + aspect_ratio = extra_params.get("aspect_ratio", "1:1") + image_size = extra_params.get("image_size", "1K") + + model_name = "gemini-2.5-flash-image-preview" if is_model_25 else "gemini-3-pro-image-preview" url = f"https://generativelanguage.googleapis.com/v1beta/models/{model_name}:generateContent?key={GOOGLE_GEMINI_API_KEY}" parts = [{"text": prompt}] - + if image_urls: image_parts = await _fetch_and_encode_images(image_urls, is_model_25) parts.extend(image_parts) payload = { "contents": [{"parts": parts}], - "generationConfig": _build_generation_config(is_model_25, aspect_ratio, image_size) + "generationConfig": _build_generation_config(is_model_25, 
aspect_ratio, image_size), } - headers = {'Content-Type': 'application/json'} + headers = {"Content-Type": "application/json"} try: async with aiohttp.ClientSession() as session: @@ -146,7 +129,7 @@ async def google_image(image_urls: list[str], prompt: str, model_ia: Optional[st img_data_base64 = part["inlineData"]["data"] img_bytes = base64.b64decode(img_data_base64) return img_bytes - + raise Exception("No se generó ninguna imagen en la respuesta de Google Gemini") else: error_text = await response.text() @@ -157,11 +140,11 @@ async def google_image(image_urls: list[str], prompt: str, model_ia: Optional[st raise Exception(f"Error al generar imagen con Google Gemini: {str(e)}") -async def openai_image_edit(image_urls: list[str], prompt: str, model_ia: Optional[str] = None, extra_params: Optional[dict] = None) -> bytes: +async def openai_image_edit( + image_urls: list[str], prompt: str, model_ia: Optional[str] = None, extra_params: Optional[dict] = None +) -> bytes: url = "https://api.openai.com/v1/images/edits" - headers = { - "Authorization": f"Bearer {config.OPENAI_API_KEY}" - } + headers = {"Authorization": f"Bearer {config.OPENAI_API_KEY}"} data = aiohttp.FormData() async with aiohttp.ClientSession() as fetch_session: @@ -170,25 +153,23 @@ async def openai_image_edit(image_urls: list[str], prompt: str, model_ia: Option if img_response.status == 200: image_bytes = await img_response.read() filename = os.path.basename(image_url) - content_type = mimetypes.guess_type(filename)[0] or 'image/jpeg' - data.add_field( - 'image[]', - image_bytes, - filename=filename, - content_type=content_type - ) + content_type = mimetypes.guess_type(filename)[0] or "image/jpeg" + data.add_field("image[]", image_bytes, filename=filename, content_type=content_type) - prompt = prompt + ". **escena completa visible, composición centrada, todos los elementos dentro del marco cuadrado, nada recortado en los bordes, composición completa**" + prompt = ( + prompt + + ". 
**escena completa visible, composición centrada, todos los elementos dentro del marco cuadrado, nada recortado en los bordes, composición completa**" + ) if extra_params is None: extra_params = {} - - size = extra_params.get('resolution', '1024x1024') or '1024x1024' - - data.add_field('size', size) - data.add_field('prompt', prompt) - data.add_field('model', 'gpt-image-1') - data.add_field('n', '1') + + size = extra_params.get("resolution", "1024x1024") or "1024x1024" + + data.add_field("size", size) + data.add_field("prompt", prompt) + data.add_field("model", "gpt-image-1") + data.add_field("n", "1") try: async with aiohttp.ClientSession() as session: diff --git a/app/externals/s3_upload/s3_upload_client.py b/app/externals/s3_upload/s3_upload_client.py index a87dd25..53a662a 100644 --- a/app/externals/s3_upload/s3_upload_client.py +++ b/app/externals/s3_upload/s3_upload_client.py @@ -1,23 +1,18 @@ import httpx + from app.configurations.config import S3_UPLOAD_API from app.externals.s3_upload.requests.s3_upload_request import S3UploadRequest from app.externals.s3_upload.responses.s3_upload_response import S3UploadResponse async def upload_file(request: S3UploadRequest) -> S3UploadResponse: - headers = { - "Content-Type": "application/json" - } + headers = {"Content-Type": "application/json"} timeout = httpx.Timeout(timeout=180.0, connect=60.0) try: async with httpx.AsyncClient(timeout=timeout) as client: - response = await client.post( - S3_UPLOAD_API, - headers=headers, - json=request.dict() - ) + response = await client.post(S3_UPLOAD_API, headers=headers, json=request.dict()) response.raise_for_status() return S3UploadResponse(**response.json()) except Exception as e: diff --git a/app/externals/scraperapi/__init__.py b/app/externals/scraperapi/__init__.py index fdb4450..8edada0 100644 --- a/app/externals/scraperapi/__init__.py +++ b/app/externals/scraperapi/__init__.py @@ -1 +1 @@ -# Inicialización del paquete scraperapi \ No newline at end of file +# 
Inicialización del paquete scraperapi diff --git a/app/externals/scraperapi/scraperapi_client.py b/app/externals/scraperapi/scraperapi_client.py index 29e548e..d6c812f 100644 --- a/app/externals/scraperapi/scraperapi_client.py +++ b/app/externals/scraperapi/scraperapi_client.py @@ -1,6 +1,6 @@ -import aiohttp -from typing import Dict, Any +from typing import Any, Dict +import aiohttp from fastapi import HTTPException from app.configurations.config import SCRAPERAPI_KEY, URL_SCRAPER_LAMBDA @@ -13,10 +13,7 @@ def __init__(self): self.lambda_url = URL_SCRAPER_LAMBDA async def get_html(self, url: str, params: Dict[str, Any] = None) -> str: - default_params = { - "api_key": self.api_key, - "url": url - } + default_params = {"api_key": self.api_key, "url": url} if params: default_params.update(params) @@ -28,21 +25,17 @@ async def get_html(self, url: str, params: Dict[str, Any] = None) -> str: raise HTTPException(status_code=400, detail=error_text) return await response.text() - + async def get_html_lambda(self, url: str) -> str: - payload = { - "url": url - } - + payload = {"url": url} + async with aiohttp.ClientSession() as session: async with session.post( - self.lambda_url, - headers={"Content-Type": "application/json"}, - json=payload + self.lambda_url, headers={"Content-Type": "application/json"}, json=payload ) as response: if response.status != 200: error_text = await response.text() raise HTTPException(status_code=400, detail=f"Error lambda API scraper: {error_text}") - + response_data = await response.json() return response_data.get("content", "") diff --git a/app/factories/ai_provider_factory.py b/app/factories/ai_provider_factory.py index 74fbb20..a187e93 100644 --- a/app/factories/ai_provider_factory.py +++ b/app/factories/ai_provider_factory.py @@ -10,11 +10,11 @@ class AIProviderFactory: def get_provider(provider_name: str) -> AIProviderInterface: if provider_name == "openai": return OpenAIProvider() - elif provider_name == "claude": + elif provider_name 
== "claude": return AnthropicProvider() - elif provider_name == "deepseek": + elif provider_name == "deepseek": return DeepseekProvider() - elif provider_name == "gemini": + elif provider_name == "gemini": return GeminiProvider() else: - raise ValueError(f"El proveedor de AI '{provider_name}' no está implementado") \ No newline at end of file + raise ValueError(f"El proveedor de AI '{provider_name}' no está implementado") diff --git a/app/factories/scraping_factory.py b/app/factories/scraping_factory.py index 6487233..5c7bf5a 100644 --- a/app/factories/scraping_factory.py +++ b/app/factories/scraping_factory.py @@ -2,12 +2,12 @@ from fastapi import Depends -from app.scrapers.scraper_interface import ScraperInterface -from app.scrapers.amazon_scraper import AmazonScraper from app.scrapers.aliexpress_scraper import AliexpressScraper +from app.scrapers.amazon_scraper import AmazonScraper from app.scrapers.cj_scraper import CJScraper from app.scrapers.dropi_scraper import DropiScraper from app.scrapers.ia_scraper import IAScraper +from app.scrapers.scraper_interface import ScraperInterface from app.services.message_service_interface import MessageServiceInterface diff --git a/app/helpers/escape_helper.py b/app/helpers/escape_helper.py index 9668555..ed7e19d 100644 --- a/app/helpers/escape_helper.py +++ b/app/helpers/escape_helper.py @@ -1,4 +1,5 @@ import re + from bs4 import BeautifulSoup @@ -7,7 +8,7 @@ def clean_placeholders(text: str, allowed_keys: list = None) -> str: allowed_keys = [] def replace_placeholder(match): - key = match.group(1).strip('"\' ') # Remueve comillas internas + key = match.group(1).strip("\"' ") # Remueve comillas internas return match.group(0) if key in allowed_keys else "" pattern = re.compile(r"\{\s*[\"']?([^\"'\{\}]+)[\"']?\s*\}") @@ -15,42 +16,43 @@ def replace_placeholder(match): def clean_html_deeply(html_content): - soup = BeautifulSoup(html_content, 'html.parser') + soup = BeautifulSoup(html_content, "html.parser") - for tag in 
soup(['script', 'style', 'noscript', 'svg', 'link', 'meta', 'head']): + for tag in soup(["script", "style", "noscript", "svg", "link", "meta", "head"]): tag.decompose() for tag in soup.find_all(True): - if tag.name == 'img': - tag.attrs = {key: tag.attrs[key] for key in ['src', 'alt'] if key in tag.attrs} + if tag.name == "img": + tag.attrs = {key: tag.attrs[key] for key in ["src", "alt"] if key in tag.attrs} else: tag.attrs = {} simplified_html = str(soup) - simplified_html_clean = re.sub(r'\s+', ' ', simplified_html).strip() + simplified_html_clean = re.sub(r"\s+", " ", simplified_html).strip() return simplified_html_clean + def clean_html_less_deeply(html_content): - soup = BeautifulSoup(html_content, 'html5lib') + soup = BeautifulSoup(html_content, "html5lib") - for tag in soup(['script', 'style', 'noscript', 'svg', 'link', 'meta', 'head']): + for tag in soup(["script", "style", "noscript", "svg", "link", "meta", "head"]): tag.decompose() for tag in soup.find_all(True): - if tag.name == 'img': - tag.attrs = {key: tag.attrs[key] for key in ['src', 'alt', 'class', 'id', 'title'] if key in tag.attrs} - elif tag.name == 'a': - tag.attrs = {key: tag.attrs[key] for key in ['href', 'title', 'target', 'class', 'id'] if key in tag.attrs} - elif tag.name == 'source': - tag.attrs = {key: tag.attrs[key] for key in ['media', 'srcset', 'type'] if key in tag.attrs} - elif tag.name == 'picture': - tag.attrs = {key: tag.attrs[key] for key in ['id', 'class'] if key in tag.attrs} + if tag.name == "img": + tag.attrs = {key: tag.attrs[key] for key in ["src", "alt", "class", "id", "title"] if key in tag.attrs} + elif tag.name == "a": + tag.attrs = {key: tag.attrs[key] for key in ["href", "title", "target", "class", "id"] if key in tag.attrs} + elif tag.name == "source": + tag.attrs = {key: tag.attrs[key] for key in ["media", "srcset", "type"] if key in tag.attrs} + elif tag.name == "picture": + tag.attrs = {key: tag.attrs[key] for key in ["id", "class"] if key in tag.attrs} else: - 
allowed_common_attrs = ['id', 'class'] + allowed_common_attrs = ["id", "class"] tag.attrs = {key: tag.attrs[key] for key in allowed_common_attrs if key in tag.attrs} simplified_html = str(soup) - simplified_html_clean = re.sub(r'\s+', ' ', simplified_html).strip() + simplified_html_clean = re.sub(r"\s+", " ", simplified_html).strip() - return simplified_html_clean \ No newline at end of file + return simplified_html_clean diff --git a/app/helpers/image_compression_helper.py b/app/helpers/image_compression_helper.py index 922706b..8655d4d 100644 --- a/app/helpers/image_compression_helper.py +++ b/app/helpers/image_compression_helper.py @@ -1,5 +1,6 @@ -import io import base64 +import io + from PIL import Image @@ -12,38 +13,38 @@ def compress_image_to_target(original_image_bytes: bytes, target_kb: int = 120) img_converted = img.convert("RGB") target_bytes = target_kb * 1024 - + output_buffer = io.BytesIO() - img_converted.save(output_buffer, format='WEBP', quality=80) + img_converted.save(output_buffer, format="WEBP", quality=80) webp_size = len(output_buffer.getvalue()) - + if webp_size <= target_bytes: - return base64.b64encode(output_buffer.getvalue()).decode('utf-8') - + return base64.b64encode(output_buffer.getvalue()).decode("utf-8") + quality = _calculate_initial_quality(webp_size, target_bytes) - + for attempt in range(2): output_buffer = io.BytesIO() - img_converted.save(output_buffer, format='WEBP', quality=quality) + img_converted.save(output_buffer, format="WEBP", quality=quality) compressed_size = len(output_buffer.getvalue()) - + if compressed_size <= target_bytes: - return base64.b64encode(output_buffer.getvalue()).decode('utf-8') - + return base64.b64encode(output_buffer.getvalue()).decode("utf-8") + quality = max(40, quality - 10) - + if compressed_size > target_bytes and max(img_converted.size) > 1024: img_resized = _resize_image(img_converted, target_bytes, compressed_size) output_buffer = io.BytesIO() - img_resized.save(output_buffer, 
format='WEBP', quality=70) - return base64.b64encode(output_buffer.getvalue()).decode('utf-8') - - return base64.b64encode(output_buffer.getvalue()).decode('utf-8') + img_resized.save(output_buffer, format="WEBP", quality=70) + return base64.b64encode(output_buffer.getvalue()).decode("utf-8") + + return base64.b64encode(output_buffer.getvalue()).decode("utf-8") def _calculate_initial_quality(current_size: int, target_size: int) -> int: ratio = target_size / current_size - + if ratio >= 0.8: return 75 elif ratio >= 0.5: @@ -58,7 +59,7 @@ def _resize_image(img: Image, target_bytes: int, current_bytes: int) -> Image: ratio = (target_bytes / current_bytes) ** 0.5 new_width = int(img.width * ratio) new_height = int(img.height * ratio) - + max_dimension = 1920 if new_width > max_dimension or new_height > max_dimension: if new_width > new_height: @@ -67,6 +68,5 @@ def _resize_image(img: Image, target_bytes: int, current_bytes: int) -> Image: else: new_width = int(new_width * max_dimension / new_height) new_height = max_dimension - - return img.resize((new_width, new_height), Image.Resampling.LANCZOS) + return img.resize((new_width, new_height), Image.Resampling.LANCZOS) diff --git a/app/managers/conversation_manager.py b/app/managers/conversation_manager.py index 46d0df4..47904b9 100644 --- a/app/managers/conversation_manager.py +++ b/app/managers/conversation_manager.py @@ -1,13 +1,14 @@ -from typing import Dict, Any, List, Tuple from collections import defaultdict +from typing import Any, Dict, List, Tuple + +from app.externals.agent_config.responses.agent_config_response import AgentConfigResponse +from app.factories.ai_provider_factory import AIProviderFactory from app.managers.conversation_manager_interface import ConversationManagerInterface from app.processors.agent_processor import AgentProcessor +from app.processors.mcp_processor import MCPProcessor from app.processors.simple_processor import SimpleProcessor from app.requests.message_request import MessageRequest 
-from app.externals.agent_config.responses.agent_config_response import AgentConfigResponse -from app.factories.ai_provider_factory import AIProviderFactory from app.tools.tool_generator import ToolGenerator -from app.processors.mcp_processor import MCPProcessor class ConversationManager(ConversationManagerInterface): @@ -18,7 +19,7 @@ def __init__(self): def get_conversation_history(self, conversation_id: str) -> List[Dict[str, Any]]: if conversation_id: return self.history_store[conversation_id] - return [] + return [] async def process_conversation(self, request: MessageRequest, agent_config: AgentConfigResponse) -> dict[str, Any]: ai_provider = AIProviderFactory.get_provider(agent_config.provider_ai) @@ -26,7 +27,7 @@ async def process_conversation(self, request: MessageRequest, agent_config: Agen model=agent_config.model_ai, temperature=agent_config.preferences.temperature, max_tokens=agent_config.preferences.max_tokens, - top_p=agent_config.preferences.top_p + top_p=agent_config.preferences.top_p, ) history = self.get_conversation_history(request.conversation_id) @@ -54,17 +55,19 @@ async def process_conversation(self, request: MessageRequest, agent_config: Agen ai_response_content = response_data.get("text") if ai_response_content is None: ai_response_content = str(response_data) - + self._update_conversation_history( conversation_id=request.conversation_id, user_message_content=request.query, - ai_response_content=ai_response_content + ai_response_content=ai_response_content, ) - + return response_data - def _update_conversation_history(self, conversation_id: str, user_message_content: str, ai_response_content: str) -> None: - if not conversation_id: + def _update_conversation_history( + self, conversation_id: str, user_message_content: str, ai_response_content: str + ) -> None: + if not conversation_id: return self.history_store[conversation_id].append({"role": "user", "content": user_message_content}) @@ -72,17 +75,19 @@ def 
_update_conversation_history(self, conversation_id: str, user_message_conten current_conv_history = self.history_store[conversation_id] if len(current_conv_history) > self.max_history_length: - self.history_store[conversation_id] = current_conv_history[-self.max_history_length:] + self.history_store[conversation_id] = current_conv_history[-self.max_history_length :] - async def _fallback_with_anthropic(self, request: MessageRequest, agent_config: AgentConfigResponse, history: list) -> dict[str, Any]: + async def _fallback_with_anthropic( + self, request: MessageRequest, agent_config: AgentConfigResponse, history: list + ) -> dict[str, Any]: anthropic_provider = AIProviderFactory.get_provider("claude") anthropic_llm = anthropic_provider.get_llm( model="claude-3-7-sonnet-20250219", temperature=agent_config.preferences.temperature, max_tokens=agent_config.preferences.max_tokens, - top_p=agent_config.preferences.top_p + top_p=agent_config.preferences.top_p, ) processor = SimpleProcessor(anthropic_llm, agent_config.prompt, history) - + return await processor.process(request, request.files, anthropic_provider.supports_interleaved_files()) diff --git a/app/managers/conversation_manager_interface.py b/app/managers/conversation_manager_interface.py index ef44688..88d9a1c 100644 --- a/app/managers/conversation_manager_interface.py +++ b/app/managers/conversation_manager_interface.py @@ -1,6 +1,7 @@ from abc import ABC, abstractmethod -from app.requests.message_request import MessageRequest + from app.externals.agent_config.responses.agent_config_response import AgentConfigResponse +from app.requests.message_request import MessageRequest class ConversationManagerInterface(ABC): diff --git a/app/middlewares/auth_middleware.py b/app/middlewares/auth_middleware.py index a7bfc7b..6bbf0da 100644 --- a/app/middlewares/auth_middleware.py +++ b/app/middlewares/auth_middleware.py @@ -1,23 +1,18 @@ from functools import wraps -from fastapi import HTTPException, Header, Request from 
typing import Optional + import httpx +from fastapi import Header, HTTPException, Request -from app.configurations.config import AUTH_SERVICE_URL, API_KEY +from app.configurations.config import API_KEY, AUTH_SERVICE_URL async def verify_api_key(api_key: Optional[str]) -> bool: if not api_key: - raise HTTPException( - status_code=401, - detail="API Key not provided" - ) + raise HTTPException(status_code=401, detail="API Key not provided") if api_key != API_KEY: - raise HTTPException( - status_code=401, - detail="Invalid API Key" - ) + raise HTTPException(status_code=401, detail="Invalid API Key") return True @@ -26,10 +21,7 @@ def require_api_key(func): @wraps(func) async def wrapper(request: Request, *args, **kwargs): if request is None: - raise HTTPException( - status_code=500, - detail="Request not found" - ) + raise HTTPException(status_code=500, detail="Request not found") await verify_api_key(request.headers.get("x-api-key")) return await func(request, *args, **kwargs) @@ -38,43 +30,27 @@ async def wrapper(request: Request, *args, **kwargs): async def verify_user_token(authorization: Optional[str]) -> dict: if not authorization: - raise HTTPException( - status_code=401, - detail="Authorization token not provided" - ) + raise HTTPException(status_code=401, detail="Authorization token not provided") try: async with httpx.AsyncClient() as client: - response = await client.get( - AUTH_SERVICE_URL, - headers={"Authorization": authorization}, - timeout=3.0 - ) + response = await client.get(AUTH_SERVICE_URL, headers={"Authorization": authorization}, timeout=3.0) if response.status_code != 200: - raise HTTPException( - status_code=401, - detail="Invalid token" - ) + raise HTTPException(status_code=401, detail="Invalid token") return response.json() except httpx.RequestError: - raise HTTPException( - status_code=500, - detail="Error verifying token" - ) + raise HTTPException(status_code=500, detail="Error verifying token") def require_auth(func): @wraps(func) async def 
wrapper(request: Request, *args, **kwargs): if request is None: - raise HTTPException( - status_code=500, - detail="Request not found" - ) + raise HTTPException(status_code=500, detail="Request not found") user_info = await verify_user_token(request.headers.get("authorization")) request.state.user_info = user_info - return await func(request ,*args, **kwargs) + return await func(request, *args, **kwargs) return wrapper diff --git a/app/pdf/helpers.py b/app/pdf/helpers.py index eeffe3c..bf47cef 100644 --- a/app/pdf/helpers.py +++ b/app/pdf/helpers.py @@ -1,8 +1,8 @@ def clean_text(text): text = text.replace("\u2019", "'") text = text.replace("\u2018", "'") - text = text.replace("\u201C", '"') - text = text.replace("\u201D", '"') + text = text.replace("\u201c", '"') + text = text.replace("\u201d", '"') text = text.replace("\u2014", "-") text = text.replace("\u2013", "-") text = text.replace("\u2026", "...") @@ -12,9 +12,9 @@ def clean_text(text): def clean_json(text): text = text.strip() if text.startswith("```json"): - text = text[len("```json"):].strip() + text = text[len("```json") :].strip() elif text.startswith("```"): - text = text[len("```"):].strip() + text = text[len("```") :].strip() if text.endswith("```"): - text = text[:-len("```")].strip() + text = text[: -len("```")].strip() return text diff --git a/app/pdf/pdf_generator.py b/app/pdf/pdf_generator.py index ee005fe..46c97d6 100644 --- a/app/pdf/pdf_generator.py +++ b/app/pdf/pdf_generator.py @@ -1,15 +1,19 @@ -from fpdf import FPDF -import requests import io import os from typing import Optional, Tuple + +import requests +from fpdf import FPDF + try: import PIL.Image as PILImage + PILLOW_AVAILABLE = True except ImportError: PILImage = None PILLOW_AVAILABLE = False + # Constantes de diseño class PDFConstants: # Colores @@ -20,14 +24,14 @@ class PDFConstants: BLACK_COLOR = (0, 0, 0) GRAY_COLOR = (128, 128, 128) LIGHT_GRAY_COLOR = (200, 200, 200) - + # Tamaños de fuente HEADER_FONT_SIZE = 16 
COVER_TITLE_FONT_SIZE = 28 SECTION_TITLE_FONT_SIZE = 14 CONTENT_FONT_SIZE = 12 FOOTER_FONT_SIZE = 10 - + # Márgenes y espaciado PAGE_MARGIN = 15 HEADER_MARGIN = 10 @@ -35,7 +39,7 @@ class PDFConstants: LINE_WIDTH_THIN = 0.3 LINE_WIDTH_MEDIUM = 0.5 LINE_WIDTH_THICK = 0.7 - + # Otros IMAGE_QUALITY = 85 TEMP_IMAGE_PATH = "/tmp/temp_cover_image.jpg" @@ -55,26 +59,26 @@ def header(self) -> None: """Genera el header de cada página (excepto la portada).""" if self.page_no() == 1: return - + initial_y = self.get_y() - + self.set_font("Helvetica", "B", PDFConstants.HEADER_FONT_SIZE) self.set_text_color(*PDFConstants.HEADER_COLOR) - + title = self.custom_title if self.custom_title else f"User Manual for {self.product_name}" clean_title = self._clean_text_for_latin1(title) - + self.set_y(PDFConstants.HEADER_MARGIN) width_available = self.w - (2 * PDFConstants.HEADER_MARGIN) self.x = PDFConstants.HEADER_MARGIN - + self.multi_cell(width_available, 8, clean_title, align="C") - + end_y = self.get_y() + 2 self.set_line_width(PDFConstants.LINE_WIDTH_MEDIUM) self.set_draw_color(*PDFConstants.HEADER_COLOR) self.line(PDFConstants.HEADER_MARGIN, end_y, self.w - PDFConstants.HEADER_MARGIN, end_y) - + self.set_y(end_y + PDFConstants.HEADER_MARGIN) self.header_height = self.get_y() - initial_y @@ -82,7 +86,7 @@ def footer(self) -> None: """Genera el footer de cada página (excepto la portada).""" if self.page_no() == 1: return - + self.set_y(-20) self.set_font("Helvetica", "I", PDFConstants.FOOTER_FONT_SIZE) self.set_text_color(*PDFConstants.GRAY_COLOR) @@ -91,17 +95,17 @@ def footer(self) -> None: def add_cover_page(self, title: str, subtitle: str = "", image_url: Optional[str] = None) -> None: """ Crea la página de portada del PDF. 
- + Args: title: Título principal de la portada subtitle: Subtítulo opcional image_url: URL de imagen opcional para usar como fondo """ self.add_page() - + page_width = self.w page_height = self.h - + if image_url and PILLOW_AVAILABLE: # Solo mostrar la imagen sin texto si hay imagen self._create_image_only_cover(image_url, page_width, page_height) @@ -109,58 +113,74 @@ def add_cover_page(self, title: str, subtitle: str = "", image_url: Optional[str # Portada tradicional con texto si no hay imagen title_y_pos, title_color = self._create_cover_background(None, page_width, page_height) self._add_cover_text(title, subtitle, title_y_pos, title_color, page_width, page_height, None) - + self.add_page() - - def _create_cover_background(self, image_url: Optional[str], page_width: float, page_height: float) -> Tuple[float, Tuple[int, int, int]]: + + def _create_cover_background( + self, image_url: Optional[str], page_width: float, page_height: float + ) -> Tuple[float, Tuple[int, int, int]]: """Crea el fondo de la portada (imagen o borde tradicional).""" if image_url and PILLOW_AVAILABLE: image_result = self._download_and_process_image(image_url) if image_result: temp_path, img_width, img_height = image_result - + available_width = page_width - 2 * PDFConstants.PAGE_MARGIN available_height = page_height - 2 * PDFConstants.PAGE_MARGIN - + x_pos, y_pos, final_width, final_height = self._calculate_image_dimensions( img_width, img_height, available_width, available_height ) - + self.image(temp_path, x=x_pos, y=y_pos, w=final_width, h=final_height) self._cleanup_temp_image() - + # Crear overlay para el título overlay_y = page_height - PDFConstants.OVERLAY_HEIGHT - PDFConstants.PAGE_MARGIN self.set_fill_color(*PDFConstants.BLACK_COLOR) - self.rect(PDFConstants.PAGE_MARGIN, overlay_y, - page_width - 2 * PDFConstants.PAGE_MARGIN, - PDFConstants.OVERLAY_HEIGHT, 'F') - + self.rect( + PDFConstants.PAGE_MARGIN, + overlay_y, + page_width - 2 * PDFConstants.PAGE_MARGIN, + 
PDFConstants.OVERLAY_HEIGHT, + "F", + ) + return overlay_y + 15, PDFConstants.WHITE_COLOR - + # Portada tradicional con borde self.set_draw_color(*PDFConstants.HEADER_COLOR) self.set_line_width(PDFConstants.LINE_WIDTH_THICK) - self.rect(PDFConstants.PAGE_MARGIN, PDFConstants.PAGE_MARGIN, - page_width - 2 * PDFConstants.PAGE_MARGIN, - page_height - 2 * PDFConstants.PAGE_MARGIN) - + self.rect( + PDFConstants.PAGE_MARGIN, + PDFConstants.PAGE_MARGIN, + page_width - 2 * PDFConstants.PAGE_MARGIN, + page_height - 2 * PDFConstants.PAGE_MARGIN, + ) + return page_height * 0.4, PDFConstants.HEADER_COLOR - - def _add_cover_text(self, title: str, subtitle: str, title_y_pos: float, - title_color: Tuple[int, int, int], page_width: float, - page_height: float, image_url: Optional[str]) -> None: + + def _add_cover_text( + self, + title: str, + subtitle: str, + title_y_pos: float, + title_color: Tuple[int, int, int], + page_width: float, + page_height: float, + image_url: Optional[str], + ) -> None: """Agrega el texto de la portada.""" self.set_font("Helvetica", "B", PDFConstants.COVER_TITLE_FONT_SIZE) self.set_text_color(*title_color) - + text_width = page_width - 2 * PDFConstants.PAGE_MARGIN - 20 - + self.set_y(title_y_pos) self.set_x(PDFConstants.PAGE_MARGIN + 10) clean_title = self._clean_text_for_latin1(title) self.multi_cell(text_width, 18, clean_title, align="C") - + # Solo mostrar subtítulo y versión si no hay imagen if not image_url: if subtitle: @@ -170,118 +190,119 @@ def _add_cover_text(self, title: str, subtitle: str, title_y_pos: float, self.set_x(PDFConstants.PAGE_MARGIN + 10) clean_subtitle = self._clean_text_for_latin1(subtitle) self.multi_cell(text_width, 12, clean_subtitle, align="C") - + self.set_font("Helvetica", "I", 11) self.set_text_color(100, 100, 100) version_y = page_height - PDFConstants.PAGE_MARGIN - 20 self.set_y(version_y) self.set_x(PDFConstants.PAGE_MARGIN + 10) self.multi_cell(text_width, 10, f"Document Version: {self.version}", align="C") - + def 
set_document_version(self, version: str) -> None: """Establece la versión del documento.""" self.version = version - + def set_custom_title(self, title: str) -> None: """Establece el título personalizado que aparecerá en el header de cada página.""" self.custom_title = title - + def _download_and_process_image(self, image_url: str) -> Optional[Tuple[str, int, int]]: """ Descarga y procesa una imagen desde una URL. - + Returns: Tuple con (ruta_temporal, ancho, alto) o None si falla """ try: response = requests.get(image_url, timeout=PDFConstants.REQUEST_TIMEOUT) response.raise_for_status() - + image = PILImage.open(io.BytesIO(response.content)) - - if image.mode != 'RGB': - image = image.convert('RGB') - + + if image.mode != "RGB": + image = image.convert("RGB") + image.save(PDFConstants.TEMP_IMAGE_PATH, "JPEG", quality=PDFConstants.IMAGE_QUALITY) - + return PDFConstants.TEMP_IMAGE_PATH, image.width, image.height - + except Exception as e: print(f"Error al procesar imagen: {e}") return None - - def _calculate_image_dimensions(self, img_width: int, img_height: int, - available_width: float, available_height: float) -> Tuple[float, float, float, float]: + + def _calculate_image_dimensions( + self, img_width: int, img_height: int, available_width: float, available_height: float + ) -> Tuple[float, float, float, float]: """ Calcula las dimensiones y posición para centrar una imagen manteniendo la proporción. 
- + Returns: Tuple con (x_pos, y_pos, final_width, final_height) """ scale_width = available_width / img_width scale_height = available_height / img_height scale = min(scale_width, scale_height) - + final_width = img_width * scale final_height = img_height * scale - + x_pos = (self.w - final_width) / 2 y_pos = (self.h - final_height) / 2 - + return x_pos, y_pos, final_width, final_height - + def _cleanup_temp_image(self) -> None: """Elimina el archivo temporal de imagen si existe.""" if os.path.exists(PDFConstants.TEMP_IMAGE_PATH): os.remove(PDFConstants.TEMP_IMAGE_PATH) - + def _create_image_only_cover(self, image_url: str, page_width: float, page_height: float) -> None: """Crea una portada que muestra solo la imagen ocupando toda la página.""" image_result = self._download_and_process_image(image_url) if image_result: temp_path, img_width, img_height = image_result - + # Calcular la escala para llenar toda la página (puede recortar) scale_width = page_width / img_width scale_height = page_height / img_height # Usar la escala mayor para llenar completamente (crop to fit) scale = max(scale_width, scale_height) - + final_width = img_width * scale final_height = img_height * scale - + # Centrar la imagen (puede quedar parcialmente fuera de los bordes) x_pos = (page_width - final_width) / 2 y_pos = (page_height - final_height) / 2 - + self.image(temp_path, x=x_pos, y=y_pos, w=final_width, h=final_height) self._cleanup_temp_image() - + def get_multi_cell_height(self, w, h, txt, align="J"): x = self.x y = self.y - + lines = 1 width = 0 - text = txt.split(' ') + text = txt.split(" ") for word in text: - word_width = self.get_string_width(word + ' ') + word_width = self.get_string_width(word + " ") if width + word_width > w: lines += 1 width = word_width else: width += word_width - + self.x = x self.y = y - + return lines * h def add_section(self, title: str, content: str) -> None: """ Agrega una sección al PDF con título en negrita y contenido. 
Cada sección inicia en una nueva página. - + Args: title: Título de la sección content: Contenido de la sección @@ -296,7 +317,7 @@ def add_section(self, title: str, content: str) -> None: self.set_font("Helvetica", "B", PDFConstants.SECTION_TITLE_FONT_SIZE) self.set_text_color(*PDFConstants.WHITE_COLOR) # Texto blanco self.set_fill_color(*PDFConstants.SECTION_BG_COLOR) # Fondo gris - + # Crear el título con fondo gris completo clean_title = self._clean_text_for_latin1(title) self.cell(0, 12, clean_title, ln=True, fill=True, align="C", border=0) @@ -316,40 +337,40 @@ def add_section(self, title: str, content: str) -> None: current_y = self.get_y() self.line(PDFConstants.HEADER_MARGIN, current_y, self.w - PDFConstants.HEADER_MARGIN, current_y) self.ln(10) - + def _format_content(self, content) -> str: """Formatea el contenido de una sección.""" if isinstance(content, list): text = "\n".join(str(item) for item in content) else: text = content.replace("\\n", "\n") - + # Limpiar caracteres que no son compatibles con latin-1 return self._clean_text_for_latin1(text) - + def _clean_text_for_latin1(self, text: str) -> str: """Limpia el texto para que sea compatible con latin-1.""" # Reemplazos de caracteres especiales comunes replacements = { - '\u2022': '•', # Bullet point - '\u2013': '-', # En dash - '\u2014': '-', # Em dash - '\u2018': "'", # Left single quotation mark - '\u2019': "'", # Right single quotation mark - '\u201c': '"', # Left double quotation mark - '\u201d': '"', # Right double quotation mark - '\u2026': '...', # Horizontal ellipsis - '\u00a0': ' ', # Non-breaking space + "\u2022": "•", # Bullet point + "\u2013": "-", # En dash + "\u2014": "-", # Em dash + "\u2018": "'", # Left single quotation mark + "\u2019": "'", # Right single quotation mark + "\u201c": '"', # Left double quotation mark + "\u201d": '"', # Right double quotation mark + "\u2026": "...", # Horizontal ellipsis + "\u00a0": " ", # Non-breaking space } - + # Aplicar reemplazos for 
unicode_char, replacement in replacements.items(): text = text.replace(unicode_char, replacement) - + # Intentar codificar y decodificar para detectar otros problemas try: - text.encode('latin-1') + text.encode("latin-1") return text except UnicodeEncodeError: # Si aún hay problemas, reemplazar caracteres problemáticos - return text.encode('latin-1', errors='replace').decode('latin-1') + return text.encode("latin-1", errors="replace").decode("latin-1") diff --git a/app/pdf/pdf_manual_generator.py b/app/pdf/pdf_manual_generator.py index ccc4095..66fd962 100644 --- a/app/pdf/pdf_manual_generator.py +++ b/app/pdf/pdf_manual_generator.py @@ -1,7 +1,8 @@ import base64 import os -from app.pdf.pdf_generator import PDFGenerator + from app.configurations.pdf_manual_config import PDF_MANUAL_SECTION_ORDER, get_sections_for_language +from app.pdf.pdf_generator import PDFGenerator class PDFManualGenerator: @@ -14,16 +15,12 @@ def __init__(self, product_name: str, language: str = "es"): async def create_manual(self, data: dict, title: str = None, image_url: str = None) -> str: # Usar el título personalizado si se proporciona, sino usar el por defecto cover_title = title if title else f"User Manual for {self.product_name}" - + # Establecer el título personalizado para que aparezca en el header de todas las páginas if title: self.pdf.set_custom_title(title) - - self.pdf.add_cover_page( - cover_title, - "Everything You Need to Know to Get Started", - image_url - ) + + self.pdf.add_cover_page(cover_title, "Everything You Need to Know to Get Started", image_url) self.pdf.set_auto_page_break(auto=True, margin=20) for key in PDF_MANUAL_SECTION_ORDER: @@ -34,4 +31,4 @@ async def create_manual(self, data: dict, title: str = None, image_url: str = No base64_str = base64.b64encode(pdf_bytes).decode("utf-8") - return base64_str \ No newline at end of file + return base64_str diff --git a/app/processors/agent_processor.py b/app/processors/agent_processor.py index 687c321..6d4f2e4 100644 --- 
a/app/processors/agent_processor.py +++ b/app/processors/agent_processor.py @@ -1,10 +1,11 @@ -from typing import Dict, Any, List, Optional +import traceback +from typing import Any, Dict, List, Optional + from langchain.agents import AgentExecutor, create_tool_calling_agent -from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder -from app.processors.conversation_processor import ConversationProcessor from langchain_core.language_models import BaseChatModel -import traceback +from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder +from app.processors.conversation_processor import ConversationProcessor from app.requests.message_request import MessageRequest @@ -13,47 +14,48 @@ def __init__(self, llm: BaseChatModel, context: str, history: List[str], tools: super().__init__(llm, context, history) self.tools = tools - async def process(self, request: MessageRequest, files: Optional[List[Dict[str, str]]] = None, - supports_interleaved_files: bool = False) -> Dict[str, Any]: - prompt_template = ChatPromptTemplate.from_messages([ - ("system", "{context}"), - MessagesPlaceholder(variable_name="chat_history"), - ("human", "{input}"), - MessagesPlaceholder(variable_name="agent_scratchpad"), - ]) - - agent = create_tool_calling_agent( - llm=self.llm, - tools=self.tools, - prompt=prompt_template + async def process( + self, + request: MessageRequest, + files: Optional[List[Dict[str, str]]] = None, + supports_interleaved_files: bool = False, + ) -> Dict[str, Any]: + prompt_template = ChatPromptTemplate.from_messages( + [ + ("system", "{context}"), + MessagesPlaceholder(variable_name="chat_history"), + ("human", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] ) + agent = create_tool_calling_agent(llm=self.llm, tools=self.tools, prompt=prompt_template) + agent_executor = AgentExecutor( agent=agent, tools=self.tools, verbose=False, handle_parsing_errors=True, max_iterations=3, - return_intermediate_steps=True + 
return_intermediate_steps=True, ) try: - config = self._get_langsmith_config( - request, - "agent_processor", - has_tools=len(self.tools) > 0 + config = self._get_langsmith_config(request, "agent_processor", has_tools=len(self.tools) > 0) + + result = await agent_executor.ainvoke( + { + "context": self.context or "", + "chat_history": self.history, + "input": request.query, + "agent_scratchpad": "", + }, + config=config, ) - - result = await agent_executor.ainvoke({ - "context": self.context or "", - "chat_history": self.history, - "input": request.query, - "agent_scratchpad": "" - }, config=config) - + if "text" not in result and "output" in result: result["text"] = result["output"] - + return result except Exception as e: print(f"Error durante la ejecución del agente: {str(e)}") diff --git a/app/processors/conversation_processor.py b/app/processors/conversation_processor.py index 6df29c5..c98819d 100644 --- a/app/processors/conversation_processor.py +++ b/app/processors/conversation_processor.py @@ -1,4 +1,5 @@ -from typing import Dict, Any, List, Optional +from typing import Any, Dict, List, Optional + from langchain_core.language_models import BaseChatModel @@ -11,13 +12,11 @@ def __init__(self, llm: BaseChatModel, context: str, history: List[str]): def _get_langsmith_config(self, request, processor_type: str, **extra_metadata) -> Dict[str, Any]: config = { "tags": [processor_type, f"agent_{request.agent_id}"], - "metadata": { - "agent_id": request.agent_id, - "conversation_id": request.conversation_id, - **extra_metadata - } + "metadata": {"agent_id": request.agent_id, "conversation_id": request.conversation_id, **extra_metadata}, } return config - async def process(self, query: str, files: Optional[List[Dict[str, str]]], supports_interleaved_files: bool) -> Dict[str, Any]: + async def process( + self, query: str, files: Optional[List[Dict[str, str]]], supports_interleaved_files: bool + ) -> Dict[str, Any]: raise NotImplementedError diff --git 
a/app/processors/mcp_processor.py b/app/processors/mcp_processor.py index d4343f5..5807ce7 100644 --- a/app/processors/mcp_processor.py +++ b/app/processors/mcp_processor.py @@ -1,11 +1,13 @@ -from typing import Dict, Any, List, Optional -from app.processors.conversation_processor import ConversationProcessor -from app.requests.message_request import MessageRequest +import json +import re +from typing import Any, Dict, List, Optional + from langchain_core.language_models import BaseChatModel from langchain_mcp_adapters.client import MultiServerMCPClient from langgraph.prebuilt import create_react_agent -import json -import re + +from app.processors.conversation_processor import ConversationProcessor +from app.requests.message_request import MessageRequest class MCPProcessor(ConversationProcessor): @@ -13,8 +15,12 @@ def __init__(self, llm: BaseChatModel, context: str, history: List[str], mcp_con super().__init__(llm, context, history) self.mcp_config = mcp_config - async def process(self, request: MessageRequest, files: Optional[List[Dict[str, str]]] = None, - supports_interleaved_files: bool = False) -> Dict[str, Any]: + async def process( + self, + request: MessageRequest, + files: Optional[List[Dict[str, str]]] = None, + supports_interleaved_files: bool = False, + ) -> Dict[str, Any]: async with MultiServerMCPClient(self.mcp_config) as client: agent = create_react_agent(self.llm, client.get_tools()) @@ -39,7 +45,7 @@ async def process(self, request: MessageRequest, files: Optional[List[Dict[str, config = self._get_langsmith_config( request, "mcp_processor", - mcp_servers=list(self.mcp_config.keys()) if isinstance(self.mcp_config, dict) else [] + mcp_servers=list(self.mcp_config.keys()) if isinstance(self.mcp_config, dict) else [], ) response = await agent.ainvoke({"messages": messages}, config=config) @@ -56,7 +62,7 @@ async def process(self, request: MessageRequest, files: Optional[List[Dict[str, else: content = str(response) - match = 
re.search(r'```json\n(.*?)\n```', content, re.DOTALL) + match = re.search(r"```json\n(.*?)\n```", content, re.DOTALL) result = match.group(1) if match else content tool_info = await self.get_tool_data(response) @@ -66,14 +72,11 @@ async def process(self, request: MessageRequest, files: Optional[List[Dict[str, "chat_history": self.history, "input": request.query, "text": result, - "tool_result": tool_info + "tool_result": tool_info, } async def get_tool_data(self, response): - tool_messages = [ - msg for msg in response.get('messages', []) - if getattr(msg, 'type', None) == 'tool' - ] + tool_messages = [msg for msg in response.get("messages", []) if getattr(msg, "type", None) == "tool"] tool_info = None if tool_messages: last_tool = tool_messages[-1] @@ -84,8 +87,5 @@ async def get_tool_data(self, response): except json.JSONDecodeError: tool_result_json = tool_result - tool_info = { - "name": name, - "message": tool_result_json - } + tool_info = {"name": name, "message": tool_result_json} return tool_info diff --git a/app/processors/simple_processor.py b/app/processors/simple_processor.py index 117a032..6265dad 100644 --- a/app/processors/simple_processor.py +++ b/app/processors/simple_processor.py @@ -1,57 +1,56 @@ import json -from typing import Dict, Any, Optional, List -from langchain_core.messages import SystemMessage, HumanMessage +import re +from typing import Any, Dict, List, Optional + +from langchain_core.messages import HumanMessage, SystemMessage from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder from app.processors.conversation_processor import ConversationProcessor from app.requests.message_request import MessageRequest -import re class SimpleProcessor(ConversationProcessor): - async def generate_response(self, context: str, chat_history: list, query: str, prompt: ChatPromptTemplate, - config: dict = None) -> Dict[str, Any]: + async def generate_response( + self, context: str, chat_history: list, query: str, prompt: 
ChatPromptTemplate, config: dict = None + ) -> Dict[str, Any]: chain = ( - { - "context": lambda x: x["context"], - "chat_history": lambda x: x["chat_history"], - "input": lambda x: x["input"], - } - | prompt - | self.llm + { + "context": lambda x: x["context"], + "chat_history": lambda x: x["chat_history"], + "input": lambda x: x["input"], + } + | prompt + | self.llm ) - raw_response = await chain.ainvoke({ - "context": context, - "chat_history": chat_history, - "input": query - }, config=config) + raw_response = await chain.ainvoke( + {"context": context, "chat_history": chat_history, "input": query}, config=config + ) content = raw_response.content - match = re.search(r'```json\n(.*?)\n```', content, re.DOTALL) + match = re.search(r"```json\n(.*?)\n```", content, re.DOTALL) if match: json_content = match.group(1) response_content = json_content else: response_content = content - return { - "context": context, - "chat_history": chat_history, - "input": query, - "text": response_content - } + return {"context": context, "chat_history": chat_history, "input": query, "text": response_content} - async def process(self, request: MessageRequest, files: Optional[List[Dict[str, str]]] = None, - supports_interleaved_files: bool = False) -> Dict[str, Any]: + async def process( + self, + request: MessageRequest, + files: Optional[List[Dict[str, str]]] = None, + supports_interleaved_files: bool = False, + ) -> Dict[str, Any]: messages = [] system_message = self.context or "" if files and not supports_interleaved_files: file_references = [] for file in files: - tag = 'image' if file.get('type') == 'image' else 'file' + tag = "image" if file.get("type") == "image" else "file" file_references.append(f"<{tag} url='{file['url']}'>") system_message += "\n\n" + "\n".join(file_references) @@ -67,7 +66,7 @@ async def process(self, request: MessageRequest, files: Optional[List[Dict[str, if files and supports_interleaved_files: interleaved_references = [] for file in files: - tag = 
'image' if file.get('type') == 'image' else 'file' + tag = "image" if file.get("type") == "image" else "file" interleaved_references.append(f"<{tag} url='{file['url']}'>") system_message += "\n\n" + "\n".join(interleaved_references) @@ -76,12 +75,12 @@ async def process(self, request: MessageRequest, files: Optional[List[Dict[str, messages.append(HumanMessage(content=request.query)) prompt = ChatPromptTemplate.from_messages(messages) - + config = self._get_langsmith_config( request, "simple_processor", has_json_parser=request.json_parser is not None, - has_files=files is not None and len(files) > 0 + has_files=files is not None and len(files) > 0, ) - + return await self.generate_response(self.context, self.history, request.query, prompt, config) diff --git a/app/providers/ai_provider_interface.py b/app/providers/ai_provider_interface.py index f61d99c..5ed4e9b 100644 --- a/app/providers/ai_provider_interface.py +++ b/app/providers/ai_provider_interface.py @@ -4,8 +4,8 @@ class BaseChatModel(Protocol): """Protocol for chat models""" - def __call__(self, *args: Any, **kwargs: Any) -> Any: - ... + + def __call__(self, *args: Any, **kwargs: Any) -> Any: ... 
class AIProviderInterface(ABC): diff --git a/app/providers/anthropic_provider.py b/app/providers/anthropic_provider.py index b91a7f9..3068649 100644 --- a/app/providers/anthropic_provider.py +++ b/app/providers/anthropic_provider.py @@ -1,15 +1,11 @@ from langchain_anthropic import ChatAnthropic + from app.providers.ai_provider_interface import AIProviderInterface class AnthropicProvider(AIProviderInterface): def get_llm(self, model: str, temperature: float, max_tokens: int, top_p: int) -> ChatAnthropic: - return ChatAnthropic( - model=model, - temperature=temperature, - max_tokens=max_tokens, - top_p=top_p - ) + return ChatAnthropic(model=model, temperature=temperature, max_tokens=max_tokens, top_p=top_p) def supports_interleaved_files(self) -> bool: - return True \ No newline at end of file + return True diff --git a/app/providers/deepseek_provider.py b/app/providers/deepseek_provider.py index 19cde42..c787b3d 100644 --- a/app/providers/deepseek_provider.py +++ b/app/providers/deepseek_provider.py @@ -1,6 +1,7 @@ from langchain_community.llms.ollama import Ollama -from app.providers.ai_provider_interface import AIProviderInterface + from app.configurations.config import DEEP_SEEK_HOST +from app.providers.ai_provider_interface import AIProviderInterface class DeepseekProvider(AIProviderInterface): @@ -11,11 +12,7 @@ def get_llm(self, model: str, temperature: float, max_tokens: int, top_p: float) "max_tokens": max_tokens, } - return Ollama( - model=model, - base_url=DEEP_SEEK_HOST - ** model_kwargs - ) + return Ollama(model=model, base_url=DEEP_SEEK_HOST**model_kwargs) def supports_interleaved_files(self) -> bool: return False diff --git a/app/providers/gemini_provider.py b/app/providers/gemini_provider.py index 1adaf7b..84b911c 100644 --- a/app/providers/gemini_provider.py +++ b/app/providers/gemini_provider.py @@ -1,6 +1,7 @@ import os from langchain_google_genai import ChatGoogleGenerativeAI + from app.providers.ai_provider_interface import AIProviderInterface 
@@ -11,7 +12,7 @@ def get_llm(self, model: str, temperature: float, max_tokens: int, top_p: int) - temperature=temperature, max_output_tokens=max_tokens, top_p=top_p, - google_api_key=os.getenv("GOOGLE_GEMINI_API_KEY") + google_api_key=os.getenv("GOOGLE_GEMINI_API_KEY"), ) def supports_interleaved_files(self) -> bool: diff --git a/app/providers/openai_provider.py b/app/providers/openai_provider.py index 7dd23a4..78815f3 100644 --- a/app/providers/openai_provider.py +++ b/app/providers/openai_provider.py @@ -1,4 +1,5 @@ from langchain_openai import ChatOpenAI + from app.providers.ai_provider_interface import AIProviderInterface @@ -10,10 +11,7 @@ def get_llm(self, model: str, temperature: float, max_tokens: int, top_p: float) "max_tokens": max_tokens, } - return ChatOpenAI( - model=model, - **model_kwargs - ) + return ChatOpenAI(model=model, **model_kwargs) def supports_interleaved_files(self) -> bool: return True diff --git a/app/requestors/base_requestor.py b/app/requestors/base_requestor.py index ffab00e..35833d8 100644 --- a/app/requestors/base_requestor.py +++ b/app/requestors/base_requestor.py @@ -1,4 +1,5 @@ from typing import Dict + import requests @@ -15,35 +16,30 @@ def replace_placeholders(text: str, params: dict) -> str: @classmethod def prepare_request_data(cls, config: dict, params: dict) -> dict: """Prepara los datos de la petición reemplazando los placeholders""" - request_data = { - 'url': config['api'], - 'method': config['method'], - 'headers': {}, - 'body': config.get('body', {}) - } + request_data = {"url": config["api"], "method": config["method"], "headers": {}, "body": config.get("body", {})} # Procesar headers - for header in config.get('headers', []): - key = header['key'] - value = cls.replace_placeholders(header['value'], params) - request_data['headers'][key] = value + for header in config.get("headers", []): + key = header["key"] + value = cls.replace_placeholders(header["value"], params) + request_data["headers"][key] = value # 
Procesar body - if isinstance(request_data['body'], dict): + if isinstance(request_data["body"], dict): processed_body = {} - for key, value in request_data['body'].items(): + for key, value in request_data["body"].items(): processed_body[key] = cls.replace_placeholders(value, params) - request_data['body'] = processed_body + request_data["body"] = processed_body # Procesar URL - request_data['url'] = cls.replace_placeholders(request_data['url'], params) + request_data["url"] = cls.replace_placeholders(request_data["url"], params) # Procesar query params si existen - if 'query_params' in config: + if "query_params" in config: processed_params = {} - for key, value in config['query_params'].items(): + for key, value in config["query_params"].items(): processed_params[key] = cls.replace_placeholders(value, params) - request_data['params'] = processed_params + request_data["params"] = processed_params return request_data @@ -54,11 +50,11 @@ def execute_request(cls, config: Dict, params: Dict) -> Dict: request_data = cls.prepare_request_data(config, params) response = requests.request( - method=request_data['method'], - url=request_data['url'], - headers=request_data['headers'], - json=request_data.get('body'), - params=request_data.get('params', {}) + method=request_data["method"], + url=request_data["url"], + headers=request_data["headers"], + json=request_data.get("body"), + params=request_data.get("params", {}), ) response.raise_for_status() diff --git a/app/requests/__init__.py b/app/requests/__init__.py index 63788fe..7f044e3 100644 --- a/app/requests/__init__.py +++ b/app/requests/__init__.py @@ -1 +1 @@ -# Archivo vacío \ No newline at end of file +# Archivo vacío diff --git a/app/requests/brand_context_resolver_request.py b/app/requests/brand_context_resolver_request.py index b4e37ee..f000df7 100644 --- a/app/requests/brand_context_resolver_request.py +++ b/app/requests/brand_context_resolver_request.py @@ -1,6 +1,7 @@ -from pydantic import BaseModel from 
typing import List +from pydantic import BaseModel + class BrandContextResolverRequest(BaseModel): websites_info: List diff --git a/app/requests/generate_audio_request.py b/app/requests/generate_audio_request.py index 0621741..42313bc 100644 --- a/app/requests/generate_audio_request.py +++ b/app/requests/generate_audio_request.py @@ -1,7 +1,8 @@ +from typing import Any, Dict, Optional + from pydantic import BaseModel -from typing import Optional, Dict, Any class GenerateAudioRequest(BaseModel): text: str - content: Optional[Dict[str, Any]] = None \ No newline at end of file + content: Optional[Dict[str, Any]] = None diff --git a/app/requests/generate_image_request.py b/app/requests/generate_image_request.py index f85a855..032237c 100644 --- a/app/requests/generate_image_request.py +++ b/app/requests/generate_image_request.py @@ -1,5 +1,6 @@ +from typing import Any, Dict, Optional + from pydantic import BaseModel -from typing import Optional, Dict, Any class GenerateImageRequest(BaseModel): @@ -14,4 +15,4 @@ class GenerateImageRequest(BaseModel): num_variations: int = 4 parameter_prompt: Optional[Dict[str, Any]] = None extra_parameters: Optional[Dict[str, Any]] = None - language: Optional[str] = "es" \ No newline at end of file + language: Optional[str] = "es" diff --git a/app/requests/generate_video_request.py b/app/requests/generate_video_request.py index ce7dbc3..e459863 100644 --- a/app/requests/generate_video_request.py +++ b/app/requests/generate_video_request.py @@ -1,11 +1,14 @@ from enum import Enum -from typing import Dict, Any, Optional +from typing import Any, Dict, Optional + from pydantic import BaseModel + class VideoType(str, Enum): human_scene = "human_scene" animated_scene = "animated_scene" + class GenerateVideoRequest(BaseModel): type: VideoType content: Optional[Dict[str, Any]] = None diff --git a/app/requests/message_request.py b/app/requests/message_request.py index 1e23bfa..8eb92e1 100644 --- a/app/requests/message_request.py +++ 
b/app/requests/message_request.py @@ -1,4 +1,5 @@ -from typing import List, Dict, Any, Optional +from typing import Any, Dict, List, Optional + from pydantic import BaseModel, Field diff --git a/app/requests/product_scraping_request.py b/app/requests/product_scraping_request.py index 7ef4bab..256064b 100644 --- a/app/requests/product_scraping_request.py +++ b/app/requests/product_scraping_request.py @@ -1,6 +1,7 @@ -from pydantic import BaseModel, HttpUrl from typing import Optional +from pydantic import BaseModel, HttpUrl + class ProductScrapingRequest(BaseModel): product_url: HttpUrl diff --git a/app/requests/recommend_product_request.py b/app/requests/recommend_product_request.py index 9712013..e483cf1 100644 --- a/app/requests/recommend_product_request.py +++ b/app/requests/recommend_product_request.py @@ -1,8 +1,9 @@ +from typing import Dict, List, Optional + from pydantic import BaseModel -from typing import Optional, List, Dict class RecommendProductRequest(BaseModel): product_name: str product_description: str - similar: Optional[bool] = False \ No newline at end of file + similar: Optional[bool] = False diff --git a/app/requests/resolve_funnel_request.py b/app/requests/resolve_funnel_request.py index 40a9d24..e8bff65 100644 --- a/app/requests/resolve_funnel_request.py +++ b/app/requests/resolve_funnel_request.py @@ -1,8 +1,9 @@ -from pydantic import BaseModel from typing import Optional +from pydantic import BaseModel + class ResolveFunnelRequest(BaseModel): product_name: str product_description: str - language: Optional[str] = "es" \ No newline at end of file + language: Optional[str] = "es" diff --git a/app/requests/variation_image_request.py b/app/requests/variation_image_request.py index a89edc2..e4b794b 100644 --- a/app/requests/variation_image_request.py +++ b/app/requests/variation_image_request.py @@ -1,13 +1,14 @@ -from pydantic import BaseModel, Field, validator from typing import Optional +from pydantic import BaseModel, Field, validator + class 
VariationImageRequest(BaseModel): file: str num_variations: int = Field(default=3, ge=1, le=10) language: Optional[str] = "es" - @validator('num_variations') + @validator("num_variations") def validate_variations(cls, v): if v > 10: raise ValueError("El número máximo de variaciones permitidas es 10") diff --git a/app/scrapers/aliexpress_scraper.py b/app/scrapers/aliexpress_scraper.py index b23b78f..bf2196b 100644 --- a/app/scrapers/aliexpress_scraper.py +++ b/app/scrapers/aliexpress_scraper.py @@ -1,10 +1,12 @@ -from app.scrapers.scraper_interface import ScraperInterface -from typing import Dict, Any, List, Optional, Tuple -from app.externals.aliexpress.aliexpress_client import get_item_detail import re -from fastapi import HTTPException from decimal import Decimal, InvalidOperation -from typing import Dict, Any +from typing import Any, Dict, List, Optional, Tuple + +from fastapi import HTTPException + +from app.externals.aliexpress.aliexpress_client import get_item_detail +from app.scrapers.scraper_interface import ScraperInterface + class AliexpressScraper(ScraperInterface): async def scrape_direct(self, html: str) -> Dict[str, Any]: @@ -21,7 +23,7 @@ async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: "name": self._get_name(item_data), "description": self._get_description(item_data), "external_sell_price": self._get_price(item_data), - "images": self._get_images(item_data) + "images": self._get_images(item_data), } """ @@ -30,27 +32,20 @@ async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: result["variants"] = variants """ - response = { - "provider_id": "aliexpress", - "external_id": item_id, - **result - } + response = {"provider_id": "aliexpress", "external_id": item_id, **result} return {"data": response} except Exception as e: - raise HTTPException( - status_code=400, - detail=f"Error procesando datos del producto: {str(e)}" - ) + raise HTTPException(status_code=400, detail=f"Error procesando datos del producto: 
{str(e)}") def _extract_item_id(self, url: str) -> str: - pattern = r'item/(\d+)\.html' + pattern = r"item/(\d+)\.html" match = re.search(pattern, url) if match: return match.group(1) - pattern = r'itemId=(\d+)' + pattern = r"itemId=(\d+)" match = re.search(pattern, url) if match: return match.group(1) @@ -75,8 +70,8 @@ def _get_description(self, item_data: Dict[str, Any]) -> str: html_content = description_data.get("html", "") if html_content: # Simplificación básica - podría mejorarse con una biblioteca HTML - description = re.sub(r'<[^>]+>', ' ', html_content) - description = re.sub(r'\s+', ' ', description).strip() + description = re.sub(r"<[^>]+>", " ", html_content) + description = re.sub(r"\s+", " ", description).strip() # Si no hay descripción, intentamos usar las propiedades if not description and "properties" in item_data: @@ -124,7 +119,7 @@ def _parse_price(self, price_str: Any) -> Optional[Decimal]: return Decimal(str(price_str)) if isinstance(price_str, str): - match = re.search(r'(\d+(?:\.\d+)?)', price_str.replace(",", "")) + match = re.search(r"(\d+(?:\.\d+)?)", price_str.replace(",", "")) if match: try: return Decimal(match.group(1)) @@ -192,7 +187,7 @@ def _extract_variants(self, item_data: Dict[str, Any]) -> List[Dict[str, Any]]: "name": product_title, "images": variant_images, "variant_key": variant_key, - "attributes": attributes + "attributes": attributes, } variants.append(variant_info) @@ -207,18 +202,13 @@ def _create_property_map(self, props: List[Dict[str, Any]]) -> Dict[int, Dict[st prop_name = prop.get("name") values = {} for val in prop.get("values", []): - values[val.get("vid")] = { - "name": val.get("name"), - "image": val.get("image", "") - } - prop_map[prop_id] = { - "name": prop_name, - "values": values - } + values[val.get("vid")] = {"name": val.get("name"), "image": val.get("image", "")} + prop_map[prop_id] = {"name": prop_name, "values": values} return prop_map - def _process_variant_attributes(self, sku_attr: str, prop_map: 
Dict[int, Dict[str, Any]]) -> Tuple[ - List[Dict[str, Any]], List[str]]: + def _process_variant_attributes( + self, sku_attr: str, prop_map: Dict[int, Dict[str, Any]] + ) -> Tuple[List[Dict[str, Any]], List[str]]: """Procesa los atributos de una variante y extrae imágenes relacionadas.""" attributes = [] variant_images = [] @@ -259,10 +249,7 @@ def _process_variant_attributes(self, sku_attr: str, prop_map: Dict[int, Dict[st # Ignorar atributos de envío if prop_info["name"] not in ignored_attributes: - attributes.append({ - "category_name": prop_info["name"], - "value": value_info["name"] - }) + attributes.append({"category_name": prop_info["name"], "value": value_info["name"]}) # Agregar imagen de la variante si existe if value_info["image"]: diff --git a/app/scrapers/amazon_scraper.py b/app/scrapers/amazon_scraper.py index d2235b0..a4a03fb 100644 --- a/app/scrapers/amazon_scraper.py +++ b/app/scrapers/amazon_scraper.py @@ -1,12 +1,12 @@ +import re +from decimal import Decimal +from typing import Any, Dict, List, Optional + from fastapi import HTTPException +from app.externals.amazon.amazon_client import get_product_details from app.scrapers.helper_price import parse_price from app.scrapers.scraper_interface import ScraperInterface -from typing import Dict, Any, List, Optional -import re -from app.externals.amazon.amazon_client import get_product_details -from decimal import Decimal -from typing import Dict, Any class AmazonScraper(ScraperInterface): @@ -24,26 +24,19 @@ async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: "name": self._get_name(product_data), "description": self._get_description(product_data), "external_sell_price": self._get_price(product_data), - "images": self._get_images(product_data) + "images": self._get_images(product_data), } variants = self._extract_variants(product_data) if variants: result["variants"] = variants - response = { - "provider_id": "amazon", - "external_id": asin, - **result - } + response = {"provider_id": 
"amazon", "external_id": asin, **result} return {"data": response} except Exception as e: - raise HTTPException( - status_code=400, - detail=f"Error processing product data: {str(e)}" - ) + raise HTTPException(status_code=400, detail=f"Error processing product data: {str(e)}") def _get_product_data(self, response: Dict[str, Any]) -> Dict[str, Any]: product_data = response.get("data", {}) @@ -94,11 +87,11 @@ def _get_images(self, product_data: Dict[str, Any]) -> List[str]: def _extract_asin(self, url: str) -> str: patterns = [ - r'/dp/([A-Z0-9]{10})', - r'/gp/product/([A-Z0-9]{10})', - r'/ASIN/([A-Z0-9]{10})', - r'asin=([A-Z0-9]{10})', - r'asin%3D([A-Z0-9]{10})' + r"/dp/([A-Z0-9]{10})", + r"/gp/product/([A-Z0-9]{10})", + r"/ASIN/([A-Z0-9]{10})", + r"asin=([A-Z0-9]{10})", + r"asin%3D([A-Z0-9]{10})", ] for pattern in patterns: @@ -106,10 +99,7 @@ def _extract_asin(self, url: str) -> str: if match: return match.group(1) - raise HTTPException( - status_code=400, - detail="Product not found - Invalid Amazon URL" - ) + raise HTTPException(status_code=400, detail="Product not found - Invalid Amazon URL") def _extract_variants(self, product_data: Dict[str, Any]) -> List[Dict[str, Any]]: dimensions = product_data.get("product_variations_dimensions", []) @@ -132,7 +122,7 @@ def _extract_variants(self, product_data: Dict[str, Any]) -> List[Dict[str, Any] "name": product_title, "images": self._get_variant_images(dimensions, variations, variant_data, product_data), "variant_key": variant_key, - "attributes": variant_attributes + "attributes": variant_attributes, } variants.append(variant_info) @@ -144,15 +134,17 @@ def _get_variant_attributes(self, dimensions: List[str], variant_data: Dict[str, for dim in dimensions: if dim in variant_data: - attributes.append({ - "category_name": dim.capitalize(), - "value": variant_data[dim] - }) + attributes.append({"category_name": dim.capitalize(), "value": variant_data[dim]}) return attributes - def _get_variant_images(self, dimensions: 
List[str], variations: Dict[str, List], - variant_data: Dict[str, str], product_data: Dict[str, Any]) -> List[str]: + def _get_variant_images( + self, + dimensions: List[str], + variations: Dict[str, List], + variant_data: Dict[str, str], + product_data: Dict[str, Any], + ) -> List[str]: images = [] for dim in dimensions: if dim in variations and dim in variant_data: diff --git a/app/scrapers/cj_scraper.py b/app/scrapers/cj_scraper.py index 172eae5..68a4c12 100644 --- a/app/scrapers/cj_scraper.py +++ b/app/scrapers/cj_scraper.py @@ -1,9 +1,10 @@ -from typing import Dict, Any +from typing import Any, Dict import httpx -from app.scrapers.scraper_interface import ScraperInterface from fastapi import HTTPException +from app.scrapers.scraper_interface import ScraperInterface + class CJScraper(ScraperInterface): def __init__(self): @@ -13,21 +14,13 @@ async def scrape_direct(self, html: str) -> Dict[str, Any]: return {} async def scrape(self, url: str, domain: str = None) -> dict: - payload = { - "url_cj": url - } + payload = {"url_cj": url} - headers = { - "Content-Type": "application/json" - } + headers = {"Content-Type": "application/json"} try: async with httpx.AsyncClient(timeout=20.0) as client: - response = await client.post( - self.webhook_url, - headers=headers, - json=payload - ) + response = await client.post(self.webhook_url, headers=headers, json=payload) if response.status_code == 200: return response.json() diff --git a/app/scrapers/dropi_scraper.py b/app/scrapers/dropi_scraper.py index 29b8477..f2bed92 100644 --- a/app/scrapers/dropi_scraper.py +++ b/app/scrapers/dropi_scraper.py @@ -1,19 +1,19 @@ import re from decimal import Decimal -from typing import Dict, Any, List, Optional +from typing import Any, Dict, List, Optional from fastapi import HTTPException +from app.configurations.config import DROPI_S3_BASE_URL from app.externals.dropi.dropi_client import get_product_details from app.scrapers.helper_price import parse_price from 
app.scrapers.scraper_interface import ScraperInterface -from app.configurations.config import DROPI_S3_BASE_URL class DropiScraper(ScraperInterface): def __init__(self, country: str = "co"): self.country = country - + async def scrape_direct(self, html: str) -> Dict[str, Any]: return {} @@ -35,19 +35,12 @@ async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: if variants: result["variants"] = variants - response = { - "provider_id": "dropi", - "external_id": product_id, - **result - } + response = {"provider_id": "dropi", "external_id": product_id, **result} return {"data": response} except Exception as e: - raise HTTPException( - status_code=400, - detail=f"Error processing product data from Dropi: {str(e)}" - ) + raise HTTPException(status_code=400, detail=f"Error processing product data from Dropi: {str(e)}") def _get_product_data(self, response: Dict[str, Any]) -> Dict[str, Any]: if not response.get("isSuccess"): @@ -67,10 +60,10 @@ def _get_description(self, product_data: Dict[str, Any]) -> str: return "" # Remove HTML tags for a cleaner description - clean_text = re.sub(r'<[^>]+>', ' ', html_description) + clean_text = re.sub(r"<[^>]+>", " ", html_description) # Replace
with newlines and clean up whitespace - clean_text = clean_text.replace('
', '\n').strip() - clean_text = re.sub(r'\s+', ' ', clean_text).strip() + clean_text = clean_text.replace("
", "\n").strip() + clean_text = re.sub(r"\s+", " ", clean_text).strip() return clean_text def _get_price(self, product_data: Dict[str, Any]) -> Optional[Decimal]: @@ -94,40 +87,42 @@ def _extract_variants(self, product_data: Dict[str, Any]) -> List[Dict[str, Any] variations = product_data.get("variations", []) if not variations: return [] - + product_name = product_data.get("name", "") product_photos = product_data.get("photos", []) - + variants = [] for variation in variations: variant = self._build_variant(variation, product_name, product_photos) if variant: variants.append(variant) - + return variants - - def _build_variant(self, variation: Dict[str, Any], product_name: str, product_photos: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]: + + def _build_variant( + self, variation: Dict[str, Any], product_name: str, product_photos: List[Dict[str, Any]] + ) -> Optional[Dict[str, Any]]: """Construye un objeto de variante en el formato estándar""" - + # Extraer atributos attributes = self._extract_attributes(variation) - + # Construir nombre de la variante variant_name = self._build_variant_name(product_name, attributes) - + # Construir clave de variante variant_key = self._build_variant_key(attributes) - + # Obtener precios sale_price = self._parse_variant_price(variation.get("sale_price")) suggested_price = self._parse_variant_price(variation.get("suggested_price")) - + # Determinar disponibilidad basada en stock available = self._check_availability(variation) - + # Obtener imágenes de la variante images = self._get_variant_images(variation, product_photos) - + return { "name": variant_name, "variant_key": variant_key, @@ -138,38 +133,35 @@ def _build_variant(self, variation: Dict[str, Any], product_name: str, product_p "provider_id": "dropi", "external_id": str(variation.get("id", "")), "external_sell_price": float(sale_price) if sale_price else None, - "external_suggested_sell_price": float(suggested_price) if suggested_price else None + 
"external_suggested_sell_price": float(suggested_price) if suggested_price else None, } - + def _extract_attributes(self, variation: Dict[str, Any]) -> List[Dict[str, str]]: """Extrae los atributos de una variación""" attributes = [] attribute_values = variation.get("attribute_values", []) - + for attr_value in attribute_values: attribute_info = attr_value.get("attribute", {}) attribute_name = attribute_info.get("description", "") value = attr_value.get("value", "") - + # El valor puede venir en formato "COLOR-TALLA VALOR" o similar # Intentamos limpiar y separar si es necesario if attribute_name and value: # Si el valor contiene el nombre del atributo, lo limpiamos clean_value = self._clean_attribute_value(value, attribute_name) - - attributes.append({ - "name": attribute_name.title(), - "value": clean_value - }) - + + attributes.append({"name": attribute_name.title(), "value": clean_value}) + return attributes - + def _clean_attribute_value(self, value: str, attribute_name: str) -> str: """Limpia el valor del atributo removiendo prefijos redundantes""" # Ejemplo: "NEGRO-TALLA L" cuando el atributo es "TALLA" -> "NEGRO-L" # O mejor aún, intentar separar los componentes parts = value.split("-") - + # Si hay múltiples partes, intentamos encontrar la relevante if len(parts) > 1: # Buscar la parte que no sea el nombre del atributo @@ -179,74 +171,71 @@ def _clean_attribute_value(self, value: str, attribute_name: str) -> str: part_clean = part.replace(attribute_name.upper(), "").strip() if part_clean: cleaned_parts.append(part_clean) - + return " ".join(cleaned_parts).strip() if cleaned_parts else value - + return value - + def _build_variant_name(self, product_name: str, attributes: List[Dict[str, str]]) -> str: """Construye el nombre de la variante combinando el nombre del producto y los atributos""" if not attributes: return product_name - + # Concatenar los valores de atributos attribute_parts = [attr["value"] for attr in attributes] attribute_string = " - 
".join(attribute_parts) - + return f"{product_name} - {attribute_string}" - + def _build_variant_key(self, attributes: List[Dict[str, str]]) -> str: """Construye una clave única para la variante basada en los atributos""" if not attributes: return "default" - + # Crear clave en formato "attribute1-value1-attribute2-value2" key_parts = [] for attr in attributes: attr_name = attr["name"].lower().replace(" ", "-") attr_value = attr["value"].lower().replace(" ", "-") key_parts.append(f"{attr_name}-{attr_value}") - + return "-".join(key_parts) - + def _parse_variant_price(self, price_str: Any) -> Optional[Decimal]: """Parsea el precio de una variante""" if not price_str: return None return parse_price(str(price_str)) - + def _check_availability(self, variation: Dict[str, Any]) -> bool: """Verifica si la variante está disponible basándose en el stock""" warehouse_variations = variation.get("warehouse_product_variation", []) - + if not warehouse_variations: return False - + # Verificar si hay stock disponible en algún almacén total_stock = sum(wh.get("stock", 0) for wh in warehouse_variations) return total_stock > 0 - + def _get_variant_images(self, variation: Dict[str, Any], product_photos: List[Dict[str, Any]]) -> List[str]: variation_id = variation.get("id") images = [] - + for photo in product_photos: if photo.get("variation_id") == variation_id and photo.get("urlS3"): images.append(DROPI_S3_BASE_URL + photo["urlS3"]) - + if not images: for photo in product_photos: if not photo.get("variation_id") and photo.get("urlS3"): images.append(DROPI_S3_BASE_URL + photo["urlS3"]) - + return images def _extract_product_id(self, url: str) -> str: - match = re.search(r'/product-details/(\d+)', url) + match = re.search(r"/product-details/(\d+)", url) if match: return match.group(1) - raise HTTPException( - status_code=400, - detail="Product ID not found in Dropi URL" - ) \ No newline at end of file + raise HTTPException(status_code=400, detail="Product ID not found in Dropi URL") 
diff --git a/app/scrapers/helper_price.py b/app/scrapers/helper_price.py index e728944..c739c63 100644 --- a/app/scrapers/helper_price.py +++ b/app/scrapers/helper_price.py @@ -1,6 +1,6 @@ -from decimal import Decimal -from typing import Optional, Any import re +from decimal import Decimal +from typing import Any, Optional def parse_price(price_str: Any) -> Optional[Decimal]: diff --git a/app/scrapers/ia_scraper.py b/app/scrapers/ia_scraper.py index 233f5fa..3bb80ea 100644 --- a/app/scrapers/ia_scraper.py +++ b/app/scrapers/ia_scraper.py @@ -1,15 +1,16 @@ +import json +import os +from datetime import datetime +from typing import Any, Dict + from app.configurations.config import SCRAPER_AGENT, SCRAPER_AGENT_DIRECT -from app.helpers.escape_helper import clean_html_less_deeply, clean_html_deeply -from app.pdf.helpers import clean_text, clean_json +from app.externals.scraperapi.scraperapi_client import ScraperAPIClient +from app.helpers.escape_helper import clean_html_deeply, clean_html_less_deeply +from app.pdf.helpers import clean_json, clean_text from app.requests.message_request import MessageRequest from app.scrapers.helper_price import parse_price from app.scrapers.scraper_interface import ScraperInterface -from typing import Dict, Any -from app.externals.scraperapi.scraperapi_client import ScraperAPIClient from app.services.message_service_interface import MessageServiceInterface -import json -import os -from datetime import datetime class IAScraper(ScraperInterface): @@ -19,18 +20,19 @@ async def scrape_direct(self, html: str) -> Dict[str, Any]: filename = f"simplified_html_{timestamp}.html" os.makedirs("scraped_html", exist_ok=True) - + filepath = os.path.join("scraped_html", filename) - with open(filepath, 'w', encoding='utf-8') as f: + with open(filepath, "w", encoding="utf-8") as f: f.write(simplified_html_clean) - + print(f"HTML simplificado guardado en: {filepath}") message_request = MessageRequest( query=f"Product content: {simplified_html_clean} ", 
agent_id=SCRAPER_AGENT_DIRECT, conversation_id="", - json_parser={"code": "string"}) + json_parser={"code": "string"}, + ) """ json_parser={ "products": [ @@ -73,19 +75,16 @@ async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: ) result = await self.message_service.handle_message(message_request) - data_clean = clean_text(clean_json(result['text'])) + data_clean = clean_text(clean_json(result["text"])) data = json.loads(data_clean) - data['data']['external_sell_price'] = parse_price(data['data']['external_sell_price']) - images = data['data'].get('images', []) - cleaned_images = [ - f"https:{img}" if img.startswith("//") else img for img in images - ] - data['data']['images'] = cleaned_images - - if 'variants' in data['data']: - data['data']['variants'] = [ - variant for variant in data['data']['variants'] - if variant.get('variant_key') != 'unknown' + data["data"]["external_sell_price"] = parse_price(data["data"]["external_sell_price"]) + images = data["data"].get("images", []) + cleaned_images = [f"https:{img}" if img.startswith("//") else img for img in images] + data["data"]["images"] = cleaned_images + + if "variants" in data["data"]: + data["data"]["variants"] = [ + variant for variant in data["data"]["variants"] if variant.get("variant_key") != "unknown" ] return data diff --git a/app/scrapers/scraper_interface.py b/app/scrapers/scraper_interface.py index c6349fa..7ec42fb 100644 --- a/app/scrapers/scraper_interface.py +++ b/app/scrapers/scraper_interface.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Dict, Any +from typing import Any, Dict class ScraperInterface(ABC): @@ -13,4 +13,4 @@ async def scrape_direct(self, html: str) -> Dict[str, Any]: Optional method to scrape directly from HTML content. This can be overridden by subclasses if needed. 
""" - raise NotImplementedError("This method is not implemented.") \ No newline at end of file + raise NotImplementedError("This method is not implemented.") diff --git a/app/services/audio_service.py b/app/services/audio_service.py index c8817d2..cad64c8 100644 --- a/app/services/audio_service.py +++ b/app/services/audio_service.py @@ -2,9 +2,9 @@ from fastapi import Depends, HTTPException +from app.externals.fal.fal_client import FalClient from app.requests.generate_audio_request import GenerateAudioRequest from app.services.audio_service_interface import AudioServiceInterface -from app.externals.fal.fal_client import FalClient class AudioService(AudioServiceInterface): @@ -22,4 +22,4 @@ async def generate_audio(self, request: GenerateAudioRequest) -> Dict[str, Any]: try: return await self.fal_client.tts_multilingual_v2(text=request.text, fal_webhook=fal_webhook, **extra) except Exception as e: - raise HTTPException(status_code=502, detail=f"Error al llamar a FAL: {str(e)}") \ No newline at end of file + raise HTTPException(status_code=502, detail=f"Error al llamar a FAL: {str(e)}") diff --git a/app/services/audio_service_interface.py b/app/services/audio_service_interface.py index 250753c..405c94c 100644 --- a/app/services/audio_service_interface.py +++ b/app/services/audio_service_interface.py @@ -6,4 +6,4 @@ class AudioServiceInterface(ABC): @abstractmethod async def generate_audio(self, request: GenerateAudioRequest): - pass \ No newline at end of file + pass diff --git a/app/services/dropi_service.py b/app/services/dropi_service.py index 61ef333..06ccc26 100644 --- a/app/services/dropi_service.py +++ b/app/services/dropi_service.py @@ -1,4 +1,5 @@ -from typing import List, Dict, Any +from typing import Any, Dict, List + from fastapi import Depends, HTTPException from app.externals.dropi import dropi_client @@ -22,4 +23,4 @@ async def get_cities_by_department(self, department_id: int, country: str = "co" response = await 
dropi_client.get_cities_by_department(department_id, rate_type, country) return response.get("objects", {}).get("cities", []) except Exception as e: - raise HTTPException(status_code=500, detail=f"Error fetching cities from Dropi: {str(e)}") \ No newline at end of file + raise HTTPException(status_code=500, detail=f"Error fetching cities from Dropi: {str(e)}") diff --git a/app/services/dropi_service_interface.py b/app/services/dropi_service_interface.py index 3de8899..0683e55 100644 --- a/app/services/dropi_service_interface.py +++ b/app/services/dropi_service_interface.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import List, Dict, Any +from typing import Any, Dict, List class DropiServiceInterface(ABC): @@ -9,4 +9,4 @@ async def get_departments(self, country: str = "co") -> List[Dict[str, Any]]: @abstractmethod async def get_cities_by_department(self, department_id: int, country: str = "co") -> List[Dict[str, Any]]: - pass \ No newline at end of file + pass diff --git a/app/services/image_service.py b/app/services/image_service.py index 6555f66..1b7195d 100644 --- a/app/services/image_service.py +++ b/app/services/image_service.py @@ -1,25 +1,27 @@ +import asyncio +import base64 +import uuid +from typing import Optional + +from dotenv import load_dotenv +from fastapi import Depends + from app.configurations.config import ( AGENT_IMAGE_VARIATIONS, ) from app.externals.agent_config.requests.agent_config_request import AgentConfigRequest +from app.externals.google_vision.google_vision_client import analyze_image +from app.externals.images.image_client import google_image, openai_image_edit +from app.externals.s3_upload.requests.s3_upload_request import S3UploadRequest from app.externals.s3_upload.responses.s3_upload_response import S3UploadResponse +from app.externals.s3_upload.s3_upload_client import upload_file +from app.helpers.image_compression_helper import compress_image_to_target from app.requests.generate_image_request import 
GenerateImageRequest from app.requests.message_request import MessageRequest from app.requests.variation_image_request import VariationImageRequest -from app.externals.s3_upload.requests.s3_upload_request import S3UploadRequest from app.responses.generate_image_response import GenerateImageResponse from app.services.image_service_interface import ImageServiceInterface from app.services.message_service_interface import MessageServiceInterface -from app.externals.s3_upload.s3_upload_client import upload_file -from app.helpers.image_compression_helper import compress_image_to_target -from fastapi import Depends -import asyncio -import uuid -from dotenv import load_dotenv -from app.externals.google_vision.google_vision_client import analyze_image -from app.externals.images.image_client import google_image, openai_image_edit -from typing import Optional -import base64 load_dotenv() @@ -28,8 +30,9 @@ class ImageService(ImageServiceInterface): def __init__(self, message_service: MessageServiceInterface = Depends()): self.message_service = message_service - async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, - prefix_name: str) -> S3UploadResponse: + async def _upload_to_s3( + self, image_base64: str, owner_id: str, folder_id: str, prefix_name: str + ) -> S3UploadResponse: unique_id = uuid.uuid4().hex[:8] file_name = f"{prefix_name}_{unique_id}" original_image_bytes = base64.b64decode(image_base64) @@ -37,29 +40,33 @@ async def _upload_to_s3(self, image_base64: str, owner_id: str, folder_id: str, return await upload_file( S3UploadRequest( - file=image_base64_compressed, - folder=f"{owner_id}/products/variations/{folder_id}", - filename=file_name + file=image_base64_compressed, folder=f"{owner_id}/products/variations/{folder_id}", filename=file_name ) ) - - async def _generate_single_variation(self, url_images: list[str], prompt: str, owner_id: str, - folder_id: str, file: Optional[str] = None, extra_params: Optional[dict] = None, - provider: 
Optional[str] = None, model_ai: Optional[str] = None) -> str: + async def _generate_single_variation( + self, + url_images: list[str], + prompt: str, + owner_id: str, + folder_id: str, + file: Optional[str] = None, + extra_params: Optional[dict] = None, + provider: Optional[str] = None, + model_ai: Optional[str] = None, + ) -> str: if provider and provider.lower() == "openai": - image_content = await openai_image_edit(image_urls=url_images, prompt=prompt, model_ia=model_ai, extra_params=extra_params) + image_content = await openai_image_edit( + image_urls=url_images, prompt=prompt, model_ia=model_ai, extra_params=extra_params + ) else: - image_content = await google_image(image_urls=url_images, prompt=prompt, model_ia=model_ai, extra_params=extra_params) - - content_base64 = base64.b64encode(image_content).decode('utf-8') - final_upload = await self._upload_to_s3( - content_base64, - owner_id, - folder_id, - "variation" - ) + image_content = await google_image( + image_urls=url_images, prompt=prompt, model_ia=model_ai, extra_params=extra_params + ) + + content_base64 = base64.b64encode(image_content).decode("utf-8") + final_upload = await self._upload_to_s3(content_base64, owner_id, folder_id, "variation") return final_upload.s3_url async def generate_variation_images(self, request: VariationImageRequest, owner_id: str): @@ -72,11 +79,7 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ agent_id=AGENT_IMAGE_VARIATIONS, conversation_id="", parameter_prompt={"language": request.language}, - files=[{ - "type": "image", - "url": original_image_response.s3_url, - "content": request.file - }] + files=[{"type": "image", "url": original_image_response.s3_url, "content": request.file}], ) response_data = await self.message_service.handle_message_with_config(message_request) @@ -89,19 +92,26 @@ async def generate_variation_images(self, request: VariationImageRequest, owner_ prompt = response["text"] + " Do not modify any text, letters, brand 
logos, brand names, or symbols." tasks = [ - self._generate_single_variation([original_image_response.s3_url], prompt, owner_id, folder_id, - request.file, extra_params, provider=agent_config.provider_ai, - model_ai=agent_config.model_ai) + self._generate_single_variation( + [original_image_response.s3_url], + prompt, + owner_id, + folder_id, + request.file, + extra_params, + provider=agent_config.provider_ai, + model_ai=agent_config.model_ai, + ) for i in range(request.num_variations) ] generated_urls = await asyncio.gather(*tasks) return GenerateImageResponse( - generated_urls=generated_urls, + generated_urls=generated_urls, original_url=original_image_response.s3_url, original_urls=[original_image_response.s3_url], - generated_prompt=prompt, - vision_analysis=vision_analysis + generated_prompt=prompt, + vision_analysis=vision_analysis, ) async def generate_images_from(self, request: GenerateImageRequest, owner_id: str): @@ -125,7 +135,7 @@ async def generate_images_from(self, request: GenerateImageRequest, owner_id: st request.file, extra_params=request.extra_parameters, provider=request.provider, - model_ai=request.model_ai + model_ai=request.model_ai, ) for i in range(request.num_variations) ] @@ -135,13 +145,13 @@ async def generate_images_from(self, request: GenerateImageRequest, owner_id: st original_urls=urls, generated_urls=generated_urls, original_url=original_url, - generated_prompt=request.prompt + generated_prompt=request.prompt, ) async def generate_images_from_agent(self, request: GenerateImageRequest, owner_id: str): parameter_prompt = request.parameter_prompt or {} parameter_prompt["language"] = request.language - + data = MessageRequest( agent_id=request.agent_id, query=request.agent_id, @@ -152,11 +162,11 @@ async def generate_images_from_agent(self, request: GenerateImageRequest, owner_ response_data = await self.message_service.handle_message_with_config(data) agent_config = response_data["agent_config"] message = response_data["message"] - + 
request.prompt = message["text"] request.provider = agent_config.provider_ai request.model_ai = agent_config.model_ai - + if agent_config.preferences.extra_parameters: request.extra_parameters = agent_config.preferences.extra_parameters diff --git a/app/services/image_service_interface.py b/app/services/image_service_interface.py index 40d9816..64b9614 100644 --- a/app/services/image_service_interface.py +++ b/app/services/image_service_interface.py @@ -1,4 +1,4 @@ -from abc import abstractmethod, ABC +from abc import ABC, abstractmethod from app.requests.generate_image_request import GenerateImageRequest from app.requests.variation_image_request import VariationImageRequest diff --git a/app/services/message_service.py b/app/services/message_service.py index 1d7b088..2866a5c 100644 --- a/app/services/message_service.py +++ b/app/services/message_service.py @@ -1,28 +1,29 @@ -import json import asyncio import hashlib +import json + +from fastapi import Depends from app.configurations.config import AGENT_RECOMMEND_PRODUCTS_ID, AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID, ENVIRONMENT from app.configurations.copies_config import AGENT_COPIES +from app.configurations.pdf_manual_config import PDF_MANUAL_SECTIONS, get_sections_for_language from app.externals.agent_config.agent_config_client import get_agent +from app.externals.agent_config.requests.agent_config_request import AgentConfigRequest +from app.externals.amazon.amazon_client import search_products +from app.externals.amazon.requests.amazon_search_request import AmazonSearchRequest from app.externals.s3_upload.requests.s3_upload_request import S3UploadRequest -from app.externals.s3_upload.s3_upload_client import upload_file, check_file_exists_direct -from app.pdf.helpers import clean_text, clean_json +from app.externals.s3_upload.s3_upload_client import check_file_exists_direct, upload_file +from app.managers.conversation_manager_interface import ConversationManagerInterface +from app.pdf.helpers import clean_json, 
clean_text +from app.pdf.pdf_manual_generator import PDFManualGenerator from app.requests.brand_context_resolver_request import BrandContextResolverRequest from app.requests.copy_request import CopyRequest from app.requests.generate_pdf_request import GeneratePdfRequest from app.requests.message_request import MessageRequest -from app.externals.agent_config.requests.agent_config_request import AgentConfigRequest from app.requests.recommend_product_request import RecommendProductRequest +from app.requests.resolve_funnel_request import ResolveFunnelRequest from app.responses.recommend_product_response import RecommendProductResponse from app.services.message_service_interface import MessageServiceInterface -from app.managers.conversation_manager_interface import ConversationManagerInterface -from fastapi import Depends -from app.configurations.pdf_manual_config import PDF_MANUAL_SECTIONS, get_sections_for_language -from app.pdf.pdf_manual_generator import PDFManualGenerator -from app.externals.amazon.requests.amazon_search_request import AmazonSearchRequest -from app.externals.amazon.amazon_client import search_products -from app.requests.resolve_funnel_request import ResolveFunnelRequest class MessageService(MessageServiceInterface): @@ -34,61 +35,53 @@ async def handle_message(self, request: MessageRequest): agent_id=request.agent_id, query=request.query, metadata_filter=request.metadata_filter, - parameter_prompt=request.parameter_prompt + parameter_prompt=request.parameter_prompt, ) agent_config = await get_agent(data) - return await self.conversation_manager.process_conversation( - request=request, - agent_config=agent_config - ) + return await self.conversation_manager.process_conversation(request=request, agent_config=agent_config) async def handle_message_with_config(self, request: MessageRequest): data = AgentConfigRequest( agent_id=request.agent_id, query=request.query, metadata_filter=request.metadata_filter, - parameter_prompt=request.parameter_prompt + 
parameter_prompt=request.parameter_prompt, ) agent_config = await get_agent(data) message_response = await self.conversation_manager.process_conversation( - request=request, - agent_config=agent_config + request=request, agent_config=agent_config ) - return { - "message": message_response, - "agent_config": agent_config - } + return {"message": message_response, "agent_config": agent_config} async def handle_message_json(self, request: MessageRequest): response = await self.handle_message(request) - return json.loads(response['text']) + return json.loads(response["text"]) async def recommend_products(self, request: RecommendProductRequest): agent_id = AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID if request.similar else AGENT_RECOMMEND_PRODUCTS_ID - data = await self.handle_message(MessageRequest( - agent_id=agent_id, - conversation_id="", - query=f"Product Name: {request.product_name} Description: {request.product_description}", - )) + data = await self.handle_message( + MessageRequest( + agent_id=agent_id, + conversation_id="", + query=f"Product Name: {request.product_name} Description: {request.product_description}", + ) + ) - json_data = json.loads(data['text']) - amazon_data = await search_products(AmazonSearchRequest(query=json_data['recommended_product'])) + json_data = json.loads(data["text"]) + amazon_data = await search_products(AmazonSearchRequest(query=json_data["recommended_product"])) return RecommendProductResponse(ai_response=json_data, products=amazon_data.get_products()) async def process_multiple_agents(self, agent_queries: list[dict]) -> dict: tasks = [ - self.handle_message(MessageRequest( - agent_id=item['agent'], - conversation_id="", - query=item['query'] - )) for item in agent_queries + self.handle_message(MessageRequest(agent_id=item["agent"], conversation_id="", query=item["query"])) + for item in agent_queries ] try: @@ -98,7 +91,7 @@ async def process_multiple_agents(self, agent_queries: list[dict]) -> dict: for response in responses: if 
isinstance(response, Exception): continue - data_clean = clean_text(clean_json(response['text'])) + data_clean = clean_text(clean_json(response["text"])) data = json.loads(data_clean) combined_data.update(data) @@ -111,10 +104,7 @@ async def process_multiple_agents(self, agent_queries: list[dict]) -> dict: raise ValueError(f"Error procesando respuestas de agentes: {str(e)}") async def generate_copies(self, request: CopyRequest): - agent_queries = [ - {'agent': agent, 'query': request.prompt} - for agent in AGENT_COPIES - ] + agent_queries = [{"agent": agent, "query": request.prompt} for agent in AGENT_COPIES] combined_data = await self.process_multiple_agents(agent_queries) @@ -122,10 +112,10 @@ async def generate_copies(self, request: CopyRequest): async def generate_pdf(self, request: GeneratePdfRequest): base_query = f"Product Name: {request.product_name} Description: {request.product_description}. Language: {request.language}. Content: {request.content}" - + content_hash = hashlib.md5(f"{request.title}_{request.image_url}".encode()).hexdigest()[:8] base_filename = f"{request.product_id}_{request.language}_{content_hash}" - + version = "v2" base_url = f"https://fluxi.co/{ENVIRONMENT}/assets" folder_path = f"{request.owner_id}/pdfs/{version}" @@ -136,10 +126,9 @@ async def generate_pdf(self, request: GeneratePdfRequest): return {"s3_url": s3_url} sections = get_sections_for_language(request.language) - + agent_queries = [ - {'agent': "agent_copies_pdf", 'query': f"section: {section}. {base_query} "} - for section in sections.keys() + {"agent": "agent_copies_pdf", "query": f"section: {section}. 
{base_query} "} for section in sections.keys() ] combined_data = await self.process_multiple_agents(agent_queries) @@ -147,94 +136,88 @@ async def generate_pdf(self, request: GeneratePdfRequest): pdf_generator = PDFManualGenerator(request.product_name, language=request.language) pdf = await pdf_generator.create_manual(combined_data, request.title, request.image_url) - result = await upload_file( - S3UploadRequest( - file=pdf, - folder=folder_path, - filename=base_filename - ) - ) + result = await upload_file(S3UploadRequest(file=pdf, folder=folder_path, filename=base_filename)) return result async def resolve_funnel(self, request: ResolveFunnelRequest): - pain_detection_response = await self.handle_message(MessageRequest( - agent_id="pain_detection", - conversation_id="", - query="pain_detection", - parameter_prompt={ - "product_name": request.product_name, - "product_description": request.product_description, - "language": request.language - } - )) - - pain_detection_message = pain_detection_response['text'] - - buyer_detection_response = await self.handle_message(MessageRequest( - agent_id="buyer_detection", - conversation_id="", - query="buyer_detection", - parameter_prompt={ - "product_name": request.product_name, - "product_description": request.product_description, - "pain_detection": pain_detection_message, - "language": request.language - } - )) - - buyer_detection_message = buyer_detection_response['text'] - - sales_angles_response = await self.handle_message_json(MessageRequest( - agent_id="sales_angles_v2", - conversation_id="", - query="sales_angles_v2", - json_parser={ - "angles": [ - { - "name": "string", - "description": "string" - } - ] - }, - parameter_prompt={ - "product_name": request.product_name, - "product_description": request.product_description, - "pain_detection": pain_detection_message, - "buyer_detection": buyer_detection_message, - "language": request.language - } - )) + pain_detection_response = await self.handle_message( + 
MessageRequest( + agent_id="pain_detection", + conversation_id="", + query="pain_detection", + parameter_prompt={ + "product_name": request.product_name, + "product_description": request.product_description, + "language": request.language, + }, + ) + ) + + pain_detection_message = pain_detection_response["text"] + + buyer_detection_response = await self.handle_message( + MessageRequest( + agent_id="buyer_detection", + conversation_id="", + query="buyer_detection", + parameter_prompt={ + "product_name": request.product_name, + "product_description": request.product_description, + "pain_detection": pain_detection_message, + "language": request.language, + }, + ) + ) + + buyer_detection_message = buyer_detection_response["text"] + + sales_angles_response = await self.handle_message_json( + MessageRequest( + agent_id="sales_angles_v2", + conversation_id="", + query="sales_angles_v2", + json_parser={"angles": [{"name": "string", "description": "string"}]}, + parameter_prompt={ + "product_name": request.product_name, + "product_description": request.product_description, + "pain_detection": pain_detection_message, + "buyer_detection": buyer_detection_message, + "language": request.language, + }, + ) + ) return { "pain_detection": pain_detection_message, "buyer_detection": buyer_detection_message, - "sales_angles": sales_angles_response["angles"] + "sales_angles": sales_angles_response["angles"], } async def resolve_brand_context(self, request: BrandContextResolverRequest): - brand_agent_task = self.handle_message_json(MessageRequest( - agent_id="store_brand_agent", - conversation_id="", - query="store_brand_agent", - parameter_prompt=request.prompt, - json_parser={"brands": ["string", "string"]} - )) - - context_agent_task = self.handle_message_json(MessageRequest( - agent_id="store_context_agent", - conversation_id="", - query="store_context_agent", - parameter_prompt=request.prompt, - json_parser={"contexts": ["string", "string"]} - )) + brand_agent_task = 
self.handle_message_json( + MessageRequest( + agent_id="store_brand_agent", + conversation_id="", + query="store_brand_agent", + parameter_prompt=request.prompt, + json_parser={"brands": ["string", "string"]}, + ) + ) + + context_agent_task = self.handle_message_json( + MessageRequest( + agent_id="store_context_agent", + conversation_id="", + query="store_context_agent", + parameter_prompt=request.prompt, + json_parser={"contexts": ["string", "string"]}, + ) + ) responses = await asyncio.gather(brand_agent_task, context_agent_task) brands = responses[0].get("brands", []) contexts = responses[1].get("contexts", []) - return { - "brands": brands, - "contexts": contexts - } + return {"brands": brands, "contexts": contexts} diff --git a/app/services/message_service_interface.py b/app/services/message_service_interface.py index 6a940ca..fe40178 100644 --- a/app/services/message_service_interface.py +++ b/app/services/message_service_interface.py @@ -1,11 +1,11 @@ -from abc import abstractmethod, ABC +from abc import ABC, abstractmethod +from app.requests.brand_context_resolver_request import BrandContextResolverRequest from app.requests.copy_request import CopyRequest +from app.requests.generate_pdf_request import GeneratePdfRequest from app.requests.message_request import MessageRequest from app.requests.recommend_product_request import RecommendProductRequest from app.requests.resolve_funnel_request import ResolveFunnelRequest -from app.requests.brand_context_resolver_request import BrandContextResolverRequest -from app.requests.generate_pdf_request import GeneratePdfRequest class MessageServiceInterface(ABC): @@ -39,4 +39,4 @@ async def resolve_brand_context(self, request: BrandContextResolverRequest): @abstractmethod async def handle_message_with_config(self, request: MessageRequest): - pass \ No newline at end of file + pass diff --git a/app/services/product_scraping_service.py b/app/services/product_scraping_service.py index c9c2367..28c7a64 100644 --- 
a/app/services/product_scraping_service.py +++ b/app/services/product_scraping_service.py @@ -1,9 +1,10 @@ +from urllib.parse import urlparse + from fastapi import Depends +from app.factories.scraping_factory import ScrapingFactory from app.requests.product_scraping_request import ProductScrapingRequest from app.services.product_scraping_service_interface import ProductScrapingServiceInterface -from app.factories.scraping_factory import ScrapingFactory -from urllib.parse import urlparse class ProductScrapingService(ProductScrapingServiceInterface): @@ -18,7 +19,8 @@ async def scrape_product(self, request: ProductScrapingRequest): return await scraper.scrape(url, domain) async def scrape_direct(self, html): - scraper = self.scraping_factory.get_scraper("https://www.macys.com/shop/womens-clothing/accessories/womens-sunglasses/Upc_bops_purchasable,Productsperpage/5376,120?id=28295&_additionalStoreLocations=5376") + scraper = self.scraping_factory.get_scraper( + "https://www.macys.com/shop/womens-clothing/accessories/womens-sunglasses/Upc_bops_purchasable,Productsperpage/5376,120?id=28295&_additionalStoreLocations=5376" + ) return await scraper.scrape_direct(html) - diff --git a/app/services/product_scraping_service_interface.py b/app/services/product_scraping_service_interface.py index 864bced..fd1a989 100644 --- a/app/services/product_scraping_service_interface.py +++ b/app/services/product_scraping_service_interface.py @@ -1,4 +1,5 @@ from abc import ABC, abstractmethod + from app.requests.product_scraping_request import ProductScrapingRequest diff --git a/app/services/video_service.py b/app/services/video_service.py index df14cba..34d6d11 100644 --- a/app/services/video_service.py +++ b/app/services/video_service.py @@ -2,9 +2,9 @@ from fastapi import Depends, HTTPException +from app.externals.fal.fal_client import FalClient from app.requests.generate_video_request import GenerateVideoRequest, VideoType from app.services.video_service_interface import 
VideoServiceInterface -from app.externals.fal.fal_client import FalClient class VideoService(VideoServiceInterface): @@ -19,22 +19,30 @@ async def generate_video(self, request: GenerateVideoRequest) -> Dict[str, Any]: prompt = content.get("prompt") image_url = content.get("image_url") if not prompt or not image_url: - raise HTTPException(status_code=400, detail="Se requieren 'prompt' e 'image_url' en content para animated_scene") + raise HTTPException( + status_code=400, detail="Se requieren 'prompt' e 'image_url' en content para animated_scene" + ) fal_webhook = content.get("fal_webhook") extra = {k: v for k, v in content.items() if k not in {"prompt", "image_url", "fal_webhook"}} - return await self.fal_client.kling_image_to_video(prompt=prompt, image_url=image_url, fal_webhook=fal_webhook, **extra) + return await self.fal_client.kling_image_to_video( + prompt=prompt, image_url=image_url, fal_webhook=fal_webhook, **extra + ) if request.type == VideoType.human_scene: image_url = content.get("image_url") audio_url = content.get("audio_url") if not image_url or not audio_url: - raise HTTPException(status_code=400, detail="Se requieren 'image_url' y 'audio_url' en content para human_scene") + raise HTTPException( + status_code=400, detail="Se requieren 'image_url' y 'audio_url' en content para human_scene" + ) fal_webhook = content.get("fal_webhook") extra = {k: v for k, v in content.items() if k not in {"image_url", "audio_url", "fal_webhook"}} - return await self.fal_client.bytedance_omnihuman(image_url=image_url, audio_url=audio_url, fal_webhook=fal_webhook, **extra) + return await self.fal_client.bytedance_omnihuman( + image_url=image_url, audio_url=audio_url, fal_webhook=fal_webhook, **extra + ) raise HTTPException(status_code=400, detail="Tipo de video no soportado") except HTTPException: raise except Exception as e: - raise HTTPException(status_code=502, detail=f"Error al llamar a FAL: {str(e)}") \ No newline at end of file + raise 
HTTPException(status_code=502, detail=f"Error al llamar a FAL: {str(e)}") diff --git a/app/services/video_service_interface.py b/app/services/video_service_interface.py index 6632014..26ccee5 100644 --- a/app/services/video_service_interface.py +++ b/app/services/video_service_interface.py @@ -6,4 +6,4 @@ class VideoServiceInterface(ABC): @abstractmethod async def generate_video(self, request: GenerateVideoRequest): - pass \ No newline at end of file + pass diff --git a/app/tools/tool_generator.py b/app/tools/tool_generator.py index bd751d5..1869fb5 100644 --- a/app/tools/tool_generator.py +++ b/app/tools/tool_generator.py @@ -1,6 +1,7 @@ from typing import List, Optional + from langchain_core.tools import StructuredTool -from pydantic import create_model, Field +from pydantic import Field, create_model from app.requestors.base_requestor import BaseRequestor @@ -9,7 +10,7 @@ class ToolGenerator: @classmethod def create_tool_function(cls, tool_config: dict): """Crea la función de implementación basada en la configuración de la herramienta""" - config = tool_config['config'] + config = tool_config["config"] def tool_function(**kwargs): return {"tool_result": BaseRequestor.execute_request(config, kwargs)} @@ -27,23 +28,17 @@ def generate_tools(cls, tools: Optional[List[dict]]) -> List[StructuredTool]: for tool_config in tools: # Crear el modelo Pydantic para los argumentos field_definitions = {} - for prop in tool_config['config']['properties']: - field_definitions[prop['name']] = ( - str, - Field(..., description=prop['description']) - ) - - args_schema = create_model( - f"{tool_config['tool_name'].title()}Input", - **field_definitions - ) + for prop in tool_config["config"]["properties"]: + field_definitions[prop["name"]] = (str, Field(..., description=prop["description"])) + + args_schema = create_model(f"{tool_config['tool_name'].title()}Input", **field_definitions) # Crear la herramienta tool = StructuredTool( - name=tool_config['tool_name'], - 
description=tool_config['description'], + name=tool_config["tool_name"], + description=tool_config["description"], func=cls.create_tool_function(tool_config), - args_schema=args_schema + args_schema=args_schema, ) structured_tools.append(tool) diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..db16739 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,64 @@ +# Documentación del Conversational Engine + +## Descripción General + +**Conversational Engine** es un microservicio construido con Python y FastAPI que actúa como motor de conversación con inteligencia artificial. Se integra con múltiples proveedores de IA (OpenAI, Anthropic Claude, Google Gemini, DeepSeek) y servicios externos para procesar consultas de usuarios, generar contenido, realizar scraping de productos y mucho más. + +## Índice de Documentación + +1. [Arquitectura](./architecture.md) - Visión general de la arquitectura del sistema +2. [Instalación y Configuración](./installation.md) - Guía de instalación y configuración +3. [API Endpoints](./api-endpoints.md) - Documentación de todos los endpoints +4. [Proveedores de IA](./ai-providers.md) - Integración con proveedores de IA +5. [Procesadores](./processors.md) - Sistema de procesamiento de conversaciones +6. [Scrapers](./scrapers.md) - Sistema de scraping de productos +7. [Servicios](./services.md) - Documentación de servicios internos +8. [Clientes Externos](./external-clients.md) - Integraciones con servicios externos +9. [Middlewares](./middlewares.md) - Autenticación y seguridad +10. 
[Variables de Entorno](./environment-variables.md) - Configuración del entorno + +## Características Principales + +- **Procesamiento de Conversaciones**: Manejo inteligente de conversaciones con historial y contexto +- **Multi-Proveedor de IA**: Soporte para OpenAI, Claude, Gemini y DeepSeek +- **Generación de Imágenes**: Creación y variación de imágenes con IA +- **Generación de Video**: Creación de videos animados y escenas humanas +- **Generación de Audio**: Text-to-speech multilingüe +- **Scraping de Productos**: Extracción de datos de Amazon, AliExpress, Dropi y más +- **Generación de PDFs**: Creación de manuales y documentos +- **Integración MCP**: Soporte para Model Context Protocol +- **Tools Dinámicas**: Generación dinámica de herramientas para agentes + +## Tecnologías Utilizadas + +- **Python 3.10+** +- **FastAPI** - Framework web asíncrono +- **LangChain** - Orquestación de LLMs +- **LangGraph** - Grafos de agentes +- **Pydantic** - Validación de datos +- **httpx** - Cliente HTTP asíncrono +- **FPDF** - Generación de PDFs + +## Inicio Rápido + +```bash +# Clonar el repositorio +git clone {repository_url} + +# Instalar dependencias +pip install -r requirements.txt + +# Configurar variables de entorno +cp .env.example .env + +# Ejecutar el servidor +python main.py +``` + +El servidor estará disponible en `http://localhost:8000` + +## Documentación Swagger + +Una vez que el servidor esté corriendo, accede a la documentación interactiva en: +- Swagger UI: `http://localhost:8000/docs` +- ReDoc: `http://localhost:8000/redoc` diff --git a/docs/ai-providers.md b/docs/ai-providers.md new file mode 100644 index 0000000..ddde5db --- /dev/null +++ b/docs/ai-providers.md @@ -0,0 +1,302 @@ +# Proveedores de IA + +El sistema soporta múltiples proveedores de IA a través de un patrón Factory que permite intercambiarlos fácilmente.
+ +## Arquitectura + +``` +┌─────────────────────────────────────────────────────────────┐ +│ AIProviderFactory │ +│ ┌─────────────────────────────────────────────────────────┐│ +│ │ get_provider(provider_name) ││ +│ └─────────────────────────────────────────────────────────┘│ +└──────────────────────────┬──────────────────────────────────┘ + │ + ┌───────────────────┼───────────────────┐ + ▼ ▼ ▼ +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ OpenAI │ │ Anthropic │ │ Gemini │ +│ Provider │ │ Provider │ │ Provider │ +└─────────────┘ └─────────────┘ └─────────────┘ + │ │ │ + ▼ ▼ ▼ +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ ChatOpenAI │ │ChatAnthropic│ │ ChatGoogle │ +│ │ │ │ │ GenerativeAI│ +└─────────────┘ └─────────────┘ └─────────────┘ +``` + +## AIProviderInterface + +Interfaz base que todos los proveedores deben implementar: + +```python +class AIProviderInterface(ABC): + @abstractmethod + def get_llm(self, model: str, temperature: float, + max_tokens: int, top_p: float) -> BaseChatModel: + """Retorna el modelo de lenguaje configurado""" + pass + + @abstractmethod + def supports_interleaved_files(self) -> bool: + """Indica si soporta archivos intercalados en el contexto""" + pass +``` + +## Proveedores Disponibles + +### 1. OpenAI Provider + +**Identificador:** `openai` + +**Modelos soportados:** +- gpt-4 +- gpt-4-turbo +- gpt-4o +- gpt-4o-mini +- gpt-3.5-turbo + +**Configuración:** + +```python +class OpenAIProvider(AIProviderInterface): + def get_llm(self, model: str, temperature: float, + max_tokens: int, top_p: float) -> ChatOpenAI: + return ChatOpenAI( + model=model, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p + ) + + def supports_interleaved_files(self) -> bool: + return True +``` + +**Variable de entorno requerida:** +- `OPENAI_API_KEY` + +--- + +### 2. 
Anthropic Provider (Claude) + +**Identificador:** `claude` + +**Modelos soportados:** +- claude-3-opus-20240229 +- claude-3-sonnet-20240229 +- claude-3-haiku-20240307 +- claude-3-7-sonnet-20250219 + +**Configuración:** + +```python +class AnthropicProvider(AIProviderInterface): + def get_llm(self, model: str, temperature: float, + max_tokens: int, top_p: int) -> ChatAnthropic: + return ChatAnthropic( + model=model, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p + ) + + def supports_interleaved_files(self) -> bool: + return True +``` + +**Variable de entorno requerida:** +- `ANTHROPIC_API_KEY` + +--- + +### 3. Gemini Provider + +**Identificador:** `gemini` + +**Modelos soportados:** +- gemini-pro +- gemini-1.5-pro +- gemini-1.5-flash + +**Configuración:** + +```python +class GeminiProvider(AIProviderInterface): + def get_llm(self, model: str, temperature: float, + max_tokens: int, top_p: int) -> ChatGoogleGenerativeAI: + return ChatGoogleGenerativeAI( + model=model, + temperature=temperature, + max_output_tokens=max_tokens, + top_p=top_p, + google_api_key=os.getenv("GOOGLE_GEMINI_API_KEY") + ) + + def supports_interleaved_files(self) -> bool: + return True +``` + +**Variable de entorno requerida:** +- `GOOGLE_GEMINI_API_KEY` + +--- + +### 4. 
DeepSeek Provider + +**Identificador:** `deepseek` + +**Modelos soportados:** +- deepseek-coder +- deepseek-chat + +**Configuración:** + +```python +class DeepseekProvider(AIProviderInterface): + def get_llm(self, model: str, temperature: float, + max_tokens: int, top_p: float) -> Ollama: + return Ollama( + model=model, + base_url=DEEP_SEEK_HOST, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p + ) + + def supports_interleaved_files(self) -> bool: + return False # DeepSeek no soporta archivos intercalados +``` + +**Variable de entorno requerida:** +- `HOST_DEEP_SEEK` + +--- + +## Factory Pattern + +### AIProviderFactory + +```python +class AIProviderFactory: + @staticmethod + def get_provider(provider_name: str) -> AIProviderInterface: + if provider_name == "openai": + return OpenAIProvider() + elif provider_name == "claude": + return AnthropicProvider() + elif provider_name == "deepseek": + return DeepseekProvider() + elif provider_name == "gemini": + return GeminiProvider() + else: + raise ValueError(f"El proveedor de AI '{provider_name}' no está implementado") +``` + +## Uso en el Sistema + +### Obtener un proveedor + +```python +# Obtener el proveedor +provider = AIProviderFactory.get_provider("openai") + +# Crear el LLM con configuración +llm = provider.get_llm( + model="gpt-4", + temperature=0.7, + max_tokens=1000, + top_p=1.0 +) + +# Verificar soporte de archivos +if provider.supports_interleaved_files(): + # Procesar con archivos + pass +``` + +### Integración con ConversationManager + +```python +async def process_conversation(self, request, agent_config): + # El proveedor se obtiene de la configuración del agente + ai_provider = AIProviderFactory.get_provider(agent_config.provider_ai) + + llm = ai_provider.get_llm( + model=agent_config.model_ai, + temperature=agent_config.preferences.temperature, + max_tokens=agent_config.preferences.max_tokens, + top_p=agent_config.preferences.top_p + ) + + # Usar el LLM en el procesador... 
+``` + +## Fallback Automático + +El sistema implementa un fallback automático a Claude cuando hay errores: + +```python +async def _fallback_with_anthropic(self, request, agent_config, history): + anthropic_provider = AIProviderFactory.get_provider("claude") + anthropic_llm = anthropic_provider.get_llm( + model="claude-3-7-sonnet-20250219", + temperature=agent_config.preferences.temperature, + max_tokens=agent_config.preferences.max_tokens, + top_p=agent_config.preferences.top_p + ) + + processor = SimpleProcessor(anthropic_llm, agent_config.prompt, history) + return await processor.process(request, request.files, True) +``` + +## Agregar un Nuevo Proveedor + +Para agregar un nuevo proveedor de IA: + +1. Crear clase que implemente `AIProviderInterface`: + +```python +# app/providers/new_provider.py +from app.providers.ai_provider_interface import AIProviderInterface + +class NewProvider(AIProviderInterface): + def get_llm(self, model: str, temperature: float, + max_tokens: int, top_p: float): + return NewLLMClient( + model=model, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p + ) + + def supports_interleaved_files(self) -> bool: + return True # o False según corresponda +``` + +2. Registrar en el Factory: + +```python +# app/factories/ai_provider_factory.py +from app.providers.new_provider import NewProvider + +class AIProviderFactory: + @staticmethod + def get_provider(provider_name: str) -> AIProviderInterface: + # ... otros proveedores ... + elif provider_name == "new_provider": + return NewProvider() +``` + +3. Configurar variables de entorno necesarias. + +## Parámetros de Configuración + +| Parámetro | Tipo | Descripción | Default | +|-----------|------|-------------|---------| +| temperature | float | Creatividad de respuestas (0-2) | 0.7 | +| max_tokens | int | Máximo de tokens en respuesta | 1000 | +| top_p | float | Nucleus sampling (0-1) | 1.0 | + +Estos parámetros se configuran por agente en el servicio `agent-config`. 
diff --git a/docs/api-endpoints.md b/docs/api-endpoints.md new file mode 100644 index 0000000..e48f5dd --- /dev/null +++ b/docs/api-endpoints.md @@ -0,0 +1,435 @@ +# API Endpoints + +Todos los endpoints están prefijados con `/api/ms/conversational-engine`. + +## Índice de Endpoints + +| Método | Endpoint | Descripción | Auth | +|--------|----------|-------------|------| +| POST | `/handle-message` | Procesar mensaje conversacional | No | +| POST | `/handle-message-json` | Procesar mensaje con respuesta JSON | No | +| POST | `/recommend-product` | Recomendar productos | No | +| POST | `/generate-pdf` | Generar PDF manual | No | +| POST | `/generate-variation-images` | Generar variaciones de imagen | Bearer | +| POST | `/generate-images-from` | Generar imágenes desde prompt | Bearer | +| POST | `/generate-images-from/api-key` | Generar imágenes (API Key) | API Key | +| POST | `/generate-images-from-agent/api-key` | Generar imágenes con agente | API Key | +| POST | `/generate-copies` | Generar copys de marketing | No | +| POST | `/scrape-product` | Scraping de producto | Bearer | +| POST | `/scrape-direct-html` | Scraping directo de HTML | Bearer | +| POST | `/resolve-info-funnel` | Resolver información de funnel | No | +| POST | `/store/brand-context-resolver` | Resolver contexto de marca | Bearer | +| POST | `/generate-video` | Generar video con IA | No | +| POST | `/generate-audio` | Generar audio (TTS) | No | +| GET | `/integration/dropi/departments` | Obtener departamentos Dropi | No | +| GET | `/integration/dropi/departments/{id}/cities` | Obtener ciudades por departamento | No | +| GET | `/health` | Health check | No | + +--- + +## Mensajería y Conversación + +### POST /handle-message + +Procesa un mensaje y retorna la respuesta del agente de IA. 
+ +**Request Body:** + +```json +{ + "agent_id": "string", + "conversation_id": "string", + "query": "string", + "metadata_filter": [ + { + "key": "string", + "value": "string", + "evaluator": "=" + } + ], + "parameter_prompt": { + "key": "value" + }, + "files": [ + { + "type": "image", + "url": "https://example.com/image.jpg", + "content": "base64_string" + } + ], + "json_parser": { + "field": "type" + } +} +``` + +**Campos:** + +| Campo | Tipo | Requerido | Descripción | +|-------|------|-----------|-------------| +| agent_id | string | Sí | ID del agente a utilizar | +| conversation_id | string | Sí | ID de la conversación (vacío para nueva) | +| query | string | Sí | Mensaje del usuario | +| metadata_filter | array | No | Filtros de metadatos | +| parameter_prompt | object | No | Parámetros adicionales para el prompt | +| files | array | No | Archivos adjuntos | +| json_parser | object | No | Esquema esperado de respuesta JSON | + +**Response:** + +```json +{ + "context": "string", + "chat_history": [], + "input": "string", + "text": "Respuesta del agente" +} +``` + +--- + +### POST /handle-message-json + +Similar a `/handle-message` pero parsea la respuesta como JSON. + +**Response:** + +Retorna directamente el JSON parseado de la respuesta del agente. + +--- + +## Recomendación de Productos + +### POST /recommend-product + +Recomienda productos basándose en nombre y descripción. + +**Request Body:** + +```json +{ + "product_name": "string", + "product_description": "string", + "similar": false +} +``` + +**Response:** + +```json +{ + "ai_response": { + "recommended_product": "string" + }, + "products": [ + { + "asin": "string", + "title": "string", + "price": "string", + "image": "string" + } + ] +} +``` + +--- + +## Generación de Contenido + +### POST /generate-pdf + +Genera un manual PDF para un producto. 
+ +**Request Body:** + +```json +{ + "product_id": "string", + "product_name": "string", + "product_description": "string", + "language": "es", + "content": "string", + "title": "string", + "image_url": "string", + "owner_id": "string" +} +``` + +**Response:** + +```json +{ + "s3_url": "https://fluxi.co/..." +} +``` + +--- + +### POST /generate-copies + +Genera textos de marketing (copys). + +**Request Body:** + +```json +{ + "prompt": "string" +} +``` + +**Response:** + +```json +{ + "copies": { + "headline": "string", + "subheadline": "string", + "cta": "string" + } +} +``` + +--- + +## Generación de Imágenes + +### POST /generate-variation-images + +Genera variaciones de una imagen existente. + +**Headers:** +- `Authorization: Bearer {token}` + +**Request Body:** + +```json +{ + "file": "base64_encoded_image", + "num_variations": 3, + "language": "es" +} +``` + +**Response:** + +```json +{ + "generated_urls": ["url1", "url2", "url3"], + "original_url": "string", + "original_urls": ["string"], + "generated_prompt": "string", + "vision_analysis": { + "logo_description": "string", + "label_description": "string" + } +} +``` + +--- + +### POST /generate-images-from + +Genera imágenes desde un prompt y/o imagen base. + +**Headers:** +- `Authorization: Bearer {token}` + +**Request Body:** + +```json +{ + "file": "base64_encoded_image", + "file_url": "https://example.com/image.jpg", + "file_urls": ["url1", "url2"], + "prompt": "string", + "num_variations": 1, + "provider": "openai", + "model_ai": "dall-e-3", + "extra_parameters": {}, + "language": "es" +} +``` + +--- + +## Generación de Video + +### POST /generate-video + +Genera videos usando FAL AI.
+ +**Request Body:** + +```json +{ + "type": "animated_scene", + "content": { + "prompt": "string", + "image_url": "string", + "fal_webhook": "string" + } +} +``` + +**Tipos de video:** + +| Tipo | Descripción | Campos requeridos | +|------|-------------|-------------------| +| `animated_scene` | Escena animada | prompt, image_url | +| `human_scene` | Escena con humano | image_url, audio_url | + +--- + +## Generación de Audio + +### POST /generate-audio + +Genera audio usando Text-to-Speech. + +**Request Body:** + +```json +{ + "text": "Texto a convertir en audio", + "content": { + "fal_webhook": "string", + "voice_id": "string" + } +} +``` + +--- + +## Scraping de Productos + +### POST /scrape-product + +Extrae información de un producto desde su URL. + +**Headers:** +- `Authorization: Bearer {token}` + +**Request Body:** + +```json +{ + "product_url": "https://www.amazon.com/dp/B01234567", + "country": "co" +} +``` + +**Response:** + +```json +{ + "data": { + "provider_id": "amazon", + "external_id": "B01234567", + "name": "Nombre del producto", + "description": "Descripción", + "external_sell_price": 29.99, + "images": ["url1", "url2"], + "variants": [] + } +} +``` + +--- + +## Funnel y Marca + +### POST /resolve-info-funnel + +Analiza un producto para generar información de funnel de ventas. + +**Request Body:** + +```json +{ + "product_name": "string", + "product_description": "string", + "language": "es" +} +``` + +**Response:** + +```json +{ + "pain_detection": "string", + "buyer_detection": "string", + "sales_angles": [ + { + "name": "string", + "description": "string" + } + ] +} +``` + +--- + +### POST /store/brand-context-resolver + +Resuelve el contexto de marca para una tienda.
+ +**Headers:** +- `Authorization: Bearer ` + +**Request Body:** + +```json +{ + "prompt": { + "store_info": "string" + } +} +``` + +**Response:** + +```json +{ + "brands": ["brand1", "brand2"], + "contexts": ["context1", "context2"] +} +``` + +--- + +## Integración Dropi + +### GET /integration/dropi/departments + +Obtiene la lista de departamentos. + +**Query Parameters:** +- `country`: Código de país (default: "co") + +--- + +### GET /integration/dropi/departments/{department_id}/cities + +Obtiene las ciudades de un departamento. + +**Path Parameters:** +- `department_id`: ID del departamento + +**Query Parameters:** +- `country`: Código de país (default: "co") + +--- + +## Health Check + +### GET /health + +Verifica el estado del servicio. + +**Response:** + +```json +{ + "status": "OK" +} +``` diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..f1fea32 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,165 @@ +# Arquitectura del Sistema + +## Diagrama de Arquitectura + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Cliente (HTTP Request) │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ FastAPI Application │ +│ ┌─────────────────────────────────────────────────────────────────────────┐│ +│ │ Middlewares (Auth) ││ +│ └─────────────────────────────────────────────────────────────────────────┘│ +│ ┌─────────────────────────────────────────────────────────────────────────┐│ +│ │ Controllers/Router ││ +│ └─────────────────────────────────────────────────────────────────────────┘│ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ┌───────────────────┼───────────────────┐ + ▼ ▼ ▼ + ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ + │ MessageService │ │ ImageService │ │ProductScraping │ + │ │ │ │ │ Service │ + 
└────────┬────────┘ └────────┬────────┘ └────────┬────────┘ + │ │ │ + ▼ ▼ ▼ + ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ + │ Conversation │ │ External APIs │ │ ScrapingFactory │ + │ Manager │ │ (Google Vision, │ │ │ + │ │ │ S3, FAL) │ │ │ + └────────┬────────┘ └─────────────────┘ └────────┬────────┘ + │ │ + ▼ ▼ + ┌─────────────────┐ ┌─────────────────┐ + │ Processors │ │ Scrapers │ + │ ┌─────────────┐ │ │ ┌─────────────┐ │ + │ │ Simple │ │ │ │ Amazon │ │ + │ │ Processor │ │ │ │ Scraper │ │ + │ └─────────────┘ │ │ └─────────────┘ │ + │ ┌─────────────┐ │ │ ┌─────────────┐ │ + │ │ Agent │ │ │ │ AliExpress │ │ + │ │ Processor │ │ │ │ Scraper │ │ + │ └─────────────┘ │ │ └─────────────┘ │ + │ ┌─────────────┐ │ │ ┌─────────────┐ │ + │ │ MCP │ │ │ │ Dropi │ │ + │ │ Processor │ │ │ │ Scraper │ │ + │ └─────────────┘ │ │ └─────────────┘ │ + └────────┬────────┘ │ ┌─────────────┐ │ + │ │ │ IA Scraper │ │ + ▼ │ └─────────────┘ │ + ┌─────────────────┐ └─────────────────┘ + │ AI Provider │ + │ Factory │ + │ ┌─────────────┐ │ + │ │ OpenAI │ │ + │ └─────────────┘ │ + │ ┌─────────────┐ │ + │ │ Anthropic │ │ + │ └─────────────┘ │ + │ ┌─────────────┐ │ + │ │ Gemini │ │ + │ └─────────────┘ │ + │ ┌─────────────┐ │ + │ │ DeepSeek │ │ + │ └─────────────┘ │ + └─────────────────┘ +``` + +## Componentes Principales + +### 1. Capa de Entrada (Controllers) + +**handle_controller.py** +- Punto de entrada para todas las solicitudes HTTP +- Define los endpoints de la API +- Inyecta dependencias de servicios +- Aplica middlewares de autenticación + +### 2. 
Capa de Servicios + +| Servicio | Descripción | +|----------|-------------| +| `MessageService` | Procesamiento principal de mensajes y conversaciones | +| `ImageService` | Generación y variación de imágenes | +| `VideoService` | Generación de videos con FAL AI | +| `AudioService` | Generación de audio (TTS) | +| `ProductScrapingService` | Scraping de productos de e-commerce | +| `DropiService` | Integración con la plataforma Dropi | + +### 3. Gestión de Conversaciones + +**ConversationManager** +- Almacena el historial de conversaciones en memoria +- Límite configurable de historial (10 mensajes por defecto) +- Selecciona el procesador adecuado según la configuración del agente + +### 4. Procesadores + +| Procesador | Uso | +|------------|-----| +| `SimpleProcessor` | Conversaciones simples sin herramientas | +| `AgentProcessor` | Agentes con herramientas dinámicas | +| `MCPProcessor` | Agentes con Model Context Protocol | + +### 5. Proveedores de IA + +Implementación del patrón Factory para manejar múltiples proveedores: + +- **OpenAI**: GPT-4, GPT-3.5, etc. +- **Anthropic**: Claude 3 (Opus, Sonnet, Haiku) +- **Gemini**: Google Gemini Pro +- **DeepSeek**: Modelos DeepSeek via Ollama + +### 6. Sistema de Scraping + +Factory pattern para seleccionar el scraper correcto: + +- **AmazonScraper**: Productos de Amazon +- **AliexpressScraper**: Productos de AliExpress +- **DropiScraper**: Productos de Dropi +- **CJScraper**: Productos de CJ Dropshipping +- **IAScraper**: Scraping genérico con IA + +## Flujo de Datos + +### Procesamiento de Mensaje + +``` +1. Request HTTP → Controller +2. Controller → MessageService +3. MessageService → AgentConfigClient (obtener configuración) +4. MessageService → ConversationManager +5. ConversationManager → AIProviderFactory (crear LLM) +6. ConversationManager → Processor (según configuración) +7. Processor → LLM (procesar query) +8. Response → Cliente +``` + +### Scraping de Producto + +``` +1. Request HTTP → Controller +2. 
Controller → ProductScrapingService +3. ProductScrapingService → ScrapingFactory +4. ScrapingFactory → Scraper específico (según URL) +5. Scraper → API externa o HTML parsing +6. Response estructurada → Cliente +``` + +## Patrones de Diseño + +1. **Factory Pattern**: AIProviderFactory, ScrapingFactory +2. **Strategy Pattern**: Procesadores intercambiables +3. **Dependency Injection**: FastAPI Depends +4. **Interface Segregation**: Interfaces para cada servicio +5. **Repository Pattern**: ConversationManager para historial + +## Escalabilidad + +- **Stateless**: Cada request es independiente (excepto historial en memoria) +- **Async/Await**: Operaciones I/O no bloqueantes +- **Docker Ready**: Containerización lista +- **Horizontal Scaling**: Puede ejecutarse en múltiples instancias (considerar Redis para historial compartido) diff --git a/docs/external-clients.md b/docs/external-clients.md new file mode 100644 index 0000000..5e9b1bf --- /dev/null +++ b/docs/external-clients.md @@ -0,0 +1,401 @@ +# Clientes Externos + +El sistema se integra con múltiples servicios externos para funcionalidades específicas. + +## Agent Config Client + +Cliente para obtener la configuración de agentes desde el servicio externo. 
+ +### Endpoint + +``` +POST {HOST_AGENT_CONFIG}/api/ms/agent/config/search-agent +``` + +### Implementación + +```python +async def get_agent(data: AgentConfigRequest) -> AgentConfigResponse: + endpoint = '/api/ms/agent/config/search-agent' + url = f"{HOST_AGENT_CONFIG}{endpoint}" + headers = {'Content-Type': 'application/json'} + + async with httpx.AsyncClient() as client: + response = await client.post(url, json=data.model_dump(), headers=headers) + response.raise_for_status() + return AgentConfigResponse(**response.json()) +``` + +### Estructura de Respuesta + +```python +class AgentConfigResponse(BaseModel): + id: int + agent_id: str + description: str + prompt: str + provider_ai: str # openai, claude, gemini, deepseek + model_ai: str # gpt-4, claude-3-sonnet, etc. + preferences: AgentPreferences + tools: Optional[List[Dict[str, Any]]] + mcp_config: Optional[Dict[str, Any]] + +class AgentPreferences(BaseModel): + temperature: float = 0.7 + max_tokens: int = 1000 + top_p: float = 1.0 + extra_parameters: Optional[Dict[str, Any]] = None +``` + +--- + +## FAL Client + +Cliente para el servicio FAL AI (generación de video y audio). 
+ +### Configuración + +```python +class FalClient: + def __init__(self, api_key: Optional[str] = None): + self.api_key = api_key or FAL_AI_API_KEY +``` + +### Métodos + +#### Text-to-Speech Multilingüe + +```python +async def tts_multilingual_v2(self, text: str, fal_webhook: Optional[str] = None, **kwargs): + payload = {"text": text} + payload.update(kwargs) + return await self._post("fal-ai/elevenlabs/tts/multilingual-v2", payload, fal_webhook) +``` + +#### Video desde Imagen (Kling) + +```python +async def kling_image_to_video(self, prompt: str, image_url: str, + fal_webhook: Optional[str] = None, **kwargs): + payload = {"prompt": prompt, "image_url": image_url} + payload.update(kwargs) + return await self._post("fal-ai/kling-video/v2/master/image-to-video", payload, fal_webhook) +``` + +#### Video con Humano (OmniHuman) + +```python +async def bytedance_omnihuman(self, image_url: str, audio_url: str, + fal_webhook: Optional[str] = None, **kwargs): + payload = {"image_url": image_url, "audio_url": audio_url} + payload.update(kwargs) + return await self._post("fal-ai/bytedance/omnihuman", payload, fal_webhook) +``` + +### Soporte para Webhooks + +FAL soporta webhooks para notificaciones asíncronas: + +```python +async def _post(self, path: str, payload: Dict, fal_webhook: Optional[str] = None): + base_url = f"https://queue.fal.run/{path}" + if fal_webhook: + query = f"fal_webhook={urllib.parse.quote_plus(fal_webhook)}" + url = f"{base_url}?{query}" + else: + url = base_url + + headers = { + "Authorization": f"Key {self.api_key}", + "Content-Type": "application/json", + } + + async with httpx.AsyncClient(timeout=60) as client: + response = await client.post(url, json=payload, headers=headers) + return response.json() +``` + +--- + +## Google Vision Client + +Cliente para el servicio Google Cloud Vision. 
+ +### Funcionalidad + +- Detección de etiquetas (LABEL_DETECTION) +- Detección de logos (LOGO_DETECTION) + +### Implementación + +```python +async def analyze_image(image_base64: str) -> VisionAnalysisResponse: + vision_api_url = f"https://vision.googleapis.com/v1/images:annotate?key={GOOGLE_VISION_API_KEY}" + + payload = { + "requests": [{ + "image": {"content": image_base64}, + "features": [ + {"type": "LABEL_DETECTION", "maxResults": 3}, + {"type": "LOGO_DETECTION", "maxResults": 1} + ] + }] + } + + async with aiohttp.ClientSession() as session: + async with session.post(vision_api_url, json=payload) as response: + data = await response.json() + + # Extraer logo (si score > 0.65) + logo_description = "" + if data["responses"][0].get("logoAnnotations"): + logo = data["responses"][0]["logoAnnotations"][0] + if logo.get("score", 0) > 0.65: + logo_description = logo["description"] + + # Extraer etiquetas (si score > 0.65) + labels = [ + label["description"] + for label in data["responses"][0].get("labelAnnotations", []) + if label.get("score", 0) > 0.65 + ] + + return VisionAnalysisResponse( + logo_description=logo_description, + label_description=", ".join(labels) + ) +``` + +### Respuesta + +```python +class VisionAnalysisResponse(BaseModel): + logo_description: str + label_description: str + + def get_analysis_text(self) -> str: + parts = [] + if self.logo_description: + parts.append(f"Logo detected: {self.logo_description}") + if self.label_description: + parts.append(f"Labels: {self.label_description}") + return ". ".join(parts) +``` + +--- + +## Dropi Client + +Cliente para la plataforma Dropi (dropshipping). 
+ +### Configuración Multi-País + +```python +DROPI_HOST = os.getenv('DROPI_HOST', 'https://test-api.dropi.co') + +def get_dropi_api_key(country: str = "co") -> str: + country_keys = { + "co": DROPI_API_KEY_CO, + "mx": DROPI_API_KEY_MX, + "ar": DROPI_API_KEY_AR, + "cl": DROPI_API_KEY_CL, + "pe": DROPI_API_KEY_PE, + "py": DROPI_API_KEY_PY, + "ec": DROPI_API_KEY_EC, + } + return country_keys.get(country.lower(), DROPI_API_KEY) +``` + +### Métodos + +#### Obtener Detalles de Producto + +```python +async def get_product_details(product_id: str, country: str = "co") -> Dict[str, Any]: + headers = {"dropi-integration-key": get_dropi_api_key(country)} + dropi_host = DROPI_HOST.replace(".co", f".{country}") + url = f"{dropi_host}/integrations/products/v2/{product_id}" + + async with httpx.AsyncClient() as client: + response = await client.get(url, headers=headers) + return response.json() +``` + +#### Obtener Departamentos + +```python +async def get_departments(country: str = "co") -> Dict[str, Any]: + headers = {"dropi-integration-key": get_dropi_api_key(country)} + dropi_host = DROPI_HOST.replace(".co", f".{country}") + url = f"{dropi_host}/integrations/department" + + async with httpx.AsyncClient() as client: + response = await client.get(url, headers=headers) + return response.json() +``` + +#### Obtener Ciudades por Departamento + +```python +async def get_cities_by_department(department_id: int, rate_type: str, + country: str = "co") -> Dict[str, Any]: + headers = { + "dropi-integration-key": get_dropi_api_key(country), + "Content-Type": "application/json" + } + payload = {"department_id": department_id, "rate_type": rate_type} + + dropi_host = DROPI_HOST.replace(".co", f".{country}") + url = f"{dropi_host}/integrations/trajectory/bycity" + + async with httpx.AsyncClient() as client: + response = await client.post(url, headers=headers, json=payload) + return response.json() +``` + +--- + +## Amazon Client + +Cliente para la API de Amazon via RapidAPI. 
+ +### Endpoints + +- Búsqueda de productos +- Detalles de producto por ASIN + +### Implementación + +```python +async def search_products(request: AmazonSearchRequest): + headers = { + "X-RapidAPI-Key": RAPIDAPI_KEY, + "X-RapidAPI-Host": RAPIDAPI_HOST + } + + async with httpx.AsyncClient() as client: + response = await client.get( + f"https://{RAPIDAPI_HOST}/search", + params={"query": request.query}, + headers=headers + ) + return response.json() + +async def get_product_details(asin: str): + headers = { + "X-RapidAPI-Key": RAPIDAPI_KEY, + "X-RapidAPI-Host": RAPIDAPI_HOST + } + + async with httpx.AsyncClient() as client: + response = await client.get( + f"https://{RAPIDAPI_HOST}/product-details", + params={"asin": asin}, + headers=headers + ) + return response.json() +``` + +--- + +## AliExpress Client + +Cliente para la API de AliExpress via RapidAPI. + +### Obtener Detalles de Producto + +```python +async def get_item_detail(item_id: str) -> Dict[str, Any]: + headers = { + "X-RapidAPI-Key": RAPIDAPI_KEY, + "X-RapidAPI-Host": "aliexpress-datahub.p.rapidapi.com" + } + + async with httpx.AsyncClient() as client: + response = await client.get( + "https://aliexpress-datahub.p.rapidapi.com/item_detail_2", + params={"itemId": item_id}, + headers=headers + ) + return response.json() +``` + +--- + +## S3 Upload Client + +Cliente para subir archivos a S3. 
+ +### Subir Archivo + +```python +async def upload_file(request: S3UploadRequest) -> S3UploadResponse: + url = f"{S3_UPLOAD_API}/upload" + + payload = { + "file": request.file, # Base64 + "folder": request.folder, + "filename": request.filename + } + + async with httpx.AsyncClient() as client: + response = await client.post(url, json=payload) + return S3UploadResponse(**response.json()) +``` + +### Verificar si Existe + +```python +async def check_file_exists_direct(s3_url: str) -> bool: + async with httpx.AsyncClient() as client: + response = await client.head(s3_url) + return response.status_code == 200 +``` + +--- + +## ScraperAPI Client + +Cliente para el servicio ScraperAPI. + +### Obtener HTML de una URL + +```python +class ScraperAPIClient: + async def get_html(self, url: str) -> str: + params = { + "api_key": SCRAPERAPI_KEY, + "url": url, + "render": "true" + } + + async with httpx.AsyncClient(timeout=60) as client: + response = await client.get( + "https://api.scraperapi.com", + params=params + ) + return response.text + + async def get_html_lambda(self, url: str) -> str: + async with httpx.AsyncClient(timeout=60) as client: + response = await client.post( + URL_SCRAPER_LAMBDA, + json={"url": url} + ) + return response.json().get("html", "") +``` + +--- + +## Resumen de Variables de Entorno + +| Cliente | Variables Requeridas | +|---------|---------------------| +| Agent Config | `HOST_AGENT_CONFIG` | +| FAL | `FAL_AI_API_KEY` | +| Google Vision | `GOOGLE_VISION_API_KEY` | +| Dropi | `DROPI_HOST`, `DROPI_API_KEY`, `DROPI_API_KEY_*` | +| Amazon | `RAPIDAPI_KEY`, `RAPIDAPI_HOST` | +| AliExpress | `RAPIDAPI_KEY` | +| S3 | `S3_UPLOAD_API` | +| ScraperAPI | `SCRAPERAPI_KEY`, `URL_SCRAPER_LAMBDA` | diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 0000000..1c5826e --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,169 @@ +# Instalación y Configuración + +## Requisitos del Sistema + +- Python 3.10 o superior +- pip (gestor de 
paquetes de Python) +- Docker (opcional, para despliegue containerizado) + +## Instalación Local + +### 1. Clonar el Repositorio + +```bash +git clone <repository-url> +cd conversational-engine +``` + +### 2. Crear Entorno Virtual (Recomendado) + +```bash +python -m venv venv +source venv/bin/activate # Linux/macOS +# o +.\venv\Scripts\activate # Windows +``` + +### 3. Instalar Dependencias + +```bash +pip install -r requirements.txt +``` + +### 4. Configurar Variables de Entorno + +Crear un archivo `.env` en la raíz del proyecto: + +```bash +cp .env.example .env +``` + +Editar el archivo `.env` con tus credenciales (ver [Variables de Entorno](./environment-variables.md)). + +### 5. Ejecutar el Servidor + +```bash +python main.py +``` + +O usando uvicorn directamente: + +```bash +uvicorn main:app --reload --host 0.0.0.0 --port 8000 +``` + +## Instalación con Docker + +### 1. Construir la Imagen + +```bash +docker build -t conversational-engine . +``` + +### 2. Ejecutar el Contenedor + +```bash +docker run -p 8000:8000 --env-file .env conversational-engine +``` + +### Dockerfile + +```dockerfile +FROM python:3.10-slim + +WORKDIR /app + +COPY requirements.txt . + +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . 
+ +EXPOSE 8000 + +CMD ["python", "main.py"] +``` + +## Dependencias Principales + +| Paquete | Versión | Descripción | +|---------|---------|-------------| +| fastapi | >=0.109.1 | Framework web asíncrono | +| pydantic | >=2.5.0 | Validación de datos | +| uvicorn | 0.24.0 | Servidor ASGI | +| httpx | >=0.24.0 | Cliente HTTP asíncrono | +| langchain-community | >=0.2.0 | Herramientas LangChain | +| langchain-openai | >=0.0.5 | Integración OpenAI | +| langchain-anthropic | - | Integración Anthropic | +| langchain-google-genai | - | Integración Google Gemini | +| langgraph | 0.3.31 | Grafos de agentes | +| langchain-mcp-adapters | 0.0.9 | Adaptadores MCP | +| fpdf | - | Generación de PDFs | +| beautifulsoup4 | - | Parsing HTML | +| Pillow | 10.3.0 | Procesamiento de imágenes | +| langsmith | - | Observabilidad de LLMs | + +## Verificar Instalación + +Una vez iniciado el servidor, verifica que funcione correctamente: + +### Health Check + +```bash +curl http://localhost:8000/api/ms/conversational-engine/health +``` + +Respuesta esperada: +```json +{"status": "OK"} +``` + +### Documentación API + +Accede a la documentación interactiva: +- Swagger UI: http://localhost:8000/docs +- ReDoc: http://localhost:8000/redoc + +## Configuración para Desarrollo + +### Hot Reload + +Para desarrollo con recarga automática: + +```bash +uvicorn main:app --reload --host 0.0.0.0 --port 8000 +``` + +### Debug Mode + +Habilitar logging detallado añadiendo al `.env`: + +``` +ENVIRONMENT=development +``` + +## Solución de Problemas + +### Error: ModuleNotFoundError + +```bash +pip install -r requirements.txt --force-reinstall +``` + +### Error: Puerto 8000 en uso + +```bash +# Encontrar proceso usando el puerto +lsof -i :8000 + +# Matar el proceso +kill -9 <PID> +``` + +### Error: Variables de entorno no encontradas + +Verificar que el archivo `.env` existe y tiene las variables requeridas: + +```bash +cat .env +``` diff --git a/docs/processors.md b/docs/processors.md new file mode 100644 index 
0000000..a50a32b --- /dev/null +++ b/docs/processors.md @@ -0,0 +1,346 @@ +# Procesadores de Conversación + +Los procesadores son el corazón del sistema de conversación. Cada tipo de procesador maneja diferentes escenarios de interacción con la IA. + +## Arquitectura + +``` +┌─────────────────────────────────────────────────────────────┐ +│ ConversationProcessor (Base) │ +│ ┌─────────────────────────────────────────────────────────┐│ +│ │ - llm: BaseChatModel ││ +│ │ - context: str ││ +│ │ - history: List[str] ││ +│ │ + process(request, files, supports_interleaved) ││ +│ │ + _get_langsmith_config(request, processor_type) ││ +│ └─────────────────────────────────────────────────────────┘│ +└─────────────────────────────────────────────────────────────┘ + ▲ + ┌───────────────┼───────────────┐ + │ │ │ + ┌────────┴────────┐ ┌────┴────┐ ┌───────┴───────┐ + │ SimpleProcessor │ │ Agent │ │ MCPProcessor │ + │ │ │Processor│ │ │ + └─────────────────┘ └─────────┘ └───────────────┘ +``` + +## ConversationProcessor (Base) + +Clase base abstracta que define la interfaz común para todos los procesadores. + +```python +class ConversationProcessor: + def __init__(self, llm: BaseChatModel, context: str, history: List[str]): + self.llm = llm + self.context = context + self.history = history + + def _get_langsmith_config(self, request, processor_type: str, **extra_metadata): + """Genera configuración para trazabilidad con LangSmith""" + return { + "tags": [processor_type, f"agent_{request.agent_id}"], + "metadata": { + "agent_id": request.agent_id, + "conversation_id": request.conversation_id, + **extra_metadata + } + } + + async def process(self, query: str, files: Optional[List], + supports_interleaved_files: bool) -> Dict[str, Any]: + raise NotImplementedError +``` + +--- + +## SimpleProcessor + +Procesador para conversaciones simples sin herramientas externas. 
+ +### Características + +- Conversación directa con el LLM +- Soporte para archivos (imágenes) +- Parsing opcional de respuestas JSON +- Extracción automática de JSON de bloques markdown + +### Flujo de Procesamiento + +``` +1. Construir mensaje del sistema (context + archivos + json_parser) +2. Añadir historial de conversación +3. Añadir mensaje del usuario +4. Invocar el LLM +5. Parsear respuesta (extraer JSON si aplica) +6. Retornar resultado estructurado +``` + +### Implementación + +```python +class SimpleProcessor(ConversationProcessor): + async def process(self, request: MessageRequest, + files: Optional[List[Dict[str, str]]] = None, + supports_interleaved_files: bool = False) -> Dict[str, Any]: + messages = [] + system_message = self.context or "" + + # Añadir referencias de archivos + if files and not supports_interleaved_files: + file_references = [] + for file in files: + tag = 'image' if file.get('type') == 'image' else 'file' + file_references.append(f"<{tag} url='{file['url']}'>") + system_message += "\n\n" + "\n".join(file_references) + + # Añadir instrucciones de JSON si se requiere + if request.json_parser: + format_instructions = json.dumps(request.json_parser, indent=2) + system_message += ( + "\n\nIMPORTANT: Respond exclusively in JSON format...\n" + f"{format_instructions}\n" + ) + + # Construir prompt + messages.append(SystemMessage(content=system_message)) + messages.append(MessagesPlaceholder(variable_name="chat_history")) + messages.append(HumanMessage(content=request.query)) + + prompt = ChatPromptTemplate.from_messages(messages) + + return await self.generate_response( + self.context, self.history, request.query, prompt + ) +``` + +### Uso + +```python +processor = SimpleProcessor(llm, agent_config.prompt, history) +result = await processor.process(request, files, True) +# result = {"context": "...", "chat_history": [...], "input": "...", "text": "..."} +``` + +--- + +## AgentProcessor + +Procesador para agentes con herramientas 
dinámicas (function calling). + +### Características + +- Soporte para herramientas personalizadas +- Uso de LangChain AgentExecutor +- Manejo de múltiples iteraciones +- Retorno de pasos intermedios + +### Flujo de Procesamiento + +``` +1. Crear template de prompt con placeholders +2. Crear agente con tool_calling +3. Configurar AgentExecutor +4. Invocar el agente +5. Retornar resultado con pasos intermedios +``` + +### Implementación + +```python +class AgentProcessor(ConversationProcessor): + def __init__(self, llm: BaseChatModel, context: str, + history: List[str], tools: List[Any]): + super().__init__(llm, context, history) + self.tools = tools + + async def process(self, request: MessageRequest, + files: Optional[List[Dict[str, str]]] = None, + supports_interleaved_files: bool = False) -> Dict[str, Any]: + + prompt_template = ChatPromptTemplate.from_messages([ + ("system", "{context}"), + MessagesPlaceholder(variable_name="chat_history"), + ("human", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ]) + + agent = create_tool_calling_agent( + llm=self.llm, + tools=self.tools, + prompt=prompt_template + ) + + agent_executor = AgentExecutor( + agent=agent, + tools=self.tools, + verbose=False, + handle_parsing_errors=True, + max_iterations=3, + return_intermediate_steps=True + ) + + result = await agent_executor.ainvoke({ + "context": self.context or "", + "chat_history": self.history, + "input": request.query, + "agent_scratchpad": "" + }) + + if "text" not in result and "output" in result: + result["text"] = result["output"] + + return result +``` + +### Configuración de Herramientas + +Las herramientas se generan dinámicamente desde la configuración del agente: + +```python +tools = ToolGenerator.generate_tools(agent_config.tools or []) +if tools: + processor = AgentProcessor(llm, agent_config.prompt, history, tools) +``` + +--- + +## MCPProcessor + +Procesador para agentes que utilizan Model Context Protocol (MCP). 
+ +### Características + +- Integración con servidores MCP +- Uso de LangGraph para agentes React +- Soporte para múltiples servidores MCP +- Extracción de información de herramientas + +### Flujo de Procesamiento + +``` +1. Conectar con servidores MCP +2. Obtener herramientas disponibles +3. Crear agente React con LangGraph +4. Procesar mensajes +5. Extraer información de herramientas usadas +6. Retornar resultado con tool_result +``` + +### Implementación + +```python +class MCPProcessor(ConversationProcessor): + def __init__(self, llm: BaseChatModel, context: str, + history: List[str], mcp_config: Dict[str, Any]): + super().__init__(llm, context, history) + self.mcp_config = mcp_config + + async def process(self, request: MessageRequest, + files: Optional[List[Dict[str, str]]] = None, + supports_interleaved_files: bool = False) -> Dict[str, Any]: + + async with MultiServerMCPClient(self.mcp_config) as client: + agent = create_react_agent(self.llm, client.get_tools()) + + messages = [] + if self.context: + messages.append({"role": "system", "content": self.context}) + + if self.history: + messages.extend(self.history) + + messages.append({"role": "user", "content": request.query}) + + response = await agent.ainvoke({"messages": messages}) + + # Extraer contenido de la respuesta + content = self._extract_content(response) + + # Extraer información de herramientas + tool_info = await self.get_tool_data(response) + + return { + "context": self.context, + "chat_history": self.history, + "input": request.query, + "text": content, + "tool_result": tool_info + } + + async def get_tool_data(self, response): + """Extrae información de las herramientas utilizadas""" + tool_messages = [ + msg for msg in response.get('messages', []) + if getattr(msg, 'type', None) == 'tool' + ] + + if tool_messages: + last_tool = tool_messages[-1] + return { + "name": last_tool.name, + "message": json.loads(last_tool.content) + } + return None +``` + +### Configuración MCP + +El MCP se 
configura en la respuesta del agente: + +```python +{ + "mcp_config": { + "server1": { + "url": "http://mcp-server:3000", + "transport": "sse" + } + } +} +``` + +--- + +## Selección de Procesador + +El `ConversationManager` selecciona el procesador apropiado: + +```python +async def process_conversation(self, request, agent_config): + ai_provider = AIProviderFactory.get_provider(agent_config.provider_ai) + llm = ai_provider.get_llm(...) + history = self.get_conversation_history(request.conversation_id) + + # Selección del procesador + if agent_config.mcp_config: + processor = MCPProcessor(llm, agent_config.prompt, history, agent_config.mcp_config) + else: + tools = ToolGenerator.generate_tools(agent_config.tools or []) + if tools: + processor = AgentProcessor(llm, agent_config.prompt, history, tools) + else: + processor = SimpleProcessor(llm, agent_config.prompt, history) + + return await processor.process(request, request.files, + ai_provider.supports_interleaved_files()) +``` + +## Trazabilidad con LangSmith + +Todos los procesadores incluyen configuración para LangSmith: + +```python +config = self._get_langsmith_config( + request, + "simple_processor", # o "agent_processor", "mcp_processor" + has_json_parser=request.json_parser is not None, + has_files=files is not None and len(files) > 0 +) + +result = await chain.ainvoke(input_data, config=config) +``` + +Esto permite: +- Ver trazas de cada request +- Identificar agentes por ID +- Depurar conversaciones específicas +- Analizar métricas de rendimiento diff --git a/docs/scrapers.md b/docs/scrapers.md new file mode 100644 index 0000000..9d5e21f --- /dev/null +++ b/docs/scrapers.md @@ -0,0 +1,350 @@ +# Sistema de Scraping + +El sistema de scraping permite extraer información de productos desde diferentes plataformas de e-commerce. 
+ +## Arquitectura + +``` +┌─────────────────────────────────────────────────────────────┐ +│ ScrapingFactory │ +│ ┌─────────────────────────────────────────────────────────┐│ +│ │ get_scraper(url, country) ││ +│ └─────────────────────────────────────────────────────────┘│ +└──────────────────────────┬──────────────────────────────────┘ + │ + ┌──────────┬───────────┼───────────┬──────────┐ + ▼ ▼ ▼ ▼ ▼ +┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐ +│ Amazon │ │AliExpr │ │ Dropi │ │ CJ │ │ IA │ +│Scraper │ │Scraper │ │Scraper │ │Scraper │ │Scraper │ +└────────┘ └────────┘ └────────┘ └────────┘ └────────┘ +``` + +## ScraperInterface + +Interfaz base que todos los scrapers deben implementar: + +```python +class ScraperInterface(ABC): + @abstractmethod + async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: + """Extrae información de un producto desde su URL""" + pass + + @abstractmethod + async def scrape_direct(self, html: str) -> Dict[str, Any]: + """Extrae información directamente desde HTML""" + raise NotImplementedError +``` + +## ScrapingFactory + +Factory que selecciona el scraper apropiado según la URL: + +```python +class ScrapingFactory: + def __init__(self, message_service: MessageServiceInterface = Depends()): + self.message_service = message_service + + def get_scraper(self, url: str, country: str = "co") -> ScraperInterface: + domain = urlparse(url).netloc.lower() + + if "amazon" in domain: + return AmazonScraper() + elif "aliexpress" in domain: + return AliexpressScraper() + elif "cjdropshipping" in domain: + return CJScraper() + elif "dropi" in domain: + return DropiScraper(country=country) + else: + return IAScraper(message_service=self.message_service) +``` + +--- + +## AmazonScraper + +Extrae productos de Amazon usando RapidAPI. 
+ +### Características + +- Extracción de ASIN desde URL +- Información de precios y variantes +- Imágenes del producto +- Descripción y características + +### Estructura de Respuesta + +```json +{ + "data": { + "provider_id": "amazon", + "external_id": "B01234567", + "name": "Nombre del producto", + "description": "Descripción del producto", + "external_sell_price": 29.99, + "images": ["url1", "url2"], + "variants": [ + { + "provider_id": "amazon", + "external_id": "B01234568", + "name": "Nombre del producto", + "images": ["url"], + "variant_key": "color-blue-size-M", + "attributes": [ + {"category_name": "Color", "value": "Blue"}, + {"category_name": "Size", "value": "M"} + ] + } + ] + } +} +``` + +### Patrones de Extracción de ASIN + +```python +patterns = [ + r'/dp/([A-Z0-9]{10})', + r'/gp/product/([A-Z0-9]{10})', + r'/ASIN/([A-Z0-9]{10})', + r'asin=([A-Z0-9]{10})', + r'asin%3D([A-Z0-9]{10})' +] +``` + +--- + +## AliexpressScraper + +Extrae productos de AliExpress usando RapidAPI. + +### Características + +- Extracción de Item ID desde URL +- Precios promocionales +- Múltiples imágenes +- Variantes con atributos + +### Estructura de Respuesta + +```json +{ + "data": { + "provider_id": "aliexpress", + "external_id": "1005001234567890", + "name": "Nombre del producto", + "description": "Propiedades del producto", + "external_sell_price": 15.99, + "images": ["url1", "url2"] + } +} +``` + +### Extracción de Precios + +```python +def _get_price(self, item_data: Dict[str, Any]) -> Optional[Decimal]: + sku_data = item_data.get("sku", {}) + def_data = sku_data.get("def", {}) + + # Precio promocional primero + promotion_price = def_data.get("promotionPrice") + if promotion_price: + return self._parse_price(promotion_price) + + # Precio regular + price = def_data.get("price") + if isinstance(price, str) and " - " in price: + price = price.split(" - ")[0] # Tomar el menor + return self._parse_price(price) +``` + +--- + +## DropiScraper + +Extrae productos de la plataforma 
Dropi. + +### Características + +- Soporte multi-país (CO, MX, AR, CL, PE, PY, EC) +- Variantes con atributos +- Stock por almacén +- Precios sugeridos + +### Configuración por País + +```python +class DropiScraper(ScraperInterface): + def __init__(self, country: str = "co"): + self.country = country +``` + +### Estructura de Respuesta + +```json +{ + "data": { + "provider_id": "dropi", + "external_id": "12345", + "name": "Nombre del producto", + "description": "Descripción limpia", + "external_sell_price": 50000, + "images": ["https://d39ru7awumhhs2.cloudfront.net/..."], + "variants": [ + { + "name": "Producto - Negro - L", + "variant_key": "color-negro-talla-l", + "price": 50000, + "available": true, + "images": ["url"], + "attributes": [ + {"name": "Color", "value": "Negro"}, + {"name": "Talla", "value": "L"} + ], + "provider_id": "dropi", + "external_id": "123", + "external_sell_price": 50000, + "external_suggested_sell_price": 80000 + } + ] + } +} +``` + +### Limpieza de Descripción + +```python +def _get_description(self, product_data: Dict[str, Any]) -> str: + html_description = product_data.get("description", "") + # Remover tags HTML + clean_text = re.sub(r'<[^>]+>', ' ', html_description) + clean_text = clean_text.replace('<br>', '\n').strip() + clean_text = re.sub(r'\s+', ' ', clean_text).strip() + return clean_text +``` + +--- + +## IAScraper + +Scraper genérico que usa IA para extraer información de cualquier sitio. + +### Características + +- Funciona con cualquier sitio web +- Usa ScraperAPI para obtener HTML +- Procesa el HTML con un agente de IA +- Limpieza profunda de HTML + +### Flujo de Procesamiento + +``` +1. Obtener HTML del sitio (ScraperAPI) +2. Limpiar HTML profundamente +3. Enviar a agente de IA para extracción +4. Parsear respuesta JSON +5. Normalizar datos +``` + +### Implementación + +```python +class IAScraper(ScraperInterface): + def __init__(self, message_service: MessageServiceInterface): + self.message_service = message_service + + async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: + client = ScraperAPIClient() + + if domain and "alibaba" in domain: + html_content = await client.get_html(url) + else: + html_content = await client.get_html_lambda(url) + + # Limpiar HTML + simplified_html = clean_html_deeply(html_content) + + # Enviar a agente de IA + message_request = MessageRequest( + query=f"provider_id={domain} . 
product_url={url} Product content: {simplified_html}", + agent_id=SCRAPER_AGENT, + conversation_id="", + ) + + result = await self.message_service.handle_message(message_request) + + # Parsear y normalizar + data = json.loads(clean_json(result['text'])) + data['data']['external_sell_price'] = parse_price(data['data']['external_sell_price']) + + return data +``` + +--- + +## ProductScrapingService + +Servicio que orquesta el scraping: + +```python +class ProductScrapingService(ProductScrapingServiceInterface): + def __init__(self, scraping_factory: ScrapingFactory = Depends()): + self.scraping_factory = scraping_factory + + async def scrape_product(self, request: ProductScrapingRequest): + url = str(request.product_url) + domain = urlparse(url).netloc.lower() + + scraper = self.scraping_factory.get_scraper(url, country=request.country) + return await scraper.scrape(url, domain) + + async def scrape_direct(self, html): + scraper = self.scraping_factory.get_scraper("https://default-url.com") + return await scraper.scrape_direct(html) +``` + +--- + +## Helper de Precios + +Utilidad para parsear diferentes formatos de precio: + +```python +def parse_price(price_str: Any) -> Optional[Decimal]: + if isinstance(price_str, (int, float)): + return Decimal(str(price_str)) + + if isinstance(price_str, str): + # Extraer números del string + match = re.search(r'(\d+(?:\.\d+)?)', price_str.replace(",", "")) + if match: + return Decimal(match.group(1)) + + return None +``` + +## Agregar Nuevo Scraper + +1. Crear clase que implemente `ScraperInterface`: + +```python +# app/scrapers/new_scraper.py +class NewScraper(ScraperInterface): + async def scrape(self, url: str, domain: str = None) -> Dict[str, Any]: + # Implementación específica + pass + + async def scrape_direct(self, html: str) -> Dict[str, Any]: + return {} +``` + +2. 
Registrar en ScrapingFactory: + +```python +# app/factories/scraping_factory.py +elif "newsite" in domain: + return NewScraper() +``` diff --git a/docs/services.md b/docs/services.md new file mode 100644 index 0000000..f228a2d --- /dev/null +++ b/docs/services.md @@ -0,0 +1,376 @@ +# Servicios + +Los servicios encapsulan la lógica de negocio principal de la aplicación. + +## MessageService + +Servicio principal para el procesamiento de mensajes y conversaciones. + +### Métodos + +#### handle_message + +Procesa un mensaje y retorna la respuesta del agente. + +```python +async def handle_message(self, request: MessageRequest) -> dict: + data = AgentConfigRequest( + agent_id=request.agent_id, + query=request.query, + metadata_filter=request.metadata_filter, + parameter_prompt=request.parameter_prompt + ) + + agent_config = await get_agent(data) + + return await self.conversation_manager.process_conversation( + request=request, + agent_config=agent_config + ) +``` + +#### handle_message_json + +Procesa un mensaje y parsea la respuesta como JSON. + +```python +async def handle_message_json(self, request: MessageRequest): + response = await self.handle_message(request) + return json.loads(response['text']) +``` + +#### recommend_products + +Recomienda productos basándose en nombre y descripción. 
+ +```python +async def recommend_products(self, request: RecommendProductRequest): + agent_id = AGENT_RECOMMEND_SIMILAR_PRODUCTS_ID if request.similar else AGENT_RECOMMEND_PRODUCTS_ID + + data = await self.handle_message(MessageRequest( + agent_id=agent_id, + conversation_id="", + query=f"Product Name: {request.product_name} Description: {request.product_description}", + )) + + json_data = json.loads(data['text']) + amazon_data = await search_products(AmazonSearchRequest(query=json_data['recommended_product'])) + + return RecommendProductResponse( + ai_response=json_data, + products=amazon_data.get_products() + ) +``` + +#### generate_copies + +Genera copys de marketing procesando múltiples agentes en paralelo. + +```python +async def generate_copies(self, request: CopyRequest): + agent_queries = [ + {'agent': agent, 'query': request.prompt} + for agent in AGENT_COPIES + ] + + combined_data = await self.process_multiple_agents(agent_queries) + return {"copies": combined_data} +``` + +#### generate_pdf + +Genera un manual PDF para un producto. + +```python +async def generate_pdf(self, request: GeneratePdfRequest): + # Verificar si ya existe + exists = await check_file_exists_direct(s3_url) + if exists: + return {"s3_url": s3_url} + + # Generar secciones con múltiples agentes + sections = get_sections_for_language(request.language) + agent_queries = [ + {'agent': "agent_copies_pdf", 'query': f"section: {section}. {base_query}"} + for section in sections.keys() + ] + + combined_data = await self.process_multiple_agents(agent_queries) + + # Crear PDF + pdf_generator = PDFManualGenerator(request.product_name, language=request.language) + pdf = await pdf_generator.create_manual(combined_data, request.title, request.image_url) + + # Subir a S3 + result = await upload_file(S3UploadRequest(...)) + return result +``` + +#### resolve_funnel + +Genera información de funnel de ventas. + +```python +async def resolve_funnel(self, request: ResolveFunnelRequest): + # 1. 
Detección de dolor + pain_detection_response = await self.handle_message(MessageRequest( + agent_id="pain_detection", + parameter_prompt={"product_name": ..., "product_description": ..., "language": ...} + )) + + # 2. Detección de comprador + buyer_detection_response = await self.handle_message(MessageRequest( + agent_id="buyer_detection", + parameter_prompt={"pain_detection": pain_detection_response['text'], ...} + )) + + # 3. Ángulos de venta + sales_angles_response = await self.handle_message_json(MessageRequest( + agent_id="sales_angles_v2", + json_parser={"angles": [{"name": "string", "description": "string"}]}, + parameter_prompt={...} + )) + + return { + "pain_detection": pain_detection_message, + "buyer_detection": buyer_detection_message, + "sales_angles": sales_angles_response["angles"] + } +``` + +--- + +## ImageService + +Servicio para generación y manipulación de imágenes. + +### Métodos + +#### generate_variation_images + +Genera variaciones de una imagen existente. + +```python +async def generate_variation_images(self, request: VariationImageRequest, owner_id: str): + folder_id = uuid.uuid4().hex[:8] + + # Subir imagen original + original_image_response = await self._upload_to_s3(request.file, owner_id, folder_id, "original") + + # Analizar con Google Vision + vision_analysis = await analyze_image(request.file) + + # Obtener prompt del agente + message_request = MessageRequest( + query=f"Attached is the product image. {vision_analysis.get_analysis_text()}", + agent_id=AGENT_IMAGE_VARIATIONS, + files=[{"type": "image", "url": original_image_response.s3_url, "content": request.file}] + ) + + response_data = await self.message_service.handle_message_with_config(message_request) + prompt = response_data["message"]["text"] + + # Generar variaciones en paralelo + tasks = [ + self._generate_single_variation([original_image_response.s3_url], prompt, owner_id, folder_id, ...) 
+ for i in range(request.num_variations) + ] + generated_urls = await asyncio.gather(*tasks) + + return GenerateImageResponse( + generated_urls=generated_urls, + original_url=original_image_response.s3_url, + generated_prompt=prompt, + vision_analysis=vision_analysis + ) +``` + +#### generate_images_from + +Genera imágenes desde un prompt y/o imagen base. + +```python +async def generate_images_from(self, request: GenerateImageRequest, owner_id: str): + folder_id = uuid.uuid4().hex[:8] + + if request.file: + original_image_response = await self._upload_to_s3(request.file, ...) + + tasks = [ + self._generate_single_variation(urls, request.prompt, owner_id, folder_id, ...) + for i in range(request.num_variations) + ] + generated_urls = await asyncio.gather(*tasks) + + return GenerateImageResponse( + original_urls=urls, + generated_urls=generated_urls, + generated_prompt=request.prompt + ) +``` + +### Proveedores de Imágenes + +```python +async def _generate_single_variation(self, url_images, prompt, owner_id, folder_id, + provider=None, model_ai=None): + if provider and provider.lower() == "openai": + image_content = await openai_image_edit(image_urls=url_images, prompt=prompt, ...) + else: + image_content = await google_image(image_urls=url_images, prompt=prompt, ...) + + # Comprimir y subir + content_base64 = base64.b64encode(image_content).decode('utf-8') + return await self._upload_to_s3(content_base64, owner_id, folder_id, "variation") +``` + +--- + +## VideoService + +Servicio para generación de videos con FAL AI. 
+ +### Tipos de Video + +| Tipo | Descripción | Campos requeridos | +|------|-------------|-------------------| +| `animated_scene` | Escena animada desde imagen | prompt, image_url | +| `human_scene` | Escena con humano hablando | image_url, audio_url | + +### Implementación + +```python +class VideoService(VideoServiceInterface): + def __init__(self, fal_client: FalClient = Depends()): + self.fal_client = fal_client + + async def generate_video(self, request: GenerateVideoRequest) -> Dict[str, Any]: + content = request.content or {} + + if request.type == VideoType.animated_scene: + return await self.fal_client.kling_image_to_video( + prompt=content.get("prompt"), + image_url=content.get("image_url"), + fal_webhook=content.get("fal_webhook") + ) + + if request.type == VideoType.human_scene: + return await self.fal_client.bytedance_omnihuman( + image_url=content.get("image_url"), + audio_url=content.get("audio_url"), + fal_webhook=content.get("fal_webhook") + ) +``` + +--- + +## AudioService + +Servicio para generación de audio (Text-to-Speech). + +```python +class AudioService(AudioServiceInterface): + def __init__(self, fal_client: FalClient = Depends()): + self.fal_client = fal_client + + async def generate_audio(self, request: GenerateAudioRequest) -> Dict[str, Any]: + if not request.text: + raise HTTPException(status_code=400, detail="Falta 'text'") + + content = request.content or {} + fal_webhook = content.get("fal_webhook") + + return await self.fal_client.tts_multilingual_v2( + text=request.text, + fal_webhook=fal_webhook, + **{k: v for k, v in content.items() if k != "fal_webhook"} + ) +``` + +--- + +## DropiService + +Servicio para integración con la plataforma Dropi. 
+ +```python +class DropiService(DropiServiceInterface): + async def get_departments(self, country: str = "co") -> List[Dict[str, Any]]: + response = await dropi_client.get_departments(country) + return response.get("objects", []) + + async def get_cities_by_department(self, department_id: int, + country: str = "co") -> List[Dict[str, Any]]: + rate_type = "CON RECAUDO" + response = await dropi_client.get_cities_by_department( + department_id, rate_type, country + ) + return response.get("objects", {}).get("cities", []) +``` + +--- + +## ProductScrapingService + +Servicio para scraping de productos (ver [Scrapers](./scrapers.md)). + +```python +class ProductScrapingService(ProductScrapingServiceInterface): + def __init__(self, scraping_factory: ScrapingFactory = Depends()): + self.scraping_factory = scraping_factory + + async def scrape_product(self, request: ProductScrapingRequest): + url = str(request.product_url) + domain = urlparse(url).netloc.lower() + + scraper = self.scraping_factory.get_scraper(url, country=request.country) + return await scraper.scrape(url, domain) +``` + +--- + +## Interfaces de Servicio + +Cada servicio tiene una interfaz que permite la inyección de dependencias: + +```python +# message_service_interface.py +class MessageServiceInterface(ABC): + @abstractmethod + async def handle_message(self, request: MessageRequest) -> dict: + pass + +# image_service_interface.py +class ImageServiceInterface(ABC): + @abstractmethod + async def generate_variation_images(self, request, owner_id) -> GenerateImageResponse: + pass + +# Inyección en main.py +app.dependency_overrides[MessageServiceInterface] = MessageService +app.dependency_overrides[ImageServiceInterface] = ImageService +``` + +## Procesamiento Paralelo + +Los servicios utilizan `asyncio.gather` para procesar múltiples tareas en paralelo: + +```python +async def process_multiple_agents(self, agent_queries: list[dict]) -> dict: + tasks = [ + self.handle_message(MessageRequest( + 
agent_id=item['agent'], + query=item['query'] + )) for item in agent_queries + ] + + responses = await asyncio.gather(*tasks, return_exceptions=True) + + combined_data = {} + for response in responses: + if not isinstance(response, Exception): + data = json.loads(response['text']) + combined_data.update(data) + + return combined_data +``` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..40e8fce --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,62 @@ +[tool.black] +line-length = 120 +target-version = ['py310'] +include = '\.pyi?$' +extend-exclude = ''' +/( + \.eggs + | \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | venv + | _build + | buck-out + | build + | dist +)/ +''' + +[tool.isort] +profile = "black" +line_length = 120 +known_first_party = ["app"] +skip = [".venv", "venv", ".git", "__pycache__"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" +addopts = "-v --tb=short --strict-markers" +markers = [ + "unit: Unit tests", + "integration: Integration tests", + "slow: Slow running tests", +] +filterwarnings = [ + "ignore::DeprecationWarning", + "ignore::PendingDeprecationWarning", +] + +[tool.coverage.run] +source = ["app"] +omit = [ + "*/tests/*", + "*/__pycache__/*", + "*/venv/*", + "*/.venv/*", +] + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "def __repr__", + "raise AssertionError", + "raise NotImplementedError", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", +] diff --git a/requirements.txt b/requirements.txt index 9d33655..4f959cd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,3 +21,14 @@ Pillow==10.3.0 html5lib requests langsmith +aiohttp + +# Testing +pytest>=8.0.0 +pytest-asyncio>=0.23.0 +pytest-cov>=4.1.0 + +# Linting & Formatting +black>=24.0.0 +flake8>=7.0.0 +isort>=5.13.0 diff --git a/tests/README.md 
b/tests/README.md new file mode 100644 index 0000000..adc018c --- /dev/null +++ b/tests/README.md @@ -0,0 +1,185 @@ +# Tests para Conversational Engine + +Este directorio contiene los tests unitarios y de integración para el proyecto conversational-engine. + +## Estructura + +``` +tests/ +├── conftest.py # Fixtures globales compartidas +├── unit/ # Tests unitarios +│ ├── factories/ # Tests para AIProviderFactory, ScrapingFactory +│ ├── providers/ # Tests para proveedores de IA +│ ├── scrapers/ # Tests para scrapers de productos +│ ├── helpers/ # Tests para helpers (escape, compression) +│ ├── services/ # Tests para servicios principales +│ ├── processors/ # Tests para procesadores de conversación +│ ├── managers/ # Tests para ConversationManager +│ ├── middlewares/ # Tests para middlewares de auth +│ ├── externals/ # Tests para clientes externos (FAL, Vision) +│ ├── models/ # Tests para modelos Pydantic +│ └── tools/ # Tests para ToolGenerator +└── integration/ # Tests de integración + └── test_api_endpoints.py # Tests de endpoints de la API +``` + +## Comandos Rápidos (Makefile) + +```bash +# Ejecutar todos los tests +make test + +# Tests con cobertura +make test-cov + +# Solo tests unitarios +make test-unit + +# Solo tests de integración +make test-integration + +# Verificar formato y linting +make lint + +# Formatear código automáticamente +make format +``` + +## Ejecutar Tests Manualmente + +### Todos los tests + +```bash +pytest +``` + +### Solo tests unitarios + +```bash +pytest tests/unit -v +``` + +### Solo tests de integración + +```bash +pytest tests/integration -v +``` + +### Tests con cobertura + +```bash +pytest --cov=app --cov-report=html +``` + +### Tests específicos por módulo + +```bash +# Factories +pytest tests/unit/factories -v + +# Providers +pytest tests/unit/providers -v + +# Scrapers +pytest tests/unit/scrapers -v + +# Services +pytest tests/unit/services -v + +# Processors +pytest tests/unit/processors -v +``` + +### Tests por marcador +
+```bash +# Solo tests unitarios +pytest -m unit + +# Solo tests de integración +pytest -m integration + +# Tests lentos +pytest -m slow +``` + +## Fixtures Disponibles + +Las fixtures globales están definidas en `conftest.py`: + +### Datos de Ejemplo +- `sample_message_request_data`: Datos para MessageRequest +- `sample_agent_config_data`: Datos para AgentConfigResponse +- `sample_product_data`: Datos de producto scrapeado +- `sample_amazon_product_data`: Respuesta de Amazon +- `sample_aliexpress_product_data`: Respuesta de AliExpress + +### Mocks de Servicios +- `mock_httpx_client`: Mock para httpx.AsyncClient +- `mock_llm`: Mock para modelos de lenguaje +- `mock_agent_config`: Mock de AgentConfigResponse +- `mock_conversation_manager`: Mock de ConversationManager +- `mock_message_service`: Mock de MessageService +- `mock_fal_client`: Mock de FalClient + +### Otros +- `mock_request`: Mock de FastAPI Request +- `sample_base64_image`: Imagen de prueba en base64 +- `sample_html_content`: HTML de ejemplo +- `sample_tool_config`: Configuración de herramienta +- `mock_env_vars`: Variables de entorno mockeadas + +## Escribir Nuevos Tests + +### Convenciones + +1. **Nombres de archivos**: `test_<module>.py` +2. **Nombres de clases**: `Test<ClassName>` +3. 
**Nombres de funciones**: `test_<method>_<scenario>` + +### Ejemplo + +```python +import pytest +from app.module import MyClass + +class TestMyClass: + """Tests para MyClass.""" + + @pytest.fixture + def instance(self): + """Crear instancia de prueba.""" + return MyClass() + + @pytest.mark.unit + def test_method_returns_expected(self, instance): + """El método debe retornar el valor esperado.""" + result = instance.method() + assert result == expected_value + + @pytest.mark.unit + @pytest.mark.asyncio + async def test_async_method(self, instance): + """El método async debe funcionar correctamente.""" + result = await instance.async_method() + assert result is not None +``` + +### Marcadores Disponibles + +- `@pytest.mark.unit`: Tests unitarios +- `@pytest.mark.integration`: Tests de integración +- `@pytest.mark.slow`: Tests que tardan mucho +- `@pytest.mark.asyncio`: Tests asíncronos + +## CI/CD + +Para ejecutar en CI: + +```bash +# Instalar dependencias de test +pip install pytest pytest-asyncio pytest-cov + +# Ejecutar tests con reporte de cobertura +pytest --cov=app --cov-report=xml --junitxml=test-results.xml +``` diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..d4839a6 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +# Tests package diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..0ff022c --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,256 @@ +""" +Configuración global de pytest para el proyecto conversational-engine. +Contiene fixtures compartidas entre todos los tests. 
+""" + +from typing import Any, Dict, List +from unittest.mock import AsyncMock, MagicMock + +import pytest + +# ============================================================================ +# Fixtures para Modelos de Datos +# ============================================================================ + + +@pytest.fixture +def sample_message_request_data() -> Dict[str, Any]: + """Datos de ejemplo para MessageRequest.""" + return { + "agent_id": "test-agent", + "conversation_id": "conv-123", + "query": "Hello, how are you?", + "metadata_filter": [], + "parameter_prompt": {"language": "es"}, + "files": [], + "json_parser": None, + } + + +@pytest.fixture +def sample_agent_config_data() -> Dict[str, Any]: + """Datos de ejemplo para AgentConfigResponse.""" + return { + "id": 1, + "agent_id": "test-agent", + "description": "Test agent description", + "prompt": "You are a helpful assistant.", + "provider_ai": "openai", + "model_ai": "gpt-4", + "preferences": {"temperature": 0.7, "max_tokens": 1000, "top_p": 1.0, "extra_parameters": None}, + "tools": [], + "mcp_config": None, + } + + +@pytest.fixture +def sample_product_data() -> Dict[str, Any]: + """Datos de ejemplo para un producto scrapeado.""" + return { + "name": "Test Product", + "description": "A test product description", + "external_sell_price": "29.99", + "images": ["https://example.com/image1.jpg", "https://example.com/image2.jpg"], + } + + +@pytest.fixture +def sample_amazon_product_data() -> Dict[str, Any]: + """Datos de ejemplo de respuesta de Amazon.""" + return { + "data": { + "product_title": "Amazon Test Product", + "product_description": "Product description from Amazon", + "product_price": "$49.99", + "product_photos": ["https://amazon.com/img1.jpg", "https://amazon.com/img2.jpg"], + "product_variations_dimensions": ["Color", "Size"], + "product_variations": { + "Color": [{"value": "Red", "photo": "https://amazon.com/red.jpg"}], + "Size": [{"value": "Large"}], + }, + "all_product_variations": {}, + } + 
} + + +@pytest.fixture +def sample_aliexpress_product_data() -> Dict[str, Any]: + """Datos de ejemplo de respuesta de AliExpress.""" + return { + "result": { + "item": { + "title": "AliExpress Test Product", + "description": {"html": "

Product description

"}, + "images": ["//ae01.alicdn.com/img1.jpg"], + "sku": {"def": {"promotionPrice": "15.99", "price": "19.99"}, "base": [], "props": []}, + } + } + } + + +# ============================================================================ +# Fixtures para Mocks de Servicios Externos +# ============================================================================ + + +@pytest.fixture +def mock_httpx_client(): + """Mock para httpx.AsyncClient.""" + mock = MagicMock() + mock.get = AsyncMock() + mock.post = AsyncMock() + return mock + + +@pytest.fixture +def mock_llm(): + """Mock para modelos de lenguaje LangChain.""" + mock = MagicMock() + mock.ainvoke = AsyncMock(return_value=MagicMock(content="Test response")) + return mock + + +@pytest.fixture +def mock_agent_config(): + """Mock para AgentConfigResponse.""" + from app.externals.agent_config.responses.agent_config_response import AgentConfigResponse, AgentPreferences + + return AgentConfigResponse( + id=1, + agent_id="test-agent", + description="Test agent", + prompt="You are a helpful assistant.", + provider_ai="openai", + model_ai="gpt-4", + preferences=AgentPreferences(temperature=0.7, max_tokens=1000, top_p=1.0, extra_parameters=None), + tools=[], + mcp_config=None, + ) + + +@pytest.fixture +def mock_conversation_manager(): + """Mock para ConversationManager.""" + mock = MagicMock() + mock.get_conversation_history = MagicMock(return_value=[]) + mock.process_conversation = AsyncMock(return_value={"text": "Test response"}) + return mock + + +@pytest.fixture +def mock_message_service(): + """Mock para MessageService.""" + mock = MagicMock() + mock.handle_message = AsyncMock(return_value={"text": "Test response"}) + mock.handle_message_json = AsyncMock(return_value={"result": "test"}) + mock.handle_message_with_config = AsyncMock( + return_value={"message": {"text": "Test response"}, "agent_config": MagicMock()} + ) + return mock + + +@pytest.fixture +def mock_fal_client(): + """Mock para FalClient.""" + mock = 
MagicMock() + mock.tts_multilingual_v2 = AsyncMock(return_value={"audio_url": "https://example.com/audio.mp3"}) + mock.kling_image_to_video = AsyncMock(return_value={"video_url": "https://example.com/video.mp4"}) + mock.bytedance_omnihuman = AsyncMock(return_value={"video_url": "https://example.com/human.mp4"}) + return mock + + +# ============================================================================ +# Fixtures para Testing de API +# ============================================================================ + + +@pytest.fixture +def mock_request(): + """Mock para FastAPI Request.""" + mock = MagicMock() + mock.headers = {"authorization": "Bearer test-token", "x-api-key": "test-api-key"} + mock.state = MagicMock() + mock.state.user_info = {"data": {"id": "user-123"}} + return mock + + +# ============================================================================ +# Fixtures para Imágenes y Archivos +# ============================================================================ + + +@pytest.fixture +def sample_base64_image() -> str: + """Base64 de una imagen de prueba pequeña (1x1 pixel PNG).""" + return "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" + + +@pytest.fixture +def sample_html_content() -> str: + """HTML de ejemplo para testing de scrapers.""" + return """ + + + + Test Product + + + + +

Test Product Name

+
$29.99
+ Product Image +

This is a test product description.

+ + + """ + + +# ============================================================================ +# Fixtures para Tools +# ============================================================================ + + +@pytest.fixture +def sample_tool_config() -> Dict[str, Any]: + """Configuración de ejemplo para una herramienta.""" + return { + "tool_name": "test_tool", + "description": "A test tool for testing purposes", + "config": { + "name": "test_tool", + "description": "Test tool", + "api": "https://api.example.com/test", + "method": "POST", + "properties": [ + {"name": "param1", "description": "First parameter"}, + {"name": "param2", "description": "Second parameter"}, + ], + "body": {"param1": "{param1}", "param2": "{param2}"}, + "headers": [{"Content-Type": "application/json"}], + "query_params": None, + }, + } + + +# ============================================================================ +# Fixtures de Configuración de Environment +# ============================================================================ + + +@pytest.fixture +def mock_env_vars(monkeypatch): + """Mock de variables de entorno comunes.""" + env_vars = { + "OPENAI_API_KEY": "test-openai-key", + "ANTHROPIC_API_KEY": "test-anthropic-key", + "GOOGLE_GEMINI_API_KEY": "test-gemini-key", + "FAL_AI_API_KEY": "test-fal-key", + "GOOGLE_VISION_API_KEY": "test-vision-key", + "API_KEY": "test-api-key", + "HOST_AGENT_CONFIG": "http://localhost:8000", + "DEEP_SEEK_HOST": "http://localhost:11434", + } + for key, value in env_vars.items(): + monkeypatch.setenv(key, value) + return env_vars diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 0000000..0ca287e --- /dev/null +++ b/tests/integration/__init__.py @@ -0,0 +1 @@ +# Integration tests diff --git a/tests/integration/test_api_endpoints.py b/tests/integration/test_api_endpoints.py new file mode 100644 index 0000000..b284bea --- /dev/null +++ b/tests/integration/test_api_endpoints.py @@ -0,0 +1,307 @@ +""" +Tests 
de integración para los endpoints de la API. +Verifica el comportamiento end-to-end de los controladores. +""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from fastapi import FastAPI +from fastapi.testclient import TestClient + +from app.controllers.handle_controller import router +from app.services.audio_service_interface import AudioServiceInterface +from app.services.image_service_interface import ImageServiceInterface +from app.services.message_service_interface import MessageServiceInterface +from app.services.product_scraping_service_interface import ProductScrapingServiceInterface +from app.services.video_service_interface import VideoServiceInterface + + +class TestAPIEndpoints: + """Tests para los endpoints de la API.""" + + @pytest.fixture + def app(self): + """Crear aplicación FastAPI de prueba.""" + test_app = FastAPI() + test_app.include_router(router) + return test_app + + @pytest.fixture + def mock_message_service(self): + """Mock para MessageService.""" + mock = MagicMock(spec=MessageServiceInterface) + mock.handle_message = AsyncMock(return_value={"text": "Test response"}) + mock.handle_message_json = AsyncMock(return_value={"result": "success"}) + mock.recommend_products = AsyncMock( + return_value=MagicMock(ai_response={"recommendation": "product"}, products=[{"name": "Product 1"}]) + ) + mock.generate_pdf = AsyncMock(return_value={"s3_url": "https://s3.example.com/doc.pdf"}) + mock.generate_copies = AsyncMock(return_value={"copies": {"headline": "Test"}}) + mock.resolve_funnel = AsyncMock( + return_value={"pain_detection": "pain", "buyer_detection": "buyer", "sales_angles": []} + ) + mock.resolve_brand_context = AsyncMock(return_value={"brands": ["Brand1"], "contexts": ["Context1"]}) + return mock + + @pytest.fixture + def mock_image_service(self): + """Mock para ImageService.""" + mock = MagicMock(spec=ImageServiceInterface) + mock.generate_variation_images = AsyncMock( + return_value=MagicMock( + 
original_url="https://example.com/original.jpg", + generated_urls=["https://example.com/var1.jpg"], + generated_prompt="Test prompt", + ) + ) + mock.generate_images_from = AsyncMock( + return_value=MagicMock( + original_url="https://example.com/original.jpg", + generated_urls=["https://example.com/gen1.jpg"], + generated_prompt="Test prompt", + ) + ) + return mock + + @pytest.fixture + def mock_video_service(self): + """Mock para VideoService.""" + mock = MagicMock(spec=VideoServiceInterface) + mock.generate_video = AsyncMock(return_value={"video_url": "https://example.com/video.mp4"}) + return mock + + @pytest.fixture + def mock_audio_service(self): + """Mock para AudioService.""" + mock = MagicMock(spec=AudioServiceInterface) + mock.generate_audio = AsyncMock(return_value={"audio_url": "https://example.com/audio.mp3"}) + return mock + + @pytest.fixture + def client(self, app, mock_message_service, mock_image_service, mock_video_service, mock_audio_service): + """Crear cliente de prueba con dependencias mockeadas.""" + app.dependency_overrides[MessageServiceInterface] = lambda: mock_message_service + app.dependency_overrides[ImageServiceInterface] = lambda: mock_image_service + app.dependency_overrides[VideoServiceInterface] = lambda: mock_video_service + app.dependency_overrides[AudioServiceInterface] = lambda: mock_audio_service + return TestClient(app) + + # ======================================================================== + # Tests para /health + # ======================================================================== + + @pytest.mark.integration + def test_health_check(self, client): + """Debe retornar status OK.""" + response = client.get("/api/ms/conversational-engine/health") + + assert response.status_code == 200 + assert response.json() == {"status": "OK"} + + # ======================================================================== + # Tests para /handle-message + # ======================================================================== + + 
@pytest.mark.integration + def test_handle_message_success(self, client, mock_message_service): + """Debe procesar mensaje correctamente.""" + response = client.post( + "/api/ms/conversational-engine/handle-message", + json={"agent_id": "test-agent", "conversation_id": "conv-123", "query": "Hello"}, + ) + + assert response.status_code == 200 + assert response.json() == {"text": "Test response"} + mock_message_service.handle_message.assert_called_once() + + @pytest.mark.integration + def test_handle_message_with_metadata(self, client, mock_message_service): + """Debe pasar metadata_filter correctamente.""" + response = client.post( + "/api/ms/conversational-engine/handle-message", + json={ + "agent_id": "test-agent", + "conversation_id": "conv-123", + "query": "Hello", + "metadata_filter": [{"key": "category", "value": "tech", "evaluator": "="}], + "parameter_prompt": {"language": "es"}, + }, + ) + + assert response.status_code == 200 + + @pytest.mark.integration + def test_handle_message_validation_error(self, client): + """Debe retornar 422 para datos inválidos.""" + response = client.post( + "/api/ms/conversational-engine/handle-message", + json={ + "agent_id": "test-agent" + # Falta conversation_id y query + }, + ) + + assert response.status_code == 422 + + # ======================================================================== + # Tests para /handle-message-json + # ======================================================================== + + @pytest.mark.integration + def test_handle_message_json_success(self, client, mock_message_service): + """Debe retornar respuesta JSON parseada.""" + response = client.post( + "/api/ms/conversational-engine/handle-message-json", + json={"agent_id": "test-agent", "conversation_id": "", "query": "Get data"}, + ) + + assert response.status_code == 200 + assert response.json() == {"result": "success"} + + # ======================================================================== + # Tests para /recommend-product + # 
======================================================================== + + @pytest.mark.integration + def test_recommend_product_success(self, client, mock_message_service): + """Debe recomendar productos.""" + response = client.post( + "/api/ms/conversational-engine/recommend-product", + json={"product_name": "Headphones", "product_description": "Wireless headphones", "similar": False}, + ) + + assert response.status_code == 200 + + # ======================================================================== + # Tests para /generate-pdf + # ======================================================================== + + @pytest.mark.integration + def test_generate_pdf_success(self, client, mock_message_service): + """Debe generar PDF.""" + response = client.post( + "/api/ms/conversational-engine/generate-pdf", + json={ + "product_name": "Test Product", + "product_description": "Description", + "product_id": "prod-123", + "owner_id": "owner-123", + "title": "Manual", + "image_url": "https://example.com/img.jpg", + "language": "es", + "content": "Product content", + }, + ) + + assert response.status_code == 200 + + # ======================================================================== + # Tests para /generate-copies + # ======================================================================== + + @pytest.mark.integration + def test_generate_copies_success(self, client, mock_message_service): + """Debe generar copies.""" + response = client.post( + "/api/ms/conversational-engine/generate-copies", json={"prompt": "Product description for copies"} + ) + + assert response.status_code == 200 + assert "copies" in response.json() + + # ======================================================================== + # Tests para /resolve-info-funnel + # ======================================================================== + + @pytest.mark.integration + def test_resolve_funnel_success(self, client, mock_message_service): + """Debe resolver información del funnel.""" + response = 
client.post( + "/api/ms/conversational-engine/resolve-info-funnel", + json={"product_name": "Test Product", "product_description": "Description", "language": "es"}, + ) + + assert response.status_code == 200 + data = response.json() + assert "pain_detection" in data + assert "buyer_detection" in data + assert "sales_angles" in data + + # ======================================================================== + # Tests para Dropi endpoints + # ======================================================================== + + @pytest.mark.integration + @patch("app.services.dropi_service.dropi_client") + def test_get_departments(self, mock_dropi_client, client): + """Debe obtener departamentos de Dropi.""" + mock_dropi_client.get_departments = AsyncMock(return_value={"objects": [{"id": 1, "name": "Dept 1"}]}) + + response = client.get("/api/ms/conversational-engine/integration/dropi/departments") + + assert response.status_code == 200 + + @pytest.mark.integration + @patch("app.services.dropi_service.dropi_client") + def test_get_cities_by_department(self, mock_dropi_client, client): + """Debe obtener ciudades por departamento.""" + mock_dropi_client.get_cities_by_department = AsyncMock( + return_value={"objects": {"cities": [{"id": 1, "name": "City 1"}]}} + ) + + response = client.get("/api/ms/conversational-engine/integration/dropi/departments/1/cities") + + assert response.status_code == 200 + + +class TestAuthenticatedEndpoints: + """Tests para endpoints que requieren autenticación.""" + + @pytest.mark.integration + def test_scrape_product_requires_auth_header(self): + """Endpoint scrape-product requiere header de autenticación.""" + # Este test verifica que el endpoint existe y requiere auth + # La implementación real del middleware maneja la autenticación + from fastapi import FastAPI + from fastapi.testclient import TestClient + + test_app = FastAPI() + test_app.include_router(router) + + mock_scraping = MagicMock(spec=ProductScrapingServiceInterface) + 
mock_scraping.scrape_product = AsyncMock(return_value={"data": {}}) + test_app.dependency_overrides[ProductScrapingServiceInterface] = lambda: mock_scraping + + client = TestClient(test_app, raise_server_exceptions=False) + + # Sin header de auth + response = client.post( + "/api/ms/conversational-engine/scrape-product", json={"product_url": "https://amazon.com/dp/B08TEST"} + ) + + # Debería fallar por falta de autenticación (401 o 500 dependiendo de la config) + assert response.status_code in [401, 500, 422] + + @pytest.mark.integration + def test_generate_images_api_key_requires_header(self): + """Endpoint con api-key requiere x-api-key header.""" + from fastapi import FastAPI + from fastapi.testclient import TestClient + + test_app = FastAPI() + test_app.include_router(router) + + mock_image = MagicMock(spec=ImageServiceInterface) + mock_image.generate_images_from = AsyncMock(return_value=MagicMock()) + test_app.dependency_overrides[ImageServiceInterface] = lambda: mock_image + + client = TestClient(test_app, raise_server_exceptions=False) + + response = client.post( + "/api/ms/conversational-engine/generate-images-from/api-key", + json={"prompt": "Generate image", "file_url": "https://example.com/img.jpg"}, + ) + + # Debería fallar por falta de API key + assert response.status_code in [401, 500] diff --git a/tests/unit/externals/__init__.py b/tests/unit/externals/__init__.py new file mode 100644 index 0000000..7eb78c6 --- /dev/null +++ b/tests/unit/externals/__init__.py @@ -0,0 +1 @@ +# External client tests diff --git a/tests/unit/externals/test_fal_client.py b/tests/unit/externals/test_fal_client.py new file mode 100644 index 0000000..c5bad2c --- /dev/null +++ b/tests/unit/externals/test_fal_client.py @@ -0,0 +1,192 @@ +""" +Tests para FalClient. +Verifica la integración con FAL AI. 
+""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from app.externals.fal.fal_client import FalClient + + +class TestFalClient: + """Tests para FalClient.""" + + @pytest.fixture + def client(self): + """Crear instancia de FalClient con API key.""" + return FalClient(api_key="test-api-key") + + @pytest.fixture + def mock_httpx_response(self): + """Mock de respuesta httpx.""" + mock = MagicMock() + mock.json.return_value = {"result": "success"} + mock.raise_for_status = MagicMock() + return mock + + @pytest.mark.unit + def test_initialization_with_api_key(self, client): + """Debe inicializarse con API key proporcionada.""" + assert client.api_key == "test-api-key" + + @pytest.mark.unit + def test_initialization_from_env(self): + """Debe usar API key de variable de entorno.""" + with patch("app.externals.fal.fal_client.FAL_AI_API_KEY", "env-api-key"): + client = FalClient() + assert client.api_key == "env-api-key" + + # ======================================================================== + # Tests para _post + # ======================================================================== + + @pytest.mark.unit + @pytest.mark.asyncio + @patch("app.externals.fal.fal_client.httpx.AsyncClient") + async def test_post_success(self, mock_client_class, client, mock_httpx_response): + """Debe realizar POST correctamente.""" + mock_client = MagicMock() + mock_client.post = AsyncMock(return_value=mock_httpx_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock() + mock_client_class.return_value = mock_client + + result = await client._post("test/path", {"key": "value"}) + + assert result == {"result": "success"} + mock_client.post.assert_called_once() + + @pytest.mark.unit + @pytest.mark.asyncio + @patch("app.externals.fal.fal_client.httpx.AsyncClient") + async def test_post_with_webhook(self, mock_client_class, client, mock_httpx_response): + """Debe incluir webhook en URL.""" + mock_client = 
MagicMock() + mock_client.post = AsyncMock(return_value=mock_httpx_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock() + mock_client_class.return_value = mock_client + + await client._post("test/path", {"key": "value"}, fal_webhook="https://callback.example.com") + + call_args = mock_client.post.call_args + assert "fal_webhook" in call_args[0][0] + + @pytest.mark.unit + @pytest.mark.asyncio + async def test_post_without_api_key_raises(self): + """Debe lanzar error si no hay API key.""" + client = FalClient(api_key=None) + + with pytest.raises(ValueError) as exc_info: + await client._post("test/path", {}) + + assert "FAL_AI_API_KEY" in str(exc_info.value) + + # ======================================================================== + # Tests para tts_multilingual_v2 + # ======================================================================== + + @pytest.mark.unit + @pytest.mark.asyncio + @patch.object(FalClient, "_post") + async def test_tts_multilingual_v2(self, mock_post, client): + """Debe llamar a TTS endpoint correctamente.""" + mock_post.return_value = {"audio_url": "https://example.com/audio.mp3"} + + result = await client.tts_multilingual_v2(text="Hello world") + + mock_post.assert_called_once_with("fal-ai/elevenlabs/tts/multilingual-v2", {"text": "Hello world"}, None) + assert result["audio_url"] == "https://example.com/audio.mp3" + + @pytest.mark.unit + @pytest.mark.asyncio + @patch.object(FalClient, "_post") + async def test_tts_with_extra_params(self, mock_post, client): + """Debe pasar parámetros extra.""" + mock_post.return_value = {"audio_url": "https://example.com/audio.mp3"} + + await client.tts_multilingual_v2(text="Hello", voice_id="custom_voice", speed=1.5) + + call_args = mock_post.call_args + payload = call_args[0][1] + assert payload["voice_id"] == "custom_voice" + assert payload["speed"] == 1.5 + + # ======================================================================== + # Tests 
para bytedance_omnihuman + # ======================================================================== + + @pytest.mark.unit + @pytest.mark.asyncio + @patch.object(FalClient, "_post") + async def test_bytedance_omnihuman(self, mock_post, client): + """Debe llamar a OmniHuman endpoint correctamente.""" + mock_post.return_value = {"video_url": "https://example.com/video.mp4"} + + result = await client.bytedance_omnihuman( + image_url="https://example.com/image.jpg", audio_url="https://example.com/audio.mp3" + ) + + mock_post.assert_called_once_with( + "fal-ai/bytedance/omnihuman", + {"image_url": "https://example.com/image.jpg", "audio_url": "https://example.com/audio.mp3"}, + None, + ) + assert result["video_url"] == "https://example.com/video.mp4" + + @pytest.mark.unit + @pytest.mark.asyncio + @patch.object(FalClient, "_post") + async def test_bytedance_omnihuman_with_webhook(self, mock_post, client): + """Debe incluir webhook.""" + mock_post.return_value = {"request_id": "123"} + + await client.bytedance_omnihuman( + image_url="https://example.com/image.jpg", + audio_url="https://example.com/audio.mp3", + fal_webhook="https://callback.example.com", + ) + + call_args = mock_post.call_args + assert call_args[0][2] == "https://callback.example.com" + + # ======================================================================== + # Tests para kling_image_to_video + # ======================================================================== + + @pytest.mark.unit + @pytest.mark.asyncio + @patch.object(FalClient, "_post") + async def test_kling_image_to_video(self, mock_post, client): + """Debe llamar a Kling endpoint correctamente.""" + mock_post.return_value = {"video_url": "https://example.com/video.mp4"} + + result = await client.kling_image_to_video( + prompt="A beautiful animation", image_url="https://example.com/image.jpg" + ) + + mock_post.assert_called_once_with( + "fal-ai/kling-video/v2/master/image-to-video", + {"prompt": "A beautiful animation", "image_url": 
"https://example.com/image.jpg"}, + None, + ) + assert result["video_url"] == "https://example.com/video.mp4" + + @pytest.mark.unit + @pytest.mark.asyncio + @patch.object(FalClient, "_post") + async def test_kling_with_extra_params(self, mock_post, client): + """Debe pasar parámetros extra como duración.""" + mock_post.return_value = {"video_url": "https://example.com/video.mp4"} + + await client.kling_image_to_video( + prompt="Animation", image_url="https://example.com/image.jpg", duration=10, fps=30 + ) + + call_args = mock_post.call_args + payload = call_args[0][1] + assert payload["duration"] == 10 + assert payload["fps"] == 30 diff --git a/tests/unit/externals/test_google_vision_client.py b/tests/unit/externals/test_google_vision_client.py new file mode 100644 index 0000000..766a367 --- /dev/null +++ b/tests/unit/externals/test_google_vision_client.py @@ -0,0 +1,193 @@ +""" +Tests para google_vision_client. +Verifica la integración con Google Cloud Vision API. +""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from app.externals.google_vision.google_vision_client import analyze_image +from app.externals.google_vision.responses.vision_analysis_response import VisionAnalysisResponse + + +class TestGoogleVisionClient: + """Tests para google_vision_client.""" + + @pytest.fixture + def sample_vision_response(self): + """Respuesta de ejemplo de Google Vision API.""" + return { + "responses": [ + { + "labelAnnotations": [ + {"description": "Product", "score": 0.95}, + {"description": "Electronics", "score": 0.85}, + {"description": "Technology", "score": 0.75}, + ], + "logoAnnotations": [{"description": "Apple", "score": 0.90}], + } + ] + } + + @pytest.fixture + def sample_vision_response_no_logo(self): + """Respuesta sin logo detectado.""" + return {"responses": [{"labelAnnotations": [{"description": "Product", "score": 0.95}], "logoAnnotations": []}]} + + @pytest.fixture + def sample_vision_response_low_score(self): + """Respuesta con 
scores bajos.""" + return { + "responses": [ + { + "labelAnnotations": [{"description": "Unknown", "score": 0.3}], + "logoAnnotations": [{"description": "Maybe Logo", "score": 0.5}], + } + ] + } + + # ======================================================================== + # Tests para analyze_image + # ======================================================================== + + @pytest.mark.unit + @pytest.mark.asyncio + @patch("app.externals.google_vision.google_vision_client.aiohttp.ClientSession") + async def test_analyze_image_success(self, mock_session_class, sample_vision_response, sample_base64_image): + """Debe analizar imagen correctamente.""" + mock_response = MagicMock() + mock_response.status = 200 + mock_response.json = AsyncMock(return_value=sample_vision_response) + + mock_session = MagicMock() + mock_session.post = MagicMock( + return_value=MagicMock(__aenter__=AsyncMock(return_value=mock_response), __aexit__=AsyncMock()) + ) + mock_session.__aenter__ = AsyncMock(return_value=mock_session) + mock_session.__aexit__ = AsyncMock() + mock_session_class.return_value = mock_session + + result = await analyze_image(sample_base64_image) + + assert isinstance(result, VisionAnalysisResponse) + assert result.logo_description == "Apple" + assert "Product" in result.label_description + assert "Electronics" in result.label_description + + @pytest.mark.unit + @pytest.mark.asyncio + @patch("app.externals.google_vision.google_vision_client.aiohttp.ClientSession") + async def test_analyze_image_no_logo(self, mock_session_class, sample_vision_response_no_logo, sample_base64_image): + """Debe manejar imágenes sin logo detectado.""" + mock_response = MagicMock() + mock_response.status = 200 + mock_response.json = AsyncMock(return_value=sample_vision_response_no_logo) + + mock_session = MagicMock() + mock_session.post = MagicMock( + return_value=MagicMock(__aenter__=AsyncMock(return_value=mock_response), __aexit__=AsyncMock()) + ) + mock_session.__aenter__ = 
AsyncMock(return_value=mock_session) + mock_session.__aexit__ = AsyncMock() + mock_session_class.return_value = mock_session + + result = await analyze_image(sample_base64_image) + + assert result.logo_description == "" + assert "Product" in result.label_description + + @pytest.mark.unit + @pytest.mark.asyncio + @patch("app.externals.google_vision.google_vision_client.aiohttp.ClientSession") + async def test_analyze_image_filters_low_scores( + self, mock_session_class, sample_vision_response_low_score, sample_base64_image + ): + """Debe filtrar resultados con score bajo.""" + mock_response = MagicMock() + mock_response.status = 200 + mock_response.json = AsyncMock(return_value=sample_vision_response_low_score) + + mock_session = MagicMock() + mock_session.post = MagicMock( + return_value=MagicMock(__aenter__=AsyncMock(return_value=mock_response), __aexit__=AsyncMock()) + ) + mock_session.__aenter__ = AsyncMock(return_value=mock_session) + mock_session.__aexit__ = AsyncMock() + mock_session_class.return_value = mock_session + + result = await analyze_image(sample_base64_image) + + assert result.logo_description == "" # Score < 0.65 + assert result.label_description == "" # Score < 0.65 + + @pytest.mark.unit + @pytest.mark.asyncio + async def test_analyze_image_api_error(self, sample_base64_image): + """Debe lanzar excepción en error de API.""" + with patch("app.externals.google_vision.google_vision_client.aiohttp.ClientSession") as mock_session_class: + mock_response = MagicMock() + mock_response.status = 400 + mock_response.text = AsyncMock(return_value="Bad Request") + + # Create proper async context manager mocks + mock_post_cm = MagicMock() + mock_post_cm.__aenter__ = AsyncMock(return_value=mock_response) + mock_post_cm.__aexit__ = AsyncMock(return_value=None) + + mock_session = MagicMock() + mock_session.post = MagicMock(return_value=mock_post_cm) + + mock_session_cm = MagicMock() + mock_session_cm.__aenter__ = AsyncMock(return_value=mock_session) + 
mock_session_cm.__aexit__ = AsyncMock(return_value=None) + mock_session_class.return_value = mock_session_cm + + with pytest.raises(Exception) as exc_info: + await analyze_image(sample_base64_image) + + assert "Error en Google Vision API" in str(exc_info.value) + + @pytest.mark.unit + @pytest.mark.asyncio + @patch("app.externals.google_vision.google_vision_client.aiohttp.ClientSession") + async def test_analyze_image_empty_response(self, mock_session_class, sample_base64_image): + """Debe manejar respuesta vacía.""" + mock_response = MagicMock() + mock_response.status = 200 + mock_response.json = AsyncMock(return_value={"responses": [{}]}) + + mock_session = MagicMock() + mock_session.post = MagicMock( + return_value=MagicMock(__aenter__=AsyncMock(return_value=mock_response), __aexit__=AsyncMock()) + ) + mock_session.__aenter__ = AsyncMock(return_value=mock_session) + mock_session.__aexit__ = AsyncMock() + mock_session_class.return_value = mock_session + + result = await analyze_image(sample_base64_image) + + assert result.logo_description == "" + assert result.label_description == "" + + +class TestVisionAnalysisResponse: + """Tests para VisionAnalysisResponse.""" + + @pytest.mark.unit + def test_response_creation(self): + """Debe crear respuesta correctamente.""" + response = VisionAnalysisResponse(logo_description="TestLogo", label_description="Product, Electronics") + + assert response.logo_description == "TestLogo" + assert response.label_description == "Product, Electronics" + + @pytest.mark.unit + def test_get_analysis_text(self): + """Debe generar texto de análisis.""" + response = VisionAnalysisResponse(logo_description="Apple", label_description="Phone, Technology") + + analysis_text = response.get_analysis_text() + + # Verificar que el método existe y retorna string + assert isinstance(analysis_text, str) diff --git a/tests/unit/factories/__init__.py b/tests/unit/factories/__init__.py new file mode 100644 index 0000000..ae72101 --- /dev/null +++ 
b/tests/unit/factories/__init__.py @@ -0,0 +1 @@ +# Factory tests diff --git a/tests/unit/factories/test_ai_provider_factory.py b/tests/unit/factories/test_ai_provider_factory.py new file mode 100644 index 0000000..a89761d --- /dev/null +++ b/tests/unit/factories/test_ai_provider_factory.py @@ -0,0 +1,96 @@ +""" +Tests para AIProviderFactory. +Verifica la correcta instanciación de proveedores de IA. +""" + +import pytest + +from app.factories.ai_provider_factory import AIProviderFactory +from app.providers.ai_provider_interface import AIProviderInterface +from app.providers.anthropic_provider import AnthropicProvider +from app.providers.deepseek_provider import DeepseekProvider +from app.providers.gemini_provider import GeminiProvider +from app.providers.openai_provider import OpenAIProvider + + +class TestAIProviderFactory: + """Tests para AIProviderFactory.""" + + @pytest.mark.unit + def test_get_openai_provider(self): + """Debe retornar una instancia de OpenAIProvider para 'openai'.""" + provider = AIProviderFactory.get_provider("openai") + + assert provider is not None + assert isinstance(provider, OpenAIProvider) + assert isinstance(provider, AIProviderInterface) + + @pytest.mark.unit + def test_get_anthropic_provider(self): + """Debe retornar una instancia de AnthropicProvider para 'claude'.""" + provider = AIProviderFactory.get_provider("claude") + + assert provider is not None + assert isinstance(provider, AnthropicProvider) + assert isinstance(provider, AIProviderInterface) + + @pytest.mark.unit + def test_get_gemini_provider(self): + """Debe retornar una instancia de GeminiProvider para 'gemini'.""" + provider = AIProviderFactory.get_provider("gemini") + + assert provider is not None + assert isinstance(provider, GeminiProvider) + assert isinstance(provider, AIProviderInterface) + + @pytest.mark.unit + def test_get_deepseek_provider(self): + """Debe retornar una instancia de DeepseekProvider para 'deepseek'.""" + provider = 
AIProviderFactory.get_provider("deepseek") + + assert provider is not None + assert isinstance(provider, DeepseekProvider) + assert isinstance(provider, AIProviderInterface) + + @pytest.mark.unit + def test_invalid_provider_raises_error(self): + """Debe lanzar ValueError para un proveedor no implementado.""" + with pytest.raises(ValueError) as exc_info: + AIProviderFactory.get_provider("invalid_provider") + + assert "no está implementado" in str(exc_info.value) + assert "invalid_provider" in str(exc_info.value) + + @pytest.mark.unit + def test_empty_provider_raises_error(self): + """Debe lanzar ValueError para un proveedor vacío.""" + with pytest.raises(ValueError) as exc_info: + AIProviderFactory.get_provider("") + + assert "no está implementado" in str(exc_info.value) + + @pytest.mark.unit + def test_case_sensitive_provider_names(self): + """Los nombres de proveedores deben ser case-sensitive.""" + with pytest.raises(ValueError): + AIProviderFactory.get_provider("OpenAI") + + with pytest.raises(ValueError): + AIProviderFactory.get_provider("CLAUDE") + + @pytest.mark.unit + @pytest.mark.parametrize( + "provider_name,expected_class", + [ + ("openai", OpenAIProvider), + ("claude", AnthropicProvider), + ("gemini", GeminiProvider), + ("deepseek", DeepseekProvider), + ], + ) + def test_all_providers_parametrized(self, provider_name, expected_class): + """Test parametrizado para todos los proveedores válidos.""" + provider = AIProviderFactory.get_provider(provider_name) + + assert isinstance(provider, expected_class) + assert isinstance(provider, AIProviderInterface) diff --git a/tests/unit/factories/test_scraping_factory.py b/tests/unit/factories/test_scraping_factory.py new file mode 100644 index 0000000..08232f2 --- /dev/null +++ b/tests/unit/factories/test_scraping_factory.py @@ -0,0 +1,139 @@ +""" +Tests para ScrapingFactory. +Verifica la correcta selección de scrapers según el dominio de la URL. 
+""" + +from unittest.mock import MagicMock + +import pytest + +from app.factories.scraping_factory import ScrapingFactory +from app.scrapers.aliexpress_scraper import AliexpressScraper +from app.scrapers.amazon_scraper import AmazonScraper +from app.scrapers.cj_scraper import CJScraper +from app.scrapers.dropi_scraper import DropiScraper +from app.scrapers.ia_scraper import IAScraper +from app.scrapers.scraper_interface import ScraperInterface + + +class TestScrapingFactory: + """Tests para ScrapingFactory.""" + + @pytest.fixture + def factory(self, mock_message_service): + """Crear instancia de ScrapingFactory con mock de message_service.""" + return ScrapingFactory(message_service=mock_message_service) + + @pytest.mark.unit + def test_get_amazon_scraper(self, factory): + """Debe retornar AmazonScraper para URLs de Amazon.""" + urls = [ + "https://www.amazon.com/dp/B08N5WRWNW", + "https://amazon.com/gp/product/B08N5WRWNW", + "https://www.amazon.es/dp/B08N5WRWNW", + "https://www.amazon.com.mx/dp/B08N5WRWNW", + ] + + for url in urls: + scraper = factory.get_scraper(url) + assert isinstance(scraper, AmazonScraper), f"Failed for URL: {url}" + assert isinstance(scraper, ScraperInterface) + + @pytest.mark.unit + def test_get_aliexpress_scraper(self, factory): + """Debe retornar AliexpressScraper para URLs de AliExpress.""" + urls = [ + "https://www.aliexpress.com/item/1005001234567890.html", + "https://es.aliexpress.com/item/1005001234567890.html", + "https://aliexpress.com/item/1005001234567890.html", + ] + + for url in urls: + scraper = factory.get_scraper(url) + assert isinstance(scraper, AliexpressScraper), f"Failed for URL: {url}" + assert isinstance(scraper, ScraperInterface) + + @pytest.mark.unit + def test_get_cj_scraper(self, factory): + """Debe retornar CJScraper para URLs de CJ Dropshipping.""" + urls = [ + "https://www.cjdropshipping.com/product/test-product-p-123456.html", + "https://cjdropshipping.com/product/test-p-789.html", + ] + + for url in urls: + 
scraper = factory.get_scraper(url) + assert isinstance(scraper, CJScraper), f"Failed for URL: {url}" + assert isinstance(scraper, ScraperInterface) + + @pytest.mark.unit + def test_get_dropi_scraper(self, factory): + """Debe retornar DropiScraper para URLs de Dropi.""" + urls = [ + "https://app.dropi.co/catalog/product/12345", + "https://dropi.co/products/test", + ] + + for url in urls: + scraper = factory.get_scraper(url) + assert isinstance(scraper, DropiScraper), f"Failed for URL: {url}" + assert isinstance(scraper, ScraperInterface) + + @pytest.mark.unit + def test_dropi_scraper_with_country(self, factory): + """DropiScraper debe inicializarse con el país correcto.""" + url = "https://app.dropi.co/catalog/product/12345" + + scraper_co = factory.get_scraper(url, country="co") + assert isinstance(scraper_co, DropiScraper) + + scraper_mx = factory.get_scraper(url, country="mx") + assert isinstance(scraper_mx, DropiScraper) + + @pytest.mark.unit + def test_get_ia_scraper_for_unknown_domain(self, factory): + """Debe retornar IAScraper para dominios desconocidos.""" + urls = [ + "https://www.macys.com/shop/product/123", + "https://www.walmart.com/ip/test-product", + "https://www.ebay.com/itm/123456", + "https://www.unknown-store.com/product/test", + ] + + for url in urls: + scraper = factory.get_scraper(url) + assert isinstance(scraper, IAScraper), f"Failed for URL: {url}" + assert isinstance(scraper, ScraperInterface) + + @pytest.mark.unit + def test_factory_requires_message_service_for_ia_scraper(self, mock_message_service): + """IAScraper requiere message_service para funcionar.""" + factory = ScrapingFactory(message_service=mock_message_service) + scraper = factory.get_scraper("https://unknown-domain.com/product") + + assert isinstance(scraper, IAScraper) + + @pytest.mark.unit + @pytest.mark.parametrize( + "url,expected_scraper", + [ + ("https://www.amazon.com/dp/B08TEST", AmazonScraper), + ("https://www.aliexpress.com/item/123.html", AliexpressScraper), + 
("https://cjdropshipping.com/product/test", CJScraper), + ("https://dropi.co/products/test", DropiScraper), + ("https://other-store.com/product", IAScraper), + ], + ) + def test_scraper_selection_parametrized(self, factory, url, expected_scraper): + """Test parametrizado para selección de scrapers.""" + scraper = factory.get_scraper(url) + assert isinstance(scraper, expected_scraper) + + @pytest.mark.unit + def test_url_case_insensitive(self, factory): + """La detección de dominio debe ser case-insensitive.""" + scraper_lower = factory.get_scraper("https://www.amazon.com/dp/B08TEST") + scraper_upper = factory.get_scraper("https://WWW.AMAZON.COM/dp/B08TEST") + + assert type(scraper_lower) == type(scraper_upper) + assert isinstance(scraper_lower, AmazonScraper) diff --git a/tests/unit/helpers/__init__.py b/tests/unit/helpers/__init__.py new file mode 100644 index 0000000..b1bb2f7 --- /dev/null +++ b/tests/unit/helpers/__init__.py @@ -0,0 +1 @@ +# Helper tests diff --git a/tests/unit/helpers/test_escape_helper.py b/tests/unit/helpers/test_escape_helper.py new file mode 100644 index 0000000..4aa4f26 --- /dev/null +++ b/tests/unit/helpers/test_escape_helper.py @@ -0,0 +1,170 @@ +""" +Tests para escape_helper. +Verifica la limpieza de HTML y placeholders. 
+""" + +import pytest + +from app.helpers.escape_helper import clean_html_deeply, clean_html_less_deeply, clean_placeholders + + +class TestCleanPlaceholders: + """Tests para clean_placeholders.""" + + @pytest.mark.unit + def test_removes_all_placeholders_when_no_allowed_keys(self): + """Debe remover todos los placeholders si no hay keys permitidas.""" + text = "Hello {name}, your order {order_id} is ready" + result = clean_placeholders(text) + assert result == "Hello , your order is ready" + + @pytest.mark.unit + def test_keeps_allowed_placeholders(self): + """Debe mantener placeholders que están en allowed_keys.""" + text = "Hello {name}, your order {order_id} is ready" + result = clean_placeholders(text, allowed_keys=["name"]) + assert "{name}" in result + assert "{order_id}" not in result + + @pytest.mark.unit + def test_handles_quoted_placeholders(self): + """Debe manejar placeholders con comillas.""" + text = "Value: {'key'} and {\"another_key\"}" + result = clean_placeholders(text, allowed_keys=["key"]) + assert "{'key'}" in result + + @pytest.mark.unit + def test_empty_text_returns_empty(self): + """Debe retornar string vacío para input vacío.""" + assert clean_placeholders("") == "" + assert clean_placeholders("", ["key"]) == "" + + @pytest.mark.unit + def test_text_without_placeholders_unchanged(self): + """Texto sin placeholders no debe cambiar.""" + text = "Hello World! No placeholders here." 
+ result = clean_placeholders(text) + assert result == text + + @pytest.mark.unit + def test_nested_braces_handled(self): + """Debe manejar llaves anidadas correctamente.""" + text = 'JSON: {"key": "value"}' + result = clean_placeholders(text) + # El contenido entre llaves con formato JSON debería procesarse + assert result is not None + + +class TestCleanHtmlDeeply: + """Tests para clean_html_deeply.""" + + @pytest.mark.unit + def test_removes_script_tags(self, sample_html_content): + """Debe remover tags de script.""" + result = clean_html_deeply(sample_html_content) + assert "
Content
' + result = clean_html_less_deeply(html) + assert "