# hackathon_Raia/api_mock.py at main · JustTheHero/hackathon_Raia · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
from fastapi import FastAPI, HTTPException, Response
from fastapi.responses import FileResponse
from pydantic import BaseModel
from typing import List, Optional
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import io
import base64
import asyncio
import logging
import tempfile
import os

# Logging configuration: root logger at INFO level, plus a module-level logger
# used by every function in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI application object; the route decorators below register on it.
# title/description/version are metadata passed to FastAPI.
app = FastAPI(
    title="Fake News Detection API",
    description="API para buscar posts do Bluesky, detectar fake news e gerar dashboards visuais",
    version="1.0.0"
)

class Post(BaseModel):
    """A single post, optionally annotated with fake-news analysis results.

    The three analysis fields default to ``None``; in this file they are
    filled in by ``analyze_fake_news``.
    """
    # Post identifier; the mock fetcher fills it with an "at://..." URI.
    uri: str
    # Post body text.
    text: str
    # Author display name/handle as a plain string.
    author: str
    # Creation timestamp kept as a string (the mock data uses an ISO-8601 value).
    createdAt: str
    # Engagement counters.
    likes: int
    replies: int
    reposts: int
    # Analysis output: score produced by the (simulated) classifier.
    fake_news_score: Optional[float] = None
    # Analysis output: True when the score is above the 0.5 threshold used in this file.
    is_fake_news: Optional[bool] = None
    # Analysis output: classifier confidence (the simulation draws it from 0.7-1.0).
    confidence: Optional[float] = None

class ThemeRequest(BaseModel):
    """Request body for the theme-analysis endpoint."""
    # Topic to search posts for.
    theme: str
    # Upper bound on the number of posts to fetch; Optional, so an explicit
    # null is accepted by the model as well.
    max_posts: Optional[int] = 10
    # Language code; NOTE(review): not read by any code visible in this file.
    language: Optional[str] = "pt"

class AnalysisResponse(BaseModel):
    """Response body returned by the theme-analysis endpoint."""
    # Theme that was analysed (echoed from the request).
    theme: str
    # Number of posts that were analysed.
    total_posts: int
    # Number of analysed posts flagged as fake news.
    fake_news_count: int
    # The analysed posts, including their analysis fields.
    posts: List[Post]
    # Free-form dictionary of aggregate statistics.
    analysis_summary: dict
    # Dashboard PNG encoded as a base64 string (None when absent).
    dashboard_image: Optional[str] = None  # Base64 encoded image

async def fetch_bluesky_posts(theme: str, max_posts: int = 10) -> List[Post]:
    """
    Simulate a Bluesky post search for the given theme.

    No network call is made: a fixed set of five template posts is
    formatted with ``theme`` and truncated to ``max_posts``. A real
    implementation would query the official Bluesky API instead.

    Args:
        theme: Topic interpolated into each mock post text.
        max_posts: Maximum number of posts to return. ``None`` means
            "no limit"; negative values are treated as 0.

    Returns:
        A list of at most ``max_posts`` posts (never more than five),
        without any analysis fields set.
    """
    logger.info(f"Buscando posts sobre: {theme}")

    # Simulated Bluesky API response with reasonably realistic data
    mock_posts_data = [
        {
            "text": f"Atenção: novo golpe está circulando sobre {theme}. Não cliquem em links suspeitos!",
            "author": "SegurançaOnline",
            "likes": 45,
            "replies": 12,
            "reposts": 8
        },
        {
            "text": f"URGENTE: Descoberta revolucionária sobre {theme} que vai mudar tudo!",
            "author": "CuriosoNews",
            "likes": 120,
            "replies": 25,
            "reposts": 15
        },
        {
            "text": f"Especialistas confirmam: {theme} é uma das maiores ameaças atuais",
            "author": "CiênciaHoje",
            "likes": 89,
            "replies": 15,
            "reposts": 10
        },
        {
            "text": f"ALERTA: Governo está escondendo a verdade sobre {theme}",
            "author": "VerdadeOculta",
            "likes": 230,
            "replies": 45,
            "reposts": 32
        },
        {
            "text": f"Estudo comprova: {theme} não é tão perigoso quanto dizem",
            "author": "FatosCientíficos",
            "likes": 76,
            "replies": 18,
            "reposts": 7
        }
    ]

    # A negative limit would slice from the END of the list (e.g. [:-1]
    # returns four posts), so clamp it at zero. None keeps "no limit".
    limit = None if max_posts is None else max(max_posts, 0)

    posts = [
        Post(
            uri=f"at://did:plc:example/post_{i}",
            text=data["text"],
            author=data["author"],
            createdAt="2024-01-01T12:00:00Z",
            likes=data["likes"],
            replies=data["replies"],
            reposts=data["reposts"]
        )
        for i, data in enumerate(mock_posts_data[:limit])
    ]

    await asyncio.sleep(1)  # Simulates network latency
    return posts

async def analyze_fake_news(posts: List[Post]) -> List[Post]:
    """
    Simulate an ML fake-news classifier over a list of posts.

    Each post is copied and the copy receives a pseudo-random
    ``fake_news_score`` (whose range depends on keywords found in the
    text), an ``is_fake_news`` flag (score above 0.5) and a random
    ``confidence``. In production this would call the real model.
    """
    logger.info("Analisando posts para detecção de fake news")

    # Keyword groups for the demo heuristic (matched against lower-cased text)
    sensational_markers = ("urgente", "alerta", "revolucionária")
    evidence_markers = ("estudo", "comprova", "especialistas")

    results = []
    for original in posts:
        lowered = original.text.lower()

        # Pick the base value and width of the random score range
        if any(marker in lowered for marker in sensational_markers):
            base, spread = 0.7, 0.2
        elif any(marker in lowered for marker in evidence_markers):
            base, spread = 0.3, 0.3
        else:
            base, spread = 0.1, 0.4
        score = round(base + (np.random.random() * spread), 2)

        annotated = original.copy()
        annotated.fake_news_score = score
        annotated.is_fake_news = score > 0.5
        # Confidence drawn from the 0.7-1.0 range
        annotated.confidence = round(np.random.random() * 0.3 + 0.7, 2)
        results.append(annotated)

    await asyncio.sleep(0.5)  # Simulates model processing time
    return results

def _figure_to_base64_png(fig, **savefig_kwargs) -> str:
    """Render ``fig`` to an in-memory PNG and return it base64-encoded."""
    buf = io.BytesIO()
    # Save through the figure itself rather than pyplot's "current figure".
    fig.savefig(buf, format='png', **savefig_kwargs)
    return base64.b64encode(buf.getvalue()).decode('utf-8')


def _mean_engagement(posts) -> float:
    """Return the mean of likes + 2*replies + 3*reposts, or 0.0 when empty."""
    if not posts:
        return 0.0
    return sum(p.likes + p.replies * 2 + p.reposts * 3 for p in posts) / len(posts)


def generate_dashboard(analysis_data: dict, posts: List[Post]) -> str:
    """
    Build a 2x2 dashboard figure with fake-news statistics.

    Panels: share of fake vs. true posts (pie), mean engagement per
    class (bars), histogram of fake-news scores, and the authors with
    the most flagged posts (horizontal bars, top five).

    Args:
        analysis_data: Must contain the keys ``"theme"``,
            ``"true_news_count"`` and ``"fake_news_count"``.
        posts: Analysed posts; posts whose ``is_fake_news`` is falsy
            (including ``None``) are counted as true content.

    Returns:
        The dashboard PNG as a base64 string. If rendering fails, the
        error is logged and a base64 PNG showing the error message is
        returned instead, so rendering errors are not raised.
    """
    fig = None
    try:
        # Configure matplotlib style
        plt.style.use('default')
        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
        fig.suptitle(f'Dashboard de Análise de Fake News - Tema: {analysis_data["theme"]}',
                    fontsize=16, fontweight='bold')

        # Shared chart data
        labels = ['Verdadeiros', 'Fake News']
        sizes = [analysis_data['true_news_count'], analysis_data['fake_news_count']]
        colors = ['#66b3ff', '#ff6666']

        # Chart 1: pie - fake news distribution
        if sum(sizes) > 0:
            axes[0, 0].pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
            axes[0, 0].axis('equal')
        else:
            # A pie cannot be normalised when every slice is zero.
            axes[0, 0].text(0.5, 0.5, 'Nenhum post para analisar',
                           ha='center', va='center', transform=axes[0, 0].transAxes)
        axes[0, 0].set_title('Distribuição de Fake News vs. Conteúdo Verdadeiro')

        # Chart 2: bars - engagement per content type
        fake_news_posts = [p for p in posts if p.is_fake_news]
        true_news_posts = [p for p in posts if not p.is_fake_news]

        engagement_data = [_mean_engagement(true_news_posts), _mean_engagement(fake_news_posts)]
        bars = axes[0, 1].bar(labels, engagement_data, color=colors)
        axes[0, 1].set_title('Engajamento Médio por Tipo de Conteúdo')
        axes[0, 1].set_ylabel('Pontuação de Engajamento')

        # Value labels above the bars
        for bar, value in zip(bars, engagement_data):
            axes[0, 1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 5,
                           f'{value:.1f}', ha='center', va='bottom')

        # Chart 3: fake-news score distribution (unscored posts are skipped)
        scores = [p.fake_news_score for p in posts if p.fake_news_score is not None]
        axes[1, 0].hist(scores, bins=10, color='#ff9999', edgecolor='black')
        axes[1, 0].set_title('Distribuição de Scores de Fake News')
        axes[1, 0].set_xlabel('Score de Fake News')
        axes[1, 0].set_ylabel('Frequência')
        axes[1, 0].axvline(x=0.5, color='red', linestyle='--', label='Limite Fake News')
        axes[1, 0].legend()

        # Chart 4: authors with the most fake news
        fake_authors = {}
        for post in fake_news_posts:
            fake_authors[post.author] = fake_authors.get(post.author, 0) + 1

        if fake_authors:
            # Highest counts first, keep the top 5
            ranked = sorted(fake_authors.items(), key=lambda item: item[1], reverse=True)[:5]
            top_authors = [name for name, _ in ranked]
            top_counts = [count for _, count in ranked]

            bars = axes[1, 1].barh(top_authors, top_counts, color='#ff6666')
            axes[1, 1].set_title('Top Autores com Fake News')
            axes[1, 1].set_xlabel('Número de Fake News')

            # Value labels next to the bars
            for bar, value in zip(bars, top_counts):
                axes[1, 1].text(value + 0.1, bar.get_y() + bar.get_height()/2,
                               f'{value}', ha='left', va='center')
        else:
            axes[1, 1].text(0.5, 0.5, 'Nenhuma fake news detectada',
                           ha='center', va='center', transform=axes[1, 1].transAxes)
            axes[1, 1].set_title('Top Autores com Fake News')

        fig.tight_layout()

        return _figure_to_base64_png(fig, dpi=100, bbox_inches='tight')

    except Exception as e:
        logger.error(f"Erro ao gerar dashboard: {str(e)}")

        # Release the partially drawn dashboard before creating the error
        # figure; otherwise it stays registered with pyplot.
        if fig is not None:
            plt.close(fig)
            fig = None

        # Render an error image instead
        fig, ax = plt.subplots(figsize=(8, 6))
        ax.text(0.5, 0.5, f"Erro ao gerar dashboard: {str(e)}",
                ha='center', va='center', transform=ax.transAxes)
        ax.set_title("Erro na Geração do Dashboard")

        return _figure_to_base64_png(fig)

    finally:
        # Always close whichever figure is still open to free its memory.
        if fig is not None:
            plt.close(fig)

@app.get("/")
async def root():
    """Root endpoint: return the API name and an index of its main routes."""
    endpoint_index = {
        "health": "/health",
        "analyze": "/analyze-theme",
        "dashboard": "/dashboard/{theme}",
        "docs": "/docs"
    }
    return {"message": "Fake News Detection API", "endpoints": endpoint_index}

@app.get("/health")
async def health_check():
    """Health-check endpoint: return a static status payload."""
    status_payload = dict(status="healthy", service="fake-news-detection")
    return status_payload

@app.post("/analyze-theme", response_model=AnalysisResponse)
async def analyze_theme(request: ThemeRequest):
    """
    Main endpoint: fetch posts for a theme, analyse them and build a dashboard.

    Args:
        request: Theme to search for and the maximum number of posts.

    Returns:
        An ``AnalysisResponse`` with the analysed posts, aggregate
        statistics and the dashboard image as a base64 PNG.

    Raises:
        HTTPException: 404 when no posts are found for the theme;
            500 when any other error occurs while processing.
    """
    try:
        # 1. Fetch posts from Bluesky
        posts = await fetch_bluesky_posts(
            theme=request.theme,
            max_posts=request.max_posts
        )

        if not posts:
            raise HTTPException(
                status_code=404,
                detail=f"Nenhum post encontrado para o tema: {request.theme}"
            )

        # 2. Run the fake-news model over the posts
        analyzed_posts = await analyze_fake_news(posts)

        # 3. Compute statistics
        fake_news_count = sum(1 for post in analyzed_posts if post.is_fake_news)
        true_news_count = len(analyzed_posts) - fake_news_count
        avg_score = round(
            sum(post.fake_news_score for post in analyzed_posts) / len(analyzed_posts),
            2
        ) if analyzed_posts else 0

        # 4. Build the dashboard
        analysis_data = {
            "theme": request.theme,
            "total_posts": len(analyzed_posts),
            "fake_news_count": fake_news_count,
            "true_news_count": true_news_count,
            "fake_news_percentage": round((fake_news_count / len(analyzed_posts)) * 100, 2),
            "average_fake_score": avg_score,
        }

        dashboard_image = generate_dashboard(analysis_data, analyzed_posts)

        # 5. Assemble the response
        response = AnalysisResponse(
            theme=request.theme,
            total_posts=len(analyzed_posts),
            fake_news_count=fake_news_count,
            posts=analyzed_posts,
            analysis_summary=analysis_data,
            dashboard_image=dashboard_image
        )

        logger.info(f"Análise concluída para tema: {request.theme}")
        return response

    except HTTPException:
        # HTTPException is itself an Exception; without this clause the 404
        # raised above would be caught below and turned into a 500.
        raise
    except Exception as e:
        logger.error(f"Erro ao processar requisição: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Erro interno: {str(e)}") from e

@app.get("/dashboard/{theme}")
async def get_dashboard(theme: str, max_posts: int = 10):
    """
    Return only the visual dashboard for a theme, as a PNG image.

    Args:
        theme: Topic to search posts for (path parameter).
        max_posts: Maximum number of posts to fetch (query parameter).

    Returns:
        A ``Response`` whose body is the PNG and whose media type is
        ``image/png``.

    Raises:
        HTTPException: 404 when no posts are found for the theme;
            500 when any other error occurs.
    """
    try:
        # Fetch and analyse posts
        posts = await fetch_bluesky_posts(theme, max_posts)

        # Same empty-result handling as /analyze-theme: report 404 instead
        # of trying to draw a dashboard with no data.
        if not posts:
            raise HTTPException(
                status_code=404,
                detail=f"Nenhum post encontrado para o tema: {theme}"
            )

        analyzed_posts = await analyze_fake_news(posts)

        # Compute statistics
        fake_news_count = sum(1 for post in analyzed_posts if post.is_fake_news)
        true_news_count = len(analyzed_posts) - fake_news_count

        # Build the dashboard
        analysis_data = {
            "theme": theme,
            "total_posts": len(analyzed_posts),
            "fake_news_count": fake_news_count,
            "true_news_count": true_news_count,
        }

        dashboard_image = generate_dashboard(analysis_data, analyzed_posts)

        # Decode the base64 image and return it as raw PNG bytes
        image_data = base64.b64decode(dashboard_image)
        return Response(content=image_data, media_type="image/png")

    except HTTPException:
        # Let deliberate HTTP errors (the 404 above) through unchanged.
        raise
    except Exception as e:
        logger.error(f"Erro ao gerar dashboard: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Erro ao gerar dashboard: {str(e)}") from e

@app.get("/posts/{theme}")
async def get_posts_only(theme: str, max_posts: int = 10):
    """
    Alternative endpoint that only fetches posts, without analysis.

    Args:
        theme: Topic to search posts for (path parameter).
        max_posts: Maximum number of posts to fetch (query parameter).

    Returns:
        A dict with the ``theme`` and the list of fetched ``posts``.

    Raises:
        HTTPException: 500 when fetching the posts fails.
    """
    try:
        posts = await fetch_bluesky_posts(theme, max_posts)
        return {"theme": theme, "posts": posts}
    except Exception as e:
        # Log before converting, as the other endpoints do, so the failure
        # is visible server-side and not only in the HTTP response.
        logger.error(f"Erro ao buscar posts: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e

if __name__ == "__main__":
    # Script entry point: start a uvicorn server for the app when this file
    # is executed directly. uvicorn is imported here, so it is only needed
    # when running the file as a script.
    import uvicorn
    # host "0.0.0.0" listens on all network interfaces; port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)