Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 60 additions & 31 deletions app/chatbot.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,81 @@
import logging
import re

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from app.scraper import scrape_latest_news

# Configure logging: only errors are emitted by default.
logging.basicConfig(level=logging.ERROR)

# Select GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the small pre-trained model and tokenizer from Hugging Face.
# (Single load — the file previously carried duplicated loads of two
# different models, a diff-merge artifact.)
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

# GPT-2-style tokenizers ship without a pad token; reuse EOS so that
# tokenizer(..., padding=True) does not raise.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


def truncate_incomplete_articles(text):
    """Drop the trailing article segment unless it ends in a full timestamp.

    The generated text is a sequence of article segments separated by a
    40-character '=' rule; a segment counts as complete when it ends with an
    ISO-8601 UTC timestamp (e.g. 2024-10-12T10:00:00Z).
    """
    separator = "=" * 40
    segments = text.split(separator)

    # A complete final segment ends with a timestamp like 2024-10-12T10:00:00Z.
    ends_complete = re.search(
        r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$", segments[-1]
    )
    if ends_complete is None:
        segments = segments[:-1]

    return separator.join(segments).strip()


# Generate a chatbot response using the LLM grounded in scraped news context.
def generate_response(input_text):
    """Generate a chatbot reply grounded in freshly scraped news articles.

    Args:
        input_text: The user's query; forwarded to the news scraper.

    Returns:
        The model's reply as a string, or a human-readable error message
        (never raises — all failures are logged and reported as text).
    """
    try:
        news_articles = scrape_latest_news(input_text)

        # The scraper is expected to return a list of dicts; anything else
        # (error payload, None) is reported instead of crashing below.
        if not isinstance(news_articles, list) or not all(
            isinstance(article, dict) for article in news_articles
        ):
            return "Received an unexpected response while fetching news."

        if not news_articles:
            return "Sorry, I couldn't find any recent news on this topic."

        # One block per article, terminated by a 40-char '=' rule so that
        # truncate_incomplete_articles() can detect a cut-off final block.
        news_context = "\n".join(
            f"Title: {article.get('title', 'N/A')}\n"
            f"Description: {article.get('description', 'N/A')}\n"
            f"URL: {article.get('url', 'N/A')}\n"
            f"Source: {article.get('source', 'N/A')}\n"
            f"Published At: {article.get('publishedAt', 'N/A')}\n"
            f"{'=' * 40}"
            for article in news_articles
        )

        combined_input = f"{news_context}\n\nChatbot:"

        # Truncate to the model's context window using the tokenizer's own
        # token limit.  (A whitespace word count is the wrong unit here and
        # could over- or under-truncate the prompt.)
        inputs = tokenizer(
            combined_input,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=tokenizer.model_max_length,
        ).to(device)

        # Sampled generation; pad_token_id silences the missing-pad warning.
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            top_k=40,
            pad_token_id=tokenizer.eos_token_id,
        )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # Keep only the model's continuation after the "Chatbot:" marker;
        # fall back to the full decode if the marker was truncated away.
        if "Chatbot:" in response:
            response = response.split("Chatbot:")[-1].strip()

        # Strip a possibly half-generated trailing article block.
        return truncate_incomplete_articles(response)
    except Exception as e:
        logging.error(f"Error generating response: {str(e)}", exc_info=True)
        return "Oops! Something went wrong while processing your request."


# Example usage when run as a script.
if __name__ == "__main__":
    sample_query = "latest technology news"
    print(generate_response(sample_query))
142 changes: 57 additions & 85 deletions app/scraper.py
Original file line number Diff line number Diff line change
@@ -1,86 +1,58 @@
def scrape_latest_news(query=None):
    """Return a fixed set of dummy news articles for offline testing.

    Args:
        query: Accepted for signature compatibility with the API-backed
            scraper (callers pass the user's query); ignored here because
            the data is static.

    Returns:
        A list of 10 dicts with keys 'title', 'description', 'url',
        'source' and 'published_at'.
    """
    articles = [
        {
            "title": "AI Breakthrough in Cancer Detection",
            "description": "Scientists at Global Health Institute have developed a revolutionary AI system capable of detecting early signs of cancer with an accuracy of 98%. This advancement is expected to transform the medical industry by drastically reducing diagnostic errors and enabling earlier treatment interventions, potentially saving millions of lives annually.",
            "url": "https://www.example.com/ai-cancer-detection",
            "source": "Health Today",
            "published_at": "2024-10-12T10:00:00Z"
        },
        {
            "title": "Global Climate Summit Urges Immediate Action",
            "description": "World leaders gathered at the 2024 Global Climate Summit to address the urgent need for sustainable energy solutions. With record-breaking heatwaves and natural disasters increasing worldwide, experts are calling for accelerated investments in renewable energy and stricter carbon emission regulations to mitigate the impact of climate change.",
            "url": "https://www.example.com/global-climate-summit",
            "source": "Eco News",
            "published_at": "2024-10-11T14:45:00Z"
        },
        {
            "title": "Nepal Celebrates Vijaya Dashami Amidst Challenges",
            "description": "Prime Minister K P Sharma Oli and President Ram Chandra Paudel extend greetings to citizens during Vijaya Dashami, urging support for those impacted by recent floods and landslides. The festival's religious importance is being celebrated in parallel with ongoing recovery efforts in affected areas.",
            "url": "https://www.example.com/nepal-dashami-celebration",
            "source": "Devdiscourse",
            "published_at": "2024-10-12T15:56:00Z"
        },
        {
            "title": "Floods and Landslides Devastate Nepal",
            "description": "Recent natural disasters in Nepal have caused 240 fatalities and displaced thousands, severely impacting infrastructure and disrupting travel during the Dashain festival. The government has allocated emergency funds for relief efforts, but the recovery process is expected to take months.",
            "url": "https://www.example.com/nepal-floods-landslides",
            "source": "Devdiscourse",
            "published_at": "2024-10-12T14:30:00Z"
        },
        {
            "title": "Global Tech Companies Eye Expansion in Africa",
            "description": "Tech giants such as Google and Microsoft have announced plans to expand their presence in Africa, aiming to tap into the continent's rapidly growing digital economy. By investing in infrastructure, training, and local talent, these companies hope to establish Africa as a major hub for technology innovation in the coming years.",
            "url": "https://www.example.com/tech-expansion-africa",
            "source": "Tech World",
            "published_at": "2024-10-10T09:00:00Z"
        },
        {
            "title": "SpaceX Successfully Launches First All-Civilian Mission to Mars",
            "description": "In a historic achievement, SpaceX launched its first all-civilian crewed mission to Mars. The crew, composed of scientists and engineers, will conduct groundbreaking research on the Martian surface. This mission marks a significant step toward SpaceX's goal of establishing a permanent human colony on Mars within the next decade.",
            "url": "https://www.example.com/spacex-mars-mission",
            "source": "Space News",
            "published_at": "2024-10-09T18:30:00Z"
        },
        {
            # Title restored: the original entry carried a single mis-encoded
            # character ("ç") instead of a headline.
            "title": "MIT Develops Long-Lasting Renewable Energy Storage",
            "description": "A team of researchers from MIT has developed a new battery technology capable of storing renewable energy for up to 10 times longer than current solutions. This breakthrough could solve one of the major challenges facing the adoption of solar and wind energy by providing reliable storage during periods of low production.",
            "url": "https://www.example.com/renewable-energy-storage",
            "source": "Science Daily",
            "published_at": "2024-10-11T11:20:00Z"
        },
        {
            "title": "Global Food Prices Surge Amidst Supply Chain Disruptions",
            "description": "The global food supply chain continues to face unprecedented disruptions due to ongoing geopolitical conflicts and climate-related disasters. As a result, food prices have surged by 15% in the last quarter alone, placing significant strain on low-income populations and leading to increased calls for food security reforms.",
            "url": "https://www.example.com/global-food-price-surge",
            "source": "Financial Times",
            "published_at": "2024-10-08T08:10:00Z"
        },
        {
            "title": "Advancements in Quantum Computing Pave the Way for Breakthroughs in Medicine",
            "description": "Quantum computing is set to revolutionize medical research, with new algorithms that can model complex molecular structures at an unprecedented scale. These advancements are expected to accelerate drug discovery processes and provide new insights into genetic diseases, opening the door for personalized medicine breakthroughs.",
            "url": "https://www.example.com/quantum-computing-medicine",
            "source": "Tech Insights",
            "published_at": "2024-10-07T12:00:00Z"
        },
        {
            "title": "World Cup 2024: The Rise of Underdogs",
            "description": "The 2024 FIFA World Cup has seen unexpected victories from teams considered underdogs, sparking excitement among fans worldwide. With several top-ranked teams eliminated early, the tournament has become one of the most unpredictable and thrilling in recent memory, capturing the spirit of competition on the global stage.",
            "url": "https://www.example.com/world-cup-2024-underdogs",
            "source": "Sports Daily",
            "published_at": "2024-10-12T17:45:00Z"
        }
    ]
    return articles
import requests
from datetime import datetime, timedelta

# Example usage for testing: print every dummy article to stdout.
if __name__ == "__main__":
    articles = scrape_latest_news()
    for article in articles:
        print(f"Title: {article['title']}")
        print(f"Description: {article['description']}")
        print(f"URL: {article['url']}")
        print(f"Source: {article['source']}")
        # NOTE(review): this expects the dummy-data scraper's 'published_at'
        # key; the NewsAPI-backed scraper variant emits 'publishedAt'
        # instead — confirm which variant is in scope before running.
        print(f"Published At: {article['published_at']}")
        print("=" * 40)
def scrape_latest_news(query="news"):
    """Fetch recent English-language articles for *query* from NewsAPI.

    Searches the last 7 days via the /v2/everything endpoint, sorted
    newest-first, up to 50 articles.

    Args:
        query: Search phrase forwarded to NewsAPI's ``q`` parameter.

    Returns:
        A list of dicts with keys 'title', 'description', 'url', 'source',
        'publishedAt', 'author' and 'image'; an empty list on any error.
    """
    # Search window: the last 7 days, formatted YYYY-MM-DD as NewsAPI expects.
    today = datetime.today()
    from_date = (today - timedelta(days=7)).strftime('%Y-%m-%d')
    to_date = today.strftime('%Y-%m-%d')

    # TODO(review): placeholder credential — load the real key from an
    # environment variable or config file; never hard-code secrets.
    api_key = 'api_key'
    url = "https://newsapi.org/v2/everything"

    params = {
        "q": query,
        "from": from_date,
        "to": to_date,
        "sortBy": "publishedAt",
        "apiKey": api_key,
        "language": "en",
        "pageSize": 50,
    }

    try:
        # An explicit timeout keeps the chatbot from hanging indefinitely if
        # NewsAPI is unreachable (requests has no default timeout).
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            data = response.json()

            if "articles" in data and data["articles"]:
                news_articles = []
                for article in data["articles"]:
                    # NewsAPI may return explicit nulls for any field, so
                    # coalesce with `or` where a null would otherwise crash
                    # (e.g. a null 'source' object).
                    news_articles.append({
                        'title': article.get('title', 'No title available'),
                        'description': article.get('description', 'No description available'),
                        'url': article.get('url', 'No URL available'),
                        'source': (article.get('source') or {}).get('name', 'No source available'),
                        'publishedAt': article.get('publishedAt', 'No published date available'),
                        'author': article.get('author', 'Unknown Author'),
                        'image': article.get('urlToImage', 'No image available'),
                    })
                return news_articles
            else:
                print("No articles found.")
                return []
        else:
            print(f"Error fetching news: {response.status_code} - {response.text}")
            return []

    except Exception as e:
        # Best-effort scraper: report and return an empty list so the
        # chatbot can degrade gracefully instead of crashing.
        print(f"Error in scrape_latest_news: {str(e)}")
        return []
31 changes: 20 additions & 11 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,33 @@ def gradio_chatbot(input_text):
response = generate_response(input_text)
return response

# Initialize the Gradio interface (single text-in / text-out chatbot UI).
# The plain inputs="text"/outputs="text" residue from the old version is
# removed; only the configured Textbox components remain.
gradio_interface = gr.Interface(
    fn=gradio_chatbot,
    inputs=gr.Textbox(
        label="Enter your query here",
        placeholder="Type something here...",
        lines=3,
        max_length=500,
    ),
    outputs=[gr.Textbox(
        label="Chatbot Response",
        placeholder="The chatbot's response will appear here...",
        lines=5,
        interactive=False,
    )],
    title="NewsBot Chatbot",
    description="A chatbot that provides recent news articles based on your queries. Just type your topic and get the latest updates!",
    allow_flagging="never",
    live=False,
)

# Run the Flask app (blocking call).
def run_flask():
    # debug=False: the Werkzeug debug reloader installs signal handlers,
    # which fails if Flask is ever started from a non-main thread; this
    # also matches the app.run(...) call in the __main__ block.
    app.run(port=5000, debug=False, threaded=False)

def run_gradio():
    """Launch the Gradio UI; share=False keeps the interface private."""
    gradio_interface.launch(share=False)

if __name__ == "__main__":
    # Start the Gradio UI on a background thread so it does not block the
    # Flask server.  (Renamed from the misleading 'flask_thread' — the
    # thread runs Gradio, while Flask occupies the main thread below.)
    gradio_thread = Thread(target=run_gradio)
    gradio_thread.start()

    # Flask runs in the main thread; this call blocks until shutdown.
    app.run(port=5000, debug=False, threaded=False)