# ============================================
# YA-PapersWithCode OpenAI Configuration
# ============================================
# This configuration file is for using OpenAI API for search and AI features
# Copy this file to .env to use OpenAI models instead of local models
#
# Requirements:
# 1. OpenAI API key with access to the configured chat models (GPT-5-mini primary, GPT-4o-mini fallback) and text-embedding models
# 2. Sufficient API credits for model usage
# ============================================
# ====================
# DEPLOYMENT MODE
# ====================
# Must be set to api_mode to use OpenAI API
DEPLOYMENT_MODE=api_mode
# ====================
# OPENAI API CONFIGURATION
# ====================
# Your OpenAI API Key (required)
# Get your API key from: https://platform.openai.com/api-keys
OPENAI_API_KEY=sk-your-openai-api-key-here
# OpenAI API Base URL
# Default OpenAI endpoint (do not change unless using a proxy)
API_BASE=https://api.openai.com/v1
# ====================
# MODEL CONFIGURATION
# ====================
# Language Model for text generation and agent search
# Using GPT-5-mini (latest and most capable mini model as of Aug 2025)
MODEL_NAME=gpt-5-mini-2025-08-07
# Embedding Model for semantic search
# Using text-embedding-3-small for cost efficiency
# Options: text-embedding-3-small, text-embedding-3-large, text-embedding-ada-002
EMBEDDING_MODEL=text-embedding-3-small
# Backup/Fallback Language Model (optional)
# Used when primary model is unavailable
FALLBACK_MODEL=gpt-4o-mini
# ====================
# AGENT SEARCH MODELS
# ====================
# Models for PASA agent search functionality
# Both crawler and selector use the same GPT-5-mini model
CRAWLER_MODEL=gpt-5-mini-2025-08-07
SELECTOR_MODEL=gpt-5-mini-2025-08-07
# ====================
# MODEL PARAMETERS
# ====================
# Maximum tokens to generate (affects response length and cost)
MAX_TOKENS=1024
# Temperature for text generation (0.0-2.0)
# Lower = more deterministic, Higher = more creative
# Recommended: 0.3-0.7 for search, 0.0-0.3 for factual tasks
TEMPERATURE=0.5
# Top-p sampling (0.0-1.0)
# Alternative to temperature, controls diversity
TOP_P=0.9
# Frequency penalty (-2.0 to 2.0)
# Reduces repetition of tokens
FREQUENCY_PENALTY=0.0
# Presence penalty (-2.0 to 2.0)
# Encourages new topics
PRESENCE_PENALTY=0.0
# ====================
# EMBEDDING SETTINGS
# ====================
# Dimension of embedding vectors
# text-embedding-3-large: 3072
# text-embedding-3-small: 1536
# text-embedding-ada-002: 1536
EMBEDDING_DIMENSION=1536
# Batch size for embedding generation
# Reduce if you encounter rate limits
EMBEDDING_BATCH_SIZE=100
# ====================
# API LIMITS & RETRY
# ====================
# Request timeout in seconds
TIMEOUT=60
# Maximum retries for failed requests
MAX_RETRIES=3
# Delay between retries (seconds)
RETRY_DELAY=2
# Rate limiting (requests per minute)
# Adjust based on your OpenAI tier
RATE_LIMIT_RPM=500
# Maximum tokens per minute
# Adjust based on your OpenAI tier
RATE_LIMIT_TPM=150000
# ====================
# SEARCH SETTINGS
# ====================
# Enable AI-powered agent search
ENABLE_AGENT_SEARCH=true
# Use OpenAI for semantic search
USE_OPENAI_EMBEDDINGS=true
# Similarity threshold for semantic search (0.0-1.0)
SIMILARITY_THRESHOLD=0.7
# Maximum search results to return
MAX_SEARCH_RESULTS=50
# Number of papers to expand in multi-layer search
EXPAND_PAPERS=10
# ====================
# CACHING SETTINGS
# ====================
# Enable caching to reduce API calls and costs
ENABLE_CACHE=true
# Cache TTL in seconds (1 hour default)
CACHE_TTL=3600
# Cache embeddings locally
CACHE_EMBEDDINGS=true
# Embeddings cache directory
EMBEDDINGS_CACHE_DIR=embeddings/openai
# ====================
# COST OPTIMIZATION
# ====================
# Enable cost tracking and warnings
TRACK_COSTS=true
# Maximum cost per request (USD)
MAX_COST_PER_REQUEST=0.50
# Daily cost limit (USD)
DAILY_COST_LIMIT=10.00
# Use cheaper models for non-critical tasks
USE_CHEAP_MODELS_FOR_PREPROCESSING=true
# ====================
# DATABASE SETTINGS
# ====================
# Database path for PapersWithCode data
DATABASE_PATH=paperswithcode.db
# ====================
# SERVER SETTINGS
# ====================
# Backend API server
BACKEND_HOST=0.0.0.0
BACKEND_PORT=8000
# CORS allowed origins
CORS_ORIGINS=http://localhost:5173,http://localhost:3000
# ====================
# LOGGING & MONITORING
# ====================
# Log level (DEBUG, INFO, WARNING, ERROR)
LOG_LEVEL=INFO
# Log OpenAI API calls
LOG_API_CALLS=true
# Log file path
LOG_FILE=logs/openai_api.log
# ====================
# FEATURE FLAGS
# ====================
# Enable fallback to local models if OpenAI fails
ENABLE_LOCAL_FALLBACK=false
# Use mock models for testing (overrides OpenAI)
USE_MOCK_MODELS=false
# Enable health checks
ENABLE_HEALTH_CHECKS=true
# Enable metrics collection
ENABLE_METRICS=true
# ====================
# NOTES
# ====================
# Cost Estimation (as of Aug 2025):
# - GPT-5-mini: $0.100 / 1M input tokens, $0.400 / 1M output tokens (estimated)
# - GPT-4o-mini: $0.150 / 1M input tokens, $0.600 / 1M output tokens
# - text-embedding-3-small: $0.02 / 1M tokens (very cost-effective)
# - text-embedding-3-large: $0.13 / 1M tokens
#
# Typical usage per search:
# - Agent search: ~2000 input tokens, ~500 output tokens
# - Embedding generation: ~500 tokens per document
# - Estimated cost per search: < $0.001 with GPT-5-mini and text-embedding-3-small
#
# Tips for cost optimization:
# 1. Use caching aggressively
# 2. Batch embedding requests
# 3. text-embedding-3-small provides excellent quality at low cost
# 4. Set appropriate MAX_TOKENS limits
# 5. Use temperature=0 for deterministic results (better caching)