@@ -255,14 +255,48 @@ def _search_vectors(database_path: str, q_vector: List[float], top_k: int = 5) -
 
 
 def _get_chunk_text(database_path: str, file_id: int, chunk_index: int) -> Optional[str]:
+    """
+    Get chunk text by reading from filesystem instead of database.
+    Uses project_path metadata and file path to read the actual file.
+    """
+    from db.operations import get_project_metadata
+
     conn = _connect_db(database_path)
     try:
         cur = conn.cursor()
-        cur.execute("SELECT content FROM files WHERE id = ?", (file_id,))
+        # Get file path from database
+        cur.execute("SELECT path FROM files WHERE id = ?", (file_id,))
         row = cur.fetchone()
         if not row:
+            logger.warning(f"File not found in database: file_id={file_id}")
+            return None
+
+        file_path = row[0]
+        if not file_path:
+            logger.warning(f"File path is empty for file_id={file_id}")
+            return None
+
+        # Get project path from metadata
+        project_path = get_project_metadata(database_path, "project_path")
+        if not project_path:
+            logger.error("Project path not found in metadata, cannot read file from filesystem")
             return None
-        content = row[0] or ""
+
+        # Construct full file path
+        full_path = os.path.join(project_path, file_path)
+
+        # Read file content from filesystem
+        try:
+            with open(full_path, "r", encoding="utf-8", errors="ignore") as fh:
+                content = fh.read()
+        except Exception as e:
+            logger.warning(f"Failed to read file from filesystem: {full_path}, error: {e}")
+            return None
+
+        if not content:
+            return None
+
+        # Extract the chunk
         if CHUNK_SIZE <= 0:
             return content
         step = max(1, CHUNK_SIZE - CHUNK_OVERLAP)
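The slicing that uses this step is unchanged context outside the hunk, but for reference, step-based chunking with an overlap typically maps a chunk index back to a character range as sketched below; the constant values and the slice_chunk helper name are illustrative only, not taken from this project.

# Minimal sketch of step-based chunk slicing; constants and helper name are
# illustrative, not the project's actual values.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

def slice_chunk(content: str, chunk_index: int) -> str:
    # Consecutive chunks start CHUNK_SIZE - CHUNK_OVERLAP characters apart,
    # so chunk N covers content[N * step : N * step + CHUNK_SIZE].
    step = max(1, CHUNK_SIZE - CHUNK_OVERLAP)
    start = chunk_index * step
    return content[start:start + CHUNK_SIZE]

With these example values, chunk 2 would start at offset 1600 and span 1000 characters.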
@@ -309,8 +343,12 @@ def _process_file_sync(
 
     # Check if file needs reindexing (incremental mode)
     if incremental and not needs_reindex(database_path, rel_path, mtime, file_hash):
+        logger.debug(f"Skipping unchanged file: {rel_path}")
         return {"stored": False, "embedded": False, "skipped": True}
 
+    # Log file processing
+    logger.info(f"Processing file: {rel_path}")
+
     # store file (synchronous DB writer) with metadata
     try:
         fid = store_file(database_path, rel_path, content, lang, mtime, file_hash)
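The incremental short-circuit above relies on needs_reindex, whose implementation is outside this diff. A hedged sketch of the usual mtime/hash comparison follows; the table and column names are assumptions, not the project's actual schema.

# Hedged sketch only: assumes a files table with mtime and hash columns;
# the real needs_reindex implementation is not shown in this diff.
def needs_reindex_sketch(conn, rel_path: str, mtime: float, file_hash: str) -> bool:
    cur = conn.cursor()
    cur.execute("SELECT mtime, hash FROM files WHERE path = ?", (rel_path,))
    row = cur.fetchone()
    if row is None:
        return True  # never indexed before
    stored_mtime, stored_hash = row
    # Reindex only if the file changed on disk since it was last stored.
    return stored_mtime != mtime or stored_hash != file_hash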
@@ -426,16 +464,26 @@ def analyze_local_path_sync(
     Submits per-file tasks to a shared ThreadPoolExecutor.
     Supports incremental indexing to skip unchanged files.
     """
+    from db.operations import set_project_metadata
+
     semaphore = threading.Semaphore(EMBEDDING_CONCURRENCY)
     start_time = time.time()
 
+    # Store project path in metadata for filesystem access
+    try:
+        set_project_metadata(database_path, "project_path", local_path)
+        logger.info(f"Starting indexing for project at: {local_path}")
+    except Exception as e:
+        logger.warning(f"Failed to store project path in metadata: {e}")
+
     try:
         file_count = 0
         emb_count = 0
         skipped_count = 0
         file_paths: List[Dict[str, str]] = []
 
         # Collect files to process
+        logger.info("Collecting files to index...")
         for root, dirs, files in os.walk(local_path):
             for fname in files:
                 full = os.path.join(root, fname)
@@ -447,6 +495,8 @@ def analyze_local_path_sync(
                 except Exception:
                     continue
                 file_paths.append({"full": full, "rel": rel})
+
+        logger.info(f"Found {len(file_paths)} files to process")
 
         # Process files in chunks to avoid too many futures at once.
         CHUNK_SUBMIT = 256
@@ -482,6 +532,9 @@ def analyze_local_path_sync(
         end_time = time.time()
         duration = end_time - start_time
 
+        # Log summary
+        logger.info(f"Indexing completed: {file_count} files processed, {emb_count} embeddings created, {skipped_count} files skipped in {duration:.2f}s")
+
         try:
             # Use batch update for efficiency - single database transaction
             set_project_metadata_batch(database_path, {
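Taken together, the indexing side now records the project root while the retrieval side reads chunk text back from disk. A hedged end-to-end sketch follows; the paths are placeholders and the positional argument order for analyze_local_path_sync is assumed rather than confirmed by this diff.

# Sketch only: placeholder paths; argument order for analyze_local_path_sync is assumed.
db_path = "/tmp/index.db"
project_root = "/path/to/project"

# Indexing stores "project_path" in project metadata, so file contents no
# longer need to live in the database.
analyze_local_path_sync(db_path, project_root)

# Retrieval re-reads the chunk from the filesystem via the stored metadata.
text = _get_chunk_text(db_path, file_id=1, chunk_index=0)
if text is None:
    pass  # file missing on disk or project_path metadata not set; callers must handle None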