@@ -258,6 +258,9 @@ def _get_chunk_text(database_path: str, file_id: int, chunk_index: int) -> Optio
258258 """
259259 Get chunk text by reading from filesystem instead of database.
260260 Uses project_path metadata and file path to read the actual file.
261+
262+ Security: Validates that the resolved file path is within the project directory
263+ to prevent path traversal attacks.
261264 """
262265 from db .operations import get_project_metadata
263266
@@ -280,14 +283,20 @@ def _get_chunk_text(database_path: str, file_id: int, chunk_index: int) -> Optio
280283 project_path = get_project_metadata (database_path , "project_path" )
281284 if not project_path :
282285 logger .error ("Project path not found in metadata, cannot read file from filesystem" )
283- return None
286+ raise RuntimeError ( "Project path metadata is missing - indexing may not have completed properly" )
284287
285- # Construct full file path
286- full_path = os .path .join (project_path , file_path )
288+ # Construct full file path and normalize to prevent path traversal
289+ full_path = os .path .normpath (os .path .join (project_path , file_path ))
290+ normalized_project_path = os .path .normpath (project_path )
291+
292+ # Security check: ensure the resolved path is within the project directory
293+ if not full_path .startswith (normalized_project_path + os .sep ) and full_path != normalized_project_path :
294+ logger .error (f"Path traversal attempt detected: { file_path } resolves outside project directory" )
295+ return None
287296
288297 # Read file content from filesystem
289298 try :
290- with open (full_path , "r" , encoding = "utf-8" , errors = "ignore " ) as fh :
299+ with open (full_path , "r" , encoding = "utf-8" , errors = "replace " ) as fh :
291300 content = fh .read ()
292301 except Exception as e :
293302 logger .warning (f"Failed to read file from filesystem: { full_path } , error: { e } " )
0 commit comments