88from collections import defaultdict
99from dataclasses import dataclass , field
1010from io import StringIO
11+ from pathlib import Path
1112from typing import (
1213 Any ,
1314 DefaultDict ,
@@ -719,7 +720,7 @@ def inject_metadata_into_df(msdf: MappingSetDataFrame) -> MappingSetDataFrame:
719720 return msdf
720721
721722
722- def get_file_extension (file : Union [str , TextIO ]) -> str :
723+ def get_file_extension (file : Union [str , Path , TextIO ]) -> str :
723724 """Get file extension.
724725
725726 :param file: File path
@@ -728,6 +729,8 @@ def get_file_extension(file: Union[str, TextIO]) -> str:
728729 """
729730 if isinstance (file , str ):
730731 filename = file
732+ elif isinstance (file , Path ):
733+ return file .suffix
731734 else :
732735 filename = file .name
733736 parts = filename .split ("." )
@@ -739,7 +742,7 @@ def get_file_extension(file: Union[str, TextIO]) -> str:
739742
740743
741744def read_csv (
742- filename : Union [str , TextIO ], comment : str = "#" , sep : str = ","
745+ filename : Union [str , Path , TextIO ], comment : str = "#" , sep : str = ","
743746) -> pd .DataFrame :
744747 """Read a CSV that contains frontmatter commented by a specific character.
745748
@@ -753,7 +756,10 @@ def read_csv(
753756 """
754757 if isinstance (filename , TextIO ):
755758 return pd .read_csv (filename , sep = sep )
756- if validators .url (filename ):
759+ if isinstance (filename , Path ) or not validators .url (filename ):
760+ with open (filename , "r" ) as f :
761+ lines = "" .join ([line for line in f if not line .startswith (comment )])
762+ else :
757763 response = urlopen (filename )
758764 lines = "" .join (
759765 [
@@ -762,9 +768,6 @@ def read_csv(
762768 if not line .decode ("utf-8" ).startswith (comment )
763769 ]
764770 )
765- else :
766- with open (filename , "r" ) as f :
767- lines = "" .join ([line for line in f if not line .startswith (comment )])
768771 return pd .read_csv (StringIO (lines ), sep = sep )
769772
770773
@@ -778,7 +781,9 @@ def read_metadata(filename: str) -> Metadata:
778781 return Metadata (prefix_map = prefix_map , metadata = metadata )
779782
780783
781- def read_pandas (file : Union [str , TextIO ], sep : Optional [str ] = None ) -> pd .DataFrame :
784+ def read_pandas (
785+ file : Union [str , Path , TextIO ], sep : Optional [str ] = None
786+ ) -> pd .DataFrame :
782787 """Read a tabular data file by wrapping func:`pd.read_csv` to handles comment lines correctly.
783788
784789 :param file: The file to read. If no separator is given, this file should be named.
@@ -982,14 +987,17 @@ def prepare_context_str(prefix_map: Optional[PrefixMap] = None, **kwargs) -> str
982987 return json .dumps (prepare_context (prefix_map ), ** kwargs )
983988
984989
def raise_for_bad_path(file_path: Union[str, Path]) -> None:
    """Raise exception if file path is invalid.

    A :class:`pathlib.Path` is accepted only when it points at an existing
    regular file. A string is accepted when it names an existing local path
    or when ``validators.url`` recognises it as a URL.

    :param file_path: File path (string or ``Path``) or, as a string, a URL
    :raises FileNotFoundError: Invalid file path
    """
    if isinstance(file_path, Path):
        # A Path object is never treated as a URL, so only the filesystem is consulted.
        if file_path.is_file():
            return
    elif os.path.exists(file_path) or validators.url(file_path):
        # The filesystem check runs first so local paths skip URL validation;
        # the accepted set is the same as "is a URL or exists".
        return
    raise FileNotFoundError(f"{file_path} is not a valid file path or url.")
9931001
9941002
9951003def is_multivalued_slot (slot : str ) -> bool :
0 commit comments