Skip to content

Commit f83b483

Browse files
cthoyt and hrshdhgd authored
Add windows testing and update path construction (#171)
* Add windows testing * Add pathlib support Also cleans up some suspect file path concatenation in testing * linted and flake8 compliant Co-authored-by: Harshad <hrshdhgd@users.noreply.github.com> Co-authored-by: Harshad Hegde <hegdehb@gmail.com>
1 parent 1dd41ab commit f83b483

File tree

11 files changed

+55
-47
lines changed

11 files changed

+55
-47
lines changed

.github/workflows/qc.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,11 @@ on:
1010
jobs:
1111
build:
1212

13-
runs-on: ubuntu-latest
13+
runs-on: ${{ matrix.os }}
1414
strategy:
1515
matrix:
1616
python-version: [3.7, 3.8, 3.9]
17+
os: [ ubuntu-latest, windows-latest ]
1718

1819
steps:
1920
- uses: actions/checkout@v2

sssom/parsers.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import re
66
import typing
77
from collections import Counter
8+
from pathlib import Path
89
from typing import Any, Callable, Dict, List, Optional, Set, TextIO, Tuple, Union, cast
910
from urllib.request import urlopen
1011
from xml.dom import Node, minidom
@@ -47,7 +48,7 @@
4748

4849

4950
def read_sssom_table(
50-
file_path: str,
51+
file_path: Union[str, Path],
5152
prefix_map: Optional[PrefixMap] = None,
5253
meta: Optional[MetadataType] = None,
5354
) -> MappingSetDataFrame:
@@ -571,8 +572,16 @@ def _swap_object_subject(mapping: Mapping) -> Mapping:
571572
return mapping
572573

573574

574-
def _read_metadata_from_table(path: str) -> Dict[str, Any]:
575-
if validators.url(path):
575+
def _read_metadata_from_table(path: Union[str, Path]) -> Dict[str, Any]:
576+
if isinstance(path, Path) or not validators.url(path):
577+
with open(path) as file:
578+
yamlstr = ""
579+
for line in file:
580+
if line.startswith("#"):
581+
yamlstr += re.sub("^#", "", line)
582+
else:
583+
break
584+
else:
576585
response = urlopen(path)
577586
yamlstr = ""
578587
for lin in response:
@@ -581,14 +590,7 @@ def _read_metadata_from_table(path: str) -> Dict[str, Any]:
581590
yamlstr += re.sub("^#", "", line)
582591
else:
583592
break
584-
else:
585-
with open(path) as file:
586-
yamlstr = ""
587-
for line in file:
588-
if line.startswith("#"):
589-
yamlstr += re.sub("^#", "", line)
590-
else:
591-
break
593+
592594
if yamlstr:
593595
meta = yaml.safe_load(yamlstr)
594596
logging.info(f"Meta={meta}")

sssom/util.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from collections import defaultdict
99
from dataclasses import dataclass, field
1010
from io import StringIO
11+
from pathlib import Path
1112
from typing import (
1213
Any,
1314
DefaultDict,
@@ -719,7 +720,7 @@ def inject_metadata_into_df(msdf: MappingSetDataFrame) -> MappingSetDataFrame:
719720
return msdf
720721

721722

722-
def get_file_extension(file: Union[str, TextIO]) -> str:
723+
def get_file_extension(file: Union[str, Path, TextIO]) -> str:
723724
"""Get file extension.
724725
725726
:param file: File path
@@ -728,6 +729,8 @@ def get_file_extension(file: Union[str, TextIO]) -> str:
728729
"""
729730
if isinstance(file, str):
730731
filename = file
732+
elif isinstance(file, Path):
733+
return file.suffix
731734
else:
732735
filename = file.name
733736
parts = filename.split(".")
@@ -739,7 +742,7 @@ def get_file_extension(file: Union[str, TextIO]) -> str:
739742

740743

741744
def read_csv(
742-
filename: Union[str, TextIO], comment: str = "#", sep: str = ","
745+
filename: Union[str, Path, TextIO], comment: str = "#", sep: str = ","
743746
) -> pd.DataFrame:
744747
"""Read a CSV that contains frontmatter commented by a specific character.
745748
@@ -753,7 +756,10 @@ def read_csv(
753756
"""
754757
if isinstance(filename, TextIO):
755758
return pd.read_csv(filename, sep=sep)
756-
if validators.url(filename):
759+
if isinstance(filename, Path) or not validators.url(filename):
760+
with open(filename, "r") as f:
761+
lines = "".join([line for line in f if not line.startswith(comment)])
762+
else:
757763
response = urlopen(filename)
758764
lines = "".join(
759765
[
@@ -762,9 +768,6 @@ def read_csv(
762768
if not line.decode("utf-8").startswith(comment)
763769
]
764770
)
765-
else:
766-
with open(filename, "r") as f:
767-
lines = "".join([line for line in f if not line.startswith(comment)])
768771
return pd.read_csv(StringIO(lines), sep=sep)
769772

770773

@@ -778,7 +781,9 @@ def read_metadata(filename: str) -> Metadata:
778781
return Metadata(prefix_map=prefix_map, metadata=metadata)
779782

780783

781-
def read_pandas(file: Union[str, TextIO], sep: Optional[str] = None) -> pd.DataFrame:
784+
def read_pandas(
785+
file: Union[str, Path, TextIO], sep: Optional[str] = None
786+
) -> pd.DataFrame:
782787
"""Read a tabular data file by wrapping func:`pd.read_csv` to handles comment lines correctly.
783788
784789
:param file: The file to read. If no separator is given, this file should be named.
@@ -982,14 +987,17 @@ def prepare_context_str(prefix_map: Optional[PrefixMap] = None, **kwargs) -> str
982987
return json.dumps(prepare_context(prefix_map), **kwargs)
983988

984989

985-
def raise_for_bad_path(file_path: str) -> None:
990+
def raise_for_bad_path(file_path: Union[str, Path]) -> None:
986991
"""Raise exception if file path is invalid.
987992
988993
:param file_path: File path
989-
:raises ValueError: Invalid file path
994+
:raises FileNotFoundError: Invalid file path
990995
"""
991-
if not validators.url(file_path) and not os.path.exists(file_path):
992-
raise ValueError(f"{file_path} is not a valid file path or url.")
996+
if isinstance(file_path, Path):
997+
if not file_path.is_file():
998+
raise FileNotFoundError(f"{file_path} is not a valid file path or url.")
999+
elif not validators.url(file_path) and not os.path.exists(file_path):
1000+
raise FileNotFoundError(f"{file_path} is not a valid file path or url.")
9931001

9941002

9951003
def is_multivalued_slot(slot: str) -> bool:

tests/constants.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
"""Constants for test cases."""
22

3-
import os
43
import pathlib
54

65
cwd = pathlib.Path(__file__).parent.resolve()
7-
data_dir = os.path.join(cwd, "data")
6+
data_dir = cwd / "data"
87

9-
test_out_dir = os.path.join(cwd, "tmp")
10-
os.makedirs(test_out_dir, exist_ok=True)
8+
test_out_dir = cwd / "tmp"
9+
test_out_dir.mkdir(parents=True, exist_ok=True)

tests/test_cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def run_parse(self, runner: CliRunner, test_case: SSSOMTestCase) -> Result:
113113
def run_split(self, runner: CliRunner, test_case: SSSOMTestCase) -> Result:
114114
"""Run the split test."""
115115
result = runner.invoke(
116-
split, [test_case.filepath, "--output-directory", test_out_dir]
116+
split, [test_case.filepath, "--output-directory", test_out_dir.as_posix()]
117117
)
118118
self.run_successful(result, test_case)
119119
return result
@@ -172,7 +172,7 @@ def run_partition(
172172
if not primary_test_case:
173173
primary_test_case = t
174174
params.append(t.filepath)
175-
params.extend(["--output-directory", test_out_dir])
175+
params.extend(["--output-directory", test_out_dir.as_posix()])
176176
result = runner.invoke(partition, params)
177177
self.run_successful(result, primary_test_case)
178178
return result

tests/test_collapse.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ class TestCollapse(unittest.TestCase):
2020

2121
def setUp(self) -> None:
2222
"""Set up the test case."""
23-
self.df = parse(f"{data_dir}/basic.tsv")
23+
self.df = parse(data_dir / "basic.tsv")
2424

2525
def test_row_count(self):
2626
"""Test the dataframe has the correct number of rows."""
@@ -69,7 +69,7 @@ def test_diff(self):
6969
print(output)
7070
# print(diff)
7171

72-
df2 = parse(f"{data_dir}/basic2.tsv")
72+
df2 = parse(data_dir / "basic2.tsv")
7373
diff = compare_dataframes(self.df, df2)
7474
# print(len(diff.unique_tuples1))
7575
# print(len(diff.unique_tuples2))

tests/test_convert.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ class TestConvert(unittest.TestCase):
1212

1313
def setUp(self) -> None:
1414
"""Set up the test case with two tables."""
15-
self.msdf = read_sssom_table(f"{data_dir}/basic.tsv")
16-
self.cob = read_sssom_table(f"{data_dir}/cob-to-external.tsv")
15+
self.msdf = read_sssom_table(data_dir / "basic.tsv")
16+
self.cob = read_sssom_table(data_dir / "cob-to-external.tsv")
1717

1818
def test_df(self):
1919
"""Test the dataframe has the right number of mappings."""

tests/test_parsers.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,6 @@ class TestParse(unittest.TestCase):
3030

3131
def setUp(self) -> None:
3232
"""Set up the test case."""
33-
if not os.path.exists(test_out_dir):
34-
os.mkdir(test_out_dir)
35-
3633
self.df_url = "https://raw.githubusercontent.com/mapping-commons/sssom-py/master/tests/data/basic.tsv"
3734
self.rdf_graph_file = f"{test_data_dir}/basic.sssom.rdf"
3835
self.rdf_graph = Graph()

tests/test_reconcile.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class TestReconcile(unittest.TestCase):
1313

1414
def setUp(self) -> None:
1515
"""Test up the test case with the third basic example."""
16-
self.msdf = read_sssom_table(f"{data_dir}/basic3.tsv")
16+
self.msdf = read_sssom_table(data_dir / "basic3.tsv")
1717

1818
def test_filter(self):
1919
"""Test filtering returns the right number of rows."""
@@ -27,17 +27,17 @@ def test_deal_with_negation(self):
2727

2828
def test_merge(self):
2929
"""Test merging two tables."""
30-
msdf1 = read_sssom_table(f"{data_dir}/basic.tsv")
31-
msdf2 = read_sssom_table(f"{data_dir}/basic2.tsv")
30+
msdf1 = read_sssom_table(data_dir / "basic.tsv")
31+
msdf2 = read_sssom_table(data_dir / "basic2.tsv")
3232

3333
merged_msdf = merge_msdf(msdf1=msdf1, msdf2=msdf2)
3434

3535
self.assertEqual(123, len(merged_msdf.df))
3636

3737
def test_merge_no_reconcile(self):
3838
"""Test merging two tables without reconciliation."""
39-
msdf1 = read_sssom_table(f"{data_dir}/basic4.tsv")
40-
msdf2 = read_sssom_table(f"{data_dir}/basic5.tsv")
39+
msdf1 = read_sssom_table(data_dir / "basic4.tsv")
40+
msdf2 = read_sssom_table(data_dir / "basic5.tsv")
4141

4242
merged_msdf = merge_msdf(msdf1=msdf1, msdf2=msdf2, reconcile=False)
4343

tests/test_rewire.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Tests for rewiring utilities."""
22

3+
import os
34
import unittest
45

56
from rdflib import Graph
@@ -14,9 +15,9 @@ class TestRewire(unittest.TestCase):
1415

1516
def setUp(self) -> None:
1617
"""Set up the test case with the COB mappings et and OWL graph."""
17-
self.mset = read_sssom_table(f"{data_dir}/cob-to-external.tsv")
18+
self.mset = read_sssom_table(data_dir / "cob-to-external.tsv")
1819
g = Graph()
19-
g.parse(f"{data_dir}/cob.owl", format="xml")
20+
g.parse(os.path.join(data_dir, "cob.owl"), format="xml")
2021
self.graph = g
2122

2223
def test_rewire(self):
@@ -27,5 +28,5 @@ def test_rewire(self):
2728

2829
n = rewire_graph(self.graph, self.mset, precedence=["PR"])
2930
print(f"Num changed = {n}")
30-
with open(f"{test_out_dir}/rewired-cob.ttl", "w") as stream:
31+
with open(test_out_dir / "rewired-cob.ttl", "w") as stream:
3132
stream.write(self.graph.serialize(format="turtle").decode())

0 commit comments

Comments
 (0)