Skip to content

Commit d3fe5b9

Browse files
authored
Merge pull request #23 from TogetherCrew/fix/mediawiki-activities-wrong-arg
feat: mediaWiki change dump dir!
2 parents 1a22667 + f414749 commit d3fe5b9

2 files changed

Lines changed: 7 additions & 6 deletions

File tree

hivemind_etl/mediawiki/etl.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ def __init__(
2525
community_id, namespaces=namespaces, proxy_url=self.proxy_url
2626
)
2727

28-
self.dump_dir = f"dump_{self.community_id}"
28+
self.dump_dir = f"dumps/{self.community_id}"
2929
self.delete_dump_after_load = delete_dump_after_load
3030

3131
def extract(self, api_url: str, dump_dir: str | None = None) -> None:

tests/unit/test_mediawiki_etl.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -15,20 +15,21 @@ def setUp(self):
1515
self.namespaces = [0, 1] # Main and Talk namespaces
1616

1717
# Create a temporary dumps directory
18-
os.makedirs(f"dump_{self.community_id}", exist_ok=True)
18+
os.makedirs("dumps", exist_ok=True)
19+
os.makedirs(f"dumps/{self.community_id}", exist_ok=True)
1920

2021
def tearDown(self):
2122
# Clean up any created files
22-
if os.path.exists(f"dump_{self.community_id}"):
23-
shutil.rmtree(f"dump_{self.community_id}")
23+
if os.path.exists(f"dumps/{self.community_id}"):
24+
shutil.rmtree(f"dumps/{self.community_id}")
2425
if os.path.exists(self.custom_path):
2526
shutil.rmtree(self.custom_path)
2627

2728
def test_mediawiki_etl_initialization(self):
2829
etl = MediawikiETL(community_id=self.community_id, namespaces=self.namespaces)
2930
self.assertEqual(etl.community_id, self.community_id)
3031
self.assertTrue(etl.delete_dump_after_load)
31-
self.assertEqual(etl.dump_dir, f"dump_{self.community_id}")
32+
self.assertEqual(etl.dump_dir, f"dumps/{self.community_id}")
3233

3334
etl = MediawikiETL(
3435
community_id=self.community_id,
@@ -45,7 +46,7 @@ def test_extract_with_default_path(self):
4546
etl.extract(self.api_url)
4647

4748
etl.wikiteam_crawler.crawl.assert_called_once_with(
48-
self.api_url, f"dump_{self.community_id}"
49+
self.api_url, f"dumps/{self.community_id}"
4950
)
5051

5152
def test_extract_with_custom_path(self):

0 commit comments

Comments
 (0)