Skip to content

Commit f4b4b4e

Browse files
authored
Merge pull request #25 from LuisJG8/from_json_to_pqt
test
2 parents c79352c + 1282a07 commit f4b4b4e

2 files changed

Lines changed: 11 additions & 5 deletions

File tree

data/analytics/ducky.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,10 @@
1-
import duckdb
1+
import duckdb
22

3-
duckdb.sql("SELECT 42 FROM data/2026-01-14/hey.parquet")
3+
df = duckdb.read_parquet("../2026-01-14/hey.parquet")
4+
5+
duckdb.sql("DESCRIBE SELECT * FROM df").show()
6+
7+
duckdb.sql("SELECT language, COUNT(language) AS c_p \
8+
FROM df \
9+
GROUP BY language \
10+
ORDER BY c_p DESC").show()

worker.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -123,15 +123,14 @@ def get_github_data(self, start_in_repo_num: int = 0, batch_size: int = 500, git
123123

124124
except Exception as validation_error:
125125
print(f"Validation error for repo {github_data_points.get('full_name')}: {validation_error}")
126-
print("Skipping this repo and continuing...")
126+
print("Skipping this repo and continuing")
127127
continue
128128

129129
remaining_api_calls = github_instance.rate_limiting
130130
remaining = remaining_api_calls[0]
131131

132-
if remaining_api_calls == 1:
132+
if remaining == 2:
133133
print(f"Reached batch size limit of {batch_size}")
134-
135134
break
136135

137136
# # start_in_repo_num = counter

0 commit comments

Comments
 (0)