Skip to content

Commit 098b4a6

Browse files
committed
Improve logging & documentation
1 parent e842a0a commit 098b4a6

3 files changed

Lines changed: 50 additions & 14 deletions

File tree

app/Airsequel.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ loadRowids manager dbEndpoint airseqWriteToken tableName repos = do
174174
Right GqlRes{gqlErrors, gqlData} ->
175175
case gqlErrors of
176176
Just errs -> do
177-
putErrText "GraphQL errors:"
177+
putErrText "GraphQL errors while retrieving repos:"
178178
errs
179179
<&> encodePretty
180180
& P.mapM_ P.putLByteString
@@ -300,7 +300,7 @@ saveReposInAirsequel saveStrategy repos = do
300300
case gqlErrors of
301301
Nothing -> pure ()
302302
Just errs -> do
303-
putErrText "GraphQL errors:"
303+
putErrText "GraphQL errors while upserting repos:"
304304
putErrText $ show errs
305305

306306

app/Main.hs

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ import Types (GqlRepoRes (..), Repo (..), SaveStrategy (..))
8484
import Utils (loadAirsWriteToken, loadGitHubToken)
8585

8686

87+
batchSize :: Int
88+
batchSize = 50
89+
90+
8791
data CliCmd
8892
= -- | Upload files
8993
FileUpload
@@ -276,8 +280,6 @@ getGhHeaders tokenMb =
276280
execGithubGqlQuery ::
277281
Maybe Text -> Text -> KeyMap Value -> [Repo] -> Text -> IO [Repo]
278282
execGithubGqlQuery ghTokenMb query variables initialRepos tableName = do
279-
putText "\n▶️ Query a batch of repos from GitHub …"
280-
281283
manager <- newManager tlsManagerSettings
282284

283285
initialRequest <- parseRequest $ T.unpack "https://api.github.com/graphql"
@@ -311,14 +313,19 @@ execGithubGqlQuery ghTokenMb query variables initialRepos tableName = do
311313

312314
when (P.null initialRepos {- First call -}) $ do
313315
putText $
314-
"\n📲 Total number of repos: "
316+
"#️⃣ Number of repos: "
315317
<> show @Integer gqlResponse.repositoryCount
316318

317-
when (gqlResponse.repositoryCount > 1000) $ do
318-
putText $
319-
"\n⚠️ WARNING\n"
320-
<> "⚠️ The search returns more than 1000 repos.\n"
321-
<> "⚠️ Not all repos will be crawled.\n"
319+
when (gqlResponse.repositoryCount > 1000) $ do
320+
putText $
321+
"\n⚠️ WARNING\n"
322+
<> "⚠️ The search returns more than 1000 repos.\n"
323+
<> "⚠️ Not all repos will be crawled.\n"
324+
325+
putText $
326+
"\n▶️ Query a batch of "
327+
<> show @Int batchSize
328+
<> " repos from GitHub …"
322329

323330
let
324331
repos :: [Repo] = gqlResponse.repos
@@ -428,6 +435,8 @@ loadAndSaveReposViaSearch ghTokenMb searchQuery numRepos afterMb tableNameParam
428435
}
429436
|]
430437

438+
putText $ "\n🔎 Search Query: " <> searchQuery <> "\n"
439+
431440
execGithubGqlQuery
432441
ghTokenMb
433442
gqlQUery
@@ -481,14 +490,17 @@ run cliCmd = do
481490

482491
allRepos <- P.forM searchQueriesNorm $ \searchQueryNorm -> do
483492
repos <-
484-
loadAndSaveReposViaSearch ghTokenMb searchQueryNorm 50 Nothing cmdTableName
493+
loadAndSaveReposViaSearch
494+
ghTokenMb
495+
searchQueryNorm
496+
batchSize
497+
Nothing
498+
cmdTableName
485499

486500
putText $
487501
"\n🏁 Crawled "
488502
<> show @Int (P.length repos)
489-
<> " repos with search query:\n"
490-
<> searchQueryNorm
491-
<> "\n"
503+
<> " repos\n\n"
492504

493505
pure repos
494506

app/Types.hs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,30 @@ import Data.Aeson.Types (parseJSON)
2020
import Data.Time (UTCTime)
2121

2222

23+
{-| Corresponding SQL table:
24+
25+
`rowid` is created automatically.
26+
27+
CREATE TABLE "repos" (
28+
"name" TEXT,
29+
"commits_count" INTEGER DEFAULT 0 NOT NULL,
30+
"stargazers_count" INTEGER DEFAULT 0 NOT NULL,
31+
"github_id" INTEGER DEFAULT 0 NOT NULL,
32+
"owner" TEXT DEFAULT '' NOT NULL,
33+
"description" TEXT,
34+
"homepage" TEXT_URL,
35+
"languages" TEXT_JSON_ARRAY,
36+
"language" TEXT,
37+
"open_issues_count" INTEGER,
38+
"is_archived" BOOLEAN DEFAULT FALSE NOT NULL,
39+
"updated_utc" TEXT_DATETIME,
40+
"crawled_utc" TEXT_DATETIME,
41+
"created_utc" TEXT_DATETIME,
42+
"pushed_utc" TEXT_DATETIME,
43+
"is_private" BOOLEAN DEFAULT FALSE NOT NULL,
44+
"open_prs_count" INTEGER
45+
)
46+
-}
2347
data Repo = Repo
2448
{ githubId :: Integer
2549
, rowid :: Maybe Integer -- Airsequel rowid

0 commit comments

Comments
 (0)