Skip to content

Commit 89ef974

Browse files
author
Theodore Li
committed
Add sleep every 1000, dry run writes to files that prod consumes from
1 parent d2e0868 commit 89ef974

File tree

1 file changed

+86
-20
lines changed

1 file changed

+86
-20
lines changed

packages/db/scripts/migrate-block-api-keys-to-byok.ts

Lines changed: 86 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,23 @@
55
// Handles both literal keys ("sk-xxx...") and env var references ("{{VAR_NAME}}").
66
//
77
// Usage:
8-
// # Dry run: audit for conflicts + preview inserts (no DB writes)
8+
// # Step 1 — Dry run: audit for conflicts + preview inserts (no DB writes)
9+
// # Outputs migrate-byok-workspace-ids.txt for the live run.
910
// bun run packages/db/scripts/migrate-block-api-keys-to-byok.ts --dry-run \
1011
// --map jina=jina --map perplexity=perplexity --map google_books=google_cloud
1112
//
12-
// # Live run: insert BYOK keys
13+
// # Step 2 — Live run: insert BYOK keys (--from-file is required)
1314
// bun run packages/db/scripts/migrate-block-api-keys-to-byok.ts \
14-
// --map jina=jina --map perplexity=perplexity --map google_books=google_cloud
15+
// --map jina=jina --map perplexity=perplexity --map google_books=google_cloud \
16+
// --from-file migrate-byok-workspace-ids.txt
1517
//
16-
// # Optionally scope to specific users (repeatable)
18+
// # Optionally scope dry run to specific users (repeatable)
1719
// bun run packages/db/scripts/migrate-block-api-keys-to-byok.ts --dry-run \
1820
// --map jina=jina --user user_abc123 --user user_def456
1921

2022
import { createCipheriv, createDecipheriv, randomBytes } from 'crypto'
23+
import { readFileSync, writeFileSync } from 'fs'
24+
import { resolve } from 'path'
2125
import { eq, sql } from 'drizzle-orm'
2226
import { index, json, jsonb, pgTable, text, timestamp, uniqueIndex } from 'drizzle-orm/pg-core'
2327
import { drizzle } from 'drizzle-orm/postgres-js'
@@ -36,7 +40,9 @@ function parseMapArgs(): Record<string, string> {
3640
if (blockType && providerId) {
3741
mapping[blockType] = providerId
3842
} else {
39-
console.error(`Invalid --map value: "${args[i + 1]}". Expected format: blockType=providerId`)
43+
console.error(
44+
`Invalid --map value: "${args[i + 1]}". Expected format: blockType=providerId`
45+
)
4046
process.exit(1)
4147
}
4248
i++
@@ -68,6 +74,27 @@ function parseUserArgs(): string[] {
6874

6975
const USER_FILTER = parseUserArgs()
7076

77+
function parseFromFileArg(): string | null {
78+
const args = process.argv.slice(2)
79+
for (let i = 0; i < args.length; i++) {
80+
if (args[i] === '--from-file' && args[i + 1]) {
81+
return args[i + 1]
82+
}
83+
}
84+
return null
85+
}
86+
87+
const FROM_FILE = parseFromFileArg()
88+
89+
if (!DRY_RUN && !FROM_FILE) {
90+
console.error('Live runs require --from-file. Run with --dry-run first to generate the file.')
91+
process.exit(1)
92+
}
93+
if (DRY_RUN && FROM_FILE) {
94+
console.error('--from-file cannot be used with --dry-run. Dry runs always discover workspaces from the database.')
95+
process.exit(1)
96+
}
97+
7198
// ---------- Env ----------
7299
function getEnv(name: string): string | undefined {
73100
if (typeof process !== 'undefined' && process.env && name in process.env) {
@@ -191,6 +218,23 @@ const postgresClient = postgres(CONNECTION_STRING, {
191218
})
192219
const db = drizzle(postgresClient)
193220

221+
// ---------- Throttle ----------
222+
const BATCH_SIZE = 1000
223+
const SLEEP_MS = 30_000
224+
let requestCount = 0
225+
let lastWorkspaceId = ''
226+
227+
async function throttle(workspaceId?: string) {
228+
if (workspaceId) lastWorkspaceId = workspaceId
229+
requestCount++
230+
if (requestCount % BATCH_SIZE === 0) {
231+
console.log(
232+
` [THROTTLE] ${requestCount} DB requests — last workspace: ${lastWorkspaceId} — sleeping ${SLEEP_MS / 1000}s`
233+
)
234+
await new Promise((r) => setTimeout(r, SLEEP_MS))
235+
}
236+
}
237+
194238
// ---------- Helpers ----------
195239
const TOOL_INPUT_SUBBLOCK_IDS: Record<string, string> = {
196240
agent: 'tools',
@@ -266,9 +310,12 @@ async function resolveKey(
266310
async function run() {
267311
console.log(`Mode: ${DRY_RUN ? 'DRY RUN (audit + preview)' : 'LIVE'}`)
268312
console.log(
269-
`Mappings: ${Object.entries(BLOCK_TYPE_TO_PROVIDER).map(([b, p]) => `${b}=${p}`).join(', ')}`
313+
`Mappings: ${Object.entries(BLOCK_TYPE_TO_PROVIDER)
314+
.map(([b, p]) => `${b}=${p}`)
315+
.join(', ')}`
270316
)
271317
console.log(`Users: ${USER_FILTER.length > 0 ? USER_FILTER.join(', ') : 'all'}`)
318+
if (FROM_FILE) console.log(`From file: ${FROM_FILE}`)
272319
console.log('---\n')
273320

274321
const stats = {
@@ -295,22 +342,37 @@ async function run() {
295342
)})`
296343
: sql``
297344

298-
const workspaceIdRows = await db
299-
.selectDistinct({ workspaceId: workflow.workspaceId })
300-
.from(workflowBlocks)
301-
.innerJoin(workflow, eq(workflowBlocks.workflowId, workflow.id))
302-
.where(
303-
sql`${workflow.workspaceId} IS NOT NULL AND ${workflowBlocks.type} IN (${sql.join(
304-
allBlockTypes.map((t) => sql`${t}`),
305-
sql`, `
306-
)})${userFilter}`
307-
)
345+
let workspaceIds: string[]
346+
347+
if (DRY_RUN) {
348+
const workspaceIdRows = await db
349+
.selectDistinct({ workspaceId: workflow.workspaceId })
350+
.from(workflowBlocks)
351+
.innerJoin(workflow, eq(workflowBlocks.workflowId, workflow.id))
352+
.where(
353+
sql`${workflow.workspaceId} IS NOT NULL AND ${workflowBlocks.type} IN (${sql.join(
354+
allBlockTypes.map((t) => sql`${t}`),
355+
sql`, `
356+
)})${userFilter}`
357+
)
358+
359+
workspaceIds = workspaceIdRows
360+
.map((r) => r.workspaceId)
361+
.filter((id): id is string => id !== null)
308362

309-
const workspaceIds = workspaceIdRows
310-
.map((r) => r.workspaceId)
311-
.filter((id): id is string => id !== null)
363+
console.log(`Found ${workspaceIds.length} workspaces with candidate blocks\n`)
312364

313-
console.log(`Found ${workspaceIds.length} workspaces with candidate blocks\n`)
365+
const outPath = resolve('migrate-byok-workspace-ids.txt')
366+
writeFileSync(outPath, workspaceIds.join('\n') + '\n')
367+
console.log(`[DRY RUN] Wrote ${workspaceIds.length} workspace IDs to ${outPath}\n`)
368+
} else {
369+
const raw = readFileSync(resolve(FROM_FILE!), 'utf-8')
370+
workspaceIds = raw
371+
.split('\n')
372+
.map((l) => l.trim())
373+
.filter(Boolean)
374+
console.log(`Loaded ${workspaceIds.length} workspace IDs from ${FROM_FILE}\n`)
375+
}
314376

315377
// 2. Process one workspace at a time
316378
for (const workspaceId of workspaceIds) {
@@ -332,6 +394,7 @@ async function run() {
332394
sql`, `
333395
)})${userFilter}`
334396
)
397+
await throttle(workspaceId)
335398

336399
console.log(`[Workspace ${workspaceId}] ${blocks.length} blocks`)
337400

@@ -397,6 +460,7 @@ async function run() {
397460
.from(workspaceEnvironment)
398461
.where(sql`${workspaceEnvironment.workspaceId} = ${workspaceId}`)
399462
.limit(1)
463+
await throttle()
400464
if (wsEnvRows[0]) {
401465
wsEnvVars = (wsEnvRows[0].variables as Record<string, string>) || {}
402466
}
@@ -412,6 +476,7 @@ async function run() {
412476
sql`, `
413477
)})`
414478
)
479+
await throttle()
415480
for (const row of personalRows) {
416481
personalEnvCache.set(row.userId, (row.variables as Record<string, string>) || {})
417482
}
@@ -475,6 +540,7 @@ async function run() {
475540
target: [workspaceBYOKKeys.workspaceId, workspaceBYOKKeys.providerId],
476541
})
477542
.returning({ id: workspaceBYOKKeys.id })
543+
await throttle()
478544

479545
if (result.length === 0) {
480546
console.log(` [SKIP] BYOK already exists for provider "${providerId}"`)

0 commit comments

Comments
 (0)