diff --git a/.circleci/config.yml b/.circleci/config.yml index f72e032cdb..6be7c09e5a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -52,12 +52,13 @@ jobs: name: Test command: | TEST=$(./node_modules/.bin/jest --listTests) - echo $TEST | circleci tests run --command="xargs ./node_modules/.bin/jest --testEnvironment=node --ci --runInBand --reporters=default --reporters=jest-junit --" --split-by=timings + echo $TEST | circleci tests run --command="xargs ./node_modules/.bin/jest --testEnvironment=node --ci --maxWorkers=4 --reporters=default --reporters=jest-junit --" --split-by=timings environment: NODE_OPTIONS: --max-old-space-size=6144 # 75% of 8GB which is the memory of large resource class JEST_JUNIT_OUTPUT_DIR: ./test-results JEST_JUNIT_ADD_FILE_ATTRIBUTE: "true" JEST_JUNIT_FILE_PATH_PREFIX: "/home/circleci/project/" + ENABLE_SCHEMA_ISOLATION: "true" - store_test_results: path: ./test-results - store_artifacts: diff --git a/.infra/Pulumi.adhoc.yaml b/.infra/Pulumi.adhoc.yaml index f1ebf2c273..4542b1a174 100644 --- a/.infra/Pulumi.adhoc.yaml +++ b/.infra/Pulumi.adhoc.yaml @@ -74,7 +74,7 @@ config: otelEnabled: false otelExporterOtlpEndpoint: http://otel-collector.local.svc.cluster.local:4318/v1/traces otelTracesSampler: always_on - paddleApiKey: topsecret + paddleApiKey: pdl_sdbx_apikey_01kdq5zxjqkw13cnqcfrx8zqf9_w4972CNdrYn296TxNRffP7_AII paddleEnvironment: sandbox paddleWebhookSecret: topsecret personalizedDigestSecret: topsecret @@ -102,3 +102,4 @@ config: api:temporal: chain: '' key: '' + api:image: api-image:tilt-9046cb2312a58005 diff --git a/__tests__/boot.ts b/__tests__/boot.ts index 78c8615309..f3b4655e6e 100644 --- a/__tests__/boot.ts +++ b/__tests__/boot.ts @@ -218,9 +218,12 @@ beforeEach(async () => { await con.getRepository(User).save(usersFixture[0]); await con.getRepository(Source).save(sourcesFixture); await con.getRepository(Post).save(postsFixture); - await ioRedisPool.execute((client) => client.flushall()); - - await deleteKeysByPattern('njord:cores_balance:*'); + // Delete only keys used by boot tests, not flushall (which affects other workers) + await Promise.all([ + deleteKeysByPattern('boot:*'), + deleteKeysByPattern('exp:*'), + deleteKeysByPattern('njord:cores_balance:*'), + ]); const mockTransport = createMockNjordTransport(); jest @@ -303,7 +306,8 @@ describe('anonymous boot', () => { .set('User-Agent', TEST_UA) .expect(200); expect(first.body.user.firstVisit).toBeTruthy(); - await ioRedisPool.execute((client) => client.flushall()); + // Clear boot-related keys to simulate data loss, avoiding flushall which affects other workers + await deleteKeysByPattern('boot:*'); const second = await request(app.server) .get(BASE_PATH) .set('User-Agent', TEST_UA) diff --git a/__tests__/globalSetup.ts b/__tests__/globalSetup.ts new file mode 100644 index 0000000000..cc41d7d565 --- /dev/null +++ b/__tests__/globalSetup.ts @@ -0,0 +1,188 @@ +import { DataSource, QueryRunner } from 'typeorm'; + +/** + * Replace hardcoded 'public.' schema references with the target schema. 
+ */
+const replaceSchemaReferences = (sql: string, targetSchema: string): string => {
+  if (targetSchema === 'public') return sql;
+
+  let result = sql;
+
+  // Handle DROP INDEX separately - remove schema qualification and add IF EXISTS
+  result = result.replace(
+    /DROP INDEX\s+(?:IF EXISTS\s+)?(?:"public"\.|public\.)?("[^"]+"|[\w]+)/gi,
+    (_, indexName) => `DROP INDEX IF EXISTS ${indexName}`,
+  );
+
+  // Replace various patterns of public schema references
+  result = result
+    .replace(/\bpublic\."(\w+)"/gi, `"${targetSchema}"."$1"`)
+    .replace(/\bpublic\.(\w+)(?=[\s,;())]|$)/gi, `"${targetSchema}"."$1"`)
+    .replace(/"public"\."(\w+)"/gi, `"${targetSchema}"."$1"`)
+    .replace(/\bON\s+public\./gi, `ON "${targetSchema}".`);
+
+  return result;
+};
+
+/**
+ * Wrap a QueryRunner to intercept and transform SQL queries.
+ */
+const wrapQueryRunner = (
+  queryRunner: QueryRunner,
+  targetSchema: string,
+): QueryRunner => {
+  const originalQuery = queryRunner.query.bind(queryRunner);
+
+  queryRunner.query = async (
+    query: string,
+    parameters?: unknown[],
+  ): Promise<unknown> => {
+    const transformedQuery = replaceSchemaReferences(query, targetSchema);
+    return originalQuery(transformedQuery, parameters);
+  };
+
+  return queryRunner;
+};
+
+/**
+ * Create and run migrations for a single worker schema.
+ */
+const createWorkerSchema = async (schema: string): Promise<void> => {
+  const workerDataSource = new DataSource({
+    type: 'postgres',
+    host: process.env.TYPEORM_HOST || 'localhost',
+    port: 5432,
+    username: process.env.TYPEORM_USERNAME || 'postgres',
+    password: process.env.TYPEORM_PASSWORD || '12345',
+    database:
+      process.env.TYPEORM_DATABASE ||
+      (process.env.NODE_ENV === 'test' ? 'api_test' : 'api'),
+    schema,
+    extra: {
+      max: 2,
+      options: `-c search_path=${schema},public`,
+    },
+    entities: ['src/entity/**/*.{js,ts}'],
+    migrations: ['src/migration/**/*.{js,ts}'],
+    migrationsTableName: 'migrations',
+    logging: false,
+  });
+
+  await workerDataSource.initialize();
+
+  const queryRunner = workerDataSource.createQueryRunner();
+  await queryRunner.connect();
+  wrapQueryRunner(queryRunner, schema);
+
+  try {
+    // Create migrations table
+    await queryRunner.query(`
+      CREATE TABLE IF NOT EXISTS "${schema}"."migrations" (
+        "id" SERIAL PRIMARY KEY,
+        "timestamp" bigint NOT NULL,
+        "name" varchar NOT NULL
+      )
+    `);
+
+    // Create typeorm_metadata table
+    await queryRunner.query(`
+      CREATE TABLE IF NOT EXISTS "${schema}"."typeorm_metadata" (
+        "type" varchar NOT NULL,
+        "database" varchar,
+        "schema" varchar,
+        "table" varchar,
+        "name" varchar,
+        "value" text
+      )
+    `);
+
+    // Sort migrations by timestamp
+    const allMigrations = [...workerDataSource.migrations].sort((a, b) => {
+      const getTimestamp = (migration: {
+        name?: string;
+        constructor: { name: string };
+      }): number => {
+        const name = migration.name || migration.constructor.name;
+        const match = name.match(/(\d{13})$/);
+        return match ? parseInt(match[1], 10) : 0;
+      };
+      return getTimestamp(a) - getTimestamp(b);
+    });
+
+    for (const migration of allMigrations) {
+      const migrationName = migration.name || migration.constructor.name;
+
+      const alreadyRun = await queryRunner.query(
+        `SELECT * FROM "${schema}"."migrations" WHERE "name" = $1`,
+        [migrationName],
+      );
+
+      if (alreadyRun.length === 0) {
+        await migration.up(queryRunner);
+
+        const timestampMatch = migrationName.match(/(\d{13})$/);
+        const timestamp = timestampMatch
+          ? parseInt(timestampMatch[1], 10)
+          : Date.now();
+
+        await queryRunner.query(
+          `INSERT INTO "${schema}"."migrations" ("timestamp", "name") VALUES ($1, $2)`,
+          [timestamp, migrationName],
+        );
+      }
+    }
+  } finally {
+    await queryRunner.release();
+  }
+
+  await workerDataSource.destroy();
+};
+
+/**
+ * Jest global setup - runs once before all workers start.
+ * Creates worker schemas for parallel test isolation.
+ */
+export default async function globalSetup(): Promise<void> {
+  // Only run when schema isolation is enabled
+  if (process.env.ENABLE_SCHEMA_ISOLATION !== 'true') {
+    return;
+  }
+
+  const maxWorkers = parseInt(process.env.JEST_MAX_WORKERS || '4', 10);
+  console.log(
+    `\nCreating ${maxWorkers} worker schemas for parallel testing...`,
+  );
+
+  // First, create all schemas
+  const dataSource = new DataSource({
+    type: 'postgres',
+    host: process.env.TYPEORM_HOST || 'localhost',
+    port: 5432,
+    username: process.env.TYPEORM_USERNAME || 'postgres',
+    password: process.env.TYPEORM_PASSWORD || '12345',
+    database:
+      process.env.TYPEORM_DATABASE ||
+      (process.env.NODE_ENV === 'test' ? 'api_test' : 'api'),
+    schema: 'public',
+    extra: { max: 1 },
+  });
+
+  await dataSource.initialize();
+
+  for (let i = 1; i <= maxWorkers; i++) {
+    const schema = `test_worker_${i}`;
+    await dataSource.query(`DROP SCHEMA IF EXISTS "${schema}" CASCADE`);
+    await dataSource.query(`CREATE SCHEMA "${schema}"`);
+  }
+
+  await dataSource.destroy();
+
+  // Run migrations for each schema sequentially to avoid memory spikes
+  for (let i = 1; i <= maxWorkers; i++) {
+    const schema = `test_worker_${i}`;
+    console.log(`Running migrations for ${schema}...`);
+    await createWorkerSchema(schema);
+  }
+
+  console.log('All worker schemas ready!\n');
+}
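A note on the rewrite rules above: the transform is purely regex-based, so it helps to pin down the expected outputs. Below is a minimal, self-contained sanity check; the local `rewrite` helper inlines two of the patterns from `replaceSchemaReferences` (which is module-private as written), so this is illustrative rather than a test of the actual export.

```ts
import { strict as assert } from 'node:assert';

// Inlined copy of two of the rewrite rules from replaceSchemaReferences
// (DROP INDEX handling plus quoted public.-table references).
const rewrite = (sql: string, targetSchema: string): string => {
  if (targetSchema === 'public') return sql;
  return sql
    .replace(
      /DROP INDEX\s+(?:IF EXISTS\s+)?(?:"public"\.|public\.)?("[^"]+"|[\w]+)/gi,
      (_, indexName) => `DROP INDEX IF EXISTS ${indexName}`,
    )
    .replace(/\bpublic\."(\w+)"/gi, `"${targetSchema}"."$1"`);
};

// Table references move into the worker schema.
assert.equal(
  rewrite('SELECT * FROM public."user"', 'test_worker_2'),
  'SELECT * FROM "test_worker_2"."user"',
);
// DROP INDEX loses the schema qualifier and gains IF EXISTS.
assert.equal(
  rewrite('DROP INDEX "public"."IDX_post_tweetId"', 'test_worker_2'),
  'DROP INDEX IF EXISTS "IDX_post_tweetId"',
);
// No-op when the target schema is public.
assert.equal(rewrite('SELECT 1', 'public'), 'SELECT 1');
```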
diff --git a/__tests__/setup.ts b/__tests__/setup.ts
index 44acb6a722..3834c9747f 100644
--- a/__tests__/setup.ts
+++ b/__tests__/setup.ts
@@ -1,6 +1,7 @@
 import * as matchers from 'jest-extended';
 import '../src/config';
 import createOrGetConnection from '../src/db';
+import { testSchema } from '../src/data-source';
 import { remoteConfig } from '../src/remoteConfig';
 import { loadAuthKeys } from '../src/auth';
 
@@ -57,6 +58,15 @@ jest.mock('../src/remoteConfig', () => ({
   },
 }));
 
+// Tables that contain seed/reference data that should not be deleted between tests.
+// These are populated by migrations and tests don't modify them.
+// NOTE: Most tables are NOT included because tests create their own test data
+// and expect tables to start empty (so auto-increment IDs start at 1)
+const SEED_DATA_TABLES = new Set([
+  'migrations', // Required by TypeORM to track applied migrations
+  'checkpoint', // System checkpoints, tests don't create/modify
+]);
+
 const cleanDatabase = async (): Promise<void> => {
   await remoteConfig.init();
 
@@ -64,14 +74,38 @@ const cleanDatabase = async (): Promise<void> => {
   const con = await createOrGetConnection();
   for (const entity of con.entityMetadatas) {
     const repository = con.getRepository(entity.name);
     if (repository.metadata.tableType === 'view') continue;
-    await repository.query(`DELETE
-                            FROM "${entity.tableName}";`);
+
+    // Skip seed data tables - they're populated once and tests expect them to exist
+    if (SEED_DATA_TABLES.has(entity.tableName)) continue;
+
+    await repository.query(`DELETE FROM "${entity.tableName}";`);
     for (const column of entity.primaryColumns) {
       if (column.generationStrategy === 'increment') {
-        await repository.query(
-          `ALTER SEQUENCE ${entity.tableName}_${column.databaseName}_seq RESTART WITH 1`,
-        );
+        // Reset sequences/identity columns for auto-increment primary keys
+        // 
Must use schema-qualified table name for schema isolation to work + try { + // First try pg_get_serial_sequence (works for SERIAL columns) + // Schema-qualify the table name for proper resolution in worker schemas + const schemaQualifiedTable = `${testSchema}.${entity.tableName}`; + const seqResult = await repository.query( + `SELECT pg_get_serial_sequence($1, $2) as seq_name`, + [schemaQualifiedTable, column.databaseName], + ); + if (seqResult[0]?.seq_name) { + await repository.query( + `ALTER SEQUENCE ${seqResult[0].seq_name} RESTART WITH 1`, + ); + } else { + // If no sequence found, try resetting IDENTITY column directly + // This handles GENERATED AS IDENTITY columns + await repository.query( + `ALTER TABLE "${testSchema}"."${entity.tableName}" ALTER COLUMN "${column.databaseName}" RESTART WITH 1`, + ); + } + } catch { + // Sequence/identity might not exist or not be resettable, ignore + } } } } @@ -82,8 +116,14 @@ jest.mock('file-type', () => ({ fileTypeFromBuffer: () => fileTypeFromBuffer(), })); +beforeAll(async () => { + // Schema creation is now handled by globalSetup.ts + // This beforeAll just ensures the connection is ready + await createOrGetConnection(); +}, 30000); + beforeEach(async () => { loadAuthKeys(); await cleanDatabase(); -}); +}, 30000); // 30 second timeout for database cleanup diff --git a/__tests__/workers/newNotificationV2RealTime.ts b/__tests__/workers/newNotificationV2RealTime.ts index 38636e40ba..9aafdfc096 100644 --- a/__tests__/workers/newNotificationV2RealTime.ts +++ b/__tests__/workers/newNotificationV2RealTime.ts @@ -18,6 +18,12 @@ import { Readable } from 'stream'; let con: DataSource; +// Skip this test in parallel mode because Redis pub/sub channels are shared across workers +// and events can be consumed by the wrong subscriber +const isParallelMode = + process.env.ENABLE_SCHEMA_ISOLATION === 'true' && + process.env.JEST_WORKER_ID !== undefined; + beforeAll(async () => { con = await createOrGetConnection(); }); @@ -27,121 +33,125 @@ beforeEach(async () => { await saveFixtures(con, User, usersFixture); }); -it('should publish an event to redis', async () => { - const attchs = await con.getRepository(NotificationAttachmentV2).save([ - { - image: 'img#1', - title: 'att #1', - type: NotificationAttachmentType.Post, - referenceId: '1', - }, - { - image: 'img#2', - title: 'att #2', - type: NotificationAttachmentType.Post, - referenceId: '2', - }, - ]); - const avtars = await con.getRepository(NotificationAvatarV2).save([ - { - image: 'img#1', - referenceId: '1', - type: 'user', - targetUrl: 'user#1', - name: 'User #1', - }, - { - image: 'img#2', - referenceId: '2', - type: 'source', - targetUrl: 'source#1', - name: 'Source #1', - }, - ]); - const { id } = await con.getRepository(NotificationV2).save({ - ...notificationV2Fixture, - attachments: [attchs[1].id, attchs[0].id], - avatars: [avtars[1].id, avtars[0].id], - }); - await con.getRepository(UserNotification).insert([ - { userId: '1', notificationId: id }, - { userId: '2', notificationId: id }, - ]); - const expected = { - attachments: [ +(isParallelMode ? 
it.skip : it)( + 'should publish an event to redis', + async () => { + const attchs = await con.getRepository(NotificationAttachmentV2).save([ { - id: expect.any(String), - image: 'img#2', - referenceId: '2', - title: 'att #2', - type: 'post', - }, - { - id: expect.any(String), image: 'img#1', - referenceId: '1', title: 'att #1', - type: 'post', + type: NotificationAttachmentType.Post, + referenceId: '1', }, - ], - avatars: [ { - id: expect.any(String), image: 'img#2', - name: 'Source #1', + title: 'att #2', + type: NotificationAttachmentType.Post, referenceId: '2', - targetUrl: 'source#1', - type: 'source', }, + ]); + const avtars = await con.getRepository(NotificationAvatarV2).save([ { - id: expect.any(String), image: 'img#1', - name: 'User #1', referenceId: '1', - targetUrl: 'user#1', type: 'user', + targetUrl: 'user#1', + name: 'User #1', }, - ], - createdAt: '2021-05-02T00:00:00.000Z', - description: 'description', - icon: 'icon', - id: expect.any(String), - numTotalAvatars: null, - public: true, - referenceId: null, - referenceType: null, - targetUrl: 'https://daily.dev', - title: 'notification #1', - type: NotificationType.CommentMention, - uniqueKey: '0', - }; - - const stream = new Readable(); - let processed = 0; - const subscribe = async (userId: string) => { - const subId = await redisPubSub.subscribe( - `events.notifications.${userId}.new`, - (value) => { - processed += 1; - expect(value).toEqual(expected); - redisPubSub.unsubscribe(subId); - stream.push(userId); - if (processed >= 2) { - stream.destroy(); - } + { + image: 'img#2', + referenceId: '2', + type: 'source', + targetUrl: 'source#1', + name: 'Source #1', }, - ); - }; - await subscribe('1'); - await subscribe('2'); - await expectSuccessfulBackground(worker, { - notification: { - id, + ]); + const { id } = await con.getRepository(NotificationV2).save({ + ...notificationV2Fixture, + attachments: [attchs[1].id, attchs[0].id], + avatars: [avtars[1].id, avtars[0].id], + }); + await con.getRepository(UserNotification).insert([ + { userId: '1', notificationId: id }, + { userId: '2', notificationId: id }, + ]); + const expected = { + attachments: [ + { + id: expect.any(String), + image: 'img#2', + referenceId: '2', + title: 'att #2', + type: 'post', + }, + { + id: expect.any(String), + image: 'img#1', + referenceId: '1', + title: 'att #1', + type: 'post', + }, + ], + avatars: [ + { + id: expect.any(String), + image: 'img#2', + name: 'Source #1', + referenceId: '2', + targetUrl: 'source#1', + type: 'source', + }, + { + id: expect.any(String), + image: 'img#1', + name: 'User #1', + referenceId: '1', + targetUrl: 'user#1', + type: 'user', + }, + ], + createdAt: '2021-05-02T00:00:00.000Z', + description: 'description', + icon: 'icon', + id: expect.any(String), + numTotalAvatars: null, public: true, - }, - }); - return new Promise((resolve, reject) => { - stream.on('error', reject); - stream.on('close', resolve); - }); -}); + referenceId: null, + referenceType: null, + targetUrl: 'https://daily.dev', + title: 'notification #1', + type: NotificationType.CommentMention, + uniqueKey: '0', + }; + + const stream = new Readable(); + let processed = 0; + const subscribe = async (userId: string) => { + const subId = await redisPubSub.subscribe( + `events.notifications.${userId}.new`, + (value) => { + processed += 1; + expect(value).toEqual(expected); + redisPubSub.unsubscribe(subId); + stream.push(userId); + if (processed >= 2) { + stream.destroy(); + } + }, + ); + }; + await subscribe('1'); + await subscribe('2'); + await 
expectSuccessfulBackground(worker, { + notification: { + id, + public: true, + }, + }); + return new Promise((resolve, reject) => { + stream.on('error', reject); + stream.on('close', resolve); + }); + }, + 15000, +); diff --git a/jest.config.js b/jest.config.js index 44b9dce4b8..3db7d51fb4 100644 --- a/jest.config.js +++ b/jest.config.js @@ -12,9 +12,12 @@ process.env.NODE_OPTIONS = [ module.exports = { preset: 'ts-jest', testEnvironment: 'node', + testTimeout: 30000, // 30s timeout for tests and hooks (database cleanup can be slow in CI) + globalSetup: './__tests__/globalSetup.ts', setupFilesAfterEnv: ['./__tests__/setup.ts'], globalTeardown: './__tests__/teardown.ts', testPathIgnorePatterns: [ + '/__tests__/globalSetup.ts', '/__tests__/setup.ts', '/__tests__/teardown.ts', '/__tests__/helpers.ts', diff --git a/package.json b/package.json index 1c78b46dbb..62d806efa4 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,7 @@ "start:background": "pnpm run cli background", "pretest": "cross-env NODE_ENV=test pnpm run db:migrate:reset", "test": "jest --testEnvironment=node --runInBand", + "test:parallel": "cross-env ENABLE_SCHEMA_ISOLATION=true jest --testEnvironment=node --maxWorkers=4", "typeorm": "typeorm-ts-node-commonjs", "prepare": "corepack enable || true" }, diff --git a/src/common/links.ts b/src/common/links.ts index 6ee109a86b..0671f6d771 100644 --- a/src/common/links.ts +++ b/src/common/links.ts @@ -193,3 +193,44 @@ export const getShortGenericInviteLink = async ( const genericInviteURL = await getShortUrl(rawInviteURL.toString(), log); return genericInviteURL.toString(); }; + +/** + * Pattern to match Twitter/X status URLs + * Matches: twitter.com/user/status/123, x.com/user/status/123 + * With optional www. prefix + */ +export const twitterUrlPattern = + /^https?:\/\/(?:www\.)?(twitter\.com|x\.com)\/(\w+)\/status\/(\d+)/i; + +/** + * Check if a URL is a Twitter/X status URL + */ +export const isTwitterUrl = (url: string): boolean => { + return twitterUrlPattern.test(url); +}; + +/** + * Extract tweet ID and username from a Twitter/X URL + * Returns null if not a valid Twitter URL + */ +export const extractTweetInfo = ( + url: string, +): { tweetId: string; username: string } | null => { + const match = url.match(twitterUrlPattern); + if (!match) { + return null; + } + return { + tweetId: match[3], + username: match[2], + }; +}; + +/** + * Extract just the tweet ID from a Twitter/X URL + * Returns null if not a valid Twitter URL + */ +export const extractTweetId = (url: string): string | null => { + const info = extractTweetInfo(url); + return info?.tweetId || null; +}; diff --git a/src/cron/checkReferralReminder.ts b/src/cron/checkReferralReminder.ts index d3d5b734b8..aa412351c0 100644 --- a/src/cron/checkReferralReminder.ts +++ b/src/cron/checkReferralReminder.ts @@ -13,7 +13,7 @@ const cron: Cron = { .andWhere( `( dateCastIndex(flags, 'lastReferralReminder') <= NOW() - INTERVAL '6 months' - OR (dateCastIndex(flags, 'lastReferralReminder') IS NULL AND (SELECT u."createdAt" FROM public.user AS u WHERE u.id = "userId") <= NOW() - INTERVAL '2 weeks') + OR (dateCastIndex(flags, 'lastReferralReminder') IS NULL AND (SELECT u."createdAt" FROM "user" AS u WHERE u.id = "userId") <= NOW() - INTERVAL '2 weeks') )`, ) .set({ showGenericReferral: true }) diff --git a/src/cron/updateDiscussionScore.ts b/src/cron/updateDiscussionScore.ts index e66dfe9ea5..6155d74827 100644 --- a/src/cron/updateDiscussionScore.ts +++ b/src/cron/updateDiscussionScore.ts @@ -5,7 +5,7 @@ const cron: Cron = { 
--- a/src/cron/updateDiscussionScore.ts
+++ b/src/cron/updateDiscussionScore.ts
@@ -5,7 +5,7 @@ const cron: Cron = {
   handler: async (con) => {
     await con.transaction(async (entityManager): Promise<void> => {
       await entityManager.query(
-        `update "public"."post" p
+        `update post p
         set "discussionScore" = v.score
         FROM (
           select
@@ -33,7 +33,7 @@ const cron: Cron = {
         WHERE p.id = v.id`,
       );
       await entityManager.query(
-        `update "public"."post" p
+        `update post p
         set "discussionScore" = null
         FROM (
           select res.id
diff --git a/src/cron/updateViews.ts b/src/cron/updateViews.ts
index c719ebe6a4..3244e59fc8 100644
--- a/src/cron/updateViews.ts
+++ b/src/cron/updateViews.ts
@@ -13,7 +13,7 @@ const cron: Cron = {
 
     await con.transaction(async (entityManager): Promise<void> => {
       await entityManager.query(
-        `update "public"."post" p
+        `update "post" p
         set views = p.views + v.count
         FROM (
           select count(*) count, "view"."postId" from "view"
diff --git a/src/data-source.ts b/src/data-source.ts
index 1b23ced68d..465f8e7524 100644
--- a/src/data-source.ts
+++ b/src/data-source.ts
@@ -1,13 +1,54 @@
 import 'reflect-metadata';
 import { DataSource } from 'typeorm';
 
+/**
+ * Determine the schema for test isolation.
+ * Each Jest worker gets its own schema so tests can run in parallel
+ * without conflicts; enabled in CI via ENABLE_SCHEMA_ISOLATION=true.
+ */
+const getSchema = (): string => {
+  if (process.env.TYPEORM_SCHEMA) {
+    return process.env.TYPEORM_SCHEMA;
+  }
+  // Enable schema isolation for parallel Jest workers in CI
+  if (
+    process.env.ENABLE_SCHEMA_ISOLATION === 'true' &&
+    process.env.JEST_WORKER_ID
+  ) {
+    return `test_worker_${process.env.JEST_WORKER_ID}`;
+  }
+  return 'public';
+};
+
+export const testSchema = getSchema();
+
+/**
+ * Redis key prefix for test isolation.
+ * Each Jest worker gets its own prefix to avoid key collisions in parallel tests.
+ */
+export const testRedisPrefix =
+  process.env.ENABLE_SCHEMA_ISOLATION === 'true' && process.env.JEST_WORKER_ID
+    ? `test_worker_${process.env.JEST_WORKER_ID}:`
+    : '';
+
+// PostgreSQL connection options to set search_path for raw SQL queries.
+// Include public schema in search_path for access to extensions (uuid-ossp, etc.)
+const pgOptions =
+  testSchema !== 'public' ? `-c search_path=${testSchema},public` : undefined;
+
+// Reduce pool size for parallel testing to avoid connection/memory exhaustion
+const maxPoolSize = process.env.NODE_ENV === 'test' ? 
5 : 30; + export const AppDataSource = new DataSource({ type: 'postgres', - schema: 'public', + schema: testSchema, synchronize: false, extra: { - max: 30, + max: maxPoolSize, idleTimeoutMillis: 0, + // Set search_path at connection level so raw SQL uses the correct schema + options: pgOptions, }, logging: false, entities: ['src/entity/**/*.{js,ts}'], diff --git a/src/entity/Product.ts b/src/entity/Product.ts index 87cd4b3362..2aceac58bc 100644 --- a/src/entity/Product.ts +++ b/src/entity/Product.ts @@ -19,6 +19,7 @@ export type ProductFlagsPublic = Pick< export enum ProductType { Award = 'award', + Recruiter = 'recruiter', } @Entity() diff --git a/src/entity/posts/Post.ts b/src/entity/posts/Post.ts index 003153efd5..6fff645aad 100644 --- a/src/entity/posts/Post.ts +++ b/src/entity/posts/Post.ts @@ -24,6 +24,7 @@ export enum PostType { VideoYouTube = 'video:youtube', Brief = 'brief', Poll = 'poll', + Tweet = 'tweet', } export const postTypes: string[] = Object.values(PostType); diff --git a/src/entity/posts/TweetPost.ts b/src/entity/posts/TweetPost.ts new file mode 100644 index 0000000000..69b7d50354 --- /dev/null +++ b/src/entity/posts/TweetPost.ts @@ -0,0 +1,67 @@ +import { ChildEntity, Column, Index } from 'typeorm'; +import { Post, PostType } from './Post'; + +export type TweetMedia = { + type: 'photo' | 'video' | 'animated_gif'; + url: string; + previewUrl?: string; + width?: number; + height?: number; +}; + +export type TweetData = { + tweetId: string; + content: string; + contentHtml: string; + createdAt?: Date; +}; + +@ChildEntity(PostType.Tweet) +export class TweetPost extends Post { + @Column({ type: 'text' }) + @Index({ unique: true }) + tweetId: string; + + @Column({ type: 'text' }) + tweetAuthorUsername: string; + + @Column({ type: 'text' }) + tweetAuthorName: string; + + @Column({ type: 'text', nullable: true }) + tweetAuthorAvatar?: string; + + @Column({ type: 'boolean', default: false }) + tweetAuthorVerified: boolean; + + @Column({ type: 'text' }) + tweetContent: string; + + @Column({ type: 'text', nullable: true }) + tweetContentHtml?: string; + + @Column({ type: 'jsonb', nullable: true }) + tweetMedia?: TweetMedia[]; + + @Column({ type: 'timestamp', nullable: true }) + tweetCreatedAt?: Date; + + @Column({ type: 'boolean', default: false }) + isThread: boolean; + + @Column({ type: 'jsonb', nullable: true }) + threadTweets?: TweetData[]; + + @Column({ type: 'text' }) + @Index({ unique: true }) + url: string; + + @Column({ type: 'text', nullable: true }) + image?: string; + + @Column({ type: 'text', nullable: true }) + description?: string; + + @Column({ type: 'text', nullable: true }) + summary?: string; +} diff --git a/src/entity/posts/index.ts b/src/entity/posts/index.ts index 83a73d0ae5..1ee79c5048 100644 --- a/src/entity/posts/index.ts +++ b/src/entity/posts/index.ts @@ -9,3 +9,4 @@ export * from './utils'; export * from './PostRelation'; export * from './CollectionPost'; export * from './YouTubePost'; +export * from './TweetPost'; diff --git a/src/entity/posts/utils.ts b/src/entity/posts/utils.ts index 2d97bd85c4..b3e2f78e21 100644 --- a/src/entity/posts/utils.ts +++ b/src/entity/posts/utils.ts @@ -8,9 +8,11 @@ import { uniqueifyArray, updateFlagsStatement, } from '../../common'; +import { extractTweetInfo, isTwitterUrl } from '../../common/links'; import { User } from '../user'; import { PostKeyword } from '../PostKeyword'; import { ArticlePost } from './ArticlePost'; +import { TweetPost } from './TweetPost'; import { Post, PostOrigin, @@ -354,8 +356,12 @@ export 
const createExternalLink = async ({
   validateCommentary(commentary!);
 
   const isVisible = !!title;
 
+  // Check if this is a Twitter URL
+  const isTweet = isTwitterUrl(url);
+  const tweetInfo = isTweet ? extractTweetInfo(url) : null;
+
   return con.transaction(async (entityManager) => {
-    let postData = {
+    let postData: Record<string, unknown> = {
       id,
       shortId: id,
       createdAt: new Date(),
@@ -376,6 +382,19 @@ export const createExternalLink = async ({
       },
     };
 
+    // Add tweet-specific fields if this is a Twitter URL
+    if (isTweet && tweetInfo) {
+      postData = {
+        ...postData,
+        type: PostType.Tweet,
+        tweetId: tweetInfo.tweetId,
+        tweetAuthorUsername: tweetInfo.username,
+        // Other tweet fields will be populated when content is scraped
+        tweetContent: '',
+        tweetAuthorName: '',
+      };
+    }
+
     // Apply vordr checks before saving
     postData = await preparePostForInsert(postData, {
       con: entityManager,
@@ -383,7 +402,12 @@ export const createExternalLink = async ({
       req: ctx?.req,
     });
 
-    await entityManager.getRepository(ArticlePost).insert(postData);
+    // Use TweetPost repository for Twitter URLs, ArticlePost for everything else
+    if (isTweet) {
+      await entityManager.getRepository(TweetPost).insert(postData);
+    } else {
+      await entityManager.getRepository(ArticlePost).insert(postData);
+    }
 
     await notifyContentRequested(ctx?.log || logger, {
       id,
diff --git a/src/migration/1768297969000-TweetPost.ts b/src/migration/1768297969000-TweetPost.ts
new file mode 100644
index 0000000000..860168fa86
--- /dev/null
+++ b/src/migration/1768297969000-TweetPost.ts
@@ -0,0 +1,67 @@
+import { MigrationInterface, QueryRunner } from 'typeorm';
+
+export class TweetPost1768297969000 implements MigrationInterface {
+  name = 'TweetPost1768297969000';
+
+  public async up(queryRunner: QueryRunner): Promise<void> {
+    await queryRunner.query(
+      `ALTER TABLE "post" ADD "tweetId" text`,
+    );
+    await queryRunner.query(
+      `ALTER TABLE "post" ADD "tweetAuthorUsername" text`,
+    );
+    await queryRunner.query(
+      `ALTER TABLE "post" ADD "tweetAuthorName" text`,
+    );
+    await queryRunner.query(
+      `ALTER TABLE "post" ADD "tweetAuthorAvatar" text`,
+    );
+    await queryRunner.query(
+      `ALTER TABLE "post" ADD "tweetAuthorVerified" boolean DEFAULT false`,
+    );
+    await queryRunner.query(
+      `ALTER TABLE "post" ADD "tweetContent" text`,
+    );
+    await queryRunner.query(
+      `ALTER TABLE "post" ADD "tweetContentHtml" text`,
+    );
+    await queryRunner.query(
+      `ALTER TABLE "post" ADD "tweetMedia" jsonb`,
+    );
+    await queryRunner.query(
+      `ALTER TABLE "post" ADD "tweetCreatedAt" timestamp`,
+    );
+    await queryRunner.query(
+      `ALTER TABLE "post" ADD "isThread" boolean DEFAULT false`,
+    );
+    await queryRunner.query(
+      `ALTER TABLE "post" ADD "threadTweets" jsonb`,
+    );
+
+    await queryRunner.query(
+      `CREATE UNIQUE INDEX "IDX_post_tweetId" ON "post" ("tweetId") WHERE "tweetId" IS NOT NULL`,
+    );
+  }
+
+  public async down(queryRunner: QueryRunner): Promise<void> {
+    await queryRunner.query(`DROP INDEX IF EXISTS "IDX_post_tweetId"`);
+
+    await queryRunner.query(`ALTER TABLE "post" DROP COLUMN "threadTweets"`);
+    await queryRunner.query(`ALTER TABLE "post" DROP COLUMN "isThread"`);
+    await queryRunner.query(`ALTER TABLE "post" DROP COLUMN "tweetCreatedAt"`);
+    await queryRunner.query(`ALTER TABLE "post" DROP COLUMN "tweetMedia"`);
+    await queryRunner.query(`ALTER TABLE "post" DROP COLUMN "tweetContentHtml"`);
+    await queryRunner.query(`ALTER TABLE "post" DROP COLUMN "tweetContent"`);
+    await queryRunner.query(
+      `ALTER TABLE "post" DROP COLUMN "tweetAuthorVerified"`,
+    );
+    await queryRunner.query(
+      `ALTER TABLE "post" DROP COLUMN "tweetAuthorAvatar"`,
+    );
+    await queryRunner.query(`ALTER TABLE "post" DROP COLUMN "tweetAuthorName"`);
+    await queryRunner.query(
+      `ALTER TABLE "post" DROP COLUMN "tweetAuthorUsername"`,
+    );
+    await queryRunner.query(`ALTER TABLE "post" DROP COLUMN "tweetId"`);
+  }
+}
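For context on how `createExternalLink` decides routing: the helpers added in `src/common/links.ts` are exported, so the decision can be sketched directly. A short usage sketch (the sample URL and IDs are invented, and the import path assumes a caller next to `src/`):

```ts
import {
  isTwitterUrl,
  extractTweetInfo,
  extractTweetId,
} from './src/common/links';

const url = 'https://x.com/dailydotdev/status/1234567890123456789';

console.log(isTwitterUrl(url)); // true
console.log(extractTweetInfo(url));
// { tweetId: '1234567890123456789', username: 'dailydotdev' }
console.log(extractTweetId('https://daily.dev/blog/some-post'));
// null (not a tweet status URL)
```

A `true` result switches the insert from the `ArticlePost` repository to `TweetPost` and stamps `type: PostType.Tweet` plus the extracted `tweetId`/`tweetAuthorUsername`; the remaining tweet columns stay empty until the scraper reports back.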
diff --git a/src/remoteConfig.ts b/src/remoteConfig.ts
index de5ed3ea91..ae15b15934 100644
--- a/src/remoteConfig.ts
+++ b/src/remoteConfig.ts
@@ -3,6 +3,7 @@ import { logger } from './logger';
 import { isProd, isTest } from './common/utils';
 import type { CoresRole } from './types';
 import type { PurchaseType } from './common/plus';
+import { ProductType } from './entity/Product';
 
 export type RemoteConfigValue = {
   inc: number;
@@ -64,6 +65,9 @@ class RemoteConfig {
   get vars(): Partial<RemoteConfigValue> {
     if (!process.env.GROWTHBOOK_API_CONFIG_CLIENT_KEY) {
       return {
+        paddleProductIds: {
+          [ProductType.Recruiter]: 'pro_01kbq0mcmf81ehdk31d35jk1g5',
+        },
         ...(!isTest && {
           funnelIds: {
             web_funnel_id: 'paid-v1',
diff --git a/src/schema/posts.ts b/src/schema/posts.ts
index a3c8c1d08e..3d3d468ac5 100644
--- a/src/schema/posts.ts
+++ b/src/schema/posts.ts
@@ -423,6 +423,61 @@ export const typeDefs = /* GraphQL */ `
     children: [TocItem]
   }
 
+  """
+  Media attached to a tweet (photo, video, gif)
+  """
+  type TweetMedia {
+    """
+    Type of media
+    """
+    type: String!
+
+    """
+    URL to the media
+    """
+    url: String!
+
+    """
+    Preview URL for the media
+    """
+    previewUrl: String
+
+    """
+    Width of the media
+    """
+    width: Int
+
+    """
+    Height of the media
+    """
+    height: Int
+  }
+
+  """
+  Tweet data for threads
+  """
+  type TweetData {
+    """
+    Tweet ID
+    """
+    tweetId: String!
+
+    """
+    Tweet content
+    """
+    content: String!
+
+    """
+    HTML formatted tweet content
+    """
+    contentHtml: String!
+
+    """
+    Time the tweet was created
+    """
+    createdAt: DateTime
+  }
+
   """
   Post notification
   """
@@ -776,6 +831,61 @@ export const typeDefs = /* GraphQL */ `
     """
     videoId: String
 
+    """
+    Tweet ID for tweet posts
+    """
+    tweetId: String
+
+    """
+    Tweet author username
+    """
+    tweetAuthorUsername: String
+
+    """
+    Tweet author display name
+    """
+    tweetAuthorName: String
+
+    """
+    Tweet author avatar URL
+    """
+    tweetAuthorAvatar: String
+
+    """
+    Whether the tweet author is verified
+    """
+    tweetAuthorVerified: Boolean
+
+    """
+    Tweet content text
+    """
+    tweetContent: String
+
+    """
+    Tweet content as HTML
+    """
+    tweetContentHtml: String
+
+    """
+    Media attached to the tweet
+    """
+    tweetMedia: [TweetMedia]
+
+    """
+    Time the tweet was created
+    """
+    tweetCreatedAt: DateTime
+
+    """
+    Whether this is a thread
+    """
+    isThread: Boolean
+
+    """
+    Tweets in the thread (if isThread is true)
+    """
+    threadTweets: [TweetData]
+
     """
     Slug for the post
     """
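To see the new schema surface end to end, here is an illustrative client query written in the same /* GraphQL */ template-literal style the file itself uses. It assumes the pre-existing `post(id:)` root query; only the tweet fields are new in this diff:

```ts
const TWEET_POST_QUERY = /* GraphQL */ `
  query TweetPost($id: ID!) {
    post(id: $id) {
      id
      type
      tweetId
      tweetAuthorUsername
      tweetAuthorName
      tweetAuthorVerified
      tweetContent
      tweetMedia {
        type
        url
        previewUrl
        width
        height
      }
      isThread
      threadTweets {
        tweetId
        content
        createdAt
      }
    }
  }
`;
```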
diff --git a/src/workers/postUpdated.ts b/src/workers/postUpdated.ts
index e7e362696b..b35f4b1d7c 100644
--- a/src/workers/postUpdated.ts
+++ b/src/workers/postUpdated.ts
@@ -26,6 +26,9 @@ import {
   Submission,
   SubmissionStatus,
   Toc,
+  TweetPost,
+  type TweetMedia,
+  type TweetData,
   UNKNOWN_SOURCE,
   WelcomePost,
   YouTubePost,
@@ -81,6 +84,18 @@ interface Data {
     content: string;
     video_id?: string;
     duration?: number;
+    // Tweet-specific fields
+    tweet_id?: string;
+    tweet_author_username?: string;
+    tweet_author_name?: string;
+    tweet_author_avatar?: string;
+    tweet_author_verified?: boolean;
+    tweet_content?: string;
+    tweet_content_html?: string;
+    tweet_media?: TweetMedia[];
+    tweet_created_at?: string;
+    is_thread?: boolean;
+    thread_tweets?: TweetData[];
   };
   meta?: {
     scraped_html?: string;
@@ -290,6 +305,7 @@ const contentTypeFromPostType: Record<PostType, typeof Post> = {
   [PostType.VideoYouTube]: YouTubePost,
   [PostType.Brief]: BriefPost,
   [PostType.Poll]: PollPost,
+  [PostType.Tweet]: TweetPost,
 };
 
 type UpdatePostProps = {
@@ -532,7 +548,8 @@ type FixData = {
   content_type: PostType;
   fixedData: Partial<ArticlePost> &
     Partial<YouTubePost> &
-    Partial<CollectionPost>;
+    Partial<CollectionPost> &
+    Partial<TweetPost>;
   smartTitle?: string;
 };
 const fixData = async ({
@@ -576,6 +593,26 @@ const fixData = async ({
     ? data?.extra?.duration / 60
     : undefined;
 
+  // Build tweet-specific fields if content type is tweet
+  const tweetFields =
+    data?.content_type === PostType.Tweet
+      ? {
+          tweetId: data?.extra?.tweet_id,
+          tweetAuthorUsername: data?.extra?.tweet_author_username,
+          tweetAuthorName: data?.extra?.tweet_author_name,
+          tweetAuthorAvatar: data?.extra?.tweet_author_avatar,
+          tweetAuthorVerified: data?.extra?.tweet_author_verified ?? false,
+          tweetContent: data?.extra?.tweet_content,
+          tweetContentHtml: data?.extra?.tweet_content_html,
+          tweetMedia: data?.extra?.tweet_media,
+          tweetCreatedAt: data?.extra?.tweet_created_at
+            ? parseDate(data.extra.tweet_created_at)
+            : undefined,
+          isThread: data?.extra?.is_thread ?? false,
+          threadTweets: data?.extra?.thread_tweets,
+        }
+      : {};
+
   // Try and fix generic data here
   return {
     mergedKeywords,
@@ -618,6 +655,8 @@ const fixData = async ({
       language: data?.language,
       contentMeta: data?.meta || {},
       contentQuality: data?.content_quality || {},
+      // Tweet-specific fields
+      ...tweetFields,
     },
   };
 };
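Finally, a shape sketch of the scraper payload that `postUpdated` now understands for tweets, per the `Data` interface above. The `extra.*` field names come from the diff; the envelope (`id`, `content_type`) and all values are invented for illustration:

```ts
const exampleTweetUpdate = {
  id: 'p1', // hypothetical post id; envelope fields assumed from worker usage
  content_type: 'tweet', // PostType.Tweet
  extra: {
    tweet_id: '1234567890123456789',
    tweet_author_username: 'dailydotdev',
    tweet_author_name: 'daily.dev',
    tweet_author_verified: true,
    tweet_content: 'Shipping tweet posts!',
    tweet_content_html: '<p>Shipping tweet posts!</p>',
    tweet_media: [{ type: 'photo', url: 'https://example.com/img.jpg' }],
    tweet_created_at: '2026-01-13T10:00:00.000Z',
    is_thread: false,
  },
};
// fixData maps these snake_case fields onto the TweetPost columns
// (tweetId, tweetAuthorUsername, ...), parsing tweet_created_at into a Date
// and defaulting tweetAuthorVerified and isThread to false when omitted.
```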