Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
219 changes: 219 additions & 0 deletions packages/omni/omni/data_fetchers/browser_stealth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
from playwright.async_api import BrowserContext

# Chromium command-line switches intended for the browser launch call.
BROWSER_ARGS = [
    # Run Chromium without its sandbox.
    "--no-sandbox",
    # Avoid relying on /dev/shm for shared memory.
    "--disable-dev-shm-usage",
    # Turn off the Blink feature that marks the browser as automation-driven.
    "--disable-blink-features=AutomationControlled",
]

# Desktop Chrome 136 on macOS. The major version and platform match the
# values that STEALTH_JS reports through navigator.userAgentData.
USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/136.0.0.0 Safari/537.36"
)

# JavaScript source registered as a Playwright init script by
# apply_stealth_mode(). It overrides the navigator properties that commonly
# differ under automation: userAgentData, webdriver, plugins, mimeTypes,
# window.chrome and permissions.query.
#
# The whole script is wrapped in an IIFE so its helper bindings stay local to
# the script instead of becoming page-visible globals. Each override runs in
# its own try/catch so one failure cannot stop the others from being applied.
STEALTH_JS = """
(() => {
  // Capture the built-in once; every override below goes through this reference.
  const originalDefineProperty = Object.defineProperty;

  // Try to make our overrides look as native as possible
  const createNativeLookingFunction = (func, name) => {
    // Transparent Proxy: calls are forwarded unchanged to the wrapped function.
    const wrapper = new Proxy(func, {
      apply(target, thisArg, args) {
        return target.apply(thisArg, args);
      },
    });

    // Override toString to return [native code]
    try {
      originalDefineProperty(wrapper, "toString", {
        value: function () {
          return `function ${name}() { [native code] }`;
        },
        configurable: false,
        writable: false,
      });
    } catch (e) {}

    return wrapper;
  };

  // 1. userAgentData - THE CRITICAL ONE
  try {
    // Define the accessor on the prototype rather than on the instance.
    const navProto = Object.getPrototypeOf(navigator);

    // Fresh array per use so callers never share one mutable brand list.
    const makeBrands = () => [
      { brand: "Chromium", version: "136" },
      { brand: "Google Chrome", version: "136" },
      { brand: "Not.A/Brand", version: "99" },
    ];

    // Built once so repeated reads of navigator.userAgentData yield the same
    // object instead of a new one per access.
    const uaData = {
      brands: makeBrands(),
      mobile: false,
      platform: "macOS",
      getHighEntropyValues: createNativeLookingFunction(
        () =>
          Promise.resolve({
            brands: makeBrands(),
            mobile: false,
            platform: "macOS",
            platformVersion: "15.0.0",
            // Chrome reports "arm" or "x86" here; bitness carries the 64.
            architecture: "arm",
            bitness: "64",
            model: "",
            uaFullVersion: "136.0.6961.0",
          }),
        "getHighEntropyValues"
      ),
    };

    originalDefineProperty(navProto, "userAgentData", {
      get: createNativeLookingFunction(function () {
        return uaData;
      }, "get userAgentData"),
      configurable: true,
      enumerable: true,
    });
  } catch (e) {
    console.error("Failed to override userAgentData:", e);
  }

  // 2. Remove webdriver - try to delete completely
  try {
    delete Object.getPrototypeOf(navigator).webdriver;
    delete navigator.__proto__.webdriver;
    delete navigator.webdriver;
  } catch (e) {}

  // then redefine it. Guarded and configurable so that a failure here, such as
  // the script running twice in one document, cannot abort the sections below.
  try {
    originalDefineProperty(navigator, "webdriver", {
      get: createNativeLookingFunction(() => false, "get webdriver"),
      configurable: true,
    });
  } catch (e) {
    console.error("Failed to override webdriver:", e);
  }

  // 3. Plugins - use native-looking array
  try {
    const createPlugin = (name, desc, filename) => {
      return {
        0: { type: "application/pdf", suffixes: "pdf", description: desc },
        1: { type: "text/pdf", suffixes: "pdf", description: desc },
        description: desc,
        filename: filename,
        length: 2,
        name: name,
        item: createNativeLookingFunction(function (index) {
          return this[index] || null;
        }, "item"),
        namedItem: createNativeLookingFunction(function (name) {
          return null;
        }, "namedItem"),
      };
    };

    const plugins = [
      createPlugin(
        "PDF Viewer",
        "Portable Document Format",
        "internal-pdf-viewer"
      ),
      createPlugin(
        "Chrome PDF Viewer",
        "Portable Document Format",
        "internal-pdf-viewer"
      ),
      createPlugin(
        "Chromium PDF Viewer",
        "Portable Document Format",
        "internal-pdf-viewer"
      ),
      createPlugin(
        "Microsoft Edge PDF Viewer",
        "Portable Document Format",
        "internal-pdf-viewer"
      ),
      createPlugin(
        "WebKit built-in PDF",
        "Portable Document Format",
        "internal-pdf-viewer"
      ),
    ];

    plugins.item = createNativeLookingFunction(function (index) {
      return this[index] || null;
    }, "item");

    plugins.namedItem = createNativeLookingFunction(function (name) {
      return Array.from(this).find((p) => p.name === name) || null;
    }, "namedItem");

    plugins.refresh = createNativeLookingFunction(function () {}, "refresh");

    originalDefineProperty(Object.getPrototypeOf(navigator), "plugins", {
      get: createNativeLookingFunction(() => plugins, "get plugins"),
      configurable: true,
      enumerable: true,
    });
  } catch (e) {
    console.error("Failed to override plugins:", e);
  }

  // 4. MimeTypes
  try {
    const mimeTypes = [
      {
        type: "application/pdf",
        suffixes: "pdf",
        description: "Portable Document Format",
      },
      {
        type: "text/pdf",
        suffixes: "pdf",
        description: "Portable Document Format",
      },
    ];

    mimeTypes.item = createNativeLookingFunction(function (index) {
      return this[index] || null;
    }, "item");

    mimeTypes.namedItem = createNativeLookingFunction(function (name) {
      return Array.from(this).find((m) => m.type === name) || null;
    }, "namedItem");

    originalDefineProperty(Object.getPrototypeOf(navigator), "mimeTypes", {
      get: createNativeLookingFunction(() => mimeTypes, "get mimeTypes"),
      configurable: true,
      enumerable: true,
    });
  } catch (e) {
    console.error("Failed to override mimeTypes:", e);
  }

  // 5. Chrome object
  if (!window.chrome || !window.chrome.runtime) {
    window.chrome = {
      runtime: {},
      loadTimes: createNativeLookingFunction(function () {}, "loadTimes"),
      csi: createNativeLookingFunction(function () {}, "csi"),
      app: {},
    };
  }

  // 6. Permissions
  const origQuery = navigator.permissions?.query;
  if (origQuery) {
    navigator.permissions.query = createNativeLookingFunction(function (params) {
      // Answer notification queries from Notification.permission so the two
      // APIs agree; everything else goes to the original implementation.
      if (params?.name === "notifications") {
        return Promise.resolve({ state: Notification.permission });
      }
      return origQuery.call(this, params);
    }, "query");
  }
})();
"""


async def apply_stealth_mode(context: BrowserContext) -> None:
    """Register the STEALTH_JS init script on a browser context.

    Args:
        context: Playwright browser context that should receive the script.
    """
    await context.add_init_script(script=STEALTH_JS)
49 changes: 47 additions & 2 deletions packages/omni/omni/data_fetchers/x_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
import browser_cookie3
from playwright.async_api import Browser, BrowserContext, Page, async_playwright

from omni.data_fetchers.browser_stealth import (
BROWSER_ARGS,
USER_AGENT,
apply_stealth_mode,
)
from omni.data_fetchers.job_queue import DataFetcherJobQueue
from omni.data_fetchers.x_utils import parse_tweets_json, parse_user_tweets_json
from omni.db import get_tweet_store
Expand Down Expand Up @@ -71,6 +76,32 @@ def load_cookies_from_file() -> list[dict] | None:
return


async def simulate_user_activity(page: Page) -> None:
    """Send a mouse move, one keypress and a small scroll to the page.

    Meant to make the page register user interaction (hasBeenActive
    detection). This is best-effort: any exception raised while driving the
    page is printed and swallowed so the caller's flow is never interrupted.

    Args:
        page: Playwright page that receives the synthetic input.
    """
    # Candidate keys for the single random keypress.
    harmless_keys = [
        "Tab",
        "Shift",
        "Escape",
        "ArrowDown",
        "ArrowUp",
        "ArrowLeft",
        "ArrowRight",
        "Home",
    ]

    try:
        # Pointer jumps to a random spot inside the 100..500 square.
        pointer_x = random.randint(100, 500)
        pointer_y = random.randint(100, 500)
        await page.mouse.move(pointer_x, pointer_y)

        chosen_key = random.choice(harmless_keys)
        await page.keyboard.press(chosen_key)

        # Scroll down by a few pixels.
        scroll_dy = random.randint(5, 25)
        await page.mouse.wheel(0, scroll_dy)
    except Exception as exc:
        print(f"simulate_user_activity error: {exc}")


def get_cookies_for_playwright(
use_cached_x_cookies: bool = settings.use_cached_x_cookies,
) -> list[dict]:
Expand All @@ -93,8 +124,19 @@ async def setup_browser(
"""Setup browser with authentication cookies"""

p = await async_playwright().start()
browser = await p.chromium.launch(headless=headless)
context = await browser.new_context()
browser = await p.chromium.launch(
headless=headless,
args=BROWSER_ARGS,
)

# Create context
context = await browser.new_context(
viewport={"width": 1920, "height": 1080},
user_agent=USER_AGENT,
)

# Apply stealth measures
await apply_stealth_mode(context)

# Set cookies before navigating
await context.add_cookies(x_cookies)
Expand All @@ -106,6 +148,7 @@ async def setup_browser(
print(cookie)

page = await context.new_page()
await simulate_user_activity(page)

return browser, context, page

Expand Down Expand Up @@ -178,6 +221,7 @@ async def fetch_timeline(

# Navigate to X.com
await page.goto("https://x.com")
await simulate_user_activity(page)
await asyncio.sleep(3)

# Click "Following" to switch to chronological timeline
Expand Down Expand Up @@ -275,6 +319,7 @@ async def follow_user(
# Navigate to user's profile
profile_url = f"https://x.com/{handle.lstrip('@')}"
await page.goto(profile_url)
await simulate_user_activity(page)
await asyncio.sleep(3)

# Find follow button for this specific user using partial aria-label (for localization)
Expand Down