Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
b424898
add queue
eelcovdw Sep 25, 2025
29b8333
add x scraper
eelcovdw Sep 26, 2025
546e5a6
remove old files
eelcovdw Sep 26, 2025
a88a252
fix
eelcovdw Sep 26, 2025
9d16591
merge
eelcovdw Sep 27, 2025
d2d576c
fixes
eelcovdw Sep 29, 2025
c4a3369
fix test
eelcovdw Sep 29, 2025
f358dd3
fix flow
koenvanderveen Sep 29, 2025
4fb087a
Merge branch 'eelco/scraping' of github.com:OpenMined/agentic-syftbox…
koenvanderveen Sep 29, 2025
f6ee91e
fix
koenvanderveen Sep 29, 2025
d53fb22
fetch usertweets
eelcovdw Sep 29, 2025
1cf367b
add rerankers with fix
eelcovdw Sep 29, 2025
51daddd
better create list flow
koenvanderveen Sep 29, 2025
00c6a1d
Merge branch 'eelco/scraping' of github.com:OpenMined/agentic-syftbox…
koenvanderveen Sep 29, 2025
224a6ea
fix state when loading authors
koenvanderveen Sep 29, 2025
899ed19
analytics
koenvanderveen Sep 30, 2025
aa4aa89
deploy fixes
eelcovdw Sep 30, 2025
a7b62c8
Merge branch 'eelco/scraping' of https://github.com/OpenMined/toolbox…
eelcovdw Sep 30, 2025
c569ec3
fix failing account checking
koenvanderveen Sep 30, 2025
327c873
Merge branch 'eelco/scraping' of github.com:OpenMined/agentic-syftbox…
koenvanderveen Sep 30, 2025
2af5a44
fix rerankers version
eelcovdw Sep 30, 2025
3b58acb
fix
koenvanderveen Sep 30, 2025
8b68616
Merge branch 'eelco/scraping' of github.com:OpenMined/agentic-syftbox…
koenvanderveen Sep 30, 2025
8d78d81
fix fecther, fix list add
koenvanderveen Sep 30, 2025
049dbaf
fix summaries, dates and generation
koenvanderveen Sep 30, 2025
556a593
fix a bunch of things
koenvanderveen Sep 30, 2025
7ee79bb
-
koenvanderveen Oct 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions packages/omni/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,6 @@ pnpm-debug.log*
.DS_Store
.vscode
*.local
examples/twitter/
settings.local.json

9 changes: 7 additions & 2 deletions packages/omni/deploy_omni.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

set -eo pipefail

TOOLBOX_DIR="${TOOLBOX_DIR:-/Users/koen/workspace/toolbox}"

cd /Users/koen/workspace/toolbox/packages/omni
echo "Deploying from $TOOLBOX_DIR/packages/omni to toolbox-prod"

cd "$TOOLBOX_DIR/packages/omni"
ssh toolbox-prod "rm -rf /home/azureuser/omni"
zip -r - . -x "**.venv/*" -x ".git/*" -x "data/*" -x "node_modules/*" | ssh toolbox-prod "cat > /home/azureuser/archive.zip && rm -rf /home/azureuser/omni && unzip /home/azureuser/archive.zip -d /home/azureuser/omni"

Expand All @@ -16,7 +19,8 @@ export PATH=/home/azureuser/.local/bin:/home/azureuser/.nvm/versions/node/v20.19
cd /home/azureuser/omni
uv venv
source .venv/bin/activate
uv pip install -e . || true
# uv cache clean
uv pip install --refresh --prerelease allow -e . || true
npm install
npm install -g serve
"
Expand All @@ -41,6 +45,7 @@ export PATH=/home/azureuser/.local/bin:/home/azureuser/.nvm/versions/node/v20.19
export VITE_API_BASE_URL=http://20.224.153.50:8000
export USE_ANTHROPIC=True
export ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY
export VITE_POSTHOG_API_KEY=$POSTHOG_API_KEY

cd /home/azureuser/omni
source .venv/bin/activate
Expand Down
52 changes: 15 additions & 37 deletions packages/omni/examples/twitter_account_exists.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,24 @@
import requests
import time
import urllib
from datetime import datetime

from omni.twitter import account_exists, get_guest_token

def account_exists(handle):
handle_url_encoded = handle.replace("@", "")
# warmup
cookie_value, expires = get_guest_token()
datetime_expires = datetime.fromtimestamp(expires)

url = f"https://api.x.com/graphql/96tVxbPqMZDoYB5pmzezKA/UserByScreenName?variables=%7B%22screen_name%22%3A%22{handle_url_encoded}%22%2C%22withGrokTranslatedBio%22%3Afalse%7D&features=%7B%22hidden_profile_subscriptions_enabled%22%3Atrue%2C%22payments_enabled%22%3Afalse%2C%22profile_label_improvements_pcf_label_in_post_enabled%22%3Atrue%2C%22rweb_tipjar_consumption_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22subscriptions_verification_info_is_identity_verified_enabled%22%3Atrue%2C%22subscriptions_verification_info_verified_since_enabled%22%3Atrue%2C%22highlights_tweets_tab_ui_enabled%22%3Atrue%2C%22responsive_web_twitter_article_notes_tab_enabled%22%3Atrue%2C%22subscriptions_feature_can_gift_premium%22%3Atrue%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%7D&fieldToggles=%7B%22withAuxiliaryUserLabels%22%3Atrue%7D"
cookie_decoded = urllib.parse.unquote(cookie_value)

headers = {
"accept": "*/*",
"accept-language": "en-GB,en;q=0.9",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
"cache-control": "no-cache",
"content-type": "application/json",
"origin": "https://x.com",
"pragma": "no-cache",
"priority": "u=1, i",
"referer": "https://x.com/",
"sec-ch-ua": '"Chromium";v="140", "Not=A?Brand";v="24", "Brave";v="140"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"macOS"',
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-site",
"sec-gpc": "1",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
"x-client-transaction-id": "zcVrDRSIRWct0CXT5dceAT2N5xwliK+Nxkijb3TPCA16/oa44aGUkRBDP/wJm87H9A+kSckr/RdWjvO/n/ktx/La3vPEzg",
"x-guest-token": "1970803204867682327",
"x-twitter-active-user": "yes",
"x-twitter-client-language": "en-GB",
"x-xp-forwarded-for": "38cb822faf802d12f9a727d427f9b1014fbd26331085ff5627a9b0c468630c7eff0cd0d43e6195e3c79c43661bb7fa5314711c287e2035c395de5f855be30bd6c0d91b6b891f2c765f883796cc5c76b1dea0bbc49193e4a139ec4168499daefc00426b798829b6163aecf027ed47418510a1b6f2b8add52e87efa04524909db5e1debc9e3f8c0d66891eab7d3e80824a73a706d7bec9a0bd1428c1a8c56c8f02c0e972668556ae9c9fa4fd5d93b76b61980a3551c2c5de4dc05858e1c855af1889944cbd44da5650237bd6c5c45888c36d080a5cea283729af8d71ff3be1dbf723e97b0511225ab55cfb735d9553fc4f911c674079dba0b791c359c05fd1adc66e",
}
time_start = time.time()
cookie_value, expires = get_guest_token()
handle = "elonmusk"
# handle = "XXXXXFQWEGWRBFSD"
exists = account_exists(handle, cookie_value)

response = requests.get(url, headers=headers)
if response.json().get("data") == {}:
return False

else:
return True


handle = "femke_plantinga"
exists = account_exists(handle)
time_end = time.time()
print(f"Time taken: {time_end - time_start} seconds")
if exists:
print(f"Account {handle} exists")
else:
Expand Down
111 changes: 111 additions & 0 deletions packages/omni/examples/twitter_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import asyncio
import json
from http.cookiejar import Cookie
from pathlib import Path

import browser_cookie3

# Read the Brave browser's cookies for x.com and keep only the `auth_token`
# cookie whose domain is exactly `.x.com`; the kept cookies are later
# converted and injected into the Playwright browser context.
# Note: the loop variable `c` remains bound at module level after the loop.
x_cookies = []
for c in browser_cookie3.brave(domain_name="x.com"):
if c.domain == ".x.com" and c.name in ["auth_token"]:
x_cookies.append(c)
print("Found x auth_token")

# print(x_cookies)

# Directory that captured timeline JSON is written to: the `data` folder three
# levels above the current working directory (resolved when the module loads).
OUTPUT_DIR = Path(".").resolve().parent.parent.parent / "data"


def _cookie_is_http_only(cookie: Cookie) -> bool:
    """Return True when ``cookie`` carries an HttpOnly marker."""
    if getattr(cookie, "httponly", False):
        return True
    # http.cookiejar stores HttpOnly as a non-standard attribute. The standard
    # library spells the key "HTTPOnly" and stores an empty string, so the
    # presence of the key (not the truthiness of its value) signals the flag.
    # An explicit False value is still honoured as "not HttpOnly".
    for attr_name in ("HttpOnly", "HTTPOnly"):
        if (
            cookie.has_nonstandard_attr(attr_name)
            and cookie.get_nonstandard_attr(attr_name) is not False
        ):
            return True
    return False


def cookie_to_playwright_cookie(cookie: Cookie) -> dict:
    """Convert an ``http.cookiejar.Cookie`` into a Playwright cookie dict.

    Args:
        cookie: The cookie to convert.

    Returns:
        A dict with ``name``, ``value``, ``domain`` (leading dots removed),
        ``path``, ``secure`` and ``httpOnly`` (both real booleans), plus
        ``expires`` when the cookie has an expiry.
    """
    cookie_dict = {
        "name": cookie.name,
        "value": cookie.value,
        # NOTE(review): the original comment says Playwright expects no leading
        # dot -- confirm; a leading dot normally scopes a cookie to subdomains.
        "domain": cookie.domain.lstrip("."),
        "path": cookie.path,
        "secure": bool(cookie.secure),
        # Inspect the cookie being converted, never a module-level variable.
        "httpOnly": _cookie_is_http_only(cookie),
    }
    # Optional: expires (omitted entirely for session cookies)
    if cookie.expires is not None:
        cookie_dict["expires"] = cookie.expires
    return cookie_dict


def is_home_latest_timeline_url(url):
    """Return True when ``url`` contains "graphql" and one of the home-timeline operation names."""
    timeline_operations = ("HomeLatestTimeline", "HomeTimeline")
    return "graphql" in url and any(op in url for op in timeline_operations)


async def on_request(request):
# Handler for Playwright "request" events (registered with page.on in
# playwright_login_with_cookies). For a request whose URL satisfies
# is_home_latest_timeline_url, it awaits the response, parses the body as JSON
# and writes it to OUTPUT_DIR as home_latest_timeline_<timestamp>.json.
if is_home_latest_timeline_url(request.url):
# Wait for the response that belongs to this request.
response = await request.response()
if response is not None:
try:
json_data = await response.json()
from datetime import datetime

# The timestamp has one-second resolution and the file is opened with "w",
# so two captures within the same second write to the same file.
dt_str = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"home_latest_timeline_{dt_str}.json"
output_path = OUTPUT_DIR / filename
# Create the output directory on first use.
output_path.parent.mkdir(parents=True, exist_ok=True)
with open(output_path, "w") as f:
json.dump(json_data, f)
print(f"Saved JSON to {filename}")
# Any failure while reading or saving the JSON is printed, not re-raised.
except Exception as e:
print(f"Failed to get JSON from response: {e}")

print("API call to HomeLatestTimeline")


async def playwright_login_with_cookies():
# Launch a visible (non-headless) Chromium through Playwright, preload the
# cookies collected in x_cookies, open https://x.com, try to click the
# "Following" element, then keep the browser open for a long sleep before
# closing it. Timeline requests made meanwhile are handled by on_request.
from playwright.async_api import async_playwright

# Convert every collected cookie into the dict form passed to add_cookies.
cookies = []
for c in x_cookies:
cookies.append(cookie_to_playwright_cookie(c))

async with async_playwright() as p:
browser = await p.chromium.launch(headless=False)
context = await browser.new_context()
# Set cookies before navigating
await context.add_cookies(cookies)
# Print all cookies currently in the context
cookies_in_context = await context.cookies()
print("All cookies in context after setting:")
for cookie in cookies_in_context:
print(cookie)
page = await context.new_page()
await asyncio.sleep(3)
# Register the capture handler before navigating so that requests issued
# during the page load are passed to on_request.
page.on("request", on_request)

await page.goto("https://x.com")

# Pause for three seconds after navigation before querying the page.
await asyncio.sleep(3)

# Look up an element matching the selector span:has-text('Following')
# and click it if one was found.
following_span = await page.query_selector("span:has-text('Following')")
if following_span:
await following_span.click()
print("Clicked the 'Following' button.")
else:
print("Could not find the 'Following' button.")

# Disabled debugging aid: print the text of every <span class='r-poiln3'>.
# spans = await page.query_selector_all("span.r-poiln3")
# print("Contents of all <span class='r-poiln3'> elements:")
# for span in spans:
# text = await span.text_content()
# print(text)
# Sleep for 10000 seconds (browser stays open meanwhile), then close it.
await asyncio.sleep(10000)
await browser.close()


# Script entry point: starts the login flow as soon as this module is executed.
# There is no `if __name__ == "__main__"` guard, so importing it also runs it.
asyncio.run(playwright_login_with_cookies())
18 changes: 16 additions & 2 deletions packages/omni/frontend/App.vue
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
<div class="flex-1 flex" v-if="!dataSourcesStore.showDashboard">
<!-- Welcome Page (default) -->
<div v-if="!smartListsStore.currentListId" class="flex-1">
<WelcomePage />
<WelcomePage :key="welcomePageKey" />
</div>

<!-- List View -->
Expand Down Expand Up @@ -60,7 +60,7 @@
</template>

<script>
import { onMounted } from "vue";
import { onMounted, ref, watch } from "vue";
import { useNewChatStore } from "./stores/newChatStore";
import { useDataSourcesStore } from "./stores/dataSourcesStore";
import { useSmartListsStore } from "./stores/smartListsStore";
Expand Down Expand Up @@ -103,6 +103,7 @@ export default {
const smartListsStore = useSmartListsStore();
const dataCollectionsStore = useDataCollectionsStore();
const userStore = useUserStore();
const welcomePageKey = ref(0);

onMounted(async () => {
// Initialize authentication first
Expand All @@ -126,12 +127,25 @@ export default {
}
});

// Watch for dashboard closing and force WelcomePage to remount
watch(
() => dataSourcesStore.showDashboard,
(isOpen, wasOpen) => {
// If dashboard was open and is now closed
if (wasOpen && !isOpen && !smartListsStore.currentListId) {
// Force WelcomePage to remount by changing key
welcomePageKey.value++;
}
},
);

return {
chatStore,
dataSourcesStore,
smartListsStore,
dataCollectionsStore,
userStore,
welcomePageKey,
};
},
};
Expand Down
18 changes: 16 additions & 2 deletions packages/omni/frontend/api/client.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ class APIClient {
return this.request(`/chats/${listId}`);
}

async createSmartList(listData) {
return this.request("/smart-lists", {
async createSmartList(listData, userEmail = "dev@example.com") {
return this.request(`/smart-lists?user_email=${userEmail}`, {
method: "POST",
body: JSON.stringify(listData),
});
Expand Down Expand Up @@ -110,6 +110,20 @@ class APIClient {
method: "DELETE",
});
}

async checkTwitterAccount(handle) {
return this.request("/twitter/check-account", {
method: "POST",
body: JSON.stringify({ handle }),
});
}

async getTweetCounts(handles) {
return this.request("/twitter/tweet-counts", {
method: "POST",
body: JSON.stringify({ handles }),
});
}
}

export const apiClient = new APIClient();
Loading