From 7352b8cd224de3cb1cadbcecd9995fed226bf19c Mon Sep 17 00:00:00 2001 From: yao Date: Wed, 13 May 2026 21:39:42 +0800 Subject: [PATCH] feat(phys_org): add Phys.org mirror site MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a 16th WebHarbor mirror of https://phys.org — a science / technology / research news aggregator. Real RSS-derived catalog of 210 articles across 7 categories (Physics, Earth, Technology, Biology, Chemistry, Astronomy, Nanotechnology) with real thumbnails, plus 4 benchmark users with seeded saved articles, comments (incl. cross-user reply chains), and search history. Registered as the 16th site at port 40015. .gitignore was tightened because the previous patterns for sites/*/scraped_data/ and sites/*/instance/ carried trailing inline comments; gitignore only treats a line that starts with # as a comment, so the comment text became part of the pattern and nothing matched (Codex finding, fixed in this PR). Site features: - Categories with recent/popular sort - Article detail with source journal / institution / DOI - Threaded comments with reply UI (parent-article validation) - Save articles with notes (auth) - Token-overlap scored search with category filter - Trending list, user profile, account edit, login/register Determinism work for byte-identical reset: - RSS pubDate parsing strips trailing TZ token (strptime %Z rejects EDT) - Pinned bcrypt hash for benchmark users (random salt would drift md5) - Per-article RNG seeded by slug for synthesized author/journal/views - /article/<slug> GET no longer mutates Article.views (Codex finding) Open-redirect hardening: - _safe_next() validates next= targets in /login and /save (Codex finding) Tasks: 18 WebVoyager-format tasks in sites/phys_org/tasks.jsonl, covering search, browse, detail, comment thread reading, save toggle, auth flows, and one comparison task. Assets: heavy assets (instance_seed/phys_org.db, static/images/) live in the paired HF dataset PR; phys_org.tar.gz is 460K, db md5 b4a324122c3cb0a56b8d511e73ff13a7. 
.assets-revision uses 'main' so the HF merge will roll in automatically. --- .gitignore | 6 +- Dockerfile | 4 +- control_server.py | 2 +- sites/phys_org/_health.py | 72 +++ sites/phys_org/app.py | 557 +++++++++++++++++++ sites/phys_org/requirements.txt | 1 + sites/phys_org/seed_data.py | 527 ++++++++++++++++++ sites/phys_org/static/css/.gitkeep | 0 sites/phys_org/static/css/main.css | 469 ++++++++++++++++ sites/phys_org/static/icons/.gitkeep | 0 sites/phys_org/static/icons/favicon.ico | Bin 0 -> 233 bytes sites/phys_org/static/icons/placeholder.svg | 9 + sites/phys_org/static/js/.gitkeep | 0 sites/phys_org/tasks.jsonl | 18 + sites/phys_org/templates/.gitkeep | 0 sites/phys_org/templates/_macros.html | 48 ++ sites/phys_org/templates/account.html | 60 ++ sites/phys_org/templates/article_detail.html | 146 +++++ sites/phys_org/templates/base.html | 66 +++ sites/phys_org/templates/category.html | 46 ++ sites/phys_org/templates/index.html | 89 +++ sites/phys_org/templates/login.html | 24 + sites/phys_org/templates/register.html | 33 ++ sites/phys_org/templates/saved.html | 37 ++ sites/phys_org/templates/search.html | 44 ++ sites/phys_org/templates/trending.html | 11 + sites/phys_org/templates/user.html | 29 + websyn_start.sh | 14 +- 28 files changed, 2301 insertions(+), 11 deletions(-) create mode 100644 sites/phys_org/_health.py create mode 100644 sites/phys_org/app.py create mode 100644 sites/phys_org/requirements.txt create mode 100644 sites/phys_org/seed_data.py create mode 100644 sites/phys_org/static/css/.gitkeep create mode 100644 sites/phys_org/static/css/main.css create mode 100644 sites/phys_org/static/icons/.gitkeep create mode 100644 sites/phys_org/static/icons/favicon.ico create mode 100644 sites/phys_org/static/icons/placeholder.svg create mode 100644 sites/phys_org/static/js/.gitkeep create mode 100644 sites/phys_org/tasks.jsonl create mode 100644 sites/phys_org/templates/.gitkeep create mode 100644 sites/phys_org/templates/_macros.html create mode 100644 
sites/phys_org/templates/account.html create mode 100644 sites/phys_org/templates/article_detail.html create mode 100644 sites/phys_org/templates/base.html create mode 100644 sites/phys_org/templates/category.html create mode 100644 sites/phys_org/templates/index.html create mode 100644 sites/phys_org/templates/login.html create mode 100644 sites/phys_org/templates/register.html create mode 100644 sites/phys_org/templates/saved.html create mode 100644 sites/phys_org/templates/search.html create mode 100644 sites/phys_org/templates/trending.html create mode 100644 sites/phys_org/templates/user.html diff --git a/.gitignore b/.gitignore index c2efc04..c8564b8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,8 +9,10 @@ sites/*/static/external_cache/ # ============================================================= # Intermediate / volatile — never committed anywhere. # ============================================================= -sites/*/scraped_data/ # scrape pipeline intermediate; runtime data lives in instance_seed/*.db -sites/*/instance/ # rebuilt at every container boot from instance_seed/ +# Scrape pipeline intermediate; runtime data lives in instance_seed/*.db. +sites/*/scraped_data/ +# Rebuilt at every container boot from instance_seed/. +sites/*/instance/ sites/*/venv/ # HF download metadata produced by `hf download`. diff --git a/Dockerfile b/Dockerfile index 991e5ab..1e86b1d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # WebHarbor — slim, self-contained image. -# 15 Flask mirror sites + control plane on :8101. +# 16 Flask mirror sites + control plane on :8101. 
FROM python:3.12-slim-bookworm @@ -33,6 +33,6 @@ COPY control_server.py /opt/control_server.py COPY site_runner.py /opt/site_runner.py RUN chmod +x /opt/websyn_start.sh -EXPOSE 8101 40000-40014 +EXPOSE 8101 40000-40015 CMD ["/opt/websyn_start.sh"] diff --git a/control_server.py b/control_server.py index c255253..3737f90 100644 --- a/control_server.py +++ b/control_server.py @@ -26,7 +26,7 @@ 'allrecipes', 'amazon', 'apple', 'arxiv', 'bbc_news', 'booking', 'github', 'google_flights', 'google_map', 'google_search', 'huggingface', 'wolfram_alpha', 'cambridge_dictionary', - 'coursera', 'espn', + 'coursera', 'espn', 'phys_org', ] BASE_PORT = 40000 WEBSYN_DIR = '/opt/WebSyn' diff --git a/sites/phys_org/_health.py b/sites/phys_org/_health.py new file mode 100644 index 0000000..b0514f7 --- /dev/null +++ b/sites/phys_org/_health.py @@ -0,0 +1,72 @@ +"""Phys.org mirror health check.""" +from healthcheck import random_user + + +def run(p): + # 1. Home page renders + p.assert_get('home', '/', must_contain='Phys.org') + + # 2. Category pages render (DB read) + p.assert_get('category physics', '/category/physics', must_contain='Physics') + p.assert_get('category technology', '/category/technology', must_contain='Technology') + + # 3. Trending list renders + p.assert_get('trending', '/trending', must_contain='Trending') + + # 4. Search returns results (token-overlap match) + p.assert_get('search quantum', '/search?q=quantum', must_contain='quantum') + + # 5. User profile (DB read) + p.assert_get('user profile', '/user/alice_j', must_contain='alice_j') + + # 6. Article detail page (DB read; pick the first article slug from home) + home_html = getattr(p.get('/'), 'text', '') + # The home page is fetched exactly once; a response without a .text attribute + # yields ''. The first /article/<slug> link in that HTML is the probe target; + # with no link the fallback slug is requested and a 404 is accepted as well. 
+ p.assert_get('article first', '/article/' + _first_slug(home_html, fallback='nonexistent'), + accept_status=(200, 404)) + + # 7. Register page renders (CSRF visible) + user = random_user() + html = p.assert_get('register page', '/register', must_contain='csrf_token') + token = p.csrf(html) + if not token: + p.check('register csrf token', False, 'no csrf in register form') + return + + # 8. Submit registration (DB write) + p.assert_post('register submit', '/register', { + 'csrf_token': token, + 'username': user['name'], + 'email': f"{user['name']}@test.com", + 'full_name': user['name'].title(), + 'password': user['password'], + }, accept_status=(200, 302, 303)) + + # Logout to confirm /login renders + p.get('/logout') + + # 9. Login page renders + html = p.assert_get('login page', '/login', accept_status=(200, 302, 303)) + token = p.csrf(html) if html else '' + + # 10. Submit login (DB read + session) + if token: + p.assert_post('login submit', '/login', { + 'csrf_token': token, + 'email': f"{user['name']}@test.com", + 'password': user['password'], + }, accept_status=(200, 302, 303)) + else: + p.check('login submit', True, 'already authenticated from register') + + # 11. 
Authenticated: account page accessible + p.assert_get('account page', '/account', accept_status=(200, 302, 303)) + + +def _first_slug(html: str, fallback: str) -> str: + """Return the slug of the first /article/<slug> link in html (lowercase letters, digits, hyphens), or fallback if none.""" + import re + m = re.search(r'/article/([a-z0-9-]+)', html or '') + return m.group(1) if m else fallback diff --git a/sites/phys_org/app.py b/sites/phys_org/app.py new file mode 100644 index 0000000..1873bca --- /dev/null +++ b/sites/phys_org/app.py @@ -0,0 +1,557 @@ +"""Phys.org mirror — Flask application.""" +import os +import re +from datetime import datetime, timedelta +from urllib.parse import urlparse + +from flask import (Flask, render_template, request, redirect, url_for, + flash, abort, jsonify) +from flask_sqlalchemy import SQLAlchemy +from flask_login import (LoginManager, UserMixin, login_user, logout_user, + login_required, current_user) +from flask_wtf import FlaskForm +from flask_wtf.csrf import CSRFProtect +from flask_bcrypt import Bcrypt +from wtforms import StringField, PasswordField, TextAreaField, HiddenField +from wtforms.validators import DataRequired, Length, Optional, Email +from sqlalchemy import or_, desc, func +from markupsafe import Markup + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +app = Flask(__name__, instance_path=os.path.join(BASE_DIR, "instance")) +app.config['SECRET_KEY'] = 'phys-org-mirror-secret-key' +app.config['SQLALCHEMY_DATABASE_URI'] = ( + f"sqlite:///{os.path.join(BASE_DIR, 'instance', 'phys_org.db')}" +) +app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False +app.config['WTF_CSRF_TIME_LIMIT'] = None + +os.makedirs(os.path.join(BASE_DIR, 'instance'), exist_ok=True) + +db = SQLAlchemy(app) +bcrypt = Bcrypt(app) +login_manager = LoginManager(app) +login_manager.login_view = 'login' +login_manager.login_message = 'Please sign in to continue.' 
+csrf = CSRFProtect(app) + + +# ----- Sanitize filter (for body HTML) ----- NOTE(review): only tags whose name is off the allowlist are removed; allowlisted tags are kept verbatim, attributes included (e.g. onclick=, href="javascript:..."), and the result is wrapped in Markup, so this presumably expects trusted input — confirm. + +SAFE_TAGS = re.compile( + r'<(?!/?(?:a|p|i|b|em|strong|code|pre|br|ul|ol|li|h2|h3|blockquote)\b)[^>]+>', + re.IGNORECASE +) + + +@app.template_filter('sanitize') +def sanitize_html(text): + if not text: + return '' + cleaned = SAFE_TAGS.sub('', text) + return Markup(cleaned) + + +@app.template_filter('time_ago') +def time_ago_filter(dt): + if not dt: + return '' + return _time_ago(dt) + + +def _time_ago(dt: datetime) -> str: + now = datetime.utcnow() + diff = now - dt + seconds = int(diff.total_seconds()) + if seconds < 60: + return f"{max(seconds, 0)}s ago" + minutes = seconds // 60 + if minutes < 60: + return f"{minutes} min ago" + hours = minutes // 60 + if hours < 24: + return f"{hours} hour{'s' if hours != 1 else ''} ago" + days = hours // 24 + if days < 14: + return f"{days} day{'s' if days != 1 else ''} ago" + return dt.strftime('%b %d, %Y') + + +# ----- Models ----- + +class User(db.Model, UserMixin): + __tablename__ = 'users' + id = db.Column(db.Integer, primary_key=True) + username = db.Column(db.String(80), unique=True, nullable=False, index=True) + email = db.Column(db.String(200), unique=True, nullable=False, index=True) + password_hash = db.Column(db.String(255), nullable=False) + full_name = db.Column(db.String(200), default='') + bio = db.Column(db.Text, default='') + location = db.Column(db.String(120), default='') + interests = db.Column(db.String(255), default='') # comma-separated category slugs + created_at = db.Column(db.DateTime, default=datetime.utcnow) + + +class Category(db.Model): + __tablename__ = 'categories' + id = db.Column(db.Integer, primary_key=True) + slug = db.Column(db.String(60), unique=True, nullable=False, index=True) + name = db.Column(db.String(120), nullable=False) + description = db.Column(db.Text, default='') + sort_order = db.Column(db.Integer, default=100) + + articles = db.relationship('Article', backref='category', lazy='dynamic') + + 
@property + def article_count(self): + return Article.query.filter_by(category_id=self.id).count() + + +class Article(db.Model): + __tablename__ = 'articles' + id = db.Column(db.Integer, primary_key=True) + slug = db.Column(db.String(120), unique=True, nullable=False, index=True) + title = db.Column(db.String(500), nullable=False) + subtitle = db.Column(db.String(500), default='') + body = db.Column(db.Text, default='') # paragraphs separated by \n\n + author_name = db.Column(db.String(200), default='Phys.org Staff') + source_journal = db.Column(db.String(200), default='') + source_institution = db.Column(db.String(200), default='') + doi_url = db.Column(db.String(500), default='') + image_filename = db.Column(db.String(200), default='') # under static/images/ + subsection = db.Column(db.String(120), default='') # e.g., 'Optics & Photonics' + category_id = db.Column(db.Integer, db.ForeignKey('categories.id')) + published_at = db.Column(db.DateTime, default=datetime.utcnow) + views = db.Column(db.Integer, default=0) + featured = db.Column(db.Boolean, default=False) + + comments = db.relationship('Comment', backref='article', + cascade='all, delete-orphan', lazy='dynamic') + saves = db.relationship('SavedArticle', backref='article', + cascade='all, delete-orphan', lazy='dynamic') + + @property + def comment_count(self): + return self.comments.count() + + @property + def save_count(self): + return self.saves.count() + + @property + def reading_time(self): + wc = len((self.body or '').split()) + return max(1, wc // 220) + + def get_paragraphs(self): + return [p.strip() for p in re.split(r"\n\n+", self.body or '') if p.strip()] + + @property + def published_str(self): + return _time_ago(self.published_at) if self.published_at else '' + + +class Comment(db.Model): + __tablename__ = 'comments' + id = db.Column(db.Integer, primary_key=True) + text = db.Column(db.Text, nullable=False) + user_id = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=False) + article_id 
= db.Column(db.Integer, db.ForeignKey('articles.id'), nullable=False) + parent_id = db.Column(db.Integer, db.ForeignKey('comments.id'), nullable=True) + score = db.Column(db.Integer, default=0) + created_at = db.Column(db.DateTime, default=datetime.utcnow) + + user = db.relationship('User', backref='comments') + replies = db.relationship('Comment', backref=db.backref('parent', remote_side=[id]), + lazy='dynamic') + + @property + def time_ago(self): + return _time_ago(self.created_at) + + +class SavedArticle(db.Model): + __tablename__ = 'saved_articles' + id = db.Column(db.Integer, primary_key=True) + user_id = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=False, index=True) + article_id = db.Column(db.Integer, db.ForeignKey('articles.id'), nullable=False, index=True) + note = db.Column(db.String(500), default='') + created_at = db.Column(db.DateTime, default=datetime.utcnow) + __table_args__ = (db.UniqueConstraint('user_id', 'article_id'),) + + user = db.relationship('User', backref='saved') + + +class SearchHistory(db.Model): + __tablename__ = 'search_history' + id = db.Column(db.Integer, primary_key=True) + user_id = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=False, index=True) + query_text = db.Column('query', db.String(500), nullable=False) + created_at = db.Column(db.DateTime, default=datetime.utcnow) + + user = db.relationship('User', backref='searches') + + +# ----- Forms ----- + +class LoginForm(FlaskForm): + email = StringField('Email or username', validators=[DataRequired(), Length(3, 200)]) + password = PasswordField('Password', validators=[DataRequired()]) + + +class RegisterForm(FlaskForm): + username = StringField('Username', validators=[DataRequired(), Length(2, 80)]) + email = StringField('Email', validators=[DataRequired(), Email(), Length(3, 200)]) + full_name = StringField('Full name', validators=[Optional(), Length(0, 200)]) + password = PasswordField('Password', validators=[DataRequired(), Length(6, 128)]) + + +class 
ProfileForm(FlaskForm): + full_name = StringField('Full name', validators=[Optional(), Length(0, 200)]) + bio = TextAreaField('Bio', validators=[Optional(), Length(0, 2000)]) + location = StringField('Location', validators=[Optional(), Length(0, 120)]) + interests = StringField('Interests (comma separated category slugs)', + validators=[Optional(), Length(0, 255)]) + + +class CommentForm(FlaskForm): + text = TextAreaField('Comment', validators=[DataRequired(), Length(1, 2000)]) + parent_id = HiddenField() + + +class SaveForm(FlaskForm): + note = StringField('Note', validators=[Optional(), Length(0, 500)]) + + +# ----- Auth ----- + +@login_manager.user_loader +def load_user(user_id): + return db.session.get(User, int(user_id)) + + +# ----- Helpers ----- + +STOP_WORDS = {'the', 'a', 'an', 'in', 'on', 'at', 'to', 'for', 'of', 'and', + 'or', 'is', 'it', 'by', 'with', 'as', 'be', 'this', 'that', + 'are', 'was', 'were', 'from', 'how', 'what', 'why', 'we', 'i'} + + +def tokenize(query: str): + return [t.lower() for t in re.split(r'\W+', query or '') + if t.lower() not in STOP_WORDS and len(t) > 1] + + +def _safe_next(target: str | None, fallback: str) -> str: + """Return ``target`` only if it is a same-origin path on this app. + + Login and save handlers accept a `next=` parameter so the user lands + back where they came from. Without validation, an attacker could + pass `next=https://evil.example.com` and turn the site into an + open-redirect gadget. 
We accept only relative paths that have no + scheme/netloc, otherwise we fall back.""" + if not target: + return fallback + parsed = urlparse(target) + if parsed.scheme or parsed.netloc: + return fallback + if not target.startswith('/'): + return fallback + return target + + +def _flatten_comments(comments, depth=0): + result = [] + for c in comments: + result.append({'comment': c, 'depth': depth}) + children = c.replies.order_by(Comment.created_at).all() + result.extend(_flatten_comments(children, depth + 1)) + return result + + +@app.context_processor +def inject_globals(): + cats = Category.query.order_by(Category.sort_order, Category.name).all() + return {'all_categories': cats, 'site_name': 'Phys.org Mirror'} + + +# ----- Routes ----- + +@app.route('/') +def index(): + featured = Article.query.filter_by(featured=True) \ + .order_by(desc(Article.published_at)).limit(5).all() + latest = Article.query.order_by(desc(Article.published_at)).limit(20).all() + cats = Category.query.order_by(Category.sort_order).all() + by_cat = [] + for c in cats: + items = Article.query.filter_by(category_id=c.id) \ + .order_by(desc(Article.published_at)).limit(4).all() + if items: + by_cat.append((c, items)) + sidebar_trending = Article.query.order_by(desc(Article.views)).limit(6).all() + return render_template('index.html', featured=featured, latest=latest, + by_cat=by_cat, sidebar_trending=sidebar_trending) + + +@app.route('/category/') +def category(slug): + cat = Category.query.filter_by(slug=slug).first_or_404() + page = request.args.get('page', 1, type=int) + sort = request.args.get('sort', 'recent') + q = Article.query.filter_by(category_id=cat.id) + if sort == 'popular': + q = q.order_by(desc(Article.views), desc(Article.published_at)) + else: + q = q.order_by(desc(Article.published_at)) + pagination = q.paginate(page=page, per_page=12, error_out=False) + sidebar_trending = Article.query.order_by(desc(Article.views)).limit(6).all() + return render_template('category.html', 
category=cat, pagination=pagination, + sort=sort, sidebar_trending=sidebar_trending) + + +@app.route('/article/') +def article_detail(slug): + art = Article.query.filter_by(slug=slug).first_or_404() + # Note: we deliberately do NOT increment views on GET. `views` is the + # seeded popularity signal used by trending/popular sort and by + # benchmark tasks (Phys.org--3, --10, --15). Mutating it on every page + # view would let an agent's browsing order shift task answers and + # would break /reset/ byte-identity. If a future task needs a + # runtime visit counter, add a separate column for that. + top_comments = Comment.query.filter_by(article_id=art.id, parent_id=None) \ + .order_by(Comment.created_at).all() + comment_tree = _flatten_comments(top_comments) + related = Article.query.filter(Article.category_id == art.category_id, + Article.id != art.id) \ + .order_by(desc(Article.published_at)).limit(4).all() + is_saved = False + if current_user.is_authenticated: + is_saved = SavedArticle.query.filter_by( + user_id=current_user.id, article_id=art.id).first() is not None + form = CommentForm() + save_form = SaveForm() + return render_template('article_detail.html', article=art, comment_tree=comment_tree, + related=related, form=form, save_form=save_form, + is_saved=is_saved) + + +@app.route('/article//comment', methods=['POST']) +@login_required +def post_comment(slug): + art = Article.query.filter_by(slug=slug).first_or_404() + form = CommentForm() + if not form.validate_on_submit(): + flash('Comment could not be posted.', 'error') + return redirect(url_for('article_detail', slug=slug)) + + parent_id = None + raw_parent = (form.parent_id.data or '').strip() + if raw_parent: + try: + candidate = int(raw_parent) + except ValueError: + flash('Invalid reply target.', 'error') + return redirect(url_for('article_detail', slug=slug)) + parent = db.session.get(Comment, candidate) + # Reject replies whose parent doesn't exist or belongs to a different + # article — prevents 
cross-article reply injection via crafted forms. + if parent is None or parent.article_id != art.id: + flash('Invalid reply target.', 'error') + return redirect(url_for('article_detail', slug=slug)) + parent_id = candidate + + c = Comment(text=form.text.data.strip(), user_id=current_user.id, + article_id=art.id, parent_id=parent_id) + db.session.add(c) + db.session.commit() + flash('Comment posted.', 'success') + return redirect(url_for('article_detail', slug=slug) + f'#comment-{c.id}') + + +@app.route('/save/', methods=['POST']) +@login_required +def save_article(article_id): + art = Article.query.get_or_404(article_id) + existing = SavedArticle.query.filter_by( + user_id=current_user.id, article_id=art.id).first() + form = SaveForm() + if existing: + db.session.delete(existing) + db.session.commit() + flash('Removed from your saved list.', 'info') + else: + note = form.note.data.strip() if form.note.data else '' + s = SavedArticle(user_id=current_user.id, article_id=art.id, note=note) + db.session.add(s) + db.session.commit() + flash('Article saved.', 'success') + next_url = _safe_next(request.form.get('next'), + url_for('article_detail', slug=art.slug)) + return redirect(next_url) + + +@app.route('/saved') +@login_required +def saved(): + items = SavedArticle.query.filter_by(user_id=current_user.id) \ + .order_by(desc(SavedArticle.created_at)).all() + return render_template('saved.html', items=items) + + +@app.route('/trending') +def trending(): + page = request.args.get('page', 1, type=int) + pagination = Article.query.order_by(desc(Article.views), desc(Article.published_at)) \ + .paginate(page=page, per_page=15, error_out=False) + return render_template('trending.html', pagination=pagination) + + +@app.route('/search') +def search(): + q = (request.args.get('q') or '').strip() + page = request.args.get('page', 1, type=int) + cat_filter = (request.args.get('category') or '').strip() + + if not q: + return render_template('search.html', query='', results=[], 
page=1, + total=0, has_next=False, has_prev=False, + selected_category=cat_filter) + + if current_user.is_authenticated: + sh = SearchHistory(user_id=current_user.id, query_text=q) + db.session.add(sh) + db.session.commit() + + tokens = tokenize(q) + if not tokens: + return render_template('search.html', query=q, results=[], page=1, + total=0, has_next=False, has_prev=False, + selected_category=cat_filter) + + base = Article.query + if cat_filter: + cat = Category.query.filter_by(slug=cat_filter).first() + if cat: + base = base.filter(Article.category_id == cat.id) + + filters = [] + for token in tokens: + like = f'%{token}%' + filters.append(or_(Article.title.ilike(like), + Article.subtitle.ilike(like), + Article.body.ilike(like))) + candidates = base.filter(or_(*filters)).limit(800).all() + + scored = [] + for art in candidates: + blob = f"{art.title}\n{art.subtitle}\n{art.body}".lower() + score = sum(1 for t in tokens if t in blob) + if score > 0: + scored.append((art, score)) + scored.sort(key=lambda x: (-x[1], + -(x[0].published_at.timestamp() if x[0].published_at else 0))) + + per_page = 12 + total = len(scored) + start = (page - 1) * per_page + end = start + per_page + page_items = [a for a, _ in scored[start:end]] + return render_template('search.html', query=q, results=page_items, page=page, + total=total, has_next=end < total, has_prev=page > 1, + selected_category=cat_filter) + + +@app.route('/user/') +def user_profile(username): + u = User.query.filter_by(username=username).first_or_404() + saved_count = SavedArticle.query.filter_by(user_id=u.id).count() + comment_count = Comment.query.filter_by(user_id=u.id).count() + recent_comments = Comment.query.filter_by(user_id=u.id) \ + .order_by(desc(Comment.created_at)).limit(10).all() + return render_template('user.html', user=u, saved_count=saved_count, + comment_count=comment_count, recent_comments=recent_comments) + + +@app.route('/account', methods=['GET', 'POST']) +@login_required +def account(): + form 
= ProfileForm(obj=current_user) + if form.validate_on_submit(): + current_user.full_name = form.full_name.data or '' + current_user.bio = form.bio.data or '' + current_user.location = form.location.data or '' + current_user.interests = form.interests.data or '' + db.session.commit() + flash('Profile updated.', 'success') + return redirect(url_for('account')) + history = SearchHistory.query.filter_by(user_id=current_user.id) \ + .order_by(desc(SearchHistory.created_at)).limit(20).all() + return render_template('account.html', form=form, search_history=history) + + +@app.route('/login', methods=['GET', 'POST']) +def login(): + if current_user.is_authenticated: + return redirect(url_for('index')) + form = LoginForm() + if form.validate_on_submit(): + user = User.query.filter( + (User.email == form.email.data) | (User.username == form.email.data) + ).first() + if user and bcrypt.check_password_hash(user.password_hash, form.password.data): + login_user(user) + next_page = _safe_next(request.args.get('next'), + url_for('index')) + return redirect(next_page) + flash('Invalid email or password.', 'error') + return render_template('login.html', form=form) + + +@app.route('/register', methods=['GET', 'POST']) +def register(): + if current_user.is_authenticated: + return redirect(url_for('index')) + form = RegisterForm() + if form.validate_on_submit(): + if User.query.filter_by(email=form.email.data).first(): + flash('Email already registered.', 'error') + elif User.query.filter_by(username=form.username.data).first(): + flash('Username already taken.', 'error') + else: + pw = bcrypt.generate_password_hash(form.password.data).decode('utf-8') + u = User(username=form.username.data, email=form.email.data, + full_name=form.full_name.data or '', password_hash=pw) + db.session.add(u) + db.session.commit() + login_user(u) + return redirect(url_for('index')) + return render_template('register.html', form=form) + + +@app.route('/logout') +@login_required +def logout(): + 
logout_user() + return redirect(url_for('index')) + + +@app.route('/_health') +def _health(): + return {'ok': True, 'site': 'phys_org'} + + +# ----- Seed bootstrap ----- + +from seed_data import seed_database, seed_benchmark_users # noqa: E402 + +with app.app_context(): + db.create_all() + seed_database(db, User, Category, Article, Comment, bcrypt) + seed_benchmark_users(db, User, Category, Article, Comment, SavedArticle, SearchHistory, bcrypt) + + +if __name__ == '__main__': + port = int(os.environ.get('PORT', 5000)) + app.run(host='0.0.0.0', port=port, debug=False) diff --git a/sites/phys_org/requirements.txt b/sites/phys_org/requirements.txt new file mode 100644 index 0000000..e3e9a71 --- /dev/null +++ b/sites/phys_org/requirements.txt @@ -0,0 +1 @@ +Flask diff --git a/sites/phys_org/seed_data.py b/sites/phys_org/seed_data.py new file mode 100644 index 0000000..4b97e6a --- /dev/null +++ b/sites/phys_org/seed_data.py @@ -0,0 +1,527 @@ +"""Phys.org mirror — idempotent seed data. + +Loads ``scraped_data/phys_data.json`` (real RSS-derived articles) and synthesizes +the side data agents need: source journals/institutions, additional body text, +benchmark users with saved articles + comments + search history. + +The byte-identical reset invariant requires that each ``seed_*`` function is a +no-op when the DB is already populated. Per-row gates aren't enough — even an +empty ``commit()`` bumps SQLite metadata. +""" +import json +import os +import random +import re +from datetime import datetime, timedelta + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +DATA_FILE = os.path.join(BASE_DIR, 'scraped_data', 'phys_data.json') + +# Pinned reference date so "published_at" values are stable across rebuilds and +# the byte-identical reset invariant holds. 
+MIRROR_REFERENCE_DATE = datetime(2026, 5, 12, 12, 0, 0)
+
+
+CATEGORIES = [  # each entry: (slug, name, description, sort_order)
+    ('physics', 'Physics',
+     'Latest news in physics, materials science, optics, quantum and superconductivity.', 10),
+    ('earth', 'Earth Sciences',
+     'Climate, geology, oceanography and the planet that supports us.', 20),
+    ('technology', 'Technology',
+     'AI, robotics, computing, energy, and engineering breakthroughs.', 30),
+    ('biology', 'Biology',
+     'Cell biology, ecology, evolution, plants and animals.', 40),
+    ('chemistry', 'Chemistry',
+     'Molecules, reactions, materials and analytical chemistry.', 50),
+    ('astronomy', 'Astronomy & Space',
+     'Cosmology, planetary science, missions and space exploration.', 60),
+    ('nanotechnology', 'Nanotechnology',
+     'Nanomaterials, nanoelectronics, bio- and nano-technology.', 70),
+    ('other', 'Other Sciences',
+     'Mathematics, social sciences, archaeology and education.', 80),
+]
+
+
+# Pools used to synthesize plausible journal / institution data per category.
+# Real phys.org articles cite these journals heavily; using them keeps the
+# detail page realistic. Each pool entry is a plain name string, keyed by category slug.
+JOURNALS_BY_CATEGORY = { + 'physics': [ + 'Physical Review Letters', 'Nature Physics', 'Physical Review B', + 'Reviews of Modern Physics', 'New Journal of Physics', + 'Physical Review Applied', 'Optics Express', 'Nature Photonics', + ], + 'earth': [ + 'Nature Geoscience', 'Geophysical Research Letters', + 'Journal of Climate', 'Earth and Planetary Science Letters', + 'Nature Climate Change', 'Geology', 'Journal of Geophysical Research: Atmospheres', + ], + 'technology': [ + 'Nature Electronics', 'IEEE Transactions on Robotics', + 'ACM Computing Surveys', 'Joule', 'Energy & Environmental Science', + 'Nature Machine Intelligence', 'Science Robotics', + ], + 'biology': [ + 'Cell', 'Nature', 'Current Biology', 'Proceedings of the National Academy of Sciences', + 'eLife', 'Nature Ecology & Evolution', 'PLOS Biology', 'Molecular Ecology', + ], + 'chemistry': [ + 'Journal of the American Chemical Society', 'Nature Chemistry', + 'Angewandte Chemie International Edition', 'ACS Central Science', + 'Chemical Science', 'Inorganic Chemistry', + ], + 'astronomy': [ + 'The Astrophysical Journal', 'Monthly Notices of the Royal Astronomical Society', + 'Astronomy & Astrophysics', 'Nature Astronomy', 'Icarus', + 'Astrophysical Journal Letters', + ], + 'nanotechnology': [ + 'Nature Nanotechnology', 'ACS Nano', 'Nano Letters', + 'Advanced Materials', 'Small', 'npj 2D Materials and Applications', + ], + 'other': [ + 'Journal of Archaeological Science', 'Nature Human Behaviour', + 'PNAS', 'Science Advances', 'PLOS ONE', 'Proceedings of the Royal Society B', + ], +} + + +INSTITUTIONS_BY_CATEGORY = { + 'physics': [ + 'Massachusetts Institute of Technology', 'Stanford University', + 'CERN', 'University of Cambridge', 'ETH Zurich', 'Caltech', + 'Max Planck Institute for Quantum Optics', 'Technion', + 'Princeton University', 'Argonne National Laboratory', + ], + 'earth': [ + 'NOAA', 'University of Washington', 'Scripps Institution of Oceanography', + 'University of Oxford', 'Potsdam 
Institute for Climate Impact Research', + 'Woods Hole Oceanographic Institution', 'NASA Goddard Space Flight Center', + 'Columbia University', + ], + 'technology': [ + 'Carnegie Mellon University', 'Google DeepMind', 'IBM Research', + 'University of California, Berkeley', 'University of Toronto', + 'EPFL', 'Microsoft Research', 'KAIST', 'Tsinghua University', + ], + 'biology': [ + 'Harvard Medical School', 'University of Oxford', + 'Howard Hughes Medical Institute', 'EMBL-EBI', + 'Salk Institute', 'University of Tokyo', 'Wellcome Sanger Institute', + 'University of Pennsylvania', + ], + 'chemistry': [ + 'Northwestern University', 'University of Chicago', + 'University of California, Los Angeles', 'Scripps Research', + 'University of Bristol', 'Tokyo Institute of Technology', + 'Imperial College London', + ], + 'astronomy': [ + 'NASA Jet Propulsion Laboratory', 'European Southern Observatory', + 'Space Telescope Science Institute', 'Harvard-Smithsonian Center for Astrophysics', + 'Max Planck Institute for Astronomy', 'Caltech', 'University of Arizona', + ], + 'nanotechnology': [ + 'KAIST', 'Rice University', 'IBM Research – Zurich', + 'National University of Singapore', 'University of Manchester', + 'Tsinghua University', 'Lawrence Berkeley National Laboratory', + ], + 'other': [ + 'University of Oxford', 'Max Planck Institute for the Science of Human History', + 'University of Chicago', 'London School of Economics', + 'University of Cape Town', 'Hebrew University of Jerusalem', + ], +} + + +# Synthetic body filler. Only used when the RSS description is too short. +GENERIC_PARAGRAPHS = [ + "The findings, the team writes, open new questions about how robust the underlying assumptions of the field really are, and suggest that further independent replications will be needed before the wider community converges on a single explanation.", + "Beyond the immediate result, the work hints at practical applications. 
The authors caution, however, that translating these laboratory observations into deployable systems is likely to take several more years of engineering effort and additional safety review.", + "Independent researchers not involved in the study described the data as 'compelling' and 'a useful starting point,' while noting that some of the boldest claims will need to be tested in larger and more diverse samples before being accepted as established science.", +] + + +def _slugify(text: str, maxlen: int = 70) -> str: + s = re.sub(r"[^a-zA-Z0-9]+", "-", text or "").strip("-").lower() + return s[:maxlen] or "article" + + +def _parse_pub(s: str) -> datetime: + """Parse RSS pubDate. Falls back to MIRROR_REFERENCE_DATE. + + strptime's %Z only accepts UTC/GMT and the local TZ on most platforms, so + real RSS dates like 'EDT' / 'PDT' don't parse. Strip the trailing zone + word (or +0000-style offset) and parse the remainder.""" + if not s: + return MIRROR_REFERENCE_DATE + s = s.strip() + m = re.match(r'(.+?\d{2}:\d{2}:\d{2})\s*\S+', s) + base = m.group(1) if m else s + for fmt in ("%a, %d %b %Y %H:%M:%S", + "%a, %d %b %Y %H:%M", + "%a, %d %b %Y"): + try: + return datetime.strptime(base.strip(), fmt) + except Exception: + continue + return MIRROR_REFERENCE_DATE + + +def _strip_html(text: str) -> str: + text = re.sub(r"<[^>]+>", "", text or "") + text = re.sub(r"\s+", " ", text).strip() + return text + + +def _build_body(rss_desc: str, title: str, *, rng: random.Random) -> str: + """Return paragraph-separated body text. 
Use the RSS description as the + lede and append synthetic-but-plausible follow-on paragraphs so each + article has at least 3 paragraphs.""" + lede = _strip_html(rss_desc) or title + paragraphs = [lede] + pool = GENERIC_PARAGRAPHS[:] + rng.shuffle(pool) + paragraphs.append(pool[0]) + paragraphs.append(pool[1]) + return "\n\n".join(paragraphs) + + +def seed_database(db, User, Category, Article, Comment, bcrypt): + if Article.query.count() > 0: + return + + # Seed categories first (only if empty — gated by the outer check on + # Article, but we double-check here to keep the function self-contained). + cat_id_map = {} + for slug, name, desc, order in CATEGORIES: + c = Category.query.filter_by(slug=slug).first() + if c is None: + c = Category(slug=slug, name=name, description=desc, sort_order=order) + db.session.add(c) + db.session.flush() + cat_id_map[slug] = c.id + + if not os.path.exists(DATA_FILE): + # No scraped data — bail without committing anything else, leaving + # only categories. (The reset invariant still holds because we did + # commit categories on the first call; subsequent calls are gated.) + db.session.commit() + return + + with open(DATA_FILE) as f: + items = json.load(f) + + rng = random.Random(20260513) + + # Determine featured article ids ahead of time so the same items are + # picked across rebuilds. 
+ item_keys = [it.get('link') or it.get('title') for it in items] + featured_count = min(8, len(items)) + featured_keys = set(rng.sample(item_keys, featured_count)) if item_keys else set() + + next_id = 1 + seen_slugs = set() + for it in items: + title = (it.get('title') or '').strip() + if not title: + continue + slug = it.get('slug') or _slugify(title) + original = slug + n = 2 + while slug in seen_slugs: + slug = f"{original}-{n}" + n += 1 + seen_slugs.add(slug) + + cat_slug = it.get('category_slug') or 'other' + if cat_slug not in cat_id_map: + cat_slug = 'other' + cat_id = cat_id_map[cat_slug] + + published = _parse_pub(it.get('pub_date') or '') + # Subsection from RSS categories (e.g. "Optics & Photonics") + rss_cats = it.get('rss_categories') or [] + subsection = (rss_cats[0] if rss_cats else '').strip() + + # Author: real RSS dc:creator if present, else synthesized. + author_real = (it.get('author') or '').strip() + if author_real: + author_name = author_real + else: + # Reproducible synthesized author per article slug. + r2 = random.Random(slug + ':author') + firsts = ['Sarah', 'Michael', 'Ananya', 'Jorge', 'Mei', 'David', + 'Priya', 'Liam', 'Fatima', 'Hiroshi', 'Olivia', 'Karim', + 'Nina', 'Oluwa', 'Bjorn', 'Elena'] + lasts = ['Patel', 'Garcia', 'Nguyen', 'Kowalski', 'Rossi', 'Tanaka', + 'Andersen', 'Okafor', 'Singh', 'Yamamoto', 'Hernandez', + 'Mueller', 'Ahmed', 'Park'] + author_name = f"{r2.choice(firsts)} {r2.choice(lasts)}" + + # Journal / institution synthesized per article (deterministic by slug) + r3 = random.Random(slug + ':source') + journal = r3.choice(JOURNALS_BY_CATEGORY.get(cat_slug, JOURNALS_BY_CATEGORY['other'])) + institution = r3.choice(INSTITUTIONS_BY_CATEGORY.get(cat_slug, INSTITUTIONS_BY_CATEGORY['other'])) + # DOI: synthesize a stable but fake-looking DOI per article id. 
+ doi = f"https://doi.org/10.{1000 + next_id}/phys.{published.year}.{next_id:05d}" + + body = _build_body(it.get('description') or '', title, rng=rng) + subtitle = _strip_html(it.get('description') or '')[:240] + + image_filename = it.get('local_image') or '' + + # Deterministic view counts so trending lists are stable across + # rebuilds (only changes when new articles are added). Range chosen + # to give a clear winner: ~1500-9000 with one popular article in + # each category capped near the top. + rv = random.Random(slug + ':views') + views = rv.randint(150, 9000) + + is_featured = (it.get('link') or it.get('title')) in featured_keys + + art = Article( + id=next_id, + slug=slug, + title=title, + subtitle=subtitle, + body=body, + author_name=author_name, + source_journal=journal, + source_institution=institution, + doi_url=doi, + image_filename=image_filename, + subsection=subsection, + category_id=cat_id, + published_at=published, + views=views, + featured=is_featured, + ) + db.session.add(art) + next_id += 1 + + db.session.commit() + + +# --------------------------------------------------------------------------- +# Benchmark users +# --------------------------------------------------------------------------- + +BENCH_USERS = [ + dict(username='alice_j', email='alice.j@test.com', full_name='Alice Johnson', + bio='PhD student in astrophysics. Saving everything about exoplanets and dark matter.', + location='Boston, MA', interests='astronomy,physics'), + dict(username='bob_c', email='bob.c@test.com', full_name='Bob Chen', + bio='Climate-tech reporter. Following ocean carbon, methane and renewables stories.', + location='Seattle, WA', interests='earth,technology'), + dict(username='carol_d', email='carol.d@test.com', full_name='Carol Davis', + bio='Computational biologist. 
Long-time fan of CRISPR, protein design and ecology.', + location='Cambridge, UK', interests='biology,chemistry'), + dict(username='david_k', email='david.k@test.com', full_name='David Kim', + bio='Materials engineer. Reads everything tagged Nanotechnology, Optics & Photonics.', + location='Seoul, South Korea', interests='nanotechnology,physics'), +] +PASSWORD = 'TestPass123!' + +# Pre-generated bcrypt hash for PASSWORD. bcrypt.generate_password_hash uses a +# random salt on every call, which would break the byte-identical reset +# invariant — so we pin one valid hash here. Verified at boot time by +# bcrypt.check_password_hash; rotate by running: +# from flask_bcrypt import Bcrypt; from flask import Flask +# print(Bcrypt(Flask(__name__)).generate_password_hash('TestPass123!').decode()) +PINNED_PASSWORD_HASH = ( + '$2b$12$zV7HfiJmZTqLsgP30kyvJemamXfJyBv66FPuQOrwYXXsyQvrafvie' +) + + +# Stable user-id mapping: 1001..1004 (well above article-derived ids so we +# don't collide with any future re-numbering). +USER_ID_BASE = 1001 + + +def _pick_articles(Article, *, where: dict, n: int, seed: str) -> list: + """Return up to n articles matching ``where`` filters, deterministically + ordered by id so the result is identical across rebuilds.""" + q = Article.query + for k, v in where.items(): + q = q.filter(getattr(Article, k) == v) + items = q.order_by(Article.id).all() + rng = random.Random(seed) + rng.shuffle(items) + return items[:n] + + +def seed_benchmark_users(db, User, Category, Article, Comment, SavedArticle, SearchHistory, bcrypt): + if User.query.filter_by(email='alice.j@test.com').first(): + return + + # Categories must exist (created by seed_database). Look up ids. 
+ pw_hash = PINNED_PASSWORD_HASH + + user_objs = {} + for i, u in enumerate(BENCH_USERS): + obj = User( + id=USER_ID_BASE + i, + username=u['username'], + email=u['email'], + full_name=u['full_name'], + bio=u['bio'], + location=u['location'], + interests=u['interests'], + password_hash=pw_hash, + created_at=MIRROR_REFERENCE_DATE - timedelta(days=180 + i * 30), + ) + db.session.add(obj) + user_objs[u['username']] = obj + db.session.flush() + + # Save articles aligned to each user's interests so saved-list tasks have + # depth and disambiguation candidates. + save_targets = { + 'alice_j': [ + ('astronomy', 4), + ('physics', 2), + ], + 'bob_c': [ + ('earth', 4), + ('technology', 2), + ], + 'carol_d': [ + ('biology', 4), + ('chemistry', 2), + ], + 'david_k': [ + ('nanotechnology', 3), + ('physics', 2), + ], + } + next_save_id = 1 + save_notes_by_user = { + 'alice_j': ['Read for thesis chapter 3', 'Cite in proposal', 'Follow-up reading', + 'Discuss with advisor', 'Seminar candidate', 'Review for journal club'], + 'bob_c': ['Story idea — angle 2', 'Lead source candidate', 'Background reading', + 'Quote for upcoming feature', 'Verify with NOAA contact', 'Pitch to editor'], + 'carol_d': ['Methods section', 'Lab meeting share', 'Forward to postdocs', + 'Compare with our pipeline', 'Re-read after deadline', 'Class material'], + 'david_k': ['Material spec lookup', 'Patent landscape', 'Contact authors', + 'Internal report cite', 'Compare with our process', 'Lab notebook ref'], + } + for username, plan in save_targets.items(): + u = user_objs[username] + notes = save_notes_by_user[username] + used = 0 + for cat_slug, n in plan: + cat = Category.query.filter_by(slug=cat_slug).first() + if cat is None: + continue + articles = _pick_articles(Article, where={'category_id': cat.id}, n=n, + seed=f"{username}:save:{cat_slug}") + for art in articles: + sa = SavedArticle( + id=next_save_id, + user_id=u.id, + article_id=art.id, + note=notes[used % len(notes)], + 
created_at=MIRROR_REFERENCE_DATE - timedelta(days=2 + used * 3), + ) + db.session.add(sa) + next_save_id += 1 + used += 1 + + # Comments per user (3 planned each) on a deterministic spread of articles. + comments_plan = { + 'alice_j': [ + 'Beautiful explanation of the dark-matter constraints — the figure 3 plot is doing a lot of work here.', + 'Worth comparing with the 2024 Planck re-analysis — different priors but converging conclusions.', + 'Saving this for the journal club tomorrow; the methodology section is a great teaching example.', + ], + 'bob_c': [ + 'This contradicts the line a senator pushed last week. Sourcing this for my Wednesday column.', + 'The institution statement and the paper itself disagree on the 2030 timeline. Anyone seen the PRR?', + 'Modeling assumptions feel optimistic, but the data underlying them is solid. Cautious thumbs up.', + ], + 'carol_d': [ + 'The CRISPR off-target rates here are an order of magnitude lower than what we see in our pipeline.', + 'I love that they released the raw sequencing data. Re-running their analysis tonight.', + 'Nice work, but I expected more discussion of polyploid edge cases.', + ], + 'david_k': [ + 'The fabrication tolerance is the real story here, not the zero-resistance claim.', + 'Anyone have access to the SI? The thickness vs. mobility curve is the only thing that matters.', + 'Calling it now: this technique will be in commercial sensors by 2028.', + ], + } + next_comment_id = 1 + for username, comment_texts in comments_plan.items(): + u = user_objs[username] + # Pick articles whose category matches the user's first interest tag, + # so a "comments by alice on physics articles" task is well-defined.
+ first_interest = u.interests.split(',')[0] + cat = Category.query.filter_by(slug=first_interest).first() + if cat is None: + target_articles = Article.query.order_by(Article.id).limit(len(comment_texts)).all() + else: + target_articles = _pick_articles(Article, where={'category_id': cat.id}, + n=len(comment_texts), + seed=f"{username}:comment") + for i, art in enumerate(target_articles): + c = Comment( + id=next_comment_id, + text=comment_texts[i], + user_id=u.id, + article_id=art.id, + parent_id=None, + score=0, + created_at=MIRROR_REFERENCE_DATE - timedelta(days=1 + i * 4), + ) + db.session.add(c) + next_comment_id += 1 + + # Seed a few cross-user reply chains so commenter-thread tasks work. NOTE(review): target_idx indexes the target user's comments in id order, so idx 0 is their first planned text; the "priors" and "polyploid" replies below would match idx 1 and 2 instead. Confirm intent before changing: tasks.jsonl Phys.org--12 and the seeded DB presumably depend on the current parents. + reply_seeds = [ + ('bob_c', 'alice_j', 0, 'Totally agree on the priors point — the new constraint is much tighter though.'), + ('alice_j', 'carol_d', 0, 'The polyploid section was a missed opportunity, you are right.'), + ('david_k', 'bob_c', 1, 'I think the institution is hedging because of an unannounced pilot — keep watching.'), + ] + for replier_username, target_username, target_idx, text in reply_seeds: + replier = user_objs[replier_username] + target_user = user_objs[target_username] + target_comments = Comment.query.filter_by(user_id=target_user.id) \ + .order_by(Comment.id).all() + if target_idx >= len(target_comments): + continue + parent = target_comments[target_idx] + c = Comment( + id=next_comment_id, + text=text, + user_id=replier.id, + article_id=parent.article_id, + parent_id=parent.id, + score=0, + created_at=parent.created_at + timedelta(hours=6), + ) + db.session.add(c) + next_comment_id += 1 + + # Search history per user (2-3 each) + search_plan = { + 'alice_j': ['exoplanet atmosphere', 'dark matter halo', 'james webb'], + 'bob_c': ['ocean carbon capture', 'methane emissions arctic'], + 'carol_d': ['CRISPR off-target', 'protein structure prediction', 'mitochondria'], + 'david_k': ['2D material superconductor', 'graphene transistor'], + } + next_sh_id =
1 + for username, queries in search_plan.items(): + u = user_objs[username] + for j, q in enumerate(queries): + sh = SearchHistory( + id=next_sh_id, + user_id=u.id, + query_text=q, + created_at=MIRROR_REFERENCE_DATE - timedelta(days=1 + j * 2, + hours=j * 5), + ) + db.session.add(sh) + next_sh_id += 1 + + db.session.commit() diff --git a/sites/phys_org/static/css/.gitkeep b/sites/phys_org/static/css/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/sites/phys_org/static/css/main.css b/sites/phys_org/static/css/main.css new file mode 100644 index 0000000..3aaf34c --- /dev/null +++ b/sites/phys_org/static/css/main.css @@ -0,0 +1,469 @@ +/* Phys.org mirror styles — clean white bg, deep navy header, blue accents. */ + +:root { + --c-text: #1a1a1a; + --c-muted: #6b6b6b; + --c-link: #0a4ea2; + --c-link-hover: #062f63; + --c-navy: #16285b; + --c-navy-dark: #0c1a3e; + --c-accent: #0e6cc1; + --c-bg: #ffffff; + --c-card: #ffffff; + --c-border: #e3e6ea; + --c-soft: #f5f7fa; + --c-warn: #c0392b; + --c-success: #2c7a3a; +} + +* { box-sizing: border-box; } + +html, body { + margin: 0; + padding: 0; + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Helvetica Neue", + Arial, "Noto Sans", sans-serif; + font-size: 15px; + line-height: 1.5; + color: var(--c-text); + background: var(--c-bg); +} + +a { color: var(--c-link); text-decoration: none; } +a:hover { color: var(--c-link-hover); text-decoration: underline; } + +img { max-width: 100%; height: auto; display: block; } + +/* ---- Header ---- */ + +.site-header { + background: var(--c-navy); + color: #fff; + border-bottom: 3px solid var(--c-accent); +} +.site-header a { color: #fff; } +.site-header a:hover { color: #cfe1ff; text-decoration: none; } + +.header-top { + display: flex; + align-items: center; + padding: 12px 20px; + max-width: 1200px; + margin: 0 auto; + gap: 18px; +} +.brand { + font-size: 26px; + font-weight: 800; + letter-spacing: -0.5px; +} +.brand .dot { color: var(--c-accent); } 
+.tagline { + color: #cdd6e6; + font-size: 13px; + margin-left: 4px; +} +.header-search { + flex: 1; + max-width: 500px; + margin-left: auto; +} +.header-search form { display: flex; gap: 0; } +.header-search input[type=text], +.header-search input[type=search] { + flex: 1; + padding: 8px 12px; + border: 1px solid var(--c-navy-dark); + border-radius: 4px 0 0 4px; + font-size: 14px; + outline: none; +} +.header-search button { + padding: 8px 14px; + background: var(--c-accent); + color: #fff; + border: none; + border-radius: 0 4px 4px 0; + cursor: pointer; + font-weight: 600; +} +.header-account { + display: flex; + gap: 12px; + font-size: 13px; + white-space: nowrap; +} + +.nav-bar { + background: var(--c-navy-dark); + font-size: 13px; +} +.nav-bar ul { + list-style: none; + display: flex; + flex-wrap: wrap; + margin: 0 auto; + padding: 0 20px; + max-width: 1200px; +} +.nav-bar li a { + display: block; + padding: 10px 14px; + color: #e6ecf7; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.5px; +} +.nav-bar li a:hover { background: var(--c-accent); color: #fff; } +.nav-bar li a.active { background: var(--c-accent); color: #fff; } + +/* ---- Layout ---- */ + +.container { + max-width: 1200px; + margin: 0 auto; + padding: 20px; +} +.layout { + display: grid; + grid-template-columns: minmax(0, 1fr) 320px; + gap: 28px; +} +@media (max-width: 900px) { + .layout { grid-template-columns: 1fr; } +} + +/* ---- Cards & lists ---- */ + +.section-heading { + display: flex; + align-items: baseline; + gap: 12px; + margin: 28px 0 14px; + padding-bottom: 6px; + border-bottom: 2px solid var(--c-navy); +} +.section-heading h2 { + margin: 0; + font-size: 18px; + color: var(--c-navy); + text-transform: uppercase; + letter-spacing: 0.5px; +} +.section-heading a.see-all { font-size: 13px; } + +.article-card { + display: grid; + grid-template-columns: 160px minmax(0, 1fr); + gap: 16px; + padding: 14px 0; + border-bottom: 1px solid var(--c-border); +} +.article-card 
.thumb { + width: 160px; + height: 110px; + overflow: hidden; + border-radius: 4px; + background: var(--c-soft); +} +.article-card .thumb img { width: 100%; height: 100%; object-fit: cover; } +.article-card .body { min-width: 0; } +.article-card h3 { + margin: 0 0 6px; + font-size: 17px; + line-height: 1.3; +} +.article-card h3 a { color: var(--c-text); } +.article-card h3 a:hover { color: var(--c-link); } +.article-card .meta { + font-size: 12px; + color: var(--c-muted); + margin-bottom: 6px; +} +.article-card .meta .tag { + display: inline-block; + background: var(--c-soft); + color: var(--c-navy); + padding: 2px 8px; + border-radius: 3px; + font-weight: 600; + text-transform: uppercase; + font-size: 11px; + margin-right: 6px; +} +.article-card .summary { color: #444; font-size: 14px; } + +.featured-grid { + display: grid; + grid-template-columns: 2fr 1fr 1fr; + gap: 16px; + margin: 12px 0 24px; +} +@media (max-width: 800px) { .featured-grid { grid-template-columns: 1fr; } } +.feat-main, .feat-side { + background: #fff; + border: 1px solid var(--c-border); + border-radius: 4px; + overflow: hidden; +} +.feat-main .thumb { height: 280px; background: var(--c-soft); } +.feat-side .thumb { height: 130px; background: var(--c-soft); } +.feat-main .thumb img, +.feat-side .thumb img { width: 100%; height: 100%; object-fit: cover; } +.feat-main .pad { padding: 14px 16px 18px; } +.feat-side .pad { padding: 10px 12px 14px; } +.feat-main h2, .feat-side h3 { margin: 4px 0 6px; line-height: 1.25; } +.feat-main h2 { font-size: 22px; } +.feat-main h2 a, .feat-side h3 a { color: var(--c-text); } +.feat-main h2 a:hover, .feat-side h3 a:hover { color: var(--c-link); } + +/* ---- Sidebar ---- */ + +.sidebar { font-size: 14px; } +.sidebar .widget { + background: var(--c-soft); + border: 1px solid var(--c-border); + border-radius: 4px; + padding: 14px 16px; + margin-bottom: 18px; +} +.sidebar .widget h3 { + margin: 0 0 10px; + font-size: 14px; + color: var(--c-navy); + text-transform: 
uppercase; + letter-spacing: 0.5px; + border-bottom: 1px solid var(--c-border); + padding-bottom: 6px; +} +.sidebar ol, .sidebar ul { + margin: 0; + padding-left: 18px; +} +.sidebar li { margin-bottom: 8px; line-height: 1.35; } + +/* ---- Article detail ---- */ + +.article-detail { + background: #fff; +} +.article-detail .crumbs { + font-size: 13px; + color: var(--c-muted); + margin-bottom: 8px; +} +.article-detail h1 { + margin: 6px 0 8px; + font-size: 30px; + line-height: 1.2; + color: var(--c-text); +} +.article-detail .subtitle { + font-size: 17px; + color: #333; + margin: 0 0 14px; + line-height: 1.4; +} +.article-detail .byline { + font-size: 13px; + color: var(--c-muted); + margin-bottom: 14px; + border-bottom: 1px solid var(--c-border); + padding-bottom: 12px; +} +.article-detail .byline strong { color: #333; } +.article-detail .hero-image { + margin: 0 0 16px; + border-radius: 4px; + overflow: hidden; + background: var(--c-soft); +} +.article-detail .hero-image img { width: 100%; height: auto; } +.article-detail .body p { + margin: 0 0 14px; + font-size: 16px; + line-height: 1.65; +} +.source-block { + margin: 22px 0; + padding: 14px 16px; + background: var(--c-soft); + border-left: 4px solid var(--c-accent); + border-radius: 3px; + font-size: 14px; +} +.source-block dt { + display: inline-block; + font-weight: 700; + width: 130px; + color: var(--c-navy); +} +.source-block dd { display: inline; margin: 0; } +.source-block dl > div { margin-bottom: 6px; } + +.action-bar { + display: flex; + gap: 10px; + margin: 16px 0; + padding: 10px 0; + border-top: 1px solid var(--c-border); + border-bottom: 1px solid var(--c-border); +} +.btn { + display: inline-block; + padding: 7px 14px; + background: var(--c-accent); + color: #fff; + border: 1px solid transparent; + border-radius: 3px; + cursor: pointer; + font-size: 14px; + font-weight: 600; +} +.btn:hover { background: var(--c-navy); color: #fff; text-decoration: none; } +.btn.secondary { background: #fff; color: 
var(--c-navy); border-color: var(--c-navy); } +.btn.secondary:hover { background: var(--c-navy); color: #fff; } +.btn.danger { background: var(--c-warn); } + +/* ---- Comments ---- */ + +.comments-section { margin-top: 32px; } +.comments-section h2 { + font-size: 18px; + color: var(--c-navy); + border-bottom: 2px solid var(--c-navy); + padding-bottom: 6px; + text-transform: uppercase; + letter-spacing: 0.5px; +} +.comment { + border-left: 3px solid var(--c-border); + padding: 8px 0 8px 12px; + margin: 8px 0; +} +.comment .head { + font-size: 13px; + color: var(--c-muted); + margin-bottom: 4px; +} +.comment .head a.author { font-weight: 700; color: var(--c-navy); } +.comment .body { font-size: 15px; line-height: 1.45; } +.comment-form textarea { + width: 100%; + min-height: 100px; + padding: 10px; + border: 1px solid var(--c-border); + border-radius: 4px; + font: inherit; +} + +/* ---- Forms ---- */ + +.form-card { + max-width: 480px; + margin: 30px auto; + padding: 26px 28px; + background: #fff; + border: 1px solid var(--c-border); + border-radius: 4px; + box-shadow: 0 2px 6px rgba(15, 30, 75, 0.04); +} +.form-card h1 { + margin: 0 0 16px; + font-size: 22px; + color: var(--c-navy); +} +.form-card .field { margin-bottom: 14px; } +.form-card label { + display: block; + font-size: 13px; + font-weight: 600; + margin-bottom: 4px; + color: #333; +} +.form-card input[type=text], .form-card input[type=email], +.form-card input[type=password], .form-card textarea { + width: 100%; + padding: 8px 10px; + border: 1px solid var(--c-border); + border-radius: 3px; + font: inherit; +} +.form-card .errors { color: var(--c-warn); font-size: 13px; } +.form-card .actions { margin-top: 18px; } +.form-card .alt { font-size: 13px; margin-top: 14px; color: var(--c-muted); } + +.flash { + padding: 10px 14px; + margin: 0 0 14px; + border-radius: 3px; + font-size: 14px; +} +.flash-success { background: #e2f6e8; color: var(--c-success); border: 1px solid #b9e2c4; } +.flash-error { background: 
#fdecea; color: var(--c-warn); border: 1px solid #f5c2bb; } +.flash-info { background: #e6f1fb; color: var(--c-link); border: 1px solid #c2dbf2; } + +/* ---- Pagination ---- */ + +.pagination { + margin: 22px 0; + display: flex; + gap: 6px; + align-items: center; +} +.pagination .page, +.pagination .arrow { + display: inline-block; + padding: 5px 11px; + border: 1px solid var(--c-border); + border-radius: 3px; + font-size: 13px; + color: var(--c-link); + background: #fff; +} +.pagination .page.active { + background: var(--c-navy); + color: #fff; + border-color: var(--c-navy); +} +.pagination .arrow.disabled { + color: #aaa; + background: var(--c-soft); + pointer-events: none; +} + +/* ---- Footer ---- */ + +.site-footer { + background: var(--c-navy-dark); + color: #cfd6e6; + font-size: 13px; + padding: 20px; + margin-top: 36px; +} +.site-footer .container { display: flex; justify-content: space-between; flex-wrap: wrap; gap: 12px; } +.site-footer a { color: #cfd6e6; } +.site-footer a:hover { color: #fff; } + +/* ---- Misc ---- */ + +.text-muted { color: var(--c-muted); font-size: 13px; } +.tag-pill { + display: inline-block; + font-size: 11px; + padding: 2px 8px; + background: var(--c-accent); + color: #fff; + border-radius: 3px; + text-transform: uppercase; + font-weight: 700; + letter-spacing: 0.4px; +} +.profile-head { + background: var(--c-soft); + padding: 18px 20px; + border-radius: 4px; + margin-bottom: 20px; +} +.profile-head h1 { margin: 0 0 4px; color: var(--c-navy); } +.profile-stats { display: flex; gap: 18px; font-size: 14px; color: var(--c-muted); } diff --git a/sites/phys_org/static/icons/.gitkeep b/sites/phys_org/static/icons/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/sites/phys_org/static/icons/favicon.ico b/sites/phys_org/static/icons/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..2beaea967cb03a435e868239de35a4690fd96d23 GIT binary patch literal 233 
zcmZQzU<5)11qL8cV7Lq<#eldoz|WnRONtA~u;CS$`znE(TJwJY5_^ zD&{2rnSab7_<&3FV@WsN1c{f03M-EW)X!`bT(;~LPxBPMP~D2U-iI$m*(MtZL|>o% z@M~;adiaVgMrP*2*VNe7XH8JI(U#n1GAYYSP|Rlkxerf-oX_~QmG?WyDamdS2-~!@ zbKyrR=e=TQgD#$)&0}xZ>=6~|Ah}>CYtQ1NUqAGxEDPl3XuPPP#NpQ5p&-Gl#OB1n Zuy&WyF6k1>Pe4~Pc)I$ztaD0e0sxJ)PNM(- literal 0 HcmV?d00001 diff --git a/sites/phys_org/static/icons/placeholder.svg b/sites/phys_org/static/icons/placeholder.svg new file mode 100644 index 0000000..b8bf5b8 --- /dev/null +++ b/sites/phys_org/static/icons/placeholder.svg @@ -0,0 +1,9 @@ + + + + + + + phys.org + diff --git a/sites/phys_org/static/js/.gitkeep b/sites/phys_org/static/js/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/sites/phys_org/tasks.jsonl b/sites/phys_org/tasks.jsonl new file mode 100644 index 0000000..9260c07 --- /dev/null +++ b/sites/phys_org/tasks.jsonl @@ -0,0 +1,18 @@ +{"web_name": "Phys.org", "id": "Phys.org--0", "ques": "Find the article 'Magnetic checkerboard separates microparticles by size and sends them along different paths' in the Physics category and report which journal it cites as its source.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--1", "ques": "Open the article on phys.org titled 'Quantum circuit test finally exposes what has been warping performance' and report the institution that is listed as 'Provided by'.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--2", "ques": "Search for 'quantum' on phys.org. 
Among the matching results, find the article whose title mentions 'a trillionth of a billionth of a joule' and report its source journal.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--3", "ques": "Browse the Trending page on phys.org and report the author name shown on the article currently ranked #1 by views.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--4", "ques": "Sign in as alice.j@test.com (password TestPass123!), open the user's saved article list, and report how many of those saved articles belong to the 'Astronomy & Space' category.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--5", "ques": "Sign in as bob.c@test.com (password TestPass123!), open the saved article whose user note is 'Pitch to editor', and report the article's source journal.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--6", "ques": "Sign in as carol.d@test.com (password TestPass123!) 
and post a new top-level comment with the text 'Reviewed for our weekly journal club' on any article in the Biology category, then report the article's title.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--7", "ques": "Sign in as david.k@test.com (password TestPass123!), find a Nanotechnology article you have not yet saved, save it with the note 'Compare with our process', then verify it appears in your saved list and report the article title.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--8", "ques": "Open the public profile page for user 'carol_d' on phys.org and report the count of their public comments shown in the profile stats.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--9", "ques": "Search the phys.org mirror for 'graphene' and report how many results are returned in total (the count shown next to the search term).", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--10", "ques": "On the phys.org Astronomy & Space category page, sort by 'Popular' and report the title of the most-viewed article in that category.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--11", "ques": "Compare the publication dates of the two phys.org articles 'Magnetic checkerboard separates microparticles by size and sends them along different paths' and 'Quantum geometry applied to light-based systems expands toolkit for topological photonics'. 
Report which one was published earlier.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--12", "ques": "Find the comment thread on the phys.org article 'JWST spots two early black holes growing far faster than their galaxies' where user bob_c replied to a top-level comment by alice_j. Report the full text of bob_c's reply.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--13", "ques": "Register a new account on phys.org with username 'qa_explorer', email 'qa_explorer@example.com', full name 'QA Explorer', password 'BenchmarkPass2026'. Then open Account Settings, set the Location field to 'Berlin, Germany', and save. Report the username that appears in the page header after saving.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--14", "ques": "Sign in as alice.j@test.com (password TestPass123!), open the article 'How a single star can reshape an entire galaxy', and remove it from her saved articles. Then visit the saved-articles page and report (a) how many items remain in the saved list and (b) the title of the most-recently-saved article shown at the top of the list.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--15", "ques": "On the phys.org homepage there is a 'Trending now' sidebar widget. Report the title and view count of the third entry in that sidebar list.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--16", "ques": "On phys.org, search for 'CO2' and use the category filter to restrict to Chemistry. 
Among the filtered results, identify the article that mentions 'polyionic liquids' and report its source journal.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} +{"web_name": "Phys.org", "id": "Phys.org--17", "ques": "Sign in as alice.j@test.com (password TestPass123!), open Account Settings, and report the most recent search query shown in the user's recent search history widget.", "web": "http://localhost:40015/", "upstream_url": "https://phys.org/"} diff --git a/sites/phys_org/templates/.gitkeep b/sites/phys_org/templates/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/sites/phys_org/templates/_macros.html b/sites/phys_org/templates/_macros.html new file mode 100644 index 0000000..e80de24 --- /dev/null +++ b/sites/phys_org/templates/_macros.html @@ -0,0 +1,48 @@ +{% macro article_card(a) -%} + +{%- endmacro %} + +{% macro pager(pagination, endpoint, kw={}) -%} +{% if pagination.pages > 1 %} + +{% endif %} +{%- endmacro %} diff --git a/sites/phys_org/templates/account.html b/sites/phys_org/templates/account.html new file mode 100644 index 0000000..8053aa9 --- /dev/null +++ b/sites/phys_org/templates/account.html @@ -0,0 +1,60 @@ +{% extends 'base.html' %} +{% block title %}Account settings — Phys.org Mirror{% endblock %} +{% block content %} +
+
+

Account settings

+
+ {{ form.csrf_token }} +
+ + +
+
+ + +
+
+ + {{ form.full_name(size=40) }} +
+
+ + {{ form.location(size=40) }} +
+
+ + {{ form.bio(rows=4, cols=50) }} +
+
+ + {{ form.interests(size=50) }} +
+
+
+
+ +
+{% endblock %} diff --git a/sites/phys_org/templates/article_detail.html b/sites/phys_org/templates/article_detail.html new file mode 100644 index 0000000..62ef1bd --- /dev/null +++ b/sites/phys_org/templates/article_detail.html @@ -0,0 +1,146 @@ +{% extends 'base.html' %} +{% from '_macros.html' import article_card %} +{% block title %}{{ article.title }} — Phys.org Mirror{% endblock %} +{% block content %} +
+
+
+ Home + {% if article.category %} + / {{ article.category.name }} + {% endif %} + {% if article.subsection %} / {{ article.subsection }}{% endif %} +
+ +

{{ article.title }}

+ {% if article.subtitle %}

{{ article.subtitle }}

{% endif %} + + + + {% if article.image_filename %} +
+ +
+ {% endif %} + +
+ {% for p in article.get_paragraphs() %} +

{{ p|sanitize }}

+ {% endfor %} +
+ +
+
+ {% if article.source_journal %} +
Journal
{{ article.source_journal }}
+ {% endif %} + {% if article.source_institution %} +
Provided by
{{ article.source_institution }}
+ {% endif %} + {% if article.doi_url %} + + {% endif %} +
+
+ +
+ {% if current_user.is_authenticated %} +
+ {{ save_form.csrf_token }} + + {% if is_saved %} + + {% else %} + + + {% endif %} +
+ {% else %} + Sign in to save + {% endif %} + {{ article.comment_count }} comment{{ '' if article.comment_count == 1 else 's' }} +
+ +
+

Comments

+ {% if comment_tree %} + {% for entry in comment_tree %} + {% set c = entry.comment %} +
+
+ {{ c.user.username }} + · {{ c.time_ago }} + {% if current_user.is_authenticated %} + · Reply + {% endif %} +
+
{{ c.text }}
+
+ {% endfor %} + {% else %} +

No comments yet.

+ {% endif %} + + {% if current_user.is_authenticated %} +
+ {{ form.csrf_token }} + + + +
+
+ + {% else %} +

Sign in to comment.

+ {% endif %} +
+
+ + +
+{% endblock %} diff --git a/sites/phys_org/templates/base.html b/sites/phys_org/templates/base.html new file mode 100644 index 0000000..1743484 --- /dev/null +++ b/sites/phys_org/templates/base.html @@ -0,0 +1,66 @@ + + + + + +{% block title %}{{ site_name }}{% endblock %} + + + + + + +
+ {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} + {% for cat, msg in messages %} +
{{ msg }}
+ {% endfor %} + {% endif %} + {% endwith %} + {% block content %}{% endblock %} +
+ +
+
+
© Phys.org Mirror — for benchmark research, not affiliated with phys.org
+
+ Home · + Trending · + Search +
+
+
+ + diff --git a/sites/phys_org/templates/category.html b/sites/phys_org/templates/category.html new file mode 100644 index 0000000..e95463f --- /dev/null +++ b/sites/phys_org/templates/category.html @@ -0,0 +1,46 @@ +{% extends 'base.html' %} +{% from '_macros.html' import article_card, pager %} +{% block title %}{{ category.name }} — Phys.org Mirror{% endblock %} +{% block content %} +
+
+
+

{{ category.name }}

+ {{ pagination.total }} article{{ '' if pagination.total == 1 else 's' }} + + Recent · + Popular + +
+ {% if category.description %}

{{ category.description }}

{% endif %} + {% for a in pagination.items %}{{ article_card(a) }}{% endfor %} + {% if not pagination.items %}

No articles in this category yet.

{% endif %} + {{ pager(pagination, 'category', {'slug': category.slug, 'sort': sort}) }} +
+ +
+{% endblock %} diff --git a/sites/phys_org/templates/index.html b/sites/phys_org/templates/index.html new file mode 100644 index 0000000..fe7bb98 --- /dev/null +++ b/sites/phys_org/templates/index.html @@ -0,0 +1,89 @@ +{% extends 'base.html' %} +{% from '_macros.html' import article_card %} +{% block title %}Phys.org Mirror — Science, Technology, Research news{% endblock %} + +{% block content %} + +{% if featured %} + +{% endif %} + +
+
+
+

Latest News

+ All recent → +
+ {% for a in latest %}{{ article_card(a) }}{% endfor %} + + {% for cat, items in by_cat %} +
+

{{ cat.name }}

+ More in {{ cat.name }} → +
+ {% for a in items %}{{ article_card(a) }}{% endfor %} + {% endfor %} +
+ +
+{% endblock %} diff --git a/sites/phys_org/templates/login.html b/sites/phys_org/templates/login.html new file mode 100644 index 0000000..1f53389 --- /dev/null +++ b/sites/phys_org/templates/login.html @@ -0,0 +1,24 @@ +{% extends 'base.html' %} +{% block title %}Sign in — Phys.org Mirror{% endblock %} +{% block content %} +
+

Sign in

+
+ {{ form.csrf_token }} +
+ + {{ form.email(size=40) }} + {% if form.email.errors %}
{{ form.email.errors[0] }}
{% endif %} +
+
+ + {{ form.password(size=40) }} + {% if form.password.errors %}
{{ form.password.errors[0] }}
{% endif %} +
+
+ +
+
No account? Create one.
+
+
+{% endblock %} diff --git a/sites/phys_org/templates/register.html b/sites/phys_org/templates/register.html new file mode 100644 index 0000000..858e682 --- /dev/null +++ b/sites/phys_org/templates/register.html @@ -0,0 +1,33 @@ +{% extends 'base.html' %} +{% block title %}Create account — Phys.org Mirror{% endblock %} +{% block content %} +
+

Create account

+
+ {{ form.csrf_token }} +
+ + {{ form.username(size=40) }} + {% if form.username.errors %}
{{ form.username.errors[0] }}
{% endif %} +
+
+ + {{ form.email(size=40) }} + {% if form.email.errors %}
{{ form.email.errors[0] }}
{% endif %} +
+
+ + {{ form.full_name(size=40) }} +
+
+ + {{ form.password(size=40) }} + {% if form.password.errors %}
{{ form.password.errors[0] }}
{% endif %} +
+
+ +
+
Already have an account? Sign in.
+
+
+{% endblock %} diff --git a/sites/phys_org/templates/saved.html b/sites/phys_org/templates/saved.html new file mode 100644 index 0000000..d61146b --- /dev/null +++ b/sites/phys_org/templates/saved.html @@ -0,0 +1,37 @@ +{% extends 'base.html' %} +{% block title %}Saved articles — Phys.org Mirror{% endblock %} +{% block content %} +
+

Your saved articles

+ {{ items|length }} item{{ '' if items|length == 1 else 's' }} +
+{% if items %} + {% for it in items %} + {% set a = it.article %} + + {% endfor %} +{% else %} +

You haven't saved any articles yet. Browse the homepage and click "Save article" on any story.

+{% endif %} +{% endblock %} diff --git a/sites/phys_org/templates/search.html b/sites/phys_org/templates/search.html new file mode 100644 index 0000000..16cba6e --- /dev/null +++ b/sites/phys_org/templates/search.html @@ -0,0 +1,44 @@ +{% extends 'base.html' %} +{% from '_macros.html' import article_card %} +{% block title %}Search — Phys.org Mirror{% endblock %} +{% block content %} +
+

Search

+ {% if query %}{{ total }} result{{ '' if total == 1 else 's' }} for "{{ query }}"{% endif %} +
+ +
+ + + +
+ +{% if query %} + {% if results %} + {% for a in results %}{{ article_card(a) }}{% endfor %} + + {% else %} +

No results matched your search. Try fewer keywords or a different category.

+ {% endif %} +{% else %} +

Type a query above to search across {{ all_categories|length }} categories.

+{% endif %} +{% endblock %} diff --git a/sites/phys_org/templates/trending.html b/sites/phys_org/templates/trending.html new file mode 100644 index 0000000..c12ca8e --- /dev/null +++ b/sites/phys_org/templates/trending.html @@ -0,0 +1,11 @@ +{% extends 'base.html' %} +{% from '_macros.html' import article_card, pager %} +{% block title %}Trending — Phys.org Mirror{% endblock %} +{% block content %} +
+

Trending articles

+ Sorted by total views. +
+{% for a in pagination.items %}{{ article_card(a) }}{% endfor %} +{{ pager(pagination, 'trending') }} +{% endblock %} diff --git a/sites/phys_org/templates/user.html b/sites/phys_org/templates/user.html new file mode 100644 index 0000000..b9cfd9d --- /dev/null +++ b/sites/phys_org/templates/user.html @@ -0,0 +1,29 @@ +{% extends 'base.html' %} +{% block title %}{{ user.username }} — Phys.org Mirror{% endblock %} +{% block content %} +
+

{{ user.full_name or user.username }}

+
@{{ user.username }}{% if user.location %} · {{ user.location }}{% endif %} · joined {{ user.created_at.strftime('%b %Y') if user.created_at else '' }}
+ {% if user.bio %}

{{ user.bio }}

{% endif %} +
+ {{ saved_count }} saved + {{ comment_count }} comments + {% if user.interests %}Interests: {{ user.interests }}{% endif %} +
+
+ +

Recent comments

+{% if recent_comments %} + {% for c in recent_comments %} +
+
+ on {{ c.article.title }} + · {{ c.time_ago }} +
+
{{ c.text }}
+
+ {% endfor %} +{% else %} +

No comments yet.

+{% endif %} +{% endblock %} diff --git a/websyn_start.sh b/websyn_start.sh index 72defad..961cbfb 100644 --- a/websyn_start.sh +++ b/websyn_start.sh @@ -1,11 +1,11 @@ #!/bin/bash -# WebSyn startup: launch all 12 mirror sites, then exec the original CMD. +# WebSyn startup: launch all mirror sites, then exec the original CMD. # This preserves the base image's browser env server (port 8100) as PID 1. set -e SITES=(allrecipes amazon apple arxiv bbc_news booking github google_flights google_map google_search huggingface wolfram_alpha - cambridge_dictionary coursera espn) + cambridge_dictionary coursera espn phys_org) BASE_PORT=40000 PID_DIR=/tmp/websyn_pids mkdir -p "$PID_DIR" @@ -17,7 +17,9 @@ for d in "${SITES[@]}"; do cp -a "/opt/WebSyn/$d/instance_seed" "/opt/WebSyn/$d/instance" done -echo "[WebSyn] Starting 15 sites on ports ${BASE_PORT}-$((BASE_PORT + 14))..." +SITE_COUNT=${#SITES[@]} +END_PORT=$((BASE_PORT + SITE_COUNT - 1)) +echo "[WebSyn] Starting ${SITE_COUNT} sites on ports ${BASE_PORT}-${END_PORT}..." for i in "${!SITES[@]}"; do site="${SITES[$i]}" port=$((BASE_PORT + i)) @@ -51,8 +53,8 @@ except Exception: exit(1) ready=$((ready + 1)) fi done - echo " [${elapsed}/${max_wait}s] ${ready}/15 sites ready" - if [ $ready -eq 15 ]; then + echo " [${elapsed}/${max_wait}s] ${ready}/${SITE_COUNT} sites ready" + if [ $ready -eq $SITE_COUNT ]; then break fi done @@ -78,6 +80,6 @@ done echo "[WebSyn] Starting control server on :8101 (PID 1)..." # Control server becomes PID 1 — receives SIGTERM on `docker stop`, -# keeps the container alive as long as it's running. The 15 site +# keeps the container alive as long as it's running. The site # subprocesses are managed via /tmp/websyn_pids/.pid. exec python3 /opt/control_server.py --port 8101