From 1fc9e34e6c40187cd98b64348db8c6aa8b0ff9f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Czjwu0522=E2=80=9D?= Date: Mon, 17 Nov 2025 15:53:13 +0000 Subject: [PATCH 1/2] Add first batch of 50 easy tasks across services --- README.md | 12 +- docs/contributing/make-contribution.md | 4 +- docs/datasets/task.md | 16 +- docs/installation_and_docker_usage.md | 2 +- pipeline.py | 8 + src/aggregators/aggregate_results.py | 71 +++- src/base/task_manager.py | 8 +- src/evaluator.py | 10 +- .../filesystem/filesystem_task_manager.py | 3 +- .../github/github_state_manager.py | 30 +- .../github/github_task_manager.py | 3 +- .../notion/notion_task_manager.py | 5 +- .../playwright/playwright_task_manager.py | 3 +- .../playwright_task_manager.py | 10 +- .../postgres/postgres_task_manager.py | 4 +- tasks/filesystem/easy/.gitkeep | 0 .../file_splitting/description.md | 11 + .../file_context/file_splitting/meta.json | 23 ++ .../file_context/file_splitting/verify.py | 164 +++++++++ .../pattern_matching/description.md | 15 + .../file_context/pattern_matching/meta.json | 23 ++ .../file_context/pattern_matching/verify.py | 206 +++++++++++ .../file_context/uppercase/description.md | 11 + .../easy/file_context/uppercase/meta.json | 23 ++ .../easy/file_context/uppercase/verify.py | 278 ++++++++++++++ .../largest_rename/description.md | 11 + .../file_property/largest_rename/meta.json | 23 ++ .../file_property/largest_rename/verify.py | 69 ++++ .../file_property/txt_merging/description.md | 12 + .../easy/file_property/txt_merging/meta.json | 23 ++ .../easy/file_property/txt_merging/verify.py | 97 +++++ .../structure_analysis/description.md | 9 + .../structure_analysis/meta.json | 23 ++ .../structure_analysis/verify.py | 88 +++++ .../file_reorganize/description.md | 16 + .../legal_document/file_reorganize/meta.json | 23 ++ .../legal_document/file_reorganize/verify.py | 114 ++++++ .../papers/papers_counting/description.md | 15 + .../easy/papers/papers_counting/meta.json | 23 ++ 
.../easy/papers/papers_counting/verify.py | 94 +++++ .../duplicate_name/description.md | 5 + .../student_database/duplicate_name/meta.json | 23 ++ .../student_database/duplicate_name/verify.py | 181 ++++++++++ .../recommender_name/description.md | 1 + .../recommender_name/meta.json | 23 ++ .../recommender_name/verify.py | 71 ++++ .../desktop/music_report/description.md | 0 .../desktop/music_report/meta.json | 0 .../desktop/music_report/verify.py | 0 .../desktop/project_management/description.md | 0 .../desktop/project_management/meta.json | 0 .../desktop/project_management/verify.py | 0 .../timeline_extraction/description.md | 0 .../desktop/timeline_extraction/meta.json | 0 .../desktop/timeline_extraction/verify.py | 0 .../budget_computation/description.md | 0 .../budget_computation/meta.json | 0 .../budget_computation/verify.py | 0 .../contact_information/description.md | 0 .../contact_information/meta.json | 0 .../contact_information/verify.py | 0 .../file_arrangement/description.md | 0 .../file_arrangement/meta.json | 0 .../file_arrangement/verify.py | 0 .../duplicates_searching/description.md | 0 .../duplicates_searching/meta.json | 0 .../duplicates_searching/verify.py | 0 .../file_context/file_merging/description.md | 0 .../file_context/file_merging/meta.json | 0 .../file_context/file_merging/verify.py | 0 .../file_splitting/description.md | 0 .../file_context/file_splitting/meta.json | 0 .../file_context/file_splitting/verify.py | 0 .../pattern_matching/description.md | 0 .../file_context/pattern_matching/meta.json | 0 .../file_context/pattern_matching/verify.py | 0 .../file_context/uppercase/description.md | 0 .../file_context/uppercase/meta.json | 0 .../file_context/uppercase/verify.py | 0 .../size_classification/description.md | 0 .../size_classification/meta.json | 0 .../size_classification/verify.py | 0 .../time_classification/description.md | 0 .../time_classification/meta.json | 0 .../time_classification/verify.py | 0 
.../structure_analysis/description.md | 0 .../structure_analysis/meta.json | 0 .../structure_analysis/verify.py | 0 .../structure_mirror/description.md | 0 .../structure_mirror/meta.json | 0 .../structure_mirror/verify.py | 0 .../dispute_review/description.md | 0 .../legal_document/dispute_review/meta.json | 0 .../legal_document/dispute_review/verify.py | 0 .../individual_comments/description.md | 0 .../individual_comments/meta.json | 0 .../individual_comments/verify.py | 0 .../solution_tracing/description.md | 0 .../legal_document/solution_tracing/meta.json | 0 .../legal_document/solution_tracing/verify.py | 0 .../papers/author_folders/description.md | 0 .../papers/author_folders/meta.json | 0 .../papers/author_folders/verify.py | 0 .../papers/find_math_paper/description.md | 0 .../papers/find_math_paper/meta.json | 0 .../papers/find_math_paper/verify.py | 0 .../organize_legacy_papers/description.md | 0 .../papers/organize_legacy_papers/meta.json | 0 .../papers/organize_legacy_papers/verify.py | 0 .../duplicate_name/description.md | 0 .../student_database/duplicate_name/meta.json | 0 .../student_database/duplicate_name/verify.py | 0 .../english_talent/description.md | 0 .../student_database/english_talent/meta.json | 0 .../student_database/english_talent/verify.py | 0 .../gradebased_score/description.md | 0 .../gradebased_score/meta.json | 0 .../gradebased_score/verify.py | 0 .../threestudio/code_locating/description.md | 0 .../threestudio/code_locating/meta.json | 0 .../threestudio/code_locating/verify.py | 0 .../output_analysis/description.md | 0 .../threestudio/output_analysis/meta.json | 0 .../threestudio/output_analysis/verify.py | 0 .../requirements_completion/description.md | 0 .../requirements_completion/meta.json | 0 .../requirements_completion/verify.py | 0 .../votenet/dataset_comparison/description.md | 0 .../votenet/dataset_comparison/meta.json | 0 .../votenet/dataset_comparison/verify.py | 0 .../votenet/debugging/description.md | 0 
.../votenet/debugging/meta.json | 0 .../votenet/debugging/verify.py | 0 .../requirements_writing/description.md | 0 .../votenet/requirements_writing/meta.json | 0 .../votenet/requirements_writing/verify.py | 0 .../close_commented_issues/description.md | 1 + .../close_commented_issues/meta.json | 22 ++ .../close_commented_issues/verify.py | 76 ++++ .../record_recent_commits/description.md | 16 + .../record_recent_commits/meta.json | 23 ++ .../record_recent_commits/verify.py | 167 +++++++++ .../add_terminal_shortcuts_doc/description.md | 13 + .../add_terminal_shortcuts_doc/meta.json | 23 ++ .../add_terminal_shortcuts_doc/verify.py | 85 +++++ .../thank_docker_pr_author/description.md | 4 + .../thank_docker_pr_author/meta.json | 23 ++ .../thank_docker_pr_author/verify.py | 76 ++++ .../description.md | 5 + .../meta.json | 23 ++ .../verify.py | 89 +++++ .../basic_ci_checks/description.md | 15 + .../mcpmark-cicd/basic_ci_checks/meta.json | 24 ++ .../mcpmark-cicd/basic_ci_checks/verify.py | 123 +++++++ .../issue_lint_guard/description.md | 14 + .../mcpmark-cicd/issue_lint_guard/meta.json | 24 ++ .../mcpmark-cicd/issue_lint_guard/verify.py | 213 +++++++++++ .../nightly_health_check/description.md | 14 + .../nightly_health_check/meta.json | 24 ++ .../nightly_health_check/verify.py | 125 +++++++ .../count_translations/description.md | 12 + .../count_translations/meta.json | 23 ++ .../count_translations/verify.py | 92 +++++ .../find_ga_tracking_id/description.md | 10 + .../find_ga_tracking_id/meta.json | 24 ++ .../find_ga_tracking_id/verify.py | 84 +++++ .../find_commit_date/description.md | 0 .../find_commit_date/meta.json | 0 .../find_commit_date/verify.py | 0 .../find_rag_commit/description.md | 0 .../find_rag_commit/meta.json | 0 .../find_rag_commit/verify.py | 0 .../description.md | 0 .../automated_changelog_generation/meta.json | 0 .../automated_changelog_generation/verify.py | 0 .../description.md | 0 .../claude_collaboration_analysis/meta.json | 0 
.../claude_collaboration_analysis/verify.py | 0 .../description.md | 0 .../critical_issue_hotfix_workflow/meta.json | 0 .../critical_issue_hotfix_workflow/verify.py | 0 .../feature_commit_tracking/description.md | 0 .../feature_commit_tracking/meta.json | 0 .../feature_commit_tracking/verify.py | 0 .../description.md | 0 .../label_color_standardization/meta.json | 0 .../label_color_standardization/verify.py | 0 .../advanced_branch_strategy/description.md | 0 .../easyr1/advanced_branch_strategy/meta.json | 0 .../easyr1/advanced_branch_strategy/verify.py | 0 .../config_parameter_audit/description.md | 0 .../easyr1/config_parameter_audit/meta.json | 0 .../easyr1/config_parameter_audit/verify.py | 0 .../description.md | 0 .../meta.json | 0 .../verify.py | 0 .../qwen3_issue_management/description.md | 0 .../easyr1/qwen3_issue_management/meta.json | 0 .../easyr1/qwen3_issue_management/verify.py | 0 .../harmony/fix_conflict/description.md | 0 .../harmony/fix_conflict/meta.json | 0 .../harmony/fix_conflict/verify.py | 0 .../issue_pr_commit_workflow/description.md | 0 .../issue_pr_commit_workflow/meta.json | 0 .../issue_pr_commit_workflow/verify.py | 0 .../issue_tagging_pr_closure/description.md | 0 .../issue_tagging_pr_closure/meta.json | 0 .../issue_tagging_pr_closure/verify.py | 0 .../description.md | 0 .../multi_branch_commit_aggregation/meta.json | 0 .../multi_branch_commit_aggregation/verify.py | 0 .../description.md | 0 .../release_management_workflow/meta.json | 0 .../release_management_workflow/verify.py | 0 .../deployment_status_workflow/description.md | 0 .../deployment_status_workflow/meta.json | 0 .../deployment_status_workflow/verify.py | 0 .../issue_management_workflow/description.md | 0 .../issue_management_workflow/meta.json | 0 .../issue_management_workflow/verify.py | 0 .../linting_ci_workflow/description.md | 0 .../linting_ci_workflow/meta.json | 0 .../linting_ci_workflow/verify.py | 0 .../pr_automation_workflow/description.md | 0 
.../pr_automation_workflow/meta.json | 0 .../pr_automation_workflow/verify.py | 0 .../assign_contributor_labels/description.md | 0 .../assign_contributor_labels/meta.json | 0 .../assign_contributor_labels/verify.py | 0 .../find_legacy_name/description.md | 0 .../find_legacy_name/meta.json | 0 .../find_legacy_name/verify.py | 0 .../find_salient_file/description.md | 0 .../find_salient_file/meta.json | 0 .../find_salient_file/verify.py | 0 tasks/notion/easy/.gitkeep | 0 .../simple__code_snippets_go/description.md | 27 ++ .../simple__code_snippets_go/meta.json | 24 ++ .../simple__code_snippets_go/verify.py | 125 +++++++ .../description.md | 4 + .../simple__study_session_tracker/meta.json | 24 ++ .../simple__study_session_tracker/verify.py | 132 +++++++ .../description.md | 11 + .../meta.json | 26 ++ .../verify.py | 143 ++++++++ .../description.md | 1 + .../simple__remove_osaka_itinerary/meta.json | 23 ++ .../simple__remove_osaka_itinerary}/verify.py | 0 .../description.md | 19 + .../meta.json | 27 ++ .../verify.py | 206 +++++++++++ .../description.md | 30 ++ .../simple__expert_level_lessons/meta.json | 26 ++ .../simple__expert_level_lessons/verify.py | 234 ++++++++++++ .../simple__faq_column_layout/description.md | 6 + .../simple__faq_column_layout/meta.json | 24 ++ .../simple__faq_column_layout/verify.py | 161 +++++++++ .../description.md | 10 + .../simple__section_organization/meta.json | 24 ++ .../simple__section_organization/verify.py | 76 ++++ .../simple__swap_tasks/description.md | 1 + .../simple__swap_tasks/meta.json | 24 ++ .../simple__swap_tasks}/verify.py | 0 .../simple__change_color/description.md | 7 + .../simple__change_color/meta.json | 23 ++ .../simple__change_color/verify.py | 100 +++++ .../employee_onboarding/description.md | 0 .../employee_onboarding/meta.json | 0 .../employee_onboarding/verify.py | 0 .../goals_restructure/description.md | 0 .../goals_restructure/meta.json | 0 .../goals_restructure/verify.py | 0 
.../quarterly_review_dashboard/description.md | 0 .../quarterly_review_dashboard/meta.json | 0 .../quarterly_review_dashboard/verify.py | 0 .../code_snippets_go/description.md | 0 .../code_snippets_go/meta.json | 0 .../code_snippets_go/verify.py | 0 .../description.md | 0 .../courses_internships_relation/meta.json | 0 .../courses_internships_relation/verify.py | 0 .../study_session_tracker/description.md | 0 .../study_session_tracker/meta.json | 0 .../study_session_tracker/verify.py | 0 .../asset_retirement_migration/description.md | 0 .../asset_retirement_migration/meta.json | 0 .../asset_retirement_migration/verify.py | 0 .../security_audit_ticket/description.md | 0 .../security_audit_ticket/meta.json | 0 .../security_audit_ticket/verify.py | 0 .../description.md | 0 .../verification_expired_update/meta.json | 0 .../verification_expired_update/verify.py | 0 .../daily_itinerary_overview/description.md | 0 .../daily_itinerary_overview/meta.json | 0 .../daily_itinerary_overview/verify.py | 0 .../packing_progress_summary/description.md | 0 .../packing_progress_summary/meta.json | 0 .../packing_progress_summary/verify.py | 0 .../remove_osaka_itinerary/description.md | 0 .../remove_osaka_itinerary/meta.json | 0 .../remove_osaka_itinerary/verify.py | 288 +++++++++++++++ .../restaurant_expenses_sync/description.md | 0 .../restaurant_expenses_sync/meta.json | 0 .../restaurant_expenses_sync/verify.py | 0 .../layout_adjustment/description.md | 0 .../online_resume/layout_adjustment/meta.json | 0 .../online_resume/layout_adjustment/verify.py | 0 .../projects_section_update/description.md | 0 .../projects_section_update/meta.json | 0 .../projects_section_update/verify.py | 0 .../skills_development_tracker/description.md | 0 .../skills_development_tracker/meta.json | 0 .../skills_development_tracker/verify.py | 0 .../work_history_addition/description.md | 0 .../work_history_addition/meta.json | 0 .../work_history_addition/verify.py | 0 .../expert_level_lessons/description.md | 
0 .../expert_level_lessons/meta.json | 0 .../expert_level_lessons/verify.py | 0 .../learning_metrics_dashboard/description.md | 0 .../learning_metrics_dashboard/meta.json | 0 .../learning_metrics_dashboard/verify.py | 0 .../faq_column_layout/description.md | 0 .../faq_column_layout/meta.json | 0 .../faq_column_layout/verify.py | 0 .../hyperfocus_analysis_report/description.md | 0 .../hyperfocus_analysis_report/meta.json | 0 .../hyperfocus_analysis_report/verify.py | 0 .../numbered_list_emojis/description.md | 0 .../numbered_list_emojis/meta.json | 0 .../numbered_list_emojis/verify.py | 0 .../deployment_process_sop/description.md | 0 .../deployment_process_sop/meta.json | 0 .../deployment_process_sop/verify.py | 0 .../section_organization/description.md | 0 .../section_organization/meta.json | 0 .../section_organization/verify.py | 0 .../priority_tasks_table/description.md | 0 .../priority_tasks_table/meta.json | 0 .../priority_tasks_table/verify.py | 0 .../team_projects/swap_tasks/description.md | 0 .../team_projects/swap_tasks/meta.json | 0 .../team_projects/swap_tasks/verify.py | 215 +++++++++++ .../toronto_guide/change_color/description.md | 0 .../toronto_guide/change_color/meta.json | 0 .../toronto_guide/change_color/verify.py | 0 .../weekend_adventure_planner/description.md | 0 .../weekend_adventure_planner/meta.json | 0 .../weekend_adventure_planner/verify.py | 0 tasks/playwright/easy/.gitkeep | 0 .../description.md | 0 .../cloudflare_turnstile_challenge/meta.json | 0 .../cloudflare_turnstile_challenge/verify.py | 0 .../eval_web/extraction_table/data.csv | 0 .../eval_web/extraction_table/description.md | 0 .../eval_web/extraction_table/meta.json | 0 .../eval_web/extraction_table/verify.py | 0 .../birth_of_arvinxu/description.md | 0 .../web_search/birth_of_arvinxu/meta.json | 0 .../web_search/birth_of_arvinxu/verify.py | 0 .../web_search/r1_arxiv/content.txt | 0 .../web_search/r1_arxiv/description.md | 0 .../web_search/r1_arxiv/meta.json | 0 
.../web_search/r1_arxiv/verify.py | 0 tasks/playwright_webarena/easy/.gitkeep | 0 .../reddit/ai_data_analyst/description.md | 15 + .../easy/reddit/ai_data_analyst/label.txt | 2 + .../easy/reddit/ai_data_analyst/meta.json | 24 ++ .../easy/reddit/ai_data_analyst/verify.py | 175 +++++++++ .../llm_research_summary/description.md | 16 + .../reddit/llm_research_summary/label.txt | 3 + .../reddit/llm_research_summary/meta.json | 25 ++ .../reddit/llm_research_summary/verify.py | 190 ++++++++++ .../movie_reviewer_analysis/description.md | 17 + .../reddit/movie_reviewer_analysis/label.txt | 4 + .../reddit/movie_reviewer_analysis/meta.json | 25 ++ .../reddit/movie_reviewer_analysis/verify.py | 195 ++++++++++ .../nba_statistics_analysis/description.md | 18 + .../reddit/nba_statistics_analysis/label.txt | 5 + .../reddit/nba_statistics_analysis/meta.json | 25 ++ .../reddit/nba_statistics_analysis/verify.py | 196 ++++++++++ .../routine_tracker_forum/description.md | 10 + .../reddit/routine_tracker_forum/meta.json | 23 ++ .../reddit/routine_tracker_forum/verify.py | 113 ++++++ .../fitness_promotion_strategy/description.md | 38 ++ .../fitness_promotion_strategy/label.txt | 7 + .../fitness_promotion_strategy/meta.json | 25 ++ .../fitness_promotion_strategy/verify.py | 0 .../ny_expansion_analysis/description.md | 47 +++ .../ny_expansion_analysis/label.txt | 11 + .../ny_expansion_analysis/meta.json | 24 ++ .../ny_expansion_analysis/verify.py | 304 ++++++++++++++++ .../products_sales_analysis/description.md | 54 +++ .../products_sales_analysis/label.txt | 8 + .../products_sales_analysis/meta.json | 24 ++ .../products_sales_analysis/verify.py | 242 +++++++++++++ .../sales_inventory_analysis/description.md | 60 +++ .../sales_inventory_analysis/label.txt | 9 + .../sales_inventory_analysis/meta.json | 24 ++ .../sales_inventory_analysis/verify.py | 341 ++++++++++++++++++ .../description.md | 73 ++++ .../search_filtering_operations/label.txt | 10 + .../search_filtering_operations/meta.json | 
22 ++ .../search_filtering_operations/verify.py | 277 ++++++++++++++ .../reddit/ai_data_analyst/description.md | 0 .../reddit/ai_data_analyst/label.txt | 0 .../reddit/ai_data_analyst/meta.json | 0 .../reddit/ai_data_analyst/verify.py | 0 .../budget_europe_travel/description.md | 0 .../reddit/budget_europe_travel/meta.json | 0 .../reddit/budget_europe_travel/verify.py | 0 .../buyitforlife_research/description.md | 0 .../reddit/buyitforlife_research/label.txt | 0 .../reddit/buyitforlife_research/meta.json | 0 .../reddit/buyitforlife_research/verify.py | 0 .../llm_research_summary/description.md | 0 .../reddit/llm_research_summary/label.txt | 0 .../reddit/llm_research_summary/meta.json | 0 .../reddit/llm_research_summary/verify.py | 0 .../movie_reviewer_analysis/description.md | 0 .../reddit/movie_reviewer_analysis/label.txt | 0 .../reddit/movie_reviewer_analysis/meta.json | 0 .../reddit/movie_reviewer_analysis/verify.py | 0 .../nba_statistics_analysis/description.md | 0 .../reddit/nba_statistics_analysis/label.txt | 0 .../reddit/nba_statistics_analysis/meta.json | 0 .../reddit/nba_statistics_analysis/verify.py | 0 .../routine_tracker_forum/description.md | 0 .../reddit/routine_tracker_forum/meta.json | 0 .../reddit/routine_tracker_forum/verify.py | 0 .../advanced_product_analysis/description.md | 0 .../advanced_product_analysis/label.txt | 0 .../advanced_product_analysis/meta.json | 0 .../advanced_product_analysis/verify.py | 0 .../description.md | 0 .../gaming_accessories_analysis/label.txt | 0 .../gaming_accessories_analysis/meta.json | 0 .../gaming_accessories_analysis/verify.py | 0 .../description.md | 0 .../health_routine_optimization/label.txt | 0 .../health_routine_optimization/meta.json | 0 .../health_routine_optimization/verify.py | 0 .../holiday_baking_competition/description.md | 0 .../holiday_baking_competition/label.txt | 0 .../holiday_baking_competition/meta.json | 0 .../holiday_baking_competition/verify.py | 0 .../description.md | 0 
.../multi_category_budget_analysis/label.txt | 0 .../multi_category_budget_analysis/meta.json | 0 .../multi_category_budget_analysis/verify.py | 0 .../printer_keyboard_search/description.md | 0 .../printer_keyboard_search/label.txt | 0 .../printer_keyboard_search/meta.json | 0 .../printer_keyboard_search/verify.py | 0 .../running_shoes_purchase/description.md | 0 .../shopping/running_shoes_purchase/label.txt | 0 .../shopping/running_shoes_purchase/meta.json | 0 .../shopping/running_shoes_purchase/verify.py | 0 .../description.md | 0 .../customer_segmentation_setup/label.txt | 0 .../customer_segmentation_setup/meta.json | 0 .../customer_segmentation_setup/verify.py | 0 .../fitness_promotion_strategy/description.md | 0 .../fitness_promotion_strategy/label.txt | 0 .../fitness_promotion_strategy/meta.json | 0 .../fitness_promotion_strategy/verify.py | 263 ++++++++++++++ .../description.md | 0 .../marketing_customer_analysis/label.txt | 0 .../marketing_customer_analysis/meta.json | 0 .../marketing_customer_analysis/verify.py | 0 .../ny_expansion_analysis/description.md | 0 .../ny_expansion_analysis/label.txt | 0 .../ny_expansion_analysis/meta.json | 0 .../ny_expansion_analysis/verify.py | 0 .../products_sales_analysis/description.md | 0 .../products_sales_analysis/label.txt | 0 .../products_sales_analysis/meta.json | 0 .../products_sales_analysis/verify.py | 0 .../sales_inventory_analysis/description.md | 0 .../sales_inventory_analysis/label.txt | 0 .../sales_inventory_analysis/meta.json | 0 .../sales_inventory_analysis/verify.py | 0 .../description.md | 0 .../search_filtering_operations/label.txt | 0 .../search_filtering_operations/meta.json | 0 .../search_filtering_operations/verify.py | 0 tasks/postgres/easy/.gitkeep | 0 .../customer_data.pkl | Bin 0 -> 585 bytes .../description.md | 21 ++ .../customer_data_migration_basic/meta.json | 23 ++ .../customer_data_migration_basic}/verify.py | 0 .../update_employee_info/description.md | 23 ++ 
.../chinook/update_employee_info/meta.json | 23 ++ .../chinook/update_employee_info/verify.py | 146 ++++++++ .../create_payment_index/description.md | 21 ++ .../dvdrental/create_payment_index/meta.json | 23 ++ .../dvdrental/create_payment_index}/verify.py | 0 .../department_summary_view/description.md | 30 ++ .../department_summary_view/meta.json | 23 ++ .../department_summary_view/verify.py | 149 ++++++++ .../employee_gender_statistics/description.md | 19 + .../employee_gender_statistics/meta.json | 23 ++ .../employee_gender_statistics/verify.py | 123 +++++++ .../employee_projects_basic/description.md | 20 + .../employee_projects_basic/meta.json | 23 ++ .../employee_projects_basic/verify.py | 116 ++++++ .../hiring_year_summary/description.md | 19 + .../employees/hiring_year_summary/meta.json | 23 ++ .../employees/hiring_year_summary/verify.py | 127 +++++++ .../lego/basic_security_setup/description.md | 34 ++ .../easy/lego/basic_security_setup/meta.json | 23 ++ .../easy/lego/basic_security_setup/verify.py | 129 +++++++ .../fix_data_inconsistencies/description.md | 39 ++ .../lego/fix_data_inconsistencies/meta.json | 23 ++ .../lego/fix_data_inconsistencies/verify.py | 135 +++++++ .../create_performance_indexes/description.md | 19 + .../create_performance_indexes/meta.json | 23 ++ .../create_performance_indexes/verify.py | 133 +++++++ .../customer_data_migration/customer_data.pkl | Bin .../customer_data_migration/description.md | 0 .../chinook/customer_data_migration/meta.json | 0 .../chinook/customer_data_migration/verify.py | 158 ++++++++ .../description.md | 0 .../employee_hierarchy_management/meta.json | 0 .../employee_hierarchy_management/verify.py | 0 .../sales_and_music_charts/description.md | 0 .../chinook/sales_and_music_charts/meta.json | 0 .../chinook/sales_and_music_charts/verify.py | 0 .../customer_analysis_fix/description.md | 0 .../dvdrental/customer_analysis_fix/meta.json | 0 .../dvdrental/customer_analysis_fix/verify.py | 0 .../description.md | 0 
.../customer_analytics_optimization/meta.json | 0 .../customer_analytics_optimization/verify.py | 82 +++++ .../film_inventory_management/description.md | 0 .../film_inventory_management/meta.json | 0 .../film_inventory_management/verify.py | 0 .../description.md | 0 .../employee_demographics_report/meta.json | 0 .../employee_demographics_report/verify.py | 0 .../description.md | 0 .../employee_performance_analysis/meta.json | 0 .../employee_performance_analysis/verify.py | 0 .../employee_project_tracking/description.md | 0 .../employee_project_tracking/meta.json | 0 .../employee_project_tracking/verify.py | 0 .../description.md | 0 .../employee_retention_analysis/meta.json | 0 .../employee_retention_analysis/verify.py | 0 .../description.md | 0 .../executive_dashboard_automation/meta.json | 0 .../executive_dashboard_automation/verify.py | 0 .../description.md | 0 .../management_structure_analysis/meta.json | 0 .../management_structure_analysis/verify.py | 0 .../consistency_enforcement/description.md | 0 .../lego/consistency_enforcement/meta.json | 0 .../lego/consistency_enforcement/verify.py | 0 .../database_security_policies/description.md | 0 .../lego/database_security_policies/meta.json | 0 .../lego/database_security_policies/verify.py | 0 .../description.md | 0 .../meta.json | 0 .../verify.py | 0 .../rls_business_access/description.md | 0 .../rls_business_access/ground_truth.sql | 0 .../security/rls_business_access/meta.json | 0 .../prepare_environment.py | 0 .../security/rls_business_access/verify.py | 0 .../user_permission_audit/description.md | 0 .../user_permission_audit/ground_truth.sql | 0 .../security/user_permission_audit/meta.json | 0 .../prepare_environment.py | 0 .../security/user_permission_audit/verify.py | 0 .../baseball_player_analysis/description.md | 0 .../sports/baseball_player_analysis/meta.json | 0 .../sports/baseball_player_analysis/verify.py | 0 .../description.md | 0 .../participant_report_optimization/meta.json | 0 
.../participant_report_optimization/verify.py | 0 .../team_roster_management/description.md | 0 .../sports/team_roster_management/meta.json | 0 .../sports/team_roster_management/verify.py | 0 .../dba_vector_analysis/description.md | 0 .../dba_vector_analysis/ground_truth.sql | 0 .../vectors/dba_vector_analysis/meta.json | 0 .../prepare_environment.py | 0 .../vectors/dba_vector_analysis/verify.py | 0 .../{ => standard}/vectors/vectors_setup.py | 0 590 files changed, 10069 insertions(+), 43 deletions(-) create mode 100644 tasks/filesystem/easy/.gitkeep create mode 100644 tasks/filesystem/easy/file_context/file_splitting/description.md create mode 100644 tasks/filesystem/easy/file_context/file_splitting/meta.json create mode 100644 tasks/filesystem/easy/file_context/file_splitting/verify.py create mode 100644 tasks/filesystem/easy/file_context/pattern_matching/description.md create mode 100644 tasks/filesystem/easy/file_context/pattern_matching/meta.json create mode 100644 tasks/filesystem/easy/file_context/pattern_matching/verify.py create mode 100644 tasks/filesystem/easy/file_context/uppercase/description.md create mode 100644 tasks/filesystem/easy/file_context/uppercase/meta.json create mode 100644 tasks/filesystem/easy/file_context/uppercase/verify.py create mode 100644 tasks/filesystem/easy/file_property/largest_rename/description.md create mode 100644 tasks/filesystem/easy/file_property/largest_rename/meta.json create mode 100644 tasks/filesystem/easy/file_property/largest_rename/verify.py create mode 100644 tasks/filesystem/easy/file_property/txt_merging/description.md create mode 100644 tasks/filesystem/easy/file_property/txt_merging/meta.json create mode 100644 tasks/filesystem/easy/file_property/txt_merging/verify.py create mode 100644 tasks/filesystem/easy/folder_structure/structure_analysis/description.md create mode 100644 tasks/filesystem/easy/folder_structure/structure_analysis/meta.json create mode 100644 
tasks/filesystem/easy/folder_structure/structure_analysis/verify.py create mode 100644 tasks/filesystem/easy/legal_document/file_reorganize/description.md create mode 100644 tasks/filesystem/easy/legal_document/file_reorganize/meta.json create mode 100644 tasks/filesystem/easy/legal_document/file_reorganize/verify.py create mode 100644 tasks/filesystem/easy/papers/papers_counting/description.md create mode 100644 tasks/filesystem/easy/papers/papers_counting/meta.json create mode 100644 tasks/filesystem/easy/papers/papers_counting/verify.py create mode 100644 tasks/filesystem/easy/student_database/duplicate_name/description.md create mode 100644 tasks/filesystem/easy/student_database/duplicate_name/meta.json create mode 100644 tasks/filesystem/easy/student_database/duplicate_name/verify.py create mode 100644 tasks/filesystem/easy/student_database/recommender_name/description.md create mode 100644 tasks/filesystem/easy/student_database/recommender_name/meta.json create mode 100644 tasks/filesystem/easy/student_database/recommender_name/verify.py rename tasks/filesystem/{ => standard}/desktop/music_report/description.md (100%) rename tasks/filesystem/{ => standard}/desktop/music_report/meta.json (100%) rename tasks/filesystem/{ => standard}/desktop/music_report/verify.py (100%) rename tasks/filesystem/{ => standard}/desktop/project_management/description.md (100%) rename tasks/filesystem/{ => standard}/desktop/project_management/meta.json (100%) rename tasks/filesystem/{ => standard}/desktop/project_management/verify.py (100%) rename tasks/filesystem/{ => standard}/desktop/timeline_extraction/description.md (100%) rename tasks/filesystem/{ => standard}/desktop/timeline_extraction/meta.json (100%) rename tasks/filesystem/{ => standard}/desktop/timeline_extraction/verify.py (100%) rename tasks/filesystem/{ => standard}/desktop_template/budget_computation/description.md (100%) rename tasks/filesystem/{ => standard}/desktop_template/budget_computation/meta.json (100%) 
rename tasks/filesystem/{ => standard}/desktop_template/budget_computation/verify.py (100%) rename tasks/filesystem/{ => standard}/desktop_template/contact_information/description.md (100%) rename tasks/filesystem/{ => standard}/desktop_template/contact_information/meta.json (100%) rename tasks/filesystem/{ => standard}/desktop_template/contact_information/verify.py (100%) rename tasks/filesystem/{ => standard}/desktop_template/file_arrangement/description.md (100%) rename tasks/filesystem/{ => standard}/desktop_template/file_arrangement/meta.json (100%) rename tasks/filesystem/{ => standard}/desktop_template/file_arrangement/verify.py (100%) rename tasks/filesystem/{ => standard}/file_context/duplicates_searching/description.md (100%) rename tasks/filesystem/{ => standard}/file_context/duplicates_searching/meta.json (100%) rename tasks/filesystem/{ => standard}/file_context/duplicates_searching/verify.py (100%) rename tasks/filesystem/{ => standard}/file_context/file_merging/description.md (100%) rename tasks/filesystem/{ => standard}/file_context/file_merging/meta.json (100%) rename tasks/filesystem/{ => standard}/file_context/file_merging/verify.py (100%) rename tasks/filesystem/{ => standard}/file_context/file_splitting/description.md (100%) rename tasks/filesystem/{ => standard}/file_context/file_splitting/meta.json (100%) rename tasks/filesystem/{ => standard}/file_context/file_splitting/verify.py (100%) rename tasks/filesystem/{ => standard}/file_context/pattern_matching/description.md (100%) rename tasks/filesystem/{ => standard}/file_context/pattern_matching/meta.json (100%) rename tasks/filesystem/{ => standard}/file_context/pattern_matching/verify.py (100%) rename tasks/filesystem/{ => standard}/file_context/uppercase/description.md (100%) rename tasks/filesystem/{ => standard}/file_context/uppercase/meta.json (100%) rename tasks/filesystem/{ => standard}/file_context/uppercase/verify.py (100%) rename tasks/filesystem/{ => 
standard}/file_property/size_classification/description.md (100%) rename tasks/filesystem/{ => standard}/file_property/size_classification/meta.json (100%) rename tasks/filesystem/{ => standard}/file_property/size_classification/verify.py (100%) rename tasks/filesystem/{ => standard}/file_property/time_classification/description.md (100%) rename tasks/filesystem/{ => standard}/file_property/time_classification/meta.json (100%) rename tasks/filesystem/{ => standard}/file_property/time_classification/verify.py (100%) rename tasks/filesystem/{ => standard}/folder_structure/structure_analysis/description.md (100%) rename tasks/filesystem/{ => standard}/folder_structure/structure_analysis/meta.json (100%) rename tasks/filesystem/{ => standard}/folder_structure/structure_analysis/verify.py (100%) rename tasks/filesystem/{ => standard}/folder_structure/structure_mirror/description.md (100%) rename tasks/filesystem/{ => standard}/folder_structure/structure_mirror/meta.json (100%) rename tasks/filesystem/{ => standard}/folder_structure/structure_mirror/verify.py (100%) rename tasks/filesystem/{ => standard}/legal_document/dispute_review/description.md (100%) rename tasks/filesystem/{ => standard}/legal_document/dispute_review/meta.json (100%) rename tasks/filesystem/{ => standard}/legal_document/dispute_review/verify.py (100%) rename tasks/filesystem/{ => standard}/legal_document/individual_comments/description.md (100%) rename tasks/filesystem/{ => standard}/legal_document/individual_comments/meta.json (100%) rename tasks/filesystem/{ => standard}/legal_document/individual_comments/verify.py (100%) rename tasks/filesystem/{ => standard}/legal_document/solution_tracing/description.md (100%) rename tasks/filesystem/{ => standard}/legal_document/solution_tracing/meta.json (100%) rename tasks/filesystem/{ => standard}/legal_document/solution_tracing/verify.py (100%) rename tasks/filesystem/{ => standard}/papers/author_folders/description.md (100%) rename tasks/filesystem/{ => 
standard}/papers/author_folders/meta.json (100%) rename tasks/filesystem/{ => standard}/papers/author_folders/verify.py (100%) rename tasks/filesystem/{ => standard}/papers/find_math_paper/description.md (100%) rename tasks/filesystem/{ => standard}/papers/find_math_paper/meta.json (100%) rename tasks/filesystem/{ => standard}/papers/find_math_paper/verify.py (100%) rename tasks/filesystem/{ => standard}/papers/organize_legacy_papers/description.md (100%) rename tasks/filesystem/{ => standard}/papers/organize_legacy_papers/meta.json (100%) rename tasks/filesystem/{ => standard}/papers/organize_legacy_papers/verify.py (100%) rename tasks/filesystem/{ => standard}/student_database/duplicate_name/description.md (100%) rename tasks/filesystem/{ => standard}/student_database/duplicate_name/meta.json (100%) rename tasks/filesystem/{ => standard}/student_database/duplicate_name/verify.py (100%) rename tasks/filesystem/{ => standard}/student_database/english_talent/description.md (100%) rename tasks/filesystem/{ => standard}/student_database/english_talent/meta.json (100%) rename tasks/filesystem/{ => standard}/student_database/english_talent/verify.py (100%) rename tasks/filesystem/{ => standard}/student_database/gradebased_score/description.md (100%) rename tasks/filesystem/{ => standard}/student_database/gradebased_score/meta.json (100%) rename tasks/filesystem/{ => standard}/student_database/gradebased_score/verify.py (100%) rename tasks/filesystem/{ => standard}/threestudio/code_locating/description.md (100%) rename tasks/filesystem/{ => standard}/threestudio/code_locating/meta.json (100%) rename tasks/filesystem/{ => standard}/threestudio/code_locating/verify.py (100%) rename tasks/filesystem/{ => standard}/threestudio/output_analysis/description.md (100%) rename tasks/filesystem/{ => standard}/threestudio/output_analysis/meta.json (100%) rename tasks/filesystem/{ => standard}/threestudio/output_analysis/verify.py (100%) rename tasks/filesystem/{ => 
standard}/threestudio/requirements_completion/description.md (100%) rename tasks/filesystem/{ => standard}/threestudio/requirements_completion/meta.json (100%) rename tasks/filesystem/{ => standard}/threestudio/requirements_completion/verify.py (100%) rename tasks/filesystem/{ => standard}/votenet/dataset_comparison/description.md (100%) rename tasks/filesystem/{ => standard}/votenet/dataset_comparison/meta.json (100%) rename tasks/filesystem/{ => standard}/votenet/dataset_comparison/verify.py (100%) rename tasks/filesystem/{ => standard}/votenet/debugging/description.md (100%) rename tasks/filesystem/{ => standard}/votenet/debugging/meta.json (100%) rename tasks/filesystem/{ => standard}/votenet/debugging/verify.py (100%) rename tasks/filesystem/{ => standard}/votenet/requirements_writing/description.md (100%) rename tasks/filesystem/{ => standard}/votenet/requirements_writing/meta.json (100%) rename tasks/filesystem/{ => standard}/votenet/requirements_writing/verify.py (100%) create mode 100644 tasks/github/easy/build-your-own-x/close_commented_issues/description.md create mode 100644 tasks/github/easy/build-your-own-x/close_commented_issues/meta.json create mode 100644 tasks/github/easy/build-your-own-x/close_commented_issues/verify.py create mode 100644 tasks/github/easy/build-your-own-x/record_recent_commits/description.md create mode 100644 tasks/github/easy/build-your-own-x/record_recent_commits/meta.json create mode 100644 tasks/github/easy/build-your-own-x/record_recent_commits/verify.py create mode 100644 tasks/github/easy/claude-code/add_terminal_shortcuts_doc/description.md create mode 100644 tasks/github/easy/claude-code/add_terminal_shortcuts_doc/meta.json create mode 100644 tasks/github/easy/claude-code/add_terminal_shortcuts_doc/verify.py create mode 100644 tasks/github/easy/claude-code/thank_docker_pr_author/description.md create mode 100644 tasks/github/easy/claude-code/thank_docker_pr_author/meta.json create mode 100644 
tasks/github/easy/claude-code/thank_docker_pr_author/verify.py create mode 100644 tasks/github/easy/claude-code/triage_missing_tool_result_issue/description.md create mode 100644 tasks/github/easy/claude-code/triage_missing_tool_result_issue/meta.json create mode 100644 tasks/github/easy/claude-code/triage_missing_tool_result_issue/verify.py create mode 100644 tasks/github/easy/mcpmark-cicd/basic_ci_checks/description.md create mode 100644 tasks/github/easy/mcpmark-cicd/basic_ci_checks/meta.json create mode 100644 tasks/github/easy/mcpmark-cicd/basic_ci_checks/verify.py create mode 100644 tasks/github/easy/mcpmark-cicd/issue_lint_guard/description.md create mode 100644 tasks/github/easy/mcpmark-cicd/issue_lint_guard/meta.json create mode 100644 tasks/github/easy/mcpmark-cicd/issue_lint_guard/verify.py create mode 100644 tasks/github/easy/mcpmark-cicd/nightly_health_check/description.md create mode 100644 tasks/github/easy/mcpmark-cicd/nightly_health_check/meta.json create mode 100644 tasks/github/easy/mcpmark-cicd/nightly_health_check/verify.py create mode 100644 tasks/github/easy/missing-semester/count_translations/description.md create mode 100644 tasks/github/easy/missing-semester/count_translations/meta.json create mode 100644 tasks/github/easy/missing-semester/count_translations/verify.py create mode 100644 tasks/github/easy/missing-semester/find_ga_tracking_id/description.md create mode 100644 tasks/github/easy/missing-semester/find_ga_tracking_id/meta.json create mode 100644 tasks/github/easy/missing-semester/find_ga_tracking_id/verify.py rename tasks/github/{ => standard}/build_your_own_x/find_commit_date/description.md (100%) rename tasks/github/{ => standard}/build_your_own_x/find_commit_date/meta.json (100%) rename tasks/github/{ => standard}/build_your_own_x/find_commit_date/verify.py (100%) rename tasks/github/{ => standard}/build_your_own_x/find_rag_commit/description.md (100%) rename tasks/github/{ => 
standard}/build_your_own_x/find_rag_commit/meta.json (100%) rename tasks/github/{ => standard}/build_your_own_x/find_rag_commit/verify.py (100%) rename tasks/github/{ => standard}/claude-code/automated_changelog_generation/description.md (100%) rename tasks/github/{ => standard}/claude-code/automated_changelog_generation/meta.json (100%) rename tasks/github/{ => standard}/claude-code/automated_changelog_generation/verify.py (100%) rename tasks/github/{ => standard}/claude-code/claude_collaboration_analysis/description.md (100%) rename tasks/github/{ => standard}/claude-code/claude_collaboration_analysis/meta.json (100%) rename tasks/github/{ => standard}/claude-code/claude_collaboration_analysis/verify.py (100%) rename tasks/github/{ => standard}/claude-code/critical_issue_hotfix_workflow/description.md (100%) rename tasks/github/{ => standard}/claude-code/critical_issue_hotfix_workflow/meta.json (100%) rename tasks/github/{ => standard}/claude-code/critical_issue_hotfix_workflow/verify.py (100%) rename tasks/github/{ => standard}/claude-code/feature_commit_tracking/description.md (100%) rename tasks/github/{ => standard}/claude-code/feature_commit_tracking/meta.json (100%) rename tasks/github/{ => standard}/claude-code/feature_commit_tracking/verify.py (100%) rename tasks/github/{ => standard}/claude-code/label_color_standardization/description.md (100%) rename tasks/github/{ => standard}/claude-code/label_color_standardization/meta.json (100%) rename tasks/github/{ => standard}/claude-code/label_color_standardization/verify.py (100%) rename tasks/github/{ => standard}/easyr1/advanced_branch_strategy/description.md (100%) rename tasks/github/{ => standard}/easyr1/advanced_branch_strategy/meta.json (100%) rename tasks/github/{ => standard}/easyr1/advanced_branch_strategy/verify.py (100%) rename tasks/github/{ => standard}/easyr1/config_parameter_audit/description.md (100%) rename tasks/github/{ => standard}/easyr1/config_parameter_audit/meta.json (100%) rename 
tasks/github/{ => standard}/easyr1/config_parameter_audit/verify.py (100%) rename tasks/github/{ => standard}/easyr1/performance_regression_investigation/description.md (100%) rename tasks/github/{ => standard}/easyr1/performance_regression_investigation/meta.json (100%) rename tasks/github/{ => standard}/easyr1/performance_regression_investigation/verify.py (100%) rename tasks/github/{ => standard}/easyr1/qwen3_issue_management/description.md (100%) rename tasks/github/{ => standard}/easyr1/qwen3_issue_management/meta.json (100%) rename tasks/github/{ => standard}/easyr1/qwen3_issue_management/verify.py (100%) rename tasks/github/{ => standard}/harmony/fix_conflict/description.md (100%) rename tasks/github/{ => standard}/harmony/fix_conflict/meta.json (100%) rename tasks/github/{ => standard}/harmony/fix_conflict/verify.py (100%) rename tasks/github/{ => standard}/harmony/issue_pr_commit_workflow/description.md (100%) rename tasks/github/{ => standard}/harmony/issue_pr_commit_workflow/meta.json (100%) rename tasks/github/{ => standard}/harmony/issue_pr_commit_workflow/verify.py (100%) rename tasks/github/{ => standard}/harmony/issue_tagging_pr_closure/description.md (100%) rename tasks/github/{ => standard}/harmony/issue_tagging_pr_closure/meta.json (100%) rename tasks/github/{ => standard}/harmony/issue_tagging_pr_closure/verify.py (100%) rename tasks/github/{ => standard}/harmony/multi_branch_commit_aggregation/description.md (100%) rename tasks/github/{ => standard}/harmony/multi_branch_commit_aggregation/meta.json (100%) rename tasks/github/{ => standard}/harmony/multi_branch_commit_aggregation/verify.py (100%) rename tasks/github/{ => standard}/harmony/release_management_workflow/description.md (100%) rename tasks/github/{ => standard}/harmony/release_management_workflow/meta.json (100%) rename tasks/github/{ => standard}/harmony/release_management_workflow/verify.py (100%) rename tasks/github/{ => 
standard}/mcpmark-cicd/deployment_status_workflow/description.md (100%) rename tasks/github/{ => standard}/mcpmark-cicd/deployment_status_workflow/meta.json (100%) rename tasks/github/{ => standard}/mcpmark-cicd/deployment_status_workflow/verify.py (100%) rename tasks/github/{ => standard}/mcpmark-cicd/issue_management_workflow/description.md (100%) rename tasks/github/{ => standard}/mcpmark-cicd/issue_management_workflow/meta.json (100%) rename tasks/github/{ => standard}/mcpmark-cicd/issue_management_workflow/verify.py (100%) rename tasks/github/{ => standard}/mcpmark-cicd/linting_ci_workflow/description.md (100%) rename tasks/github/{ => standard}/mcpmark-cicd/linting_ci_workflow/meta.json (100%) rename tasks/github/{ => standard}/mcpmark-cicd/linting_ci_workflow/verify.py (100%) rename tasks/github/{ => standard}/mcpmark-cicd/pr_automation_workflow/description.md (100%) rename tasks/github/{ => standard}/mcpmark-cicd/pr_automation_workflow/meta.json (100%) rename tasks/github/{ => standard}/mcpmark-cicd/pr_automation_workflow/verify.py (100%) rename tasks/github/{ => standard}/missing-semester/assign_contributor_labels/description.md (100%) rename tasks/github/{ => standard}/missing-semester/assign_contributor_labels/meta.json (100%) rename tasks/github/{ => standard}/missing-semester/assign_contributor_labels/verify.py (100%) rename tasks/github/{ => standard}/missing-semester/find_legacy_name/description.md (100%) rename tasks/github/{ => standard}/missing-semester/find_legacy_name/meta.json (100%) rename tasks/github/{ => standard}/missing-semester/find_legacy_name/verify.py (100%) rename tasks/github/{ => standard}/missing-semester/find_salient_file/description.md (100%) rename tasks/github/{ => standard}/missing-semester/find_salient_file/meta.json (100%) rename tasks/github/{ => standard}/missing-semester/find_salient_file/verify.py (100%) create mode 100644 tasks/notion/easy/.gitkeep create mode 100644 
tasks/notion/easy/computer_science_student_dashboard/simple__code_snippets_go/description.md create mode 100644 tasks/notion/easy/computer_science_student_dashboard/simple__code_snippets_go/meta.json create mode 100644 tasks/notion/easy/computer_science_student_dashboard/simple__code_snippets_go/verify.py create mode 100644 tasks/notion/easy/computer_science_student_dashboard/simple__study_session_tracker/description.md create mode 100644 tasks/notion/easy/computer_science_student_dashboard/simple__study_session_tracker/meta.json create mode 100644 tasks/notion/easy/computer_science_student_dashboard/simple__study_session_tracker/verify.py create mode 100644 tasks/notion/easy/it_trouble_shooting_hub/simple__asset_retirement_migration/description.md create mode 100644 tasks/notion/easy/it_trouble_shooting_hub/simple__asset_retirement_migration/meta.json create mode 100644 tasks/notion/easy/it_trouble_shooting_hub/simple__asset_retirement_migration/verify.py create mode 100644 tasks/notion/easy/japan_travel_planner/simple__remove_osaka_itinerary/description.md create mode 100644 tasks/notion/easy/japan_travel_planner/simple__remove_osaka_itinerary/meta.json rename tasks/notion/{japan_travel_planner/remove_osaka_itinerary => easy/japan_travel_planner/simple__remove_osaka_itinerary}/verify.py (100%) create mode 100644 tasks/notion/easy/online_resume/simple__skills_development_tracker/description.md create mode 100644 tasks/notion/easy/online_resume/simple__skills_development_tracker/meta.json create mode 100644 tasks/notion/easy/online_resume/simple__skills_development_tracker/verify.py create mode 100644 tasks/notion/easy/python_roadmap/simple__expert_level_lessons/description.md create mode 100644 tasks/notion/easy/python_roadmap/simple__expert_level_lessons/meta.json create mode 100644 tasks/notion/easy/python_roadmap/simple__expert_level_lessons/verify.py create mode 100644 tasks/notion/easy/self_assessment/simple__faq_column_layout/description.md create mode 
100644 tasks/notion/easy/self_assessment/simple__faq_column_layout/meta.json create mode 100644 tasks/notion/easy/self_assessment/simple__faq_column_layout/verify.py create mode 100644 tasks/notion/easy/standard_operating_procedure/simple__section_organization/description.md create mode 100644 tasks/notion/easy/standard_operating_procedure/simple__section_organization/meta.json create mode 100644 tasks/notion/easy/standard_operating_procedure/simple__section_organization/verify.py create mode 100644 tasks/notion/easy/team_projects/simple__swap_tasks/description.md create mode 100644 tasks/notion/easy/team_projects/simple__swap_tasks/meta.json rename tasks/notion/{team_projects/swap_tasks => easy/team_projects/simple__swap_tasks}/verify.py (100%) create mode 100644 tasks/notion/easy/toronto_guide/simple__change_color/description.md create mode 100644 tasks/notion/easy/toronto_guide/simple__change_color/meta.json create mode 100644 tasks/notion/easy/toronto_guide/simple__change_color/verify.py rename tasks/notion/{ => standard}/company_in_a_box/employee_onboarding/description.md (100%) rename tasks/notion/{ => standard}/company_in_a_box/employee_onboarding/meta.json (100%) rename tasks/notion/{ => standard}/company_in_a_box/employee_onboarding/verify.py (100%) rename tasks/notion/{ => standard}/company_in_a_box/goals_restructure/description.md (100%) rename tasks/notion/{ => standard}/company_in_a_box/goals_restructure/meta.json (100%) rename tasks/notion/{ => standard}/company_in_a_box/goals_restructure/verify.py (100%) rename tasks/notion/{ => standard}/company_in_a_box/quarterly_review_dashboard/description.md (100%) rename tasks/notion/{ => standard}/company_in_a_box/quarterly_review_dashboard/meta.json (100%) rename tasks/notion/{ => standard}/company_in_a_box/quarterly_review_dashboard/verify.py (100%) rename tasks/notion/{ => standard}/computer_science_student_dashboard/code_snippets_go/description.md (100%) rename tasks/notion/{ => 
standard}/computer_science_student_dashboard/code_snippets_go/meta.json (100%) rename tasks/notion/{ => standard}/computer_science_student_dashboard/code_snippets_go/verify.py (100%) rename tasks/notion/{ => standard}/computer_science_student_dashboard/courses_internships_relation/description.md (100%) rename tasks/notion/{ => standard}/computer_science_student_dashboard/courses_internships_relation/meta.json (100%) rename tasks/notion/{ => standard}/computer_science_student_dashboard/courses_internships_relation/verify.py (100%) rename tasks/notion/{ => standard}/computer_science_student_dashboard/study_session_tracker/description.md (100%) rename tasks/notion/{ => standard}/computer_science_student_dashboard/study_session_tracker/meta.json (100%) rename tasks/notion/{ => standard}/computer_science_student_dashboard/study_session_tracker/verify.py (100%) rename tasks/notion/{ => standard}/it_trouble_shooting_hub/asset_retirement_migration/description.md (100%) rename tasks/notion/{ => standard}/it_trouble_shooting_hub/asset_retirement_migration/meta.json (100%) rename tasks/notion/{ => standard}/it_trouble_shooting_hub/asset_retirement_migration/verify.py (100%) rename tasks/notion/{ => standard}/it_trouble_shooting_hub/security_audit_ticket/description.md (100%) rename tasks/notion/{ => standard}/it_trouble_shooting_hub/security_audit_ticket/meta.json (100%) rename tasks/notion/{ => standard}/it_trouble_shooting_hub/security_audit_ticket/verify.py (100%) rename tasks/notion/{ => standard}/it_trouble_shooting_hub/verification_expired_update/description.md (100%) rename tasks/notion/{ => standard}/it_trouble_shooting_hub/verification_expired_update/meta.json (100%) rename tasks/notion/{ => standard}/it_trouble_shooting_hub/verification_expired_update/verify.py (100%) rename tasks/notion/{ => standard}/japan_travel_planner/daily_itinerary_overview/description.md (100%) rename tasks/notion/{ => standard}/japan_travel_planner/daily_itinerary_overview/meta.json (100%) 
rename tasks/notion/{ => standard}/japan_travel_planner/daily_itinerary_overview/verify.py (100%) rename tasks/notion/{ => standard}/japan_travel_planner/packing_progress_summary/description.md (100%) rename tasks/notion/{ => standard}/japan_travel_planner/packing_progress_summary/meta.json (100%) rename tasks/notion/{ => standard}/japan_travel_planner/packing_progress_summary/verify.py (100%) rename tasks/notion/{ => standard}/japan_travel_planner/remove_osaka_itinerary/description.md (100%) rename tasks/notion/{ => standard}/japan_travel_planner/remove_osaka_itinerary/meta.json (100%) create mode 100644 tasks/notion/standard/japan_travel_planner/remove_osaka_itinerary/verify.py rename tasks/notion/{ => standard}/japan_travel_planner/restaurant_expenses_sync/description.md (100%) rename tasks/notion/{ => standard}/japan_travel_planner/restaurant_expenses_sync/meta.json (100%) rename tasks/notion/{ => standard}/japan_travel_planner/restaurant_expenses_sync/verify.py (100%) rename tasks/notion/{ => standard}/online_resume/layout_adjustment/description.md (100%) rename tasks/notion/{ => standard}/online_resume/layout_adjustment/meta.json (100%) rename tasks/notion/{ => standard}/online_resume/layout_adjustment/verify.py (100%) rename tasks/notion/{ => standard}/online_resume/projects_section_update/description.md (100%) rename tasks/notion/{ => standard}/online_resume/projects_section_update/meta.json (100%) rename tasks/notion/{ => standard}/online_resume/projects_section_update/verify.py (100%) rename tasks/notion/{ => standard}/online_resume/skills_development_tracker/description.md (100%) rename tasks/notion/{ => standard}/online_resume/skills_development_tracker/meta.json (100%) rename tasks/notion/{ => standard}/online_resume/skills_development_tracker/verify.py (100%) rename tasks/notion/{ => standard}/online_resume/work_history_addition/description.md (100%) rename tasks/notion/{ => standard}/online_resume/work_history_addition/meta.json (100%) rename 
tasks/notion/{ => standard}/online_resume/work_history_addition/verify.py (100%) rename tasks/notion/{ => standard}/python_roadmap/expert_level_lessons/description.md (100%) rename tasks/notion/{ => standard}/python_roadmap/expert_level_lessons/meta.json (100%) rename tasks/notion/{ => standard}/python_roadmap/expert_level_lessons/verify.py (100%) rename tasks/notion/{ => standard}/python_roadmap/learning_metrics_dashboard/description.md (100%) rename tasks/notion/{ => standard}/python_roadmap/learning_metrics_dashboard/meta.json (100%) rename tasks/notion/{ => standard}/python_roadmap/learning_metrics_dashboard/verify.py (100%) rename tasks/notion/{ => standard}/self_assessment/faq_column_layout/description.md (100%) rename tasks/notion/{ => standard}/self_assessment/faq_column_layout/meta.json (100%) rename tasks/notion/{ => standard}/self_assessment/faq_column_layout/verify.py (100%) rename tasks/notion/{ => standard}/self_assessment/hyperfocus_analysis_report/description.md (100%) rename tasks/notion/{ => standard}/self_assessment/hyperfocus_analysis_report/meta.json (100%) rename tasks/notion/{ => standard}/self_assessment/hyperfocus_analysis_report/verify.py (100%) rename tasks/notion/{ => standard}/self_assessment/numbered_list_emojis/description.md (100%) rename tasks/notion/{ => standard}/self_assessment/numbered_list_emojis/meta.json (100%) rename tasks/notion/{ => standard}/self_assessment/numbered_list_emojis/verify.py (100%) rename tasks/notion/{ => standard}/standard_operating_procedure/deployment_process_sop/description.md (100%) rename tasks/notion/{ => standard}/standard_operating_procedure/deployment_process_sop/meta.json (100%) rename tasks/notion/{ => standard}/standard_operating_procedure/deployment_process_sop/verify.py (100%) rename tasks/notion/{ => standard}/standard_operating_procedure/section_organization/description.md (100%) rename tasks/notion/{ => standard}/standard_operating_procedure/section_organization/meta.json (100%) rename 
tasks/notion/{ => standard}/standard_operating_procedure/section_organization/verify.py (100%) rename tasks/notion/{ => standard}/team_projects/priority_tasks_table/description.md (100%) rename tasks/notion/{ => standard}/team_projects/priority_tasks_table/meta.json (100%) rename tasks/notion/{ => standard}/team_projects/priority_tasks_table/verify.py (100%) rename tasks/notion/{ => standard}/team_projects/swap_tasks/description.md (100%) rename tasks/notion/{ => standard}/team_projects/swap_tasks/meta.json (100%) create mode 100644 tasks/notion/standard/team_projects/swap_tasks/verify.py rename tasks/notion/{ => standard}/toronto_guide/change_color/description.md (100%) rename tasks/notion/{ => standard}/toronto_guide/change_color/meta.json (100%) rename tasks/notion/{ => standard}/toronto_guide/change_color/verify.py (100%) rename tasks/notion/{ => standard}/toronto_guide/weekend_adventure_planner/description.md (100%) rename tasks/notion/{ => standard}/toronto_guide/weekend_adventure_planner/meta.json (100%) rename tasks/notion/{ => standard}/toronto_guide/weekend_adventure_planner/verify.py (100%) create mode 100644 tasks/playwright/easy/.gitkeep rename tasks/playwright/{ => standard}/eval_web/cloudflare_turnstile_challenge/description.md (100%) rename tasks/playwright/{ => standard}/eval_web/cloudflare_turnstile_challenge/meta.json (100%) rename tasks/playwright/{ => standard}/eval_web/cloudflare_turnstile_challenge/verify.py (100%) rename tasks/playwright/{ => standard}/eval_web/extraction_table/data.csv (100%) rename tasks/playwright/{ => standard}/eval_web/extraction_table/description.md (100%) rename tasks/playwright/{ => standard}/eval_web/extraction_table/meta.json (100%) rename tasks/playwright/{ => standard}/eval_web/extraction_table/verify.py (100%) rename tasks/playwright/{ => standard}/web_search/birth_of_arvinxu/description.md (100%) rename tasks/playwright/{ => standard}/web_search/birth_of_arvinxu/meta.json (100%) rename tasks/playwright/{ => 
standard}/web_search/birth_of_arvinxu/verify.py (100%) rename tasks/playwright/{ => standard}/web_search/r1_arxiv/content.txt (100%) rename tasks/playwright/{ => standard}/web_search/r1_arxiv/description.md (100%) rename tasks/playwright/{ => standard}/web_search/r1_arxiv/meta.json (100%) rename tasks/playwright/{ => standard}/web_search/r1_arxiv/verify.py (100%) create mode 100644 tasks/playwright_webarena/easy/.gitkeep create mode 100644 tasks/playwright_webarena/easy/reddit/ai_data_analyst/description.md create mode 100644 tasks/playwright_webarena/easy/reddit/ai_data_analyst/label.txt create mode 100644 tasks/playwright_webarena/easy/reddit/ai_data_analyst/meta.json create mode 100644 tasks/playwright_webarena/easy/reddit/ai_data_analyst/verify.py create mode 100644 tasks/playwright_webarena/easy/reddit/llm_research_summary/description.md create mode 100644 tasks/playwright_webarena/easy/reddit/llm_research_summary/label.txt create mode 100644 tasks/playwright_webarena/easy/reddit/llm_research_summary/meta.json create mode 100644 tasks/playwright_webarena/easy/reddit/llm_research_summary/verify.py create mode 100644 tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/description.md create mode 100644 tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/label.txt create mode 100644 tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/meta.json create mode 100644 tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/verify.py create mode 100644 tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/description.md create mode 100644 tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/label.txt create mode 100644 tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/meta.json create mode 100644 tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/verify.py create mode 100644 tasks/playwright_webarena/easy/reddit/routine_tracker_forum/description.md create mode 100644 
tasks/playwright_webarena/easy/reddit/routine_tracker_forum/meta.json create mode 100644 tasks/playwright_webarena/easy/reddit/routine_tracker_forum/verify.py create mode 100644 tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/description.md create mode 100644 tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/label.txt create mode 100644 tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/meta.json rename tasks/playwright_webarena/{ => easy}/shopping_admin/fitness_promotion_strategy/verify.py (100%) create mode 100644 tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/description.md create mode 100644 tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/label.txt create mode 100644 tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/meta.json create mode 100644 tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/verify.py create mode 100644 tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/description.md create mode 100644 tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/label.txt create mode 100644 tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/meta.json create mode 100644 tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/verify.py create mode 100644 tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/description.md create mode 100644 tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/label.txt create mode 100644 tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/meta.json create mode 100644 tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/verify.py create mode 100644 tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/description.md create mode 100644 tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/label.txt create mode 
100644 tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/meta.json create mode 100644 tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/verify.py rename tasks/playwright_webarena/{ => standard}/reddit/ai_data_analyst/description.md (100%) rename tasks/playwright_webarena/{ => standard}/reddit/ai_data_analyst/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/reddit/ai_data_analyst/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/reddit/ai_data_analyst/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/reddit/budget_europe_travel/description.md (100%) rename tasks/playwright_webarena/{ => standard}/reddit/budget_europe_travel/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/reddit/budget_europe_travel/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/reddit/buyitforlife_research/description.md (100%) rename tasks/playwright_webarena/{ => standard}/reddit/buyitforlife_research/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/reddit/buyitforlife_research/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/reddit/buyitforlife_research/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/reddit/llm_research_summary/description.md (100%) rename tasks/playwright_webarena/{ => standard}/reddit/llm_research_summary/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/reddit/llm_research_summary/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/reddit/llm_research_summary/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/reddit/movie_reviewer_analysis/description.md (100%) rename tasks/playwright_webarena/{ => standard}/reddit/movie_reviewer_analysis/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/reddit/movie_reviewer_analysis/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/reddit/movie_reviewer_analysis/verify.py (100%) rename 
tasks/playwright_webarena/{ => standard}/reddit/nba_statistics_analysis/description.md (100%) rename tasks/playwright_webarena/{ => standard}/reddit/nba_statistics_analysis/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/reddit/nba_statistics_analysis/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/reddit/nba_statistics_analysis/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/reddit/routine_tracker_forum/description.md (100%) rename tasks/playwright_webarena/{ => standard}/reddit/routine_tracker_forum/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/reddit/routine_tracker_forum/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/shopping/advanced_product_analysis/description.md (100%) rename tasks/playwright_webarena/{ => standard}/shopping/advanced_product_analysis/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/shopping/advanced_product_analysis/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/shopping/advanced_product_analysis/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/shopping/gaming_accessories_analysis/description.md (100%) rename tasks/playwright_webarena/{ => standard}/shopping/gaming_accessories_analysis/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/shopping/gaming_accessories_analysis/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/shopping/gaming_accessories_analysis/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/shopping/health_routine_optimization/description.md (100%) rename tasks/playwright_webarena/{ => standard}/shopping/health_routine_optimization/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/shopping/health_routine_optimization/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/shopping/health_routine_optimization/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/shopping/holiday_baking_competition/description.md 
(100%) rename tasks/playwright_webarena/{ => standard}/shopping/holiday_baking_competition/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/shopping/holiday_baking_competition/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/shopping/holiday_baking_competition/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/shopping/multi_category_budget_analysis/description.md (100%) rename tasks/playwright_webarena/{ => standard}/shopping/multi_category_budget_analysis/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/shopping/multi_category_budget_analysis/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/shopping/multi_category_budget_analysis/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/shopping/printer_keyboard_search/description.md (100%) rename tasks/playwright_webarena/{ => standard}/shopping/printer_keyboard_search/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/shopping/printer_keyboard_search/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/shopping/printer_keyboard_search/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/shopping/running_shoes_purchase/description.md (100%) rename tasks/playwright_webarena/{ => standard}/shopping/running_shoes_purchase/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/shopping/running_shoes_purchase/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/shopping/running_shoes_purchase/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/customer_segmentation_setup/description.md (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/customer_segmentation_setup/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/customer_segmentation_setup/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/customer_segmentation_setup/verify.py (100%) rename tasks/playwright_webarena/{ => 
standard}/shopping_admin/fitness_promotion_strategy/description.md (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/fitness_promotion_strategy/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/fitness_promotion_strategy/meta.json (100%) create mode 100644 tasks/playwright_webarena/standard/shopping_admin/fitness_promotion_strategy/verify.py rename tasks/playwright_webarena/{ => standard}/shopping_admin/marketing_customer_analysis/description.md (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/marketing_customer_analysis/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/marketing_customer_analysis/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/marketing_customer_analysis/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/ny_expansion_analysis/description.md (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/ny_expansion_analysis/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/ny_expansion_analysis/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/ny_expansion_analysis/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/products_sales_analysis/description.md (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/products_sales_analysis/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/products_sales_analysis/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/products_sales_analysis/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/sales_inventory_analysis/description.md (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/sales_inventory_analysis/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/sales_inventory_analysis/meta.json (100%) rename 
tasks/playwright_webarena/{ => standard}/shopping_admin/sales_inventory_analysis/verify.py (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/search_filtering_operations/description.md (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/search_filtering_operations/label.txt (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/search_filtering_operations/meta.json (100%) rename tasks/playwright_webarena/{ => standard}/shopping_admin/search_filtering_operations/verify.py (100%) create mode 100644 tasks/postgres/easy/.gitkeep create mode 100644 tasks/postgres/easy/chinook/customer_data_migration_basic/customer_data.pkl create mode 100644 tasks/postgres/easy/chinook/customer_data_migration_basic/description.md create mode 100644 tasks/postgres/easy/chinook/customer_data_migration_basic/meta.json rename tasks/postgres/{chinook/customer_data_migration => easy/chinook/customer_data_migration_basic}/verify.py (100%) create mode 100644 tasks/postgres/easy/chinook/update_employee_info/description.md create mode 100644 tasks/postgres/easy/chinook/update_employee_info/meta.json create mode 100644 tasks/postgres/easy/chinook/update_employee_info/verify.py create mode 100644 tasks/postgres/easy/dvdrental/create_payment_index/description.md create mode 100644 tasks/postgres/easy/dvdrental/create_payment_index/meta.json rename tasks/postgres/{dvdrental/customer_analytics_optimization => easy/dvdrental/create_payment_index}/verify.py (100%) create mode 100644 tasks/postgres/easy/employees/department_summary_view/description.md create mode 100644 tasks/postgres/easy/employees/department_summary_view/meta.json create mode 100644 tasks/postgres/easy/employees/department_summary_view/verify.py create mode 100644 tasks/postgres/easy/employees/employee_gender_statistics/description.md create mode 100644 tasks/postgres/easy/employees/employee_gender_statistics/meta.json create mode 100644 
tasks/postgres/easy/employees/employee_gender_statistics/verify.py create mode 100644 tasks/postgres/easy/employees/employee_projects_basic/description.md create mode 100644 tasks/postgres/easy/employees/employee_projects_basic/meta.json create mode 100644 tasks/postgres/easy/employees/employee_projects_basic/verify.py create mode 100644 tasks/postgres/easy/employees/hiring_year_summary/description.md create mode 100644 tasks/postgres/easy/employees/hiring_year_summary/meta.json create mode 100644 tasks/postgres/easy/employees/hiring_year_summary/verify.py create mode 100644 tasks/postgres/easy/lego/basic_security_setup/description.md create mode 100644 tasks/postgres/easy/lego/basic_security_setup/meta.json create mode 100644 tasks/postgres/easy/lego/basic_security_setup/verify.py create mode 100644 tasks/postgres/easy/lego/fix_data_inconsistencies/description.md create mode 100644 tasks/postgres/easy/lego/fix_data_inconsistencies/meta.json create mode 100644 tasks/postgres/easy/lego/fix_data_inconsistencies/verify.py create mode 100644 tasks/postgres/easy/sports/create_performance_indexes/description.md create mode 100644 tasks/postgres/easy/sports/create_performance_indexes/meta.json create mode 100644 tasks/postgres/easy/sports/create_performance_indexes/verify.py rename tasks/postgres/{ => standard}/chinook/customer_data_migration/customer_data.pkl (100%) rename tasks/postgres/{ => standard}/chinook/customer_data_migration/description.md (100%) rename tasks/postgres/{ => standard}/chinook/customer_data_migration/meta.json (100%) create mode 100644 tasks/postgres/standard/chinook/customer_data_migration/verify.py rename tasks/postgres/{ => standard}/chinook/employee_hierarchy_management/description.md (100%) rename tasks/postgres/{ => standard}/chinook/employee_hierarchy_management/meta.json (100%) rename tasks/postgres/{ => standard}/chinook/employee_hierarchy_management/verify.py (100%) rename tasks/postgres/{ => 
standard}/chinook/sales_and_music_charts/description.md (100%) rename tasks/postgres/{ => standard}/chinook/sales_and_music_charts/meta.json (100%) rename tasks/postgres/{ => standard}/chinook/sales_and_music_charts/verify.py (100%) rename tasks/postgres/{ => standard}/dvdrental/customer_analysis_fix/description.md (100%) rename tasks/postgres/{ => standard}/dvdrental/customer_analysis_fix/meta.json (100%) rename tasks/postgres/{ => standard}/dvdrental/customer_analysis_fix/verify.py (100%) rename tasks/postgres/{ => standard}/dvdrental/customer_analytics_optimization/description.md (100%) rename tasks/postgres/{ => standard}/dvdrental/customer_analytics_optimization/meta.json (100%) create mode 100644 tasks/postgres/standard/dvdrental/customer_analytics_optimization/verify.py rename tasks/postgres/{ => standard}/dvdrental/film_inventory_management/description.md (100%) rename tasks/postgres/{ => standard}/dvdrental/film_inventory_management/meta.json (100%) rename tasks/postgres/{ => standard}/dvdrental/film_inventory_management/verify.py (100%) rename tasks/postgres/{ => standard}/employees/employee_demographics_report/description.md (100%) rename tasks/postgres/{ => standard}/employees/employee_demographics_report/meta.json (100%) rename tasks/postgres/{ => standard}/employees/employee_demographics_report/verify.py (100%) rename tasks/postgres/{ => standard}/employees/employee_performance_analysis/description.md (100%) rename tasks/postgres/{ => standard}/employees/employee_performance_analysis/meta.json (100%) rename tasks/postgres/{ => standard}/employees/employee_performance_analysis/verify.py (100%) rename tasks/postgres/{ => standard}/employees/employee_project_tracking/description.md (100%) rename tasks/postgres/{ => standard}/employees/employee_project_tracking/meta.json (100%) rename tasks/postgres/{ => standard}/employees/employee_project_tracking/verify.py (100%) rename tasks/postgres/{ => standard}/employees/employee_retention_analysis/description.md 
(100%) rename tasks/postgres/{ => standard}/employees/employee_retention_analysis/meta.json (100%) rename tasks/postgres/{ => standard}/employees/employee_retention_analysis/verify.py (100%) rename tasks/postgres/{ => standard}/employees/executive_dashboard_automation/description.md (100%) rename tasks/postgres/{ => standard}/employees/executive_dashboard_automation/meta.json (100%) rename tasks/postgres/{ => standard}/employees/executive_dashboard_automation/verify.py (100%) rename tasks/postgres/{ => standard}/employees/management_structure_analysis/description.md (100%) rename tasks/postgres/{ => standard}/employees/management_structure_analysis/meta.json (100%) rename tasks/postgres/{ => standard}/employees/management_structure_analysis/verify.py (100%) rename tasks/postgres/{ => standard}/lego/consistency_enforcement/description.md (100%) rename tasks/postgres/{ => standard}/lego/consistency_enforcement/meta.json (100%) rename tasks/postgres/{ => standard}/lego/consistency_enforcement/verify.py (100%) rename tasks/postgres/{ => standard}/lego/database_security_policies/description.md (100%) rename tasks/postgres/{ => standard}/lego/database_security_policies/meta.json (100%) rename tasks/postgres/{ => standard}/lego/database_security_policies/verify.py (100%) rename tasks/postgres/{ => standard}/lego/transactional_inventory_transfer/description.md (100%) rename tasks/postgres/{ => standard}/lego/transactional_inventory_transfer/meta.json (100%) rename tasks/postgres/{ => standard}/lego/transactional_inventory_transfer/verify.py (100%) rename tasks/postgres/{ => standard}/security/rls_business_access/description.md (100%) rename tasks/postgres/{ => standard}/security/rls_business_access/ground_truth.sql (100%) rename tasks/postgres/{ => standard}/security/rls_business_access/meta.json (100%) rename tasks/postgres/{ => standard}/security/rls_business_access/prepare_environment.py (100%) rename tasks/postgres/{ => standard}/security/rls_business_access/verify.py 
(100%) rename tasks/postgres/{ => standard}/security/user_permission_audit/description.md (100%) rename tasks/postgres/{ => standard}/security/user_permission_audit/ground_truth.sql (100%) rename tasks/postgres/{ => standard}/security/user_permission_audit/meta.json (100%) rename tasks/postgres/{ => standard}/security/user_permission_audit/prepare_environment.py (100%) rename tasks/postgres/{ => standard}/security/user_permission_audit/verify.py (100%) rename tasks/postgres/{ => standard}/sports/baseball_player_analysis/description.md (100%) rename tasks/postgres/{ => standard}/sports/baseball_player_analysis/meta.json (100%) rename tasks/postgres/{ => standard}/sports/baseball_player_analysis/verify.py (100%) rename tasks/postgres/{ => standard}/sports/participant_report_optimization/description.md (100%) rename tasks/postgres/{ => standard}/sports/participant_report_optimization/meta.json (100%) rename tasks/postgres/{ => standard}/sports/participant_report_optimization/verify.py (100%) rename tasks/postgres/{ => standard}/sports/team_roster_management/description.md (100%) rename tasks/postgres/{ => standard}/sports/team_roster_management/meta.json (100%) rename tasks/postgres/{ => standard}/sports/team_roster_management/verify.py (100%) rename tasks/postgres/{ => standard}/vectors/dba_vector_analysis/description.md (100%) rename tasks/postgres/{ => standard}/vectors/dba_vector_analysis/ground_truth.sql (100%) rename tasks/postgres/{ => standard}/vectors/dba_vector_analysis/meta.json (100%) rename tasks/postgres/{ => standard}/vectors/dba_vector_analysis/prepare_environment.py (100%) rename tasks/postgres/{ => standard}/vectors/dba_vector_analysis/verify.py (100%) rename tasks/postgres/{ => standard}/vectors/vectors_setup.py (100%) diff --git a/README.md b/README.md index a7c14f0a..68ba69aa 100644 --- a/README.md +++ b/README.md @@ -85,14 +85,22 @@ python -m pipeline \ --k 1 \ # run once to quick start --models gpt-5 \ # or any model you configured --tasks 
file_property/size_classification +# Add --task-suite easy to run the lightweight dataset (where available) ``` -Results are saved to `./results/{exp_name}/{model}__{mcp}/run-*/...` (e.g., `./results/test-run/gpt-5__filesystem/run-1/...`). +Results are saved to `./results/{exp_name}/{model}__{mcp}/run-*/...` for the standard suite and `./results/{exp_name}/{model}__{mcp}-easy/run-*/...` when you run `--task-suite easy` (e.g., `./results/test-run/gpt-5__filesystem/run-1/...` or `./results/test-run/gpt-5__github-easy/run-1/...`). --- ## Run your evaluations +### Task suites (standard vs easy) + +- Each MCP service now stores tasks under `tasks/////`. +- `standard` (default) covers the full benchmark (127 tasks today). +- `easy` hosts 10 lightweight tasks per MCP, ideal for smoke tests and CI (GitHub’s are already available under `tasks/github/easy`). +- Switch suites with `--task-suite easy` (defaults to `--task-suite standard`). + ### Single run (k=1) ```bash # Run ALL tasks for a service @@ -173,7 +181,7 @@ python -m src.aggregators.aggregate_results --exp-name exp --k 4 --single-run-mo ## Contributing Contributions are welcome: -1. Add a new task under `tasks///` with `meta.json`, `description.md` and `verify.py`. +1. Add a new task under `tasks/////` with `meta.json`, `description.md` and `verify.py`. 2. Ensure local checks pass and open a PR. 3. See `docs/contributing/make-contribution.md`. diff --git a/docs/contributing/make-contribution.md b/docs/contributing/make-contribution.md index 136971f5..2f731457 100644 --- a/docs/contributing/make-contribution.md +++ b/docs/contributing/make-contribution.md @@ -2,8 +2,8 @@ 1. Fork the repository and create a feature branch. -2. Add new tasks under `tasks///` with the files of `meta.json`, `description.md` and `verify.py`. Please refer to [Task Page](../datasets/task.md) for detailed instructions. +2. Add new tasks under `tasks/////` with the files of `meta.json`, `description.md` and `verify.py`. 
Please refer to [Task Page](../datasets/task.md) for detailed instructions. 3. Ensure all tests pass. -4. Submit a pull request — contributions are welcome! \ No newline at end of file +4. Submit a pull request — contributions are welcome! diff --git a/docs/datasets/task.md b/docs/datasets/task.md index 93a11fe6..795af391 100644 --- a/docs/datasets/task.md +++ b/docs/datasets/task.md @@ -18,15 +18,17 @@ tasks │ └───filesystem │ - └───file_context + └───standard # task_suite (also supports `easy`) │ - └───create_file_write - │ meta.json - │ description.md - │ verify.py + └───file_context # category_id + │ + └───create_file_write + │ meta.json + │ description.md + │ verify.py ``` -Note that all tasks are placed under `tasks/`. `filesystem` refers to the environment for the MCP service. +All tasks live under `tasks/////`. `filesystem` refers to the MCP service and `task_suite` captures the difficulty slice (`standard` benchmark vs `easy` smoke tests). `meta.json` includes the meta information about the task, including the following key - task_id: the id of the task. @@ -68,4 +70,4 @@ Accordingly, the `verify.py` contains the following functionalities - Check whether the target directory contains the file with target file name. [![Check Target File Existence](https://i.postimg.cc/Qx0Zwnf6/task-sample-verify-file-existence.png)](https://postimg.cc/7fGRTX87) - Check whether the target file contains the desired content `EXPECTED_PATTERNS = ["Hello Wolrd"]`. [![Check Content in Target File](https://i.postimg.cc/JzzMhWyV/task-sample-verify-check-content.png)](https://postimg.cc/w7ZSWZc0) -- If the outcome passes **all the above verification functionalities**, the task would be marked as successfully completed. \ No newline at end of file +- If the outcome passes **all the above verification functionalities**, the task would be marked as successfully completed. 
diff --git a/docs/installation_and_docker_usage.md b/docs/installation_and_docker_usage.md index 18930248..7b2eb6af 100644 --- a/docs/installation_and_docker_usage.md +++ b/docs/installation_and_docker_usage.md @@ -44,7 +44,7 @@ The `run-task.sh` script provides simplified Docker usage: ./run-task.sh --mcp MCPSERVICE --models MODEL_NAME --exp-name EXPNAME --tasks TASK --k K ``` -where *MODEL_NAME* refers to the model choice from the supported models (see [Introduction Page](./introduction.md) for more information), *EXPNAME* refers to customized experiment name, *TASK* refers to specific task or task group (see `tasks/` for more information), *K* refers to the time of independent experiments. +where *MODEL_NAME* refers to the model choice from the supported models (see [Introduction Page](./introduction.md) for more information), *EXPNAME* refers to customized experiment name, *TASK* refers to specific task or task group (see `tasks///...` for more information), *K* refers to the time of independent experiments. Additionally, the `run-benchmark.sh` script evaluates models across all MCP services: diff --git a/pipeline.py b/pipeline.py index 8fe78fc7..ff292427 100644 --- a/pipeline.py +++ b/pipeline.py @@ -54,6 +54,12 @@ def main(): default="all", help='Tasks to run: (1). "all"; (2). "category"; or (3). "category/task".', ) + parser.add_argument( + "--task-suite", + default="standard", + choices=["standard", "easy"], + help="Task suite to run (default: standard). 
Use 'easy' to run the lightweight dataset.", + ) parser.add_argument( "--exp-name", default=None, @@ -111,6 +117,7 @@ def main(): logger.info("MCPMark Evaluation") logger.info(f"Experiment: {args.exp_name} | {len(model_list)} Model(s): {', '.join(model_list)}") + logger.info(f"Task suite: {args.task_suite}") if args.k > 1: logger.info(f"Running {args.k} evaluation runs for pass@k metrics") @@ -147,6 +154,7 @@ def main(): output_dir=run_output_dir, reasoning_effort=args.reasoning_effort, agent_name=args.agent, + task_suite=args.task_suite, ) pipeline.run_evaluation(args.tasks) diff --git a/src/aggregators/aggregate_results.py b/src/aggregators/aggregate_results.py index 44a8afd5..88c99346 100755 --- a/src/aggregators/aggregate_results.py +++ b/src/aggregators/aggregate_results.py @@ -20,8 +20,12 @@ from src.aggregators.pricing import compute_cost_usd -def discover_tasks() -> Dict[str, List[str]]: - """Discover all tasks from ./tasks directory.""" +# Supported difficulty splits in ./tasks/// +SUPPORTED_TASK_SETS = {"standard", "easy"} + + +def discover_tasks(task_set: str = "standard") -> Dict[str, List[str]]: + """Discover all tasks from ./tasks directory filtered by task set.""" tasks_dir = Path("./tasks") all_tasks = {} @@ -37,22 +41,39 @@ def discover_tasks() -> Dict[str, List[str]]: } for mcp_service, task_dirs in service_mappings.items(): - tasks = [] + tasks: List[str] = [] for task_dir_name in task_dirs: service_path = tasks_dir / task_dir_name if not service_path.exists(): continue - - # Find all category/task combinations - for category_dir in service_path.iterdir(): - if not category_dir.is_dir() or category_dir.name.startswith("__"): - continue - - for task_dir in category_dir.iterdir(): - if task_dir.is_dir(): - # Use unified naming for both playwright and webarena variants - tasks.append(f"{category_dir.name}__{task_dir.name}") - + + selected_root = service_path / task_set + + # Detect if this service has partitioned task sets (e.g. 
standard/easy) + has_partitioned_layout = any( + child.is_dir() and child.name in SUPPORTED_TASK_SETS + for child in service_path.iterdir() + ) + + if selected_root.exists(): + search_roots = [selected_root] + elif has_partitioned_layout: + # Requested task set missing for this service; skip it for this run + print(f" āš ļø No '{task_set}' tasks found under {service_path}") + search_roots = [] + else: + # Legacy layout without task sets – fall back to original structure + search_roots = [service_path] + + for root in search_roots: + for category_dir in root.iterdir(): + if not category_dir.is_dir() or category_dir.name.startswith("__"): + continue + + for task_dir in category_dir.iterdir(): + if task_dir.is_dir() and not task_dir.name.startswith("__"): + tasks.append(f"{category_dir.name}__{task_dir.name}") + all_tasks[mcp_service] = sorted(tasks) return all_tasks @@ -653,14 +674,19 @@ def render_section(title: str, section_data: Dict[str, Any]) -> List[str]: f"# {exp_name} - Evaluation Results", "", f"Generated: {summary['generated_at']}", - "", ] + task_set = summary.get("task_set") + if task_set: + lines.append(f"Task set: {task_set}") + + lines.append("") + # Overall table lines.extend(render_section("Overall Performance", summary.get("overall", {}))) # Service tables: infer service keys from summary - reserved = {"overall", "generated_at", "k", "experiment_name"} + reserved = {"overall", "generated_at", "k", "experiment_name", "task_set"} service_keys = [key for key in summary.keys() if key not in reserved] # Keep stable order for service in sorted(service_keys): @@ -873,6 +899,12 @@ def main(): type=str, help="Comma-separated list of models that only need run-1" ) + parser.add_argument( + "--task-set", + choices=sorted(SUPPORTED_TASK_SETS), + default="standard", + help="Which task subset to aggregate (default: standard)" + ) parser.add_argument("--push", action="store_true", help="Push to GitHub (default to main)") args = parser.parse_args() @@ -892,8 +924,8 
@@ def main(): print(f"šŸ”„ Processing experiment: {args.exp_name}") # Discover all tasks - print("šŸ“‹ Discovering tasks...") - all_tasks = discover_tasks() + print(f"šŸ“‹ Discovering tasks (task set: {args.task_set})...") + all_tasks = discover_tasks(args.task_set) total_tasks = sum(len(tasks) for tasks in all_tasks.values()) print(f" Found {total_tasks} tasks across {len(all_tasks)} services") @@ -918,6 +950,7 @@ def main(): print("\nšŸ“Š Calculating metrics...") summary = calculate_metrics(complete_models, all_tasks, args.k, single_run_models) summary["experiment_name"] = args.exp_name + summary["task_set"] = args.task_set # Save summary summary_path = exp_dir / "summary.json" @@ -952,4 +985,4 @@ def main(): if __name__ == "__main__": - exit(main()) \ No newline at end of file + exit(main()) diff --git a/src/base/task_manager.py b/src/base/task_manager.py index 65923008..6b6d3f49 100644 --- a/src/base/task_manager.py +++ b/src/base/task_manager.py @@ -55,6 +55,7 @@ def __init__( mcp_service: str = None, task_class: type = None, task_organization: str = None, + task_suite: str | None = "standard", ): """Initialize the base task manager. 
@@ -63,6 +64,7 @@ def __init__( mcp_service: MCP service name (e.g., 'notion', 'github', 'filesystem') task_class: Custom task class to use (defaults to BaseTask) task_organization: 'file' or 'directory' based task organization + task_suite: Logical task suite (e.g., 'standard', 'easy') """ self.tasks_root = tasks_root self.mcp_service = mcp_service or self.__class__.__name__.lower().replace( @@ -70,6 +72,7 @@ def __init__( ) self.task_class = task_class or BaseTask self.task_organization = task_organization + self.task_suite = task_suite self._tasks_cache = None # ========================================================================= @@ -85,6 +88,8 @@ def discover_all_tasks(self) -> List[BaseTask]: service_dir = self.tasks_root / ( self.mcp_service or self._get_service_directory_name() ) + if self.task_suite: + service_dir = service_dir / self.task_suite if not service_dir.exists(): logger.warning( @@ -112,9 +117,10 @@ def discover_all_tasks(self) -> List[BaseTask]: # Sort by category_id and a stringified task_id to handle both numeric IDs and slugs uniformly self._tasks_cache = sorted(tasks, key=lambda t: (t.category_id, str(t.task_id))) logger.info( - "Discovered %d %s tasks across all categories", + "Discovered %d %s tasks across all categories (suite=%s)", len(self._tasks_cache), self.mcp_service.title(), + self.task_suite or "default", ) return self._tasks_cache diff --git a/src/evaluator.py b/src/evaluator.py index 31f1b711..01518408 100644 --- a/src/evaluator.py +++ b/src/evaluator.py @@ -27,11 +27,13 @@ def __init__( output_dir: Path = None, reasoning_effort: str = "default", agent_name: str = "mcpmark", + task_suite: str = "standard", ): # Main configuration self.mcp_service = mcp_service self.timeout = timeout self.agent_name = (agent_name or "mcpmark").lower() + self.task_suite = (task_suite or "standard").lower() if self.agent_name not in AGENT_REGISTRY: raise ValueError(f"Unsupported agent '{agent_name}'. 
Available: {sorted(AGENT_REGISTRY)}") @@ -48,7 +50,9 @@ def __init__( self.litellm_run_model_name = None # Initialize managers using the factory pattern (simplified) - self.task_manager = MCPServiceFactory.create_task_manager(mcp_service) + self.task_manager = MCPServiceFactory.create_task_manager( + mcp_service, task_suite=self.task_suite + ) self.state_manager = MCPServiceFactory.create_state_manager(mcp_service) # Obtain static service configuration from state manager (e.g., notion_key) @@ -80,7 +84,9 @@ def __init__( model_slug = self.model_name.replace(".", "-") service_for_dir = "playwright" if mcp_service == "playwright_webarena" else mcp_service - self.base_experiment_dir = output_dir / f"{model_slug}__{service_for_dir}" / exp_name + suite_suffix = "" if self.task_suite in ("standard", "", None) else f"-{self.task_suite}" + service_dir_name = f"{service_for_dir}{suite_suffix}" + self.base_experiment_dir = output_dir / f"{model_slug}__{service_dir_name}" / exp_name self.base_experiment_dir.mkdir(parents=True, exist_ok=True) def _format_duration(self, seconds: float) -> str: diff --git a/src/mcp_services/filesystem/filesystem_task_manager.py b/src/mcp_services/filesystem/filesystem_task_manager.py index 3fc1a1df..ef8d5d50 100644 --- a/src/mcp_services/filesystem/filesystem_task_manager.py +++ b/src/mcp_services/filesystem/filesystem_task_manager.py @@ -30,7 +30,7 @@ class FilesystemTask(BaseTask): class FilesystemTaskManager(BaseTaskManager): """Simplified filesystem task manager using enhanced base class.""" - def __init__(self, tasks_root: Path = None): + def __init__(self, tasks_root: Path = None, task_suite: str = "standard"): """Initialize filesystem task manager.""" if tasks_root is None: tasks_root = Path(__file__).resolve().parents[3] / "tasks" @@ -40,6 +40,7 @@ def __init__(self, tasks_root: Path = None): mcp_service="filesystem", task_class=FilesystemTask, task_organization="directory", + task_suite=task_suite, ) # Override only what's needed for 
filesystem-specific behavior diff --git a/src/mcp_services/github/github_state_manager.py b/src/mcp_services/github/github_state_manager.py index c33e0583..516cbaaf 100644 --- a/src/mcp_services/github/github_state_manager.py +++ b/src/mcp_services/github/github_state_manager.py @@ -626,7 +626,35 @@ def _request_with_retry( # Initial state for each task category is resolved via self.initial_state_mapping def select_initial_state_for_task(self, task_category: str) -> Optional[str]: - return self.initial_state_mapping.get(task_category) + """Resolve template name for a task category with light normalization.""" + if not task_category: + return None + + candidate_keys = [] + candidate_keys.append(task_category) + + # Allow users to swap between hyphen/underscore naming conventions. + hyphen_to_underscore = task_category.replace("-", "_") + if hyphen_to_underscore not in candidate_keys: + candidate_keys.append(hyphen_to_underscore) + + underscore_to_hyphen = task_category.replace("_", "-") + if underscore_to_hyphen not in candidate_keys: + candidate_keys.append(underscore_to_hyphen) + + for key in candidate_keys: + template = self.initial_state_mapping.get(key) + if template: + if key != task_category: + logger.debug( + "| Resolved GitHub template for %s via alias %s -> %s", + task_category, + key, + template, + ) + return template + + return None def extract_repo_info_from_url(self, repo_url: str) -> tuple[str, str]: """Extract owner and repo name from GitHub URL.""" diff --git a/src/mcp_services/github/github_task_manager.py b/src/mcp_services/github/github_task_manager.py index bb8511a7..932fbaca 100644 --- a/src/mcp_services/github/github_task_manager.py +++ b/src/mcp_services/github/github_task_manager.py @@ -42,7 +42,7 @@ class GitHubTask(BaseTask): class GitHubTaskManager(BaseTaskManager): """Manages task discovery, filtering, and verification for GitHub-based MCPMark evaluation.""" - def __init__(self, tasks_root: Path = None): + def __init__(self, tasks_root: 
Path = None, task_suite: str = "standard"): """Initialize GitHub task manager. Args: @@ -57,6 +57,7 @@ def __init__(self, tasks_root: Path = None): mcp_service="github", task_class=GitHubTask, task_organization="file", + task_suite=task_suite, ) # GitHub uses file-based tasks # ========================================================================= diff --git a/src/mcp_services/notion/notion_task_manager.py b/src/mcp_services/notion/notion_task_manager.py index c2cfe5d6..9a206f8f 100644 --- a/src/mcp_services/notion/notion_task_manager.py +++ b/src/mcp_services/notion/notion_task_manager.py @@ -67,17 +67,18 @@ def get_description(self) -> str: class NotionTaskManager(BaseTaskManager): """Manages task discovery, filtering, and verification for Notion-based MCPMark evaluation.""" - def __init__(self, tasks_root: Path = None): + def __init__(self, tasks_root: Path = None, task_suite: str = "standard"): """Initialize with the tasks directory path. Args: tasks_root: Path to the tasks directory + task_suite: Logical task suite (e.g., 'standard', 'easy') """ if tasks_root is None: tasks_root = Path(__file__).resolve().parents[3] / "tasks" # Call parent constructor - super().__init__(tasks_root, mcp_service="notion") + super().__init__(tasks_root, mcp_service="notion", task_suite=task_suite) # ========================================================================= # Service-specific implementations for template methods diff --git a/src/mcp_services/playwright/playwright_task_manager.py b/src/mcp_services/playwright/playwright_task_manager.py index a39018f3..ee638490 100644 --- a/src/mcp_services/playwright/playwright_task_manager.py +++ b/src/mcp_services/playwright/playwright_task_manager.py @@ -26,7 +26,7 @@ class PlaywrightTask(BaseTask): class PlaywrightTaskManager(BaseTaskManager): """Simple task manager for Playwright MCP tasks.""" - def __init__(self, tasks_root: Path = None): + def __init__(self, tasks_root: Path = None, task_suite: str = "standard"): 
"""Initialize with tasks directory.""" if tasks_root is None: tasks_root = Path(__file__).resolve().parents[3] / "tasks" @@ -36,6 +36,7 @@ def __init__(self, tasks_root: Path = None): mcp_service="playwright", task_class=PlaywrightTask, task_organization="directory", + task_suite=task_suite, ) def _create_task_from_files( diff --git a/src/mcp_services/playwright_webarena/playwright_task_manager.py b/src/mcp_services/playwright_webarena/playwright_task_manager.py index 2a44b118..406c010d 100644 --- a/src/mcp_services/playwright_webarena/playwright_task_manager.py +++ b/src/mcp_services/playwright_webarena/playwright_task_manager.py @@ -19,7 +19,12 @@ class PlaywrightTaskManager(BaseTaskManager): """Task manager for Playwright tasks against a WebArena environment.""" - def __init__(self, tasks_root: Path | None = None, base_url: str | None = None): + def __init__( + self, + tasks_root: Path | None = None, + base_url: str | None = None, + task_suite: str = "standard", + ): if tasks_root is None: tasks_root = Path(__file__).resolve().parents[3] / "tasks" super().__init__( @@ -27,6 +32,7 @@ def __init__(self, tasks_root: Path | None = None, base_url: str | None = None): mcp_service="playwright_webarena", task_class=BaseTask, task_organization="directory", + task_suite=task_suite, ) def _create_task_from_files( @@ -89,4 +95,4 @@ def _format_task_instruction(self, base_instruction: str) -> str: note = "Use Playwright MCP tools to complete this task." 
return (base_instruction + "\n\n" - + note + "\n\nNote: Based on your understanding, solve the task all at once by yourself, don't ask for my opinions on anything.") \ No newline at end of file + + note + "\n\nNote: Based on your understanding, solve the task all at once by yourself, don't ask for my opinions on anything.") diff --git a/src/mcp_services/postgres/postgres_task_manager.py b/src/mcp_services/postgres/postgres_task_manager.py index 292c032f..07efaa06 100644 --- a/src/mcp_services/postgres/postgres_task_manager.py +++ b/src/mcp_services/postgres/postgres_task_manager.py @@ -32,11 +32,12 @@ class PostgresTask(BaseTask): class PostgresTaskManager(BaseTaskManager): """Manages PostgreSQL tasks for MCPMark evaluation.""" - def __init__(self, tasks_root: Path = None): + def __init__(self, tasks_root: Path = None, task_suite: str = "standard"): """Initialize PostgreSQL task manager. Args: tasks_root: Path to tasks directory + task_suite: Logical task suite (e.g., 'standard', 'easy') """ if tasks_root is None: tasks_root = Path(__file__).resolve().parents[3] / "tasks" @@ -46,6 +47,7 @@ def __init__(self, tasks_root: Path = None): mcp_service="postgres", task_class=PostgresTask, task_organization="file", # PostgreSQL uses file-based tasks + task_suite=task_suite, ) def _create_task_from_files( diff --git a/tasks/filesystem/easy/.gitkeep b/tasks/filesystem/easy/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tasks/filesystem/easy/file_context/file_splitting/description.md b/tasks/filesystem/easy/file_context/file_splitting/description.md new file mode 100644 index 00000000..03e40fa8 --- /dev/null +++ b/tasks/filesystem/easy/file_context/file_splitting/description.md @@ -0,0 +1,11 @@ +# File Splitting Task + +## šŸ“‹ Task Description + +You need to split a large text file into multiple smaller files with equal character counts. The task involves creating a new directory and splitting the content into exactly 3 files. 
+ +## šŸŽÆ Task Objectives + +1. **Create a new directory** named `split` in the test directory +2. **Split the file** `large_file.txt` into exactly 3 files with **similar** character counts (maximum character difference of 100 between any two files) +3. **Name the files** as `split_01.txt`, `split_02.txt`, `split_03.txt` in the `split` directory diff --git a/tasks/filesystem/easy/file_context/file_splitting/meta.json b/tasks/filesystem/easy/file_context/file_splitting/meta.json new file mode 100644 index 00000000..d17a5e63 --- /dev/null +++ b/tasks/filesystem/easy/file_context/file_splitting/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "file_splitting", + "task_name": "File Splitting", + "category_id": "file_context", + "category_name": "File Context", + "description": "Split large_file.txt into three nearly equal chunks stored as split_01.txt-split_03.txt inside a new split directory.", + "author": "Lingjun Chen", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "content transformation", + "file automation" + ], + "mcp": [ + "filesystem" + ], + "meta_data": { + "stateType": "text", + "stateContent": "file_context/\n \u251c\u2500\u2500 file_01.txt\n \u251c\u2500\u2500 file_02.txt\n \u251c\u2500\u2500 file_03.txt\n \u251c\u2500\u2500 file_04.txt\n \u251c\u2500\u2500 file_05.txt\n \u251c\u2500\u2500 file_06.txt\n \u251c\u2500\u2500 file_07.txt\n \u251c\u2500\u2500 file_08.txt\n \u251c\u2500\u2500 file_09.txt\n \u251c\u2500\u2500 file_10.txt\n \u251c\u2500\u2500 file_11.txt\n \u251c\u2500\u2500 file_12.txt\n \u251c\u2500\u2500 file_13.txt\n \u251c\u2500\u2500 file_14.txt\n \u251c\u2500\u2500 file_15.txt\n \u251c\u2500\u2500 file_16.txt\n \u251c\u2500\u2500 file_17.txt\n \u251c\u2500\u2500 file_18.txt\n \u251c\u2500\u2500 file_19.txt\n \u251c\u2500\u2500 file_20.txt\n \u2514\u2500\u2500 large_file.txt", + "stateUrl": "https://storage.mcpmark.ai/filesystem/file_context.zip", + "stateOriginalUrl": null + } +} diff --git 
a/tasks/filesystem/easy/file_context/file_splitting/verify.py b/tasks/filesystem/easy/file_context/file_splitting/verify.py new file mode 100644 index 00000000..82e27ea7 --- /dev/null +++ b/tasks/filesystem/easy/file_context/file_splitting/verify.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +""" +Verification script for File Splitting Task +""" + +import sys +from pathlib import Path +import os + +def get_test_directory() -> Path: + """Get the test directory from FILESYSTEM_TEST_DIR env var.""" + test_root = os.environ.get("FILESYSTEM_TEST_DIR") + if not test_root: + raise ValueError("FILESYSTEM_TEST_DIR environment variable is required") + return Path(test_root) + +def verify_split_directory_exists(test_dir: Path) -> bool: + """Verify that the split directory exists.""" + split_dir = test_dir / "split" + + if not split_dir.exists(): + print("āŒ Directory 'split' not found") + return False + + if not split_dir.is_dir(): + print("āŒ 'split' exists but is not a directory") + return False + + print("āœ… Split directory found") + return True + +def verify_all_split_files_exist(test_dir: Path) -> bool: + """Verify that all 3 split files exist with correct names.""" + split_dir = test_dir / "split" + + expected_files = [f"split_{i:02d}.txt" for i in range(1, 4)] + missing_files = [] + + for filename in expected_files: + file_path = split_dir / filename + if not file_path.exists(): + missing_files.append(filename) + + if missing_files: + print(f"āŒ Missing files: {missing_files}") + return False + + print("āœ… All 3 split files exist with correct names") + return True + +def verify_similar_file_lengths(test_dir: Path) -> bool: + """Verify that all split files have similar character counts (within 100 characters difference).""" + split_dir = test_dir / "split" + + file_lengths = [] + for i in range(1, 4): + filename = f"split_{i:02d}.txt" + file_path = split_dir / filename + + try: + content = file_path.read_text() + file_lengths.append(len(content)) + except Exception 
as e: + print(f"āŒ Error reading {filename}: {e}") + return False + + # Check if all lengths are within 100 characters of each other + min_length = min(file_lengths) + max_length = max(file_lengths) + length_difference = max_length - min_length + + if length_difference > 100: + print(f"āŒ File lengths differ by more than 100 characters: {length_difference}") + print(f" Min length: {min_length}, Max length: {max_length}") + print(f" All lengths: {file_lengths}") + return False + + print(f"āœ… All files have similar lengths (difference: {length_difference} characters)") + print(f" Min: {min_length}, Max: {max_length}") + return True + +def verify_content_integrity(test_dir: Path) -> bool: + """Verify that concatenated split files equal the original file.""" + split_dir = test_dir / "split" + original_file = test_dir / "large_file.txt" + + # Read original content + try: + original_content = original_file.read_text() + except Exception as e: + print(f"āŒ Error reading original file: {e}") + return False + + # Concatenate all split files + concatenated_content = "" + for i in range(1, 4): + filename = f"split_{i:02d}.txt" + file_path = split_dir / filename + + try: + content = file_path.read_text() + concatenated_content += content + except Exception as e: + print(f"āŒ Error reading {filename}: {e}") + return False + + # Compare content + if concatenated_content != original_content: + print("āŒ Concatenated content does not match original file") + print(f" Original length: {len(original_content)}") + print(f" Concatenated length: {len(concatenated_content)}") + return False + + print("āœ… Concatenated content matches original file exactly") + return True + +def verify_no_extra_files(test_dir: Path) -> bool: + """Verify that no extra files exist in the split directory.""" + split_dir = test_dir / "split" + + expected_files = {f"split_{i:02d}.txt" for i in range(1, 4)} + actual_files = {f.name for f in split_dir.iterdir() if f.is_file()} + + extra_files = actual_files 
- expected_files + if extra_files: + print(f"āŒ Extra files found in split directory: {extra_files}") + return False + + print("āœ… No extra files in split directory") + return True + +def main(): + """Main verification function.""" + test_dir = get_test_directory() + print("šŸ” Verifying File Splitting Task...") + + # Define verification steps + verification_steps = [ + ("Split Directory Exists", verify_split_directory_exists), + ("All Split Files Exist", verify_all_split_files_exist), + ("Similar File Lengths", verify_similar_file_lengths), + ("Content Integrity", verify_content_integrity), + ("No Extra Files", verify_no_extra_files), + ] + + # Run all verification steps + all_passed = True + for step_name, verify_func in verification_steps: + print(f"\n--- {step_name} ---") + if not verify_func(test_dir): + all_passed = False + + # Final result + print("\n" + "="*50) + if all_passed: + print("āœ… File splitting task completed correctly!") + print("šŸŽ‰ Task verification: PASS") + sys.exit(0) + else: + print("āŒ Task verification: FAIL") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tasks/filesystem/easy/file_context/pattern_matching/description.md b/tasks/filesystem/easy/file_context/pattern_matching/description.md new file mode 100644 index 00000000..f551a703 --- /dev/null +++ b/tasks/filesystem/easy/file_context/pattern_matching/description.md @@ -0,0 +1,15 @@ +# File Filtering Task: Find Files with Common Substring + +## šŸ“‹ Task Description + +Your task is to find all files that contain a substring of 30 or more characters that also appears in `large_file.txt`. **You are not allowed to use python code.** + +## šŸŽÆ Task Objectives + +1. **Read the reference file** `large_file.txt` to understand its content +2. **Examine each file** from file_01.txt to file_20.txt +3. **Find files** that contain a substring of 30 or more characters that matches a substring in `large_file.txt` +4. 
**Create a file `answer.txt`** and write the results to it with the following format: + - One line per matching file + - Format: `filename.txt` + - Do not add any things else other than `filename.txt.` diff --git a/tasks/filesystem/easy/file_context/pattern_matching/meta.json b/tasks/filesystem/easy/file_context/pattern_matching/meta.json new file mode 100644 index 00000000..7f37363a --- /dev/null +++ b/tasks/filesystem/easy/file_context/pattern_matching/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "pattern_matching", + "task_name": "Pattern Matching", + "category_id": "file_context", + "category_name": "File Context", + "description": "Scan file_01.txt through file_20.txt for any 30+ character substring that also appears in large_file.txt and list each matching filename in answer.txt.", + "author": "Lingjun Chen", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "pattern analysis", + "search and filtering" + ], + "mcp": [ + "filesystem" + ], + "meta_data": { + "stateType": "text", + "stateContent": "file_context/\n \u251c\u2500\u2500 file_01.txt\n \u251c\u2500\u2500 file_02.txt\n \u251c\u2500\u2500 file_03.txt\n \u251c\u2500\u2500 file_04.txt\n \u251c\u2500\u2500 file_05.txt\n \u251c\u2500\u2500 file_06.txt\n \u251c\u2500\u2500 file_07.txt\n \u251c\u2500\u2500 file_08.txt\n \u251c\u2500\u2500 file_09.txt\n \u251c\u2500\u2500 file_10.txt\n \u251c\u2500\u2500 file_11.txt\n \u251c\u2500\u2500 file_12.txt\n \u251c\u2500\u2500 file_13.txt\n \u251c\u2500\u2500 file_14.txt\n \u251c\u2500\u2500 file_15.txt\n \u251c\u2500\u2500 file_16.txt\n \u251c\u2500\u2500 file_17.txt\n \u251c\u2500\u2500 file_18.txt\n \u251c\u2500\u2500 file_19.txt\n \u251c\u2500\u2500 file_20.txt\n \u2514\u2500\u2500 large_file.txt", + "stateUrl": "https://storage.mcpmark.ai/filesystem/file_context.zip", + "stateOriginalUrl": null + } +} diff --git a/tasks/filesystem/easy/file_context/pattern_matching/verify.py b/tasks/filesystem/easy/file_context/pattern_matching/verify.py new file 
mode 100644 index 00000000..5b19e9c2 --- /dev/null +++ b/tasks/filesystem/easy/file_context/pattern_matching/verify.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 +""" +Verification script for File Filtering Task: Find Files with Common Substring +""" + +import sys +from pathlib import Path +import os + +def get_test_directory() -> Path: + """Get the test directory from FILESYSTEM_TEST_DIR env var.""" + test_root = os.environ.get("FILESYSTEM_TEST_DIR") + if not test_root: + raise ValueError("FILESYSTEM_TEST_DIR environment variable is required") + return Path(test_root) + +def verify_answer_file_exists(test_dir: Path) -> bool: + """Verify that the answer.txt file exists.""" + answer_file = test_dir / "answer.txt" + + if not answer_file.exists(): + print("āŒ File 'answer.txt' not found") + return False + + print("āœ… Answer file found") + return True + +def verify_answer_format(test_dir: Path) -> bool: + """Verify that the answer file has the correct format.""" + answer_file = test_dir / "answer.txt" + + try: + content = answer_file.read_text().strip() + + # If file is empty, that's acceptable (no matches found) + if not content: + print("āœ… Answer file is empty (no matches found)") + return True + + lines = content.split('\n') + + for i, line in enumerate(lines, 1): + line = line.strip() + if not line: + continue + + # Check format: just filename.txt + if not line.endswith('.txt') or not line.startswith('file_'): + print(f"āŒ Line {i} has incorrect format: {line}") + print(" Expected format: filename.txt") + return False + + print("āœ… Answer format is correct") + return True + + except Exception as e: + print(f"āŒ Error reading answer file: {e}") + return False + +def find_30_plus_char_matches(test_dir: Path) -> set: + """Find all files that have 30+ character substring matches with large_file.txt.""" + large_file = test_dir / "large_file.txt" + if not large_file.exists(): + print("āŒ large_file.txt not found") + return set() + + large_content = 
large_file.read_text() + matching_files = set() + + # Check each file from file_01.txt to file_20.txt + for i in range(1, 21): + filename = f"file_{i:02d}.txt" + file_path = test_dir / filename + + if not file_path.exists(): + continue + + file_content = file_path.read_text() + + # Check if there's a substring of 30+ characters that matches + has_match = False + for start_pos in range(len(file_content)): + for end_pos in range(start_pos + 30, len(file_content) + 1): + substring = file_content[start_pos:end_pos] + if substring in large_content: + has_match = True + break + if has_match: + break + + if has_match: + matching_files.add(filename) + + return matching_files + +def verify_matches_are_correct(test_dir: Path) -> bool: + """Verify that the files listed in answer.txt actually have 30+ character matches.""" + answer_file = test_dir / "answer.txt" + + try: + content = answer_file.read_text().strip() + + # If no content, check if there should actually be no matches + if not content: + expected_matches = find_30_plus_char_matches(test_dir) + if expected_matches: + print("āŒ Answer file is empty but matches should exist") + for filename in expected_matches: + print(f" Expected: {filename}") + return False + else: + print("āœ… No matches found (correct)") + return True + + # Parse answer file + answer_files = set() + lines = content.split('\n') + for line in lines: + line = line.strip() + if not line: + continue + answer_files.add(line) + + # Get expected matches + expected_matches = find_30_plus_char_matches(test_dir) + + # Check if all answer files actually have matches + for filename in answer_files: + if filename not in expected_matches: + print(f"āŒ File {filename} listed in answer but has no valid 30+ character match") + return False + + # Check if all expected matches are in answer + for filename in expected_matches: + if filename not in answer_files: + print(f"āŒ Missing match for {filename} in answer file") + return False + + print("āœ… All matches are 
correct") + return True + + except Exception as e: + print(f"āŒ Error verifying matches: {e}") + return False + +def verify_files_exist(test_dir: Path) -> bool: + """Verify that all files mentioned in answer.txt actually exist.""" + answer_file = test_dir / "answer.txt" + + try: + content = answer_file.read_text().strip() + + if not content: + return True # No files to verify + + lines = content.split('\n') + for line in lines: + line = line.strip() + if not line: + continue + + file_path = test_dir / line + + if not file_path.exists(): + print(f"āŒ File mentioned in answer does not exist: {line}") + return False + + print("āœ… All files mentioned in answer exist") + return True + + except Exception as e: + print(f"āŒ Error verifying file existence: {e}") + return False + +def main(): + """Main verification function.""" + test_dir = get_test_directory() + print("šŸ” Verifying File Filtering Task: Find Files with Common Substring...") + + # Define verification steps + verification_steps = [ + ("Answer File Exists", verify_answer_file_exists), + ("Answer Format", verify_answer_format), + ("Files Exist", verify_files_exist), + ("Matches are Correct", verify_matches_are_correct), + ] + + # Run all verification steps + all_passed = True + for step_name, verify_func in verification_steps: + print(f"\n--- {step_name} ---") + if not verify_func(test_dir): + all_passed = False + + # Final result + print("\n" + "="*50) + if all_passed: + print("āœ… File filtering task completed correctly!") + print("šŸŽ‰ Task verification: PASS") + sys.exit(0) + else: + print("āŒ Task verification: FAIL") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tasks/filesystem/easy/file_context/uppercase/description.md b/tasks/filesystem/easy/file_context/uppercase/description.md new file mode 100644 index 00000000..4ba3dbef --- /dev/null +++ b/tasks/filesystem/easy/file_context/uppercase/description.md @@ -0,0 +1,11 @@ +# File Context Task: Convert 
Files to Uppercase + +## šŸ“‹ Task Description + +You need to process 5 text files (file_01.txt to file_05.txt) and convert their content to uppercase format. + +## šŸŽÆ Task Objectives + +1. **Create an uppercase directory** in the test environment root +2. **Convert each file** from file_01.txt to file_05.txt to uppercase +3. **Save converted files** in the uppercase/ directory with the same names diff --git a/tasks/filesystem/easy/file_context/uppercase/meta.json b/tasks/filesystem/easy/file_context/uppercase/meta.json new file mode 100644 index 00000000..d706dd5e --- /dev/null +++ b/tasks/filesystem/easy/file_context/uppercase/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "uppercase", + "task_name": "Uppercase", + "category_id": "file_context", + "category_name": "File Context", + "description": "Copy file_01.txt-file_05.txt into an uppercase/ folder and convert the contents of every file to uppercase text.", + "author": "Lingjun Chen", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "content transformation", + "batch processing" + ], + "mcp": [ + "filesystem" + ], + "meta_data": { + "stateType": "text", + "stateContent": "file_context/\n \u251c\u2500\u2500 file_01.txt\n \u251c\u2500\u2500 file_02.txt\n \u251c\u2500\u2500 file_03.txt\n \u251c\u2500\u2500 file_04.txt\n \u251c\u2500\u2500 file_05.txt\n \u251c\u2500\u2500 file_06.txt\n \u251c\u2500\u2500 file_07.txt\n \u251c\u2500\u2500 file_08.txt\n \u251c\u2500\u2500 file_09.txt\n \u251c\u2500\u2500 file_10.txt\n \u251c\u2500\u2500 file_11.txt\n \u251c\u2500\u2500 file_12.txt\n \u251c\u2500\u2500 file_13.txt\n \u251c\u2500\u2500 file_14.txt\n \u251c\u2500\u2500 file_15.txt\n \u251c\u2500\u2500 file_16.txt\n \u251c\u2500\u2500 file_17.txt\n \u251c\u2500\u2500 file_18.txt\n \u251c\u2500\u2500 file_19.txt\n \u251c\u2500\u2500 file_20.txt\n \u2514\u2500\u2500 large_file.txt", + "stateUrl": "https://storage.mcpmark.ai/filesystem/file_context.zip", + "stateOriginalUrl": null + } +} diff --git 
a/tasks/filesystem/easy/file_context/uppercase/verify.py b/tasks/filesystem/easy/file_context/uppercase/verify.py new file mode 100644 index 00000000..41fc1e17 --- /dev/null +++ b/tasks/filesystem/easy/file_context/uppercase/verify.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python3 +""" +Verification script for File Context Task: Convert Files to Uppercase +""" + +import sys +from pathlib import Path +import os +import re + +def get_test_directory() -> Path: + """Get the test directory from FILESYSTEM_TEST_DIR env var.""" + test_root = os.environ.get("FILESYSTEM_TEST_DIR") + if not test_root: + raise ValueError("FILESYSTEM_TEST_DIR environment variable is required") + return Path(test_root) + +def verify_uppercase_directory_exists(test_dir: Path) -> bool: + """Verify that the uppercase directory exists.""" + uppercase_dir = test_dir / "uppercase" + + if not uppercase_dir.exists(): + print("āŒ Directory 'uppercase' not found") + return False + + if not uppercase_dir.is_dir(): + print("āŒ 'uppercase' exists but is not a directory") + return False + + print("āœ… Uppercase directory found") + return True + +def verify_uppercase_files_exist(test_dir: Path) -> bool: + """Verify that all 5 uppercase files exist.""" + uppercase_dir = test_dir / "uppercase" + + for i in range(1, 6): + filename = f"file_{i:02d}.txt" + file_path = uppercase_dir / filename + + if not file_path.exists(): + print(f"āŒ File '{filename}' not found in uppercase directory") + return False + + print("āœ… All 5 uppercase files found") + return True + +def verify_uppercase_content(test_dir: Path) -> bool: + """Verify that uppercase files contain the correct uppercase content.""" + uppercase_dir = test_dir / "uppercase" + + for i in range(1, 6): + filename = f"file_{i:02d}.txt" + original_file = test_dir / filename + uppercase_file = uppercase_dir / filename + + if not original_file.exists(): + print(f"āŒ Original file '{filename}' not found") + return False + + try: + original_content = 
original_file.read_text() + uppercase_content = uppercase_file.read_text() + + # Check if uppercase content is the uppercase version of original + expected_uppercase = original_content.upper() + + if uppercase_content != expected_uppercase: + print(f"āŒ File '{filename}' content is not properly converted to uppercase") + return False + + except Exception as e: + print(f"āŒ Error reading file '{filename}': {e}") + return False + + print("āœ… All uppercase files contain correct uppercase content") + return True + +def verify_answer_file_exists(test_dir: Path) -> bool: + """Verify that the answer.txt file exists in the uppercase directory.""" + uppercase_dir = test_dir / "uppercase" + answer_file = uppercase_dir / "answer.txt" + + if not answer_file.exists(): + print("āŒ File 'answer.txt' not found in uppercase directory") + return False + + print("āœ… Answer file found in uppercase directory") + return True + +def verify_answer_format(test_dir: Path) -> bool: + """Verify that the answer file has the correct format.""" + uppercase_dir = test_dir / "uppercase" + answer_file = uppercase_dir / "answer.txt" + + try: + content = answer_file.read_text().strip() + + if not content: + print("āŒ Answer file is empty") + return False + + lines = content.split('\n') + + # Check if we have exactly 10 lines + if len(lines) != 10: + print(f"āŒ Answer file has {len(lines)} lines, expected 10") + return False + + for i, line in enumerate(lines, 1): + line = line.strip() + if not line: + print(f"āŒ Line {i} is empty") + return False + + # Check format: filename:word_count + if ':' not in line: + print(f"āŒ Line {i} has incorrect format: {line}") + print(" Expected format: filename:word_count") + return False + + parts = line.split(':', 1) + if len(parts) != 2: + print(f"āŒ Line {i} has incorrect format: {line}") + print(" Expected format: filename:word_count") + return False + + filename, word_count_str = parts + + # Check filename format + if not filename.endswith('.txt') or 
not filename.startswith('file_'): + print(f"āŒ Line {i} has invalid filename: {filename}") + return False + + # Check word count format (should be integer) + try: + word_count = int(word_count_str) + if word_count <= 0: + print(f"āŒ Line {i} has invalid word count: {word_count_str}") + return False + except ValueError: + print(f"āŒ Line {i} has non-integer word count: {word_count_str}") + return False + + print("āœ… Answer format is correct") + return True + + except Exception as e: + print(f"āŒ Error reading answer file: {e}") + return False + +def count_words_in_file(file_path: Path) -> int: + """Count words in a file.""" + try: + content = file_path.read_text() + # Split by whitespace and filter out empty strings + words = [word for word in content.split() if word.strip()] + return len(words) + except Exception as e: + print(f"āŒ Error reading file {file_path}: {e}") + return 0 + +def verify_word_counts_are_correct(test_dir: Path) -> bool: + """Verify that the word counts in answer.txt are correct.""" + uppercase_dir = test_dir / "uppercase" + answer_file = uppercase_dir / "answer.txt" + + try: + content = answer_file.read_text().strip() + lines = content.split('\n') + + # Expected word counts based on answer.md + expected_counts = [22, 22, 22, 22, 18, 22, 22, 22, 18, 20] + + # Create a set of expected file entries for easier checking + expected_entries = set() + for i in range(1, 11): + filename = f"file_{i:02d}.txt" + expected_count = expected_counts[i - 1] + if i == 6: # Special case for file_06.txt: can be 21 or 22 + expected_entries.add(f"{filename}:21") + expected_entries.add(f"{filename}:22") + else: + expected_entries.add(f"{filename}:{expected_count}") + + # Check each line in the answer file + found_entries = set() + for line in lines: + line = line.strip() + if line in expected_entries: + found_entries.add(line) + else: + print(f"āŒ Invalid entry: {line}") + return False + + # Check if we found all expected entries + if len(found_entries) != 10: 
+ print(f"āŒ Found {len(found_entries)} entries, expected 10") + missing = expected_entries - found_entries + if missing: + print(f" Missing entries: {missing}") + return False + + print("āœ… All word counts are correct") + return True + + except Exception as e: + print(f"āŒ Error verifying word counts: {e}") + return False + +def verify_all_files_are_included(test_dir: Path) -> bool: + """Verify that all 10 files are included in the answer.""" + uppercase_dir = test_dir / "uppercase" + answer_file = uppercase_dir / "answer.txt" + + try: + content = answer_file.read_text().strip() + lines = content.split('\n') + + # Check that all 10 files are present + found_files = set() + for line in lines: + parts = line.split(':', 1) + filename = parts[0] + found_files.add(filename) + + expected_files = {f"file_{i:02d}.txt" for i in range(1, 11)} + + if found_files != expected_files: + missing = expected_files - found_files + extra = found_files - expected_files + if missing: + print(f"āŒ Missing files in answer: {missing}") + if extra: + print(f"āŒ Extra files in answer: {extra}") + return False + + print("āœ… All 10 files are included in answer") + return True + + except Exception as e: + print(f"āŒ Error verifying file inclusion: {e}") + return False + +def main(): + """Main verification function.""" + try: + test_dir = get_test_directory() + print(f"šŸ” Verifying Uppercase in: {test_dir}") + print() + + # Run all verification checks + checks = [ + ("Uppercase directory exists", verify_uppercase_directory_exists), + ("Uppercase files exist", verify_uppercase_files_exist), + ("Uppercase content is correct", verify_uppercase_content), + ] + + all_passed = True + for check_name, check_func in checks: + print(f"šŸ“‹ {check_name}...") + if not check_func(test_dir): + all_passed = False + print() + + if all_passed: + print("šŸŽ‰ All verification checks passed!") + sys.exit(0) + else: + print("āŒ Some verification checks failed!") + sys.exit(1) + + except Exception as e: + 
print(f"āŒ Verification failed with error: {e}") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tasks/filesystem/easy/file_property/largest_rename/description.md b/tasks/filesystem/easy/file_property/largest_rename/description.md new file mode 100644 index 00000000..08275c06 --- /dev/null +++ b/tasks/filesystem/easy/file_property/largest_rename/description.md @@ -0,0 +1,11 @@ +# Largest File Rename Task + +## šŸ“‹ Task Description + +Rename the largest `.jpg` file in the test directory to `largest.jpg` based on file size. + +## šŸŽÆ Task Objectives + +1. **Find all `.jpg` files** in the test directory +2. **Determine which `.jpg` file is the largest** by file size +3. **Rename the largest `.jpg` file to `largest.jpg`** diff --git a/tasks/filesystem/easy/file_property/largest_rename/meta.json b/tasks/filesystem/easy/file_property/largest_rename/meta.json new file mode 100644 index 00000000..799c4e00 --- /dev/null +++ b/tasks/filesystem/easy/file_property/largest_rename/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "largest_rename", + "task_name": "Largest File Rename", + "category_id": "file_property", + "category_name": "File Property", + "description": "Identify the largest .jpg in the workspace and rename it to largest.jpg while leaving the other files untouched.", + "author": "Lingjun Chen", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "file organization", + "attribute inspection" + ], + "mcp": [ + "filesystem" + ], + "meta_data": { + "stateType": "text", + "stateContent": "file_property/\n \u251c\u2500\u2500 bear.jpg\n \u251c\u2500\u2500 bridge.jpg\n \u251c\u2500\u2500 bus.MOV\n \u251c\u2500\u2500 random_file_1.txt\n \u251c\u2500\u2500 random_file_2.txt\n \u251c\u2500\u2500 random_file_3.txt\n \u251c\u2500\u2500 road.MOV\n \u2514\u2500\u2500 sg.jpg", + "stateUrl": "https://storage.mcpmark.ai/filesystem/file_property.zip", + "stateOriginalUrl": null + } +} diff --git 
a/tasks/filesystem/easy/file_property/largest_rename/verify.py b/tasks/filesystem/easy/file_property/largest_rename/verify.py new file mode 100644 index 00000000..a16f212e --- /dev/null +++ b/tasks/filesystem/easy/file_property/largest_rename/verify.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +""" +Verification script for Largest File Rename Task +""" + +import sys +from pathlib import Path +import os + +def get_test_directory() -> Path: + """Get the test directory from FILESYSTEM_TEST_DIR env var.""" + test_root = os.environ.get("FILESYSTEM_TEST_DIR") + if not test_root: + raise ValueError("FILESYSTEM_TEST_DIR environment variable is required") + return Path(test_root) + +def verify_sg_jpg_not_exists(test_dir: Path) -> bool: + """Verify that sg.jpg does not exist.""" + sg_file = test_dir / "sg.jpg" + + if sg_file.exists(): + print("āŒ sg.jpg still exists (should be renamed)") + return False + + print("āœ… sg.jpg does not exist") + return True + +def verify_largest_jpg_exists(test_dir: Path) -> bool: + """Verify that largest.jpg exists.""" + largest_file = test_dir / "largest.jpg" + + if not largest_file.exists(): + print("āŒ largest.jpg does not exist") + return False + + print("āœ… largest.jpg exists") + return True + +def main(): + """Main verification function.""" + try: + test_dir = get_test_directory() + print(f"šŸ” Verifying largest file rename in: {test_dir}") + + # Run all verification checks + checks = [ + ("sg.jpg does not exist", verify_sg_jpg_not_exists), + ("largest.jpg exists", verify_largest_jpg_exists) + ] + + all_passed = True + for check_name, check_func in checks: + print(f"\nšŸ“‹ Checking: {check_name}") + if not check_func(test_dir): + all_passed = False + + if all_passed: + print("\nšŸŽ‰ All verification checks passed!") + sys.exit(0) + else: + print("\nāŒ Some verification checks failed!") + sys.exit(1) + + except Exception as e: + print(f"āŒ Verification failed with error: {e}") + sys.exit(1) + +if __name__ == "__main__": + main() \ No 
newline at end of file diff --git a/tasks/filesystem/easy/file_property/txt_merging/description.md b/tasks/filesystem/easy/file_property/txt_merging/description.md new file mode 100644 index 00000000..af6afb58 --- /dev/null +++ b/tasks/filesystem/easy/file_property/txt_merging/description.md @@ -0,0 +1,12 @@ +# Text File Merging Task + +## šŸ“‹ Task Description + +Merge all `.txt` files in the test directory into a single file called `merge.txt`. The merged file should contain the content from all `.txt` files. + +## šŸŽÆ Task Objectives + +1. **Read all `.txt` files** in the test directory +2. **Create a new file** called `merge.txt` in the test directory +3. **Write the content** from all `.txt` files into `merge.txt` +4. **The order** of content doesn't matter - as long as all content from all `.txt` files is present in `merge.txt` diff --git a/tasks/filesystem/easy/file_property/txt_merging/meta.json b/tasks/filesystem/easy/file_property/txt_merging/meta.json new file mode 100644 index 00000000..a67f5544 --- /dev/null +++ b/tasks/filesystem/easy/file_property/txt_merging/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "txt_merging", + "task_name": "Text File Merging", + "category_id": "file_property", + "category_name": "File Property", + "description": "Combine the contents of every .txt file into a single merge.txt file so the archive has one consolidated view.", + "author": "Lingjun Chen", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "content consolidation", + "file automation" + ], + "mcp": [ + "filesystem" + ], + "meta_data": { + "stateType": "text", + "stateContent": "file_property/\n \u251c\u2500\u2500 bear.jpg\n \u251c\u2500\u2500 bridge.jpg\n \u251c\u2500\u2500 bus.MOV\n \u251c\u2500\u2500 random_file_1.txt\n \u251c\u2500\u2500 random_file_2.txt\n \u251c\u2500\u2500 random_file_3.txt\n \u251c\u2500\u2500 road.MOV\n \u2514\u2500\u2500 sg.jpg", + "stateUrl": "https://storage.mcpmark.ai/filesystem/file_property.zip", + "stateOriginalUrl": 
null + } +} diff --git a/tasks/filesystem/easy/file_property/txt_merging/verify.py b/tasks/filesystem/easy/file_property/txt_merging/verify.py new file mode 100644 index 00000000..c922d01f --- /dev/null +++ b/tasks/filesystem/easy/file_property/txt_merging/verify.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +""" +Verification script for Text File Merging Task +""" + +import sys +from pathlib import Path +import os + +def get_test_directory() -> Path: + """Get the test directory from FILESYSTEM_TEST_DIR env var.""" + test_root = os.environ.get("FILESYSTEM_TEST_DIR") + if not test_root: + raise ValueError("FILESYSTEM_TEST_DIR environment variable is required") + return Path(test_root) + +def get_expected_contents(): + """Return the expected content from each .txt file.""" + return [ + "O rErmZ4tDgzMNoxn1oNfQhT1TRpy9w0tQPGTcrsaoMFrrgt9bY5mgBxO6q8c8lZywXxEEBWW4i6Jh9NbAtYtRKvkzB4bshGIMzn2G1 rDTpKJj", + "DmRrDFFaIl1mPubzSJJaN4aMeZyBHqVxZe5tpztHQ9zSe6b69Hnl7coqeNJXHXU2EnaDnyhYxZSWHPn3IWLsLGWrx7py8d37Z8blMnh7VDUH7hAMamhLRO8lfUVV1roM8a0njnW9evXRq5AoNTt8Tv7kQ5LmLe6Z66MZwtjckRAXmOB4x3AYbbxLULYZAxitW1KNG1yTaDOYZQhtKdZkX1XqytzBl9dRXI4gk91ZlVHLOiujwUa89EVsdjayKeCc21gCJMXvbhDSOGAs6dXZEHuaHQnnBdM19X3TwPgfDONyhlc pjwoQ45D56UQVWxwNIJUTgwS1vctYOx4XFpMgf3PRQ7zZdfhIuPBFdQwnQvYUeQbWa5gnyMO9FVSU0vm9uccbJQvkcEAJzMkEh9i7z6EEixtbwVedlTGWL2XBwjenRdf2qsOgvJo8Dyuvf35ieCFMG7wR7200rs GJZ5bRdx4R2gGOWVMi3MOBrqcw3KhbcpJtdQoKMALEjBMrY7VYKtAZNI6LoXX OOTJZ3x3usHRJY0gMtKhh6OJ 37aknvBwNYJ0IRWYWaeJ8LBwJyO6ZV3ZJ0palISQvGaHEZ0olHnK2iNCTxqxvF8J7EdIdIPYssl5f0XgPl6", + "aFCzXJbJq02zlCKnyarJnPUiwVIuUrQci3fZvGD53F5fUsKDUlEwO5 ANJ2VgBnJ5cuBJzjILcM9AxTvyNZ5NPIHjSCo5O20K" + ] + +def verify_merge_file_exists(test_dir: Path) -> bool: + """Verify that merge.txt exists in the test directory.""" + merge_file = test_dir / "merge.txt" + + if not merge_file.exists(): + print("āŒ merge.txt not found") + return False + + if not merge_file.is_file(): + print("āŒ merge.txt exists but is not a file") + return False + + 
print("āœ… merge.txt exists") + return True + +def verify_merge_file_contents(test_dir: Path) -> bool: + """Verify that merge.txt contains all expected content strings.""" + merge_file = test_dir / "merge.txt" + expected_contents = get_expected_contents() + + try: + with open(merge_file, 'r', encoding='utf-8') as f: + merge_content = f.read() + except Exception as e: + print(f"āŒ Failed to read merge.txt: {e}") + return False + + # Check that each expected content string is present in the merged file + missing_contents = [] + for content in expected_contents: + if content not in merge_content: + missing_contents.append(content[:50] + "..." if len(content) > 50 else content) + + if missing_contents: + print(f"āŒ Missing content in merge.txt:") + for content in missing_contents: + print(f" - {content}") + return False + + print("āœ… merge.txt contains all expected content") + return True + +def main(): + """Main verification function.""" + try: + test_dir = get_test_directory() + print(f"šŸ” Verifying text file merging in: {test_dir}") + + # Run all verification checks + checks = [ + ("Merge file existence", verify_merge_file_exists), + ("Merge file contents", verify_merge_file_contents) + ] + + all_passed = True + for check_name, check_func in checks: + print(f"\nšŸ“‹ Checking: {check_name}") + if not check_func(test_dir): + all_passed = False + + if all_passed: + print("\nšŸŽ‰ All verification checks passed!") + sys.exit(0) + else: + print("\nāŒ Some verification checks failed!") + sys.exit(1) + + except Exception as e: + print(f"āŒ Verification failed with error: {e}") + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/tasks/filesystem/easy/folder_structure/structure_analysis/description.md b/tasks/filesystem/easy/folder_structure/structure_analysis/description.md new file mode 100644 index 00000000..c1a346ce --- /dev/null +++ b/tasks/filesystem/easy/folder_structure/structure_analysis/description.md @@ -0,0 +1,9 @@ +# Directory Structure 
Analysis Task + +You need to recursively traverse the entire folder structure under the main directory and count the total number of `.py` files in the entire directory (including all subdirectories). + +Write the answer (just a single number) in a file named `structure_analysis.txt` in the main directory (at the same level as the `complex_structure` folder). + +You should not change or delete any existed files. + +Do not try to use python code. diff --git a/tasks/filesystem/easy/folder_structure/structure_analysis/meta.json b/tasks/filesystem/easy/folder_structure/structure_analysis/meta.json new file mode 100644 index 00000000..10aed508 --- /dev/null +++ b/tasks/filesystem/easy/folder_structure/structure_analysis/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "structure_analysis", + "task_name": "Structure Analysis", + "category_id": "folder_structure", + "category_name": "Folder Structure", + "description": "Recursively inspect the complex_structure tree, count all .py files, and save the total as the only line of structure_analysis.txt.", + "author": "Lingjun Chen", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "data extraction", + "filesystem traversal" + ], + "mcp": [ + "filesystem" + ], + "meta_data": { + "stateType": "text", + "stateContent": "folder_structure/\n \u2514\u2500\u2500 complex_structure/\n \u251c\u2500\u2500 deeply/\n \u2502 \u2514\u2500\u2500 nested/\n \u2502 \u2514\u2500\u2500 folder/\n \u2502 \u2514\u2500\u2500 structure/\n \u251c\u2500\u2500 empty_folder/\n \u251c\u2500\u2500 folder_lxkHt_0_1/\n \u2502 \u2514\u2500\u2500 file_PeLzC_0.txt\n \u251c\u2500\u2500 folder_QdTAj_0_2/\n \u2502 \u251c\u2500\u2500 folder_eXccj_1_0/\n \u2502 \u2502 \u251c\u2500\u2500 folder_Mqlwh_2_1/\n \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_cKxcP_3_3/\n \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_BPTMK_4_1/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_RHtBP_0.txt\n \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 
folder_QNqjq_4_0/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_gRwPE_5_1/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_jVlpp_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_vJuHz_1.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_XdXYJ_5_0/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_KvkKi_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_gGxLG_2.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_Hzkxo_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_XRjeh_1.txt\n \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_vIBIt_4_2/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_kRDNS_5_0/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_wFSjJ_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_NyBSO_0.txt\n \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_EOCNf_1.txt\n \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_gmrXA_0.txt\n \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_NcruA_3_1/\n \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_bLWDj_1.txt\n \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_WAftR_0.txt\n \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_qCDFI_3_2/\n \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_eSMOJ_0.txt\n \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_oxADy_2.txt\n \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_RTbbc_1.txt\n \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_QVHUU_3_0/\n \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_FEPTK_4_1/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_GHoMC_5_1/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_rAMYd_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_iBDUY_5_0/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_IJCaw_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 
folder_VRXgp_5_2/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_hkUmS_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_nqLAf_1.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_XflmA_0.txt\n \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_FlPoK_4_3/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_hSVNm_5_3/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_klnbn_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_iZuEl_5_0/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_LqAmy_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_LcURj_5_2/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_RgwOS_1.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_ZHnYb_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_tuZQJ_5_1/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_LHuIx_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_asJnB_1.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_EzLdu_0.txt\n \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_ndhsJ_4_0/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_CUSXK_5_0/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_DpiuM_1.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_pSqeG_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_pstmE_5_1/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_YwdJt_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_StlsP_5_2/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_kriBJ_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_XCEdm_1.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_ToDjh_1.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_xbIVx_0.txt\n \u2502 
\u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_PJBok_4_4/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_mzxaf_5_0/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_ILBzj_2.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_MTGMm_1.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_zBDqz_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_sULMj_5_1/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_BHziw_1.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_sIjiu_2.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_VqNkB_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_vypSi_5_3/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_kZbIm_1.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_sOBtE_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_ZLGHy_5_2/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_azaFF_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_nAFRe_1.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_mIkQU_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_sGPxd_1.txt\n \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_VTbEG_4_2/\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_HtYLg_0.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_JXjMd_1.txt\n \u2502 \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_tPccB_2.txt\n \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_BuOSw_1.txt\n \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_TpoqE_0.txt\n \u2502 \u2502 \u2502 \u251c\u2500\u2500 folder_wTvun_3_4/\n \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_GyhyE_1.txt\n \u2502 \u2502 \u2502 \u2502 \u251c\u2500\u2500 file_POsla_2.txt\n \u2502 \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_tSsvk_0.txt\n \u2502 \u2502 \u2502 
\u251c\u2500\u2500 file_irNju_0.txt\n \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_jYBRm_1.txt\n \u2502 \u2502 \u251c\u2500\u2500 folder_YlJLI_2_0/\n \u2502 \u2502 \u2502 \u2514\u2500\u2500 file_FpFSL_0.txt\n \u2502 \u2502 \u251c\u2500\u2500 file_cFgBr_2.txt\n \u2502 \u2502 \u251c\u2500\u2500 file_lKEWN_1.txt\n \u2502 \u2502 \u2514\u2500\u2500 file_ZEWFP_0.txt\n \u2502 \u2514\u2500\u2500 file_ayUCH_0.txt\n \u251c\u2500\u2500 folder_xtgyi_0_0/\n \u2502 \u2514\u2500\u2500 file_BvSOB_0.txt\n \u251c\u2500\u2500 mixed_content/\n \u2502 \u2514\u2500\u2500 images_and_text/\n \u2502 \u2514\u2500\u2500 notes.txt\n \u251c\u2500\u2500 project/\n \u2502 \u251c\u2500\u2500 docs/\n \u2502 \u2502 \u2514\u2500\u2500 archive/\n \u2502 \u2502 \u2514\u2500\u2500 2023/\n \u2502 \u2502 \u2514\u2500\u2500 reports/\n \u2502 \u2502 \u251c\u2500\u2500 report_0.txt\n \u2502 \u2502 \u251c\u2500\u2500 report_1.txt\n \u2502 \u2502 \u2514\u2500\u2500 report_2.txt\n \u2502 \u2514\u2500\u2500 src/\n \u2502 \u2514\u2500\u2500 main/\n \u2502 \u2514\u2500\u2500 resources/\n \u2514\u2500\u2500 m.py", + "stateUrl": "https://storage.mcpmark.ai/filesystem/folder_structure.zip", + "stateOriginalUrl": null + } +} diff --git a/tasks/filesystem/easy/folder_structure/structure_analysis/verify.py b/tasks/filesystem/easy/folder_structure/structure_analysis/verify.py new file mode 100644 index 00000000..8b5c95ab --- /dev/null +++ b/tasks/filesystem/easy/folder_structure/structure_analysis/verify.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +""" +Verification script for Directory Structure Analysis Task +""" + +import sys +from pathlib import Path +import os + +def get_test_directory() -> Path: + """Get the test directory from FILESYSTEM_TEST_DIR env var.""" + test_root = os.environ.get("FILESYSTEM_TEST_DIR") + if not test_root: + raise ValueError("FILESYSTEM_TEST_DIR environment variable is required") + return Path(test_root) + +def verify_structure_analysis_file_exists(test_dir: Path) -> bool: + """Verify that 
the structure_analysis.txt file exists.""" + analysis_file = test_dir / "structure_analysis.txt" + + if not analysis_file.exists(): + print("āŒ File 'structure_analysis.txt' not found") + return False + + print("āœ… structure_analysis.txt file found") + return True + +def verify_structure_analysis_content(test_dir: Path) -> bool: + """Verify that the structure_analysis.txt file contains the correct count.""" + analysis_file = test_dir / "structure_analysis.txt" + + try: + content = analysis_file.read_text().strip() + + if not content: + print("āŒ structure_analysis.txt file is empty") + return False + + # The expected answer is 1 + expected_count = 1 + + # Check if content is exactly "1" + if content != str(expected_count): + print(f"āŒ Expected '{expected_count}', but found: '{content}'") + return False + + print(f"āœ… Python file count is correct: {content}") + return True + + except Exception as e: + print(f"āŒ Error reading structure_analysis.txt file: {e}") + return False + +def main(): + """Main verification function.""" + try: + test_dir = get_test_directory() + print(f"šŸ” Verifying Directory Structure Analysis Task in: {test_dir}") + print() + + # Define verification steps + verification_steps = [ + ("Structure Analysis File Exists", verify_structure_analysis_file_exists), + ("Python File Count is Correct", verify_structure_analysis_content), + ] + + # Run all verification steps + all_passed = True + for step_name, verify_func in verification_steps: + print(f"šŸ“‹ {step_name}...") + if not verify_func(test_dir): + all_passed = False + print() + + # Final result + if all_passed: + print("šŸŽ‰ All verification checks passed!") + sys.exit(0) + else: + print("āŒ Some verification checks failed!") + sys.exit(1) + + except Exception as e: + print(f"āŒ Verification failed with error: {e}") + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/tasks/filesystem/easy/legal_document/file_reorganize/description.md 
b/tasks/filesystem/easy/legal_document/file_reorganize/description.md new file mode 100644 index 00000000..a08d0e0d --- /dev/null +++ b/tasks/filesystem/easy/legal_document/file_reorganize/description.md @@ -0,0 +1,16 @@ +# Legal Document File Reorganization Task + +**Overview** + +The folder "legal_files/" contains multiple versions of the Stock Purchase Agreement (Preferred_Stock_Purchase_Agreement_v0.txt through Preferred_Stock_Purchase_Agreement_v10.txt). + +## Task + +Your task is to: + +1. Identify the final version of the document among the different versions +2. Create a folder named `final_version` inside the `legal_files/` directory +3. Create an **empty file** with the same name as the final version in the newly created `final_version/` folder +4. Keep the original file in its original location + +Note: Due to the large file size, you only need to create an empty file (not copy the content). The filename should remain unchanged in the `final_version/` folder. diff --git a/tasks/filesystem/easy/legal_document/file_reorganize/meta.json b/tasks/filesystem/easy/legal_document/file_reorganize/meta.json new file mode 100644 index 00000000..02f999d4 --- /dev/null +++ b/tasks/filesystem/easy/legal_document/file_reorganize/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "file_reorganize", + "task_name": "File Reorganize", + "category_id": "legal_document", + "category_name": "Legal Document", + "description": "Determine the final Stock Purchase Agreement version and create an empty copy of that filename inside legal_files/final_version/.", + "author": "Lingjun Chen", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "file organization", + "version management" + ], + "mcp": [ + "filesystem" + ], + "meta_data": { + "stateType": "text", + "stateContent": "legal_document/\n \u2514\u2500\u2500 legal_files/\n \u251c\u2500\u2500 Preferred_Stock_Purchase_Agreement_v0.txt\n \u251c\u2500\u2500 Preferred_Stock_Purchase_Agreement_v1.txt\n \u251c\u2500\u2500 
Preferred_Stock_Purchase_Agreement_v2.txt\n \u251c\u2500\u2500 Preferred_Stock_Purchase_Agreement_v3.txt\n \u251c\u2500\u2500 Preferred_Stock_Purchase_Agreement_v4.txt\n \u251c\u2500\u2500 Preferred_Stock_Purchase_Agreement_v5.txt\n \u251c\u2500\u2500 Preferred_Stock_Purchase_Agreement_v6.txt\n \u251c\u2500\u2500 Preferred_Stock_Purchase_Agreement_v7.txt\n \u251c\u2500\u2500 Preferred_Stock_Purchase_Agreement_v8.txt\n \u251c\u2500\u2500 Preferred_Stock_Purchase_Agreement_v9.txt\n \u2514\u2500\u2500 Preferred_Stock_Purchase_Agreement_v10.txt", + "stateUrl": "https://storage.mcpmark.ai/filesystem/legal_document.zip", + "stateOriginalUrl": "https://www.cooleygo.com/documents/nvca-financing-documents" + } +} diff --git a/tasks/filesystem/easy/legal_document/file_reorganize/verify.py b/tasks/filesystem/easy/legal_document/file_reorganize/verify.py new file mode 100644 index 00000000..e3e0b24f --- /dev/null +++ b/tasks/filesystem/easy/legal_document/file_reorganize/verify.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +""" +Verification script for Legal Document File Reorganization Task +""" + +import sys +from pathlib import Path +import os + +def get_test_directory() -> Path: + """Get the test directory from FILESYSTEM_TEST_DIR env var.""" + test_root = os.environ.get("FILESYSTEM_TEST_DIR") + if not test_root: + raise ValueError("FILESYSTEM_TEST_DIR environment variable is required") + return Path(test_root) + +def verify_final_version_folder_exists(test_dir: Path) -> bool: + """Verify that the final_version folder exists in legal_files.""" + final_version_dir = test_dir / "legal_files" / "final_version" + + if not final_version_dir.exists(): + print("āŒ Folder 'legal_files/final_version' not found") + return False + + if not final_version_dir.is_dir(): + print("āŒ 'legal_files/final_version' exists but is not a directory") + return False + + print("āœ… Folder 'legal_files/final_version' found") + return True + +def verify_target_file_exists(test_dir: Path) -> bool: + 
"""Verify that Preferred_Stock_Purchase_Agreement_v10.txt exists in final_version folder.""" + target_file = test_dir / "legal_files" / "final_version" / "Preferred_Stock_Purchase_Agreement_v10.txt" + + if not target_file.exists(): + print("āŒ File 'legal_files/final_version/Preferred_Stock_Purchase_Agreement_v10.txt' not found") + return False + + if not target_file.is_file(): + print("āŒ 'Preferred_Stock_Purchase_Agreement_v10.txt' exists but is not a file") + return False + + print("āœ… Target file 'Preferred_Stock_Purchase_Agreement_v10.txt' found in final_version folder") + return True + +def verify_original_file_preserved(test_dir: Path) -> bool: + """Verify that the original v10 file is still in place.""" + original_file = test_dir / "legal_files" / "Preferred_Stock_Purchase_Agreement_v10.txt" + + if not original_file.exists(): + print("āŒ Original file 'Preferred_Stock_Purchase_Agreement_v10.txt' was removed") + return False + + print("āœ… Original file 'Preferred_Stock_Purchase_Agreement_v10.txt' preserved") + return True + +def verify_only_v10_in_final_version(test_dir: Path) -> bool: + """Verify that final_version folder contains only v10 file.""" + final_version_dir = test_dir / "legal_files" / "final_version" + + # Get all files in final_version folder + files = list(final_version_dir.iterdir()) + + # Filter out directories, keep only files + files_only = [f for f in files if f.is_file()] + + if len(files_only) != 1: + print(f"āŒ final_version folder should contain exactly 1 file, but found {len(files_only)}") + for f in files_only: + print(f" - {f.name}") + return False + + # Check if the only file is v10 + if files_only[0].name != "Preferred_Stock_Purchase_Agreement_v10.txt": + print(f"āŒ final_version folder contains wrong file: {files_only[0].name}") + print(" Expected: Preferred_Stock_Purchase_Agreement_v10.txt") + return False + + print("āœ… final_version folder contains only Preferred_Stock_Purchase_Agreement_v10.txt") + return True + +def 
main(): + """Main verification function.""" + test_dir = get_test_directory() + print("šŸ” Verifying Legal Document File Reorganization Task...") + + # Define verification steps + verification_steps = [ + ("Final Version Folder Exists", verify_final_version_folder_exists), + ("Target File Exists", verify_target_file_exists), + ("Only V10 in Final Version", verify_only_v10_in_final_version), + ("Original File Preserved", verify_original_file_preserved), + ] + + # Run all verification steps + all_passed = True + for step_name, verify_func in verification_steps: + print(f"\n--- {step_name} ---") + if not verify_func(test_dir): + all_passed = False + + # Final result + print("\n" + "="*50) + if all_passed: + print("āœ… Legal document file reorganization completed correctly!") + print("šŸŽ‰ Task verification: PASS") + sys.exit(0) + else: + print("āŒ Task verification: FAIL") + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/tasks/filesystem/easy/papers/papers_counting/description.md b/tasks/filesystem/easy/papers/papers_counting/description.md new file mode 100644 index 00000000..cfe3f2e0 --- /dev/null +++ b/tasks/filesystem/easy/papers/papers_counting/description.md @@ -0,0 +1,15 @@ +# File Context Task: Count HTML Files + +## šŸ“‹ Task Description + +You need to count the number of HTML files in the given directory and write the count to a file. + +## šŸŽÆ Task Objectives + +1. **Count HTML files** in the given directory +2. **Create a file** named `count.txt` in the same directory +3. 
**Write the count** (just the number) to `count.txt` + +## šŸ“ Expected Output + +- File `count.txt` containing only the number of HTML files found diff --git a/tasks/filesystem/easy/papers/papers_counting/meta.json b/tasks/filesystem/easy/papers/papers_counting/meta.json new file mode 100644 index 00000000..a7e75fed --- /dev/null +++ b/tasks/filesystem/easy/papers/papers_counting/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "papers_counting", + "task_name": "Papers Counting", + "category_id": "papers", + "category_name": "Papers", + "description": "Count how many .html papers live in the directory and write just that number into count.txt.", + "author": "Lingjun Chen", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "data extraction", + "reporting" + ], + "mcp": [ + "filesystem" + ], + "meta_data": { + "stateType": "text", + "stateContent": "papers/\n \u251c\u2500\u2500 1707.06347.html\n \u251c\u2500\u2500 2105.04165.html\n \u251c\u2500\u2500 2201.11903.html\n \u251c\u2500\u2500 2303.08774.html\n \u251c\u2500\u2500 2306.08640.html\n \u251c\u2500\u2500 2310.02255.html\n \u251c\u2500\u2500 2310.08446.html\n \u251c\u2500\u2500 2312.00849.html\n \u251c\u2500\u2500 2312.07533.html\n \u251c\u2500\u2500 2312.11805.html\n \u251c\u2500\u2500 2402.00253.html\n \u251c\u2500\u2500 2402.03300.html\n \u251c\u2500\u2500 2403.05530.html\n \u251c\u2500\u2500 2404.13046.html\n \u251c\u2500\u2500 2404.14367.html\n \u251c\u2500\u2500 2404.14396.html\n \u251c\u2500\u2500 2405.09818.html\n \u251c\u2500\u2500 2405.13911.html\n \u251c\u2500\u2500 2405.16473.html\n \u251c\u2500\u2500 2405.16640.html\n \u251c\u2500\u2500 2406.08478.html\n \u251c\u2500\u2500 2406.16852.html\n \u251c\u2500\u2500 2406.17294.html\n \u251c\u2500\u2500 2407.01284.html\n \u251c\u2500\u2500 2407.01509.html\n \u251c\u2500\u2500 2407.21783.html\n \u251c\u2500\u2500 2408.03326.html\n \u251c\u2500\u2500 2408.12528.html\n \u251c\u2500\u2500 2409.19256.html\n \u251c\u2500\u2500 2410.05993.html\n 
\u251c\u2500\u2500 2410.06166.html\n \u251c\u2500\u2500 2410.10563.html\n \u251c\u2500\u2500 2410.13848.html\n \u251c\u2500\u2500 2410.17885.html\n \u251c\u2500\u2500 2410.21276.html\n \u251c\u2500\u2500 2411.07975.html\n \u251c\u2500\u2500 2411.10442.html\n \u251c\u2500\u2500 2411.11930.html\n \u251c\u2500\u2500 2411.14432.html\n \u251c\u2500\u2500 2412.05271.html\n \u251c\u2500\u2500 2412.08443.html\n \u251c\u2500\u2500 2412.10302.html\n \u251c\u2500\u2500 2412.15115.html\n \u251c\u2500\u2500 2412.16720.html\n \u251c\u2500\u2500 2412.17256.html\n \u251c\u2500\u2500 2412.18319.html\n \u251c\u2500\u2500 2412.20631.html\n \u251c\u2500\u2500 2501.04686.html\n \u251c\u2500\u2500 2501.06186.html\n \u251c\u2500\u2500 2501.12599.html\n \u251c\u2500\u2500 2501.12948.html\n \u251c\u2500\u2500 2501.17811.html\n \u251c\u2500\u2500 2502.01456.html\n \u251c\u2500\u2500 2502.09621.html\n \u251c\u2500\u2500 2502.10391.html\n \u251c\u2500\u2500 2502.13923.html\n \u251c\u2500\u2500 2503.01785.html\n \u251c\u2500\u2500 2503.06520.html\n \u251c\u2500\u2500 2503.06749.html\n \u251c\u2500\u2500 2503.07065.html\n \u251c\u2500\u2500 2503.07365.html\n \u251c\u2500\u2500 2503.07536.html\n \u251c\u2500\u2500 2503.10291.html\n \u251c\u2500\u2500 2503.10615.html\n \u251c\u2500\u2500 2503.12937.html\n \u251c\u2500\u2500 2503.13939.html\n \u251c\u2500\u2500 2503.14476.html\n \u251c\u2500\u2500 2503.17352.html\n \u251c\u2500\u2500 2503.18892.html\n \u251c\u2500\u2500 2503.19786.html\n \u251c\u2500\u2500 2503.20783.html\n \u251c\u2500\u2500 2503.21620.html\n \u251c\u2500\u2500 2503.21776.html\n \u251c\u2500\u2500 2503.22679.html\n \u251c\u2500\u2500 2504.02587.html\n \u251c\u2500\u2500 2504.05599.html\n \u251c\u2500\u2500 2504.07491.html\n \u251c\u2500\u2500 2504.07934.html\n \u251c\u2500\u2500 2504.07954.html\n \u251c\u2500\u2500 2504.11455.html\n \u251c\u2500\u2500 2504.14945.html\n \u251c\u2500\u2500 2504.16656.html\n \u251c\u2500\u2500 2505.00703.html\n \u2514\u2500\u2500 arxiv_2025.bib", + 
"stateUrl": "https://storage.mcpmark.ai/filesystem/papers.zip", + "stateOriginalUrl": null + } +} diff --git a/tasks/filesystem/easy/papers/papers_counting/verify.py b/tasks/filesystem/easy/papers/papers_counting/verify.py new file mode 100644 index 00000000..9ff5cc1d --- /dev/null +++ b/tasks/filesystem/easy/papers/papers_counting/verify.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +""" +Verification script for Paper Counting Task: Count HTML Files +""" + +import sys +from pathlib import Path +import os + +def get_test_directory() -> Path: + """Get the test directory from FILESYSTEM_TEST_DIR env var.""" + test_root = os.environ.get("FILESYSTEM_TEST_DIR") + if not test_root: + raise ValueError("FILESYSTEM_TEST_DIR environment variable is required") + return Path(test_root) + +def verify_count_file_exists(test_dir: Path) -> bool: + """Verify that the count.txt file exists.""" + count_file = test_dir / "count.txt" + + if not count_file.exists(): + print("āŒ File 'count.txt' not found") + return False + + print("āœ… count.txt file found") + return True + +def verify_count_content(test_dir: Path) -> bool: + """Verify that count.txt contains the correct number (83).""" + count_file = test_dir / "count.txt" + + try: + content = count_file.read_text().strip() + + # Check if content is exactly "83" + if content == "83": + print("āœ… count.txt contains the correct number: 83") + return True + else: + print(f"āŒ count.txt contains '{content}' but expected '83'") + return False + + except Exception as e: + print(f"āŒ Error reading count.txt: {e}") + return False + +def verify_actual_html_count(test_dir: Path) -> bool: + """Verify that there are actually 83 HTML files in the directory.""" + html_files = list(test_dir.glob("*.html")) + count = len(html_files) + + if count == 83: + print(f"āœ… Verified: There are exactly {count} HTML files in the directory") + return True + else: + print(f"āš ļø Found {count} HTML files in the directory (expected 83)") + return False + +def 
main(): + """Main verification function.""" + try: + test_dir = get_test_directory() + print(f"šŸ” Verifying HTML file count in: {test_dir}") + + # Define verification steps + verification_steps = [ + ("Count File Exists", verify_count_file_exists), + ("Count Content", verify_count_content), + ("Actual HTML Count", verify_actual_html_count), + ] + + # Run all verification steps + all_passed = True + for step_name, verify_func in verification_steps: + print(f"\n--- {step_name} ---") + if not verify_func(test_dir): + all_passed = False + + # Final result + print("\n" + "="*50) + if all_passed: + print("āœ… HTML file count is correct!") + print("šŸŽ‰ Task verification: PASS") + sys.exit(0) + else: + print("āŒ Task verification: FAIL") + sys.exit(1) + + except Exception as e: + print(f"āŒ Verification failed with error: {e}") + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/tasks/filesystem/easy/student_database/duplicate_name/description.md b/tasks/filesystem/easy/student_database/duplicate_name/description.md new file mode 100644 index 00000000..8a65318e --- /dev/null +++ b/tasks/filesystem/easy/student_database/duplicate_name/description.md @@ -0,0 +1,5 @@ +Please help me identify any duplicate name from the list of all the 150 students. Do not use python code. You only need to find **any one** duplicate name. Then generate a `namesake.txt` file to record the result in the following format, with only three lines. Note: when recording the name, replace underscores with spaces. + +name: xxx +count: xxx +ids: xxx, xxx, ... 
diff --git a/tasks/filesystem/easy/student_database/duplicate_name/meta.json b/tasks/filesystem/easy/student_database/duplicate_name/meta.json new file mode 100644 index 00000000..a1786ff7 --- /dev/null +++ b/tasks/filesystem/easy/student_database/duplicate_name/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "duplicate_name", + "task_name": "Duplicate Name", + "category_id": "student_database", + "category_name": "Student Database", + "description": "Search the 150 student folders for any repeated full name and document the name, count, and ids in namesake.txt.", + "author": "Lingjun Chen", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "pattern analysis", + "data validation" + ], + "mcp": [ + "filesystem" + ], + "meta_data": { + "stateType": "text", + "stateContent": "student_database/\n \u251c\u2500\u2500 20101250_Patricia_Jones/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20101701_Isabella_Davis/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20102572_Michael_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20104233_Robert_Lopez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20104498_Sarah_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20104653_Sophia_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20104675_Michael_Gonzalez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20104846_Christopher_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20107487_Mia_Martin/\n 
\u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20108742_Sarah_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20109144_Emma_Thomas/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20109803_Oliver_Hernandez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20111634_Isabella_Thomas/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20112439_Christopher_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20113368_William_Wilson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20113603_Robert_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20114397_Isabella_Martin/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20114869_Ethan_Martin/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20115252_Mason_Johnson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20115632_Elizabeth_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20115753_Charlotte_Johnson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20115924_Michael_Lopez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 
20116232_Olivia_Lopez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20119528_Thomas_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20122427_Karen_Gonzalez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20122977_Evelyn_Miller/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20123376_Joseph_Johnson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20125451_Barbara_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20126203_Barbara_Davis/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20126394_Olivia_Williams/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20126471_Ethan_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20127423_John_Williams/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20128249_Oliver_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20128879_Christopher_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20129898_Jessica_Johnson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20131271_Olivia_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 
20131518_Sophia_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20132026_Isabella_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20132370_James_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20132669_Noah_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20133527_Mason_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20133697_Isabella_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20135821_Thomas_Wilson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20136681_Benjamin_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20136890_Benjamin_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20137514_Lucas_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20139234_Harper_Martinez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20139637_Noah_Johnson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20139647_Patricia_Lopez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20141421_Linda_Gonzalez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 
20142085_William_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20142383_Amelia_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20143406_Susan_Martin/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20143830_James_Garcia/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20146035_Christopher_Garcia/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20146277_William_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20146279_Christopher_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20147301_James_Jones/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20147789_James_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20148681_John_Hernandez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20148778_Susan_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20149712_Jessica_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20151012_Harper_Miller/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20153174_Benjamin_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n 
\u251c\u2500\u2500 20153412_Charlotte_Martin/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20153606_James_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20153687_Richard_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20154518_John_Gonzalez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20154710_Benjamin_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20156469_Jennifer_Hernandez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20156522_Jennifer_Martinez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20156851_Noah_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20157943_Harper_Williams/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20158266_Sophia_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20158294_Sophia_Wilson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20158819_Sarah_Wilson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20159113_John_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20159695_James_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 
recommendation_letter.txt\n \u251c\u2500\u2500 20161279_William_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20162253_Mason_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20162542_Mia_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20163356_Ava_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20164515_Patricia_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20164801_Noah_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20165511_Mary_Gonzalez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20166436_Christopher_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20166487_Barbara_Hernandez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20166564_Ava_Lopez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20166998_Ava_Lopez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20168311_Lucas_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20168491_Karen_Martinez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20169515_Thomas_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 
recommendation_letter.txt\n \u251c\u2500\u2500 20171050_Christopher_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20171406_Mary_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20171613_Ethan_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20172106_Isabella_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20173259_Michael_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20173492_Richard_Miller/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20173501_Mary_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20173517_Susan_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20174207_Richard_Wilson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20174369_Mary_Garcia/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20175314_William_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20176169_Lucas_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20176947_Noah_Miller/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20177389_James_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 
recommendation_letter.txt\n \u251c\u2500\u2500 20178687_Isabella_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20179461_William_Johnson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20179690_Linda_Thomas/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20181056_Sarah_Hernandez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20182020_Patricia_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20182390_Ethan_Wilson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20183149_David_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20183219_Charlotte_Williams/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20184489_Jessica_Gonzalez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20186154_Charlotte_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20186510_James_Thomas/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20187107_David_Martinez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20187144_Mary_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20187892_Christopher_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 
\u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20187921_Mary_Jones/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20187967_Sarah_Davis/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20188937_James_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20189123_Mary_Martin/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20189192_Olivia_Jones/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20189268_Emma_Williams/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20189854_William_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20191265_Joseph_Lopez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20192725_Robert_Martinez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20194054_Michael_Jones/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20194160_Benjamin_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20194164_Sarah_Jones/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20194525_John_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20195164_Jennifer_Gonzalez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 
\u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20195982_David_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20196776_William_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20196896_Olivia_Jones/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20196961_Joseph_Thomas/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20196998_Ethan_Wilson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20198548_Evelyn_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20199036_Benjamin_Hernandez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20199583_Mary_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20199735_Mason_Johnson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20199872_Sophia_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20199980_James_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20201385_John_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20201800_John_Jones/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20202548_Robert_Miller/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 
#!/usr/bin/env python3
"""
Verification script for Student Database Task: Find Duplicate Names.

Checks that the agent produced a ``namesake.txt`` in the test directory
documenting exactly one duplicated student name as a three-line group
("name: ...", "count: ...", "ids: a, b"), and that the reported name,
count, and student IDs match one of the known duplicate pairs in the
dataset.  Exits 0 on success, 1 on any failure.
"""

import os
import sys
from pathlib import Path


def get_test_directory() -> Path:
    """Return the test directory from the FILESYSTEM_TEST_DIR env var.

    Raises:
        ValueError: if FILESYSTEM_TEST_DIR is unset or empty.
    """
    test_root = os.environ.get("FILESYSTEM_TEST_DIR")
    if not test_root:
        raise ValueError("FILESYSTEM_TEST_DIR environment variable is required")
    return Path(test_root)


def verify_namesake_file_exists(test_dir: Path) -> bool:
    """Verify that the namesake.txt file exists in *test_dir*."""
    namesake_file = test_dir / "namesake.txt"

    if not namesake_file.exists():
        print("āŒ File 'namesake.txt' not found")
        return False

    print("āœ… Namesake file found")
    return True


def parse_namesake_file(test_dir: Path) -> dict:
    """Parse namesake.txt into ``{name: {'count': int, 'ids': [str, ...]}}``.

    Each group is three consecutive lines ("name: ...", "count: ...",
    "ids: a, b"); groups may be separated by any number of blank lines.
    Returns an empty dict on any format or I/O error (a diagnostic is
    printed first).
    """
    namesake_file = test_dir / "namesake.txt"

    try:
        content = namesake_file.read_text(encoding="utf-8")
        lines = content.strip().split('\n')

        namesakes = {}
        current_line = 0

        while current_line < len(lines):
            # Skip blank separator lines between groups.
            if not lines[current_line].strip():
                current_line += 1
                continue

            # A complete group needs three consecutive lines.
            if current_line + 2 >= len(lines):
                print(f"āŒ Incomplete group at line {current_line + 1}")
                return {}

            name_line = lines[current_line].strip()
            count_line = lines[current_line + 1].strip()
            ids_line = lines[current_line + 2].strip()

            # Extract name
            if not name_line.startswith("name: "):
                print(f"āŒ Invalid name line format at line {current_line + 1}: {name_line}")
                return {}
            name = name_line.replace("name: ", "").strip()

            # Extract count
            if not count_line.startswith("count: "):
                print(f"āŒ Invalid count line format at line {current_line + 2}: {count_line}")
                return {}
            count_str = count_line.replace("count: ", "").strip()
            try:
                count = int(count_str)
            except ValueError:
                print(f"āŒ Invalid count format: {count_str}")
                return {}

            # Extract IDs
            if not ids_line.startswith("ids: "):
                print(f"āŒ Invalid ids line format at line {current_line + 3}: {ids_line}")
                return {}
            ids_str = ids_line.replace("ids: ", "").strip()
            ids = [student_id.strip() for student_id in ids_str.split(",")]

            namesakes[name] = {
                'count': count,
                'ids': ids
            }

            # Advance past this 3-line group only; any blank separator is
            # consumed by the skip at the top of the loop.  (Advancing by 4
            # here would silently swallow the next group's "name:" line when
            # groups are not blank-line separated.)
            current_line += 3

        return namesakes

    except Exception as e:
        # Best-effort: a verifier should report failure, not crash.
        print(f"āŒ Error parsing namesake file: {e}")
        return {}


def verify_against_expected_results(namesakes: dict) -> bool:
    """Verify that exactly 1 duplicate name is found and it is correct."""

    # Known duplicate names in the dataset (name -> the two student IDs).
    # The task requires documenting exactly one of these.
    expected_duplicates = {
        'Isabella Smith': ['20132026', '20133697'],
        'Ava Lopez': ['20166564', '20166998'],
        'James Moore': ['20159695', '20188937'],
        'William Taylor': ['20175314', '20189854'],
        'Ethan Wilson': ['20182390', '20196998'],
        'Christopher Taylor': ['20128879', '20187892'],
        'William Anderson': ['20142085', '20146277'],
        'James Anderson': ['20147789', '20153606'],
        'Olivia Jones': ['20189192', '20196896'],
        'Mason Johnson': ['20115252', '20199735'],
        'Benjamin Jackson': ['20153174', '20194160'],
        'John Taylor': ['20194525', '20201385'],
        'Susan Anderson': ['20148778', '20173517'],
        'Christopher Moore': ['20112439', '20146279'],
        'Sarah Wilson': ['20158819', '20204611'],
        'Sarah Brown': ['20104498', '20108742']
    }

    # Check if exactly 1 duplicate name is found
    if len(namesakes) != 1:
        print(f"āŒ Expected exactly 1 duplicate name, but found {len(namesakes)}")
        return False

    print("āœ… Found exactly 1 duplicate name (as required)")

    # Check if the namesake in the file is actually a correct duplicate
    for name, data in namesakes.items():
        if name not in expected_duplicates:
            print(f"āŒ '{name}' is not a duplicate name (not in expected list)")
            return False

        expected_ids = set(expected_duplicates[name])
        stated_ids = set(data['ids'])

        if expected_ids != stated_ids:
            print(f"āŒ ID mismatch for '{name}':")
            print(f"   Expected: {sorted(expected_ids)}")
            print(f"   Stated: {sorted(stated_ids)}")
            return False

        # Every known duplicate occurs exactly twice in the dataset.
        if data['count'] != 2:
            print(f"āŒ Count mismatch for '{name}': expected 2, got {data['count']}")
            return False

    print("āœ… The identified duplicate name is correct")
    print("āœ… All student IDs match expected results")
    print("āœ… Count is correct (2 for the duplicate name)")
    return True


def main():
    """Main verification function."""
    test_dir = get_test_directory()
    print("šŸ” Verifying Student Database Task: Find Duplicate Names...")

    # Check if namesake file exists
    print("\n--- File Existence Check ---")
    if not verify_namesake_file_exists(test_dir):
        print("\nāŒ Basic verification failed, cannot proceed with content verification")
        sys.exit(1)

    # Parse the file and run content verification
    print("\n--- Content Verification ---")
    namesakes = parse_namesake_file(test_dir)

    if not namesakes:
        print("āŒ Failed to parse namesake file")
        sys.exit(1)

    # Verify against expected results
    print("\n--- Results Verification ---")
    if not verify_against_expected_results(namesakes):
        print("\nāŒ Task verification: FAIL")
        sys.exit(1)

    # Final result
    print("\n" + "=" * 50)
    print("āœ… Namesake identification completed correctly!")
    print("šŸŽ‰ Found 1 duplicate name (exactly 1 required)")
    print("šŸŽ‰ Task verification: PASS")
    sys.exit(0)


if __name__ == "__main__":
    main()
\u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20104233_Robert_Lopez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20104498_Sarah_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20104653_Sophia_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20104675_Michael_Gonzalez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20104846_Christopher_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20107487_Mia_Martin/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20108742_Sarah_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20109144_Emma_Thomas/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20109803_Oliver_Hernandez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20111634_Isabella_Thomas/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20112439_Christopher_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20113368_William_Wilson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20113603_Robert_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20114397_Isabella_Martin/\n 
\u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20114869_Ethan_Martin/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20115252_Mason_Johnson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20115632_Elizabeth_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20115753_Charlotte_Johnson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20115924_Michael_Lopez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20116232_Olivia_Lopez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20119528_Thomas_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20122427_Karen_Gonzalez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20122977_Evelyn_Miller/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20123376_Joseph_Johnson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20125451_Barbara_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20126203_Barbara_Davis/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20126394_Olivia_Williams/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20126471_Ethan_Taylor/\n 
\u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20127423_John_Williams/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20128249_Oliver_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20128879_Christopher_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20129898_Jessica_Johnson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20131271_Olivia_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20131518_Sophia_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20132026_Isabella_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20132370_James_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20132669_Noah_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20133527_Mason_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20133697_Isabella_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20135821_Thomas_Wilson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20136681_Benjamin_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20136890_Benjamin_Brown/\n 
\u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20137514_Lucas_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20139234_Harper_Martinez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20139637_Noah_Johnson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20139647_Patricia_Lopez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20141421_Linda_Gonzalez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20142085_William_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20142383_Amelia_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20143406_Susan_Martin/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20143830_James_Garcia/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20146035_Christopher_Garcia/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20146277_William_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20146279_Christopher_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20147301_James_Jones/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 
20147789_James_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20148681_John_Hernandez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20148778_Susan_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20149712_Jessica_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20151012_Harper_Miller/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20153174_Benjamin_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20153412_Charlotte_Martin/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20153606_James_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20153687_Richard_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20154518_John_Gonzalez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20154710_Benjamin_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20156469_Jennifer_Hernandez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20156522_Jennifer_Martinez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20156851_Noah_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n 
\u251c\u2500\u2500 20157943_Harper_Williams/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20158266_Sophia_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20158294_Sophia_Wilson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20158819_Sarah_Wilson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20159113_John_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20159695_James_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20161279_William_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20162253_Mason_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20162542_Mia_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20163356_Ava_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20164515_Patricia_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20164801_Noah_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20165511_Mary_Gonzalez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20166436_Christopher_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n 
\u251c\u2500\u2500 20166487_Barbara_Hernandez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20166564_Ava_Lopez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20166998_Ava_Lopez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20168311_Lucas_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20168491_Karen_Martinez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20169515_Thomas_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20171050_Christopher_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20171406_Mary_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20171613_Ethan_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20172106_Isabella_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20173259_Michael_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20173492_Richard_Miller/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20173501_Mary_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20173517_Susan_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n 
\u251c\u2500\u2500 20174207_Richard_Wilson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20174369_Mary_Garcia/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20175314_William_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20176169_Lucas_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20176947_Noah_Miller/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20177389_James_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20178687_Isabella_Anderson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20179461_William_Johnson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20179690_Linda_Thomas/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20181056_Sarah_Hernandez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20182020_Patricia_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20182390_Ethan_Wilson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20183149_David_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20183219_Charlotte_Williams/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n 
\u251c\u2500\u2500 20184489_Jessica_Gonzalez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20186154_Charlotte_Smith/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20186510_James_Thomas/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20187107_David_Martinez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20187144_Mary_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20187892_Christopher_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20187921_Mary_Jones/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20187967_Sarah_Davis/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20188937_James_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20189123_Mary_Martin/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20189192_Olivia_Jones/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20189268_Emma_Williams/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20189854_William_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20191265_Joseph_Lopez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n 
\u251c\u2500\u2500 20192725_Robert_Martinez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20194054_Michael_Jones/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20194160_Benjamin_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20194164_Sarah_Jones/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20194525_John_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20195164_Jennifer_Gonzalez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20195982_David_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20196776_William_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20196896_Olivia_Jones/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20196961_Joseph_Thomas/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20196998_Ethan_Wilson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20198548_Evelyn_Moore/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20199036_Benjamin_Hernandez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20199583_Mary_Brown/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n 
\u251c\u2500\u2500 20199735_Mason_Johnson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20199872_Sophia_Jackson/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20199980_James_Rodriguez/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20201385_John_Taylor/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20201800_John_Jones/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20202548_Robert_Miller/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u251c\u2500\u2500 20203855_Mia_Miller/\n \u2502 \u251c\u2500\u2500 basic_info.txt\n \u2502 \u2514\u2500\u2500 recommendation_letter.txt\n \u2514\u2500\u2500 20204611_Sarah_Wilson/\n \u251c\u2500\u2500 basic_info.txt\n \u2514\u2500\u2500 recommendation_letter.txt", + "stateUrl": "https://storage.mcpmark.ai/filesystem/student_database.zip", + "stateOriginalUrl": null + } +} diff --git a/tasks/filesystem/easy/student_database/recommender_name/verify.py b/tasks/filesystem/easy/student_database/recommender_name/verify.py new file mode 100644 index 00000000..7809bf29 --- /dev/null +++ b/tasks/filesystem/easy/student_database/recommender_name/verify.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +""" +Verification script for Student Database Task: Find Recommender Name +""" + +import sys +from pathlib import Path +import os + +def get_test_directory() -> Path: + """Get the test directory from FILESYSTEM_TEST_DIR env var.""" + test_root = os.environ.get("FILESYSTEM_TEST_DIR") + if not test_root: + raise ValueError("FILESYSTEM_TEST_DIR environment variable is required") + return Path(test_root) + +def verify_recommender_file_exists(test_dir: 
Path) -> bool: + """Verify that the recommender.txt file exists.""" + recommender_file = test_dir / "recommender.txt" + + if not recommender_file.exists(): + print("āŒ File 'recommender.txt' not found") + return False + + print("āœ… Recommender file found") + return True + +def verify_recommender_content(test_dir: Path) -> bool: + """Verify that the recommender.txt file contains 'Brown'.""" + recommender_file = test_dir / "recommender.txt" + + try: + content = recommender_file.read_text() + + if "Brown" in content: + print("āœ… Recommender name 'Brown' found in file") + return True + else: + print("āŒ Recommender name 'Brown' not found in file") + print(f" File content: {content.strip()}") + return False + + except Exception as e: + print(f"āŒ Error reading recommender file: {e}") + return False + +def main(): + """Main verification function.""" + test_dir = get_test_directory() + print("šŸ” Verifying Student Database Task: Find Recommender Name...") + + # Check if recommender file exists + print("\n--- File Existence Check ---") + if not verify_recommender_file_exists(test_dir): + print("\nāŒ Basic verification failed, cannot proceed with content verification") + sys.exit(1) + + # Verify content + print("\n--- Content Verification ---") + if not verify_recommender_content(test_dir): + print("\nāŒ Task verification: FAIL") + sys.exit(1) + + # Final result + print("\n" + "="*50) + print("āœ… Recommender identification completed correctly!") + print("šŸŽ‰ Task verification: PASS") + sys.exit(0) + +if __name__ == "__main__": + main() diff --git a/tasks/filesystem/desktop/music_report/description.md b/tasks/filesystem/standard/desktop/music_report/description.md similarity index 100% rename from tasks/filesystem/desktop/music_report/description.md rename to tasks/filesystem/standard/desktop/music_report/description.md diff --git a/tasks/filesystem/desktop/music_report/meta.json b/tasks/filesystem/standard/desktop/music_report/meta.json similarity index 100% 
rename from tasks/filesystem/desktop/music_report/meta.json rename to tasks/filesystem/standard/desktop/music_report/meta.json diff --git a/tasks/filesystem/desktop/music_report/verify.py b/tasks/filesystem/standard/desktop/music_report/verify.py similarity index 100% rename from tasks/filesystem/desktop/music_report/verify.py rename to tasks/filesystem/standard/desktop/music_report/verify.py diff --git a/tasks/filesystem/desktop/project_management/description.md b/tasks/filesystem/standard/desktop/project_management/description.md similarity index 100% rename from tasks/filesystem/desktop/project_management/description.md rename to tasks/filesystem/standard/desktop/project_management/description.md diff --git a/tasks/filesystem/desktop/project_management/meta.json b/tasks/filesystem/standard/desktop/project_management/meta.json similarity index 100% rename from tasks/filesystem/desktop/project_management/meta.json rename to tasks/filesystem/standard/desktop/project_management/meta.json diff --git a/tasks/filesystem/desktop/project_management/verify.py b/tasks/filesystem/standard/desktop/project_management/verify.py similarity index 100% rename from tasks/filesystem/desktop/project_management/verify.py rename to tasks/filesystem/standard/desktop/project_management/verify.py diff --git a/tasks/filesystem/desktop/timeline_extraction/description.md b/tasks/filesystem/standard/desktop/timeline_extraction/description.md similarity index 100% rename from tasks/filesystem/desktop/timeline_extraction/description.md rename to tasks/filesystem/standard/desktop/timeline_extraction/description.md diff --git a/tasks/filesystem/desktop/timeline_extraction/meta.json b/tasks/filesystem/standard/desktop/timeline_extraction/meta.json similarity index 100% rename from tasks/filesystem/desktop/timeline_extraction/meta.json rename to tasks/filesystem/standard/desktop/timeline_extraction/meta.json diff --git a/tasks/filesystem/desktop/timeline_extraction/verify.py 
b/tasks/filesystem/standard/desktop/timeline_extraction/verify.py similarity index 100% rename from tasks/filesystem/desktop/timeline_extraction/verify.py rename to tasks/filesystem/standard/desktop/timeline_extraction/verify.py diff --git a/tasks/filesystem/desktop_template/budget_computation/description.md b/tasks/filesystem/standard/desktop_template/budget_computation/description.md similarity index 100% rename from tasks/filesystem/desktop_template/budget_computation/description.md rename to tasks/filesystem/standard/desktop_template/budget_computation/description.md diff --git a/tasks/filesystem/desktop_template/budget_computation/meta.json b/tasks/filesystem/standard/desktop_template/budget_computation/meta.json similarity index 100% rename from tasks/filesystem/desktop_template/budget_computation/meta.json rename to tasks/filesystem/standard/desktop_template/budget_computation/meta.json diff --git a/tasks/filesystem/desktop_template/budget_computation/verify.py b/tasks/filesystem/standard/desktop_template/budget_computation/verify.py similarity index 100% rename from tasks/filesystem/desktop_template/budget_computation/verify.py rename to tasks/filesystem/standard/desktop_template/budget_computation/verify.py diff --git a/tasks/filesystem/desktop_template/contact_information/description.md b/tasks/filesystem/standard/desktop_template/contact_information/description.md similarity index 100% rename from tasks/filesystem/desktop_template/contact_information/description.md rename to tasks/filesystem/standard/desktop_template/contact_information/description.md diff --git a/tasks/filesystem/desktop_template/contact_information/meta.json b/tasks/filesystem/standard/desktop_template/contact_information/meta.json similarity index 100% rename from tasks/filesystem/desktop_template/contact_information/meta.json rename to tasks/filesystem/standard/desktop_template/contact_information/meta.json diff --git a/tasks/filesystem/desktop_template/contact_information/verify.py 
b/tasks/filesystem/standard/desktop_template/contact_information/verify.py similarity index 100% rename from tasks/filesystem/desktop_template/contact_information/verify.py rename to tasks/filesystem/standard/desktop_template/contact_information/verify.py diff --git a/tasks/filesystem/desktop_template/file_arrangement/description.md b/tasks/filesystem/standard/desktop_template/file_arrangement/description.md similarity index 100% rename from tasks/filesystem/desktop_template/file_arrangement/description.md rename to tasks/filesystem/standard/desktop_template/file_arrangement/description.md diff --git a/tasks/filesystem/desktop_template/file_arrangement/meta.json b/tasks/filesystem/standard/desktop_template/file_arrangement/meta.json similarity index 100% rename from tasks/filesystem/desktop_template/file_arrangement/meta.json rename to tasks/filesystem/standard/desktop_template/file_arrangement/meta.json diff --git a/tasks/filesystem/desktop_template/file_arrangement/verify.py b/tasks/filesystem/standard/desktop_template/file_arrangement/verify.py similarity index 100% rename from tasks/filesystem/desktop_template/file_arrangement/verify.py rename to tasks/filesystem/standard/desktop_template/file_arrangement/verify.py diff --git a/tasks/filesystem/file_context/duplicates_searching/description.md b/tasks/filesystem/standard/file_context/duplicates_searching/description.md similarity index 100% rename from tasks/filesystem/file_context/duplicates_searching/description.md rename to tasks/filesystem/standard/file_context/duplicates_searching/description.md diff --git a/tasks/filesystem/file_context/duplicates_searching/meta.json b/tasks/filesystem/standard/file_context/duplicates_searching/meta.json similarity index 100% rename from tasks/filesystem/file_context/duplicates_searching/meta.json rename to tasks/filesystem/standard/file_context/duplicates_searching/meta.json diff --git a/tasks/filesystem/file_context/duplicates_searching/verify.py 
b/tasks/filesystem/standard/file_context/duplicates_searching/verify.py similarity index 100% rename from tasks/filesystem/file_context/duplicates_searching/verify.py rename to tasks/filesystem/standard/file_context/duplicates_searching/verify.py diff --git a/tasks/filesystem/file_context/file_merging/description.md b/tasks/filesystem/standard/file_context/file_merging/description.md similarity index 100% rename from tasks/filesystem/file_context/file_merging/description.md rename to tasks/filesystem/standard/file_context/file_merging/description.md diff --git a/tasks/filesystem/file_context/file_merging/meta.json b/tasks/filesystem/standard/file_context/file_merging/meta.json similarity index 100% rename from tasks/filesystem/file_context/file_merging/meta.json rename to tasks/filesystem/standard/file_context/file_merging/meta.json diff --git a/tasks/filesystem/file_context/file_merging/verify.py b/tasks/filesystem/standard/file_context/file_merging/verify.py similarity index 100% rename from tasks/filesystem/file_context/file_merging/verify.py rename to tasks/filesystem/standard/file_context/file_merging/verify.py diff --git a/tasks/filesystem/file_context/file_splitting/description.md b/tasks/filesystem/standard/file_context/file_splitting/description.md similarity index 100% rename from tasks/filesystem/file_context/file_splitting/description.md rename to tasks/filesystem/standard/file_context/file_splitting/description.md diff --git a/tasks/filesystem/file_context/file_splitting/meta.json b/tasks/filesystem/standard/file_context/file_splitting/meta.json similarity index 100% rename from tasks/filesystem/file_context/file_splitting/meta.json rename to tasks/filesystem/standard/file_context/file_splitting/meta.json diff --git a/tasks/filesystem/file_context/file_splitting/verify.py b/tasks/filesystem/standard/file_context/file_splitting/verify.py similarity index 100% rename from tasks/filesystem/file_context/file_splitting/verify.py rename to 
tasks/filesystem/standard/file_context/file_splitting/verify.py diff --git a/tasks/filesystem/file_context/pattern_matching/description.md b/tasks/filesystem/standard/file_context/pattern_matching/description.md similarity index 100% rename from tasks/filesystem/file_context/pattern_matching/description.md rename to tasks/filesystem/standard/file_context/pattern_matching/description.md diff --git a/tasks/filesystem/file_context/pattern_matching/meta.json b/tasks/filesystem/standard/file_context/pattern_matching/meta.json similarity index 100% rename from tasks/filesystem/file_context/pattern_matching/meta.json rename to tasks/filesystem/standard/file_context/pattern_matching/meta.json diff --git a/tasks/filesystem/file_context/pattern_matching/verify.py b/tasks/filesystem/standard/file_context/pattern_matching/verify.py similarity index 100% rename from tasks/filesystem/file_context/pattern_matching/verify.py rename to tasks/filesystem/standard/file_context/pattern_matching/verify.py diff --git a/tasks/filesystem/file_context/uppercase/description.md b/tasks/filesystem/standard/file_context/uppercase/description.md similarity index 100% rename from tasks/filesystem/file_context/uppercase/description.md rename to tasks/filesystem/standard/file_context/uppercase/description.md diff --git a/tasks/filesystem/file_context/uppercase/meta.json b/tasks/filesystem/standard/file_context/uppercase/meta.json similarity index 100% rename from tasks/filesystem/file_context/uppercase/meta.json rename to tasks/filesystem/standard/file_context/uppercase/meta.json diff --git a/tasks/filesystem/file_context/uppercase/verify.py b/tasks/filesystem/standard/file_context/uppercase/verify.py similarity index 100% rename from tasks/filesystem/file_context/uppercase/verify.py rename to tasks/filesystem/standard/file_context/uppercase/verify.py diff --git a/tasks/filesystem/file_property/size_classification/description.md 
b/tasks/filesystem/standard/file_property/size_classification/description.md similarity index 100% rename from tasks/filesystem/file_property/size_classification/description.md rename to tasks/filesystem/standard/file_property/size_classification/description.md diff --git a/tasks/filesystem/file_property/size_classification/meta.json b/tasks/filesystem/standard/file_property/size_classification/meta.json similarity index 100% rename from tasks/filesystem/file_property/size_classification/meta.json rename to tasks/filesystem/standard/file_property/size_classification/meta.json diff --git a/tasks/filesystem/file_property/size_classification/verify.py b/tasks/filesystem/standard/file_property/size_classification/verify.py similarity index 100% rename from tasks/filesystem/file_property/size_classification/verify.py rename to tasks/filesystem/standard/file_property/size_classification/verify.py diff --git a/tasks/filesystem/file_property/time_classification/description.md b/tasks/filesystem/standard/file_property/time_classification/description.md similarity index 100% rename from tasks/filesystem/file_property/time_classification/description.md rename to tasks/filesystem/standard/file_property/time_classification/description.md diff --git a/tasks/filesystem/file_property/time_classification/meta.json b/tasks/filesystem/standard/file_property/time_classification/meta.json similarity index 100% rename from tasks/filesystem/file_property/time_classification/meta.json rename to tasks/filesystem/standard/file_property/time_classification/meta.json diff --git a/tasks/filesystem/file_property/time_classification/verify.py b/tasks/filesystem/standard/file_property/time_classification/verify.py similarity index 100% rename from tasks/filesystem/file_property/time_classification/verify.py rename to tasks/filesystem/standard/file_property/time_classification/verify.py diff --git a/tasks/filesystem/folder_structure/structure_analysis/description.md 
b/tasks/filesystem/standard/folder_structure/structure_analysis/description.md similarity index 100% rename from tasks/filesystem/folder_structure/structure_analysis/description.md rename to tasks/filesystem/standard/folder_structure/structure_analysis/description.md diff --git a/tasks/filesystem/folder_structure/structure_analysis/meta.json b/tasks/filesystem/standard/folder_structure/structure_analysis/meta.json similarity index 100% rename from tasks/filesystem/folder_structure/structure_analysis/meta.json rename to tasks/filesystem/standard/folder_structure/structure_analysis/meta.json diff --git a/tasks/filesystem/folder_structure/structure_analysis/verify.py b/tasks/filesystem/standard/folder_structure/structure_analysis/verify.py similarity index 100% rename from tasks/filesystem/folder_structure/structure_analysis/verify.py rename to tasks/filesystem/standard/folder_structure/structure_analysis/verify.py diff --git a/tasks/filesystem/folder_structure/structure_mirror/description.md b/tasks/filesystem/standard/folder_structure/structure_mirror/description.md similarity index 100% rename from tasks/filesystem/folder_structure/structure_mirror/description.md rename to tasks/filesystem/standard/folder_structure/structure_mirror/description.md diff --git a/tasks/filesystem/folder_structure/structure_mirror/meta.json b/tasks/filesystem/standard/folder_structure/structure_mirror/meta.json similarity index 100% rename from tasks/filesystem/folder_structure/structure_mirror/meta.json rename to tasks/filesystem/standard/folder_structure/structure_mirror/meta.json diff --git a/tasks/filesystem/folder_structure/structure_mirror/verify.py b/tasks/filesystem/standard/folder_structure/structure_mirror/verify.py similarity index 100% rename from tasks/filesystem/folder_structure/structure_mirror/verify.py rename to tasks/filesystem/standard/folder_structure/structure_mirror/verify.py diff --git a/tasks/filesystem/legal_document/dispute_review/description.md 
b/tasks/filesystem/standard/legal_document/dispute_review/description.md similarity index 100% rename from tasks/filesystem/legal_document/dispute_review/description.md rename to tasks/filesystem/standard/legal_document/dispute_review/description.md diff --git a/tasks/filesystem/legal_document/dispute_review/meta.json b/tasks/filesystem/standard/legal_document/dispute_review/meta.json similarity index 100% rename from tasks/filesystem/legal_document/dispute_review/meta.json rename to tasks/filesystem/standard/legal_document/dispute_review/meta.json diff --git a/tasks/filesystem/legal_document/dispute_review/verify.py b/tasks/filesystem/standard/legal_document/dispute_review/verify.py similarity index 100% rename from tasks/filesystem/legal_document/dispute_review/verify.py rename to tasks/filesystem/standard/legal_document/dispute_review/verify.py diff --git a/tasks/filesystem/legal_document/individual_comments/description.md b/tasks/filesystem/standard/legal_document/individual_comments/description.md similarity index 100% rename from tasks/filesystem/legal_document/individual_comments/description.md rename to tasks/filesystem/standard/legal_document/individual_comments/description.md diff --git a/tasks/filesystem/legal_document/individual_comments/meta.json b/tasks/filesystem/standard/legal_document/individual_comments/meta.json similarity index 100% rename from tasks/filesystem/legal_document/individual_comments/meta.json rename to tasks/filesystem/standard/legal_document/individual_comments/meta.json diff --git a/tasks/filesystem/legal_document/individual_comments/verify.py b/tasks/filesystem/standard/legal_document/individual_comments/verify.py similarity index 100% rename from tasks/filesystem/legal_document/individual_comments/verify.py rename to tasks/filesystem/standard/legal_document/individual_comments/verify.py diff --git a/tasks/filesystem/legal_document/solution_tracing/description.md 
b/tasks/filesystem/standard/legal_document/solution_tracing/description.md similarity index 100% rename from tasks/filesystem/legal_document/solution_tracing/description.md rename to tasks/filesystem/standard/legal_document/solution_tracing/description.md diff --git a/tasks/filesystem/legal_document/solution_tracing/meta.json b/tasks/filesystem/standard/legal_document/solution_tracing/meta.json similarity index 100% rename from tasks/filesystem/legal_document/solution_tracing/meta.json rename to tasks/filesystem/standard/legal_document/solution_tracing/meta.json diff --git a/tasks/filesystem/legal_document/solution_tracing/verify.py b/tasks/filesystem/standard/legal_document/solution_tracing/verify.py similarity index 100% rename from tasks/filesystem/legal_document/solution_tracing/verify.py rename to tasks/filesystem/standard/legal_document/solution_tracing/verify.py diff --git a/tasks/filesystem/papers/author_folders/description.md b/tasks/filesystem/standard/papers/author_folders/description.md similarity index 100% rename from tasks/filesystem/papers/author_folders/description.md rename to tasks/filesystem/standard/papers/author_folders/description.md diff --git a/tasks/filesystem/papers/author_folders/meta.json b/tasks/filesystem/standard/papers/author_folders/meta.json similarity index 100% rename from tasks/filesystem/papers/author_folders/meta.json rename to tasks/filesystem/standard/papers/author_folders/meta.json diff --git a/tasks/filesystem/papers/author_folders/verify.py b/tasks/filesystem/standard/papers/author_folders/verify.py similarity index 100% rename from tasks/filesystem/papers/author_folders/verify.py rename to tasks/filesystem/standard/papers/author_folders/verify.py diff --git a/tasks/filesystem/papers/find_math_paper/description.md b/tasks/filesystem/standard/papers/find_math_paper/description.md similarity index 100% rename from tasks/filesystem/papers/find_math_paper/description.md rename to 
tasks/filesystem/standard/papers/find_math_paper/description.md diff --git a/tasks/filesystem/papers/find_math_paper/meta.json b/tasks/filesystem/standard/papers/find_math_paper/meta.json similarity index 100% rename from tasks/filesystem/papers/find_math_paper/meta.json rename to tasks/filesystem/standard/papers/find_math_paper/meta.json diff --git a/tasks/filesystem/papers/find_math_paper/verify.py b/tasks/filesystem/standard/papers/find_math_paper/verify.py similarity index 100% rename from tasks/filesystem/papers/find_math_paper/verify.py rename to tasks/filesystem/standard/papers/find_math_paper/verify.py diff --git a/tasks/filesystem/papers/organize_legacy_papers/description.md b/tasks/filesystem/standard/papers/organize_legacy_papers/description.md similarity index 100% rename from tasks/filesystem/papers/organize_legacy_papers/description.md rename to tasks/filesystem/standard/papers/organize_legacy_papers/description.md diff --git a/tasks/filesystem/papers/organize_legacy_papers/meta.json b/tasks/filesystem/standard/papers/organize_legacy_papers/meta.json similarity index 100% rename from tasks/filesystem/papers/organize_legacy_papers/meta.json rename to tasks/filesystem/standard/papers/organize_legacy_papers/meta.json diff --git a/tasks/filesystem/papers/organize_legacy_papers/verify.py b/tasks/filesystem/standard/papers/organize_legacy_papers/verify.py similarity index 100% rename from tasks/filesystem/papers/organize_legacy_papers/verify.py rename to tasks/filesystem/standard/papers/organize_legacy_papers/verify.py diff --git a/tasks/filesystem/student_database/duplicate_name/description.md b/tasks/filesystem/standard/student_database/duplicate_name/description.md similarity index 100% rename from tasks/filesystem/student_database/duplicate_name/description.md rename to tasks/filesystem/standard/student_database/duplicate_name/description.md diff --git a/tasks/filesystem/student_database/duplicate_name/meta.json 
b/tasks/filesystem/standard/student_database/duplicate_name/meta.json similarity index 100% rename from tasks/filesystem/student_database/duplicate_name/meta.json rename to tasks/filesystem/standard/student_database/duplicate_name/meta.json diff --git a/tasks/filesystem/student_database/duplicate_name/verify.py b/tasks/filesystem/standard/student_database/duplicate_name/verify.py similarity index 100% rename from tasks/filesystem/student_database/duplicate_name/verify.py rename to tasks/filesystem/standard/student_database/duplicate_name/verify.py diff --git a/tasks/filesystem/student_database/english_talent/description.md b/tasks/filesystem/standard/student_database/english_talent/description.md similarity index 100% rename from tasks/filesystem/student_database/english_talent/description.md rename to tasks/filesystem/standard/student_database/english_talent/description.md diff --git a/tasks/filesystem/student_database/english_talent/meta.json b/tasks/filesystem/standard/student_database/english_talent/meta.json similarity index 100% rename from tasks/filesystem/student_database/english_talent/meta.json rename to tasks/filesystem/standard/student_database/english_talent/meta.json diff --git a/tasks/filesystem/student_database/english_talent/verify.py b/tasks/filesystem/standard/student_database/english_talent/verify.py similarity index 100% rename from tasks/filesystem/student_database/english_talent/verify.py rename to tasks/filesystem/standard/student_database/english_talent/verify.py diff --git a/tasks/filesystem/student_database/gradebased_score/description.md b/tasks/filesystem/standard/student_database/gradebased_score/description.md similarity index 100% rename from tasks/filesystem/student_database/gradebased_score/description.md rename to tasks/filesystem/standard/student_database/gradebased_score/description.md diff --git a/tasks/filesystem/student_database/gradebased_score/meta.json b/tasks/filesystem/standard/student_database/gradebased_score/meta.json 
similarity index 100% rename from tasks/filesystem/student_database/gradebased_score/meta.json rename to tasks/filesystem/standard/student_database/gradebased_score/meta.json diff --git a/tasks/filesystem/student_database/gradebased_score/verify.py b/tasks/filesystem/standard/student_database/gradebased_score/verify.py similarity index 100% rename from tasks/filesystem/student_database/gradebased_score/verify.py rename to tasks/filesystem/standard/student_database/gradebased_score/verify.py diff --git a/tasks/filesystem/threestudio/code_locating/description.md b/tasks/filesystem/standard/threestudio/code_locating/description.md similarity index 100% rename from tasks/filesystem/threestudio/code_locating/description.md rename to tasks/filesystem/standard/threestudio/code_locating/description.md diff --git a/tasks/filesystem/threestudio/code_locating/meta.json b/tasks/filesystem/standard/threestudio/code_locating/meta.json similarity index 100% rename from tasks/filesystem/threestudio/code_locating/meta.json rename to tasks/filesystem/standard/threestudio/code_locating/meta.json diff --git a/tasks/filesystem/threestudio/code_locating/verify.py b/tasks/filesystem/standard/threestudio/code_locating/verify.py similarity index 100% rename from tasks/filesystem/threestudio/code_locating/verify.py rename to tasks/filesystem/standard/threestudio/code_locating/verify.py diff --git a/tasks/filesystem/threestudio/output_analysis/description.md b/tasks/filesystem/standard/threestudio/output_analysis/description.md similarity index 100% rename from tasks/filesystem/threestudio/output_analysis/description.md rename to tasks/filesystem/standard/threestudio/output_analysis/description.md diff --git a/tasks/filesystem/threestudio/output_analysis/meta.json b/tasks/filesystem/standard/threestudio/output_analysis/meta.json similarity index 100% rename from tasks/filesystem/threestudio/output_analysis/meta.json rename to tasks/filesystem/standard/threestudio/output_analysis/meta.json 
diff --git a/tasks/filesystem/threestudio/output_analysis/verify.py b/tasks/filesystem/standard/threestudio/output_analysis/verify.py similarity index 100% rename from tasks/filesystem/threestudio/output_analysis/verify.py rename to tasks/filesystem/standard/threestudio/output_analysis/verify.py diff --git a/tasks/filesystem/threestudio/requirements_completion/description.md b/tasks/filesystem/standard/threestudio/requirements_completion/description.md similarity index 100% rename from tasks/filesystem/threestudio/requirements_completion/description.md rename to tasks/filesystem/standard/threestudio/requirements_completion/description.md diff --git a/tasks/filesystem/threestudio/requirements_completion/meta.json b/tasks/filesystem/standard/threestudio/requirements_completion/meta.json similarity index 100% rename from tasks/filesystem/threestudio/requirements_completion/meta.json rename to tasks/filesystem/standard/threestudio/requirements_completion/meta.json diff --git a/tasks/filesystem/threestudio/requirements_completion/verify.py b/tasks/filesystem/standard/threestudio/requirements_completion/verify.py similarity index 100% rename from tasks/filesystem/threestudio/requirements_completion/verify.py rename to tasks/filesystem/standard/threestudio/requirements_completion/verify.py diff --git a/tasks/filesystem/votenet/dataset_comparison/description.md b/tasks/filesystem/standard/votenet/dataset_comparison/description.md similarity index 100% rename from tasks/filesystem/votenet/dataset_comparison/description.md rename to tasks/filesystem/standard/votenet/dataset_comparison/description.md diff --git a/tasks/filesystem/votenet/dataset_comparison/meta.json b/tasks/filesystem/standard/votenet/dataset_comparison/meta.json similarity index 100% rename from tasks/filesystem/votenet/dataset_comparison/meta.json rename to tasks/filesystem/standard/votenet/dataset_comparison/meta.json diff --git a/tasks/filesystem/votenet/dataset_comparison/verify.py 
b/tasks/filesystem/standard/votenet/dataset_comparison/verify.py similarity index 100% rename from tasks/filesystem/votenet/dataset_comparison/verify.py rename to tasks/filesystem/standard/votenet/dataset_comparison/verify.py diff --git a/tasks/filesystem/votenet/debugging/description.md b/tasks/filesystem/standard/votenet/debugging/description.md similarity index 100% rename from tasks/filesystem/votenet/debugging/description.md rename to tasks/filesystem/standard/votenet/debugging/description.md diff --git a/tasks/filesystem/votenet/debugging/meta.json b/tasks/filesystem/standard/votenet/debugging/meta.json similarity index 100% rename from tasks/filesystem/votenet/debugging/meta.json rename to tasks/filesystem/standard/votenet/debugging/meta.json diff --git a/tasks/filesystem/votenet/debugging/verify.py b/tasks/filesystem/standard/votenet/debugging/verify.py similarity index 100% rename from tasks/filesystem/votenet/debugging/verify.py rename to tasks/filesystem/standard/votenet/debugging/verify.py diff --git a/tasks/filesystem/votenet/requirements_writing/description.md b/tasks/filesystem/standard/votenet/requirements_writing/description.md similarity index 100% rename from tasks/filesystem/votenet/requirements_writing/description.md rename to tasks/filesystem/standard/votenet/requirements_writing/description.md diff --git a/tasks/filesystem/votenet/requirements_writing/meta.json b/tasks/filesystem/standard/votenet/requirements_writing/meta.json similarity index 100% rename from tasks/filesystem/votenet/requirements_writing/meta.json rename to tasks/filesystem/standard/votenet/requirements_writing/meta.json diff --git a/tasks/filesystem/votenet/requirements_writing/verify.py b/tasks/filesystem/standard/votenet/requirements_writing/verify.py similarity index 100% rename from tasks/filesystem/votenet/requirements_writing/verify.py rename to tasks/filesystem/standard/votenet/requirements_writing/verify.py diff --git 
a/tasks/github/easy/build-your-own-x/close_commented_issues/description.md b/tasks/github/easy/build-your-own-x/close_commented_issues/description.md new file mode 100644 index 00000000..cbc9074a --- /dev/null +++ b/tasks/github/easy/build-your-own-x/close_commented_issues/description.md @@ -0,0 +1 @@ +Use the GitHub MCP tools to close every issue in `mcpmark-eval/build-your-own-x` that already has at least one comment. Leave all other issues unchanged. diff --git a/tasks/github/easy/build-your-own-x/close_commented_issues/meta.json b/tasks/github/easy/build-your-own-x/close_commented_issues/meta.json new file mode 100644 index 00000000..4eeec732 --- /dev/null +++ b/tasks/github/easy/build-your-own-x/close_commented_issues/meta.json @@ -0,0 +1,22 @@ +{ + "task_id": "close_commented_issues", + "task_name": "Close Commented Issues", + "category_id": "build-your-own-x", + "category_name": "Build Your Own X (Easy)", + "description": "Use GitHub MCP tools to close every issue with comments in build-your-own-x and leave everything else alone.", + "author": "Zijian Wu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "issue management" + ], + "mcp": [ + "github" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://github.com/mcpmark-source/build-your-own-x", + "stateOriginalUrl": "https://github.com/codecrafters-io/build-your-own-x" + } +} diff --git a/tasks/github/easy/build-your-own-x/close_commented_issues/verify.py b/tasks/github/easy/build-your-own-x/close_commented_issues/verify.py new file mode 100644 index 00000000..91c162b6 --- /dev/null +++ b/tasks/github/easy/build-your-own-x/close_commented_issues/verify.py @@ -0,0 +1,76 @@ +import os +import sys +from typing import Optional + +import requests +from dotenv import load_dotenv + +REPO_NAME = "build-your-own-x" +TARGET_ISSUES = [23, 25] + + +def _fetch_issue(org: str, token: str, number: int) -> Optional[dict]: + url = 
f"https://api.github.com/repos/{org}/{REPO_NAME}/issues/{number}" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + } + + try: + response = requests.get(url, headers=headers, timeout=30) + except Exception as exc: + print(f"Request error for issue #{number}: {exc}", file=sys.stderr) + return None + + if response.status_code != 200: + print( + f"GitHub API returned {response.status_code} when fetching issue #{number}", + file=sys.stderr, + ) + return None + + try: + return response.json() + except Exception as exc: + print(f"Unable to parse issue #{number}: {exc}", file=sys.stderr) + return None + + +def verify() -> bool: + load_dotenv(".mcp_env") + + token = os.environ.get("MCP_GITHUB_TOKEN") + org = os.environ.get("GITHUB_EVAL_ORG") + + if not token: + print("MCP_GITHUB_TOKEN is missing", file=sys.stderr) + return False + + if not org: + print("GITHUB_EVAL_ORG is missing", file=sys.stderr) + return False + + print("Checking issue states in remote repository...") + success = True + + for issue_number in TARGET_ISSUES: + data = _fetch_issue(org, token, issue_number) + if data is None: + success = False + continue + + state = data.get("state", "").lower() + if state != "closed": + print( + f"Issue #{issue_number} is '{state}' but must be closed.", + file=sys.stderr, + ) + success = False + else: + print(f"Issue #{issue_number} is closed as expected.") + + return success + + +if __name__ == "__main__": + sys.exit(0 if verify() else 1) diff --git a/tasks/github/easy/build-your-own-x/record_recent_commits/description.md b/tasks/github/easy/build-your-own-x/record_recent_commits/description.md new file mode 100644 index 00000000..d1a8c979 --- /dev/null +++ b/tasks/github/easy/build-your-own-x/record_recent_commits/description.md @@ -0,0 +1,16 @@ +Use the GitHub MCP tools to work in the `mcpmark-eval/build-your-own-x` repository. + +1. Retrieve the newest five commits on the default branch. +2. 
Open a new issue titled exactly `Latest 5 Commit Snapshot`. +3. Set the issue body to exactly this format (newest commit first): + +``` +Latest 5 commits (newest first) +1. | | +2. | | +3. | | +4. | | +5. | | +``` + +Use the full 40-character SHA and only the first line of each commit message. The `` must come from the commit metadata's author name field (not the GitHub username/login). Leave the issue open and do not touch other issues. diff --git a/tasks/github/easy/build-your-own-x/record_recent_commits/meta.json b/tasks/github/easy/build-your-own-x/record_recent_commits/meta.json new file mode 100644 index 00000000..4387729c --- /dev/null +++ b/tasks/github/easy/build-your-own-x/record_recent_commits/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "record_recent_commits", + "task_name": "Record Recent Commits", + "category_id": "build-your-own-x", + "category_name": "Build Your Own X (Easy)", + "description": "Summarize the latest five commits by opening an issue with their SHAs, authors, and subjects.", + "author": "Zijian Wu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "commits", + "issue" + ], + "mcp": [ + "github" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://github.com/mcpmark-source/build-your-own-x", + "stateOriginalUrl": "https://github.com/codecrafters-io/build-your-own-x" + } +} diff --git a/tasks/github/easy/build-your-own-x/record_recent_commits/verify.py b/tasks/github/easy/build-your-own-x/record_recent_commits/verify.py new file mode 100644 index 00000000..6271dc6b --- /dev/null +++ b/tasks/github/easy/build-your-own-x/record_recent_commits/verify.py @@ -0,0 +1,167 @@ +import os +import sys +from typing import List, Optional + +import requests +from dotenv import load_dotenv + +REPO_NAME = "build-your-own-x" +BRANCH = "master" +ISSUE_TITLE = "Latest 5 Commit Snapshot" +EXPECTED_HEADER = "latest 5 commits (newest first)" + + +def _request(url: str, token: str) -> 
Optional[requests.Response]: + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + } + try: + response = requests.get(url, headers=headers, timeout=30) + except Exception as exc: # pragma: no cover - network errors + print(f"Request error for {url}: {exc}", file=sys.stderr) + return None + + if response.status_code != 200: + print( + f"GitHub API returned {response.status_code} for {url}", + file=sys.stderr, + ) + return None + + return response + + +def _fetch_commits(org: str, token: str) -> Optional[List[dict]]: + url = ( + f"https://api.github.com/repos/{org}/{REPO_NAME}/commits" + f"?per_page=5&sha={BRANCH}" + ) + response = _request(url, token) + if response is None: + return None + + try: + return response.json() + except Exception as exc: + print(f"Unable to parse commits: {exc}", file=sys.stderr) + return None + + +def _find_issue(org: str, token: str) -> Optional[dict]: + page = 1 + while True: + url = ( + f"https://api.github.com/repos/{org}/{REPO_NAME}/issues" + f"?state=open&per_page=100&page={page}" + ) + response = _request(url, token) + if response is None: + return None + + try: + issues = response.json() + except Exception as exc: + print(f"Unable to parse issues: {exc}", file=sys.stderr) + return None + + if not issues: + break + + for issue in issues: + if issue.get("title") == ISSUE_TITLE: + # Exclude pull requests + if "pull_request" in issue: + continue + return issue + + page += 1 + + print( + f"No open issue titled '{ISSUE_TITLE}' was found.", + file=sys.stderr, + ) + return None + + +def verify() -> bool: + load_dotenv(".mcp_env") + + token = os.environ.get("MCP_GITHUB_TOKEN") + org = os.environ.get("GITHUB_EVAL_ORG") + + if not token: + print("MCP_GITHUB_TOKEN is missing", file=sys.stderr) + return False + + if not org: + print("GITHUB_EVAL_ORG is missing", file=sys.stderr) + return False + + commits = _fetch_commits(org, token) + if commits is None: + return False + + if len(commits) < 5: + 
print("Less than five commits returned; cannot verify.", file=sys.stderr) + return False + + issue = _find_issue(org, token) + if issue is None: + return False + + if issue.get("title") != ISSUE_TITLE: + print( + f"Found issue title '{issue.get('title')}', expected '{ISSUE_TITLE}'.", + file=sys.stderr, + ) + return False + + if (issue.get("state") or "").lower() != "open": + print("Issue must remain open.", file=sys.stderr) + return False + + body = issue.get("body") or "" + if not body.strip(): + print("Issue body is empty.", file=sys.stderr) + return False + + lines = [line.strip() for line in body.splitlines() if line.strip()] + if not lines: + print("Issue body contains no content.", file=sys.stderr) + return False + + header = lines[0].lower() + if header != EXPECTED_HEADER: + print( + "Issue body must start with 'Latest 5 commits (newest first)'.", + file=sys.stderr, + ) + return False + + entries = lines[1:] + if len(entries) != 5: + print("Issue body must list exactly five commit entries.", file=sys.stderr) + return False + + for idx in range(5): + commit = commits[idx] + sha = commit.get("sha", "") + subject = (commit.get("commit", {}).get("message", "").splitlines()[0]).strip() + author = commit.get("commit", {}).get("author", {}).get("name", "") + + expected_line = f"{idx + 1}. 
{sha} | {author} | {subject}" + actual_line = entries[idx] + if actual_line != expected_line: + print( + f"Entry {idx + 1} mismatch.\nExpected: {expected_line}\nFound: {actual_line}", + file=sys.stderr, + ) + return False + + print("Issue contains the expected latest five commits.") + return True + + +if __name__ == "__main__": + sys.exit(0 if verify() else 1) diff --git a/tasks/github/easy/claude-code/add_terminal_shortcuts_doc/description.md b/tasks/github/easy/claude-code/add_terminal_shortcuts_doc/description.md new file mode 100644 index 00000000..e5be7ba3 --- /dev/null +++ b/tasks/github/easy/claude-code/add_terminal_shortcuts_doc/description.md @@ -0,0 +1,13 @@ +Use the GitHub MCP tools to edit the `mcpmark-eval/claude-code` repository. + +1. On the `main` branch, add a new file `docs/TERMINAL_SHORTCUTS.md` containing exactly: + +``` +# Terminal Shortcuts + +- `claude plan`: Outline the next steps before making edits. +- `claude apply`: Run the plan and apply the queued changes. +- `claude check`: Re-run relevant tests or linters to validate the edits. +``` + +2. Commit with the message `docs: add terminal shortcuts reference` and push directly to `main`. 
diff --git a/tasks/github/easy/claude-code/add_terminal_shortcuts_doc/meta.json b/tasks/github/easy/claude-code/add_terminal_shortcuts_doc/meta.json new file mode 100644 index 00000000..a8289748 --- /dev/null +++ b/tasks/github/easy/claude-code/add_terminal_shortcuts_doc/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "add_terminal_shortcuts_doc", + "task_name": "Add Terminal Shortcuts Doc", + "category_id": "claude-code", + "category_name": "Claude Code (Easy)", + "description": "Add a simple terminal shortcuts reference file to docs/TERMINAL_SHORTCUTS.md and push it to main.", + "author": "Zijian Wu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "docs update", + "content creation" + ], + "mcp": [ + "github" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://github.com/mcpmark-source/claude-code", + "stateOriginalUrl": "https://github.com/anthropics/claude-code" + } +} diff --git a/tasks/github/easy/claude-code/add_terminal_shortcuts_doc/verify.py b/tasks/github/easy/claude-code/add_terminal_shortcuts_doc/verify.py new file mode 100644 index 00000000..619d6db7 --- /dev/null +++ b/tasks/github/easy/claude-code/add_terminal_shortcuts_doc/verify.py @@ -0,0 +1,85 @@ +import base64 +import os +import sys +from typing import Optional + +import requests +from dotenv import load_dotenv + +REPO_NAME = "claude-code" +TARGET_FILE = "docs/TERMINAL_SHORTCUTS.md" +BRANCH = "main" +EXPECTED_CONTENT = """# Terminal Shortcuts + +- `claude plan`: Outline the next steps before making edits. +- `claude apply`: Run the plan and apply the queued changes. +- `claude check`: Re-run relevant tests or linters to validate the edits. 
+""".strip() + + +def _download_file(org: str, token: str) -> Optional[str]: + url = f"https://api.github.com/repos/{org}/{REPO_NAME}/contents/{TARGET_FILE}?ref={BRANCH}" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + } + + try: + response = requests.get(url, headers=headers, timeout=30) + except Exception as exc: + print(f"Request error for {TARGET_FILE}: {exc}", file=sys.stderr) + return None + + if response.status_code != 200: + print( + f"GitHub API returned {response.status_code} when fetching {TARGET_FILE}", + file=sys.stderr, + ) + return None + + data = response.json() + try: + content = base64.b64decode(data.get("content", "")).decode("utf-8").strip() + except Exception as exc: + print(f"Unable to decode {TARGET_FILE}: {exc}", file=sys.stderr) + return None + + return content + + +def verify() -> bool: + load_dotenv(".mcp_env") + + token = os.environ.get("MCP_GITHUB_TOKEN") + org = os.environ.get("GITHUB_EVAL_ORG") + + if not token: + print("MCP_GITHUB_TOKEN is missing", file=sys.stderr) + return False + + if not org: + print("GITHUB_EVAL_ORG is missing", file=sys.stderr) + return False + + print(f"Checking {TARGET_FILE} in remote repository...") + content = _download_file(org, token) + + if content is None: + return False + + normalized = content.strip() + if normalized != EXPECTED_CONTENT: + print("TERMINAL_SHORTCUTS.md does not match the expected content.", file=sys.stderr) + print("Expected:") + print(EXPECTED_CONTENT) + print("Found:") + print(content) + return False + + print("All checks passed! 
docs/TERMINAL_SHORTCUTS.md contains the expected text.") + return True + + +if __name__ == "__main__": + success = verify() + sys.exit(0 if success else 1) diff --git a/tasks/github/easy/claude-code/thank_docker_pr_author/description.md b/tasks/github/easy/claude-code/thank_docker_pr_author/description.md new file mode 100644 index 00000000..1a6c3fc8 --- /dev/null +++ b/tasks/github/easy/claude-code/thank_docker_pr_author/description.md @@ -0,0 +1,4 @@ +Use the GitHub MCP tools to comment on the pull request in `mcpmark-eval/claude-code` that proposes automating Docker image builds with GitHub Actions. + +1. Skim the PR description so you understand it’s the Docker workflow automation proposal. +2. Add a new comment on that PR that thanks the author and contains all of these keywords: `Docker workflow`, `automation`, `review`. diff --git a/tasks/github/easy/claude-code/thank_docker_pr_author/meta.json b/tasks/github/easy/claude-code/thank_docker_pr_author/meta.json new file mode 100644 index 00000000..b277b94c --- /dev/null +++ b/tasks/github/easy/claude-code/thank_docker_pr_author/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "thank_docker_pr_author", + "task_name": "Thank Docker PR Author", + "category_id": "claude-code", + "category_name": "Claude Code (Easy)", + "description": "Leave a thank-you comment on the Docker automation PR mentioning the workflow automation review keywords.", + "author": "Zijian Wu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "pull request", + "comment" + ], + "mcp": [ + "github" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://github.com/mcpmark-source/claude-code", + "stateOriginalUrl": "https://github.com/anthropics/claude-code" + } +} diff --git a/tasks/github/easy/claude-code/thank_docker_pr_author/verify.py b/tasks/github/easy/claude-code/thank_docker_pr_author/verify.py new file mode 100644 index 00000000..2aa18a43 --- /dev/null +++ 
b/tasks/github/easy/claude-code/thank_docker_pr_author/verify.py @@ -0,0 +1,76 @@ +import os +import sys +from typing import Optional, Union + +import requests +from dotenv import load_dotenv + +REPO_NAME = "claude-code" +PR_NUMBER = 53 +KEYWORDS = ["docker workflow", "automation", "review"] + + +def _github_get(org: str, token: str, path: str) -> Optional[Union[list, dict]]: + url = f"https://api.github.com/repos/{org}/{REPO_NAME}/{path}" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + } + + try: + response = requests.get(url, headers=headers, timeout=30) + except Exception as exc: + print(f"Request error for {path}: {exc}", file=sys.stderr) + return None + + if response.status_code != 200: + print( + f"GitHub API returned {response.status_code} for {path}", + file=sys.stderr, + ) + return None + + return response.json() + + +def verify() -> bool: + load_dotenv(".mcp_env") + + token = os.environ.get("MCP_GITHUB_TOKEN") + org = os.environ.get("GITHUB_EVAL_ORG") + + if not token: + print("MCP_GITHUB_TOKEN is missing", file=sys.stderr) + return False + + if not org: + print("GITHUB_EVAL_ORG is missing", file=sys.stderr) + return False + + comments = _github_get(org, token, f"issues/{PR_NUMBER}/comments?per_page=100") + if comments is None: + return False + + for comment in comments: + body = comment.get("body", "").strip() + lowered = body.lower() + if not body: + continue + + if not any(thank_word in lowered for thank_word in ("thanks", "thank you")): + continue + + if all(keyword in lowered for keyword in KEYWORDS): + print("All checks passed! 
Keyword-rich thank-you comment found on PR #53.") + return True + + print( + "Did not find a thank-you comment containing all required keywords on PR #53.", + file=sys.stderr, + ) + return False + + +if __name__ == "__main__": + success = verify() + sys.exit(0 if success else 1) diff --git a/tasks/github/easy/claude-code/triage_missing_tool_result_issue/description.md b/tasks/github/easy/claude-code/triage_missing_tool_result_issue/description.md new file mode 100644 index 00000000..32691fd2 --- /dev/null +++ b/tasks/github/easy/claude-code/triage_missing_tool_result_issue/description.md @@ -0,0 +1,5 @@ +Use the GitHub MCP tools to triage issue #24 in the `mcpmark-eval/claude-code` repository. + +1. Read the issue details to understand the reported API error. +2. Add a triage comment on the issue that explicitly includes all of the following keywords: `invalid_request_error`, `toolu_01Kjp7i9iF3xJ3z9aH4pSaRw`, `tool_result`, `tool_use`. Use them while confirming the API error and asking for the missing result block. +3. Remove the `area:packaging` label from issue #24. 
diff --git a/tasks/github/easy/claude-code/triage_missing_tool_result_issue/meta.json b/tasks/github/easy/claude-code/triage_missing_tool_result_issue/meta.json new file mode 100644 index 00000000..35ae99f6 --- /dev/null +++ b/tasks/github/easy/claude-code/triage_missing_tool_result_issue/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "triage_missing_tool_result_issue", + "task_name": "Triage Missing Tool Result Issue", + "category_id": "claude-code", + "category_name": "Claude Code (Easy)", + "description": "Leave a predefined triage comment on issue #24 and remove the area:packaging label.", + "author": "Zijian Wu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "issue triage", + "github" + ], + "mcp": [ + "github" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://github.com/mcpmark-source/claude-code", + "stateOriginalUrl": "https://github.com/anthropics/claude-code" + } +} diff --git a/tasks/github/easy/claude-code/triage_missing_tool_result_issue/verify.py b/tasks/github/easy/claude-code/triage_missing_tool_result_issue/verify.py new file mode 100644 index 00000000..5df90eba --- /dev/null +++ b/tasks/github/easy/claude-code/triage_missing_tool_result_issue/verify.py @@ -0,0 +1,89 @@ +import os +import sys +from typing import Optional + +import requests +from dotenv import load_dotenv + +REPO_NAME = "claude-code" +ISSUE_NUMBER = 24 +KEYWORDS = [ + "invalid_request_error", + "toolu_01kjp7i9if3xj3z9ah4psarw", + "tool_result", + "tool_use", +] +REMOVED_LABEL = "area:packaging" + + +def _github_get(org: str, token: str, path: str) -> Optional[dict]: + url = f"https://api.github.com/repos/{org}/{REPO_NAME}/{path}" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + } + + try: + response = requests.get(url, headers=headers, timeout=30) + except Exception as exc: + print(f"Request error for {path}: {exc}", file=sys.stderr) + return None + + if response.status_code != 
200: + print( + f"GitHub API returned {response.status_code} for {path}", + file=sys.stderr, + ) + return None + + return response.json() + + +def verify() -> bool: + load_dotenv(".mcp_env") + + token = os.environ.get("MCP_GITHUB_TOKEN") + org = os.environ.get("GITHUB_EVAL_ORG") + + if not token: + print("MCP_GITHUB_TOKEN is missing", file=sys.stderr) + return False + + if not org: + print("GITHUB_EVAL_ORG is missing", file=sys.stderr) + return False + + issue = _github_get(org, token, f"issues/{ISSUE_NUMBER}") + if issue is None: + return False + + label_names = {label.get("name", "") for label in issue.get("labels", [])} + if REMOVED_LABEL in label_names: + print(f"Label '{REMOVED_LABEL}' is still present on issue #{ISSUE_NUMBER}.", file=sys.stderr) + return False + + comments = _github_get(org, token, f"issues/{ISSUE_NUMBER}/comments?per_page=100") + if comments is None: + return False + + found = False + for comment in comments: + body = comment.get("body", "").strip().lower() + if all(keyword in body for keyword in KEYWORDS): + found = True + break + + if not found: + print( + "Did not find a triage comment containing all required keywords.", + file=sys.stderr, + ) + return False + + print("All checks passed! Comment added and label removed.") + return True + + +if __name__ == "__main__": + success = verify() + sys.exit(0 if success else 1) diff --git a/tasks/github/easy/mcpmark-cicd/basic_ci_checks/description.md b/tasks/github/easy/mcpmark-cicd/basic_ci_checks/description.md new file mode 100644 index 00000000..49b5b192 --- /dev/null +++ b/tasks/github/easy/mcpmark-cicd/basic_ci_checks/description.md @@ -0,0 +1,15 @@ +Use the GitHub MCP tools to update the `mcpmark-eval/mcpmark-cicd` repository with a very small CI workflow. + +## Goal +Add a GitHub Actions workflow named **Basic CI Checks** that automatically runs linting and unit tests any time work is pushed to or proposed for the `main` branch. + +## Requirements +1. 
Create a branch called `basic-ci-checks` from `main`. +2. Add `.github/workflows/basic-ci.yml` with the following characteristics: + - Workflow name: `Basic CI Checks`. + - Trigger on both `push` and `pull_request`, limited to the `main` branch. + - Single job called `quality-checks` that runs on `ubuntu-latest` and uses Node.js 18 (`actions/setup-node`). + - Steps must include `actions/checkout`, `npm ci`, `npm run lint`, and `npm test` in that order after Node is configured. +3. Commit the workflow to your branch, open a pull request titled `Add basic CI checks`, and merge it so the workflow exists on `main`. + +That's it—no caching, matrix builds, or issue automation required. Keep it lightweight and focused on verifying the existing lint/test scripts. diff --git a/tasks/github/easy/mcpmark-cicd/basic_ci_checks/meta.json b/tasks/github/easy/mcpmark-cicd/basic_ci_checks/meta.json new file mode 100644 index 00000000..84aa1b4e --- /dev/null +++ b/tasks/github/easy/mcpmark-cicd/basic_ci_checks/meta.json @@ -0,0 +1,24 @@ +{ + "task_id": "basic_ci_checks", + "task_name": "Basic CI Checks", + "category_id": "mcpmark-cicd", + "category_name": "MCPMark CI/CD (Easy)", + "description": "Add a lightweight GitHub Actions workflow that runs npm ci, npm run lint, and npm test whenever main is updated or receives a pull request.", + "author": "Zijian Wu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "ci/cd", + "github actions", + "workflow basics" + ], + "mcp": [ + "github" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://github.com/mcpmark-source/mcpmark-cicd", + "stateOriginalUrl": null + } +} diff --git a/tasks/github/easy/mcpmark-cicd/basic_ci_checks/verify.py b/tasks/github/easy/mcpmark-cicd/basic_ci_checks/verify.py new file mode 100644 index 00000000..fbda25a4 --- /dev/null +++ b/tasks/github/easy/mcpmark-cicd/basic_ci_checks/verify.py @@ -0,0 +1,123 @@ +import base64 +import os +import sys +from typing 
import List, Optional + +import requests +from dotenv import load_dotenv + +REPO_NAME = "mcpmark-cicd" +WORKFLOW_PATH = ".github/workflows/basic-ci.yml" +BRANCH = "main" + + +def _download_file(org: str, token: str, path: str) -> Optional[str]: + url = f"https://api.github.com/repos/{org}/{REPO_NAME}/contents/{path}?ref={BRANCH}" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + } + + try: + response = requests.get(url, headers=headers, timeout=30) + except Exception as exc: # pragma: no cover - network failure + print(f"Request error for {path}: {exc}", file=sys.stderr) + return None + + if response.status_code != 200: + print( + f"GitHub API returned {response.status_code} when fetching {path}", + file=sys.stderr, + ) + return None + + data = response.json() + try: + content = base64.b64decode(data.get("content", "")).decode("utf-8") + except Exception as exc: + print(f"Unable to decode {path}: {exc}", file=sys.stderr) + return None + + return content + + +def _line_index(lines: List[str], needle: str) -> int: + for idx, line in enumerate(lines): + if needle in line: + return idx + return -1 + + +def verify() -> bool: + load_dotenv(".mcp_env") + + token = os.environ.get("MCP_GITHUB_TOKEN") + org = os.environ.get("GITHUB_EVAL_ORG") + + if not token: + print("MCP_GITHUB_TOKEN is missing", file=sys.stderr) + return False + + if not org: + print("GITHUB_EVAL_ORG is missing", file=sys.stderr) + return False + + content = _download_file(org, token, WORKFLOW_PATH) + if content is None: + print( + "Workflow file .github/workflows/basic-ci.yml was not found on main", + file=sys.stderr, + ) + return False + + normalized = content.lower() + normalized_lines = [line.strip().lower() for line in content.splitlines()] + + errors = [] + + required_snippets = { + "workflow name": "name: basic ci checks", + "job name": "quality-checks", + "checkout step": "actions/checkout", + "setup-node step": "actions/setup-node", + "node version": 
"node-version: 18", + "ubuntu runner": "runs-on: ubuntu-latest", + "push trigger": "push:", + "pull_request trigger": "pull_request:", + } + + for label, snippet in required_snippets.items(): + if snippet not in normalized: + errors.append(f"Missing {label} ({snippet}) in workflow") + + branch_limited = "- main" in normalized or "[main]" in normalized + if not branch_limited: + errors.append("Workflow triggers must be limited to the main branch") + + for command in ["npm ci", "npm run lint", "npm test"]: + if command not in normalized: + errors.append(f"Missing '{command}' step") + + # Ensure npm commands happen in the expected order + ci_index = _line_index(normalized_lines, "npm ci") + lint_index = _line_index(normalized_lines, "npm run lint") + test_index = _line_index(normalized_lines, "npm test") + + if ci_index == -1 or lint_index == -1 or test_index == -1: + errors.append("Could not find all npm commands to validate ordering") + else: + if not (ci_index < lint_index < test_index): + errors.append("npm commands must run in order: ci -> lint -> test") + + if errors: + print("Verification failed:") + for err in errors: + print(f" - {err}", file=sys.stderr) + return False + + print("āœ… basic-ci workflow found with required steps and triggers") + return True + + +if __name__ == "__main__": + sys.exit(0 if verify() else 1) diff --git a/tasks/github/easy/mcpmark-cicd/issue_lint_guard/description.md b/tasks/github/easy/mcpmark-cicd/issue_lint_guard/description.md new file mode 100644 index 00000000..ec8b2962 --- /dev/null +++ b/tasks/github/easy/mcpmark-cicd/issue_lint_guard/description.md @@ -0,0 +1,14 @@ +Use the GitHub MCP tools to wire up a tiny issue-triggered lint check for `mcpmark-eval/mcpmark-cicd`. + +## Goal +Whenever a maintainer opens the tracking issue **Lint workflow check**, the repo should automatically run `npm run lint` via GitHub Actions. Keep it simple—just prove the workflow fires for issue events. + +## Requirements +1. 
Create a branch called `issue-lint-workflow` from `main`. +2. Add `.github/workflows/issue-lint.yml` with: + - Workflow name **Issue Lint Guard**. + - Trigger: `issues` with `types: [opened]` (no push/PR triggers). + - Single job `lint` on `ubuntu-latest` using Node.js 18 via `actions/setup-node`. + - Steps in order: `actions/checkout`, `npm ci`, `npm run lint`. +3. Open a pull request titled `Add issue lint workflow`, get it merged so the workflow exists on `main`. +4. After the merge, open a new issue titled **Lint workflow check** to trigger the workflow and wait until the matching run finishes successfully. Leave the issue open; we only care that the run went green. diff --git a/tasks/github/easy/mcpmark-cicd/issue_lint_guard/meta.json b/tasks/github/easy/mcpmark-cicd/issue_lint_guard/meta.json new file mode 100644 index 00000000..82cf79e8 --- /dev/null +++ b/tasks/github/easy/mcpmark-cicd/issue_lint_guard/meta.json @@ -0,0 +1,24 @@ +{ + "task_id": "issue_lint_guard", + "task_name": "Issue Lint Guard", + "category_id": "mcpmark-cicd", + "category_name": "MCPMark CI/CD (Easy)", + "description": "Add an issue-triggered lint workflow and prove it runs when the tracking issue is opened.", + "author": "Zijian Wu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "ci/cd", + "github actions", + "issues" + ], + "mcp": [ + "github" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://github.com/mcpmark-source/mcpmark-cicd", + "stateOriginalUrl": null + } +} diff --git a/tasks/github/easy/mcpmark-cicd/issue_lint_guard/verify.py b/tasks/github/easy/mcpmark-cicd/issue_lint_guard/verify.py new file mode 100644 index 00000000..2ead9e42 --- /dev/null +++ b/tasks/github/easy/mcpmark-cicd/issue_lint_guard/verify.py @@ -0,0 +1,213 @@ +import base64 +import os +import sys +import time +from typing import List, Optional + +import requests +from dotenv import load_dotenv + +REPO_NAME = "mcpmark-cicd" +WORKFLOW_PATH = 
".github/workflows/issue-lint.yml" +WORKFLOW_FILE = "issue-lint.yml" +TARGET_BRANCH = "main" +TRACKING_ISSUE_TITLE = "Lint workflow check" +MAX_POLL_ATTEMPTS = 12 +POLL_INTERVAL_SECONDS = 10 + + +def _download_file(org: str, token: str, path: str) -> Optional[str]: + url = f"https://api.github.com/repos/{org}/{REPO_NAME}/contents/{path}?ref={TARGET_BRANCH}" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + } + + try: + response = requests.get(url, headers=headers, timeout=30) + except Exception as exc: # pragma: no cover - network error handling + print(f"Request error for {path}: {exc}", file=sys.stderr) + return None + + if response.status_code != 200: + print( + f"GitHub API returned {response.status_code} when fetching {path}", + file=sys.stderr, + ) + return None + + data = response.json() + try: + content = base64.b64decode(data.get("content", "")).decode("utf-8") + except Exception as exc: # pragma: no cover - decode error + print(f"Unable to decode {path}: {exc}", file=sys.stderr) + return None + + return content + + +def _line_index(lines: List[str], needle: str) -> int: + for idx, line in enumerate(lines): + if needle in line: + return idx + return -1 + + +def _list_workflow_runs(org: str, token: str) -> Optional[List[dict]]: + url = ( + f"https://api.github.com/repos/{org}/{REPO_NAME}/actions/workflows/{WORKFLOW_FILE}/runs" + f"?event=issues&per_page=15" + ) + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + } + + try: + response = requests.get(url, headers=headers, timeout=30) + except Exception as exc: # pragma: no cover - network error handling + print(f"Request error when listing workflow runs: {exc}", file=sys.stderr) + return None + + if response.status_code != 200: + print( + f"GitHub API returned {response.status_code} when listing workflow runs", + file=sys.stderr, + ) + return None + + data = response.json() + return data.get("workflow_runs", []) + + 
+def _wait_for_tracking_issue_run(org: str, token: str) -> bool: + for attempt in range(1, MAX_POLL_ATTEMPTS + 1): + runs = _list_workflow_runs(org, token) + if runs is None: + return False + + relevant = [ + run + for run in runs + if run.get("display_title") == TRACKING_ISSUE_TITLE + ] + + if not relevant: + print( + f"[{attempt}/{MAX_POLL_ATTEMPTS}] No Issue Lint Guard run for '{TRACKING_ISSUE_TITLE}' yet; waiting..." + ) + time.sleep(POLL_INTERVAL_SECONDS) + continue + + latest = relevant[0] + status = latest.get("status") + conclusion = latest.get("conclusion") + html_url = latest.get("html_url") + + if status != "completed": + print( + f"[{attempt}/{MAX_POLL_ATTEMPTS}] Latest run is '{status}'; waiting for completion..." + ) + time.sleep(POLL_INTERVAL_SECONDS) + continue + + if conclusion != "success": + print( + "Latest Issue Lint Guard run finished without success.", + file=sys.stderr, + ) + print(f"Status: {status}, Conclusion: {conclusion}", file=sys.stderr) + if html_url: + print(f"Run URL: {html_url}", file=sys.stderr) + return False + + if html_url: + print(f"āœ… Latest Issue Lint Guard run succeeded: {html_url}") + else: + print("āœ… Latest Issue Lint Guard run succeeded") + return True + + print( + f"Timed out waiting for a successful Issue Lint Guard run for '{TRACKING_ISSUE_TITLE}'", + file=sys.stderr, + ) + return False + + +def verify() -> bool: + load_dotenv(".mcp_env") + + token = os.environ.get("MCP_GITHUB_TOKEN") + org = os.environ.get("GITHUB_EVAL_ORG") + + if not token: + print("MCP_GITHUB_TOKEN is missing", file=sys.stderr) + return False + + if not org: + print("GITHUB_EVAL_ORG is missing", file=sys.stderr) + return False + + content = _download_file(org, token, WORKFLOW_PATH) + if content is None: + print( + "Workflow file .github/workflows/issue-lint.yml was not found on main", + file=sys.stderr, + ) + return False + + normalized = content.lower() + normalized_lines = [line.strip().lower() for line in content.splitlines()] + + errors = 
[] + + required_snippets = { + "workflow name": "name: issue lint guard", + "issues trigger": "issues:", + "types opened": "types:", + "job name": "lint:", + "runner": "runs-on: ubuntu-latest", + "checkout": "actions/checkout", + "setup-node": "actions/setup-node", + "node version": "node-version: 18", + "npm ci": "npm ci", + "npm run lint": "npm run lint", + } + + for label, snippet in required_snippets.items(): + if snippet not in normalized: + errors.append(f"Missing {label} ({snippet}) in workflow") + + types_line = next( + (line for line in normalized_lines if "types" in line and "opened" in line), + None, + ) + if types_line is None: + errors.append("issues trigger must limit types to include 'opened'") + + checkout_idx = _line_index(normalized_lines, "actions/checkout") + setup_idx = _line_index(normalized_lines, "actions/setup-node") + ci_idx = _line_index(normalized_lines, "npm ci") + lint_idx = _line_index(normalized_lines, "npm run lint") + + if -1 in [checkout_idx, setup_idx, ci_idx, lint_idx]: + errors.append("Could not determine workflow step ordering") + else: + if not (checkout_idx < setup_idx < ci_idx < lint_idx): + errors.append( + "Steps must run in order: checkout -> setup-node -> npm ci -> npm run lint" + ) + + if errors: + print("Workflow validation failed:") + for err in errors: + print(f" - {err}", file=sys.stderr) + return False + + print("āœ… issue-lint workflow file looks correct") + + return _wait_for_tracking_issue_run(org, token) + + +if __name__ == "__main__": + sys.exit(0 if verify() else 1) diff --git a/tasks/github/easy/mcpmark-cicd/nightly_health_check/description.md b/tasks/github/easy/mcpmark-cicd/nightly_health_check/description.md new file mode 100644 index 00000000..5dccd21f --- /dev/null +++ b/tasks/github/easy/mcpmark-cicd/nightly_health_check/description.md @@ -0,0 +1,14 @@ +Use the GitHub MCP tools to add a tiny bit of automation to `mcpmark-eval/mcpmark-cicd`. 
+ +Goal: every night the repo should run the existing health check script. + +Do the usual branch/PR flow with a branch named `nightly-health` and a PR titled `Add nightly health check`. + +Create `.github/workflows/nightly-health.yml` with: +- workflow name `Nightly Health Check` +- triggers: `workflow_dispatch` plus a cron schedule `0 2 * * *` +- one job called `health-check` on `ubuntu-latest` +- use Node.js 18 via `actions/setup-node` +- steps in order: checkout, npm ci, `npm run health-check` + +Merge the PR so the workflow lives on `main`. diff --git a/tasks/github/easy/mcpmark-cicd/nightly_health_check/meta.json b/tasks/github/easy/mcpmark-cicd/nightly_health_check/meta.json new file mode 100644 index 00000000..976412b4 --- /dev/null +++ b/tasks/github/easy/mcpmark-cicd/nightly_health_check/meta.json @@ -0,0 +1,24 @@ +{ + "task_id": "nightly_health_check", + "task_name": "Nightly Health Check", + "category_id": "mcpmark-cicd", + "category_name": "MCPMark CI/CD (Easy)", + "description": "Add a scheduled workflow that runs the npm health check script every night.", + "author": "Zijian Wu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "ci/cd", + "github actions", + "scheduling" + ], + "mcp": [ + "github" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://github.com/mcpmark-source/mcpmark-cicd", + "stateOriginalUrl": null + } +} diff --git a/tasks/github/easy/mcpmark-cicd/nightly_health_check/verify.py b/tasks/github/easy/mcpmark-cicd/nightly_health_check/verify.py new file mode 100644 index 00000000..33cb9a5e --- /dev/null +++ b/tasks/github/easy/mcpmark-cicd/nightly_health_check/verify.py @@ -0,0 +1,125 @@ +import base64 +import os +import sys +from typing import List, Optional + +import requests +from dotenv import load_dotenv + +REPO_NAME = "mcpmark-cicd" +WORKFLOW_PATH = ".github/workflows/nightly-health.yml" +BRANCH = "main" + + +def _download_file(org: str, token: str, path: str) -> 
Optional[str]: + url = f"https://api.github.com/repos/{org}/{REPO_NAME}/contents/{path}?ref={BRANCH}" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + } + + try: + response = requests.get(url, headers=headers, timeout=30) + except Exception as exc: # pragma: no cover + print(f"Request error for {path}: {exc}", file=sys.stderr) + return None + + if response.status_code != 200: + print( + f"GitHub API returned {response.status_code} when fetching {path}", + file=sys.stderr, + ) + return None + + data = response.json() + try: + content = base64.b64decode(data.get("content", "")).decode("utf-8") + except Exception as exc: + print(f"Unable to decode {path}: {exc}", file=sys.stderr) + return None + + return content + + +def _line_index(lines: List[str], needle: str) -> int: + for idx, line in enumerate(lines): + if needle in line: + return idx + return -1 + + +def verify() -> bool: + load_dotenv(".mcp_env") + + token = os.environ.get("MCP_GITHUB_TOKEN") + org = os.environ.get("GITHUB_EVAL_ORG") + + if not token: + print("MCP_GITHUB_TOKEN is missing", file=sys.stderr) + return False + + if not org: + print("GITHUB_EVAL_ORG is missing", file=sys.stderr) + return False + + content = _download_file(org, token, WORKFLOW_PATH) + if content is None: + print( + "Workflow file .github/workflows/nightly-health.yml was not found on main", + file=sys.stderr, + ) + return False + + normalized = content.lower() + normalized_lines = [line.strip().lower() for line in content.splitlines()] + + errors = [] + + required_bits = { + "workflow name": "name: nightly health check", + "workflow_dispatch trigger": "workflow_dispatch:", + "schedule": "schedule:", + "cron": "0 2 * * *", + "job name": "health-check:", + "runner": "runs-on: ubuntu-latest", + "checkout": "actions/checkout", + "setup-node": "actions/setup-node", + "node version": "node-version: 18", + "npm ci": "npm ci", + "health script": "npm run health-check", + } + + for label, 
snippet in required_bits.items(): + if snippet not in normalized: + errors.append(f"Missing {label} ({snippet}) in workflow") + + schedule_index = _line_index(normalized_lines, "schedule:") + cron_index = _line_index(normalized_lines, "- cron: '0 2 * * *'") + if cron_index == -1: + cron_index = _line_index(normalized_lines, "cron: '0 2 * * *'") + if cron_index == -1: + cron_index = _line_index(normalized_lines, 'cron: "0 2 * * *"') + + if schedule_index == -1 or cron_index == -1 or cron_index < schedule_index: + errors.append("Cron expression must appear under schedule trigger") + + ci_index = _line_index(normalized_lines, "npm ci") + health_index = _line_index(normalized_lines, "npm run health-check") + if ci_index == -1 or health_index == -1: + errors.append("npm ci and npm run health-check must both appear") + else: + if not ci_index < health_index: + errors.append("npm ci must run before npm run health-check") + + if errors: + print("Verification failed:") + for err in errors: + print(f" - {err}", file=sys.stderr) + return False + + print("āœ… nightly-health workflow found with required schedule and steps") + return True + + +if __name__ == "__main__": + sys.exit(0 if verify() else 1) diff --git a/tasks/github/easy/missing-semester/count_translations/description.md b/tasks/github/easy/missing-semester/count_translations/description.md new file mode 100644 index 00000000..72afd63b --- /dev/null +++ b/tasks/github/easy/missing-semester/count_translations/description.md @@ -0,0 +1,12 @@ +Use the GitHub MCP tools to inspect the `mcpmark-eval/missing-semester` repository. + +1. Navigate the repository to find the list of community translations that appears on the site's home page. +2. Determine how many translation links are currently listed. +3. Record both the count and the specific file you used as evidence by creating an `ANSWER.md` file in the repository root that contains exactly: + +``` +Translation Count: +Source: +``` + +4. 
Commit the new file and push the change to `master`. diff --git a/tasks/github/easy/missing-semester/count_translations/meta.json b/tasks/github/easy/missing-semester/count_translations/meta.json new file mode 100644 index 00000000..ebc46510 --- /dev/null +++ b/tasks/github/easy/missing-semester/count_translations/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "count_translations", + "task_name": "Count Translations", + "category_id": "missing-semester", + "category_name": "Missing Semester (Easy)", + "description": "Use GitHub MCP to count the translations listed on the home page, record the value in ANSWER.md, and push the change to master.", + "author": "Zijian Wu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "content search", + "answer file" + ], + "mcp": [ + "github" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://github.com/mcpmark-source/missing-semester", + "stateOriginalUrl": "https://github.com/missing-semester/missing-semester" + } +} diff --git a/tasks/github/easy/missing-semester/count_translations/verify.py b/tasks/github/easy/missing-semester/count_translations/verify.py new file mode 100644 index 00000000..a67d8791 --- /dev/null +++ b/tasks/github/easy/missing-semester/count_translations/verify.py @@ -0,0 +1,92 @@ +import base64 +import os +import sys +from typing import Optional + +import requests +from dotenv import load_dotenv + +REPO_NAME = "missing-semester" +TARGET_FILE = "ANSWER.md" +BRANCH = "master" +EXPECTED_COUNT = "translation count: 14" +EXPECTED_SOURCE = "source: index.md" + + +def _download_file(org: str, token: str, path: str) -> Optional[str]: + url = f"https://api.github.com/repos/{org}/{REPO_NAME}/contents/{path}?ref={BRANCH}" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + } + + try: + response = requests.get(url, headers=headers, timeout=30) + except Exception as exc: + print(f"Request error for {path}: {exc}", 
file=sys.stderr) + return None + + if response.status_code != 200: + print( + f"GitHub API returned {response.status_code} when fetching {path}", + file=sys.stderr, + ) + return None + + data = response.json() + try: + content = base64.b64decode(data.get("content", "")).decode("utf-8").strip() + except Exception as exc: + print(f"Unable to decode {path}: {exc}", file=sys.stderr) + return None + + return content + + +def verify() -> bool: + load_dotenv(".mcp_env") + + token = os.environ.get("MCP_GITHUB_TOKEN") + org = os.environ.get("GITHUB_EVAL_ORG") + + if not token: + print("MCP_GITHUB_TOKEN is missing", file=sys.stderr) + return False + + if not org: + print("GITHUB_EVAL_ORG is missing", file=sys.stderr) + return False + + print("Checking ANSWER.md in remote repository...") + answer_content = _download_file(org, token, TARGET_FILE) + + if answer_content is None: + return False + + normalized = " ".join(answer_content.lower().split()) + + if EXPECTED_COUNT not in normalized: + print( + "ANSWER.md must include 'Translation Count: 14' (spacing/casing ignored).", + file=sys.stderr, + ) + print("Found:") + print(answer_content) + return False + + if EXPECTED_SOURCE not in normalized: + print( + "ANSWER.md must include 'Source: index.md' (spacing/casing ignored).", + file=sys.stderr, + ) + print("Found:") + print(answer_content) + return False + + print("All checks passed! ANSWER.md contains the expected count and source.") + return True + + +if __name__ == "__main__": + success = verify() + sys.exit(0 if success else 1) diff --git a/tasks/github/easy/missing-semester/find_ga_tracking_id/description.md b/tasks/github/easy/missing-semester/find_ga_tracking_id/description.md new file mode 100644 index 00000000..51e08f23 --- /dev/null +++ b/tasks/github/easy/missing-semester/find_ga_tracking_id/description.md @@ -0,0 +1,10 @@ +Use the GitHub MCP tools to inspect the `mcpmark-eval/missing-semester` repository. + +1. 
Determine the Analytics tracking ID that the Missing Semester site declares in its configuration. +2. Create an `ANSWER.md` file in the repository root that contains exactly: + +``` +Analytics Tracking ID: +``` + +3. Commit the new file and push the change to `master`. diff --git a/tasks/github/easy/missing-semester/find_ga_tracking_id/meta.json b/tasks/github/easy/missing-semester/find_ga_tracking_id/meta.json new file mode 100644 index 00000000..cc9a9674 --- /dev/null +++ b/tasks/github/easy/missing-semester/find_ga_tracking_id/meta.json @@ -0,0 +1,24 @@ +{ + "task_id": "find_ga_tracking_id", + "task_name": "Find GA Tracking ID", + "category_id": "missing-semester", + "category_name": "Missing Semester (Easy)", + "description": "Use GitHub MCP to discover the single Google Analytics tracking ID declared in the site configuration, write it to ANSWER.md, and push the change to master.", + "author": "Zijian Wu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "config search", + "analytics", + "answer file" + ], + "mcp": [ + "github" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://github.com/mcpmark-source/missing-semester", + "stateOriginalUrl": "https://github.com/missing-semester/missing-semester" + } +} diff --git a/tasks/github/easy/missing-semester/find_ga_tracking_id/verify.py b/tasks/github/easy/missing-semester/find_ga_tracking_id/verify.py new file mode 100644 index 00000000..68ac4428 --- /dev/null +++ b/tasks/github/easy/missing-semester/find_ga_tracking_id/verify.py @@ -0,0 +1,84 @@ +import base64 +import os +import sys +from typing import Optional + +import requests +from dotenv import load_dotenv + +# Accept either wording, regardless of casing +EXPECTED_VARIANTS = { + "google analytics tracking id: g-p7wvhd84d1", + "analytics tracking id: g-p7wvhd84d1", +} +REPO_NAME = "missing-semester" +TARGET_FILE = "ANSWER.md" +BRANCH = "master" + + +def _download_file(org: str, token: str) -> 
Optional[str]: + url = f"https://api.github.com/repos/{org}/{REPO_NAME}/contents/{TARGET_FILE}?ref={BRANCH}" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + } + + try: + response = requests.get(url, headers=headers) + except Exception as exc: + print(f"Request error for {TARGET_FILE}: {exc}", file=sys.stderr) + return None + + if response.status_code != 200: + print( + f"GitHub API returned {response.status_code} when fetching {TARGET_FILE}", + file=sys.stderr, + ) + return None + + data = response.json() + try: + content = base64.b64decode(data.get("content", "")).decode("utf-8").strip() + except Exception as exc: + print(f"Unable to decode {TARGET_FILE}: {exc}", file=sys.stderr) + return None + + return content + + +def verify() -> bool: + load_dotenv(".mcp_env") + + token = os.environ.get("MCP_GITHUB_TOKEN") + org = os.environ.get("GITHUB_EVAL_ORG") + + if not token: + print("MCP_GITHUB_TOKEN is missing", file=sys.stderr) + return False + + if not org: + print("GITHUB_EVAL_ORG is missing", file=sys.stderr) + return False + + print("Checking ANSWER.md in remote repository...") + answer_content = _download_file(org, token) + + if answer_content is None: + return False + + normalized = answer_content.strip().lower() + if normalized not in EXPECTED_VARIANTS: + print("ANSWER.md does not contain an accepted tracking ID format", file=sys.stderr) + print("Accepted variants:", file=sys.stderr) + for variant in EXPECTED_VARIANTS: + print(f" - {variant}", file=sys.stderr) + print(f"Found: {answer_content}", file=sys.stderr) + return False + + print("All checks passed! 
ANSWER.md matches an accepted content variant.") + return True + + +if __name__ == "__main__": + success = verify() + sys.exit(0 if success else 1) diff --git a/tasks/github/build_your_own_x/find_commit_date/description.md b/tasks/github/standard/build_your_own_x/find_commit_date/description.md similarity index 100% rename from tasks/github/build_your_own_x/find_commit_date/description.md rename to tasks/github/standard/build_your_own_x/find_commit_date/description.md diff --git a/tasks/github/build_your_own_x/find_commit_date/meta.json b/tasks/github/standard/build_your_own_x/find_commit_date/meta.json similarity index 100% rename from tasks/github/build_your_own_x/find_commit_date/meta.json rename to tasks/github/standard/build_your_own_x/find_commit_date/meta.json diff --git a/tasks/github/build_your_own_x/find_commit_date/verify.py b/tasks/github/standard/build_your_own_x/find_commit_date/verify.py similarity index 100% rename from tasks/github/build_your_own_x/find_commit_date/verify.py rename to tasks/github/standard/build_your_own_x/find_commit_date/verify.py diff --git a/tasks/github/build_your_own_x/find_rag_commit/description.md b/tasks/github/standard/build_your_own_x/find_rag_commit/description.md similarity index 100% rename from tasks/github/build_your_own_x/find_rag_commit/description.md rename to tasks/github/standard/build_your_own_x/find_rag_commit/description.md diff --git a/tasks/github/build_your_own_x/find_rag_commit/meta.json b/tasks/github/standard/build_your_own_x/find_rag_commit/meta.json similarity index 100% rename from tasks/github/build_your_own_x/find_rag_commit/meta.json rename to tasks/github/standard/build_your_own_x/find_rag_commit/meta.json diff --git a/tasks/github/build_your_own_x/find_rag_commit/verify.py b/tasks/github/standard/build_your_own_x/find_rag_commit/verify.py similarity index 100% rename from tasks/github/build_your_own_x/find_rag_commit/verify.py rename to 
tasks/github/standard/build_your_own_x/find_rag_commit/verify.py diff --git a/tasks/github/claude-code/automated_changelog_generation/description.md b/tasks/github/standard/claude-code/automated_changelog_generation/description.md similarity index 100% rename from tasks/github/claude-code/automated_changelog_generation/description.md rename to tasks/github/standard/claude-code/automated_changelog_generation/description.md diff --git a/tasks/github/claude-code/automated_changelog_generation/meta.json b/tasks/github/standard/claude-code/automated_changelog_generation/meta.json similarity index 100% rename from tasks/github/claude-code/automated_changelog_generation/meta.json rename to tasks/github/standard/claude-code/automated_changelog_generation/meta.json diff --git a/tasks/github/claude-code/automated_changelog_generation/verify.py b/tasks/github/standard/claude-code/automated_changelog_generation/verify.py similarity index 100% rename from tasks/github/claude-code/automated_changelog_generation/verify.py rename to tasks/github/standard/claude-code/automated_changelog_generation/verify.py diff --git a/tasks/github/claude-code/claude_collaboration_analysis/description.md b/tasks/github/standard/claude-code/claude_collaboration_analysis/description.md similarity index 100% rename from tasks/github/claude-code/claude_collaboration_analysis/description.md rename to tasks/github/standard/claude-code/claude_collaboration_analysis/description.md diff --git a/tasks/github/claude-code/claude_collaboration_analysis/meta.json b/tasks/github/standard/claude-code/claude_collaboration_analysis/meta.json similarity index 100% rename from tasks/github/claude-code/claude_collaboration_analysis/meta.json rename to tasks/github/standard/claude-code/claude_collaboration_analysis/meta.json diff --git a/tasks/github/claude-code/claude_collaboration_analysis/verify.py b/tasks/github/standard/claude-code/claude_collaboration_analysis/verify.py similarity index 100% rename from 
tasks/github/claude-code/claude_collaboration_analysis/verify.py rename to tasks/github/standard/claude-code/claude_collaboration_analysis/verify.py diff --git a/tasks/github/claude-code/critical_issue_hotfix_workflow/description.md b/tasks/github/standard/claude-code/critical_issue_hotfix_workflow/description.md similarity index 100% rename from tasks/github/claude-code/critical_issue_hotfix_workflow/description.md rename to tasks/github/standard/claude-code/critical_issue_hotfix_workflow/description.md diff --git a/tasks/github/claude-code/critical_issue_hotfix_workflow/meta.json b/tasks/github/standard/claude-code/critical_issue_hotfix_workflow/meta.json similarity index 100% rename from tasks/github/claude-code/critical_issue_hotfix_workflow/meta.json rename to tasks/github/standard/claude-code/critical_issue_hotfix_workflow/meta.json diff --git a/tasks/github/claude-code/critical_issue_hotfix_workflow/verify.py b/tasks/github/standard/claude-code/critical_issue_hotfix_workflow/verify.py similarity index 100% rename from tasks/github/claude-code/critical_issue_hotfix_workflow/verify.py rename to tasks/github/standard/claude-code/critical_issue_hotfix_workflow/verify.py diff --git a/tasks/github/claude-code/feature_commit_tracking/description.md b/tasks/github/standard/claude-code/feature_commit_tracking/description.md similarity index 100% rename from tasks/github/claude-code/feature_commit_tracking/description.md rename to tasks/github/standard/claude-code/feature_commit_tracking/description.md diff --git a/tasks/github/claude-code/feature_commit_tracking/meta.json b/tasks/github/standard/claude-code/feature_commit_tracking/meta.json similarity index 100% rename from tasks/github/claude-code/feature_commit_tracking/meta.json rename to tasks/github/standard/claude-code/feature_commit_tracking/meta.json diff --git a/tasks/github/claude-code/feature_commit_tracking/verify.py b/tasks/github/standard/claude-code/feature_commit_tracking/verify.py similarity index 
100% rename from tasks/github/claude-code/feature_commit_tracking/verify.py rename to tasks/github/standard/claude-code/feature_commit_tracking/verify.py diff --git a/tasks/github/claude-code/label_color_standardization/description.md b/tasks/github/standard/claude-code/label_color_standardization/description.md similarity index 100% rename from tasks/github/claude-code/label_color_standardization/description.md rename to tasks/github/standard/claude-code/label_color_standardization/description.md diff --git a/tasks/github/claude-code/label_color_standardization/meta.json b/tasks/github/standard/claude-code/label_color_standardization/meta.json similarity index 100% rename from tasks/github/claude-code/label_color_standardization/meta.json rename to tasks/github/standard/claude-code/label_color_standardization/meta.json diff --git a/tasks/github/claude-code/label_color_standardization/verify.py b/tasks/github/standard/claude-code/label_color_standardization/verify.py similarity index 100% rename from tasks/github/claude-code/label_color_standardization/verify.py rename to tasks/github/standard/claude-code/label_color_standardization/verify.py diff --git a/tasks/github/easyr1/advanced_branch_strategy/description.md b/tasks/github/standard/easyr1/advanced_branch_strategy/description.md similarity index 100% rename from tasks/github/easyr1/advanced_branch_strategy/description.md rename to tasks/github/standard/easyr1/advanced_branch_strategy/description.md diff --git a/tasks/github/easyr1/advanced_branch_strategy/meta.json b/tasks/github/standard/easyr1/advanced_branch_strategy/meta.json similarity index 100% rename from tasks/github/easyr1/advanced_branch_strategy/meta.json rename to tasks/github/standard/easyr1/advanced_branch_strategy/meta.json diff --git a/tasks/github/easyr1/advanced_branch_strategy/verify.py b/tasks/github/standard/easyr1/advanced_branch_strategy/verify.py similarity index 100% rename from tasks/github/easyr1/advanced_branch_strategy/verify.py 
rename to tasks/github/standard/easyr1/advanced_branch_strategy/verify.py diff --git a/tasks/github/easyr1/config_parameter_audit/description.md b/tasks/github/standard/easyr1/config_parameter_audit/description.md similarity index 100% rename from tasks/github/easyr1/config_parameter_audit/description.md rename to tasks/github/standard/easyr1/config_parameter_audit/description.md diff --git a/tasks/github/easyr1/config_parameter_audit/meta.json b/tasks/github/standard/easyr1/config_parameter_audit/meta.json similarity index 100% rename from tasks/github/easyr1/config_parameter_audit/meta.json rename to tasks/github/standard/easyr1/config_parameter_audit/meta.json diff --git a/tasks/github/easyr1/config_parameter_audit/verify.py b/tasks/github/standard/easyr1/config_parameter_audit/verify.py similarity index 100% rename from tasks/github/easyr1/config_parameter_audit/verify.py rename to tasks/github/standard/easyr1/config_parameter_audit/verify.py diff --git a/tasks/github/easyr1/performance_regression_investigation/description.md b/tasks/github/standard/easyr1/performance_regression_investigation/description.md similarity index 100% rename from tasks/github/easyr1/performance_regression_investigation/description.md rename to tasks/github/standard/easyr1/performance_regression_investigation/description.md diff --git a/tasks/github/easyr1/performance_regression_investigation/meta.json b/tasks/github/standard/easyr1/performance_regression_investigation/meta.json similarity index 100% rename from tasks/github/easyr1/performance_regression_investigation/meta.json rename to tasks/github/standard/easyr1/performance_regression_investigation/meta.json diff --git a/tasks/github/easyr1/performance_regression_investigation/verify.py b/tasks/github/standard/easyr1/performance_regression_investigation/verify.py similarity index 100% rename from tasks/github/easyr1/performance_regression_investigation/verify.py rename to 
tasks/github/standard/easyr1/performance_regression_investigation/verify.py diff --git a/tasks/github/easyr1/qwen3_issue_management/description.md b/tasks/github/standard/easyr1/qwen3_issue_management/description.md similarity index 100% rename from tasks/github/easyr1/qwen3_issue_management/description.md rename to tasks/github/standard/easyr1/qwen3_issue_management/description.md diff --git a/tasks/github/easyr1/qwen3_issue_management/meta.json b/tasks/github/standard/easyr1/qwen3_issue_management/meta.json similarity index 100% rename from tasks/github/easyr1/qwen3_issue_management/meta.json rename to tasks/github/standard/easyr1/qwen3_issue_management/meta.json diff --git a/tasks/github/easyr1/qwen3_issue_management/verify.py b/tasks/github/standard/easyr1/qwen3_issue_management/verify.py similarity index 100% rename from tasks/github/easyr1/qwen3_issue_management/verify.py rename to tasks/github/standard/easyr1/qwen3_issue_management/verify.py diff --git a/tasks/github/harmony/fix_conflict/description.md b/tasks/github/standard/harmony/fix_conflict/description.md similarity index 100% rename from tasks/github/harmony/fix_conflict/description.md rename to tasks/github/standard/harmony/fix_conflict/description.md diff --git a/tasks/github/harmony/fix_conflict/meta.json b/tasks/github/standard/harmony/fix_conflict/meta.json similarity index 100% rename from tasks/github/harmony/fix_conflict/meta.json rename to tasks/github/standard/harmony/fix_conflict/meta.json diff --git a/tasks/github/harmony/fix_conflict/verify.py b/tasks/github/standard/harmony/fix_conflict/verify.py similarity index 100% rename from tasks/github/harmony/fix_conflict/verify.py rename to tasks/github/standard/harmony/fix_conflict/verify.py diff --git a/tasks/github/harmony/issue_pr_commit_workflow/description.md b/tasks/github/standard/harmony/issue_pr_commit_workflow/description.md similarity index 100% rename from tasks/github/harmony/issue_pr_commit_workflow/description.md rename to 
tasks/github/standard/harmony/issue_pr_commit_workflow/description.md diff --git a/tasks/github/harmony/issue_pr_commit_workflow/meta.json b/tasks/github/standard/harmony/issue_pr_commit_workflow/meta.json similarity index 100% rename from tasks/github/harmony/issue_pr_commit_workflow/meta.json rename to tasks/github/standard/harmony/issue_pr_commit_workflow/meta.json diff --git a/tasks/github/harmony/issue_pr_commit_workflow/verify.py b/tasks/github/standard/harmony/issue_pr_commit_workflow/verify.py similarity index 100% rename from tasks/github/harmony/issue_pr_commit_workflow/verify.py rename to tasks/github/standard/harmony/issue_pr_commit_workflow/verify.py diff --git a/tasks/github/harmony/issue_tagging_pr_closure/description.md b/tasks/github/standard/harmony/issue_tagging_pr_closure/description.md similarity index 100% rename from tasks/github/harmony/issue_tagging_pr_closure/description.md rename to tasks/github/standard/harmony/issue_tagging_pr_closure/description.md diff --git a/tasks/github/harmony/issue_tagging_pr_closure/meta.json b/tasks/github/standard/harmony/issue_tagging_pr_closure/meta.json similarity index 100% rename from tasks/github/harmony/issue_tagging_pr_closure/meta.json rename to tasks/github/standard/harmony/issue_tagging_pr_closure/meta.json diff --git a/tasks/github/harmony/issue_tagging_pr_closure/verify.py b/tasks/github/standard/harmony/issue_tagging_pr_closure/verify.py similarity index 100% rename from tasks/github/harmony/issue_tagging_pr_closure/verify.py rename to tasks/github/standard/harmony/issue_tagging_pr_closure/verify.py diff --git a/tasks/github/harmony/multi_branch_commit_aggregation/description.md b/tasks/github/standard/harmony/multi_branch_commit_aggregation/description.md similarity index 100% rename from tasks/github/harmony/multi_branch_commit_aggregation/description.md rename to tasks/github/standard/harmony/multi_branch_commit_aggregation/description.md diff --git 
a/tasks/github/harmony/multi_branch_commit_aggregation/meta.json b/tasks/github/standard/harmony/multi_branch_commit_aggregation/meta.json similarity index 100% rename from tasks/github/harmony/multi_branch_commit_aggregation/meta.json rename to tasks/github/standard/harmony/multi_branch_commit_aggregation/meta.json diff --git a/tasks/github/harmony/multi_branch_commit_aggregation/verify.py b/tasks/github/standard/harmony/multi_branch_commit_aggregation/verify.py similarity index 100% rename from tasks/github/harmony/multi_branch_commit_aggregation/verify.py rename to tasks/github/standard/harmony/multi_branch_commit_aggregation/verify.py diff --git a/tasks/github/harmony/release_management_workflow/description.md b/tasks/github/standard/harmony/release_management_workflow/description.md similarity index 100% rename from tasks/github/harmony/release_management_workflow/description.md rename to tasks/github/standard/harmony/release_management_workflow/description.md diff --git a/tasks/github/harmony/release_management_workflow/meta.json b/tasks/github/standard/harmony/release_management_workflow/meta.json similarity index 100% rename from tasks/github/harmony/release_management_workflow/meta.json rename to tasks/github/standard/harmony/release_management_workflow/meta.json diff --git a/tasks/github/harmony/release_management_workflow/verify.py b/tasks/github/standard/harmony/release_management_workflow/verify.py similarity index 100% rename from tasks/github/harmony/release_management_workflow/verify.py rename to tasks/github/standard/harmony/release_management_workflow/verify.py diff --git a/tasks/github/mcpmark-cicd/deployment_status_workflow/description.md b/tasks/github/standard/mcpmark-cicd/deployment_status_workflow/description.md similarity index 100% rename from tasks/github/mcpmark-cicd/deployment_status_workflow/description.md rename to tasks/github/standard/mcpmark-cicd/deployment_status_workflow/description.md diff --git 
a/tasks/github/mcpmark-cicd/deployment_status_workflow/meta.json b/tasks/github/standard/mcpmark-cicd/deployment_status_workflow/meta.json similarity index 100% rename from tasks/github/mcpmark-cicd/deployment_status_workflow/meta.json rename to tasks/github/standard/mcpmark-cicd/deployment_status_workflow/meta.json diff --git a/tasks/github/mcpmark-cicd/deployment_status_workflow/verify.py b/tasks/github/standard/mcpmark-cicd/deployment_status_workflow/verify.py similarity index 100% rename from tasks/github/mcpmark-cicd/deployment_status_workflow/verify.py rename to tasks/github/standard/mcpmark-cicd/deployment_status_workflow/verify.py diff --git a/tasks/github/mcpmark-cicd/issue_management_workflow/description.md b/tasks/github/standard/mcpmark-cicd/issue_management_workflow/description.md similarity index 100% rename from tasks/github/mcpmark-cicd/issue_management_workflow/description.md rename to tasks/github/standard/mcpmark-cicd/issue_management_workflow/description.md diff --git a/tasks/github/mcpmark-cicd/issue_management_workflow/meta.json b/tasks/github/standard/mcpmark-cicd/issue_management_workflow/meta.json similarity index 100% rename from tasks/github/mcpmark-cicd/issue_management_workflow/meta.json rename to tasks/github/standard/mcpmark-cicd/issue_management_workflow/meta.json diff --git a/tasks/github/mcpmark-cicd/issue_management_workflow/verify.py b/tasks/github/standard/mcpmark-cicd/issue_management_workflow/verify.py similarity index 100% rename from tasks/github/mcpmark-cicd/issue_management_workflow/verify.py rename to tasks/github/standard/mcpmark-cicd/issue_management_workflow/verify.py diff --git a/tasks/github/mcpmark-cicd/linting_ci_workflow/description.md b/tasks/github/standard/mcpmark-cicd/linting_ci_workflow/description.md similarity index 100% rename from tasks/github/mcpmark-cicd/linting_ci_workflow/description.md rename to tasks/github/standard/mcpmark-cicd/linting_ci_workflow/description.md diff --git 
a/tasks/github/mcpmark-cicd/linting_ci_workflow/meta.json b/tasks/github/standard/mcpmark-cicd/linting_ci_workflow/meta.json similarity index 100% rename from tasks/github/mcpmark-cicd/linting_ci_workflow/meta.json rename to tasks/github/standard/mcpmark-cicd/linting_ci_workflow/meta.json diff --git a/tasks/github/mcpmark-cicd/linting_ci_workflow/verify.py b/tasks/github/standard/mcpmark-cicd/linting_ci_workflow/verify.py similarity index 100% rename from tasks/github/mcpmark-cicd/linting_ci_workflow/verify.py rename to tasks/github/standard/mcpmark-cicd/linting_ci_workflow/verify.py diff --git a/tasks/github/mcpmark-cicd/pr_automation_workflow/description.md b/tasks/github/standard/mcpmark-cicd/pr_automation_workflow/description.md similarity index 100% rename from tasks/github/mcpmark-cicd/pr_automation_workflow/description.md rename to tasks/github/standard/mcpmark-cicd/pr_automation_workflow/description.md diff --git a/tasks/github/mcpmark-cicd/pr_automation_workflow/meta.json b/tasks/github/standard/mcpmark-cicd/pr_automation_workflow/meta.json similarity index 100% rename from tasks/github/mcpmark-cicd/pr_automation_workflow/meta.json rename to tasks/github/standard/mcpmark-cicd/pr_automation_workflow/meta.json diff --git a/tasks/github/mcpmark-cicd/pr_automation_workflow/verify.py b/tasks/github/standard/mcpmark-cicd/pr_automation_workflow/verify.py similarity index 100% rename from tasks/github/mcpmark-cicd/pr_automation_workflow/verify.py rename to tasks/github/standard/mcpmark-cicd/pr_automation_workflow/verify.py diff --git a/tasks/github/missing-semester/assign_contributor_labels/description.md b/tasks/github/standard/missing-semester/assign_contributor_labels/description.md similarity index 100% rename from tasks/github/missing-semester/assign_contributor_labels/description.md rename to tasks/github/standard/missing-semester/assign_contributor_labels/description.md diff --git a/tasks/github/missing-semester/assign_contributor_labels/meta.json 
b/tasks/github/standard/missing-semester/assign_contributor_labels/meta.json similarity index 100% rename from tasks/github/missing-semester/assign_contributor_labels/meta.json rename to tasks/github/standard/missing-semester/assign_contributor_labels/meta.json diff --git a/tasks/github/missing-semester/assign_contributor_labels/verify.py b/tasks/github/standard/missing-semester/assign_contributor_labels/verify.py similarity index 100% rename from tasks/github/missing-semester/assign_contributor_labels/verify.py rename to tasks/github/standard/missing-semester/assign_contributor_labels/verify.py diff --git a/tasks/github/missing-semester/find_legacy_name/description.md b/tasks/github/standard/missing-semester/find_legacy_name/description.md similarity index 100% rename from tasks/github/missing-semester/find_legacy_name/description.md rename to tasks/github/standard/missing-semester/find_legacy_name/description.md diff --git a/tasks/github/missing-semester/find_legacy_name/meta.json b/tasks/github/standard/missing-semester/find_legacy_name/meta.json similarity index 100% rename from tasks/github/missing-semester/find_legacy_name/meta.json rename to tasks/github/standard/missing-semester/find_legacy_name/meta.json diff --git a/tasks/github/missing-semester/find_legacy_name/verify.py b/tasks/github/standard/missing-semester/find_legacy_name/verify.py similarity index 100% rename from tasks/github/missing-semester/find_legacy_name/verify.py rename to tasks/github/standard/missing-semester/find_legacy_name/verify.py diff --git a/tasks/github/missing-semester/find_salient_file/description.md b/tasks/github/standard/missing-semester/find_salient_file/description.md similarity index 100% rename from tasks/github/missing-semester/find_salient_file/description.md rename to tasks/github/standard/missing-semester/find_salient_file/description.md diff --git a/tasks/github/missing-semester/find_salient_file/meta.json 
b/tasks/github/standard/missing-semester/find_salient_file/meta.json similarity index 100% rename from tasks/github/missing-semester/find_salient_file/meta.json rename to tasks/github/standard/missing-semester/find_salient_file/meta.json diff --git a/tasks/github/missing-semester/find_salient_file/verify.py b/tasks/github/standard/missing-semester/find_salient_file/verify.py similarity index 100% rename from tasks/github/missing-semester/find_salient_file/verify.py rename to tasks/github/standard/missing-semester/find_salient_file/verify.py diff --git a/tasks/notion/easy/.gitkeep b/tasks/notion/easy/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tasks/notion/easy/computer_science_student_dashboard/simple__code_snippets_go/description.md b/tasks/notion/easy/computer_science_student_dashboard/simple__code_snippets_go/description.md new file mode 100644 index 00000000..2609933c --- /dev/null +++ b/tasks/notion/easy/computer_science_student_dashboard/simple__code_snippets_go/description.md @@ -0,0 +1,27 @@ +Find the page named "Computer Science Student Dashboard" and extend the **Code Snippets** section with Go content. + +**Task Requirements:** +1. Add a bold paragraph that contains exactly the text `Go` to mark the start of the Go snippets. +2. Directly under that heading, add three code blocks configured with `language` set to **go**: + a. **Basic Go program** – Caption must be `Basic Go program` and the code content must be exactly: + ```go + package main + + import "fmt" + + func main() { + fmt.Println("Hello, World!") + } + ``` + b. **For loop in Go** – Caption must be `For loop in Go` and the code content must be exactly: + ```go + for i := 0; i < 5; i++ { + fmt.Println(i) + } + ``` + c. 
**Function definition in Go** – Caption must be `Function definition in Go` and the code content must be exactly: + ```go + func add(a, b int) int { + return a + b + } + ``` diff --git a/tasks/notion/easy/computer_science_student_dashboard/simple__code_snippets_go/meta.json b/tasks/notion/easy/computer_science_student_dashboard/simple__code_snippets_go/meta.json new file mode 100644 index 00000000..f0c97259 --- /dev/null +++ b/tasks/notion/easy/computer_science_student_dashboard/simple__code_snippets_go/meta.json @@ -0,0 +1,24 @@ +{ + "task_id": "simple__code_snippets_go", + "task_name": "Simple Code Snippets Go", + "category_id": "computer_science_student_dashboard", + "category_name": "Computer Science Student Dashboard", + "description": "Add a new Go column to the Code Snippets section between Python and JavaScript columns.", + "author": "Xiangyan Liu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "content organization", + "visual formatting", + "template population" + ], + "mcp": [ + "notion" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://painted-tennis-ebc.notion.site/Computer-Science-Student-Dashboard-23e81626b6d78083b787d3c832b02ef4", + "stateOriginalUrl": "https://www.notion.so/marketplace/templates/computer-science-student-dashboard" + } +} diff --git a/tasks/notion/easy/computer_science_student_dashboard/simple__code_snippets_go/verify.py b/tasks/notion/easy/computer_science_student_dashboard/simple__code_snippets_go/verify.py new file mode 100644 index 00000000..2601c90f --- /dev/null +++ b/tasks/notion/easy/computer_science_student_dashboard/simple__code_snippets_go/verify.py @@ -0,0 +1,125 @@ +import sys +from notion_client import Client +from tasks.utils import notion_utils + +# Expected code blocks (language=go) +EXPECTED_CODE_BLOCKS = [ + { + "caption": "Basic Go program", + "code": ( + 'package main\n\nimport "fmt"\n\nfunc main() {\n fmt.Println("Hello, World!")\n}' + ), + }, + { + 
"caption": "For loop in Go", + "code": ("for i := 0; i < 5; i++ {\n fmt.Println(i)\n}"), + }, + { + "caption": "Function definition in Go", + "code": ("func add(a, b int) int {\n return a + b\n}"), + }, +] + +HEADER_TEXT = "Go" + + +def _normalize(text: str) -> str: + """Remove trailing spaces on each line and strip leading/trailing blank lines.""" + return "\n".join(line.rstrip() for line in text.strip().splitlines()) + + +def _find_page(notion: Client, main_id: str | None) -> str | None: + """Return a page_id to verify against or None if not found.""" + page_id = None + if main_id: + found_id, object_type = notion_utils.find_page_or_database_by_id( + notion, main_id + ) + if found_id and object_type == "page": + page_id = found_id + if not page_id: + page_id = notion_utils.find_page(notion, "Computer Science Student Dashboard") + return page_id + + +def _has_bold_header_text(block, text: str) -> bool: + """Generic bold header/paragraph check for a given text.""" + block_type = block.get("type") + if block_type not in {"paragraph", "heading_1", "heading_2", "heading_3"}: + return False + rich_text_list = block.get(block_type, {}).get("rich_text", []) + if not rich_text_list: + return False + plain = "".join(rt.get("plain_text", "") for rt in rich_text_list).strip() + if plain != text: + return False + return any(rt.get("annotations", {}).get("bold", False) for rt in rich_text_list) + + +def _collect_code_blocks(blocks): + """Return list of (code_content, caption) tuples for code blocks with language 'go'.""" + collected = [] + for block in blocks: + if block.get("type") != "code": + continue + code_data = block.get("code", {}) + if code_data.get("language") != "go": + continue + code_plain = "".join( + rt.get("plain_text", "") for rt in code_data.get("rich_text", []) + ) + caption_plain = "".join( + rt.get("plain_text", "") for rt in code_data.get("caption", []) + ) + collected.append((code_plain, caption_plain)) + return collected + + +def verify(notion: Client, 
main_id: str | None = None) -> bool: + page_id = _find_page(notion, main_id) + if not page_id: + print("Error: Target page not found.", file=sys.stderr) + return False + + all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id) + + # Verify header + header_ok = any(_has_bold_header_text(b, HEADER_TEXT) for b in all_blocks) + if not header_ok: + print("Failure: Bold header 'Go' not found.", file=sys.stderr) + return False + + # Verify code blocks + code_blocks_found = _collect_code_blocks(all_blocks) + + remaining = EXPECTED_CODE_BLOCKS.copy() + for code, caption in code_blocks_found: + norm_code = _normalize(code) + for expected in remaining: + if ( + _normalize(expected["code"]) == norm_code + and expected["caption"] == caption + ): + remaining.remove(expected) + break + if remaining: + missing = ", ".join(exp["caption"] for exp in remaining) + print( + f"Failure: Missing or incorrect Go code blocks: {missing}", file=sys.stderr + ) + return False + + print( + "Success: Verified Go header and required Go code blocks." + ) + return True + + +def main(): + notion = notion_utils.get_notion_client() + main_id = sys.argv[1] if len(sys.argv) > 1 else None + sys.exit(0 if verify(notion, main_id) else 1) + + +if __name__ == "__main__": + main() diff --git a/tasks/notion/easy/computer_science_student_dashboard/simple__study_session_tracker/description.md b/tasks/notion/easy/computer_science_student_dashboard/simple__study_session_tracker/description.md new file mode 100644 index 00000000..42db4b82 --- /dev/null +++ b/tasks/notion/easy/computer_science_student_dashboard/simple__study_session_tracker/description.md @@ -0,0 +1,4 @@ +Create a new study-session entry on the **Computer Science Student Dashboard** page. + +1. Locate the ā˜‘ļø Habit tracker section of the page. +2. **Insert a new date mention** for `2025-01-29` immediately **after the existing `2022-09-02` items but before the divider block** that follows them. 
Match the formatting of the existing dates (bold text with a Notion date mention). diff --git a/tasks/notion/easy/computer_science_student_dashboard/simple__study_session_tracker/meta.json b/tasks/notion/easy/computer_science_student_dashboard/simple__study_session_tracker/meta.json new file mode 100644 index 00000000..ec338b4c --- /dev/null +++ b/tasks/notion/easy/computer_science_student_dashboard/simple__study_session_tracker/meta.json @@ -0,0 +1,24 @@ +{ + "task_id": "simple__study_session_tracker", + "task_name": "Simple Study Session Tracker", + "category_id": "computer_science_student_dashboard", + "category_name": "Computer Science Student Dashboard", + "description": "Create a new study-session entry in the Habit tracker section with four unchecked to-do items.", + "author": "Xiangyan Liu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "content organization", + "visual formatting", + "status tracking" + ], + "mcp": [ + "notion" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://painted-tennis-ebc.notion.site/Computer-Science-Student-Dashboard-23e81626b6d78083b787d3c832b02ef4", + "stateOriginalUrl": "https://www.notion.so/marketplace/templates/computer-science-student-dashboard" + } +} diff --git a/tasks/notion/easy/computer_science_student_dashboard/simple__study_session_tracker/verify.py b/tasks/notion/easy/computer_science_student_dashboard/simple__study_session_tracker/verify.py new file mode 100644 index 00000000..727cedbc --- /dev/null +++ b/tasks/notion/easy/computer_science_student_dashboard/simple__study_session_tracker/verify.py @@ -0,0 +1,132 @@ +import sys +from notion_client import Client +from tasks.utils import notion_utils + + +def verify(notion: Client, main_id: str | None = None) -> bool: + """Verify that the new study-session entry for 2025-01-29 was added correctly. + + The script checks that: + 1. A bold date-mention with start=2025-01-29 exists. + 2. 
The mention sits after the 2022-09-02 section but before the divider that originally + followed that section. + """ + + # --------------------------------------------------------------------- + # Locate the main page ------------------------------------------------- + # --------------------------------------------------------------------- + page_id: str | None = None + + if main_id: + found_id, object_type = notion_utils.find_page_or_database_by_id( + notion, main_id + ) + if found_id and object_type == "page": + page_id = found_id + + if not page_id: + page_id = notion_utils.find_page(notion, "Computer Science Student Dashboard") + + if not page_id: + print( + "Error: Page 'Computer Science Student Dashboard' not found.", + file=sys.stderr, + ) + return False + + # --------------------------------------------------------------------- + # Fetch all blocks under the page (flattened order) -------------------- + # --------------------------------------------------------------------- + all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id) + + # --------------------------------------------------------------------- + # Locate reference blocks --------------------------------------------- + # --------------------------------------------------------------------- + TARGET_DATE = "2025-01-29" + PREVIOUS_DATE = "2022-09-02" + + index_previous_date: int | None = None + index_new_date: int | None = None + index_divider_after_previous: int | None = None + + for idx, block in enumerate(all_blocks): + # Divider detection (we care only about the first divider that appears after + # the 2022-09-02 block) + if block.get("type") == "divider": + if index_previous_date is not None and index_divider_after_previous is None: + index_divider_after_previous = idx + + # We only need to inspect paragraph blocks that contain a date mention + if block.get("type") != "paragraph": + continue + + rich_text_list = block["paragraph"].get("rich_text", []) + for rt in rich_text_list: + 
if ( + rt.get("type") != "mention" + or rt.get("mention", {}).get("type") != "date" + ): + continue + + date_start = rt["mention"]["date"].get("start") + + if date_start == PREVIOUS_DATE and index_previous_date is None: + index_previous_date = idx + + if date_start == TARGET_DATE and index_new_date is None: + index_new_date = idx + # (1) Verify bold annotation + if not rt.get("annotations", {}).get("bold", False): + print( + "Error: The 2025-01-29 date mention is not bold.", + file=sys.stderr, + ) + return False + + # Ensure all reference indices were found + if index_previous_date is None: + print("Error: Could not locate the 2022-09-02 date section.", file=sys.stderr) + return False + if index_divider_after_previous is None: + print( + "Error: Could not locate the divider that follows the 2022-09-02 section.", + file=sys.stderr, + ) + return False + if index_new_date is None: + print( + "Error: Could not locate the new 2025-01-29 date mention.", file=sys.stderr + ) + return False + + # (2) Verify ordering + if not (index_previous_date < index_new_date < index_divider_after_previous): + print( + "Error: The 2025-01-29 section is positioned incorrectly.", file=sys.stderr + ) + return False + + # --------------------------------------------------------------------- + # Success -------------------------------------------------------------- + # --------------------------------------------------------------------- + print("Success: Date mention for 2025-01-29 added in the correct position.") + return True + + +# ------------------------------------------------------------------------- +# Command-line entry-point ------------------------------------------------- +# ------------------------------------------------------------------------- + + +def main() -> None: + notion = notion_utils.get_notion_client() + main_id = sys.argv[1] if len(sys.argv) > 1 else None + + if verify(notion, main_id): + sys.exit(0) + else: + sys.exit(1) + + +if __name__ == "__main__": + main() 
diff --git a/tasks/notion/easy/it_trouble_shooting_hub/simple__asset_retirement_migration/description.md b/tasks/notion/easy/it_trouble_shooting_hub/simple__asset_retirement_migration/description.md new file mode 100644 index 00000000..431a2af9 --- /dev/null +++ b/tasks/notion/easy/it_trouble_shooting_hub/simple__asset_retirement_migration/description.md @@ -0,0 +1,11 @@ +Please migrate expiring assets out of the **IT Inventory** database using the simplified checklist below. Your changes will be verified automatically, so match the details exactly. + +--- +Task Steps +1. Inside the **IT Trouble Shooting Hub** page, locate the database named **IT Inventory**. +2. Collect every page in **IT Inventory** whose **Status** is **Expired** or **To be returned**. +3. Create a **new full-page database** under the same hub titled **IT Asset Retirement Queue** with exactly these properties (names and types must match): + • Serial – title + • Status – select + • Expiration date – date +4. For every item gathered in step 2, create a page in **IT Asset Retirement Queue**, copy over the Serial, Status, and Expiration date values, then archive the original inventory page once the copy is made. 
diff --git a/tasks/notion/easy/it_trouble_shooting_hub/simple__asset_retirement_migration/meta.json b/tasks/notion/easy/it_trouble_shooting_hub/simple__asset_retirement_migration/meta.json new file mode 100644 index 00000000..b88aaea5 --- /dev/null +++ b/tasks/notion/easy/it_trouble_shooting_hub/simple__asset_retirement_migration/meta.json @@ -0,0 +1,26 @@ +{ + "task_id": "simple__asset_retirement_migration", + "task_name": "Simple Asset Retirement Migration", + "category_id": "it_trouble_shooting_hub", + "category_name": "IT Trouble Shooting Hub", + "description": "Restructure the IT Inventory database by migrating expired assets to a new IT Asset Retirement Queue database.", + "author": "Xiangyan Liu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "database manipulation", + "automated migration", + "conditional filtering", + "data aggregation", + "report generation" + ], + "mcp": [ + "notion" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://painted-tennis-ebc.notion.site/It-Trouble-Shooting-Hub-23e81626b6d78020aba7eb65ae1cc2d5", + "stateOriginalUrl": "https://www.notion.so/marketplace/templates/it-trouble-shooting-hub" + } +} diff --git a/tasks/notion/easy/it_trouble_shooting_hub/simple__asset_retirement_migration/verify.py b/tasks/notion/easy/it_trouble_shooting_hub/simple__asset_retirement_migration/verify.py new file mode 100644 index 00000000..2f41977f --- /dev/null +++ b/tasks/notion/easy/it_trouble_shooting_hub/simple__asset_retirement_migration/verify.py @@ -0,0 +1,143 @@ +import sys +from typing import Dict +from notion_client import Client +from tasks.utils import notion_utils + + +def _get_database(root_page_id: str, notion: Client, name: str) -> str | None: + """Helper that finds a child database by title inside a page.""" + return notion_utils.find_database_in_block(notion, root_page_id, name) + + +def _check_property(props: Dict, name: str, expected_type: str) -> bool: + if name not in 
props: + print(f"Error: Property '{name}' missing in database.", file=sys.stderr) + return False + if props[name]["type"] != expected_type: + print( + f"Error: Property '{name}' expected type '{expected_type}', found '{props[name]['type']}'.", + file=sys.stderr, + ) + return False + return True + + +def verify(notion: Client, main_id: str | None = None) -> bool: + """Verifies that the IT Asset Retirement Queue was created and populated correctly.""" + + # ------------------------------------------------------------------------- + # Resolve the root IT Trouble Shooting Hub page + # ------------------------------------------------------------------------- + root_page_id = None + if main_id: + found_id, obj_type = notion_utils.find_page_or_database_by_id(notion, main_id) + if found_id and obj_type == "page": + root_page_id = found_id + + if not root_page_id: + root_page_id = notion_utils.find_page(notion, "IT Trouble Shooting Hub") + if not root_page_id: + print( + "Error: Could not locate the 'IT Trouble Shooting Hub' page.", + file=sys.stderr, + ) + return False + + # ------------------------------------------------------------------------- + # Locate the original and new databases + # ------------------------------------------------------------------------- + inventory_db_id = _get_database(root_page_id, notion, "IT Inventory") + if not inventory_db_id: + print("Error: 'IT Inventory' database not found.", file=sys.stderr) + return False + + retirement_db_id = _get_database(root_page_id, notion, "IT Asset Retirement Queue") + if not retirement_db_id: + print("Error: 'IT Asset Retirement Queue' database not found.", file=sys.stderr) + return False + + # ------------------------------------------------------------------------- + # Validate schema of the retirement queue database + # ------------------------------------------------------------------------- + retirement_db = notion.databases.retrieve(database_id=retirement_db_id) + r_props = retirement_db["properties"] 
+ + required_schema = { + "Serial": "title", + "Status": "select", + "Expiration date": "date", + } + + for pname, ptype in required_schema.items(): + if not _check_property(r_props, pname, ptype): + return False + + # ------------------------------------------------------------------------- + # Validate that inventory items are moved & archived + # ------------------------------------------------------------------------- + expired_filter = { + "property": "Status", + "select": {"equals": "Expired"}, + } + to_return_filter = { + "property": "Status", + "select": {"equals": "To be returned"}, + } + compound_filter = {"or": [expired_filter, to_return_filter]} + + # Query for any *active* items that still match these statuses + remaining_items = notion.databases.query( + database_id=inventory_db_id, + filter=compound_filter, + archived=False, + ).get("results", []) + + if remaining_items: + print( + f"Error: {len(remaining_items)} 'Expired' / 'To be returned' items still present in IT Inventory.", + file=sys.stderr, + ) + return False + + # There should be at least one entry in the retirement queue + retirement_pages = notion.databases.query(database_id=retirement_db_id).get( + "results", [] + ) + expected_serials = {"65XYQ/GB", "36x10PIQ"} + if len(retirement_pages) != len(expected_serials): + print( + f"Error: Expected {len(expected_serials)} retirement pages, found {len(retirement_pages)}.", + file=sys.stderr, + ) + return False + + serials_seen = set() + for page in retirement_pages: + props = page["properties"] + # Collect Serial title + title_rich = props.get("Serial", {}).get("title", []) + serial_val = "".join([t.get("plain_text", "") for t in title_rich]).strip() + serials_seen.add(serial_val) + + if serials_seen != expected_serials: + print( + f"Error: Serial values mismatch. 
Expected {sorted(expected_serials)}, found {sorted(serials_seen)}.",
+            file=sys.stderr,
+        )
+        return False
+
+    print("Success: All verification criteria satisfied.")
+    return True
+
+
+def main():
+    notion = notion_utils.get_notion_client()
+    main_id = sys.argv[1] if len(sys.argv) > 1 else None
+    if verify(notion, main_id):
+        sys.exit(0)
+    else:
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tasks/notion/easy/japan_travel_planner/simple__remove_osaka_itinerary/description.md b/tasks/notion/easy/japan_travel_planner/simple__remove_osaka_itinerary/description.md
new file mode 100644
index 00000000..02510bb9
--- /dev/null
+++ b/tasks/notion/easy/japan_travel_planner/simple__remove_osaka_itinerary/description.md
@@ -0,0 +1 @@
+Go to Japan Travel Planner, and go to the Travel Itinerary database, and remove the itinerary in OSAKA after 6 PM (excluding 6 PM) in Day 1 and Day 2.
\ No newline at end of file
diff --git a/tasks/notion/easy/japan_travel_planner/simple__remove_osaka_itinerary/meta.json b/tasks/notion/easy/japan_travel_planner/simple__remove_osaka_itinerary/meta.json
new file mode 100644
index 00000000..b4e0fddf
--- /dev/null
+++ b/tasks/notion/easy/japan_travel_planner/simple__remove_osaka_itinerary/meta.json
@@ -0,0 +1,23 @@
+{
+    "task_id": "simple__remove_osaka_itinerary",
+    "task_name": "Simple Remove Osaka Itinerary",
+    "category_id": "japan_travel_planner",
+    "category_name": "Japan Travel Planner",
+    "description": "Remove the itinerary items in Osaka after 6 PM from Day 1 and Day 2 travel schedules.",
+    "author": "Xiangyan Liu",
+    "created_at": "2025-11-15",
+    "difficulty": "L1",
+    "tags": [
+        "conditional filtering",
+        "automated migration"
+    ],
+    "mcp": [
+        "notion"
+    ],
+    "meta_data": {
+        "stateType": "url",
+        "stateContent": null,
+        "stateUrl": "https://painted-tennis-ebc.notion.site/Japan-Travel-Planner-23181626b6d781c4b6bedb12786b5abe",
+        "stateOriginalUrl": "https://www.notion.so/marketplace/templates/japantravelplanner101"
+    }
+} diff --git a/tasks/notion/japan_travel_planner/remove_osaka_itinerary/verify.py b/tasks/notion/easy/japan_travel_planner/simple__remove_osaka_itinerary/verify.py similarity index 100% rename from tasks/notion/japan_travel_planner/remove_osaka_itinerary/verify.py rename to tasks/notion/easy/japan_travel_planner/simple__remove_osaka_itinerary/verify.py diff --git a/tasks/notion/easy/online_resume/simple__skills_development_tracker/description.md b/tasks/notion/easy/online_resume/simple__skills_development_tracker/description.md new file mode 100644 index 00000000..4a78d912 --- /dev/null +++ b/tasks/notion/easy/online_resume/simple__skills_development_tracker/description.md @@ -0,0 +1,19 @@ +Create a comprehensive skills audit system by performing the following tasks: + +**Task Requirements:** +1. Create a new database named "Skills Development Tracker" as a child database in the main resume page with the following properties: + - Name (title property) + - Current Skill (relation to Skills database) + - Current Proficiency (rollup from related skill's "Skill Level" property) + - Target Proficiency (number property with format "percent") + - Gap (formula: Target Proficiency - Current Proficiency) + - Learning Resources (rich text property) + - Progress Notes (rich text property) + +2. 
Populate the Skills Development Tracker database with entries for all skills that have a proficiency level below 70% (0.7): + - For each qualifying skill, create an entry with: + - Name: "[Skill Name] Development Plan" + - Link to the corresponding skill in Skills database + - Target Proficiency: Set to Current + 25% (capped at 95%) + - Learning Resources: "Online courses and practice projects" + - Progress Notes: "Initial assessment completed" diff --git a/tasks/notion/easy/online_resume/simple__skills_development_tracker/meta.json b/tasks/notion/easy/online_resume/simple__skills_development_tracker/meta.json new file mode 100644 index 00000000..66b6bde4 --- /dev/null +++ b/tasks/notion/easy/online_resume/simple__skills_development_tracker/meta.json @@ -0,0 +1,27 @@ +{ + "task_id": "simple__skills_development_tracker", + "task_name": "Simple Skills Development Tracker", + "category_id": "online_resume", + "category_name": "Online Resume", + "description": "Create a comprehensive skills audit system with development tracking for skills below 70% proficiency.", + "author": "Xiangyan Liu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "database manipulation", + "cross-reference linking", + "conditional filtering", + "data aggregation", + "template population", + "visual formatting" + ], + "mcp": [ + "notion" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://painted-tennis-ebc.notion.site/Online-Resume-23181626b6d781159faaeb5eadaf612e", + "stateOriginalUrl": "https://www.notion.so/marketplace/templates/online-resume" + } +} diff --git a/tasks/notion/easy/online_resume/simple__skills_development_tracker/verify.py b/tasks/notion/easy/online_resume/simple__skills_development_tracker/verify.py new file mode 100644 index 00000000..9ab91205 --- /dev/null +++ b/tasks/notion/easy/online_resume/simple__skills_development_tracker/verify.py @@ -0,0 +1,206 @@ +import sys +from notion_client import Client +from 
tasks.utils import notion_utils + + +def verify(notion: Client, main_id: str = None) -> bool: + """ + Verifies that the Skills Development Tracker database was created correctly. + """ + page_id = None + if main_id: + found_id, object_type = notion_utils.find_page_or_database_by_id( + notion, main_id + ) + if found_id and object_type == "page": + page_id = found_id + + if not page_id: + page_id = notion_utils.find_page(notion, "New Online Resume") + if not page_id: + print("Error: Page 'New Online Resume' not found.", file=sys.stderr) + return False + + # Step 1: Verify Skills Development Tracker database exists + tracker_db_id = notion_utils.find_database_in_block( + notion, page_id, "Skills Development Tracker" + ) + if not tracker_db_id: + print( + "Error: Database 'Skills Development Tracker' not found.", file=sys.stderr + ) + return False + + # Step 2: Verify database schema + try: + db_info = notion.databases.retrieve(database_id=tracker_db_id) + properties = db_info.get("properties", {}) + + # Check required properties + required_props = { + "Name": "title", + "Current Skill": "relation", + "Current Proficiency": "rollup", + "Target Proficiency": "number", + "Gap": "formula", + "Learning Resources": "rich_text", + "Progress Notes": "rich_text", + } + + for prop_name, expected_type in required_props.items(): + if prop_name not in properties: + print( + f"Error: Property '{prop_name}' not found in database.", + file=sys.stderr, + ) + return False + if properties[prop_name]["type"] != expected_type: + print( + f"Error: Property '{prop_name}' has incorrect type. 
Expected '{expected_type}', got '{properties[prop_name]['type']}'.", + file=sys.stderr, + ) + return False + + # Verify Target Proficiency is percent format + if ( + properties["Target Proficiency"].get("number", {}).get("format") + != "percent" + ): + print( + "Error: Target Proficiency should have 'percent' format.", + file=sys.stderr, + ) + return False + + except Exception as e: + print(f"Error retrieving database info: {e}", file=sys.stderr) + return False + + # Step 3: Get Skills database to check entries + skills_db_id = notion_utils.find_database_in_block(notion, page_id, "Skills") + if not skills_db_id: + print("Error: Skills database not found.", file=sys.stderr) + return False + + # Get all skills with proficiency < 70% + skills_below_70 = [] + try: + skills_results = notion.databases.query(database_id=skills_db_id).get( + "results", [] + ) + for skill in skills_results: + skill_level = ( + skill.get("properties", {}).get("Skill Level", {}).get("number", 1.0) + ) + if skill_level < 0.7: + skill_name = ( + skill.get("properties", {}).get("Skill", {}).get("title", []) + ) + if skill_name: + skill_name_text = skill_name[0].get("text", {}).get("content", "") + skills_below_70.append( + { + "name": skill_name_text, + "id": skill["id"], + "level": skill_level, + } + ) + except Exception as e: + print(f"Error querying Skills database: {e}", file=sys.stderr) + return False + + if not skills_below_70: + print("Warning: No skills found with proficiency below 70%.", file=sys.stderr) + # This might be OK if all skills are above 70% + + # Step 4: Verify entries in Skills Development Tracker + try: + tracker_results = notion.databases.query(database_id=tracker_db_id).get( + "results", [] + ) + + # Check that we have entries for skills below 70% + if len(skills_below_70) > 0 and len(tracker_results) == 0: + print( + "Error: No entries found in Skills Development Tracker database.", + file=sys.stderr, + ) + return False + + # Verify each entry + for entry in 
tracker_results: + props = entry.get("properties", {}) + + # Check name format + name_prop = props.get("Name", {}).get("title", []) + if not name_prop: + print("Error: Entry missing Name property.", file=sys.stderr) + return False + name_text = name_prop[0].get("text", {}).get("content", "") + if not name_text.endswith(" Development Plan"): + print( + f"Error: Entry name '{name_text}' doesn't follow expected format.", + file=sys.stderr, + ) + return False + + # Check relation to Skills database + skill_relation = props.get("Current Skill", {}).get("relation", []) + if not skill_relation: + print( + f"Error: Entry '{name_text}' missing Current Skill relation.", + file=sys.stderr, + ) + return False + + # Check Target Proficiency (should be set) + target_prof = props.get("Target Proficiency", {}).get("number") + if target_prof is None: + print( + f"Error: Entry '{name_text}' missing Target Proficiency.", + file=sys.stderr, + ) + return False + + # Check Learning Resources + learning_resources = props.get("Learning Resources", {}).get( + "rich_text", [] + ) + if not learning_resources: + print( + f"Error: Entry '{name_text}' missing Learning Resources.", + file=sys.stderr, + ) + return False + + # Check Progress Notes + progress_notes = props.get("Progress Notes", {}).get("rich_text", []) + if not progress_notes: + print( + f"Error: Entry '{name_text}' missing Progress Notes.", + file=sys.stderr, + ) + return False + + except Exception as e: + print(f"Error querying Skills Development Tracker: {e}", file=sys.stderr) + return False + + print("Success: Skills Development Tracker database verified successfully.") + return True + + +def main(): + """ + Executes the verification process and exits with a status code. 
+ """ + notion = notion_utils.get_notion_client() + main_id = sys.argv[1] if len(sys.argv) > 1 else None + if verify(notion, main_id): + sys.exit(0) + else: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tasks/notion/easy/python_roadmap/simple__expert_level_lessons/description.md b/tasks/notion/easy/python_roadmap/simple__expert_level_lessons/description.md new file mode 100644 index 00000000..afaac23e --- /dev/null +++ b/tasks/notion/easy/python_roadmap/simple__expert_level_lessons/description.md @@ -0,0 +1,30 @@ +# Task: Expert Level Learning Path (Simplified) + +## Objective +Extend the Python Roadmap with a new Expert Level chapter, create a bridge lesson, and add two expert lessons that build on existing material. + +## Requirements + +### 1. Add the Expert Level chapter +- **Database**: Chapters +- **Name**: `Expert Level` +- **Icon**: 🟣 (purple circle emoji) +- Make sure it is linked into the roadmap alongside the existing chapters. + +### 2. Create the bridge lesson +Create a lesson that connects advanced material to the new chapter: +- **Title**: `Advanced Foundations Review` +- **Status**: Done +- **Chapter**: Link it to `Expert Level` +- **Parent item**: Link to the lesson whose title contains "Control" (e.g., "Control Flow") +- **Sub-items**: Include links to the lessons containing "Decorators" and "Calling API" + +### 3. Add two expert lessons +Add the following entries to the Steps database: + +| Lesson Title | Status | Chapter | Parent item | Date | +|--------------|--------|---------|-------------|------| +| `Metaprogramming and AST Manipulation` | To Do | Expert Level | Advanced Foundations Review | 2025-09-15 | +| `Async Concurrency Patterns` | To Do | Expert Level | Calling API | 2025-09-20 | + +The lessons must inherit the correct chapter link, parent relationship, and due date as shown above. 
diff --git a/tasks/notion/easy/python_roadmap/simple__expert_level_lessons/meta.json b/tasks/notion/easy/python_roadmap/simple__expert_level_lessons/meta.json new file mode 100644 index 00000000..18a0aa37 --- /dev/null +++ b/tasks/notion/easy/python_roadmap/simple__expert_level_lessons/meta.json @@ -0,0 +1,26 @@ +{ + "task_id": "expert_level_lessons", + "task_name": "Expert Level Lessons", + "category_id": "python_roadmap", + "category_name": "Python Roadmap", + "description": "Create an Expert Level chapter with sophisticated prerequisite chains and four expert-level lessons.", + "author": "Xiangyan Liu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "database manipulation", + "cross-reference linking", + "conditional filtering", + "status tracking", + "template population" + ], + "mcp": [ + "notion" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://painted-tennis-ebc.notion.site/Python-Roadmap-25281626b6d78012bf2bce1fa8711f4d", + "stateOriginalUrl": "https://www.notion.so/marketplace/templates/python-roadmap" + } +} diff --git a/tasks/notion/easy/python_roadmap/simple__expert_level_lessons/verify.py b/tasks/notion/easy/python_roadmap/simple__expert_level_lessons/verify.py new file mode 100644 index 00000000..5ee120b4 --- /dev/null +++ b/tasks/notion/easy/python_roadmap/simple__expert_level_lessons/verify.py @@ -0,0 +1,234 @@ +import sys +from notion_client import Client +from tasks.utils import notion_utils + + +TARGET_PAGE_TITLE = "Python Roadmap" +CHAPTER_NAME = "Expert Level" +CHAPTER_ICON = "🟣" +BRIDGE_TITLE = "Advanced Foundations Review" +REQUIRED_SUBITEM_TITLES = ["Decorators", "Calling API"] + +LESSON_REQUIREMENTS = [ + { + "title": "Metaprogramming and AST Manipulation", + "status": "To Do", + "date": "2025-09-15", + "parent_title": BRIDGE_TITLE, + }, + { + "title": "Async Concurrency Patterns", + "status": "To Do", + "date": "2025-09-20", + "parent_title": "Calling API", + }, +] + + +def 
_get_database_ids(notion: Client, page_id: str) -> tuple[str | None, str | None]: + """Return the block IDs for the Chapters and Steps databases on the page.""" + chapters_db_id = None + steps_db_id = None + + blocks = notion_utils.get_all_blocks_recursively(notion, page_id) + for block in blocks: + if block.get("type") != "child_database": + continue + title = block.get("child_database", {}).get("title", "") + if "Chapters" in title and not chapters_db_id: + chapters_db_id = block["id"] + elif "Steps" in title and not steps_db_id: + steps_db_id = block["id"] + + return chapters_db_id, steps_db_id + + +def _query_step_by_title(notion: Client, database_id: str, title: str, *, exact: bool = True): + """Return the first step entry matching the given title pattern.""" + title_filter = {"equals": title} if exact else {"contains": title} + response = notion.databases.query( + database_id=database_id, + filter={"property": "Lessons", "title": title_filter}, + page_size=5, + ) + results = response.get("results", []) + return results[0] if results else None + + +def verify(notion: Client, main_id: str | None = None) -> bool: + """Verify the simplified Expert Level learning path setup.""" + # Resolve the roadmap page. + if main_id: + page_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id) + if not page_id or object_type != "page": + print("Error: Python Roadmap page not found.", file=sys.stderr) + return False + else: + page_id = notion_utils.find_page(notion, TARGET_PAGE_TITLE) + if not page_id: + print("Error: Python Roadmap page not found.", file=sys.stderr) + return False + + # Locate the Chapters and Steps databases. 
+ chapters_db_id, steps_db_id = _get_database_ids(notion, page_id) + if not chapters_db_id: + print("Error: Chapters database not found on the page.", file=sys.stderr) + return False + if not steps_db_id: + print("Error: Steps database not found on the page.", file=sys.stderr) + return False + + # Ensure the Expert Level chapter exists with the purple icon. + try: + chapter_resp = notion.databases.query( + database_id=chapters_db_id, + filter={"property": "Name", "title": {"equals": CHAPTER_NAME}}, + page_size=1, + ) + except Exception as exc: + print(f"Error querying Chapters database: {exc}", file=sys.stderr) + return False + + results = chapter_resp.get("results", []) + if not results: + print("Error: Expert Level chapter not found.", file=sys.stderr) + return False + + expert_chapter = results[0] + expert_chapter_id = expert_chapter["id"] + icon = expert_chapter.get("icon") or {} + if icon.get("type") != "emoji" or icon.get("emoji") != CHAPTER_ICON: + print("Error: Expert Level chapter must use the purple circle emoji icon.", file=sys.stderr) + return False + + print("āœ“ Expert Level chapter exists with the correct icon.") + + # Locate prerequisite lessons (Control Flow, Decorators, Calling API). + control_lesson = _query_step_by_title(notion, steps_db_id, "Control", exact=False) + if not control_lesson: + print("Error: Could not find a lesson containing 'Control' in its title.", file=sys.stderr) + return False + control_lesson_id = control_lesson["id"] + + prerequisite_ids = {} + for title in REQUIRED_SUBITEM_TITLES: + lesson = _query_step_by_title(notion, steps_db_id, title, exact=False) + if not lesson: + print(f"Error: Required lesson containing '{title}' not found.", file=sys.stderr) + return False + prerequisite_ids[title] = lesson["id"] + + # Verify the bridge lesson. 
+ bridge_lesson = _query_step_by_title(notion, steps_db_id, BRIDGE_TITLE, exact=True) + if not bridge_lesson: + print("Error: Advanced Foundations Review lesson not found.", file=sys.stderr) + return False + + status = (bridge_lesson["properties"].get("Status", {}).get("status") or {}).get("name") + if status != "Done": + print("Error: Advanced Foundations Review must have status 'Done'.", file=sys.stderr) + return False + + # Ensure chapter relation includes Expert Level. + chapter_rel = bridge_lesson["properties"].get("Chapters", {}).get("relation", []) + if not any(rel["id"] == expert_chapter_id for rel in chapter_rel): + print("Error: Advanced Foundations Review must link to the Expert Level chapter.", file=sys.stderr) + return False + + # Parent item should be the control lesson. + parent_rel = bridge_lesson["properties"].get("Parent item", {}).get("relation", []) + if not parent_rel or parent_rel[0]["id"] != control_lesson_id: + print("Error: Advanced Foundations Review should use the control lesson as its Parent item.", file=sys.stderr) + return False + + # Sub-items must include the required lessons. + sub_rel = bridge_lesson["properties"].get("Sub-item", {}).get("relation", []) + sub_ids = {rel["id"] for rel in sub_rel} + missing = [title for title, rel_id in prerequisite_ids.items() if rel_id not in sub_ids] + if missing: + print( + f"Error: Advanced Foundations Review must include these lessons as sub-items: {', '.join(missing)}.", + file=sys.stderr, + ) + return False + + print("āœ“ Bridge lesson configured with the correct status, chapter, parent, and sub-items.") + + # Verify the two expert lessons. + overall_success = True + for spec in LESSON_REQUIREMENTS: + lesson = _query_step_by_title(notion, steps_db_id, spec["title"], exact=True) + if not lesson: + print(f"Error: Lesson '{spec['title']}' not found.", file=sys.stderr) + overall_success = False + continue + + lesson_ok = True + + # Status check. 
+ status_name = (lesson["properties"].get("Status", {}).get("status") or {}).get("name") + if status_name != spec["status"]: + print( + f"Error: Lesson '{spec['title']}' should have status '{spec['status']}', found '{status_name}'.", + file=sys.stderr, + ) + lesson_ok = False + + # Chapter relation check. + lesson_chapters = lesson["properties"].get("Chapters", {}).get("relation", []) + if not any(rel["id"] == expert_chapter_id for rel in lesson_chapters): + print(f"Error: Lesson '{spec['title']}' must link to the Expert Level chapter.", file=sys.stderr) + lesson_ok = False + + # Parent relation check. + parent_title = spec["parent_title"] + if parent_title == BRIDGE_TITLE: + expected_parent_id = bridge_lesson["id"] + else: + expected_parent_id = prerequisite_ids.get(parent_title) + + parent_relation = lesson["properties"].get("Parent item", {}).get("relation", []) + if not expected_parent_id: + print( + f"Error: Could not resolve expected parent '{parent_title}' for lesson '{spec['title']}'.", + file=sys.stderr, + ) + lesson_ok = False + else: + if not parent_relation or parent_relation[0]["id"] != expected_parent_id: + print( + f"Error: Lesson '{spec['title']}' should have '{parent_title}' as its Parent item.", + file=sys.stderr, + ) + lesson_ok = False + # Date check. 
+ date_prop = lesson["properties"].get("Date", {}).get("date") or {} + if date_prop.get("start") != spec["date"]: + print( + f"Error: Lesson '{spec['title']}' should use date {spec['date']}, found {date_prop.get('start')}.", + file=sys.stderr, + ) + lesson_ok = False + + if lesson_ok: + print(f"āœ“ Lesson '{spec['title']}' has the expected properties.") + else: + overall_success = False + + if not overall_success: + return False + + print("Success: Expert Level chapter, bridge lesson, and expert lessons configured correctly.") + return True + + +def main() -> None: + notion = notion_utils.get_notion_client() + main_id = sys.argv[1] if len(sys.argv) > 1 else None + if verify(notion, main_id): + sys.exit(0) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tasks/notion/easy/self_assessment/simple__faq_column_layout/description.md b/tasks/notion/easy/self_assessment/simple__faq_column_layout/description.md new file mode 100644 index 00000000..3a7daa60 --- /dev/null +++ b/tasks/notion/easy/self_assessment/simple__faq_column_layout/description.md @@ -0,0 +1,6 @@ +Navigate to the "Self Assessment" page and reorganize the FAQ toggle content to make it easier to scan. + +**Task Requirements:** +1. Add a column list with two columns inside the FAQ toggle. +2. Move the first two existing Q&A pairs from the FAQ into the left column. +3. Move the third existing Q&A pair into the right column, keeping the original heading/paragraph formatting. 
diff --git a/tasks/notion/easy/self_assessment/simple__faq_column_layout/meta.json b/tasks/notion/easy/self_assessment/simple__faq_column_layout/meta.json new file mode 100644 index 00000000..9f356b30 --- /dev/null +++ b/tasks/notion/easy/self_assessment/simple__faq_column_layout/meta.json @@ -0,0 +1,24 @@ +{ + "task_id": "simple__faq_column_layout", + "task_name": "Simple FAQ Column Layout", + "category_id": "self_assessment", + "category_name": "Self Assessment", + "description": "Reorganize the FAQ section content into a two-column layout with balanced Q&A pairs.", + "author": "Xiangyan Liu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "content organization", + "visual formatting", + "template population" + ], + "mcp": [ + "notion" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://painted-tennis-ebc.notion.site/Self-Assessment-24381626b6d780fe9f56c2ba14ea042d", + "stateOriginalUrl": "https://painted-tennis-ebc.notion.site/Self-Assessment-24381626b6d780fe9f56c2ba14ea042d" + } +} diff --git a/tasks/notion/easy/self_assessment/simple__faq_column_layout/verify.py b/tasks/notion/easy/self_assessment/simple__faq_column_layout/verify.py new file mode 100644 index 00000000..dccddbf0 --- /dev/null +++ b/tasks/notion/easy/self_assessment/simple__faq_column_layout/verify.py @@ -0,0 +1,161 @@ +import sys +from notion_client import Client +from tasks.utils import notion_utils + + +def verify(notion: Client, main_id: str = None) -> bool: + """ + Verifies that the FAQ toggle has been properly reorganized with a column list. 
+ """ + # Start from main_id if provided + page_id = None + if main_id: + found_id, object_type = notion_utils.find_page_or_database_by_id( + notion, main_id + ) + if found_id and object_type == "page": + page_id = found_id + + if not page_id: + # Try to find the Self Assessment page + page_id = notion_utils.find_page(notion, "Self Assessment") + + if not page_id: + print("Error: Self Assessment page not found.", file=sys.stderr) + return False + + # Get all blocks recursively from the page + all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id) + + # Find the FAQ toggle block + faq_toggle_block = None + faq_toggle_id = None + for block in all_blocks: + if block.get("type") == "toggle": + block_text = notion_utils.get_block_plain_text(block) + if "FAQ" in block_text: + faq_toggle_block = block + faq_toggle_id = block.get("id") + print(f"Found FAQ toggle block: {block_text}") + break + + if not faq_toggle_block: + print("Error: FAQ toggle block not found.", file=sys.stderr) + return False + + # Find column_list inside the FAQ toggle + column_list_block = None + for block in all_blocks: + if ( + block.get("type") == "column_list" + and block.get("parent", {}).get("block_id") == faq_toggle_id + ): + column_list_block = block + break + + if not column_list_block: + print("Error: No column_list found inside FAQ toggle.", file=sys.stderr) + return False + + # Check that there are no Q&A pairs directly under FAQ toggle (outside column_list) + direct_faq_children = [] + for block in all_blocks: + if block.get("parent", {}).get("block_id") == faq_toggle_id and block.get( + "id" + ) != column_list_block.get("id"): + direct_faq_children.append(block) + + # Check if any of these are heading_3 or paragraph blocks (Q&A content) + for block in direct_faq_children: + if block.get("type") in ["heading_3", "paragraph"]: + print( + f"Error: Found Q&A content outside column_list: {notion_utils.get_block_plain_text(block)[:50]}...", + file=sys.stderr, + ) + return False 
+ + # Find the two columns + columns = [] + column_list_id = column_list_block.get("id") + for block in all_blocks: + if ( + block.get("type") == "column" + and block.get("parent", {}).get("block_id") == column_list_id + ): + columns.append(block) + + if len(columns) != 2: + print(f"Error: Expected 2 columns, found {len(columns)}.", file=sys.stderr) + return False + + # Count Q&A pairs in each column + qa_counts = [] + total_pairs = 0 + + for i, column in enumerate(columns[:2]): + column_id = column.get("id") + + column_blocks = [ + block + for block in all_blocks + if block.get("parent", {}).get("block_id") == column_id + ] + + qa_pairs = 0 + j = 0 + while j < len(column_blocks): + if ( + column_blocks[j].get("type") == "heading_3" + and j + 1 < len(column_blocks) + and column_blocks[j + 1].get("type") == "paragraph" + ): + qa_pairs += 1 + j += 2 + else: + j += 1 + + qa_counts.append(qa_pairs) + total_pairs += qa_pairs + print(f"Column {i + 1}: Found {qa_pairs} Q&A pairs") + + if qa_counts[0] < 2: + print( + f"Error: Left column should contain at least 2 Q&A pairs, found {qa_counts[0]}.", + file=sys.stderr, + ) + return False + + if qa_counts[1] < 1: + print( + f"Error: Right column should contain at least 1 Q&A pair, found {qa_counts[1]}.", + file=sys.stderr, + ) + return False + + if total_pairs < 3: + print( + f"Error: Expected at least 3 total Q&A pairs across both columns, found {total_pairs}.", + file=sys.stderr, + ) + return False + + print( + "Success: FAQ toggle organized with two columns holding the existing Q&A pairs (two on the left, one on the right)." + ) + return True + + +def main(): + """ + Executes the verification process and exits with a status code. 
+ """ + notion = notion_utils.get_notion_client() + main_id = sys.argv[1] if len(sys.argv) > 1 else None + if verify(notion, main_id): + sys.exit(0) + else: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tasks/notion/easy/standard_operating_procedure/simple__section_organization/description.md b/tasks/notion/easy/standard_operating_procedure/simple__section_organization/description.md new file mode 100644 index 00000000..ebf2f9c2 --- /dev/null +++ b/tasks/notion/easy/standard_operating_procedure/simple__section_organization/description.md @@ -0,0 +1,10 @@ +# Task: Reorganize Standard Operating Procedure Page Sections + +## Objective +Modify the structure of the Standard Operating Procedure page in Notion by updating the order of two sections. + +## Requirements +- Navigate to the Standard Operating Procedure page +- Swap the positions of the "Terminologies" and "Roles & responsibilities" sections +- Preserve all content within each section exactly as is +- Maintain the original formatting and structure of each section diff --git a/tasks/notion/easy/standard_operating_procedure/simple__section_organization/meta.json b/tasks/notion/easy/standard_operating_procedure/simple__section_organization/meta.json new file mode 100644 index 00000000..ba72cfaa --- /dev/null +++ b/tasks/notion/easy/standard_operating_procedure/simple__section_organization/meta.json @@ -0,0 +1,24 @@ +{ + "task_id": "simple__section_organization", + "task_name": "Simple Section Organization", + "category_id": "standard_operating_procedure", + "category_name": "Standard Operating Procedure", + "description": "Reorganize the Standard Operating Procedure page by swapping sections and creating a column layout.", + "author": "Xiangyan Liu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "content organization", + "cross-reference linking", + "visual formatting" + ], + "mcp": [ + "notion" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + 
"stateUrl": "https://painted-tennis-ebc.notion.site/Standard-Operating-Procedure-24381626b6d780a8b678f9e62ae5b152", + "stateOriginalUrl": "https://www.notion.so/marketplace/templates/standard-operating-procedure" + } +} diff --git a/tasks/notion/easy/standard_operating_procedure/simple__section_organization/verify.py b/tasks/notion/easy/standard_operating_procedure/simple__section_organization/verify.py new file mode 100644 index 00000000..e2503df2 --- /dev/null +++ b/tasks/notion/easy/standard_operating_procedure/simple__section_organization/verify.py @@ -0,0 +1,76 @@ +import sys +from notion_client import Client +from tasks.utils import notion_utils + + +TARGET_PAGE_TITLE = "Standard Operating Procedure" +ROLES_HEADING = "Roles & responsibilities" +TERMINOLOGIES_HEADING = "Terminologies" + + +def _find_heading_indices(blocks: list[dict]) -> tuple[int | None, int | None]: + """Return the indices of the target headings within the flattened block list.""" + roles_index = None + terminologies_index = None + + for index, block in enumerate(blocks): + if block.get("type") != "heading_2": + continue + rich_text = block.get("heading_2", {}).get("rich_text", []) + if not rich_text: + continue + heading_text = rich_text[0].get("text", {}).get("content", "") + if heading_text == ROLES_HEADING and roles_index is None: + roles_index = index + elif heading_text == TERMINOLOGIES_HEADING and terminologies_index is None: + terminologies_index = index + + return roles_index, terminologies_index + + +def verify(notion: Client, main_id: str | None = None) -> bool: + """Ensure the Roles & responsibilities section appears before Terminologies.""" + # Resolve page id. 
+ if main_id: + page_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id) + if not page_id or object_type != "page": + print("Error: Standard Operating Procedure page not found.", file=sys.stderr) + return False + else: + page_id = notion_utils.find_page(notion, TARGET_PAGE_TITLE) + if not page_id: + print("Error: Standard Operating Procedure page not found.", file=sys.stderr) + return False + + # Fetch all blocks (flattened order from top to bottom). + blocks = notion_utils.get_all_blocks_recursively(notion, page_id) + roles_index, terminologies_index = _find_heading_indices(blocks) + + if roles_index is None: + print("Error: 'Roles & responsibilities' section not found.", file=sys.stderr) + return False + if terminologies_index is None: + print("Error: 'Terminologies' section not found.", file=sys.stderr) + return False + + if roles_index >= terminologies_index: + print( + "Error: Sections are not swapped. 'Roles & responsibilities' should appear before 'Terminologies'.", + file=sys.stderr, + ) + return False + + print("Success: Section order updated so 'Roles & responsibilities' precedes 'Terminologies'.") + return True + + +def main() -> None: + notion = notion_utils.get_notion_client() + main_id = sys.argv[1] if len(sys.argv) > 1 else None + if verify(notion, main_id): + sys.exit(0) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tasks/notion/easy/team_projects/simple__swap_tasks/description.md b/tasks/notion/easy/team_projects/simple__swap_tasks/description.md new file mode 100644 index 00000000..403698c7 --- /dev/null +++ b/tasks/notion/easy/team_projects/simple__swap_tasks/description.md @@ -0,0 +1 @@ +Go to the Team Projects page, find the person responsible for the most tasks (10 in total) and the person responsible for the fewest tasks (3 in total), then swap their assigned tasks. 
\ No newline at end of file diff --git a/tasks/notion/easy/team_projects/simple__swap_tasks/meta.json b/tasks/notion/easy/team_projects/simple__swap_tasks/meta.json new file mode 100644 index 00000000..a0501526 --- /dev/null +++ b/tasks/notion/easy/team_projects/simple__swap_tasks/meta.json @@ -0,0 +1,24 @@ +{ + "task_id": "simple__swap_tasks", + "task_name": "Simple Swap Tasks", + "category_id": "team_projects", + "category_name": "Team Projects", + "description": "Find the person responsible for the most and fewest tasks, then swap their assigned tasks.", + "author": "Xiangyan Liu", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "data aggregation", + "automated migration", + "conditional filtering" + ], + "mcp": [ + "notion" + ], + "meta_data": { + "stateType": "url", + "stateContent": null, + "stateUrl": "https://painted-tennis-ebc.notion.site/Team-Projects-24e81626b6d7809c982fdb7a25825898", + "stateOriginalUrl": "https://www.notion.so/marketplace/templates/gantt-chart" + } +} diff --git a/tasks/notion/team_projects/swap_tasks/verify.py b/tasks/notion/easy/team_projects/simple__swap_tasks/verify.py similarity index 100% rename from tasks/notion/team_projects/swap_tasks/verify.py rename to tasks/notion/easy/team_projects/simple__swap_tasks/verify.py diff --git a/tasks/notion/easy/toronto_guide/simple__change_color/description.md b/tasks/notion/easy/toronto_guide/simple__change_color/description.md new file mode 100644 index 00000000..e69a8ade --- /dev/null +++ b/tasks/notion/easy/toronto_guide/simple__change_color/description.md @@ -0,0 +1,7 @@ +Open the **Toronto Guide** page and refresh the colors of the tags in the **Food** database. + +## Requirements +1. Find and open the Toronto Guide page in Notion. +2. Locate the *Food* database on that page. +3. Update every tag in the Food database that is currently pink so that it uses a different color of your choice (any non-pink color is fine). +4. 
import sys
from notion_client import Client
from tasks.utils import notion_utils


TARGET_PAGE_TITLE = "Toronto Guide"
FOOD_DATABASE_KEYWORD = "Food"
TARGET_TAG_NAMES = [
    "Middle Eastern",
    "Jamaican",
    "Indian",
]


def _get_food_database_id(notion: Client, page_id: str) -> str | None:
    """Return the block ID of the Food database shown on the target page."""
    for child in notion_utils.get_all_blocks_recursively(notion, page_id):
        if not child or child.get("type") != "child_database":
            continue
        db_title = child.get("child_database", {}).get("title", "")
        # Case-insensitive keyword match against the child database title.
        if FOOD_DATABASE_KEYWORD.lower() in db_title.lower():
            return child.get("id")
    return None


def verify(notion: Client, main_id: str | None = None) -> bool:
    """Check that all target tags in the Food database are no longer pink."""
    # Resolve the Toronto Guide page ID (explicit id first, title search otherwise).
    if main_id:
        resolved_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)
        if not resolved_id or object_type != "page":
            print("Error: Toronto Guide page not found.", file=sys.stderr)
            return False
        page_id = resolved_id
    else:
        page_id = notion_utils.find_page(notion, TARGET_PAGE_TITLE)
        if not page_id:
            print("Error: Toronto Guide page not found.", file=sys.stderr)
            return False

    # Locate the Food database block on the page.
    food_db_id = _get_food_database_id(notion, page_id)
    if not food_db_id:
        print("Error: Food database not found on the Toronto Guide page.", file=sys.stderr)
        return False

    # Pull the database schema so the Tags options can be inspected.
    try:
        db_info = notion.databases.retrieve(database_id=food_db_id)
    except Exception as exc:
        print(f"Error: Unable to retrieve Food database ({exc}).", file=sys.stderr)
        return False

    tags_property = db_info.get("properties", {}).get("Tags", {})
    if tags_property.get("type") != "multi_select":
        print("Error: Food database does not have a multi-select Tags property.", file=sys.stderr)
        return False

    # Walk the options once, ticking off each target tag we encounter.
    pending = set(TARGET_TAG_NAMES)
    still_pink = False

    for option in tags_property.get("multi_select", {}).get("options", []):
        tag_name = option.get("name", "").strip()
        if tag_name not in pending:
            continue

        pending.discard(tag_name)
        color = option.get("color")
        if color == "pink":
            print(f"Error: Tag '{tag_name}' in Food database is still pink.", file=sys.stderr)
            still_pink = True
        else:
            print(f"āœ“ Tag '{tag_name}' color updated to '{color}'.")

    if pending:
        print(
            f"Error: Food tags not found (expected to exist): {sorted(pending)}.",
            file=sys.stderr,
        )
        return False

    if still_pink:
        return False

    print("Success: All Food database tags are now non-pink.")
    return True


def main() -> None:
    """Entry point: run verification and translate the result to an exit code."""
    notion = notion_utils.get_notion_client()
    main_id = sys.argv[1] if len(sys.argv) > 1 else None
    sys.exit(0 if verify(notion, main_id) else 1)


if __name__ == "__main__":
    main()
a/tasks/notion/company_in_a_box/goals_restructure/verify.py b/tasks/notion/standard/company_in_a_box/goals_restructure/verify.py similarity index 100% rename from tasks/notion/company_in_a_box/goals_restructure/verify.py rename to tasks/notion/standard/company_in_a_box/goals_restructure/verify.py diff --git a/tasks/notion/company_in_a_box/quarterly_review_dashboard/description.md b/tasks/notion/standard/company_in_a_box/quarterly_review_dashboard/description.md similarity index 100% rename from tasks/notion/company_in_a_box/quarterly_review_dashboard/description.md rename to tasks/notion/standard/company_in_a_box/quarterly_review_dashboard/description.md diff --git a/tasks/notion/company_in_a_box/quarterly_review_dashboard/meta.json b/tasks/notion/standard/company_in_a_box/quarterly_review_dashboard/meta.json similarity index 100% rename from tasks/notion/company_in_a_box/quarterly_review_dashboard/meta.json rename to tasks/notion/standard/company_in_a_box/quarterly_review_dashboard/meta.json diff --git a/tasks/notion/company_in_a_box/quarterly_review_dashboard/verify.py b/tasks/notion/standard/company_in_a_box/quarterly_review_dashboard/verify.py similarity index 100% rename from tasks/notion/company_in_a_box/quarterly_review_dashboard/verify.py rename to tasks/notion/standard/company_in_a_box/quarterly_review_dashboard/verify.py diff --git a/tasks/notion/computer_science_student_dashboard/code_snippets_go/description.md b/tasks/notion/standard/computer_science_student_dashboard/code_snippets_go/description.md similarity index 100% rename from tasks/notion/computer_science_student_dashboard/code_snippets_go/description.md rename to tasks/notion/standard/computer_science_student_dashboard/code_snippets_go/description.md diff --git a/tasks/notion/computer_science_student_dashboard/code_snippets_go/meta.json b/tasks/notion/standard/computer_science_student_dashboard/code_snippets_go/meta.json similarity index 100% rename from 
tasks/notion/computer_science_student_dashboard/code_snippets_go/meta.json rename to tasks/notion/standard/computer_science_student_dashboard/code_snippets_go/meta.json diff --git a/tasks/notion/computer_science_student_dashboard/code_snippets_go/verify.py b/tasks/notion/standard/computer_science_student_dashboard/code_snippets_go/verify.py similarity index 100% rename from tasks/notion/computer_science_student_dashboard/code_snippets_go/verify.py rename to tasks/notion/standard/computer_science_student_dashboard/code_snippets_go/verify.py diff --git a/tasks/notion/computer_science_student_dashboard/courses_internships_relation/description.md b/tasks/notion/standard/computer_science_student_dashboard/courses_internships_relation/description.md similarity index 100% rename from tasks/notion/computer_science_student_dashboard/courses_internships_relation/description.md rename to tasks/notion/standard/computer_science_student_dashboard/courses_internships_relation/description.md diff --git a/tasks/notion/computer_science_student_dashboard/courses_internships_relation/meta.json b/tasks/notion/standard/computer_science_student_dashboard/courses_internships_relation/meta.json similarity index 100% rename from tasks/notion/computer_science_student_dashboard/courses_internships_relation/meta.json rename to tasks/notion/standard/computer_science_student_dashboard/courses_internships_relation/meta.json diff --git a/tasks/notion/computer_science_student_dashboard/courses_internships_relation/verify.py b/tasks/notion/standard/computer_science_student_dashboard/courses_internships_relation/verify.py similarity index 100% rename from tasks/notion/computer_science_student_dashboard/courses_internships_relation/verify.py rename to tasks/notion/standard/computer_science_student_dashboard/courses_internships_relation/verify.py diff --git a/tasks/notion/computer_science_student_dashboard/study_session_tracker/description.md 
b/tasks/notion/standard/computer_science_student_dashboard/study_session_tracker/description.md similarity index 100% rename from tasks/notion/computer_science_student_dashboard/study_session_tracker/description.md rename to tasks/notion/standard/computer_science_student_dashboard/study_session_tracker/description.md diff --git a/tasks/notion/computer_science_student_dashboard/study_session_tracker/meta.json b/tasks/notion/standard/computer_science_student_dashboard/study_session_tracker/meta.json similarity index 100% rename from tasks/notion/computer_science_student_dashboard/study_session_tracker/meta.json rename to tasks/notion/standard/computer_science_student_dashboard/study_session_tracker/meta.json diff --git a/tasks/notion/computer_science_student_dashboard/study_session_tracker/verify.py b/tasks/notion/standard/computer_science_student_dashboard/study_session_tracker/verify.py similarity index 100% rename from tasks/notion/computer_science_student_dashboard/study_session_tracker/verify.py rename to tasks/notion/standard/computer_science_student_dashboard/study_session_tracker/verify.py diff --git a/tasks/notion/it_trouble_shooting_hub/asset_retirement_migration/description.md b/tasks/notion/standard/it_trouble_shooting_hub/asset_retirement_migration/description.md similarity index 100% rename from tasks/notion/it_trouble_shooting_hub/asset_retirement_migration/description.md rename to tasks/notion/standard/it_trouble_shooting_hub/asset_retirement_migration/description.md diff --git a/tasks/notion/it_trouble_shooting_hub/asset_retirement_migration/meta.json b/tasks/notion/standard/it_trouble_shooting_hub/asset_retirement_migration/meta.json similarity index 100% rename from tasks/notion/it_trouble_shooting_hub/asset_retirement_migration/meta.json rename to tasks/notion/standard/it_trouble_shooting_hub/asset_retirement_migration/meta.json diff --git a/tasks/notion/it_trouble_shooting_hub/asset_retirement_migration/verify.py 
b/tasks/notion/standard/it_trouble_shooting_hub/asset_retirement_migration/verify.py similarity index 100% rename from tasks/notion/it_trouble_shooting_hub/asset_retirement_migration/verify.py rename to tasks/notion/standard/it_trouble_shooting_hub/asset_retirement_migration/verify.py diff --git a/tasks/notion/it_trouble_shooting_hub/security_audit_ticket/description.md b/tasks/notion/standard/it_trouble_shooting_hub/security_audit_ticket/description.md similarity index 100% rename from tasks/notion/it_trouble_shooting_hub/security_audit_ticket/description.md rename to tasks/notion/standard/it_trouble_shooting_hub/security_audit_ticket/description.md diff --git a/tasks/notion/it_trouble_shooting_hub/security_audit_ticket/meta.json b/tasks/notion/standard/it_trouble_shooting_hub/security_audit_ticket/meta.json similarity index 100% rename from tasks/notion/it_trouble_shooting_hub/security_audit_ticket/meta.json rename to tasks/notion/standard/it_trouble_shooting_hub/security_audit_ticket/meta.json diff --git a/tasks/notion/it_trouble_shooting_hub/security_audit_ticket/verify.py b/tasks/notion/standard/it_trouble_shooting_hub/security_audit_ticket/verify.py similarity index 100% rename from tasks/notion/it_trouble_shooting_hub/security_audit_ticket/verify.py rename to tasks/notion/standard/it_trouble_shooting_hub/security_audit_ticket/verify.py diff --git a/tasks/notion/it_trouble_shooting_hub/verification_expired_update/description.md b/tasks/notion/standard/it_trouble_shooting_hub/verification_expired_update/description.md similarity index 100% rename from tasks/notion/it_trouble_shooting_hub/verification_expired_update/description.md rename to tasks/notion/standard/it_trouble_shooting_hub/verification_expired_update/description.md diff --git a/tasks/notion/it_trouble_shooting_hub/verification_expired_update/meta.json b/tasks/notion/standard/it_trouble_shooting_hub/verification_expired_update/meta.json similarity index 100% rename from 
tasks/notion/it_trouble_shooting_hub/verification_expired_update/meta.json rename to tasks/notion/standard/it_trouble_shooting_hub/verification_expired_update/meta.json diff --git a/tasks/notion/it_trouble_shooting_hub/verification_expired_update/verify.py b/tasks/notion/standard/it_trouble_shooting_hub/verification_expired_update/verify.py similarity index 100% rename from tasks/notion/it_trouble_shooting_hub/verification_expired_update/verify.py rename to tasks/notion/standard/it_trouble_shooting_hub/verification_expired_update/verify.py diff --git a/tasks/notion/japan_travel_planner/daily_itinerary_overview/description.md b/tasks/notion/standard/japan_travel_planner/daily_itinerary_overview/description.md similarity index 100% rename from tasks/notion/japan_travel_planner/daily_itinerary_overview/description.md rename to tasks/notion/standard/japan_travel_planner/daily_itinerary_overview/description.md diff --git a/tasks/notion/japan_travel_planner/daily_itinerary_overview/meta.json b/tasks/notion/standard/japan_travel_planner/daily_itinerary_overview/meta.json similarity index 100% rename from tasks/notion/japan_travel_planner/daily_itinerary_overview/meta.json rename to tasks/notion/standard/japan_travel_planner/daily_itinerary_overview/meta.json diff --git a/tasks/notion/japan_travel_planner/daily_itinerary_overview/verify.py b/tasks/notion/standard/japan_travel_planner/daily_itinerary_overview/verify.py similarity index 100% rename from tasks/notion/japan_travel_planner/daily_itinerary_overview/verify.py rename to tasks/notion/standard/japan_travel_planner/daily_itinerary_overview/verify.py diff --git a/tasks/notion/japan_travel_planner/packing_progress_summary/description.md b/tasks/notion/standard/japan_travel_planner/packing_progress_summary/description.md similarity index 100% rename from tasks/notion/japan_travel_planner/packing_progress_summary/description.md rename to tasks/notion/standard/japan_travel_planner/packing_progress_summary/description.md 
import sys
from notion_client import Client
from tasks.utils import notion_utils


def get_page_title(page_result):
    """Extract title from a page result's 'Name' title property ('' if absent)."""
    properties = page_result.get('properties', {})
    name_property = properties.get('Name', {})
    if name_property.get('type') == 'title':
        title_array = name_property.get('title', [])
        if title_array:
            return title_array[0].get('plain_text', '')
    return ''


def get_page_time(page_result):
    """Extract the time string stored in the 'Notes' rich-text field ('' if absent)."""
    properties = page_result.get('properties', {})
    notes_property = properties.get('Notes', {})
    if notes_property.get('type') == 'rich_text':
        rich_text_array = notes_property.get('rich_text', [])
        if rich_text_array:
            return rich_text_array[0].get('plain_text', '').strip()
    return ''


def get_page_group(page_result):
    """Extract the group/location select value from a page ('' if absent)."""
    properties = page_result.get('properties', {})
    group_property = properties.get('Group', {})
    if group_property.get('type') == 'select':
        select = group_property.get('select')
        if select:
            return select.get('name', '')
    return ''


def get_page_day(page_result):
    """Extract the 'Day' select value from a page ('' if absent)."""
    properties = page_result.get('properties', {})
    day_property = properties.get('Day', {})
    if day_property.get('type') == 'select':
        select = day_property.get('select')
        if select:
            return select.get('name', '')
    return ''


def parse_time_to_minutes(time_str):
    """Convert a '7:30 PM'-style string to minutes since midnight.

    Returns None when the string cannot be parsed: empty input, no AM/PM
    marker, or non-numeric clock components.
    """
    if not time_str:
        return None

    # Normalize: uppercase, and keep only the first line — the Notes field
    # sometimes carries trailing text after the time (e.g. "7:30 PM\n...").
    time_str = time_str.strip().upper().split('\n')[0].strip()

    # Single parsing path for both meridiems (the original duplicated this
    # block verbatim for AM and PM).
    if 'PM' in time_str:
        meridiem, clock = 'PM', time_str.replace('PM', '').strip()
    elif 'AM' in time_str:
        meridiem, clock = 'AM', time_str.replace('AM', '').strip()
    else:
        return None

    try:
        if ':' in clock:
            hours_str, minutes_str = clock.split(':')
            hours, minutes = int(hours_str), int(minutes_str)
        else:
            hours, minutes = int(clock), 0
    except ValueError:
        # Narrowed from a bare ``except``: only genuine parse failures
        # (bad digits, multiple colons) should yield None.
        return None

    if meridiem == 'PM':
        # Convert PM hours (add 12 for PM times except 12 PM).
        if hours != 12:
            hours += 12
    elif hours == 12:
        # 12 AM is midnight.
        hours = 0

    return hours * 60 + minutes


def _titles_match(page_title, expected_title):
    """Fuzzy title comparison used to recognize expected itinerary entries.

    Tolerates apostrophe variants ("Rikuro's" vs "Rikuro's") by matching on
    the 'rikuro' stem, then falls back to case-insensitive equality or
    substring containment in either direction.
    """
    clean_page = page_title.strip().lower()
    clean_expected = expected_title.strip().lower()
    if "rikuro" in clean_page and "rikuro" in clean_expected:
        return True
    return (
        clean_page == clean_expected
        or clean_expected in clean_page
        or clean_page in clean_expected
    )


def verify(notion: Client, main_id: str | None = None) -> bool:
    """
    Verifies that all OSAKA events after 6PM have been removed from Day 1 and Day 2 in the Japan Travel Planner.

    Expected items that should be deleted (all in OSAKA, after 6PM, on Day 1 or Day 2):
      1. Rikuro's Namba Main Branch - 7 PM (Day 1)
      2. Shin Sekai "New World" - 8 PM (Day 2)
      3. Katsudon Chiyomatsu - 7:30 PM (Day 2)
      4. Ebisubashi Bridge - 9 PM (Day 1)

    Note: Kuromon Ichiba Market at 6 PM should NOT be deleted (it's at 6PM, not after).
    Items after 6PM on other days (Day 3-8) should NOT be deleted.
    """
    # Step 1: Find the main Japan Travel Planner page.
    if main_id:
        found_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)
        if not found_id or object_type != 'page':
            print("Error: Japan Travel Planner page not found.", file=sys.stderr)
            return False
    else:
        # Try to find the page by searching.
        found_id = notion_utils.find_page(notion, "Japan Travel Planner")
        if not found_id:
            print("Error: Japan Travel Planner page not found.", file=sys.stderr)
            return False

    print(f"Found Japan Travel Planner page: {found_id}")

    # Step 2: Find the Travel Itinerary database among the page's child blocks.
    all_blocks = notion_utils.get_all_blocks_recursively(notion, found_id)
    travel_itinerary_db_id = None

    for block in all_blocks:
        if block and block.get("type") == "child_database":
            title = block.get("child_database", {}).get("title", "")
            if "Travel Itinerary" in title:
                travel_itinerary_db_id = block.get("id")
                print(f"Found Travel Itinerary database: {travel_itinerary_db_id}")
                break

    if not travel_itinerary_db_id:
        print("Error: Travel Itinerary database not found", file=sys.stderr)
        return False

    # Step 3: Query the database for OSAKA items on Day 1 and Day 2.
    try:
        query_result = notion.databases.query(
            database_id=travel_itinerary_db_id,
            filter={
                "and": [
                    {"property": "Group", "select": {"equals": "Osaka"}},
                    {"or": [
                        {"property": "Day", "select": {"equals": "Day 1"}},
                        {"property": "Day", "select": {"equals": "Day 2"}}
                    ]}
                ]
            }
        )
    except Exception as e:
        print(f"Error querying Travel Itinerary database: {e}", file=sys.stderr)
        return False

    # Step 4: Check for items that should have been deleted.
    six_pm_minutes = 18 * 60  # 6 PM in minutes (18:00)

    # Expected deleted items (4 specific items after 6 PM on Day 1 and Day 2).
    # 'found' flips to True when the item STILL EXISTS in the database.
    expected_deleted = {
        "Rikuro's Namba Main Branch": {"time": "7 PM", "day": "Day 1", "found": False},
        "Shin Sekai \"New World\"": {"time": "8 PM", "day": "Day 2", "found": False},
        "Katsudon Chiyomatsu": {"time": "7:30 PM", "day": "Day 2", "found": False},
        "Ebisubashi Bridge": {"time": "9 PM", "day": "Day 1", "found": False}
    }

    # Items that should remain (at or before 6 PM).
    expected_remaining = {
        "Kuromon Ichiba Market": {"time": "6 PM", "found": False}
    }

    osaka_items_after_6pm = []
    osaka_items_at_or_before_6pm = []

    # Debug: Show total query results.
    print(f"Debug: Found {len(query_result.get('results', []))} total OSAKA items on Day 1 and Day 2")

    # Process all OSAKA items on Day 1 and Day 2.
    for page in query_result.get('results', []):
        page_title = get_page_title(page).strip()
        page_time = get_page_time(page)
        page_group = get_page_group(page)
        page_day = get_page_day(page)

        if page_group != "Osaka":
            continue

        # Parse time to check if after 6 PM; unparseable times fall through.
        time_minutes = parse_time_to_minutes(page_time)

        if time_minutes is not None and time_minutes > six_pm_minutes:
            osaka_items_after_6pm.append({
                "title": page_title,
                "time": page_time,
                "day": page_day,
                "id": page.get('id')
            })

            # Check if this is one of the expected deleted items.
            for expected_title, expected_info in expected_deleted.items():
                if _titles_match(page_title, expected_title) and page_day == expected_info["day"]:
                    print(f"Debug: Found '{page_title}' on {page_day} at {page_time} - matches expected '{expected_title}'")
                    expected_deleted[expected_title]["found"] = True

        elif time_minutes is not None and time_minutes <= six_pm_minutes:
            osaka_items_at_or_before_6pm.append({
                "title": page_title,
                "time": page_time,
                "day": page_day,
                "id": page.get('id')
            })

            # Check if this is one of the expected remaining items.
            for expected_title in expected_remaining:
                if expected_title.lower() in page_title.lower() or page_title.lower() in expected_title.lower():
                    expected_remaining[expected_title]["found"] = True

    # Step 5: Verify results.
    print("\nVerification Summary:")
    print("=" * 50)

    all_passed = True

    # Check that the 4 expected items after 6 PM have been deleted.
    print("\n4 Items that should be deleted (after 6 PM on Day 1 and Day 2):")
    for item_name, item_info in expected_deleted.items():
        if item_info["found"]:
            # found == True means the item still exists (was not deleted).
            print(f"āœ— {item_name} ({item_info['day']}, {item_info['time']}) - Still exists, should be deleted", file=sys.stderr)
            all_passed = False
        else:
            # found == False means the item was deleted correctly.
            print(f"āœ“ {item_name} ({item_info['day']}, {item_info['time']}) - Correctly deleted")

    # Check that items at or before 6 PM remain.
    print("\nItems that should remain (at or before 6 PM on Day 1 and Day 2):")
    for item_name, item_info in expected_remaining.items():
        if item_info["found"]:
            print(f"āœ“ {item_name} ({item_info['time']}) - Correctly retained")
        else:
            print(f"āœ— {item_name} ({item_info['time']}) - Missing, should not be deleted", file=sys.stderr)
            all_passed = False

    # Report any items after 6 PM that still exist.
    if osaka_items_after_6pm:
        print(f"\nāœ— Found {len(osaka_items_after_6pm)} OSAKA item(s) after 6 PM on Day 1/Day 2:", file=sys.stderr)
        for item in osaka_items_after_6pm:
            print(f" - {item['title']} at {item['time']} ({item['day']})", file=sys.stderr)
    else:
        print(f"\nāœ“ No OSAKA items found after 6 PM on Day 1/Day 2 (all correctly deleted)")

    # Report count summary.
    print(f"\nCount Summary:")
    print(f"- OSAKA items after 6 PM on Day 1/Day 2 found: {len(osaka_items_after_6pm)} (should be 0)")
    print(f"- OSAKA items at/before 6 PM on Day 1/Day 2 found: {len(osaka_items_at_or_before_6pm)}")
    print(f"- Expected deletions verified: {sum(1 for item in expected_deleted.values() if not item['found'])}/4")

    return all_passed


def main():
    """
    Executes the verification process and exits with a status code.
    """
    notion = notion_utils.get_notion_client()
    main_id = sys.argv[1] if len(sys.argv) > 1 else None

    if verify(notion, main_id):
        print("\nVerification passed: All 4 required OSAKA events after 6 PM on Day 1 and Day 2 have been removed")
        sys.exit(0)
    else:
        print("\nVerification failed: Some OSAKA events after 6 PM on Day 1/Day 2 still exist")
        sys.exit(1)


if __name__ == "__main__":
    main()
from tasks/notion/online_resume/layout_adjustment/meta.json rename to tasks/notion/standard/online_resume/layout_adjustment/meta.json diff --git a/tasks/notion/online_resume/layout_adjustment/verify.py b/tasks/notion/standard/online_resume/layout_adjustment/verify.py similarity index 100% rename from tasks/notion/online_resume/layout_adjustment/verify.py rename to tasks/notion/standard/online_resume/layout_adjustment/verify.py diff --git a/tasks/notion/online_resume/projects_section_update/description.md b/tasks/notion/standard/online_resume/projects_section_update/description.md similarity index 100% rename from tasks/notion/online_resume/projects_section_update/description.md rename to tasks/notion/standard/online_resume/projects_section_update/description.md diff --git a/tasks/notion/online_resume/projects_section_update/meta.json b/tasks/notion/standard/online_resume/projects_section_update/meta.json similarity index 100% rename from tasks/notion/online_resume/projects_section_update/meta.json rename to tasks/notion/standard/online_resume/projects_section_update/meta.json diff --git a/tasks/notion/online_resume/projects_section_update/verify.py b/tasks/notion/standard/online_resume/projects_section_update/verify.py similarity index 100% rename from tasks/notion/online_resume/projects_section_update/verify.py rename to tasks/notion/standard/online_resume/projects_section_update/verify.py diff --git a/tasks/notion/online_resume/skills_development_tracker/description.md b/tasks/notion/standard/online_resume/skills_development_tracker/description.md similarity index 100% rename from tasks/notion/online_resume/skills_development_tracker/description.md rename to tasks/notion/standard/online_resume/skills_development_tracker/description.md diff --git a/tasks/notion/online_resume/skills_development_tracker/meta.json b/tasks/notion/standard/online_resume/skills_development_tracker/meta.json similarity index 100% rename from 
tasks/notion/online_resume/skills_development_tracker/meta.json rename to tasks/notion/standard/online_resume/skills_development_tracker/meta.json diff --git a/tasks/notion/online_resume/skills_development_tracker/verify.py b/tasks/notion/standard/online_resume/skills_development_tracker/verify.py similarity index 100% rename from tasks/notion/online_resume/skills_development_tracker/verify.py rename to tasks/notion/standard/online_resume/skills_development_tracker/verify.py diff --git a/tasks/notion/online_resume/work_history_addition/description.md b/tasks/notion/standard/online_resume/work_history_addition/description.md similarity index 100% rename from tasks/notion/online_resume/work_history_addition/description.md rename to tasks/notion/standard/online_resume/work_history_addition/description.md diff --git a/tasks/notion/online_resume/work_history_addition/meta.json b/tasks/notion/standard/online_resume/work_history_addition/meta.json similarity index 100% rename from tasks/notion/online_resume/work_history_addition/meta.json rename to tasks/notion/standard/online_resume/work_history_addition/meta.json diff --git a/tasks/notion/online_resume/work_history_addition/verify.py b/tasks/notion/standard/online_resume/work_history_addition/verify.py similarity index 100% rename from tasks/notion/online_resume/work_history_addition/verify.py rename to tasks/notion/standard/online_resume/work_history_addition/verify.py diff --git a/tasks/notion/python_roadmap/expert_level_lessons/description.md b/tasks/notion/standard/python_roadmap/expert_level_lessons/description.md similarity index 100% rename from tasks/notion/python_roadmap/expert_level_lessons/description.md rename to tasks/notion/standard/python_roadmap/expert_level_lessons/description.md diff --git a/tasks/notion/python_roadmap/expert_level_lessons/meta.json b/tasks/notion/standard/python_roadmap/expert_level_lessons/meta.json similarity index 100% rename from 
tasks/notion/python_roadmap/expert_level_lessons/meta.json rename to tasks/notion/standard/python_roadmap/expert_level_lessons/meta.json diff --git a/tasks/notion/python_roadmap/expert_level_lessons/verify.py b/tasks/notion/standard/python_roadmap/expert_level_lessons/verify.py similarity index 100% rename from tasks/notion/python_roadmap/expert_level_lessons/verify.py rename to tasks/notion/standard/python_roadmap/expert_level_lessons/verify.py diff --git a/tasks/notion/python_roadmap/learning_metrics_dashboard/description.md b/tasks/notion/standard/python_roadmap/learning_metrics_dashboard/description.md similarity index 100% rename from tasks/notion/python_roadmap/learning_metrics_dashboard/description.md rename to tasks/notion/standard/python_roadmap/learning_metrics_dashboard/description.md diff --git a/tasks/notion/python_roadmap/learning_metrics_dashboard/meta.json b/tasks/notion/standard/python_roadmap/learning_metrics_dashboard/meta.json similarity index 100% rename from tasks/notion/python_roadmap/learning_metrics_dashboard/meta.json rename to tasks/notion/standard/python_roadmap/learning_metrics_dashboard/meta.json diff --git a/tasks/notion/python_roadmap/learning_metrics_dashboard/verify.py b/tasks/notion/standard/python_roadmap/learning_metrics_dashboard/verify.py similarity index 100% rename from tasks/notion/python_roadmap/learning_metrics_dashboard/verify.py rename to tasks/notion/standard/python_roadmap/learning_metrics_dashboard/verify.py diff --git a/tasks/notion/self_assessment/faq_column_layout/description.md b/tasks/notion/standard/self_assessment/faq_column_layout/description.md similarity index 100% rename from tasks/notion/self_assessment/faq_column_layout/description.md rename to tasks/notion/standard/self_assessment/faq_column_layout/description.md diff --git a/tasks/notion/self_assessment/faq_column_layout/meta.json b/tasks/notion/standard/self_assessment/faq_column_layout/meta.json similarity index 100% rename from 
tasks/notion/self_assessment/faq_column_layout/meta.json rename to tasks/notion/standard/self_assessment/faq_column_layout/meta.json diff --git a/tasks/notion/self_assessment/faq_column_layout/verify.py b/tasks/notion/standard/self_assessment/faq_column_layout/verify.py similarity index 100% rename from tasks/notion/self_assessment/faq_column_layout/verify.py rename to tasks/notion/standard/self_assessment/faq_column_layout/verify.py diff --git a/tasks/notion/self_assessment/hyperfocus_analysis_report/description.md b/tasks/notion/standard/self_assessment/hyperfocus_analysis_report/description.md similarity index 100% rename from tasks/notion/self_assessment/hyperfocus_analysis_report/description.md rename to tasks/notion/standard/self_assessment/hyperfocus_analysis_report/description.md diff --git a/tasks/notion/self_assessment/hyperfocus_analysis_report/meta.json b/tasks/notion/standard/self_assessment/hyperfocus_analysis_report/meta.json similarity index 100% rename from tasks/notion/self_assessment/hyperfocus_analysis_report/meta.json rename to tasks/notion/standard/self_assessment/hyperfocus_analysis_report/meta.json diff --git a/tasks/notion/self_assessment/hyperfocus_analysis_report/verify.py b/tasks/notion/standard/self_assessment/hyperfocus_analysis_report/verify.py similarity index 100% rename from tasks/notion/self_assessment/hyperfocus_analysis_report/verify.py rename to tasks/notion/standard/self_assessment/hyperfocus_analysis_report/verify.py diff --git a/tasks/notion/self_assessment/numbered_list_emojis/description.md b/tasks/notion/standard/self_assessment/numbered_list_emojis/description.md similarity index 100% rename from tasks/notion/self_assessment/numbered_list_emojis/description.md rename to tasks/notion/standard/self_assessment/numbered_list_emojis/description.md diff --git a/tasks/notion/self_assessment/numbered_list_emojis/meta.json b/tasks/notion/standard/self_assessment/numbered_list_emojis/meta.json similarity index 100% rename from 
tasks/notion/self_assessment/numbered_list_emojis/meta.json rename to tasks/notion/standard/self_assessment/numbered_list_emojis/meta.json diff --git a/tasks/notion/self_assessment/numbered_list_emojis/verify.py b/tasks/notion/standard/self_assessment/numbered_list_emojis/verify.py similarity index 100% rename from tasks/notion/self_assessment/numbered_list_emojis/verify.py rename to tasks/notion/standard/self_assessment/numbered_list_emojis/verify.py diff --git a/tasks/notion/standard_operating_procedure/deployment_process_sop/description.md b/tasks/notion/standard/standard_operating_procedure/deployment_process_sop/description.md similarity index 100% rename from tasks/notion/standard_operating_procedure/deployment_process_sop/description.md rename to tasks/notion/standard/standard_operating_procedure/deployment_process_sop/description.md diff --git a/tasks/notion/standard_operating_procedure/deployment_process_sop/meta.json b/tasks/notion/standard/standard_operating_procedure/deployment_process_sop/meta.json similarity index 100% rename from tasks/notion/standard_operating_procedure/deployment_process_sop/meta.json rename to tasks/notion/standard/standard_operating_procedure/deployment_process_sop/meta.json diff --git a/tasks/notion/standard_operating_procedure/deployment_process_sop/verify.py b/tasks/notion/standard/standard_operating_procedure/deployment_process_sop/verify.py similarity index 100% rename from tasks/notion/standard_operating_procedure/deployment_process_sop/verify.py rename to tasks/notion/standard/standard_operating_procedure/deployment_process_sop/verify.py diff --git a/tasks/notion/standard_operating_procedure/section_organization/description.md b/tasks/notion/standard/standard_operating_procedure/section_organization/description.md similarity index 100% rename from tasks/notion/standard_operating_procedure/section_organization/description.md rename to tasks/notion/standard/standard_operating_procedure/section_organization/description.md 
diff --git a/tasks/notion/standard_operating_procedure/section_organization/meta.json b/tasks/notion/standard/standard_operating_procedure/section_organization/meta.json similarity index 100% rename from tasks/notion/standard_operating_procedure/section_organization/meta.json rename to tasks/notion/standard/standard_operating_procedure/section_organization/meta.json diff --git a/tasks/notion/standard_operating_procedure/section_organization/verify.py b/tasks/notion/standard/standard_operating_procedure/section_organization/verify.py similarity index 100% rename from tasks/notion/standard_operating_procedure/section_organization/verify.py rename to tasks/notion/standard/standard_operating_procedure/section_organization/verify.py diff --git a/tasks/notion/team_projects/priority_tasks_table/description.md b/tasks/notion/standard/team_projects/priority_tasks_table/description.md similarity index 100% rename from tasks/notion/team_projects/priority_tasks_table/description.md rename to tasks/notion/standard/team_projects/priority_tasks_table/description.md diff --git a/tasks/notion/team_projects/priority_tasks_table/meta.json b/tasks/notion/standard/team_projects/priority_tasks_table/meta.json similarity index 100% rename from tasks/notion/team_projects/priority_tasks_table/meta.json rename to tasks/notion/standard/team_projects/priority_tasks_table/meta.json diff --git a/tasks/notion/team_projects/priority_tasks_table/verify.py b/tasks/notion/standard/team_projects/priority_tasks_table/verify.py similarity index 100% rename from tasks/notion/team_projects/priority_tasks_table/verify.py rename to tasks/notion/standard/team_projects/priority_tasks_table/verify.py diff --git a/tasks/notion/team_projects/swap_tasks/description.md b/tasks/notion/standard/team_projects/swap_tasks/description.md similarity index 100% rename from tasks/notion/team_projects/swap_tasks/description.md rename to tasks/notion/standard/team_projects/swap_tasks/description.md diff --git 
a/tasks/notion/team_projects/swap_tasks/meta.json b/tasks/notion/standard/team_projects/swap_tasks/meta.json similarity index 100% rename from tasks/notion/team_projects/swap_tasks/meta.json rename to tasks/notion/standard/team_projects/swap_tasks/meta.json diff --git a/tasks/notion/standard/team_projects/swap_tasks/verify.py b/tasks/notion/standard/team_projects/swap_tasks/verify.py new file mode 100644 index 00000000..2c9a8d6c --- /dev/null +++ b/tasks/notion/standard/team_projects/swap_tasks/verify.py @@ -0,0 +1,215 @@ +import sys +from notion_client import Client +from tasks.utils import notion_utils + +def verify(notion: Client, main_id: str = None) -> bool: + """ + Verifies that the task assignees have been swapped correctly. + Checks: + 1. "Develop a plan for promotion" and "Evaluate different third-party services" have swapped assignees + 2. The person with most tasks and person with least tasks have swapped all their tasks + """ + # Step 1: Find the Team Projects page + if main_id: + found_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id) + if not found_id or object_type != 'page': + print("Error: Team Projects page not found.", file=sys.stderr) + return False + else: + # Try to find the page by searching + found_id = notion_utils.find_page(notion, "Team Projects") + if not found_id: + print("Error: Team Projects page not found.", file=sys.stderr) + return False + + # Get all blocks from the page to find database references + all_blocks = notion_utils.get_all_blocks_recursively(notion, found_id) + + # Find Tasks database ID from the page + tasks_db_id = None + + for block in all_blocks: + if block and block.get("type") == "child_database": + db_title = block.get("child_database", {}).get("title", "") + if "Tasks" in db_title: + tasks_db_id = block["id"] + break + + if not tasks_db_id: + print("Error: Tasks database not found.", file=sys.stderr) + return False + + print("\nšŸ“‹ Starting verification...") + + # Step 2: Query all 
tasks to analyze assignees + + try: + all_tasks_response = notion.databases.query( + database_id=tasks_db_id, + page_size=100 + ) + + if not all_tasks_response.get("results"): + print("Error: No tasks found in Tasks database.", file=sys.stderr) + return False + + tasks = all_tasks_response["results"] + + except Exception as e: + print(f"Error querying Tasks database: {e}", file=sys.stderr) + return False + + # Step 3: Check specific tasks have swapped assignees + + develop_plan_task = None + evaluate_services_task = None + + for task in tasks: + task_name = task["properties"]["Name"]["title"][0]["text"]["content"] + if task_name == "Develop a plan for promotion": + develop_plan_task = task + elif task_name == "Evaluate different third-party services": + evaluate_services_task = task + + if not develop_plan_task or not evaluate_services_task: + print("Error: Could not find both required tasks.", file=sys.stderr) + return False + + # Get assignees for these tasks + develop_plan_assignees = develop_plan_task["properties"]["Assigned"]["people"] + evaluate_services_assignees = evaluate_services_task["properties"]["Assigned"]["people"] + + if not develop_plan_assignees or not evaluate_services_assignees: + print("Error: Tasks don't have assignees.", file=sys.stderr) + return False + + develop_plan_assignee_id = develop_plan_assignees[0]["id"] + evaluate_services_assignee_id = evaluate_services_assignees[0]["id"] + + # These should be different (swapped) + if develop_plan_assignee_id == evaluate_services_assignee_id: + print("Error: Tasks should have different assignees after swap.", file=sys.stderr) + return False + + # Step 4: Count tasks per person + + task_counts = {} + unassigned_count = 0 + + for task in tasks: + assignees = task["properties"]["Assigned"]["people"] + if assignees: + assignee_id = assignees[0]["id"] + if assignee_id not in task_counts: + task_counts[assignee_id] = [] + 
task_counts[assignee_id].append(task["properties"]["Name"]["title"][0]["text"]["content"]) + else: + unassigned_count += 1 + + # Sort by task count + sorted_assignees = sorted(task_counts.items(), key=lambda x: len(x[1])) + + if len(sorted_assignees) < 2: + print("Error: Need at least 2 people with tasks to verify swap.", file=sys.stderr) + return False + + # Get person with least and most tasks + person_with_least = sorted_assignees[0] + person_with_most = sorted_assignees[-1] + + least_id, least_tasks = person_with_least + most_id, most_tasks = person_with_most + + # Step 5: Verify the swap pattern + + # Original distribution (before swap): + # - 5ac96c02-49a4-4320-8de6-b663ba83126b had 3 tasks (least) + # - ac7a3bd0-c111-4464-8f45-8a857a1abc8a had 10 tasks (most) + + # After complete swap, we expect: + # - 5ac96c02-49a4-4320-8de6-b663ba83126b should have 10 tasks + # - ac7a3bd0-c111-4464-8f45-8a857a1abc8a should have 3 tasks + + original_least_id = "5ac96c02-49a4-4320-8de6-b663ba83126b" + original_most_id = "ac7a3bd0-c111-4464-8f45-8a857a1abc8a" + + # Check if the swap has been completed + swap_completed = False + for assignee_id, assignee_tasks in task_counts.items(): + if assignee_id == original_least_id and len(assignee_tasks) == 10: + # Person who had 3 now has 10 + for other_id, other_tasks in task_counts.items(): + if other_id == original_most_id and len(other_tasks) == 3: + # Person who had 10 now has 3 + swap_completed = True + break + + # Step 6: Summary + print(f"\nšŸ“Š Task Distribution:") + print(f" • Total tasks: {len(tasks)}") + print(f" • Assigned tasks: {len(tasks) - unassigned_count}") + print(f" • Unassigned tasks: {unassigned_count}") + print(f" • People with tasks: {len(task_counts)}") + print(f"\n Task counts by person:") + for assignee_id, assignee_tasks in sorted_assignees: + print(f" - {assignee_id[:8]}...: {len(assignee_tasks)} tasks") + + # Step 7: Final verification + print("\nšŸ” Verification Results:") + + # Check that the swap has 
created a significant difference + if len(most_tasks) - len(least_tasks) < 5: + print(f"Warning: Difference between most and least is only {len(most_tasks) - len(least_tasks)} tasks", file=sys.stderr) + + # Verify specific expected outcomes + verification_passed = True + + # Check 1: Specific tasks have been swapped + specific_tasks_swapped = develop_plan_assignee_id != evaluate_services_assignee_id + if specific_tasks_swapped: + print(" āœ“ Specific tasks have been swapped") + else: + print(" āœ— Specific tasks were not swapped", file=sys.stderr) + verification_passed = False + + # Check 2: Task distribution shows a complete swap + if swap_completed: + print(" āœ“ Complete task swap verified (3↔10 tasks)") + else: + # Show actual distribution for debugging + person1_tasks = len(task_counts.get(original_least_id, [])) + person2_tasks = len(task_counts.get(original_most_id, [])) + print(f" āœ— Swap incomplete! Expected 5ac96c02→10 tasks, ac7a3bd0→3 tasks", file=sys.stderr) + print(f" Actual: 5ac96c02→{person1_tasks} tasks, ac7a3bd0→{person2_tasks} tasks", file=sys.stderr) + verification_passed = False + + # Check 3: Total task count is preserved + total_assigned_tasks = sum(len(tasks) for _, tasks in task_counts.items()) + expected_total = len(tasks) - unassigned_count + + if total_assigned_tasks == expected_total: + print(f" āœ“ Total task count preserved ({total_assigned_tasks} assigned)") + else: + print(f" āœ— Task count mismatch: {total_assigned_tasks} vs {expected_total} expected", file=sys.stderr) + verification_passed = False + + if verification_passed: + print("\nāœ… All verification checks passed!") + return True + else: + print("\nāŒ Verification failed", file=sys.stderr) + return False + +def main(): + """ + Executes the verification process and exits with a status code. 
+ """ + notion = notion_utils.get_notion_client() + main_id = sys.argv[1] if len(sys.argv) > 1 else None + if verify(notion, main_id): + sys.exit(0) + else: + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tasks/notion/toronto_guide/change_color/description.md b/tasks/notion/standard/toronto_guide/change_color/description.md similarity index 100% rename from tasks/notion/toronto_guide/change_color/description.md rename to tasks/notion/standard/toronto_guide/change_color/description.md diff --git a/tasks/notion/toronto_guide/change_color/meta.json b/tasks/notion/standard/toronto_guide/change_color/meta.json similarity index 100% rename from tasks/notion/toronto_guide/change_color/meta.json rename to tasks/notion/standard/toronto_guide/change_color/meta.json diff --git a/tasks/notion/toronto_guide/change_color/verify.py b/tasks/notion/standard/toronto_guide/change_color/verify.py similarity index 100% rename from tasks/notion/toronto_guide/change_color/verify.py rename to tasks/notion/standard/toronto_guide/change_color/verify.py diff --git a/tasks/notion/toronto_guide/weekend_adventure_planner/description.md b/tasks/notion/standard/toronto_guide/weekend_adventure_planner/description.md similarity index 100% rename from tasks/notion/toronto_guide/weekend_adventure_planner/description.md rename to tasks/notion/standard/toronto_guide/weekend_adventure_planner/description.md diff --git a/tasks/notion/toronto_guide/weekend_adventure_planner/meta.json b/tasks/notion/standard/toronto_guide/weekend_adventure_planner/meta.json similarity index 100% rename from tasks/notion/toronto_guide/weekend_adventure_planner/meta.json rename to tasks/notion/standard/toronto_guide/weekend_adventure_planner/meta.json diff --git a/tasks/notion/toronto_guide/weekend_adventure_planner/verify.py b/tasks/notion/standard/toronto_guide/weekend_adventure_planner/verify.py similarity index 100% rename from 
tasks/notion/toronto_guide/weekend_adventure_planner/verify.py rename to tasks/notion/standard/toronto_guide/weekend_adventure_planner/verify.py diff --git a/tasks/playwright/easy/.gitkeep b/tasks/playwright/easy/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tasks/playwright/eval_web/cloudflare_turnstile_challenge/description.md b/tasks/playwright/standard/eval_web/cloudflare_turnstile_challenge/description.md similarity index 100% rename from tasks/playwright/eval_web/cloudflare_turnstile_challenge/description.md rename to tasks/playwright/standard/eval_web/cloudflare_turnstile_challenge/description.md diff --git a/tasks/playwright/eval_web/cloudflare_turnstile_challenge/meta.json b/tasks/playwright/standard/eval_web/cloudflare_turnstile_challenge/meta.json similarity index 100% rename from tasks/playwright/eval_web/cloudflare_turnstile_challenge/meta.json rename to tasks/playwright/standard/eval_web/cloudflare_turnstile_challenge/meta.json diff --git a/tasks/playwright/eval_web/cloudflare_turnstile_challenge/verify.py b/tasks/playwright/standard/eval_web/cloudflare_turnstile_challenge/verify.py similarity index 100% rename from tasks/playwright/eval_web/cloudflare_turnstile_challenge/verify.py rename to tasks/playwright/standard/eval_web/cloudflare_turnstile_challenge/verify.py diff --git a/tasks/playwright/eval_web/extraction_table/data.csv b/tasks/playwright/standard/eval_web/extraction_table/data.csv similarity index 100% rename from tasks/playwright/eval_web/extraction_table/data.csv rename to tasks/playwright/standard/eval_web/extraction_table/data.csv diff --git a/tasks/playwright/eval_web/extraction_table/description.md b/tasks/playwright/standard/eval_web/extraction_table/description.md similarity index 100% rename from tasks/playwright/eval_web/extraction_table/description.md rename to tasks/playwright/standard/eval_web/extraction_table/description.md diff --git a/tasks/playwright/eval_web/extraction_table/meta.json 
b/tasks/playwright/standard/eval_web/extraction_table/meta.json similarity index 100% rename from tasks/playwright/eval_web/extraction_table/meta.json rename to tasks/playwright/standard/eval_web/extraction_table/meta.json diff --git a/tasks/playwright/eval_web/extraction_table/verify.py b/tasks/playwright/standard/eval_web/extraction_table/verify.py similarity index 100% rename from tasks/playwright/eval_web/extraction_table/verify.py rename to tasks/playwright/standard/eval_web/extraction_table/verify.py diff --git a/tasks/playwright/web_search/birth_of_arvinxu/description.md b/tasks/playwright/standard/web_search/birth_of_arvinxu/description.md similarity index 100% rename from tasks/playwright/web_search/birth_of_arvinxu/description.md rename to tasks/playwright/standard/web_search/birth_of_arvinxu/description.md diff --git a/tasks/playwright/web_search/birth_of_arvinxu/meta.json b/tasks/playwright/standard/web_search/birth_of_arvinxu/meta.json similarity index 100% rename from tasks/playwright/web_search/birth_of_arvinxu/meta.json rename to tasks/playwright/standard/web_search/birth_of_arvinxu/meta.json diff --git a/tasks/playwright/web_search/birth_of_arvinxu/verify.py b/tasks/playwright/standard/web_search/birth_of_arvinxu/verify.py similarity index 100% rename from tasks/playwright/web_search/birth_of_arvinxu/verify.py rename to tasks/playwright/standard/web_search/birth_of_arvinxu/verify.py diff --git a/tasks/playwright/web_search/r1_arxiv/content.txt b/tasks/playwright/standard/web_search/r1_arxiv/content.txt similarity index 100% rename from tasks/playwright/web_search/r1_arxiv/content.txt rename to tasks/playwright/standard/web_search/r1_arxiv/content.txt diff --git a/tasks/playwright/web_search/r1_arxiv/description.md b/tasks/playwright/standard/web_search/r1_arxiv/description.md similarity index 100% rename from tasks/playwright/web_search/r1_arxiv/description.md rename to tasks/playwright/standard/web_search/r1_arxiv/description.md diff --git 
a/tasks/playwright/web_search/r1_arxiv/meta.json b/tasks/playwright/standard/web_search/r1_arxiv/meta.json similarity index 100% rename from tasks/playwright/web_search/r1_arxiv/meta.json rename to tasks/playwright/standard/web_search/r1_arxiv/meta.json diff --git a/tasks/playwright/web_search/r1_arxiv/verify.py b/tasks/playwright/standard/web_search/r1_arxiv/verify.py similarity index 100% rename from tasks/playwright/web_search/r1_arxiv/verify.py rename to tasks/playwright/standard/web_search/r1_arxiv/verify.py diff --git a/tasks/playwright_webarena/easy/.gitkeep b/tasks/playwright_webarena/easy/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tasks/playwright_webarena/easy/reddit/ai_data_analyst/description.md b/tasks/playwright_webarena/easy/reddit/ai_data_analyst/description.md new file mode 100644 index 00000000..90366359 --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/ai_data_analyst/description.md @@ -0,0 +1,15 @@ +Deliver a bite-sized AI community snapshot by capturing two essential metrics and posting them back to the forum. + +**Task Requirements:** + +1. Register and log in with username `AIDataAnalyst2025` and password `SecurePass123!` so every action is tied to that account. +2. Visit the `deeplearning` forum (page 1 only) and count how many posts are visible; note the exact total. +3. Go to the `MachineLearning` forum, find the post titled `[P] I made a command-line tool that explains your errors using ChatGPT (link in comments)`, and record the vote count shown in the listing. +4. Still in `MachineLearning`, create a post titled `MachineLearning_Extraction` whose body is exactly the two-line Markdown list below (keep the keys and pipe separators untouched, replacing each `FILL_VALUE` with your numbers): + +``` +- Deeplearning_Post_Count|FILL_VALUE +- ChatGPT_Tool_Vote_Count|FILL_VALUE +``` + +No searches or multi-page navigation are required—just capture these two data points and report them accurately. 
diff --git a/tasks/playwright_webarena/easy/reddit/ai_data_analyst/label.txt b/tasks/playwright_webarena/easy/reddit/ai_data_analyst/label.txt new file mode 100644 index 00000000..53e8ca0f --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/ai_data_analyst/label.txt @@ -0,0 +1,2 @@ +- Deeplearning_Post_Count|25 +- ChatGPT_Tool_Vote_Count|2655 diff --git a/tasks/playwright_webarena/easy/reddit/ai_data_analyst/meta.json b/tasks/playwright_webarena/easy/reddit/ai_data_analyst/meta.json new file mode 100644 index 00000000..fe2d3e4e --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/ai_data_analyst/meta.json @@ -0,0 +1,24 @@ +{ + "task_id": "ai_data_analyst_easy", + "task_name": "AI Data Analyst (Easy)", + "category_id": "reddit", + "category_name": "Reddit", + "description": "Capture a lightweight AI/ML engagement snapshot by signing in, grabbing three key metrics, and posting them back to the MachineLearning forum.", + "author": "Fanqing Meng", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "user interaction", + "data extraction", + "comparative analysis" + ], + "mcp": [ + "playwright" + ], + "meta_data": { + "stateType": "video", + "stateContent": null, + "stateUrl": "https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4", + "stateOriginalUrl": "https://github.com/web-arena-x/webarena/tree/main/environment_docker" + } +} diff --git a/tasks/playwright_webarena/easy/reddit/ai_data_analyst/verify.py b/tasks/playwright_webarena/easy/reddit/ai_data_analyst/verify.py new file mode 100644 index 00000000..aa20606d --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/ai_data_analyst/verify.py @@ -0,0 +1,175 @@ +import asyncio +import os +import re +import sys +from pathlib import Path +from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError + +BASE_URL = os.getenv("WEBARENA_BASE_URL", "http://localhost:9999").rstrip("/") +USERNAME = "AIDataAnalyst2025" +PASSWORD = "SecurePass123!" 
+POST_TITLE = "MachineLearning_Extraction" +REQUIRED_FIELDS = [ + "Deeplearning_Post_Count", + "ChatGPT_Tool_Vote_Count", +] +LABEL_PATH = Path(__file__).parent / "label.txt" + + +def parse_key_value_format(text: str) -> dict: + data = {} + if not text: + return data + for line in text.splitlines(): + line = line.strip() + if not line or "|" not in line: + continue + line = re.sub(r"^[-•*]\s*", "", line) + key, value = line.split("|", 1) + data[key.strip()] = value.strip() + return data + + +def load_expected_values() -> dict: + if not LABEL_PATH.exists(): + return {} + return parse_key_value_format(LABEL_PATH.read_text(encoding="utf-8")) + + +async def ensure_logged_in(page) -> bool: + print("Step 1: Ensuring we are logged in...", file=sys.stderr) + await page.goto(f"{BASE_URL}/", wait_until="networkidle") + user_button = page.locator(f'button:has-text("{USERNAME}")') + if await user_button.count(): + print("āœ“ Already logged in", file=sys.stderr) + return True + + login_link = page.locator('a:has-text("Log in")') + if not await login_link.count(): + print("FAILED: Login link not found", file=sys.stderr) + return False + + await login_link.click() + await page.wait_for_load_state("networkidle") + await page.fill('input[name="_username"]', USERNAME) + await page.fill('input[name="_password"]', PASSWORD) + await page.click('button:has-text("Log in")') + await page.wait_for_load_state("networkidle") + + if await page.locator(f'button:has-text("{USERNAME}")').count(): + print(f"āœ“ Logged in as {USERNAME}", file=sys.stderr) + return True + + print("FAILED: Could not log in with provided credentials", file=sys.stderr) + return False + + +async def fetch_submission_content(page): + print("Step 2: Retrieving MachineLearning submission...", file=sys.stderr) + await page.goto(f"{BASE_URL}/f/MachineLearning", wait_until="networkidle") + post_link = page.locator(f'a:has-text("{POST_TITLE}")') + if not await post_link.count(): + print( + f"FAILED: Submission '{POST_TITLE}' 
not found in MachineLearning forum", + file=sys.stderr, + ) + return None + + await post_link.first.click() + await page.wait_for_load_state("networkidle") + + selectors = [ + ".submission__body", + "article", + ".post-body", + ".RichText", + '[class*="RichText"]', + ] + + for selector in selectors: + locator = page.locator(selector) + if await locator.count(): + content = await locator.first.inner_text() + if content: + print(f"āœ“ Found submission body via selector {selector}", file=sys.stderr) + return content + + print("FAILED: Unable to locate submission body content", file=sys.stderr) + return None + + +def validate_submission(extracted: dict, expected: dict) -> bool: + missing = [key for key in REQUIRED_FIELDS if key not in extracted] + if missing: + print( + f"FAILED: Submission body missing required keys: {', '.join(missing)}", + file=sys.stderr, + ) + return False + + errors = [] + for key in REQUIRED_FIELDS: + actual = extracted.get(key, "") + expect = expected.get(key, "") + try: + actual_val = int(actual) + expect_val = int(expect) + if actual_val != expect_val: + errors.append(f"{key}: expected {expect_val}, found {actual_val}") + except ValueError: + errors.append(f"{key}: value '{actual}' is not numeric") + + if errors: + print("FAILED: Submission values do not match expected data:", file=sys.stderr) + for err in errors: + print(f" - {err}", file=sys.stderr) + return False + + print("āœ“ Submission content matches expected metrics", file=sys.stderr) + return True + + +async def verify() -> bool: + expected = load_expected_values() + if not expected: + print("FAILED: label.txt with expected values is missing", file=sys.stderr) + return False + + + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + context = await browser.new_context() + page = await context.new_page() + + try: + if not await ensure_logged_in(page): + return False + + content = await fetch_submission_content(page) + if not content: + return False 
+ + extracted = parse_key_value_format(content) + if not validate_submission(extracted, expected): + return False + + print("SUCCESS: Easy AI data analyst task verified", file=sys.stderr) + return True + + except PlaywrightTimeoutError as exc: + print(f"FAILED: Timeout occurred - {exc}", file=sys.stderr) + return False + except Exception as exc: + print(f"FAILED: Unexpected error - {exc}", file=sys.stderr) + return False + finally: + await browser.close() + + +def main(): + result = asyncio.run(verify()) + sys.exit(0 if result else 1) + + +if __name__ == "__main__": + main() diff --git a/tasks/playwright_webarena/easy/reddit/llm_research_summary/description.md b/tasks/playwright_webarena/easy/reddit/llm_research_summary/description.md new file mode 100644 index 00000000..dd640139 --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/llm_research_summary/description.md @@ -0,0 +1,16 @@ +Produce a three-line GPT activity bulletin so readers can quickly see how active the MachineLearning forum is. + +**Task Requirements:** + +1. Register and log in with username `llm_analyst_2024` and password `LLMExpert2024!`. +2. On the first page of the `MachineLearning` forum, count how many post titles contain `GPT`, `ChatGPT`, or `LLM`; record the exact total. +3. From that subset, identify the single post with the highest upvote count and note its full title and vote total. +4. Create a new post titled `LLM Research Summary: GPT Discussions Analysis [2024]` in the same forum. The body must be exactly the Markdown list below with your numbers in place of each `FILL_VALUE` (keep the keys, order, and pipe separator intact): + +``` +- Total_LLM_Posts|FILL_VALUE +- Top1_Title|FILL_VALUE +- Top1_Upvotes|FILL_VALUE +``` + +That’s the entire easy version—just report the headline LLM count and the hottest related thread.
diff --git a/tasks/playwright_webarena/easy/reddit/llm_research_summary/label.txt b/tasks/playwright_webarena/easy/reddit/llm_research_summary/label.txt new file mode 100644 index 00000000..ffe67439 --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/llm_research_summary/label.txt @@ -0,0 +1,3 @@ +- Total_LLM_Posts|9 +- Top1_Title|[P] I made a command-line tool that explains your errors using ChatGPT (link in comments) +- Top1_Upvotes|2655 diff --git a/tasks/playwright_webarena/easy/reddit/llm_research_summary/meta.json b/tasks/playwright_webarena/easy/reddit/llm_research_summary/meta.json new file mode 100644 index 00000000..d69b8ddf --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/llm_research_summary/meta.json @@ -0,0 +1,25 @@ +{ + "task_id": "llm_research_summary_easy", + "task_name": "LLM Research Summary (Easy)", + "category_id": "reddit", + "category_name": "Reddit", + "description": "Collect the headline GPT metrics from MachineLearning and publish a short three-line recap.", + "author": "Fanqing Meng", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "data extraction", + "search aggregation", + "content submission", + "user interaction" + ], + "mcp": [ + "playwright" + ], + "meta_data": { + "stateType": "video", + "stateContent": null, + "stateUrl": "https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4", + "stateOriginalUrl": "https://github.com/web-arena-x/webarena/tree/main/environment_docker" + } +} diff --git a/tasks/playwright_webarena/easy/reddit/llm_research_summary/verify.py b/tasks/playwright_webarena/easy/reddit/llm_research_summary/verify.py new file mode 100644 index 00000000..25c68fda --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/llm_research_summary/verify.py @@ -0,0 +1,190 @@ +import asyncio +import os +import re +import sys +from pathlib import Path +from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError +
+BASE_URL = os.getenv("WEBARENA_BASE_URL",
"http://localhost:9999").rstrip("/") +USERNAME = "llm_analyst_2024" +PASSWORD = "LLMExpert2024!" +FORUM_SLUG = "MachineLearning" +POST_TITLE = "LLM Research Summary: GPT Discussions Analysis [2024]" +REQUIRED_FIELDS = [ + "Total_LLM_Posts", + "Top1_Title", + "Top1_Upvotes", +] +NUMERIC_FIELDS = {"Total_LLM_Posts", "Top1_Upvotes"} +LABEL_PATH = Path(__file__).parent / "label.txt" + + +def parse_key_value_format(text: str) -> dict: + data = {} + if not text: + return data + for line in text.splitlines(): + line = line.strip() + if not line or "|" not in line: + continue + line = re.sub(r"^[-•*]\s*", "", line) + key, value = line.split("|", 1) + data[key.strip()] = value.strip() + return data + + +def normalize_text(value: str) -> str: + if value is None: + return "" + replacements = { + "\u2019": "'", + "\u2018": "'", + "\u201c": '"', + "\u201d": '"', + } + for src, dst in replacements.items(): + value = value.replace(src, dst) + return " ".join(value.split()).strip() + + +def load_expected_values() -> dict: + if not LABEL_PATH.exists(): + return {} + return parse_key_value_format(LABEL_PATH.read_text(encoding="utf-8")) + + +async def ensure_logged_in(page) -> bool: + print("Step 1: Signing in as llm_analyst_2024...", file=sys.stderr) + await page.goto(f"{BASE_URL}/", wait_until="networkidle") + user_button = page.locator(f'button:has-text("{USERNAME}")') + if await user_button.count(): + print("āœ“ Already logged in", file=sys.stderr) + return True + + login_link = page.locator('a:has-text("Log in")') + if not await login_link.count(): + print("FAILED: Login link not found", file=sys.stderr) + return False + + await login_link.click() + await page.wait_for_load_state("networkidle") + await page.fill('input[name="_username"]', USERNAME) + await page.fill('input[name="_password"]', PASSWORD) + await page.click('button:has-text("Log in")') + await page.wait_for_load_state("networkidle") + + if await page.locator(f'button:has-text("{USERNAME}")').count(): + print(f"āœ“ 
Logged in as {USERNAME}", file=sys.stderr) + return True + + print("FAILED: Could not log in with provided credentials", file=sys.stderr) + return False + + +async def fetch_summary_body(page): + print("Step 2: Opening MachineLearning summary post...", file=sys.stderr) + await page.goto(f"{BASE_URL}/f/{FORUM_SLUG}", wait_until="networkidle") + post_link = page.locator(f'a:has-text("{POST_TITLE}")') + if not await post_link.count(): + print(f"FAILED: Submission '{POST_TITLE}' not found", file=sys.stderr) + return None + + await post_link.first.click() + await page.wait_for_load_state("networkidle") + + selectors = [ + ".submission__body", + "article", + ".post-body", + ".RichText", + '[class*="RichText"]', + 'div:has-text("Total_LLM_Posts")', + ] + + for selector in selectors: + locator = page.locator(selector) + if await locator.count(): + content = await locator.first.inner_text() + if content: + print(f"āœ“ Found summary content via selector {selector}", file=sys.stderr) + return content + + print("FAILED: Unable to locate submission body", file=sys.stderr) + return None + + +def validate_fields(extracted: dict, expected: dict) -> bool: + missing = [key for key in REQUIRED_FIELDS if key not in extracted] + if missing: + print(f"FAILED: Missing required keys: {', '.join(missing)}", file=sys.stderr) + return False + + errors = [] + for key in REQUIRED_FIELDS: + actual = extracted.get(key, "") + expect = expected.get(key, "") + if key in NUMERIC_FIELDS: + try: + actual_val = int(actual) + expect_val = int(expect) + if actual_val != expect_val: + errors.append(f"{key}: expected {expect_val}, found {actual_val}") + except ValueError: + errors.append(f"{key}: '{actual}' is not numeric") + else: + if normalize_text(actual) != normalize_text(expect): + errors.append(f"{key}: expected '{expect}', found '{actual}'") + + if errors: + print("FAILED: Summary values do not match expected data:", file=sys.stderr) + for err in errors: + print(f" - {err}", file=sys.stderr) + 
return False + + print("āœ“ Summary values match expected snapshot", file=sys.stderr) + return True + + +async def verify() -> bool: + expected = load_expected_values() + if not expected: + print("FAILED: label.txt is missing", file=sys.stderr) + return False + + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + context = await browser.new_context() + page = await context.new_page() + + try: + if not await ensure_logged_in(page): + return False + + content = await fetch_summary_body(page) + if not content: + return False + + extracted = parse_key_value_format(content) + if not validate_fields(extracted, expected): + return False + + print("SUCCESS: LLM research easy task verified", file=sys.stderr) + return True + + except PlaywrightTimeoutError as exc: + print(f"FAILED: Timeout occurred - {exc}", file=sys.stderr) + return False + except Exception as exc: + print(f"FAILED: Unexpected error - {exc}", file=sys.stderr) + return False + finally: + await browser.close() + + +def main(): + result = asyncio.run(verify()) + sys.exit(0 if result else 1) + + +if __name__ == "__main__": + main() diff --git a/tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/description.md b/tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/description.md new file mode 100644 index 00000000..bd49f092 --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/description.md @@ -0,0 +1,17 @@ +Provide a lightweight status report on what’s trending in the movies forum so stakeholders can scan it at a glance. + +**Task Requirements:** + +1. Register and log in with username `movie_reviewer_2024` and password `movie_reviewer_2024`. +2. On the first page of the `movies` forum, count how many post titles contain any four-digit year (e.g., 1984, 2024) and record the total. +3. Still on that page, find the post with the highest upvote count and record its full title as well as the vote and comment counts shown. +4. 
Publish a post in the same forum titled `Wonderful Movies Analysis: Community Favorites [2024]`. The body must match the four-line Markdown list below—keep the keys, order, and pipe separators exactly as written while replacing each `FILL_VALUE` with your data: + +``` +- Total_Year_Posts|FILL_VALUE +- Top_Title|FILL_VALUE +- Top_Upvotes|FILL_VALUE +- Top_Comments|FILL_VALUE +``` + +No multi-page browsing or special threads are required; this easy task captures just the top signals from the first page. diff --git a/tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/label.txt b/tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/label.txt new file mode 100644 index 00000000..5e6f1f7f --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/label.txt @@ -0,0 +1,4 @@ +- Total_Year_Posts|1 +- Top_Title|Who will win the Oscar for ACTRESS IN A SUPPORTING ROLE? +- Top_Upvotes|9933 +- Top_Comments|23 diff --git a/tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/meta.json b/tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/meta.json new file mode 100644 index 00000000..65aa9a19 --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/meta.json @@ -0,0 +1,25 @@ +{ + "task_id": "movie_reviewer_analysis_easy", + "task_name": "Movie Reviewer Analysis (Easy)", + "category_id": "reddit", + "category_name": "Reddit", + "description": "Grab the first-page movie signals plus the top post's vote and comment stats and share them in a concise recap post.", + "author": "Fanqing Meng", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "user interaction", + "data extraction", + "comparative analysis", + "content submission" + ], + "mcp": [ + "playwright" + ], + "meta_data": { + "stateType": "video", + "stateContent": null, + "stateUrl": "https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4", + "stateOriginalUrl":
"https://github.com/web-arena-x/webarena/tree/main/environment_docker" + } +} diff --git a/tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/verify.py b/tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/verify.py new file mode 100644 index 00000000..30f1bb07 --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/verify.py @@ -0,0 +1,195 @@ +import asyncio +import os +import re +import sys +from pathlib import Path +from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError + +BASE_URL = os.getenv("WEBARENA_BASE_URL", "http://localhost:9999").rstrip("/") +USERNAME = "movie_reviewer_2024" +PASSWORD = "movie_reviewer_2024" +FORUM_SLUG = "movies" +POST_TITLE = "Wonderful Movies Analysis: Community Favorites [2024]" +REQUIRED_FIELDS = [ + "Total_Year_Posts", + "Top_Title", + "Top_Upvotes", + "Top_Comments", +] +NUMERIC_FIELDS = { + "Total_Year_Posts", + "Top_Upvotes", + "Top_Comments", +} +LABEL_PATH = Path(__file__).parent / "label.txt" + + +def parse_key_value_format(text: str) -> dict: + data = {} + if not text: + return data + for line in text.splitlines(): + line = line.strip() + if not line or "|" not in line: + continue + line = re.sub(r"^[-•*]\s*", "", line) + key, value = line.split("|", 1) + data[key.strip()] = value.strip() + return data + + +def normalize_text(value: str) -> str: + if value is None: + return "" + replacements = { + "\u2019": "'", + "\u2018": "'", + "\u201c": '"', + "\u201d": '"', + } + for src, dst in replacements.items(): + value = value.replace(src, dst) + return " ".join(value.split()).strip() + + +def load_expected_values() -> dict: + if not LABEL_PATH.exists(): + return {} + return parse_key_value_format(LABEL_PATH.read_text(encoding="utf-8")) + + +async def ensure_logged_in(page) -> bool: + print("Step 1: Authenticating movie_reviewer_2024...", file=sys.stderr) + await page.goto(f"{BASE_URL}/", wait_until="networkidle") + user_button = 
page.locator(f'button:has-text("{USERNAME}")') + if await user_button.count(): + print("āœ“ Already logged in", file=sys.stderr) + return True + + login_link = page.locator('a:has-text("Log in")') + if not await login_link.count(): + print("FAILED: Login link not found", file=sys.stderr) + return False + + await login_link.click() + await page.wait_for_load_state("networkidle") + await page.fill('input[name="_username"]', USERNAME) + await page.fill('input[name="_password"]', PASSWORD) + await page.click('button:has-text("Log in")') + await page.wait_for_load_state("networkidle") + + if await page.locator(f'button:has-text("{USERNAME}")').count(): + print(f"āœ“ Logged in as {USERNAME}", file=sys.stderr) + return True + + print("FAILED: Could not log in with provided credentials", file=sys.stderr) + return False + + +async def fetch_summary_body(page): + print("Step 2: Locating the movies summary post...", file=sys.stderr) + await page.goto(f"{BASE_URL}/f/{FORUM_SLUG}", wait_until="networkidle") + post_link = page.locator(f'a:has-text("{POST_TITLE}")') + if not await post_link.count(): + print(f"FAILED: Submission '{POST_TITLE}' not found", file=sys.stderr) + return None + + await post_link.first.click() + await page.wait_for_load_state("networkidle") + + selectors = [ + ".submission__body", + "article", + ".post-body", + ".RichText", + '[class*="RichText"]', + 'div:has-text("Total_Year_Posts")', + ] + + for selector in selectors: + locator = page.locator(selector) + if await locator.count(): + content = await locator.first.inner_text() + if content: + print(f"āœ“ Retrieved summary content via selector {selector}", file=sys.stderr) + return content + + print("FAILED: Unable to locate submission body", file=sys.stderr) + return None + + +def validate_summary(extracted: dict, expected: dict) -> bool: + missing = [key for key in REQUIRED_FIELDS if key not in extracted] + if missing: + print(f"FAILED: Missing required keys: {', '.join(missing)}", file=sys.stderr) + 
return False + + errors = [] + for key in REQUIRED_FIELDS: + actual = extracted.get(key, "") + expect = expected.get(key, "") + if key in NUMERIC_FIELDS: + try: + actual_val = int(actual) + expect_val = int(expect) + if actual_val != expect_val: + errors.append(f"{key}: expected {expect_val}, found {actual_val}") + except ValueError: + errors.append(f"{key}: '{actual}' is not numeric") + else: + if normalize_text(actual) != normalize_text(expect): + errors.append(f"{key}: expected '{expect}', found '{actual}'") + + if errors: + print("FAILED: Summary values differ from expected snapshot:", file=sys.stderr) + for err in errors: + print(f" - {err}", file=sys.stderr) + return False + + print("āœ“ Summary values match expected data", file=sys.stderr) + return True + + +async def verify() -> bool: + expected = load_expected_values() + if not expected: + print("FAILED: label.txt is missing", file=sys.stderr) + return False + + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + context = await browser.new_context() + page = await context.new_page() + + try: + if not await ensure_logged_in(page): + return False + + content = await fetch_summary_body(page) + if not content: + return False + + extracted = parse_key_value_format(content) + if not validate_summary(extracted, expected): + return False + + print("SUCCESS: Movie analysis easy task verified", file=sys.stderr) + return True + + except PlaywrightTimeoutError as exc: + print(f"FAILED: Timeout occurred - {exc}", file=sys.stderr) + return False + except Exception as exc: + print(f"FAILED: Unexpected error - {exc}", file=sys.stderr) + return False + finally: + await browser.close() + + +def main(): + result = asyncio.run(verify()) + sys.exit(0 if result else 1) + + +if __name__ == "__main__": + main() diff --git a/tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/description.md b/tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/description.md new file mode 
100644 index 00000000..7a1e27a9 --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/description.md @@ -0,0 +1,18 @@ +Share a five-line NBA engagement brief so sports moderators can instantly see which thread is dominating. + +**Task Requirements:** + +1. Register and log in with username `NBA_DataAnalyst_2024` and password `Research#2024!`. +2. In the `sports` forum (or via the site search for ā€œNBAā€), locate the NBA-related post with the highest upvote count you can see. Record its exact title, vote total, comment count, and author username. +3. Count how many distinct NBA-titled posts you reviewed for this summary (at minimum, include the post from step 2) and store that total as `Total_NBA_Posts`. +4. Publish a post titled `Statistical Analysis: NBA Content Engagement on This Forum` in the same forum. Its body must consist solely of the Markdown list below; keep the keys/order/pipes untouched while replacing each `FILL_VALUE` with your numbers: + +``` +- Total_NBA_Posts|FILL_VALUE +- Top_Title|FILL_VALUE +- Top_Votes|FILL_VALUE +- Top_Comments|FILL_VALUE +- Top_Author|FILL_VALUE +``` + +This easy edition just reports the leading NBA thread plus the count of posts you reviewed—no deeper profile checks are necessary. 
diff --git a/tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/label.txt b/tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/label.txt new file mode 100644 index 00000000..645491e8 --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/label.txt @@ -0,0 +1,5 @@ +- Total_NBA_Posts|20 +- Top_Title|Hamby claims [WNBA Champ] Aces 'unprofessional' after trade +- Top_Votes|614 +- Top_Comments|170 +- Top_Author|Responsible-Lunch815 diff --git a/tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/meta.json b/tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/meta.json new file mode 100644 index 00000000..5eb1ebc7 --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/meta.json @@ -0,0 +1,25 @@ +{ + "task_id": "nba_statistics_analysis_easy", + "task_name": "NBA Statistics Analysis (Easy)", + "category_id": "reddit", + "category_name": "Reddit", + "description": "Summarize the single strongest NBA thread and share its vote/comment stats in a short post.", + "author": "Fanqing Meng", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "user interaction", + "data extraction", + "comparative analysis", + "content submission" + ], + "mcp": [ + "playwright" + ], + "meta_data": { + "stateType": "video", + "stateContent": null, + "stateUrl": "https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4", + "stateOriginalUrl": "https://github.com/web-arena-x/webarena/tree/main/environment_docker" + } +} diff --git a/tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/verify.py b/tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/verify.py new file mode 100644 index 00000000..33acdf79 --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/verify.py @@ -0,0 +1,196 @@ +import asyncio +import os +import re +import sys +from pathlib import Path +from playwright.async_api import async_playwright, TimeoutError as
PlaywrightTimeoutError + +BASE_URL = os.getenv("WEBARENA_BASE_URL", "http://localhost:9999").rstrip("/") +USERNAME = "NBA_DataAnalyst_2024" +PASSWORD = "Research#2024!" +FORUM_SLUG = "sports" +POST_TITLE = "Statistical Analysis: NBA Content Engagement on This Forum" +REQUIRED_FIELDS = [ + "Total_NBA_Posts", + "Top_Title", + "Top_Votes", + "Top_Comments", + "Top_Author", +] +NUMERIC_FIELDS = { + "Total_NBA_Posts", + "Top_Votes", + "Top_Comments", +} +LABEL_PATH = Path(__file__).parent / "label.txt" + + +def parse_key_value_format(text: str) -> dict: + data = {} + if not text: + return data + for line in text.splitlines(): + line = line.strip() + if not line or "|" not in line: + continue + line = re.sub(r"^[-•*]\s*", "", line) + key, value = line.split("|", 1) + data[key.strip()] = value.strip() + return data + + +def normalize_text(value: str) -> str: + if value is None: + return "" + replacements = { + "\u2019": "'", + "\u2018": "'", + "\u201c": '"', + "\u201d": '"', + } + for src, dst in replacements.items(): + value = value.replace(src, dst) + return " ".join(value.split()).strip() + + +def load_expected_values() -> dict: + if not LABEL_PATH.exists(): + return {} + return parse_key_value_format(LABEL_PATH.read_text(encoding="utf-8")) + + +async def ensure_logged_in(page) -> bool: + print("Step 1: Logging into the sports account...", file=sys.stderr) + await page.goto(f"{BASE_URL}/", wait_until="networkidle") + user_button = page.locator(f'button:has-text("{USERNAME}")') + if await user_button.count(): + print("āœ“ Already logged in", file=sys.stderr) + return True + + login_link = page.locator('a:has-text("Log in")') + if not await login_link.count(): + print("FAILED: Login link not found", file=sys.stderr) + return False + + await login_link.click() + await page.wait_for_load_state("networkidle") + await page.fill('input[name="_username"]', USERNAME) + await page.fill('input[name="_password"]', PASSWORD) + await page.click('button:has-text("Log in")') + await 
page.wait_for_load_state("networkidle") + + if await page.locator(f'button:has-text("{USERNAME}")').count(): + print(f"āœ“ Logged in as {USERNAME}", file=sys.stderr) + return True + + print("FAILED: Could not log in with provided credentials", file=sys.stderr) + return False + + +async def fetch_summary_body(page): + print("Step 2: Opening the NBA engagement summary post...", file=sys.stderr) + await page.goto(f"{BASE_URL}/f/{FORUM_SLUG}", wait_until="networkidle") + post_link = page.locator(f'a:has-text("{POST_TITLE}")') + if not await post_link.count(): + print(f"FAILED: Submission '{POST_TITLE}' not found", file=sys.stderr) + return None + + await post_link.first.click() + await page.wait_for_load_state("networkidle") + + selectors = [ + ".submission__body", + "article", + ".post-body", + ".RichText", + '[class*="RichText"]', + 'div:has-text("Total_NBA_Posts")', + ] + + for selector in selectors: + locator = page.locator(selector) + if await locator.count(): + content = await locator.first.inner_text() + if content: + print(f"āœ“ Retrieved summary body via selector {selector}", file=sys.stderr) + return content + + print("FAILED: Unable to locate submission body", file=sys.stderr) + return None + + +def validate_summary(extracted: dict, expected: dict) -> bool: + missing = [key for key in REQUIRED_FIELDS if key not in extracted] + if missing: + print(f"FAILED: Missing required keys: {', '.join(missing)}", file=sys.stderr) + return False + + errors = [] + for key in REQUIRED_FIELDS: + actual = extracted.get(key, "") + expect = expected.get(key, "") + if key in NUMERIC_FIELDS: + try: + actual_val = int(actual) + expect_val = int(expect) + if actual_val != expect_val: + errors.append(f"{key}: expected {expect_val}, found {actual_val}") + except ValueError: + errors.append(f"{key}: '{actual}' is not numeric") + else: + if normalize_text(actual) != normalize_text(expect): + errors.append(f"{key}: expected '{expect}', found '{actual}'") + + if errors: + print("FAILED: 
Summary data does not match expected values:", file=sys.stderr) + for err in errors: + print(f" - {err}", file=sys.stderr) + return False + + print("āœ“ Summary fields align with expected snapshot", file=sys.stderr) + return True + + +async def verify() -> bool: + expected = load_expected_values() + if not expected: + print("FAILED: label.txt is missing", file=sys.stderr) + return False + + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + context = await browser.new_context() + page = await context.new_page() + + try: + if not await ensure_logged_in(page): + return False + + content = await fetch_summary_body(page) + if not content: + return False + + extracted = parse_key_value_format(content) + if not validate_summary(extracted, expected): + return False + + print("SUCCESS: NBA statistics easy task verified", file=sys.stderr) + return True + + except PlaywrightTimeoutError as exc: + print(f"FAILED: Timeout occurred - {exc}", file=sys.stderr) + return False + except Exception as exc: + print(f"FAILED: Unexpected error - {exc}", file=sys.stderr) + return False + finally: + await browser.close() + + +def main(): + result = asyncio.run(verify()) + sys.exit(0 if result else 1) + + +if __name__ == "__main__": + main() diff --git a/tasks/playwright_webarena/easy/reddit/routine_tracker_forum/description.md b/tasks/playwright_webarena/easy/reddit/routine_tracker_forum/description.md new file mode 100644 index 00000000..c59f66e7 --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/routine_tracker_forum/description.md @@ -0,0 +1,10 @@ +Reshare the most helpful LifeProTips calendar advice so it appears as a fresh routine post for other readers. + +**Task Requirements:** + +1. Register and log in with username `RoutineTracker2025` and password `DailyRoutine123!`. +2. Find the LifeProTips thread titled `LPT: Use your calendar as your to-do list. 
Assigning dedicated time to tasks increases the likelyhood of you acting upon it.` and identify the comment with the highest upvote count. Copy its full text verbatim. +3. In the `LifeProTips` forum, publish a post titled `My 5-Step Morning Routine That Increased My Productivity by 200%` whose body is exactly the comment text you copied (no additional words or formatting). +4. Confirm that your new post appears in the LifeProTips listing so community members can immediately see it. + +Only these three actions—copy, repost, verify—are required for the easy version; no extra upvotes or settings changes are necessary. diff --git a/tasks/playwright_webarena/easy/reddit/routine_tracker_forum/meta.json b/tasks/playwright_webarena/easy/reddit/routine_tracker_forum/meta.json new file mode 100644 index 00000000..a22ab0f1 --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/routine_tracker_forum/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "routine_tracker_forum_easy", + "task_name": "Routine Tracker Forum (Easy)", + "category_id": "reddit", + "category_name": "Reddit", + "description": "Repost the highest-rated LifeProTips calendar advice under a new routine-tracking thread.", + "author": "Fanqing Meng", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "user interaction", + "content submission" + ], + "mcp": [ + "playwright" + ], + "meta_data": { + "stateType": "video", + "stateContent": null, + "stateUrl": "https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4", + "stateOriginalUrl": "https://github.com/web-arena-x/webarena/tree/main/environment_docker" + } +} diff --git a/tasks/playwright_webarena/easy/reddit/routine_tracker_forum/verify.py b/tasks/playwright_webarena/easy/reddit/routine_tracker_forum/verify.py new file mode 100644 index 00000000..c4222c04 --- /dev/null +++ b/tasks/playwright_webarena/easy/reddit/routine_tracker_forum/verify.py @@ -0,0 +1,113 @@ +import asyncio +import os +import sys +from playwright.async_api import 
async_playwright, TimeoutError as PlaywrightTimeoutError + +BASE_URL = os.getenv("WEBARENA_BASE_URL", "http://localhost:9999").rstrip("/") +USERNAME = "RoutineTracker2025" +PASSWORD = "DailyRoutine123!" +FORUM_SLUG = "LifeProTips" +POST_TITLE = "My 5-Step Morning Routine That Increased My Productivity by 200%" +EXPECTED_BODY = ( + "As a college student, having a visible reminder of the assignments I have and when they are due is super helpful for me. " + "It also just feels good to erase them from the board once they are completed." +) + + +async def ensure_logged_in(page) -> bool: + print("Step 1: Logging in before verification...", file=sys.stderr) + await page.goto(f"{BASE_URL}/", wait_until="networkidle") + user_button = page.locator(f'button:has-text("{USERNAME}")') + if await user_button.count(): + print("āœ“ Already logged in", file=sys.stderr) + return True + + login_link = page.locator('a:has-text("Log in")') + if not await login_link.count(): + print("FAILED: Login link not found", file=sys.stderr) + return False + + await login_link.click() + await page.wait_for_load_state("networkidle") + await page.fill('input[name="_username"]', USERNAME) + await page.fill('input[name="_password"]', PASSWORD) + await page.click('button:has-text("Log in")') + await page.wait_for_load_state("networkidle") + + if await page.locator(f'button:has-text("{USERNAME}")').count(): + print(f"āœ“ Logged in as {USERNAME}", file=sys.stderr) + return True + + print("FAILED: Could not log in with provided credentials", file=sys.stderr) + return False + + +async def verify_post_body(page) -> bool: + print("Step 2: Validating reposted comment content...", file=sys.stderr) + await page.goto(f"{BASE_URL}/f/{FORUM_SLUG}", wait_until="networkidle") + post_link = page.locator(f'a:has-text("{POST_TITLE}")') + if not await post_link.count(): + print(f"FAILED: Post '{POST_TITLE}' not found in LifeProTips", file=sys.stderr) + return False + + await post_link.first.click() + await 
page.wait_for_load_state("networkidle") + + article = page.locator("article") + if not await article.count(): + print("FAILED: Unable to read post body", file=sys.stderr) + return False + + body_text = await article.first.inner_text() + if EXPECTED_BODY not in body_text: + print("FAILED: Post body does not match the copied comment text", file=sys.stderr) + return False + + print("āœ“ Post body matches the expected LifeProTips comment", file=sys.stderr) + return True + + +async def verify_listing_presence(page) -> bool: + print("Step 3: Confirming the post appears in the forum listing...", file=sys.stderr) + await page.goto(f"{BASE_URL}/f/{FORUM_SLUG}", wait_until="networkidle") + post_link = page.locator(f'a:has-text("{POST_TITLE}")') + if await post_link.count(): + print("āœ“ Post is visible in the LifeProTips feed", file=sys.stderr) + return True + + print("FAILED: Post missing from forum listing", file=sys.stderr) + return False + + +async def verify() -> bool: + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + context = await browser.new_context() + page = await context.new_page() + + try: + if not await ensure_logged_in(page): + return False + if not await verify_post_body(page): + return False + if not await verify_listing_presence(page): + return False + print("SUCCESS: Routine tracker easy task verified", file=sys.stderr) + return True + except PlaywrightTimeoutError as exc: + print(f"FAILED: Timeout occurred - {exc}", file=sys.stderr) + return False + except Exception as exc: + print(f"FAILED: Unexpected error - {exc}", file=sys.stderr) + return False + finally: + await browser.close() + + +def main(): + result = asyncio.run(verify()) + sys.exit(0 if result else 1) + + +if __name__ == "__main__": + main() diff --git a/tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/description.md b/tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/description.md new file mode 100644 
index 00000000..f2481b50 --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/description.md @@ -0,0 +1,38 @@ +Stick to the first three analytical steps from the original workflow so the easy version only inventories bestseller and promo data. + +**Task Requirements** + +1. If need to login, login with username 'admin' and password 'admin1234'. +2. **Dashboard stop**: read the first three rows in **Bestsellers** (name, price, quantity) exactly as shown, note the Revenue KPI amount, and look at the **Top Search Terms** widget—if any of those three product names appears there, record it as `term:uses`, otherwise output `No:0`. +3. **Catalog → Products stop**: search each of the same three bestseller names one at a time and copy their SKU, Qty (inventory column), and Status (Enabled/Disabled) from the grid. +4. **Marketing → Promotions → Cart Price Rules stop**: set Status = Active, count how many rules are shown, and locate the rule that applies a percentage discount so you can report `rule name:percentage`. 
+ +Output everything using the reduced template below: + +``` + +Bestseller1|name:price:quantity:sku:inventory:status +Bestseller2|name:price:quantity:sku:inventory:status +Bestseller3|name:price:quantity:sku:inventory:status +TotalRevenue|amount +BestsellerInSearch|term:count +PercentageDiscountRule|name:percentage +ActiveRulesCount|count + +``` + +``` + +Bestseller1|name:price:quantity:sku:inventory:status +Bestseller2|name:price:quantity:sku:inventory:status +Bestseller3|name:price:quantity:sku:inventory:status +TotalRevenue|amount +BestsellerInSearch|term:count +PercentageDiscountRule|name:percentage +ActiveRulesCount|count +TotalOrders|count +MostRecentOrderID|id +TopCustomer|name:email:group +SameGroupCustomers|count + +``` diff --git a/tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/label.txt b/tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/label.txt new file mode 100644 index 00000000..c3338523 --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/label.txt @@ -0,0 +1,7 @@ +Bestseller1|Sprite Stasis Ball 65 cm:$27.00:6:24-WG082-blue:100:Enabled +Bestseller2|Quest Lumaflexā„¢ Band:$19.00:6:24-UG01:100:Enabled +Bestseller3|Sprite Yoga Strap 6 foot:$14.00:6:24-WG085:100:Enabled +TotalRevenue|$0.00 +BestsellerInSearch|No:0 +PercentageDiscountRule|20% OFF Ever $200-plus purchase!*:20% +ActiveRulesCount|4 diff --git a/tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/meta.json b/tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/meta.json new file mode 100644 index 00000000..ab533648 --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/meta.json @@ -0,0 +1,25 @@ +{ + "task_id": "fitness_promotion_strategy_easy", + "task_name": "Fitness Promotion Strategy (Easy)", + "category_id": "shopping_admin", + "category_name": "Shopping Admin", + "description": "Capture the three dashboard bestsellers, 
confirm their catalog details, and snapshot the related promo and customer metrics needed for a quick campaign brief.", + "author": "Fanqing Meng", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "data extraction", + "comparative analysis", + "inventory management", + "content submission" + ], + "mcp": [ + "playwright" + ], + "meta_data": { + "stateType": "video", + "stateContent": null, + "stateUrl": "https://storage.mcpmark.ai/tasks_state/playwright_video/magento-admin.mp4", + "stateOriginalUrl": "https://github.com/web-arena-x/webarena/tree/main/environment_docker" + } +} diff --git a/tasks/playwright_webarena/shopping_admin/fitness_promotion_strategy/verify.py b/tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/verify.py similarity index 100% rename from tasks/playwright_webarena/shopping_admin/fitness_promotion_strategy/verify.py rename to tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/verify.py diff --git a/tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/description.md b/tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/description.md new file mode 100644 index 00000000..ed817f3a --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/description.md @@ -0,0 +1,47 @@ +Keep only the first three investigative steps so the easy task focuses on dashboard + tax + order-status insights. + +**Task Requirements** + +1. If need to login, login with username 'admin' and password 'admin1234'. On the **Dashboard**, record the Lifetime Sales amount, identify the cheapest product in the **Bestsellers** table (note its name, price, and quantity), and check whether that same product appears anywhere in **Last Orders** (output the customer name if yes, otherwise `No`). +2. Go to **Stores → Taxes → Tax Zones and Rates**. Capture the exact rates for New York and California, specify which state is higher, and count how many distinct U.S. 
states have entries in the grid. +3. Still in **Stores**, open **Settings → Order Status**, filter ā€œVisible On Storefront = Yesā€, and confirm whether a status with code `processing` exists and if it’s flagged as a default status. + +Report just these metrics in the reduced answer format: + +``` + +Lifetime_Sales_Amount|amount +Cheap_Bestseller_Name|name +Second_Bestseller_Price|price +Second_Bestseller_Quantity|quantity +Product_In_Last_Orders|yes_or_no_or_customer +NY_Tax_Rate|rate +CA_Tax_Rate|rate +Higher_Tax_State|state +Total_States_With_Tax|count +Processing_Visible_Storefront|Yes_or_No +Processing_Default_Status|Yes_or_No + +``` + +``` + +Lifetime_Sales_Amount|amount +Cheap_Bestseller_Name|name +Second_Bestseller_Price|price +Second_Bestseller_Quantity|quantity +Product_In_Last_Orders|yes_or_no +NY_Tax_Rate|rate +CA_Tax_Rate|rate +Higher_Tax_State|state +Total_States_With_Tax|count +Processing_Visible_Storefront|Yes_or_No +Processing_Default_Status|Yes_or_No +Number_Of_Websites|count +Main_Store_Code|code +Default_Source_Pickup_Status|status +Default_Source_State|state_or_none +Dashboard_Revenue|amount +Tax_Shipping_Zero|yes_or_no + +``` diff --git a/tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/label.txt b/tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/label.txt new file mode 100644 index 00000000..9ac36430 --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/label.txt @@ -0,0 +1,11 @@ +Lifetime_Sales_Amount|$0.00 +Cheap_Bestseller_Name|Sprite Yoga Strap 6 foot +Second_Bestseller_Price|$14.00 +Second_Bestseller_Quantity|6 +Product_In_Last_Orders|No +NY_Tax_Rate|8.3750 +CA_Tax_Rate|8.2500 +Higher_Tax_State|NY +Total_States_With_Tax|2 +Processing_Visible_Storefront|Yes +Processing_Default_Status|Yes diff --git a/tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/meta.json b/tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/meta.json new file 
import asyncio
import sys
import re
import os
import json
from pathlib import Path


def get_model_response():
    """
    Get the model's response from the MCP_MESSAGES environment variable.

    Returns the text of the last completed assistant message, or None on
    any failure (missing env var, missing/empty/invalid file, no message).
    """
    messages_path = os.getenv("MCP_MESSAGES")
    print(f"MCP_MESSAGES: {messages_path}")
    if not messages_path:
        print("ERROR: MCP_MESSAGES environment variable not set", file=sys.stderr)
        return None

    # Check if file exists
    if not Path(messages_path).exists():
        print(f"ERROR: Messages file not found at path: {messages_path}", file=sys.stderr)
        return None

    try:
        with open(messages_path, 'r') as f:
            content = f.read()

        # Check if file is empty
        if not content or content.strip() == '""':
            print("ERROR: Messages file is empty or contains only empty string", file=sys.stderr)
            return None

        messages = json.loads(content)

        # Check if messages is a list
        if not isinstance(messages, list):
            print(f"ERROR: Messages file should contain a list, got {type(messages).__name__}", file=sys.stderr)
            return None

        # Find the last assistant message (walk backwards).
        for message in reversed(messages):
            if message.get('role') == 'assistant' and message.get('status') == 'completed':
                content = message.get('content', [])
                if not content:
                    print("WARNING: Assistant message has empty content", file=sys.stderr)
                    continue

                for item in content:
                    if item.get('type') == 'output_text':
                        text = item.get('text', '')
                        if not text:
                            print("WARNING: Output text is empty", file=sys.stderr)
                            continue
                        return text

        print("ERROR: No assistant response with output_text found in messages", file=sys.stderr)
        return None
    except json.JSONDecodeError as e:
        print(f"ERROR: Invalid JSON in messages file: {str(e)}", file=sys.stderr)
        return None
    except Exception as e:
        print(f"ERROR: Unexpected error reading messages file: {str(e)}", file=sys.stderr)
        return None


def parse_answer_format(text):
    """
    Parse the <answer>...</answer> block from the agent's output.

    Returns a dict of key -> value parsed from `Key|Value` lines, or None
    when the block is missing/empty or yields no valid pairs.
    """
    if not text:
        print("ERROR: No text provided to parse", file=sys.stderr)
        return None

    # Look for the <answer>...</answer> pattern.  The tag literals are
    # essential: a bare (.*?) matches the empty string at position 0.
    match = re.search(r'<answer>(.*?)</answer>', text, re.IGNORECASE | re.DOTALL)
    if not match:
        print("ERROR: No <answer>...</answer> tags found in the response", file=sys.stderr)
        print(f"  Response preview: {text[:200]}...", file=sys.stderr)
        return None

    answer_content = match.group(1).strip()

    if not answer_content:
        print("ERROR: Empty content between <answer> tags", file=sys.stderr)
        return None

    # Parse each line
    result = {}
    lines = answer_content.split('\n')

    # Keys required for the easy NY-expansion task.
    expected_keys = [
        'Lifetime_Sales_Amount', 'Cheap_Bestseller_Name', 'Second_Bestseller_Price',
        'Second_Bestseller_Quantity', 'Product_In_Last_Orders', 'NY_Tax_Rate',
        'CA_Tax_Rate', 'Higher_Tax_State', 'Total_States_With_Tax',
        'Processing_Visible_Storefront', 'Processing_Default_Status'
    ]

    parsed_keys = []
    for line in lines:
        line = line.strip()
        if not line:
            continue

        if '|' not in line:
            print(f"ERROR: Line missing pipe separator '|': {line}", file=sys.stderr)
            continue

        parts = line.split('|', 1)
        if len(parts) != 2:
            print(f"ERROR: Invalid line format: {line}", file=sys.stderr)
            continue

        key, value = parts
        key = key.strip()
        value = value.strip()

        if not key:
            print(f"ERROR: Empty key in line: {line}", file=sys.stderr)
            continue

        result[key] = value
        parsed_keys.append(key)

    # Missing keys are reported but do not abort; comparison will catch them.
    missing_keys = set(expected_keys) - set(parsed_keys)
    if missing_keys:
        print(f"ERROR: Missing expected keys: {', '.join(sorted(missing_keys))}", file=sys.stderr)

    unexpected_keys = set(parsed_keys) - set(expected_keys)
    if unexpected_keys:
        print(f"WARNING: Unexpected keys found: {', '.join(sorted(unexpected_keys))}", file=sys.stderr)

    if not result:
        print("ERROR: No valid key-value pairs parsed from answer", file=sys.stderr)
        return None

    return result


def load_expected_answer(label_path):
    """
    Load the expected answer from label.txt (one `Key|Value` per line).

    Returns a dict of expected values, or None on read failure.
    """
    try:
        with open(label_path, 'r') as f:
            lines = f.read().strip().split('\n')

        expected = {}
        for line in lines:
            if '|' in line:
                key, value = line.split('|', 1)
                expected[key.strip()] = value.strip()

        return expected
    except Exception as e:
        print(f"Error reading label file: {str(e)}", file=sys.stderr)
        return None


def compare_answers(model_answer, expected_answer):
    """
    Compare the model's answer with the expected answer.

    Only keys present in expected_answer are checked, with per-key
    normalization (currency, tax-rate floats, case-insensitive Yes/No).
    Returns True if everything matches, False otherwise.
    """
    if not model_answer or not expected_answer:
        return False

    mismatches = []
    for key, expected_value in expected_answer.items():
        model_value = model_answer.get(key, '')

        if key in ['Lifetime_Sales_Amount', 'Second_Bestseller_Price', 'Dashboard_Revenue']:
            # Currency fields: ignore "$" and thousands separators.
            expected_clean = expected_value.replace('$', '').replace(',', '')
            model_clean = model_value.replace('$', '').replace(',', '')
            if expected_clean != model_clean:
                mismatches.append(f"{key}: expected '{expected_value}', got '{model_value}'")

        elif key in ['NY_Tax_Rate', 'CA_Tax_Rate']:
            # Tax rates: compare numerically so 8.375 == 8.3750.
            expected_clean = expected_value.replace('%', '').strip()
            model_clean = model_value.replace('%', '').strip()
            try:
                if float(expected_clean) != float(model_clean):
                    mismatches.append(f"{key}: expected '{expected_value}', got '{model_value}'")
            except ValueError:
                if expected_clean != model_clean:
                    mismatches.append(f"{key}: expected '{expected_value}', got '{model_value}'")

        elif key in ['Product_In_Last_Orders', 'Processing_Visible_Storefront', 'Processing_Default_Status']:
            # Yes/No fields - case insensitive
            if model_value.lower() != expected_value.lower():
                mismatches.append(f"{key}: expected '{expected_value}', got '{model_value}'")

        elif key == 'Empty_Rows_Yes_Effect':
            # Leftover from the hard variant: accept any non-trivial description.
            if not model_value or len(model_value) < 5:
                mismatches.append(f"{key}: expected meaningful description, got '{model_value}'")

        elif key == 'Order_Status_Options':
            # Leftover from the hard variant: order-insensitive set compare.
            expected_options = set(opt.strip() for opt in expected_value.split(','))
            model_options = set(opt.strip() for opt in model_value.split(','))
            if expected_options != model_options:
                mismatches.append(f"{key}: expected '{expected_value}', got '{model_value}'")

        elif key == 'Chart_Disabled_Message':
            # Leftover from the hard variant: keyword check only.
            if 'disabled' not in model_value.lower() and 'enable' not in model_value.lower():
                mismatches.append(f"{key}: expected message about chart being disabled, got '{model_value}'")

        elif key == 'Default_Source_State':
            # Treat the literal string 'None' the same as an empty value.
            expected_normalized = expected_value.lower() if expected_value.lower() != 'none' else ''
            model_normalized = model_value.lower() if model_value.lower() != 'none' else ''
            if expected_normalized != model_normalized:
                mismatches.append(f"{key}: expected '{expected_value}', got '{model_value}'")

        else:
            # Exact match for other fields
            if model_value != expected_value:
                mismatches.append(f"{key}: expected '{expected_value}', got '{model_value}'")

    if mismatches:
        print("\n=== Answer Comparison Mismatches ===", file=sys.stderr)
        for mismatch in mismatches:
            print(f"āœ— {mismatch}", file=sys.stderr)
        return False

    print("\n=== Answer Comparison ===", file=sys.stderr)
    print("āœ“ All key information matches the expected answer", file=sys.stderr)
    return True


async def verify() -> bool:
    """
    Verifies that the NY expansion analysis task has been completed correctly.

    Checks the model's answer (from MCP_MESSAGES) against label.txt.
    """
    print("\n=== Starting Verification ===", file=sys.stderr)

    # label.txt lives next to this script.
    label_path = Path(__file__).parent / "label.txt"

    print("Loading expected answer from label.txt...", file=sys.stderr)
    expected_answer = load_expected_answer(label_path)
    if not expected_answer:
        print("FATAL ERROR: Could not load expected answer from label.txt", file=sys.stderr)
        return False

    print(f"Expected answer loaded with {len(expected_answer)} keys", file=sys.stderr)

    print("\nReading model response from MCP_MESSAGES...", file=sys.stderr)
    model_response = get_model_response()

    if not model_response:
        print("FATAL ERROR: No valid model response found", file=sys.stderr)
        return False

    print(f"Model response found (length: {len(model_response)} chars)", file=sys.stderr)
    print("\nParsing answer format from model response...", file=sys.stderr)

    model_answer = parse_answer_format(model_response)

    if not model_answer:
        print("FATAL ERROR: Could not parse answer format from model response", file=sys.stderr)
        return False

    print(f"\n=== Model Answer Parsed Successfully ===", file=sys.stderr)
    print(f"Parsed {len(model_answer)} key-value pairs", file=sys.stderr)

    for key, value in model_answer.items():
        print(f"  {key}: {value}", file=sys.stderr)

    print("\n=== Comparing Model Answer with Expected Answer ===", file=sys.stderr)
    answer_match = compare_answers(model_answer, expected_answer)

    if not answer_match:
        print("\nFATAL ERROR: Model answer does not match expected answer", file=sys.stderr)
        print("Verification FAILED", file=sys.stderr)
        return False

    print("\nāœ“ Model answer matches expected answer", file=sys.stderr)
    print("Verification PASSED", file=sys.stderr)
    return True
Executes the verification process and exits with a status code. + """ + result = asyncio.run(verify()) + sys.exit(0 if result else 1) + +if __name__ == "__main__": + main() diff --git a/tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/description.md b/tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/description.md new file mode 100644 index 00000000..dbea7490 --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/description.md @@ -0,0 +1,54 @@ +Only keep the first few catalog and dashboard checks plus the high-level orders snapshot. + +**Task Requirements** + +1. If need to login, login with username 'admin' and password 'admin1234'. +2. **Catalog → Products**: search for product names containing `Yoga` and capture the records-found count; reset filters and look up SKU `WH11` to copy its exact price; reset again and set Quantity (From/To) = `0.0000` to count all zero-quantity products. +3. **Dashboard**: in the Bestsellers table sort by price ascending—record the lowest-priced row as `name:quantity`, then locate `Quest Lumaflexā„¢ Band` and note its quantity, and read the Revenue KPI amount. +4. **Sales → Orders**: filter Status = Pending to count those orders, then search for Grace Nguyen, switch Status = Complete, sort Grand Total descending, and record the Order # of the most expensive completed order. 
+ +Return just these metrics: + +``` + +YogaProducts|count +WH11Price|price +ZeroQuantityProducts|count +LowestProduct|name:quantity +QuestLumaflexQuantity|quantity +DashboardRevenue|amount +PendingOrders|count +GraceNguyenOrderID|orderid + +``` + +``` + +YogaProducts|count +WH11Price|price +ZeroQuantityProducts|count +LowestProduct|name:quantity +QuestLumaflexQuantity|quantity +DashboardRevenue|amount +SarahMillerEmail|email +TotalCustomers|count +PendingOrders|count +GraceNguyenOrderID|orderid + +``` + +**Example Output:** +``` + +YogaProducts|XX +WH11Price|$XX.XX +ZeroQuantityProducts|XX +LowestProduct|Product Name Here:XX +QuestLumaflexQuantity|XX +DashboardRevenue|$XX.XX +SarahMillerEmail|email@example.com +TotalCustomers|XX +PendingOrders|X +GraceNguyenOrderID|00000XXXX + +``` diff --git a/tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/label.txt b/tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/label.txt new file mode 100644 index 00000000..83774daa --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/label.txt @@ -0,0 +1,8 @@ +YogaProducts|171 +WH11Price|$54.00 +ZeroQuantityProducts|150 +LowestProduct|Sprite Stasis Ball 55 cm foot:5 +QuestLumaflexQuantity|6 +DashboardRevenue|$0.00 +PendingOrders|10 +GraceNguyenOrderID|000000189 diff --git a/tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/meta.json b/tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/meta.json new file mode 100644 index 00000000..75841aae --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/meta.json @@ -0,0 +1,24 @@ +{ + "task_id": "products_sales_analysis_easy", + "task_name": "Products Sales Analysis (Easy)", + "category_id": "shopping_admin", + "category_name": "Shopping Admin", + "description": "Make a single guided pass through Catalog, Dashboard, Customers, and Orders to collect the exact fields needed for a quick sales recap.", 
import asyncio
import sys
import re
import os
import json
from pathlib import Path


def get_model_response():
    """
    Get the model's response from the MCP_MESSAGES environment variable.

    Returns the text of the last completed assistant message, or None on
    any failure.
    """
    messages_path = os.getenv("MCP_MESSAGES")
    print(f"MCP_MESSAGES: {messages_path}")
    if not messages_path:
        print("Warning: MCP_MESSAGES environment variable not set", file=sys.stderr)
        return None

    try:
        with open(messages_path, "r") as f:
            messages = json.load(f)

        # Find the last assistant message
        for message in reversed(messages):
            if (
                message.get("role") == "assistant"
                and message.get("status") == "completed"
            ):
                content = message.get("content", [])
                for item in content:
                    if item.get("type") == "output_text":
                        return item.get("text", "")

        print("Warning: No assistant response found in messages", file=sys.stderr)
        return None
    except Exception as e:
        print(f"Error reading messages file: {str(e)}", file=sys.stderr)
        return None


def parse_answer_format(text):
    """
    Parse the <answer>...</answer> block from the agent's output.

    The easy task expects exactly 8 `Key|Value` lines; anything else is
    rejected.  Returns a dict on success, None on any format error.
    """
    if not text:
        print("Error: No text provided to parse", file=sys.stderr)
        return None

    # Look for the <answer>...</answer> pattern.  The tag literals are
    # essential: a bare (.*?) matches the empty string at position 0.
    match = re.search(r"<answer>(.*?)</answer>", text, re.IGNORECASE | re.DOTALL)
    if not match:
        print("Error: No <answer>...</answer> tags found in response", file=sys.stderr)
        return None

    answer_content = match.group(1).strip()
    if not answer_content:
        print("Error: Empty answer content", file=sys.stderr)
        return None

    # Parse each non-blank line
    result = {}
    lines = [line.strip() for line in answer_content.split("\n") if line.strip()]

    if len(lines) != 8:
        print(f"Error: Expected 8 lines in answer, got {len(lines)}", file=sys.stderr)
        print(f"Lines found: {lines}", file=sys.stderr)
        return None

    # Expected keys for validation
    expected_keys = [
        "YogaProducts", "WH11Price", "ZeroQuantityProducts", "LowestProduct",
        "QuestLumaflexQuantity", "DashboardRevenue", "PendingOrders",
        "GraceNguyenOrderID"
    ]

    for line in lines:
        if "|" not in line:
            print(f"Error: Line missing '|' separator: {line}", file=sys.stderr)
            return None

        parts = line.split("|", 1)
        if len(parts) != 2:
            print(f"Error: Invalid line format: {line}", file=sys.stderr)
            return None

        key, value = parts[0].strip(), parts[1].strip()

        if not key or not value:
            print(f"Error: Empty key or value in line: {line}", file=sys.stderr)
            return None

        result[key] = value

    # Validate all expected keys are present
    missing_keys = set(expected_keys) - set(result.keys())
    if missing_keys:
        print(f"Error: Missing required keys: {missing_keys}", file=sys.stderr)
        return None

    return result


def load_expected_answer(label_path):
    """
    Load the expected answer from label.txt (one `Key|Value` per line).

    Returns a dict of expected values, or None on read failure.
    """
    try:
        with open(label_path, "r") as f:
            lines = f.read().strip().split("\n")

        expected = {}
        for line in lines:
            if "|" in line:
                key, value = line.split("|", 1)
                expected[key.strip()] = value.strip()

        return expected
    except Exception as e:
        print(f"Error reading label file: {str(e)}", file=sys.stderr)
        return None


def compare_answers(model_answer, expected_answer):
    """
    Compare the model's answer with the expected answer.

    Only keys present in expected_answer are checked, with per-key
    normalization (name:quantity split, currency, case-insensitive email).
    Returns True if everything matches, False otherwise.
    """
    if not model_answer or not expected_answer:
        return False

    mismatches = []
    for key, expected_value in expected_answer.items():
        model_value = model_answer.get(key, "")

        if key == "LowestProduct":
            # Format "Product Name:quantity"; rsplit keeps colons in the name.
            if ":" in expected_value and ":" in model_value:
                expected_name, expected_qty = expected_value.rsplit(":", 1)
                model_name, model_qty = model_value.rsplit(":", 1)
                if expected_name != model_name or expected_qty != model_qty:
                    mismatches.append(
                        f"{key}: expected '{expected_value}', got '{model_value}'"
                    )
            else:
                if expected_value != model_value:
                    mismatches.append(
                        f"{key}: expected '{expected_value}', got '{model_value}'"
                    )

        elif key in ["WH11Price", "DashboardRevenue"]:
            # Currency fields: ignore "$" and thousands separators.
            expected_clean = expected_value.replace("$", "").replace(",", "")
            model_clean = model_value.replace("$", "").replace(",", "")
            if expected_clean != model_clean:
                mismatches.append(
                    f"{key}: expected '{expected_value}', got '{model_value}'"
                )

        elif key == "SarahMillerEmail":
            # Leftover from the hard variant: case-insensitive email compare.
            if model_value.lower() != expected_value.lower():
                mismatches.append(
                    f"{key}: expected '{expected_value}', got '{model_value}'"
                )

        else:
            # Exact match for other fields
            if model_value != expected_value:
                mismatches.append(
                    f"{key}: expected '{expected_value}', got '{model_value}'"
                )

    if mismatches:
        print("\n=== Answer Comparison Mismatches ===", file=sys.stderr)
        for mismatch in mismatches:
            print(f"āœ— {mismatch}", file=sys.stderr)
        return False

    print("\n=== Answer Comparison ===", file=sys.stderr)
    print("āœ“ All key information matches the expected answer", file=sys.stderr)
    return True


async def verify() -> bool:
    """
    Verifies that the products and sales analysis task has been completed
    correctly by checking the model's answer against label.txt.
    """
    # label.txt lives next to this script.
    label_path = Path(__file__).parent / "label.txt"

    expected_answer = load_expected_answer(label_path)
    if not expected_answer:
        print("Error: Could not load expected answer from label.txt", file=sys.stderr)
        return False

    model_response = get_model_response()
    if model_response:
        print("Found model response, parsing answer format...", file=sys.stderr)
        model_answer = parse_answer_format(model_response)

        if model_answer:
            print("\n=== Model Answer Parsed ===", file=sys.stderr)
            for key, value in model_answer.items():
                print(f"{key}: {value}", file=sys.stderr)

            answer_match = compare_answers(model_answer, expected_answer)
            if not answer_match:
                print("\nModel answer does not match expected answer", file=sys.stderr)
                return False
            print("\nāœ“ Model answer matches expected answer", file=sys.stderr)
            return True
        else:
            print(
                "Warning: Could not parse answer format from model response",
                file=sys.stderr,
            )
            return False
    else:
        print("No model response found", file=sys.stderr)
        return False
+ """ + result = asyncio.run(verify()) + sys.exit(0 if result else 1) + + +if __name__ == "__main__": + main() diff --git a/tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/description.md b/tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/description.md new file mode 100644 index 00000000..4a7224a9 --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/description.md @@ -0,0 +1,60 @@ +Retain just the first three analytic arenas—products, orders, and the dashboard—so the easy task stays read-only and short. + +**Task Requirements** + +1. If need to login, login with username 'admin' and password 'admin1234', then open **Catalog → Products**. Search for names containing `Sprite` to get their count, reset and set Quantity (From/To) = `100.0000` to count those rows, and finally reset to look up SKU `WS12` so you can copy its exact name and price. +2. Switch to **Sales → Orders**. Filter Status = Pending to count those orders, then search for Grace Nguyen with Status = Complete, sort Grand Total ascending, and capture the cheapest completed order ID. Clear filters, sort Grand Total descending, and record the top row’s customer and amount. +3. Finish in **Dashboard**. Sort **Bestsellers** by Quantity descending to capture the first row’s name and quantity, locate `Overnight Duffle` in that table to note its price, and check the **Top Search Terms** widget to see what position `hollister` occupies. 
+ +Answer with the reduced template: + +``` + +SpriteProducts|count +Quantity100Products|count +WS12Info|name:price +PendingOrders|count +GraceOrderID|orderid +HighestOrderInfo|customer:amount +CheapProduct|name:quantity +OvernightDufflePrice|price +HollisterPosition|position + +``` + +``` + +SpriteProducts|count +Quantity100Products|count +WS12Info|name:price +PendingOrders|count +GraceOrderID|orderid +HighestOrderInfo|customer:amount +CheapProduct|name:quantity +OvernightDufflePrice|price +HollisterPosition|position +CostelloCustomers|count +SarahMillerInfo|group:date +PaidInvoices|count +Invoice002BillTo|name + +``` + +**Example Output:** +``` + +SpriteProducts|XX +Quantity100Products|XX +WS12Info|Product Name Here:$XX.XX +PendingOrders|X +GraceOrderID|00000XXXX +HighestOrderInfo|Customer Name:$XXX.XX +CheapProduct|Product Name:XX +OvernightDufflePrice|$XX.XX +HollisterPosition|Xth +CostelloCustomers|X +SarahMillerInfo|Group Name:MMM DD, YYYY +PaidInvoices|X +Invoice002BillTo|Customer Name + +``` diff --git a/tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/label.txt b/tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/label.txt new file mode 100644 index 00000000..3d414bc5 --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/label.txt @@ -0,0 +1,9 @@ +SpriteProducts|16 +Quantity100Products|1886 +WS12Info|Radiant Tee:$22.00 +PendingOrders|10 +GraceOrderID|000000114 +HighestOrderInfo|Samantha Jones:$292.40 +CheapProduct|Sprite Yoga Strap 6 foot:6 +OvernightDufflePrice|$45.00 +HollisterPosition|1st diff --git a/tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/meta.json b/tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/meta.json new file mode 100644 index 00000000..4121c4b3 --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/meta.json @@ -0,0 +1,24 @@ +{ + "task_id": "sales_inventory_analysis_easy", + 
"task_name": "Sales Inventory Analysis (Easy)", + "category_id": "shopping_admin", + "category_name": "Shopping Admin", + "description": "Follow one guided tour through Products, Orders, Dashboard, Customers, and Invoices to capture a compact set of sales-plus-inventory facts.", + "author": "Fanqing Meng", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "data extraction", + "comparative analysis", + "inventory management" + ], + "mcp": [ + "playwright" + ], + "meta_data": { + "stateType": "video", + "stateContent": null, + "stateUrl": "https://storage.mcpmark.ai/tasks_state/playwright_video/magento-admin.mp4", + "stateOriginalUrl": "https://github.com/web-arena-x/webarena/tree/main/environment_docker" + } +} diff --git a/tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/verify.py b/tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/verify.py new file mode 100644 index 00000000..d8c224e3 --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/verify.py @@ -0,0 +1,341 @@ +import asyncio +import sys +import re +import os +import json +from pathlib import Path + + +def get_model_response(): + """ + Get the model's response from the MCP_MESSAGES environment variable. + Returns the last assistant message text. 
+ """ + messages_path = os.getenv("MCP_MESSAGES") + print(f"MCP_MESSAGES: {messages_path}") + if not messages_path: + print("Warning: MCP_MESSAGES environment variable not set", file=sys.stderr) + return None + + try: + with open(messages_path, "r") as f: + messages = json.load(f) + + # Find the last assistant message with type='message', status='completed' + for message in reversed(messages): + if ( + message.get("role") == "assistant" + and message.get("status") == "completed" + and message.get("type") == "message" + ): + content = message.get("content", []) + for item in content: + # Check for both 'text' and 'output_text' types + if item.get("type") in ["text", "output_text"]: + return item.get("text", "") + + print("Warning: No assistant response found in messages", file=sys.stderr) + return None + except Exception as e: + print(f"Error reading messages file: {str(e)}", file=sys.stderr) + return None + + +def parse_answer_format(text): + """ + Parse the ... format from the agent's output. + Returns a dictionary with the parsed values. + """ + if not text: + print("ERROR: No text provided to parse", file=sys.stderr) + return None + + # Look for ... pattern + match = re.search(r"(.*?)", text, re.IGNORECASE | re.DOTALL) + if not match: + print("ERROR: No ... 
tags found in the response", file=sys.stderr) + print("Response text preview (first 200 chars):", text[:200], file=sys.stderr) + return None + + answer_content = match.group(1).strip() + print(f"Found answer content with {len(answer_content)} characters", file=sys.stderr) + + # Parse each line + result = {} + lines = answer_content.split("\n") + + # Expected keys for this task + expected_keys = [ + "SpriteProducts", "Quantity100Products", "WS12Info", "PendingOrders", + "GraceOrderID", "HighestOrderInfo", "CheapProduct", "OvernightDufflePrice", + "HollisterPosition" + ] + + if len(lines) != 9: + print(f"ERROR: Expected 9 lines in answer, got {len(lines)}", file=sys.stderr) + print(f"Lines found: {lines}", file=sys.stderr) + return None + + for i, line in enumerate(lines, 1): + if "|" not in line: + print(f"ERROR: Line {i} does not contain pipe separator '|': '{line}'", file=sys.stderr) + return None + + parts = line.split("|", 1) + if len(parts) != 2: + print(f"ERROR: Line {i} could not be split into key|value: '{line}'", file=sys.stderr) + return None + + key, value = parts + result[key.strip()] = value.strip() + + # Check if all expected keys are present + missing_keys = set(expected_keys) - set(result.keys()) + if missing_keys: + print(f"ERROR: Missing expected keys: {missing_keys}", file=sys.stderr) + print(f"Keys found: {list(result.keys())}", file=sys.stderr) + return None + + # Check for unexpected keys + extra_keys = set(result.keys()) - set(expected_keys) + if extra_keys: + print(f"WARNING: Unexpected keys found: {extra_keys}", file=sys.stderr) + + return result + + +def load_expected_answer(label_path): + """ + Load the expected answer from label.txt file. + Returns a dictionary with the expected values. 
+ """ + try: + with open(label_path, "r") as f: + lines = f.read().strip().split("\n") + + expected = {} + for line in lines: + if "|" in line: + key, value = line.split("|", 1) + expected[key.strip()] = value.strip() + + return expected + except Exception as e: + print(f"Error reading label file: {str(e)}", file=sys.stderr) + return None + + +def compare_answers(model_answer, expected_answer): + """ + Compare the model's answer with the expected answer. + Returns True if all key information matches, False otherwise. + """ + if not model_answer or not expected_answer: + return False + + # Check each expected key + mismatches = [] + for key, expected_value in expected_answer.items(): + model_value = model_answer.get(key, "") + + # Special handling for different types of values + if key == "WS12Info": + # Check if product name and price match (format: name:price) + if ":" in expected_value and ":" in model_value: + expected_name, expected_price = expected_value.rsplit(":", 1) + model_name, model_price = model_value.rsplit(":", 1) + # Normalize price format + expected_price_clean = expected_price.replace("$", "").replace(",", "") + model_price_clean = model_price.replace("$", "").replace(",", "") + if ( + expected_name != model_name + or expected_price_clean != model_price_clean + ): + mismatches.append( + f"{key}: expected '{expected_value}', got '{model_value}'" + ) + else: + if expected_value != model_value: + mismatches.append( + f"{key}: expected '{expected_value}', got '{model_value}'" + ) + + elif key == "GraceOrderID": + # Order ID should start with "000" and match exactly + if not model_value.startswith("000"): + mismatches.append( + f"{key}: expected to start with '000', got '{model_value}'" + ) + elif model_value != expected_value: + mismatches.append( + f"{key}: expected '{expected_value}', got '{model_value}'" + ) + + elif key == "HighestOrderInfo": + # Check format customer:amount + if ":" in expected_value and ":" in model_value: + expected_customer, 
expected_amount = expected_value.rsplit(":", 1) + model_customer, model_amount = model_value.rsplit(":", 1) + # Normalize amount format + expected_amount_clean = expected_amount.replace("$", "").replace( + ",", "" + ) + model_amount_clean = model_amount.replace("$", "").replace(",", "") + if ( + expected_customer != model_customer + or expected_amount_clean != model_amount_clean + ): + mismatches.append( + f"{key}: expected '{expected_value}', got '{model_value}'" + ) + else: + if expected_value != model_value: + mismatches.append( + f"{key}: expected '{expected_value}', got '{model_value}'" + ) + + elif key == "Position2Product": + # Check if product name and quantity match + if ":" in expected_value and ":" in model_value: + expected_name, expected_qty = expected_value.rsplit(":", 1) + model_name, model_qty = model_value.rsplit(":", 1) + if expected_name != model_name or expected_qty != model_qty: + mismatches.append( + f"{key}: expected '{expected_value}', got '{model_value}'" + ) + else: + if expected_value != model_value: + mismatches.append( + f"{key}: expected '{expected_value}', got '{model_value}'" + ) + + elif key == "OvernightDufflePrice": + # Normalize price format + expected_clean = expected_value.replace("$", "").replace(",", "") + model_clean = model_value.replace("$", "").replace(",", "") + if expected_clean != model_clean: + mismatches.append( + f"{key}: expected '{expected_value}', got '{model_value}'" + ) + + elif key == "HollisterPosition": + # Position format (1st, 2nd, 3rd, etc.) 
+ if model_value.lower() != expected_value.lower(): + mismatches.append( + f"{key}: expected '{expected_value}', got '{model_value}'" + ) + + elif key == "SarahMillerInfo": + # Format: group:date + if ":" in expected_value and ":" in model_value: + expected_group, expected_date = expected_value.split(":", 1) + model_group, model_date = model_value.split(":", 1) + # Allow some flexibility in date format + if expected_group != model_group: + mismatches.append( + f"{key}: expected group '{expected_group}', got '{model_group}'" + ) + # For date, check if key parts match + if not (expected_date in model_date or model_date in expected_date): + mismatches.append( + f"{key}: expected date '{expected_date}', got '{model_date}'" + ) + else: + if expected_value != model_value: + mismatches.append( + f"{key}: expected '{expected_value}', got '{model_value}'" + ) + + elif key == "Invoice002BillTo": + # Name should match exactly + if model_value != expected_value: + mismatches.append( + f"{key}: expected '{expected_value}', got '{model_value}'" + ) + + else: + # Exact match for count fields and other numeric values + if model_value != expected_value: + mismatches.append( + f"{key}: expected '{expected_value}', got '{model_value}'" + ) + + if mismatches: + print("\n=== Answer Comparison Mismatches ===", file=sys.stderr) + for mismatch in mismatches: + print(f"āœ— {mismatch}", file=sys.stderr) + return False + + print("\n=== Answer Comparison ===", file=sys.stderr) + print("āœ“ All key information matches the expected answer", file=sys.stderr) + return True + + +async def verify() -> bool: + """ + Verifies that the sales and inventory analysis task has been completed correctly. + First checks the model's answer against the expected label, + then optionally verifies the actual state in the Magento Admin. 
+ """ + print("\n" + "="*60, file=sys.stderr) + print("Starting verification of Task 5", file=sys.stderr) + print("="*60, file=sys.stderr) + + # Get the label file path + label_path = Path(__file__).parent / "label.txt" + + # Load expected answer + print("\n--- Loading Expected Answer ---", file=sys.stderr) + expected_answer = load_expected_answer(label_path) + if not expected_answer: + print("FATAL ERROR: Could not load expected answer from label.txt", file=sys.stderr) + return False + print(f"Successfully loaded {len(expected_answer)} expected values", file=sys.stderr) + + # Get model's response from MCP_MESSAGES + print("\n--- Loading Model Response ---", file=sys.stderr) + model_response = get_model_response() + if not model_response: + print("FATAL ERROR: No model response found in MCP_MESSAGES", file=sys.stderr) + return False + + print(f"Found model response ({len(model_response)} characters)", file=sys.stderr) + + print("\n--- Parsing Answer Format ---", file=sys.stderr) + model_answer = parse_answer_format(model_response) + + if not model_answer: + print("\nFATAL ERROR: Could not parse answer format from model response", file=sys.stderr) + print("Verification FAILED", file=sys.stderr) + return False + + print("\n=== Model Answer Successfully Parsed ===", file=sys.stderr) + for key, value in model_answer.items(): + print(f" {key}: {value}", file=sys.stderr) + + # Compare answers + print("\n--- Comparing Answers ---", file=sys.stderr) + answer_match = compare_answers(model_answer, expected_answer) + + if not answer_match: + print("\n" + "="*60, file=sys.stderr) + print("VERIFICATION FAILED: Model answer does not match expected answer", file=sys.stderr) + print("="*60, file=sys.stderr) + return False + + print("\n" + "="*60, file=sys.stderr) + print("āœ“ VERIFICATION PASSED: Model answer matches expected answer", file=sys.stderr) + print("="*60, file=sys.stderr) + return True + + +def main(): + """ + Executes the verification process and exits with a status 
code. + """ + result = asyncio.run(verify()) + sys.exit(0 if result else 1) + + +if __name__ == "__main__": + main() diff --git a/tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/description.md b/tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/description.md new file mode 100644 index 00000000..25dec99d --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/description.md @@ -0,0 +1,73 @@ +Limit the search intelligence pass to the first three steps from the original task so it’s just two Search Terms views plus one dashboard glance. + +**Task Requirements** + +1. If need to login, login with username 'admin' and password 'admin1234'. +2. **Marketing → SEO & Search → Search Terms**: filter for queries containing `tank` to count them, reset and filter Results = 0 to count zero-result terms, then filter Uses ≄ 11 to capture the highest-use row and list every term whose Results are between 20 and 30 (join as `term:results`, or use `None:0` if none). Remove filters when done. +3. **Reports → Search Terms**: set Hits ≄ 16 and record the filtered count, then add ID range 10–15 and capture the row with the most Results, and finally switch Store View to ā€œDefault Store Viewā€ to count those entries. +4. **Dashboard**: in **Top Search Terms** list the entries whose Results = 1 (format `term:uses` joined with `|` or `None:0`), in **Last Search Terms** pick the row with the highest combination of Results and Uses, and in **Bestsellers** copy the product + quantity shown at position #3. + +Return only these data points: + +``` + +TankSearchCount|count +ZeroResultsCount|count +HighestUseTerm|term:uses +Results20to30Term|term1:results1|term2:results2|... +Hits15PlusCount|count +ID10to15MaxResults|term:results +DefaultStoreViewCount|count +OneResultTerm|term1:uses1|term2:uses2|... 
+HighestResultLastSearch|term:results +Position3Bestseller|product:quantity + +``` + +``` + +TankSearchCount|count +ZeroResultsCount|count +HighestUseTerm|term:uses +Results20to30Term|term1:results1|term2:result2|term3:result3|... +Hits15PlusCount|count +ID10to15MaxResults|term:results +DefaultStoreViewCount|count +OneResultTerm|term1:uses1|term2:uses2|term3:uses3|... +HighestResultLastSearch|term:results +Position3Bestseller|product:quantity +TopUseTerm|term:uses +FirstNonZeroResult|term:results +TotalUniqueTerms|count + +``` + +**Example Output:** +``` + +TankSearchCount|X +ZeroResultsCount|X +HighestUseTerm|search_term:XX +Results20to30Term|search_term1:XX1|search_term2:XX2|search_term3:XX3|... +Hits15PlusCount|X +ID10to15MaxResults|Product Name:XX +DefaultStoreViewCount|X +OneResultTerm|search_term1:XX1|search_term2:XX2|search_term3:XX3|... +HighestResultLastSearch|search_term:XX +Position3Bestseller|Product Name:X +TopUseTerm|search_term:XX +FirstNonZeroResult|search_term:X +TotalUniqueTerms|X + +``` + +**Success Criteria:** +- Successfully logged into Magento Admin +- Applied complex search filters in Search Terms section +- Used range filters for results and hits +- Sorted columns to find specific records +- Navigated between different report views +- Extracted data from filtered and sorted results +- Counted records accurately after applying filters +- Output answer in exact format with 13 data lines +- Answer wrapped in tags diff --git a/tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/label.txt b/tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/label.txt new file mode 100644 index 00000000..82db79c1 --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/label.txt @@ -0,0 +1,10 @@ +TankSearchCount|2 +ZeroResultsCount|1 +HighestUseTerm|hollister:19 +Results20to30Term|Antonia Racer Tank:23|tanks:23 +Hits15PlusCount|1 +ID10to15MaxResults|Antonia Racer Tank:23 
+DefaultStoreViewCount|7 +OneResultTerm|hollister:19|WP10:1 +HighestResultLastSearch|Antonia Racer Tank:23 +Position3Bestseller|Sprite Stasis Ball 65 cm:6 diff --git a/tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/meta.json b/tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/meta.json new file mode 100644 index 00000000..68ea6f3b --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/meta.json @@ -0,0 +1,22 @@ +{ + "task_id": "search_filtering_operations_easy", + "task_name": "Search Filtering Operations (Easy)", + "category_id": "shopping_admin", + "category_name": "Shopping Admin", + "description": "Follow a clearly guided path through Search Terms, the Search Terms report, and the dashboard widgets to capture the metrics needed for a focused search-behavior brief.", + "author": "Fanqing Meng", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "content submission" + ], + "mcp": [ + "playwright" + ], + "meta_data": { + "stateType": "video", + "stateContent": null, + "stateUrl": "https://storage.mcpmark.ai/tasks_state/playwright_video/magento-admin.mp4", + "stateOriginalUrl": "https://github.com/web-arena-x/webarena/tree/main/environment_docker" + } +} diff --git a/tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/verify.py b/tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/verify.py new file mode 100644 index 00000000..9a9dbe7f --- /dev/null +++ b/tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/verify.py @@ -0,0 +1,277 @@ +import re +import json +import os +import sys + + +def verify(messages): + """ + Verify that the agent has successfully performed complex search and filtering operations + in the Magento Admin panel and extracted all required information correctly. 
+ + Args: + messages: List of message dictionaries containing the conversation + + Returns: + Dictionary with 'valid' boolean and 'reason' string + """ + + # Find the last assistant message with status "completed" and type "message" + answer_content = None + for message in reversed(messages): + if ( + message.get("role") == "assistant" + and message.get("status") == "completed" + and message.get("type") == "message" + and message.get("content") + ): + # Extract text from content structure + content = message["content"] + if isinstance(content, list): + for item in content: + if isinstance(item, dict) and item.get("type") == "output_text": + text = item.get("text", "") + # Look for answer tags with case-insensitive search + answer_match = re.search( + r"(.*?)", text, re.DOTALL | re.IGNORECASE + ) + if answer_match: + answer_content = answer_match.group(1).strip() + break + elif isinstance(content, str): + # Look for answer tags in string content + answer_match = re.search(r"(.*?)", content, re.DOTALL | re.IGNORECASE) + if answer_match: + answer_content = answer_match.group(1).strip() + break + + if answer_content: + break + + if not answer_content: + return {"valid": False, "reason": "No answer found in tags"} + + # Expected format - each line should have a key|value pair + expected_keys = [ + "TankSearchCount", + "ZeroResultsCount", + "HighestUseTerm", + "Results20to30Term", + "Hits15PlusCount", + "ID10to15MaxResults", + "DefaultStoreViewCount", + "OneResultTerm", + "HighestResultLastSearch", + "Position3Bestseller", + ] + + # Parse the answer + lines = answer_content.strip().split("\n") + + # Check if we have exactly 10 lines + if len(lines) != 10: + return {"valid": False, "reason": f"Expected 10 data lines, found {len(lines)}"} + + # Parse each line and validate format + extracted_data = {} + for line in lines: + if "|" not in line: + return { + "valid": False, + "reason": f"Invalid format in line: {line}. 
Expected 'key|value' format", + } + + parts = line.split("|", 1) + if len(parts) != 2: + return {"valid": False, "reason": f"Invalid format in line: {line}"} + + key, value = parts + extracted_data[key] = value + + # Check all required keys are present + missing_keys = set(expected_keys) - set(extracted_data.keys()) + if missing_keys: + return { + "valid": False, + "reason": f"Missing required keys: {', '.join(missing_keys)}", + } + + # Validate specific data formats and expected values based on the current data + + # 1. TankSearchCount should be a number (2 terms containing 'tank') + if not extracted_data["TankSearchCount"].isdigit(): + return { + "valid": False, + "reason": f"TankSearchCount should be a number, got: {extracted_data['TankSearchCount']}", + } + + # Expected: "Antonia Racer Tank" and "tanks" contain 'tank' + if extracted_data["TankSearchCount"] != "2": + return { + "valid": False, + "reason": f"TankSearchCount should be '2', got: {extracted_data['TankSearchCount']}", + } + + # 2. ZeroResultsCount should be a number (nike has 0 results) + if not extracted_data["ZeroResultsCount"].isdigit(): + return { + "valid": False, + "reason": f"ZeroResultsCount should be a number, got: {extracted_data['ZeroResultsCount']}", + } + + if extracted_data["ZeroResultsCount"] != "1": + return { + "valid": False, + "reason": f"ZeroResultsCount should be '1', got: {extracted_data['ZeroResultsCount']}", + } + + # 3. HighestUseTerm should be in format "term:uses" + if ":" not in extracted_data["HighestUseTerm"]: + return { + "valid": False, + "reason": f"HighestUseTerm should be in format 'term:uses', got: {extracted_data['HighestUseTerm']}", + } + + # hollister has 19 uses (highest among terms with > 10 uses) + if extracted_data["HighestUseTerm"] != "hollister:19": + return { + "valid": False, + "reason": f"HighestUseTerm should be 'hollister:19', got: {extracted_data['HighestUseTerm']}", + } + + # 4. 
Results20to30Term should be in format "term:results" + if ":" not in extracted_data["Results20to30Term"]: + return { + "valid": False, + "reason": f"Results20to30Term should be in format 'term:results', got: {extracted_data['Results20to30Term']}", + } + + # Both "tanks" and "Antonia Racer Tank" have 23 results (between 20-30) + valid_results20to30 = ["tanks:23", "Antonia Racer Tank:23"] + # Check if answer contains one of the valid values or both separated by | + if not any( + val in extracted_data["Results20to30Term"] for val in valid_results20to30 + ): + return { + "valid": False, + "reason": f"Results20to30Term should contain 'tanks:23' or 'Antonia Racer Tank:23', got: {extracted_data['Results20to30Term']}", + } + + # 5. Hits15PlusCount should be a number (only hollister has 19 hits > 15) + if not extracted_data["Hits15PlusCount"].isdigit(): + return { + "valid": False, + "reason": f"Hits15PlusCount should be a number, got: {extracted_data['Hits15PlusCount']}", + } + + if extracted_data["Hits15PlusCount"] != "1": + return { + "valid": False, + "reason": f"Hits15PlusCount should be '1', got: {extracted_data['Hits15PlusCount']}", + } + + # 6. ID10to15MaxResults should be in format "term:results" + if ":" not in extracted_data["ID10to15MaxResults"]: + return { + "valid": False, + "reason": f"ID10to15MaxResults should be in format 'term:results', got: {extracted_data['ID10to15MaxResults']}", + } + + # ID 11 is hollister (1 result), ID 13 is Antonia Racer Tank (23 results) + if extracted_data["ID10to15MaxResults"] != "Antonia Racer Tank:23": + return { + "valid": False, + "reason": f"ID10to15MaxResults should be 'Antonia Racer Tank:23', got: {extracted_data['ID10to15MaxResults']}", + } + + # 7. 
DefaultStoreViewCount should be a number (all 7 terms are from Default Store View) + if not extracted_data["DefaultStoreViewCount"].isdigit(): + return { + "valid": False, + "reason": f"DefaultStoreViewCount should be a number, got: {extracted_data['DefaultStoreViewCount']}", + } + + if extracted_data["DefaultStoreViewCount"] != "7": + return { + "valid": False, + "reason": f"DefaultStoreViewCount should be '7', got: {extracted_data['DefaultStoreViewCount']}", + } + + # 8. OneResultTerm should be in format "term:uses" + if ":" not in extracted_data["OneResultTerm"]: + return { + "valid": False, + "reason": f"OneResultTerm should be in format 'term:uses', got: {extracted_data['OneResultTerm']}", + } + + # Both hollister and WP10 have exactly 1 result + valid_one_result = ["hollister:19", "WP10:1"] + if not any(val in extracted_data["OneResultTerm"] for val in valid_one_result): + return { + "valid": False, + "reason": f"OneResultTerm should contain 'hollister:19' or 'WP10:1', got: {extracted_data['OneResultTerm']}", + } + + # 9. HighestResultLastSearch should be in format "term:results" + if ":" not in extracted_data["HighestResultLastSearch"]: + return { + "valid": False, + "reason": f"HighestResultLastSearch should be in format 'term:results', got: {extracted_data['HighestResultLastSearch']}", + } + + # In Last Search Terms: tanks and Antonia Racer Tank both have 23 results (highest) + valid_highest_last = ["tanks:23", "Antonia Racer Tank:23"] + if not any( + val in extracted_data["HighestResultLastSearch"] for val in valid_highest_last + ): + return { + "valid": False, + "reason": f"HighestResultLastSearch should contain 'tanks:23' or 'Antonia Racer Tank:23', got: {extracted_data['HighestResultLastSearch']}", + } + + # 10. 
Position3Bestseller should be in format "product:quantity" + if ":" not in extracted_data["Position3Bestseller"]: + return { + "valid": False, + "reason": f"Position3Bestseller should be in format 'product:quantity', got: {extracted_data['Position3Bestseller']}", + } + + # Position 3 in Bestsellers is "Sprite Stasis Ball 65 cm" with quantity 6 + if extracted_data["Position3Bestseller"] != "Sprite Stasis Ball 65 cm:6": + return { + "valid": False, + "reason": f"Position3Bestseller should be 'Sprite Stasis Ball 65 cm:6', got: {extracted_data['Position3Bestseller']}", + } + + # All validations passed + return { + "valid": True, + "reason": "All complex search and filtering operations completed successfully", + } + + +if __name__ == "__main__": + # Load messages from environment variable + messages_path = os.getenv("MCP_MESSAGES") + if not messages_path: + print( + json.dumps( + {"valid": False, "reason": "MCP_MESSAGES environment variable not set"} + ) + ) + exit(1) + + try: + with open(messages_path, "r") as f: + messages = json.load(f) + except Exception as e: + print( + json.dumps({"valid": False, "reason": f"Failed to load messages: {str(e)}"}) + ) + exit(1) + + # Run verification + result = verify(messages) + print(json.dumps(result)) + # Exit with appropriate code based on verification result + sys.exit(0 if result["valid"] else 1) diff --git a/tasks/playwright_webarena/reddit/ai_data_analyst/description.md b/tasks/playwright_webarena/standard/reddit/ai_data_analyst/description.md similarity index 100% rename from tasks/playwright_webarena/reddit/ai_data_analyst/description.md rename to tasks/playwright_webarena/standard/reddit/ai_data_analyst/description.md diff --git a/tasks/playwright_webarena/reddit/ai_data_analyst/label.txt b/tasks/playwright_webarena/standard/reddit/ai_data_analyst/label.txt similarity index 100% rename from tasks/playwright_webarena/reddit/ai_data_analyst/label.txt rename to 
tasks/playwright_webarena/standard/reddit/ai_data_analyst/label.txt diff --git a/tasks/playwright_webarena/reddit/ai_data_analyst/meta.json b/tasks/playwright_webarena/standard/reddit/ai_data_analyst/meta.json similarity index 100% rename from tasks/playwright_webarena/reddit/ai_data_analyst/meta.json rename to tasks/playwright_webarena/standard/reddit/ai_data_analyst/meta.json diff --git a/tasks/playwright_webarena/reddit/ai_data_analyst/verify.py b/tasks/playwright_webarena/standard/reddit/ai_data_analyst/verify.py similarity index 100% rename from tasks/playwright_webarena/reddit/ai_data_analyst/verify.py rename to tasks/playwright_webarena/standard/reddit/ai_data_analyst/verify.py diff --git a/tasks/playwright_webarena/reddit/budget_europe_travel/description.md b/tasks/playwright_webarena/standard/reddit/budget_europe_travel/description.md similarity index 100% rename from tasks/playwright_webarena/reddit/budget_europe_travel/description.md rename to tasks/playwright_webarena/standard/reddit/budget_europe_travel/description.md diff --git a/tasks/playwright_webarena/reddit/budget_europe_travel/meta.json b/tasks/playwright_webarena/standard/reddit/budget_europe_travel/meta.json similarity index 100% rename from tasks/playwright_webarena/reddit/budget_europe_travel/meta.json rename to tasks/playwright_webarena/standard/reddit/budget_europe_travel/meta.json diff --git a/tasks/playwright_webarena/reddit/budget_europe_travel/verify.py b/tasks/playwright_webarena/standard/reddit/budget_europe_travel/verify.py similarity index 100% rename from tasks/playwright_webarena/reddit/budget_europe_travel/verify.py rename to tasks/playwright_webarena/standard/reddit/budget_europe_travel/verify.py diff --git a/tasks/playwright_webarena/reddit/buyitforlife_research/description.md b/tasks/playwright_webarena/standard/reddit/buyitforlife_research/description.md similarity index 100% rename from tasks/playwright_webarena/reddit/buyitforlife_research/description.md rename to 
tasks/playwright_webarena/standard/reddit/buyitforlife_research/description.md diff --git a/tasks/playwright_webarena/reddit/buyitforlife_research/label.txt b/tasks/playwright_webarena/standard/reddit/buyitforlife_research/label.txt similarity index 100% rename from tasks/playwright_webarena/reddit/buyitforlife_research/label.txt rename to tasks/playwright_webarena/standard/reddit/buyitforlife_research/label.txt diff --git a/tasks/playwright_webarena/reddit/buyitforlife_research/meta.json b/tasks/playwright_webarena/standard/reddit/buyitforlife_research/meta.json similarity index 100% rename from tasks/playwright_webarena/reddit/buyitforlife_research/meta.json rename to tasks/playwright_webarena/standard/reddit/buyitforlife_research/meta.json diff --git a/tasks/playwright_webarena/reddit/buyitforlife_research/verify.py b/tasks/playwright_webarena/standard/reddit/buyitforlife_research/verify.py similarity index 100% rename from tasks/playwright_webarena/reddit/buyitforlife_research/verify.py rename to tasks/playwright_webarena/standard/reddit/buyitforlife_research/verify.py diff --git a/tasks/playwright_webarena/reddit/llm_research_summary/description.md b/tasks/playwright_webarena/standard/reddit/llm_research_summary/description.md similarity index 100% rename from tasks/playwright_webarena/reddit/llm_research_summary/description.md rename to tasks/playwright_webarena/standard/reddit/llm_research_summary/description.md diff --git a/tasks/playwright_webarena/reddit/llm_research_summary/label.txt b/tasks/playwright_webarena/standard/reddit/llm_research_summary/label.txt similarity index 100% rename from tasks/playwright_webarena/reddit/llm_research_summary/label.txt rename to tasks/playwright_webarena/standard/reddit/llm_research_summary/label.txt diff --git a/tasks/playwright_webarena/reddit/llm_research_summary/meta.json b/tasks/playwright_webarena/standard/reddit/llm_research_summary/meta.json similarity index 100% rename from 
tasks/playwright_webarena/reddit/llm_research_summary/meta.json rename to tasks/playwright_webarena/standard/reddit/llm_research_summary/meta.json diff --git a/tasks/playwright_webarena/reddit/llm_research_summary/verify.py b/tasks/playwright_webarena/standard/reddit/llm_research_summary/verify.py similarity index 100% rename from tasks/playwright_webarena/reddit/llm_research_summary/verify.py rename to tasks/playwright_webarena/standard/reddit/llm_research_summary/verify.py diff --git a/tasks/playwright_webarena/reddit/movie_reviewer_analysis/description.md b/tasks/playwright_webarena/standard/reddit/movie_reviewer_analysis/description.md similarity index 100% rename from tasks/playwright_webarena/reddit/movie_reviewer_analysis/description.md rename to tasks/playwright_webarena/standard/reddit/movie_reviewer_analysis/description.md diff --git a/tasks/playwright_webarena/reddit/movie_reviewer_analysis/label.txt b/tasks/playwright_webarena/standard/reddit/movie_reviewer_analysis/label.txt similarity index 100% rename from tasks/playwright_webarena/reddit/movie_reviewer_analysis/label.txt rename to tasks/playwright_webarena/standard/reddit/movie_reviewer_analysis/label.txt diff --git a/tasks/playwright_webarena/reddit/movie_reviewer_analysis/meta.json b/tasks/playwright_webarena/standard/reddit/movie_reviewer_analysis/meta.json similarity index 100% rename from tasks/playwright_webarena/reddit/movie_reviewer_analysis/meta.json rename to tasks/playwright_webarena/standard/reddit/movie_reviewer_analysis/meta.json diff --git a/tasks/playwright_webarena/reddit/movie_reviewer_analysis/verify.py b/tasks/playwright_webarena/standard/reddit/movie_reviewer_analysis/verify.py similarity index 100% rename from tasks/playwright_webarena/reddit/movie_reviewer_analysis/verify.py rename to tasks/playwright_webarena/standard/reddit/movie_reviewer_analysis/verify.py diff --git a/tasks/playwright_webarena/reddit/nba_statistics_analysis/description.md 
b/tasks/playwright_webarena/standard/reddit/nba_statistics_analysis/description.md similarity index 100% rename from tasks/playwright_webarena/reddit/nba_statistics_analysis/description.md rename to tasks/playwright_webarena/standard/reddit/nba_statistics_analysis/description.md diff --git a/tasks/playwright_webarena/reddit/nba_statistics_analysis/label.txt b/tasks/playwright_webarena/standard/reddit/nba_statistics_analysis/label.txt similarity index 100% rename from tasks/playwright_webarena/reddit/nba_statistics_analysis/label.txt rename to tasks/playwright_webarena/standard/reddit/nba_statistics_analysis/label.txt diff --git a/tasks/playwright_webarena/reddit/nba_statistics_analysis/meta.json b/tasks/playwright_webarena/standard/reddit/nba_statistics_analysis/meta.json similarity index 100% rename from tasks/playwright_webarena/reddit/nba_statistics_analysis/meta.json rename to tasks/playwright_webarena/standard/reddit/nba_statistics_analysis/meta.json diff --git a/tasks/playwright_webarena/reddit/nba_statistics_analysis/verify.py b/tasks/playwright_webarena/standard/reddit/nba_statistics_analysis/verify.py similarity index 100% rename from tasks/playwright_webarena/reddit/nba_statistics_analysis/verify.py rename to tasks/playwright_webarena/standard/reddit/nba_statistics_analysis/verify.py diff --git a/tasks/playwright_webarena/reddit/routine_tracker_forum/description.md b/tasks/playwright_webarena/standard/reddit/routine_tracker_forum/description.md similarity index 100% rename from tasks/playwright_webarena/reddit/routine_tracker_forum/description.md rename to tasks/playwright_webarena/standard/reddit/routine_tracker_forum/description.md diff --git a/tasks/playwright_webarena/reddit/routine_tracker_forum/meta.json b/tasks/playwright_webarena/standard/reddit/routine_tracker_forum/meta.json similarity index 100% rename from tasks/playwright_webarena/reddit/routine_tracker_forum/meta.json rename to 
tasks/playwright_webarena/standard/reddit/routine_tracker_forum/meta.json diff --git a/tasks/playwright_webarena/reddit/routine_tracker_forum/verify.py b/tasks/playwright_webarena/standard/reddit/routine_tracker_forum/verify.py similarity index 100% rename from tasks/playwright_webarena/reddit/routine_tracker_forum/verify.py rename to tasks/playwright_webarena/standard/reddit/routine_tracker_forum/verify.py diff --git a/tasks/playwright_webarena/shopping/advanced_product_analysis/description.md b/tasks/playwright_webarena/standard/shopping/advanced_product_analysis/description.md similarity index 100% rename from tasks/playwright_webarena/shopping/advanced_product_analysis/description.md rename to tasks/playwright_webarena/standard/shopping/advanced_product_analysis/description.md diff --git a/tasks/playwright_webarena/shopping/advanced_product_analysis/label.txt b/tasks/playwright_webarena/standard/shopping/advanced_product_analysis/label.txt similarity index 100% rename from tasks/playwright_webarena/shopping/advanced_product_analysis/label.txt rename to tasks/playwright_webarena/standard/shopping/advanced_product_analysis/label.txt diff --git a/tasks/playwright_webarena/shopping/advanced_product_analysis/meta.json b/tasks/playwright_webarena/standard/shopping/advanced_product_analysis/meta.json similarity index 100% rename from tasks/playwright_webarena/shopping/advanced_product_analysis/meta.json rename to tasks/playwright_webarena/standard/shopping/advanced_product_analysis/meta.json diff --git a/tasks/playwright_webarena/shopping/advanced_product_analysis/verify.py b/tasks/playwright_webarena/standard/shopping/advanced_product_analysis/verify.py similarity index 100% rename from tasks/playwright_webarena/shopping/advanced_product_analysis/verify.py rename to tasks/playwright_webarena/standard/shopping/advanced_product_analysis/verify.py diff --git a/tasks/playwright_webarena/shopping/gaming_accessories_analysis/description.md 
b/tasks/playwright_webarena/standard/shopping/gaming_accessories_analysis/description.md similarity index 100% rename from tasks/playwright_webarena/shopping/gaming_accessories_analysis/description.md rename to tasks/playwright_webarena/standard/shopping/gaming_accessories_analysis/description.md diff --git a/tasks/playwright_webarena/shopping/gaming_accessories_analysis/label.txt b/tasks/playwright_webarena/standard/shopping/gaming_accessories_analysis/label.txt similarity index 100% rename from tasks/playwright_webarena/shopping/gaming_accessories_analysis/label.txt rename to tasks/playwright_webarena/standard/shopping/gaming_accessories_analysis/label.txt diff --git a/tasks/playwright_webarena/shopping/gaming_accessories_analysis/meta.json b/tasks/playwright_webarena/standard/shopping/gaming_accessories_analysis/meta.json similarity index 100% rename from tasks/playwright_webarena/shopping/gaming_accessories_analysis/meta.json rename to tasks/playwright_webarena/standard/shopping/gaming_accessories_analysis/meta.json diff --git a/tasks/playwright_webarena/shopping/gaming_accessories_analysis/verify.py b/tasks/playwright_webarena/standard/shopping/gaming_accessories_analysis/verify.py similarity index 100% rename from tasks/playwright_webarena/shopping/gaming_accessories_analysis/verify.py rename to tasks/playwright_webarena/standard/shopping/gaming_accessories_analysis/verify.py diff --git a/tasks/playwright_webarena/shopping/health_routine_optimization/description.md b/tasks/playwright_webarena/standard/shopping/health_routine_optimization/description.md similarity index 100% rename from tasks/playwright_webarena/shopping/health_routine_optimization/description.md rename to tasks/playwright_webarena/standard/shopping/health_routine_optimization/description.md diff --git a/tasks/playwright_webarena/shopping/health_routine_optimization/label.txt b/tasks/playwright_webarena/standard/shopping/health_routine_optimization/label.txt similarity index 100% rename from 
tasks/playwright_webarena/shopping/health_routine_optimization/label.txt rename to tasks/playwright_webarena/standard/shopping/health_routine_optimization/label.txt diff --git a/tasks/playwright_webarena/shopping/health_routine_optimization/meta.json b/tasks/playwright_webarena/standard/shopping/health_routine_optimization/meta.json similarity index 100% rename from tasks/playwright_webarena/shopping/health_routine_optimization/meta.json rename to tasks/playwright_webarena/standard/shopping/health_routine_optimization/meta.json diff --git a/tasks/playwright_webarena/shopping/health_routine_optimization/verify.py b/tasks/playwright_webarena/standard/shopping/health_routine_optimization/verify.py similarity index 100% rename from tasks/playwright_webarena/shopping/health_routine_optimization/verify.py rename to tasks/playwright_webarena/standard/shopping/health_routine_optimization/verify.py diff --git a/tasks/playwright_webarena/shopping/holiday_baking_competition/description.md b/tasks/playwright_webarena/standard/shopping/holiday_baking_competition/description.md similarity index 100% rename from tasks/playwright_webarena/shopping/holiday_baking_competition/description.md rename to tasks/playwright_webarena/standard/shopping/holiday_baking_competition/description.md diff --git a/tasks/playwright_webarena/shopping/holiday_baking_competition/label.txt b/tasks/playwright_webarena/standard/shopping/holiday_baking_competition/label.txt similarity index 100% rename from tasks/playwright_webarena/shopping/holiday_baking_competition/label.txt rename to tasks/playwright_webarena/standard/shopping/holiday_baking_competition/label.txt diff --git a/tasks/playwright_webarena/shopping/holiday_baking_competition/meta.json b/tasks/playwright_webarena/standard/shopping/holiday_baking_competition/meta.json similarity index 100% rename from tasks/playwright_webarena/shopping/holiday_baking_competition/meta.json rename to 
tasks/playwright_webarena/standard/shopping/holiday_baking_competition/meta.json diff --git a/tasks/playwright_webarena/shopping/holiday_baking_competition/verify.py b/tasks/playwright_webarena/standard/shopping/holiday_baking_competition/verify.py similarity index 100% rename from tasks/playwright_webarena/shopping/holiday_baking_competition/verify.py rename to tasks/playwright_webarena/standard/shopping/holiday_baking_competition/verify.py diff --git a/tasks/playwright_webarena/shopping/multi_category_budget_analysis/description.md b/tasks/playwright_webarena/standard/shopping/multi_category_budget_analysis/description.md similarity index 100% rename from tasks/playwright_webarena/shopping/multi_category_budget_analysis/description.md rename to tasks/playwright_webarena/standard/shopping/multi_category_budget_analysis/description.md diff --git a/tasks/playwright_webarena/shopping/multi_category_budget_analysis/label.txt b/tasks/playwright_webarena/standard/shopping/multi_category_budget_analysis/label.txt similarity index 100% rename from tasks/playwright_webarena/shopping/multi_category_budget_analysis/label.txt rename to tasks/playwright_webarena/standard/shopping/multi_category_budget_analysis/label.txt diff --git a/tasks/playwright_webarena/shopping/multi_category_budget_analysis/meta.json b/tasks/playwright_webarena/standard/shopping/multi_category_budget_analysis/meta.json similarity index 100% rename from tasks/playwright_webarena/shopping/multi_category_budget_analysis/meta.json rename to tasks/playwright_webarena/standard/shopping/multi_category_budget_analysis/meta.json diff --git a/tasks/playwright_webarena/shopping/multi_category_budget_analysis/verify.py b/tasks/playwright_webarena/standard/shopping/multi_category_budget_analysis/verify.py similarity index 100% rename from tasks/playwright_webarena/shopping/multi_category_budget_analysis/verify.py rename to tasks/playwright_webarena/standard/shopping/multi_category_budget_analysis/verify.py diff 
--git a/tasks/playwright_webarena/shopping/printer_keyboard_search/description.md b/tasks/playwright_webarena/standard/shopping/printer_keyboard_search/description.md similarity index 100% rename from tasks/playwright_webarena/shopping/printer_keyboard_search/description.md rename to tasks/playwright_webarena/standard/shopping/printer_keyboard_search/description.md diff --git a/tasks/playwright_webarena/shopping/printer_keyboard_search/label.txt b/tasks/playwright_webarena/standard/shopping/printer_keyboard_search/label.txt similarity index 100% rename from tasks/playwright_webarena/shopping/printer_keyboard_search/label.txt rename to tasks/playwright_webarena/standard/shopping/printer_keyboard_search/label.txt diff --git a/tasks/playwright_webarena/shopping/printer_keyboard_search/meta.json b/tasks/playwright_webarena/standard/shopping/printer_keyboard_search/meta.json similarity index 100% rename from tasks/playwright_webarena/shopping/printer_keyboard_search/meta.json rename to tasks/playwright_webarena/standard/shopping/printer_keyboard_search/meta.json diff --git a/tasks/playwright_webarena/shopping/printer_keyboard_search/verify.py b/tasks/playwright_webarena/standard/shopping/printer_keyboard_search/verify.py similarity index 100% rename from tasks/playwright_webarena/shopping/printer_keyboard_search/verify.py rename to tasks/playwright_webarena/standard/shopping/printer_keyboard_search/verify.py diff --git a/tasks/playwright_webarena/shopping/running_shoes_purchase/description.md b/tasks/playwright_webarena/standard/shopping/running_shoes_purchase/description.md similarity index 100% rename from tasks/playwright_webarena/shopping/running_shoes_purchase/description.md rename to tasks/playwright_webarena/standard/shopping/running_shoes_purchase/description.md diff --git a/tasks/playwright_webarena/shopping/running_shoes_purchase/label.txt b/tasks/playwright_webarena/standard/shopping/running_shoes_purchase/label.txt similarity index 100% rename from 
tasks/playwright_webarena/shopping/running_shoes_purchase/label.txt rename to tasks/playwright_webarena/standard/shopping/running_shoes_purchase/label.txt diff --git a/tasks/playwright_webarena/shopping/running_shoes_purchase/meta.json b/tasks/playwright_webarena/standard/shopping/running_shoes_purchase/meta.json similarity index 100% rename from tasks/playwright_webarena/shopping/running_shoes_purchase/meta.json rename to tasks/playwright_webarena/standard/shopping/running_shoes_purchase/meta.json diff --git a/tasks/playwright_webarena/shopping/running_shoes_purchase/verify.py b/tasks/playwright_webarena/standard/shopping/running_shoes_purchase/verify.py similarity index 100% rename from tasks/playwright_webarena/shopping/running_shoes_purchase/verify.py rename to tasks/playwright_webarena/standard/shopping/running_shoes_purchase/verify.py diff --git a/tasks/playwright_webarena/shopping_admin/customer_segmentation_setup/description.md b/tasks/playwright_webarena/standard/shopping_admin/customer_segmentation_setup/description.md similarity index 100% rename from tasks/playwright_webarena/shopping_admin/customer_segmentation_setup/description.md rename to tasks/playwright_webarena/standard/shopping_admin/customer_segmentation_setup/description.md diff --git a/tasks/playwright_webarena/shopping_admin/customer_segmentation_setup/label.txt b/tasks/playwright_webarena/standard/shopping_admin/customer_segmentation_setup/label.txt similarity index 100% rename from tasks/playwright_webarena/shopping_admin/customer_segmentation_setup/label.txt rename to tasks/playwright_webarena/standard/shopping_admin/customer_segmentation_setup/label.txt diff --git a/tasks/playwright_webarena/shopping_admin/customer_segmentation_setup/meta.json b/tasks/playwright_webarena/standard/shopping_admin/customer_segmentation_setup/meta.json similarity index 100% rename from tasks/playwright_webarena/shopping_admin/customer_segmentation_setup/meta.json rename to 
tasks/playwright_webarena/standard/shopping_admin/customer_segmentation_setup/meta.json diff --git a/tasks/playwright_webarena/shopping_admin/customer_segmentation_setup/verify.py b/tasks/playwright_webarena/standard/shopping_admin/customer_segmentation_setup/verify.py similarity index 100% rename from tasks/playwright_webarena/shopping_admin/customer_segmentation_setup/verify.py rename to tasks/playwright_webarena/standard/shopping_admin/customer_segmentation_setup/verify.py diff --git a/tasks/playwright_webarena/shopping_admin/fitness_promotion_strategy/description.md b/tasks/playwright_webarena/standard/shopping_admin/fitness_promotion_strategy/description.md similarity index 100% rename from tasks/playwright_webarena/shopping_admin/fitness_promotion_strategy/description.md rename to tasks/playwright_webarena/standard/shopping_admin/fitness_promotion_strategy/description.md diff --git a/tasks/playwright_webarena/shopping_admin/fitness_promotion_strategy/label.txt b/tasks/playwright_webarena/standard/shopping_admin/fitness_promotion_strategy/label.txt similarity index 100% rename from tasks/playwright_webarena/shopping_admin/fitness_promotion_strategy/label.txt rename to tasks/playwright_webarena/standard/shopping_admin/fitness_promotion_strategy/label.txt diff --git a/tasks/playwright_webarena/shopping_admin/fitness_promotion_strategy/meta.json b/tasks/playwright_webarena/standard/shopping_admin/fitness_promotion_strategy/meta.json similarity index 100% rename from tasks/playwright_webarena/shopping_admin/fitness_promotion_strategy/meta.json rename to tasks/playwright_webarena/standard/shopping_admin/fitness_promotion_strategy/meta.json diff --git a/tasks/playwright_webarena/standard/shopping_admin/fitness_promotion_strategy/verify.py b/tasks/playwright_webarena/standard/shopping_admin/fitness_promotion_strategy/verify.py new file mode 100644 index 00000000..e4835e4b --- /dev/null +++ 
b/tasks/playwright_webarena/standard/shopping_admin/fitness_promotion_strategy/verify.py @@ -0,0 +1,263 @@ +import asyncio +import sys +import re +import os +import json +from pathlib import Path + +def get_model_response(): + """ + Get the model's response from the MCP_MESSAGES environment variable. + Returns the last assistant message text. + """ + messages_path = os.getenv("MCP_MESSAGES") + print(f"MCP_MESSAGES: {messages_path}") + if not messages_path: + print("Warning: MCP_MESSAGES environment variable not set", file=sys.stderr) + return None + + try: + with open(messages_path, 'r') as f: + messages = json.load(f) + + # Find the last assistant message + for message in reversed(messages): + if message.get('role') == 'assistant' and message.get('status') == 'completed': + content = message.get('content', []) + for item in content: + if item.get('type') == 'output_text': + return item.get('text', '') + + print("Warning: No assistant response found in messages", file=sys.stderr) + return None + except Exception as e: + print(f"Error reading messages file: {str(e)}", file=sys.stderr) + return None + +def parse_answer_format(text): + """ + Parse the ... format from the agent's output. + Returns a dictionary with the parsed values. + """ + if not text: + return None + + # Look for ... pattern + match = re.search(r'(.*?)', text, re.IGNORECASE | re.DOTALL) + if not match: + return None + + answer_content = match.group(1).strip() + + # Parse each line + result = {} + lines = answer_content.split('\n') + + # Skip the check for exact number of lines - just parse what we have + # if len(lines) != 13: + # print(f"Error: Expected 13 lines in answer, got {len(lines)}", file=sys.stderr) + # return None + + for line in lines: + if '|' in line: + key, value = line.split('|', 1) + result[key.strip()] = value.strip() + + return result + +def load_expected_answer(label_path): + """ + Load the expected answer from label.txt file. + Returns a dictionary with the expected values. 
+ """ + try: + with open(label_path, 'r') as f: + lines = f.read().strip().split('\n') + + expected = {} + for line in lines: + if '|' in line: + key, value = line.split('|', 1) + expected[key.strip()] = value.strip() + + return expected + except Exception as e: + print(f"Error reading label file: {str(e)}", file=sys.stderr) + return None + +def compare_answers(model_answer, expected_answer): + """ + Compare the model's answer with the expected answer. + Returns True if all key information matches, False otherwise. + """ + if not model_answer or not expected_answer: + return False + + # Check each expected key + mismatches = [] + for key, expected_value in expected_answer.items(): + model_value = model_answer.get(key, '') + + # Special handling for different types of values + if key in ['Bestseller1', 'Bestseller2', 'Bestseller3']: + # Check if all parts match (name:price:quantity:sku:inventory:status) + if ':' in expected_value and ':' in model_value: + expected_parts = expected_value.split(':') + model_parts = model_value.split(':') + if len(expected_parts) == 6 and len(model_parts) == 6: + # Compare each part + for i, (exp, mod) in enumerate(zip(expected_parts, model_parts)): + if i == 1: # Price field + exp_clean = exp.replace('$', '').replace(',', '') + mod_clean = mod.replace('$', '').replace(',', '') + if exp_clean != mod_clean: + mismatches.append(f"{key} price: expected '{exp}', got '{mod}'") + elif i == 4: # Inventory field (may have decimal places) + exp_float = float(exp.replace(',', '')) + mod_float = float(mod.replace(',', '')) + if abs(exp_float - mod_float) > 0.0001: + mismatches.append(f"{key} inventory: expected '{exp}', got '{mod}'") + else: + if exp.lower() != mod.lower(): + mismatches.append(f"{key} part {i}: expected '{exp}', got '{mod}'") + else: + mismatches.append(f"{key}: format mismatch - expected '{expected_value}', got '{model_value}'") + else: + if expected_value != model_value: + mismatches.append(f"{key}: expected '{expected_value}', 
got '{model_value}'") + + elif key == 'LowestInventoryProduct': + # Check product name and inventory + if ':' in expected_value and ':' in model_value: + expected_name, expected_inv = expected_value.rsplit(':', 1) + model_name, model_inv = model_value.rsplit(':', 1) + if expected_name.lower() != model_name.lower(): + mismatches.append(f"{key} name: expected '{expected_name}', got '{model_name}'") + exp_float = float(expected_inv.replace(',', '')) + mod_float = float(model_inv.replace(',', '')) + if abs(exp_float - mod_float) > 0.0001: + mismatches.append(f"{key} inventory: expected '{expected_inv}', got '{model_inv}'") + else: + if expected_value != model_value: + mismatches.append(f"{key}: expected '{expected_value}', got '{model_value}'") + + elif key in ['TotalRevenue', 'MinimumPurchaseRule']: + # For price/amount fields, normalize format + expected_clean = expected_value.replace('$', '').replace(',', '') + model_clean = model_value.replace('$', '').replace(',', '') + if expected_clean != model_clean: + mismatches.append(f"{key}: expected '{expected_value}', got '{model_value}'") + + elif key == 'BestsellerInSearch': + # Check search term and count + if expected_value.lower() != model_value.lower(): + mismatches.append(f"{key}: expected '{expected_value}', got '{model_value}'") + + elif key == 'PercentageDiscountRule': + # Check rule name and percentage + if ':' in expected_value and ':' in model_value: + expected_name, expected_pct = expected_value.rsplit(':', 1) + model_name, model_pct = model_value.rsplit(':', 1) + if expected_name != model_name: + mismatches.append(f"{key} name: expected '{expected_name}', got '{model_name}'") + # Normalize percentage (20% vs 20 vs 0.20) + exp_pct_clean = expected_pct.replace('%', '').strip() + mod_pct_clean = model_pct.replace('%', '').strip() + if exp_pct_clean != mod_pct_clean: + mismatches.append(f"{key} percentage: expected '{expected_pct}', got '{model_pct}'") + else: + if expected_value != model_value: + 
mismatches.append(f"{key}: expected '{expected_value}', got '{model_value}'") + + elif key == 'TopCustomer': + # Check name:email:group + if ':' in expected_value and ':' in model_value: + expected_parts = expected_value.split(':') + model_parts = model_value.split(':') + if len(expected_parts) == 3 and len(model_parts) == 3: + exp_name, exp_email, exp_group = expected_parts + mod_name, mod_email, mod_group = model_parts + if exp_name != mod_name: + mismatches.append(f"{key} name: expected '{exp_name}', got '{mod_name}'") + if exp_email.lower() != mod_email.lower(): + mismatches.append(f"{key} email: expected '{exp_email}', got '{mod_email}'") + if exp_group.lower() != mod_group.lower(): + mismatches.append(f"{key} group: expected '{exp_group}', got '{mod_group}'") + else: + mismatches.append(f"{key}: format mismatch - expected '{expected_value}', got '{model_value}'") + else: + if expected_value != model_value: + mismatches.append(f"{key}: expected '{expected_value}', got '{model_value}'") + + elif key == 'MostRecentOrderDate': + # Date format may vary, do flexible comparison + if expected_value.lower() == 'none' and model_value.lower() == 'none': + continue + elif expected_value != model_value: + # Could add more flexible date parsing here if needed + mismatches.append(f"{key}: expected '{expected_value}', got '{model_value}'") + + else: + # Exact match for other fields (counts, etc.) + if str(model_value) != str(expected_value): + mismatches.append(f"{key}: expected '{expected_value}', got '{model_value}'") + + if mismatches: + print("\n=== Answer Comparison Mismatches ===", file=sys.stderr) + for mismatch in mismatches: + print(f"āœ— {mismatch}", file=sys.stderr) + return False + + print("\n=== Answer Comparison ===", file=sys.stderr) + print("āœ“ All key information matches the expected answer", file=sys.stderr) + return True + +async def verify() -> bool: + """ + Verifies that the bestseller analysis and promotion task has been completed correctly. 
+ First checks the model's answer against the expected label, + then optionally verifies the actual state in the Magento Admin. + """ + # Get the label file path + label_path = Path(__file__).parent / "label.txt" + + # Load expected answer + expected_answer = load_expected_answer(label_path) + if not expected_answer: + print("Error: Could not load expected answer from label.txt", file=sys.stderr) + return False + + # Get model's response from MCP_MESSAGES + model_response = get_model_response() + if model_response: + print("Found model response, parsing answer format...", file=sys.stderr) + model_answer = parse_answer_format(model_response) + + if model_answer: + print("\n=== Model Answer Parsed ===", file=sys.stderr) + for key, value in model_answer.items(): + print(f"{key}: {value}", file=sys.stderr) + + # Compare answers + answer_match = compare_answers(model_answer, expected_answer) + if not answer_match: + print("\nModel answer does not match expected answer", file=sys.stderr) + return False + print("\nāœ“ Model answer matches expected answer", file=sys.stderr) + return True + else: + print("Warning: Could not parse answer format from model response", file=sys.stderr) + return False + else: + print("No model response found", file=sys.stderr) + return False + +def main(): + """ + Executes the verification process and exits with a status code. 
+ """ + result = asyncio.run(verify()) + sys.exit(0 if result else 1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tasks/playwright_webarena/shopping_admin/marketing_customer_analysis/description.md b/tasks/playwright_webarena/standard/shopping_admin/marketing_customer_analysis/description.md similarity index 100% rename from tasks/playwright_webarena/shopping_admin/marketing_customer_analysis/description.md rename to tasks/playwright_webarena/standard/shopping_admin/marketing_customer_analysis/description.md diff --git a/tasks/playwright_webarena/shopping_admin/marketing_customer_analysis/label.txt b/tasks/playwright_webarena/standard/shopping_admin/marketing_customer_analysis/label.txt similarity index 100% rename from tasks/playwright_webarena/shopping_admin/marketing_customer_analysis/label.txt rename to tasks/playwright_webarena/standard/shopping_admin/marketing_customer_analysis/label.txt diff --git a/tasks/playwright_webarena/shopping_admin/marketing_customer_analysis/meta.json b/tasks/playwright_webarena/standard/shopping_admin/marketing_customer_analysis/meta.json similarity index 100% rename from tasks/playwright_webarena/shopping_admin/marketing_customer_analysis/meta.json rename to tasks/playwright_webarena/standard/shopping_admin/marketing_customer_analysis/meta.json diff --git a/tasks/playwright_webarena/shopping_admin/marketing_customer_analysis/verify.py b/tasks/playwright_webarena/standard/shopping_admin/marketing_customer_analysis/verify.py similarity index 100% rename from tasks/playwright_webarena/shopping_admin/marketing_customer_analysis/verify.py rename to tasks/playwright_webarena/standard/shopping_admin/marketing_customer_analysis/verify.py diff --git a/tasks/playwright_webarena/shopping_admin/ny_expansion_analysis/description.md b/tasks/playwright_webarena/standard/shopping_admin/ny_expansion_analysis/description.md similarity index 100% rename from 
tasks/playwright_webarena/shopping_admin/ny_expansion_analysis/description.md rename to tasks/playwright_webarena/standard/shopping_admin/ny_expansion_analysis/description.md diff --git a/tasks/playwright_webarena/shopping_admin/ny_expansion_analysis/label.txt b/tasks/playwright_webarena/standard/shopping_admin/ny_expansion_analysis/label.txt similarity index 100% rename from tasks/playwright_webarena/shopping_admin/ny_expansion_analysis/label.txt rename to tasks/playwright_webarena/standard/shopping_admin/ny_expansion_analysis/label.txt diff --git a/tasks/playwright_webarena/shopping_admin/ny_expansion_analysis/meta.json b/tasks/playwright_webarena/standard/shopping_admin/ny_expansion_analysis/meta.json similarity index 100% rename from tasks/playwright_webarena/shopping_admin/ny_expansion_analysis/meta.json rename to tasks/playwright_webarena/standard/shopping_admin/ny_expansion_analysis/meta.json diff --git a/tasks/playwright_webarena/shopping_admin/ny_expansion_analysis/verify.py b/tasks/playwright_webarena/standard/shopping_admin/ny_expansion_analysis/verify.py similarity index 100% rename from tasks/playwright_webarena/shopping_admin/ny_expansion_analysis/verify.py rename to tasks/playwright_webarena/standard/shopping_admin/ny_expansion_analysis/verify.py diff --git a/tasks/playwright_webarena/shopping_admin/products_sales_analysis/description.md b/tasks/playwright_webarena/standard/shopping_admin/products_sales_analysis/description.md similarity index 100% rename from tasks/playwright_webarena/shopping_admin/products_sales_analysis/description.md rename to tasks/playwright_webarena/standard/shopping_admin/products_sales_analysis/description.md diff --git a/tasks/playwright_webarena/shopping_admin/products_sales_analysis/label.txt b/tasks/playwright_webarena/standard/shopping_admin/products_sales_analysis/label.txt similarity index 100% rename from tasks/playwright_webarena/shopping_admin/products_sales_analysis/label.txt rename to 
tasks/playwright_webarena/standard/shopping_admin/products_sales_analysis/label.txt diff --git a/tasks/playwright_webarena/shopping_admin/products_sales_analysis/meta.json b/tasks/playwright_webarena/standard/shopping_admin/products_sales_analysis/meta.json similarity index 100% rename from tasks/playwright_webarena/shopping_admin/products_sales_analysis/meta.json rename to tasks/playwright_webarena/standard/shopping_admin/products_sales_analysis/meta.json diff --git a/tasks/playwright_webarena/shopping_admin/products_sales_analysis/verify.py b/tasks/playwright_webarena/standard/shopping_admin/products_sales_analysis/verify.py similarity index 100% rename from tasks/playwright_webarena/shopping_admin/products_sales_analysis/verify.py rename to tasks/playwright_webarena/standard/shopping_admin/products_sales_analysis/verify.py diff --git a/tasks/playwright_webarena/shopping_admin/sales_inventory_analysis/description.md b/tasks/playwright_webarena/standard/shopping_admin/sales_inventory_analysis/description.md similarity index 100% rename from tasks/playwright_webarena/shopping_admin/sales_inventory_analysis/description.md rename to tasks/playwright_webarena/standard/shopping_admin/sales_inventory_analysis/description.md diff --git a/tasks/playwright_webarena/shopping_admin/sales_inventory_analysis/label.txt b/tasks/playwright_webarena/standard/shopping_admin/sales_inventory_analysis/label.txt similarity index 100% rename from tasks/playwright_webarena/shopping_admin/sales_inventory_analysis/label.txt rename to tasks/playwright_webarena/standard/shopping_admin/sales_inventory_analysis/label.txt diff --git a/tasks/playwright_webarena/shopping_admin/sales_inventory_analysis/meta.json b/tasks/playwright_webarena/standard/shopping_admin/sales_inventory_analysis/meta.json similarity index 100% rename from tasks/playwright_webarena/shopping_admin/sales_inventory_analysis/meta.json rename to 
tasks/playwright_webarena/standard/shopping_admin/sales_inventory_analysis/meta.json diff --git a/tasks/playwright_webarena/shopping_admin/sales_inventory_analysis/verify.py b/tasks/playwright_webarena/standard/shopping_admin/sales_inventory_analysis/verify.py similarity index 100% rename from tasks/playwright_webarena/shopping_admin/sales_inventory_analysis/verify.py rename to tasks/playwright_webarena/standard/shopping_admin/sales_inventory_analysis/verify.py diff --git a/tasks/playwright_webarena/shopping_admin/search_filtering_operations/description.md b/tasks/playwright_webarena/standard/shopping_admin/search_filtering_operations/description.md similarity index 100% rename from tasks/playwright_webarena/shopping_admin/search_filtering_operations/description.md rename to tasks/playwright_webarena/standard/shopping_admin/search_filtering_operations/description.md diff --git a/tasks/playwright_webarena/shopping_admin/search_filtering_operations/label.txt b/tasks/playwright_webarena/standard/shopping_admin/search_filtering_operations/label.txt similarity index 100% rename from tasks/playwright_webarena/shopping_admin/search_filtering_operations/label.txt rename to tasks/playwright_webarena/standard/shopping_admin/search_filtering_operations/label.txt diff --git a/tasks/playwright_webarena/shopping_admin/search_filtering_operations/meta.json b/tasks/playwright_webarena/standard/shopping_admin/search_filtering_operations/meta.json similarity index 100% rename from tasks/playwright_webarena/shopping_admin/search_filtering_operations/meta.json rename to tasks/playwright_webarena/standard/shopping_admin/search_filtering_operations/meta.json diff --git a/tasks/playwright_webarena/shopping_admin/search_filtering_operations/verify.py b/tasks/playwright_webarena/standard/shopping_admin/search_filtering_operations/verify.py similarity index 100% rename from tasks/playwright_webarena/shopping_admin/search_filtering_operations/verify.py rename to 
tasks/playwright_webarena/standard/shopping_admin/search_filtering_operations/verify.py diff --git a/tasks/postgres/easy/.gitkeep b/tasks/postgres/easy/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tasks/postgres/easy/chinook/customer_data_migration_basic/customer_data.pkl b/tasks/postgres/easy/chinook/customer_data_migration_basic/customer_data.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3acfb74832bb9aee2bf78f8f4df63ae7dcf82d27 GIT binary patch literal 585 zcmX|-O-~y!5Qb4mLO>x@h)YEeZckh`%CgyfT&hGV97qBYOVv|_EY`wK);98bRizRV zKVXjL|1#?&_+ooL-uIb#tG@rN?D_N8UA(#rIJ_1nx3^R>2aR7;36^E$b4GX3gB#6N zu9bs>iIy)^tsS%ys-8IeA3V@?rVXaXX=+%WJ2)MLL$Yll6J3}b<*E~5v8{Q9s^pKz zGC`$-T4JfiY-K$4;)SvXtM-2;=SNnm3Yyh7exJG8M=~G zWDb6$Tln^J!#m11jMgKzpR{xZPRuKCIHi^|BT$J~p*Gjr;HwU;1+&cHsKGYWfNxJ`hEYk9LRdxyVbAd+IKE~7$P|~hM%vWzCRFaGj<@&;{{pqkxZmkIehQ7S z69wHc3LQU#@2j2vqb$8gFI!*#YiskoBdy=)oENmjtll$$`gLf|sj)%@-qi61d<}<} z 0.01: + return False + elif actual != expected: + return False + + return True + +def get_connection_params() -> dict: + """Get database connection parameters.""" + return { + "host": os.getenv("POSTGRES_HOST", "localhost"), + "port": int(os.getenv("POSTGRES_PORT", 5432)), + "database": os.getenv("POSTGRES_DATABASE"), + "user": os.getenv("POSTGRES_USERNAME"), + "password": os.getenv("POSTGRES_PASSWORD") + } + +def verify_employee_count_and_titles(conn) -> bool: + """Verify the final employee count and title changes.""" + with conn.cursor() as cur: + # Check the final verification query results + cur.execute(""" + SELECT + COUNT(*) as total_employees, + COUNT(CASE WHEN "Title" = 'CEO' THEN 1 END) as ceo_count, + COUNT(CASE WHEN "Title" = 'IT Specialist' THEN 1 END) as it_specialist_count + FROM "Employee" + """) + result = cur.fetchone() + + total_employees, ceo_count, it_specialist_count = result + + if total_employees != 8: + print(f"āŒ Expected 8 total employees, got {total_employees}") + return False + + if ceo_count != 1: + 
print(f"āŒ Expected 1 CEO, got {ceo_count}") + return False + + if it_specialist_count != 2: + print(f"āŒ Expected 2 IT Specialists, got {it_specialist_count}") + return False + + print("āœ… Employee count and title verification passed") + return True + +def verify_specific_employees(conn) -> bool: + """Verify specific employee records and modifications.""" + with conn.cursor() as cur: + # Check all employee fields in one query + cur.execute(""" + SELECT "EmployeeId", "LastName", "FirstName", "Title", "ReportsTo", "BirthDate", + "HireDate", "Address", "City", "State", "Country", "PostalCode", + "Phone", "Fax", "Email" + FROM "Employee" + WHERE "EmployeeId" IN (1, 2) + ORDER BY "EmployeeId" + """) + employees = cur.fetchall() + + from datetime import datetime + + expected = [ + # Andrew Adams (ID 1) - Title changes to 'CEO', phone stays original, ReportsTo stays None + (1, 'Adams', 'Andrew', 'CEO', None, datetime(1962, 2, 18), datetime(2002, 8, 14), + '11120 Jasper Ave NW', 'Edmonton', 'AB', 'Canada', 'T5K 2N1', '+1 (780) 428-9482', '+1 (780) 428-3457', 'andrew@chinookcorp.com'), + # Nancy Edwards (ID 2) - Phone changes, title stays 'Sales Manager', ReportsTo stays 1 + (2, 'Edwards', 'Nancy', 'Sales Manager', 1, datetime(1958, 12, 8), datetime(2002, 5, 1), + '825 8 Ave SW', 'Calgary', 'AB', 'Canada', 'T2P 2T3', '+1 (403) 555-9999', '+1 (403) 262-3322', 'nancy@chinookcorp.com'), + ] + + if len(employees) != 2: + print(f"āŒ Expected 2 key employees, found {len(employees)}") + return False + + # Full field comparison for all employees using rows_match + for actual, expected_emp in zip(employees, expected): + if not rows_match(actual, expected_emp): + print(f"āŒ Employee {actual[0]} row mismatch: expected {expected_emp}, got {actual}") + return False + + print("āœ… Specific employee verification passed - all fields match exactly") + return True + +def main(): + """Main verification function.""" + print("=" * 50) + print("Verifying Task 3: Employee Hierarchy 
Management") + print("=" * 50) + + # Get connection parameters + conn_params = get_connection_params() + + if not conn_params["database"]: + print("āŒ No database specified") + sys.exit(1) + + try: + # Connect to database + conn = psycopg2.connect(**conn_params) + + # Run verification checks with short-circuit evaluation + success = ( + verify_employee_count_and_titles(conn) and + verify_specific_employees(conn) + ) + conn.close() + + if success: + print("\nšŸŽ‰ Task verification: PASS") + print("All employee hierarchy management operations completed correctly!") + sys.exit(0) + else: + print("\nāŒ Task verification: FAIL") + sys.exit(1) + + except psycopg2.Error as e: + print(f"āŒ Database error: {e}") + sys.exit(1) + except Exception as e: + print(f"āŒ Verification error: {e}") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tasks/postgres/easy/dvdrental/create_payment_index/description.md b/tasks/postgres/easy/dvdrental/create_payment_index/description.md new file mode 100644 index 00000000..d7c09814 --- /dev/null +++ b/tasks/postgres/easy/dvdrental/create_payment_index/description.md @@ -0,0 +1,21 @@ +Create an index to optimize customer payment queries in the DVD rental database. + +## Your Task: + +Create an index on the `customer_id` column of the `payment` table to improve query performance. + +## Requirements: + +- Create an index on the `payment` table's `customer_id` column +- The index name can be anything you choose (e.g., `idx_payment_customer_id`) +- Use the standard CREATE INDEX syntax + +## Why This Helps: + +The `customer_id` column is frequently used in: +- JOIN operations between customer and payment tables +- WHERE clauses filtering by customer +- Subqueries that look up payments for specific customers + +Adding an index will significantly speed up these operations. 
+ diff --git a/tasks/postgres/easy/dvdrental/create_payment_index/meta.json b/tasks/postgres/easy/dvdrental/create_payment_index/meta.json new file mode 100644 index 00000000..7427a878 --- /dev/null +++ b/tasks/postgres/easy/dvdrental/create_payment_index/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "create_payment_index", + "task_name": "Create Payment Index", + "category_id": "dvdrental", + "category_name": "DVD Rental", + "description": "Add an index on payment.customer_id to speed up the customer payment lookups in the DVD Rental database.", + "author": "Lingxiao Du", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "performance optimization", + "indexing" + ], + "mcp": [ + "postgres" + ], + "meta_data": { + "stateType": "text", + "stateContent": "Enum \"mpaa_rating\" {\n \"G\"\n \"PG\"\n \"PG-13\"\n \"R\"\n \"NC-17\"\n}\n\nTable \"customer\" {\n \"customer_id\" int4 [pk, not null, increment]\n \"store_id\" int2 [not null]\n \"first_name\" varchar(45) [not null]\n \"last_name\" varchar(45) [not null]\n \"email\" varchar(50)\n \"address_id\" int2 [not null]\n \"activebool\" bool [not null, default: true]\n \"create_date\" date [not null, default: `('now'::text)::date`]\n \"last_update\" timestamp [default: `now()`]\n \"active\" int4\n\n Indexes {\n address_id [type: btree, name: \"idx_fk_address_id\"]\n store_id [type: btree, name: \"idx_fk_store_id\"]\n last_name [type: btree, name: \"idx_last_name\"]\n }\n}\n\nTable \"actor\" {\n \"actor_id\" int4 [pk, not null, increment]\n \"first_name\" varchar(45) [not null]\n \"last_name\" varchar(45) [not null]\n \"last_update\" timestamp [not null, default: `now()`]\n\n Indexes {\n last_name [type: btree, name: \"idx_actor_last_name\"]\n }\n}\n\nTable \"category\" {\n \"category_id\" int4 [pk, not null, increment]\n \"name\" varchar(25) [not null]\n \"last_update\" timestamp [not null, default: `now()`]\n}\n\nTable \"film\" {\n \"film_id\" int4 [pk, not null, increment]\n \"title\" varchar(255) [not 
null]\n \"description\" text\n \"release_year\" int4\n \"language_id\" int2 [not null]\n \"rental_duration\" int2 [not null, default: 3]\n \"rental_rate\" numeric(4,2) [not null, default: 4.99]\n \"length\" int2\n \"replacement_cost\" numeric(5,2) [not null, default: 19.99]\n \"rating\" mpaa_rating [default: 'G']\n \"last_update\" timestamp [not null, default: `now()`]\n \"special_features\" \"text[]\"\n \"fulltext\" tsvector [not null]\n\n Indexes {\n fulltext [type: gist, name: \"film_fulltext_idx\"]\n language_id [type: btree, name: \"idx_fk_language_id\"]\n title [type: btree, name: \"idx_title\"]\n }\n}\n\nTable \"film_actor\" {\n \"actor_id\" int2 [not null]\n \"film_id\" int2 [not null]\n \"last_update\" timestamp [not null, default: `now()`]\n\n Indexes {\n (actor_id, film_id) [type: btree, name: \"film_actor_pkey\"]\n film_id [type: btree, name: \"idx_fk_film_id\"]\n }\n}\n\nTable \"film_category\" {\n \"film_id\" int2 [not null]\n \"category_id\" int2 [not null]\n \"last_update\" timestamp [not null, default: `now()`]\n\n Indexes {\n (film_id, category_id) [type: btree, name: \"film_category_pkey\"]\n }\n}\n\nTable \"address\" {\n \"address_id\" int4 [pk, not null, increment]\n \"address\" varchar(50) [not null]\n \"address2\" varchar(50)\n \"district\" varchar(20) [not null]\n \"city_id\" int2 [not null]\n \"postal_code\" varchar(10)\n \"phone\" varchar(20) [not null]\n \"last_update\" timestamp [not null, default: `now()`]\n\n Indexes {\n city_id [type: btree, name: \"idx_fk_city_id\"]\n }\n}\n\nTable \"city\" {\n \"city_id\" int4 [pk, not null, increment]\n \"city\" varchar(50) [not null]\n \"country_id\" int2 [not null]\n \"last_update\" timestamp [not null, default: `now()`]\n\n Indexes {\n country_id [type: btree, name: \"idx_fk_country_id\"]\n }\n}\n\nTable \"country\" {\n \"country_id\" int4 [pk, not null, increment]\n \"country\" varchar(50) [not null]\n \"last_update\" timestamp [not null, default: `now()`]\n}\n\nTable \"inventory\" {\n 
\"inventory_id\" int4 [pk, not null, increment]\n \"film_id\" int2 [not null]\n \"store_id\" int2 [not null]\n \"last_update\" timestamp [not null, default: `now()`]\n\n Indexes {\n (store_id, film_id) [type: btree, name: \"idx_store_id_film_id\"]\n }\n}\n\nTable \"language\" {\n \"language_id\" int4 [pk, not null, increment]\n \"name\" bpchar(20) [not null]\n \"last_update\" timestamp [not null, default: `now()`]\n}\n\nTable \"payment\" {\n \"payment_id\" int4 [pk, not null, increment]\n \"customer_id\" int2 [not null]\n \"staff_id\" int2 [not null]\n \"rental_id\" int4 [not null]\n \"amount\" numeric(5,2) [not null]\n \"payment_date\" timestamp [not null]\n\n Indexes {\n rental_id [type: btree, name: \"idx_fk_rental_id\"]\n staff_id [type: btree, name: \"idx_fk_staff_id\"]\n }\n}\n\nTable \"rental\" {\n \"rental_id\" int4 [pk, not null, increment]\n \"rental_date\" timestamp [not null]\n \"inventory_id\" int4 [not null]\n \"customer_id\" int2 [not null]\n \"return_date\" timestamp\n \"staff_id\" int2 [not null]\n \"last_update\" timestamp [not null, default: `now()`]\n\n Indexes {\n (rental_date, inventory_id, customer_id) [type: btree, name: \"idx_unq_rental_rental_date_inventory_id_customer_id\"]\n inventory_id [type: btree, name: \"idx_fk_inventory_id\"]\n }\n}\n\nTable \"staff\" {\n \"staff_id\" int4 [pk, not null, increment]\n \"first_name\" varchar(45) [not null]\n \"last_name\" varchar(45) [not null]\n \"address_id\" int2 [not null]\n \"email\" varchar(50)\n \"store_id\" int2 [not null]\n \"active\" bool [not null, default: true]\n \"username\" varchar(16) [not null]\n \"password\" varchar(40)\n \"last_update\" timestamp [not null, default: `now()`]\n \"picture\" bytea\n}\n\nTable \"store\" {\n \"store_id\" int4 [pk, not null, increment]\n \"manager_staff_id\" int2 [unique, not null]\n \"address_id\" int2 [not null]\n \"last_update\" timestamp [not null, default: `now()`]\n}\n\nRef \"fk_address_city\":\"city\".\"city_id\" < \"address\".\"city_id\"\n\nRef 
\"fk_city\":\"country\".\"country_id\" < \"city\".\"country_id\"\n\nRef \"customer_address_id_fkey\":\"address\".\"address_id\" < \"customer\".\"address_id\" [update: cascade, delete: restrict]\n\nRef \"film_language_id_fkey\":\"language\".\"language_id\" < \"film\".\"language_id\" [update: cascade, delete: restrict]\n\nRef \"film_actor_actor_id_fkey\":\"actor\".\"actor_id\" < \"film_actor\".\"actor_id\" [update: cascade, delete: restrict]\n\nRef \"film_actor_film_id_fkey\":\"film\".\"film_id\" < \"film_actor\".\"film_id\" [update: cascade, delete: restrict]\n\nRef \"film_category_category_id_fkey\":\"category\".\"category_id\" < \"film_category\".\"category_id\" [update: cascade, delete: restrict]\n\nRef \"film_category_film_id_fkey\":\"film\".\"film_id\" < \"film_category\".\"film_id\" [update: cascade, delete: restrict]\n\nRef \"inventory_film_id_fkey\":\"film\".\"film_id\" < \"inventory\".\"film_id\" [update: cascade, delete: restrict]\n\nRef \"payment_customer_id_fkey\":\"customer\".\"customer_id\" < \"payment\".\"customer_id\" [update: cascade, delete: restrict]\n\nRef \"payment_rental_id_fkey\":\"rental\".\"rental_id\" < \"payment\".\"rental_id\" [update: cascade, delete: set null]\n\nRef \"payment_staff_id_fkey\":\"staff\".\"staff_id\" < \"payment\".\"staff_id\" [update: cascade, delete: restrict]\n\nRef \"rental_customer_id_fkey\":\"customer\".\"customer_id\" < \"rental\".\"customer_id\" [update: cascade, delete: restrict]\n\nRef \"rental_inventory_id_fkey\":\"inventory\".\"inventory_id\" < \"rental\".\"inventory_id\" [update: cascade, delete: restrict]\n\nRef \"rental_staff_id_key\":\"staff\".\"staff_id\" < \"rental\".\"staff_id\"\n\nRef \"staff_address_id_fkey\":\"address\".\"address_id\" < \"staff\".\"address_id\" [update: cascade, delete: restrict]\n\nRef \"store_address_id_fkey\":\"address\".\"address_id\" < \"store\".\"address_id\" [update: cascade, delete: restrict]\n\nRef \"store_manager_staff_id_fkey\":\"staff\".\"staff_id\" < 
\"store\".\"manager_staff_id\" [update: cascade, delete: restrict]\n", + "stateUrl": null, + "stateOriginalUrl": "https://github.com/gordonkwokkwok/DVD-Rental-PostgreSQL-Project" + } +} diff --git a/tasks/postgres/dvdrental/customer_analytics_optimization/verify.py b/tasks/postgres/easy/dvdrental/create_payment_index/verify.py similarity index 100% rename from tasks/postgres/dvdrental/customer_analytics_optimization/verify.py rename to tasks/postgres/easy/dvdrental/create_payment_index/verify.py diff --git a/tasks/postgres/easy/employees/department_summary_view/description.md b/tasks/postgres/easy/employees/department_summary_view/description.md new file mode 100644 index 00000000..8c0fdd79 --- /dev/null +++ b/tasks/postgres/easy/employees/department_summary_view/description.md @@ -0,0 +1,30 @@ +Create an executive department summary view to provide quick insights into departmental metrics for leadership dashboards. This view will consolidate key department statistics in one easily accessible place. + +## Your Task: + +**Create the executive department summary view** — build a materialized view called `exec_department_summary` in the `employees` schema with these exact columns: + +* `department_name` (varchar) — department name +* `total_employees` (integer) — current active employee count (employees with active salary where to_date = '9999-01-01') +* `avg_salary` (decimal) — average current salary for active employees +* `total_payroll` (bigint) — total monthly payroll cost (sum of all current salaries in the department) +* `manager_name` (varchar) — current department manager's full name (first_name and last_name concatenated) + +## Requirements: + +1. Use materialized view to cache results for better performance +2. 
Join the following tables: + - `departments` - for department information + - `dept_emp` - for employee-department relationships + - `employees` - for employee details + - `salaries` - for current salary information + - `dept_manager` - for current manager information +3. Only include current active employees (those with to_date = '9999-01-01' in both `dept_emp` and `salaries`) +4. Only include current managers (to_date = '9999-01-01' in `dept_manager`) +5. Order results by department_name + +## After Creation: + +Refresh the materialized view to populate it with current data. + +This view will provide executives with a real-time snapshot of departmental workforce metrics and costs. diff --git a/tasks/postgres/easy/employees/department_summary_view/meta.json b/tasks/postgres/easy/employees/department_summary_view/meta.json new file mode 100644 index 00000000..900b91c6 --- /dev/null +++ b/tasks/postgres/easy/employees/department_summary_view/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "department_summary_view", + "task_name": "Department Summary View", + "category_id": "employees", + "category_name": "Employees", + "description": "Build the exec_department_summary materialized view showing department name, active headcount, payroll totals, and the manager name.", + "author": "Lingxiao Du", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "reporting and analytics", + "materialized views" + ], + "mcp": [ + "postgres" + ], + "meta_data": { + "stateType": "text", + "stateContent": "Enum \"employees\".\"employee_gender\" {\n \"M\"\n \"F\"\n}\n\nTable \"employees\".\"department\" {\n \"id\" bpchar(4) [pk, not null]\n \"dept_name\" varchar(40) [unique, not null]\n}\n\nTable \"employees\".\"department_employee\" {\n \"employee_id\" int8 [not null]\n \"department_id\" bpchar(4) [not null]\n \"from_date\" date [not null]\n \"to_date\" date [not null]\n\n Indexes {\n (employee_id, department_id) [type: btree, name: \"idx_16982_primary\"]\n department_id [type: 
btree, name: \"idx_16982_dept_no\"]\n }\n}\n\nTable \"employees\".\"department_manager\" {\n \"employee_id\" int8 [not null]\n \"department_id\" bpchar(4) [not null]\n \"from_date\" date [not null]\n \"to_date\" date [not null]\n\n Indexes {\n (employee_id, department_id) [type: btree, name: \"idx_16985_primary\"]\n department_id [type: btree, name: \"idx_16985_dept_no\"]\n }\n}\n\nTable \"employees\".\"employee\" {\n \"id\" int8 [pk, not null, increment]\n \"birth_date\" date [not null]\n \"first_name\" varchar(14) [not null]\n \"last_name\" varchar(16) [not null]\n \"gender\" employees.employee_gender [not null]\n \"hire_date\" date [not null]\n}\n\nTable \"employees\".\"salary\" {\n \"employee_id\" int8 [not null]\n \"amount\" int8 [not null]\n \"from_date\" date [not null]\n \"to_date\" date [not null]\n\n Indexes {\n (employee_id, from_date) [type: btree, name: \"idx_16991_primary\"]\n }\n}\n\nTable \"employees\".\"title\" {\n \"employee_id\" int8 [not null]\n \"title\" varchar(50) [not null]\n \"from_date\" date [not null]\n \"to_date\" date\n\n Indexes {\n (employee_id, title, from_date) [type: btree, name: \"idx_16994_primary\"]\n }\n}\n\nRef \"dept_emp_ibfk_1\":\"employees\".\"employee\".\"id\" < \"employees\".\"department_employee\".\"employee_id\" [update: restrict, delete: cascade]\n\nRef \"dept_emp_ibfk_2\":\"employees\".\"department\".\"id\" < \"employees\".\"department_employee\".\"department_id\" [update: restrict, delete: cascade]\n\nRef \"dept_manager_ibfk_1\":\"employees\".\"employee\".\"id\" < \"employees\".\"department_manager\".\"employee_id\" [update: restrict, delete: cascade]\n\nRef \"dept_manager_ibfk_2\":\"employees\".\"department\".\"id\" < \"employees\".\"department_manager\".\"department_id\" [update: restrict, delete: cascade]\n\nRef \"salaries_ibfk_1\":\"employees\".\"employee\".\"id\" < \"employees\".\"salary\".\"employee_id\" [update: restrict, delete: cascade]\n\nRef \"titles_ibfk_1\":\"employees\".\"employee\".\"id\" < 
\"employees\".\"title\".\"employee_id\" [update: restrict, delete: cascade]\n", + "stateUrl": null, + "stateOriginalUrl": "https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/employees.sql.gz" + } +} diff --git a/tasks/postgres/easy/employees/department_summary_view/verify.py b/tasks/postgres/easy/employees/department_summary_view/verify.py new file mode 100644 index 00000000..e428c7ee --- /dev/null +++ b/tasks/postgres/easy/employees/department_summary_view/verify.py @@ -0,0 +1,149 @@ +""" +Verification script for PostgreSQL Task 6: Reporting and Automation System +""" + +import os +import sys +import psycopg2 +from decimal import Decimal + +def rows_match(actual_row, expected_row): + """ + Compare two rows with appropriate tolerance. + For Decimal types: allows 0.1 tolerance + For date types: convert to string for comparison + For other types: requires exact match + """ + if len(actual_row) != len(expected_row): + return False + + for actual, expected in zip(actual_row, expected_row): + if isinstance(actual, Decimal) and isinstance(expected, (Decimal, float, int)): + if abs(float(actual) - float(expected)) > 0.1: + return False + elif hasattr(actual, 'strftime'): # datetime.date or datetime.datetime + if str(actual) != str(expected): + return False + elif actual != expected: + return False + + return True + +def get_connection_params() -> dict: + """Get database connection parameters.""" + return { + "host": os.getenv("POSTGRES_HOST", "localhost"), + "port": int(os.getenv("POSTGRES_PORT", 5432)), + "database": os.getenv("POSTGRES_DATABASE"), + "user": os.getenv("POSTGRES_USERNAME"), + "password": os.getenv("POSTGRES_PASSWORD") + } + +def verify_materialized_views(conn) -> bool: + """Verify that materialized views were created and populated correctly.""" + with conn.cursor() as cur: + # Check all departments' data accuracy + cur.execute(""" + SELECT department_name, total_employees, avg_salary, total_payroll, manager_name + FROM 
employees.exec_department_summary + ORDER BY department_name + """) + view_data = cur.fetchall() + + # Get actual data for all departments + cur.execute(""" + WITH current_salary AS ( + SELECT employee_id, amount + FROM ( + SELECT s.*, + ROW_NUMBER() OVER ( + PARTITION BY s.employee_id + ORDER BY s.from_date DESC, s.amount DESC + ) AS rn + FROM employees.salary s + WHERE s.to_date = DATE '9999-01-01' + ) x + WHERE rn = 1 + ), + current_dept AS ( + SELECT DISTINCT de.employee_id, de.department_id + FROM employees.department_employee de + WHERE de.to_date = DATE '9999-01-01' + ), + current_manager AS ( + SELECT department_id, + CONCAT(e.first_name, ' ', e.last_name) AS manager_name + FROM ( + SELECT dm.*, + ROW_NUMBER() OVER ( + PARTITION BY dm.department_id + ORDER BY dm.from_date DESC, dm.employee_id + ) AS rn + FROM employees.department_manager dm + WHERE dm.to_date = DATE '9999-01-01' + ) dm + JOIN employees.employee e ON e.id = dm.employee_id + WHERE dm.rn = 1 + ) + SELECT + d.dept_name AS department_name, + COUNT(cd.employee_id)::INT AS total_employees, + AVG(cs.amount)::DECIMAL AS avg_salary, + COALESCE(SUM(cs.amount), 0)::BIGINT AS total_payroll, + cm.manager_name + FROM employees.department d + LEFT JOIN current_dept cd ON cd.department_id = d.id + LEFT JOIN current_salary cs ON cs.employee_id = cd.employee_id + LEFT JOIN current_manager cm ON cm.department_id = d.id + GROUP BY d.id, d.dept_name, cm.manager_name + ORDER BY d.dept_name; + """) + actual_data = cur.fetchall() + + if len(view_data) != len(actual_data): + print(f"āŒ Department count mismatch: view={len(view_data)}, actual={len(actual_data)}") + return False + + for view_row, actual_row in zip(view_data, actual_data): + if not rows_match(view_row, actual_row): + print(f"āŒ Department summary data incorrect for {view_row[0]}: view={view_row}, actual={actual_row}") + return False + + return True + +def main(): + """Main verification function.""" + print("=" * 50) + + # Get connection parameters + 
conn_params = get_connection_params() + + if not conn_params["database"]: + print("āŒ No database specified") + sys.exit(1) + + try: + # Connect to database + conn = psycopg2.connect(**conn_params) + + # Verify all components + success = verify_materialized_views(conn) + + conn.close() + + if success: + print("\nšŸŽ‰ Task verification: PASS") + sys.exit(0) + else: + print("\nāŒ Task verification: FAIL") + sys.exit(1) + + except psycopg2.Error as e: + print(f"āŒ Database error: {e}") + sys.exit(1) + except Exception as e: + print(f"āŒ Verification error: {e}") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tasks/postgres/easy/employees/employee_gender_statistics/description.md b/tasks/postgres/easy/employees/employee_gender_statistics/description.md new file mode 100644 index 00000000..515dbece --- /dev/null +++ b/tasks/postgres/easy/employees/employee_gender_statistics/description.md @@ -0,0 +1,19 @@ +Create a gender statistics summary table for the HR team's annual workforce composition report. This is a simple analysis to understand the gender distribution in our employee database. + +## Your Task: + +**Create the gender statistics table** — build a table called `gender_statistics` in the `employees` schema with these exact columns: + +* `gender` (varchar) — gender ('M' or 'F') +* `total_employees` (integer) — total number of employees of this gender +* `current_employees` (integer) — current employees of this gender (have active salary where to_date = '9999-01-01') +* `percentage_of_workforce` (decimal) — percentage of current workforce (current_employees / total current employees * 100) + +## Requirements: + +1. Calculate total employees by counting all employees of each gender from the `employees` table +2. Calculate current employees by counting employees with active salary records (to_date = '9999-01-01' in the `salaries` table) +3. Calculate the percentage based on current workforce only +4. 
The table should contain exactly 2 rows (one for 'M' and one for 'F') + +This analysis will help HR understand the basic gender composition of our workforce for diversity reporting. diff --git a/tasks/postgres/easy/employees/employee_gender_statistics/meta.json b/tasks/postgres/easy/employees/employee_gender_statistics/meta.json new file mode 100644 index 00000000..cac10af0 --- /dev/null +++ b/tasks/postgres/easy/employees/employee_gender_statistics/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "employee_gender_statistics", + "task_name": "Employee Gender Statistics", + "category_id": "employees", + "category_name": "Employees", + "description": "Aggregate the employees dataset into a gender_statistics table with counts of total/current staff by gender plus workforce percentage.", + "author": "Lingxiao Du", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "reporting and analytics", + "data aggregation" + ], + "mcp": [ + "postgres" + ], + "meta_data": { + "stateType": "text", + "stateContent": "Enum \"employees\".\"employee_gender\" {\n \"M\"\n \"F\"\n}\n\nTable \"employees\".\"department\" {\n \"id\" bpchar(4) [pk, not null]\n \"dept_name\" varchar(40) [unique, not null]\n}\n\nTable \"employees\".\"department_employee\" {\n \"employee_id\" int8 [not null]\n \"department_id\" bpchar(4) [not null]\n \"from_date\" date [not null]\n \"to_date\" date [not null]\n\n Indexes {\n (employee_id, department_id) [type: btree, name: \"idx_16982_primary\"]\n department_id [type: btree, name: \"idx_16982_dept_no\"]\n }\n}\n\nTable \"employees\".\"department_manager\" {\n \"employee_id\" int8 [not null]\n \"department_id\" bpchar(4) [not null]\n \"from_date\" date [not null]\n \"to_date\" date [not null]\n\n Indexes {\n (employee_id, department_id) [type: btree, name: \"idx_16985_primary\"]\n department_id [type: btree, name: \"idx_16985_dept_no\"]\n }\n}\n\nTable \"employees\".\"employee\" {\n \"id\" int8 [pk, not null, increment]\n \"birth_date\" date [not null]\n 
\"first_name\" varchar(14) [not null]\n \"last_name\" varchar(16) [not null]\n \"gender\" employees.employee_gender [not null]\n \"hire_date\" date [not null]\n}\n\nTable \"employees\".\"salary\" {\n \"employee_id\" int8 [not null]\n \"amount\" int8 [not null]\n \"from_date\" date [not null]\n \"to_date\" date [not null]\n\n Indexes {\n (employee_id, from_date) [type: btree, name: \"idx_16991_primary\"]\n }\n}\n\nTable \"employees\".\"title\" {\n \"employee_id\" int8 [not null]\n \"title\" varchar(50) [not null]\n \"from_date\" date [not null]\n \"to_date\" date\n\n Indexes {\n (employee_id, title, from_date) [type: btree, name: \"idx_16994_primary\"]\n }\n}\n\nRef \"dept_emp_ibfk_1\":\"employees\".\"employee\".\"id\" < \"employees\".\"department_employee\".\"employee_id\" [update: restrict, delete: cascade]\n\nRef \"dept_emp_ibfk_2\":\"employees\".\"department\".\"id\" < \"employees\".\"department_employee\".\"department_id\" [update: restrict, delete: cascade]\n\nRef \"dept_manager_ibfk_1\":\"employees\".\"employee\".\"id\" < \"employees\".\"department_manager\".\"employee_id\" [update: restrict, delete: cascade]\n\nRef \"dept_manager_ibfk_2\":\"employees\".\"department\".\"id\" < \"employees\".\"department_manager\".\"department_id\" [update: restrict, delete: cascade]\n\nRef \"salaries_ibfk_1\":\"employees\".\"employee\".\"id\" < \"employees\".\"salary\".\"employee_id\" [update: restrict, delete: cascade]\n\nRef \"titles_ibfk_1\":\"employees\".\"employee\".\"id\" < \"employees\".\"title\".\"employee_id\" [update: restrict, delete: cascade]\n", + "stateUrl": null, + "stateOriginalUrl": "https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/employees.sql.gz" + } +} diff --git a/tasks/postgres/easy/employees/employee_gender_statistics/verify.py b/tasks/postgres/easy/employees/employee_gender_statistics/verify.py new file mode 100644 index 00000000..0bcedc52 --- /dev/null +++ b/tasks/postgres/easy/employees/employee_gender_statistics/verify.py @@ -0,0 
+1,123 @@ +import os +import sys +import psycopg2 +from decimal import Decimal + +def rows_match(actual_row, expected_row): + """ + Compare two rows with appropriate tolerance. + For Decimal types: allows 0.1 tolerance + For other types: requires exact match + """ + if len(actual_row) != len(expected_row): + return False + + for actual, expected in zip(actual_row, expected_row): + if isinstance(actual, Decimal) and isinstance(expected, Decimal): + if abs(float(actual) - float(expected)) > 0.1: + return False + elif actual != expected: + return False + + return True + +def get_connection_params() -> dict: + """Get database connection parameters.""" + return { + "host": os.getenv("POSTGRES_HOST", "localhost"), + "port": int(os.getenv("POSTGRES_PORT", 5432)), + "database": os.getenv("POSTGRES_DATABASE"), + "user": os.getenv("POSTGRES_USERNAME"), + "password": os.getenv("POSTGRES_PASSWORD") + } + +def verify_gender_statistics_results(conn) -> bool: + """Verify the gender statistics results.""" + with conn.cursor() as cur: + # Get actual results from the created table + cur.execute(""" + SELECT gender, total_employees, current_employees, percentage_of_workforce + FROM employees.gender_statistics + ORDER BY gender + """) + actual_results = cur.fetchall() + + # Execute ground truth query + cur.execute(""" + WITH current_emp AS ( + SELECT DISTINCT s.employee_id + FROM employees.salary s + WHERE s.to_date = DATE '9999-01-01' + ), + total_current AS ( + SELECT COUNT(*) AS cnt + FROM current_emp + ) + SELECT + e.gender::varchar AS gender, + COUNT(*) AS total_employees, + COUNT(*) FILTER (WHERE ce.employee_id IS NOT NULL) AS current_employees, + (COUNT(*) FILTER (WHERE ce.employee_id IS NOT NULL))::DECIMAL + / NULLIF((SELECT cnt FROM total_current), 0) * 100 AS percentage_of_workforce + FROM employees.employee e + LEFT JOIN current_emp ce ON ce.employee_id = e.id + WHERE e.gender IN ('M','F') + GROUP BY e.gender + ORDER BY gender; + """) + expected_results = cur.fetchall() + + 
if len(actual_results) != len(expected_results): + print(f"āŒ Expected {len(expected_results)} gender statistics results, got {len(actual_results)}") + return False + + mismatches = 0 + for i, (actual, expected) in enumerate(zip(actual_results, expected_results)): + if not rows_match(actual, expected): + if mismatches < 5: # Only show first 5 mismatches + print(f"āŒ Row {i+1} mismatch: expected {expected}, got {actual}") + mismatches += 1 + + if mismatches > 0: + print(f"āŒ Total mismatches: {mismatches}") + return False + + print(f"āœ… Gender statistics results are correct ({len(actual_results)} records)") + return True + +def main(): + """Main verification function.""" + print("=" * 50) + + # Get connection parameters + conn_params = get_connection_params() + + if not conn_params["database"]: + print("āŒ No database specified") + sys.exit(1) + + try: + # Connect to database + conn = psycopg2.connect(**conn_params) + + # Verify all four analysis results + success = verify_gender_statistics_results(conn) + + conn.close() + + if success: + print("\nšŸŽ‰ Task verification: PASS") + sys.exit(0) + else: + print("\nāŒ Task verification: FAIL") + sys.exit(1) + + except psycopg2.Error as e: + print(f"āŒ Database error: {e}") + sys.exit(1) + except Exception as e: + print(f"āŒ Verification error: {e}") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tasks/postgres/easy/employees/employee_projects_basic/description.md b/tasks/postgres/easy/employees/employee_projects_basic/description.md new file mode 100644 index 00000000..400ec200 --- /dev/null +++ b/tasks/postgres/easy/employees/employee_projects_basic/description.md @@ -0,0 +1,20 @@ +Create and manage a basic employee projects table to track company projects. The IT team needs you to build the database table structure and populate it with initial project data. + +## Your Tasks: + +1. 
**Create the employee_projects table** — build a new table in the `employees` schema: + + **Table: `employee_projects`** + * `project_id` (integer, primary key, auto-increment) + * `project_name` (varchar(100), not null) + * `start_date` (date, not null) + * `end_date` (date) + * `budget` (decimal(10,2)) + * `status` (varchar(20), default 'active') + +2. **Insert exactly this initial data into `employee_projects`**: + * Project 1: name='Database Modernization', start_date='2024-01-15', end_date='2024-06-30', budget=250000.00, status='active' + * Project 2: name='Employee Portal Upgrade', start_date='2024-02-01', end_date='2024-05-15', budget=180000.00, status='active' + * Project 3: name='HR Analytics Dashboard', start_date='2023-11-01', end_date='2024-01-31', budget=120000.00, status='active' + +This will establish the basic project tracking foundation for the company. diff --git a/tasks/postgres/easy/employees/employee_projects_basic/meta.json b/tasks/postgres/easy/employees/employee_projects_basic/meta.json new file mode 100644 index 00000000..4a96b21d --- /dev/null +++ b/tasks/postgres/easy/employees/employee_projects_basic/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "employee_projects_basic", + "task_name": "Employee Projects Basic", + "category_id": "employees", + "category_name": "Employees", + "description": "Create the employee_projects table with the specified schema and insert the three starter projects for modernization, portal upgrade, and analytics.", + "author": "Lingxiao Du", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "schema design", + "data loading" + ], + "mcp": [ + "postgres" + ], + "meta_data": { + "stateType": "text", + "stateContent": "Enum \"employees\".\"employee_gender\" {\n \"M\"\n \"F\"\n}\n\nTable \"employees\".\"department\" {\n \"id\" bpchar(4) [pk, not null]\n \"dept_name\" varchar(40) [unique, not null]\n}\n\nTable \"employees\".\"department_employee\" {\n \"employee_id\" int8 [not null]\n \"department_id\" 
bpchar(4) [not null]\n \"from_date\" date [not null]\n \"to_date\" date [not null]\n\n Indexes {\n (employee_id, department_id) [type: btree, name: \"idx_16982_primary\"]\n department_id [type: btree, name: \"idx_16982_dept_no\"]\n }\n}\n\nTable \"employees\".\"department_manager\" {\n \"employee_id\" int8 [not null]\n \"department_id\" bpchar(4) [not null]\n \"from_date\" date [not null]\n \"to_date\" date [not null]\n\n Indexes {\n (employee_id, department_id) [type: btree, name: \"idx_16985_primary\"]\n department_id [type: btree, name: \"idx_16985_dept_no\"]\n }\n}\n\nTable \"employees\".\"employee\" {\n \"id\" int8 [pk, not null, increment]\n \"birth_date\" date [not null]\n \"first_name\" varchar(14) [not null]\n \"last_name\" varchar(16) [not null]\n \"gender\" employees.employee_gender [not null]\n \"hire_date\" date [not null]\n}\n\nTable \"employees\".\"salary\" {\n \"employee_id\" int8 [not null]\n \"amount\" int8 [not null]\n \"from_date\" date [not null]\n \"to_date\" date [not null]\n\n Indexes {\n (employee_id, from_date) [type: btree, name: \"idx_16991_primary\"]\n }\n}\n\nTable \"employees\".\"title\" {\n \"employee_id\" int8 [not null]\n \"title\" varchar(50) [not null]\n \"from_date\" date [not null]\n \"to_date\" date\n\n Indexes {\n (employee_id, title, from_date) [type: btree, name: \"idx_16994_primary\"]\n }\n}\n\nRef \"dept_emp_ibfk_1\":\"employees\".\"employee\".\"id\" < \"employees\".\"department_employee\".\"employee_id\" [update: restrict, delete: cascade]\n\nRef \"dept_emp_ibfk_2\":\"employees\".\"department\".\"id\" < \"employees\".\"department_employee\".\"department_id\" [update: restrict, delete: cascade]\n\nRef \"dept_manager_ibfk_1\":\"employees\".\"employee\".\"id\" < \"employees\".\"department_manager\".\"employee_id\" [update: restrict, delete: cascade]\n\nRef \"dept_manager_ibfk_2\":\"employees\".\"department\".\"id\" < \"employees\".\"department_manager\".\"department_id\" [update: restrict, delete: cascade]\n\nRef 
\"salaries_ibfk_1\":\"employees\".\"employee\".\"id\" < \"employees\".\"salary\".\"employee_id\" [update: restrict, delete: cascade]\n\nRef \"titles_ibfk_1\":\"employees\".\"employee\".\"id\" < \"employees\".\"title\".\"employee_id\" [update: restrict, delete: cascade]\n", + "stateUrl": null, + "stateOriginalUrl": "https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/employees.sql.gz" + } +} diff --git a/tasks/postgres/easy/employees/employee_projects_basic/verify.py b/tasks/postgres/easy/employees/employee_projects_basic/verify.py new file mode 100644 index 00000000..b9eaa447 --- /dev/null +++ b/tasks/postgres/easy/employees/employee_projects_basic/verify.py @@ -0,0 +1,116 @@ +""" +Verification script for PostgreSQL Task 5: Database Schema and Data Operations +""" + +import os +import sys +import psycopg2 +from decimal import Decimal + +def rows_match(actual_row, expected_row): + """ + Compare two rows with appropriate tolerance. + For Decimal types: allows 0.1 tolerance + For date types: convert to string for comparison + For other types: requires exact match + """ + if len(actual_row) != len(expected_row): + return False + + for actual, expected in zip(actual_row, expected_row): + if isinstance(actual, Decimal) and isinstance(expected, (Decimal, float, int)): + if abs(float(actual) - float(expected)) > 0.1: + return False + elif hasattr(actual, 'strftime'): # datetime.date or datetime.datetime + if str(actual) != str(expected): + return False + elif actual != expected: + return False + + return True + +def get_connection_params() -> dict: + """Get database connection parameters.""" + return { + "host": os.getenv("POSTGRES_HOST", "localhost"), + "port": int(os.getenv("POSTGRES_PORT", 5432)), + "database": os.getenv("POSTGRES_DATABASE"), + "user": os.getenv("POSTGRES_USERNAME"), + "password": os.getenv("POSTGRES_PASSWORD") + } + + +def verify_project_data(conn) -> bool: + """Verify that project data was inserted and updated correctly.""" + with 
conn.cursor() as cur: + # Check project data after updates + cur.execute(""" + SELECT project_name, start_date, end_date, budget, status + FROM employees.employee_projects + ORDER BY project_name + """) + projects = cur.fetchall() + + if len(projects) != 3: + print(f"āŒ Expected 3 projects, found {len(projects)}") + return False + + # Expected final state after all updates + expected = { + 'Database Modernization': ('2024-01-15', '2024-06-30', 250000.00, 'active'), + 'Employee Portal Upgrade': ('2024-02-01', '2024-05-15', 180000.00, 'active'), + 'HR Analytics Dashboard': ('2023-11-01', '2024-01-31', 120000.00, 'active') + } + + for project in projects: + name = project[0] + if name not in expected: + print(f"āŒ Unexpected project: {name}") + return False + + exp = expected[name] + # Use rows_match for comparison + expected_row = (name,) + exp + if not rows_match(project, expected_row): + print(f"āŒ Project {name} data mismatch: expected {expected_row}, got {project}") + return False + + print("āœ… Project data is correct") + return True + +def main(): + """Main verification function.""" + print("=" * 50) + + # Get connection parameters + conn_params = get_connection_params() + + if not conn_params["database"]: + print("āŒ No database specified") + sys.exit(1) + + try: + # Connect to database + conn = psycopg2.connect(**conn_params) + + # Verify all components + success = verify_project_data(conn) + + conn.close() + + if success: + print("\nšŸŽ‰ Task verification: PASS") + sys.exit(0) + else: + print("\nāŒ Task verification: FAIL") + sys.exit(1) + + except psycopg2.Error as e: + print(f"āŒ Database error: {e}") + sys.exit(1) + except Exception as e: + print(f"āŒ Verification error: {e}") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tasks/postgres/easy/employees/hiring_year_summary/description.md b/tasks/postgres/easy/employees/hiring_year_summary/description.md new file mode 100644 index 00000000..e32e4b8e --- 
/dev/null +++ b/tasks/postgres/easy/employees/hiring_year_summary/description.md @@ -0,0 +1,19 @@ +Create a hiring year summary table to help HR track employee retention trends over the years. This analysis shows how many employees were hired each year and how many are still with the company. + +## Your Task: + +**Create the hiring year summary table** — build a table called `hiring_year_summary` in the `employees` schema with these exact columns: + +* `hire_year` (integer) — year employees were hired +* `employees_hired` (integer) — number of employees hired that year +* `still_employed` (integer) — how many from that year are still employed (have active salary where to_date = '9999-01-01') +* `retention_rate` (decimal) — percentage still employed (still_employed / employees_hired * 100) + +## Requirements: + +1. Extract the hire year from the `hire_date` column in the `employee` table +2. Count total employees hired in each year +3. Determine which employees are still employed by checking for active salary records (to_date = '9999-01-01' in the `salary` table) +4. Order results by hire_year in ascending order + +This analysis will help HR understand retention patterns and identify years with particularly high or low retention rates. 
diff --git a/tasks/postgres/easy/employees/hiring_year_summary/meta.json b/tasks/postgres/easy/employees/hiring_year_summary/meta.json new file mode 100644 index 00000000..fd4b8e6a --- /dev/null +++ b/tasks/postgres/easy/employees/hiring_year_summary/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "hiring_year_summary", + "task_name": "Hiring Year Summary", + "category_id": "employees", + "category_name": "Employees", + "description": "Summarize hires per year into hiring_year_summary, including still-employed counts and retention percentages using active salary rows.", + "author": "Lingxiao Du", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "reporting and analytics", + "retention analysis" + ], + "mcp": [ + "postgres" + ], + "meta_data": { + "stateType": "text", + "stateContent": "Enum \"employees\".\"employee_gender\" {\n \"M\"\n \"F\"\n}\n\nTable \"employees\".\"department\" {\n \"id\" bpchar(4) [pk, not null]\n \"dept_name\" varchar(40) [unique, not null]\n}\n\nTable \"employees\".\"department_employee\" {\n \"employee_id\" int8 [not null]\n \"department_id\" bpchar(4) [not null]\n \"from_date\" date [not null]\n \"to_date\" date [not null]\n\n Indexes {\n (employee_id, department_id) [type: btree, name: \"idx_16982_primary\"]\n department_id [type: btree, name: \"idx_16982_dept_no\"]\n }\n}\n\nTable \"employees\".\"department_manager\" {\n \"employee_id\" int8 [not null]\n \"department_id\" bpchar(4) [not null]\n \"from_date\" date [not null]\n \"to_date\" date [not null]\n\n Indexes {\n (employee_id, department_id) [type: btree, name: \"idx_16985_primary\"]\n department_id [type: btree, name: \"idx_16985_dept_no\"]\n }\n}\n\nTable \"employees\".\"employee\" {\n \"id\" int8 [pk, not null, increment]\n \"birth_date\" date [not null]\n \"first_name\" varchar(14) [not null]\n \"last_name\" varchar(16) [not null]\n \"gender\" employees.employee_gender [not null]\n \"hire_date\" date [not null]\n}\n\nTable \"employees\".\"salary\" {\n \"employee_id\" 
int8 [not null]\n \"amount\" int8 [not null]\n \"from_date\" date [not null]\n \"to_date\" date [not null]\n\n Indexes {\n (employee_id, from_date) [type: btree, name: \"idx_16991_primary\"]\n }\n}\n\nTable \"employees\".\"title\" {\n \"employee_id\" int8 [not null]\n \"title\" varchar(50) [not null]\n \"from_date\" date [not null]\n \"to_date\" date\n\n Indexes {\n (employee_id, title, from_date) [type: btree, name: \"idx_16994_primary\"]\n }\n}\n\nRef \"dept_emp_ibfk_1\":\"employees\".\"employee\".\"id\" < \"employees\".\"department_employee\".\"employee_id\" [update: restrict, delete: cascade]\n\nRef \"dept_emp_ibfk_2\":\"employees\".\"department\".\"id\" < \"employees\".\"department_employee\".\"department_id\" [update: restrict, delete: cascade]\n\nRef \"dept_manager_ibfk_1\":\"employees\".\"employee\".\"id\" < \"employees\".\"department_manager\".\"employee_id\" [update: restrict, delete: cascade]\n\nRef \"dept_manager_ibfk_2\":\"employees\".\"department\".\"id\" < \"employees\".\"department_manager\".\"department_id\" [update: restrict, delete: cascade]\n\nRef \"salaries_ibfk_1\":\"employees\".\"employee\".\"id\" < \"employees\".\"salary\".\"employee_id\" [update: restrict, delete: cascade]\n\nRef \"titles_ibfk_1\":\"employees\".\"employee\".\"id\" < \"employees\".\"title\".\"employee_id\" [update: restrict, delete: cascade]\n", + "stateUrl": null, + "stateOriginalUrl": "https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/employees.sql.gz" + } +} diff --git a/tasks/postgres/easy/employees/hiring_year_summary/verify.py b/tasks/postgres/easy/employees/hiring_year_summary/verify.py new file mode 100644 index 00000000..227a9c8a --- /dev/null +++ b/tasks/postgres/easy/employees/hiring_year_summary/verify.py @@ -0,0 +1,127 @@ +""" +Verification script for PostgreSQL Task 3: Employee Demographics Report +""" + +import os +import sys +import psycopg2 +from decimal import Decimal + +def rows_match(actual_row, expected_row): + """ + Compare two rows 
with appropriate tolerance. + For Decimal types: allows 0.1 tolerance + For other types: requires exact match + """ + if len(actual_row) != len(expected_row): + return False + + for actual, expected in zip(actual_row, expected_row): + if isinstance(actual, Decimal) and isinstance(expected, Decimal): + if abs(float(actual) - float(expected)) > 0.1: + return False + elif actual != expected: + return False + + return True + +def get_connection_params() -> dict: + """Get database connection parameters.""" + return { + "host": os.getenv("POSTGRES_HOST", "localhost"), + "port": int(os.getenv("POSTGRES_PORT", 5432)), + "database": os.getenv("POSTGRES_DATABASE"), + "user": os.getenv("POSTGRES_USERNAME"), + "password": os.getenv("POSTGRES_PASSWORD") + } + +def verify_hiring_year_results(conn) -> bool: + """Verify the hiring year summary results.""" + with conn.cursor() as cur: + # Get actual results from the created table + cur.execute(""" + SELECT hire_year, employees_hired, still_employed, retention_rate + FROM employees.hiring_year_summary + ORDER BY hire_year + """) + actual_results = cur.fetchall() + + # Execute ground truth query + cur.execute(""" + WITH current_emp AS ( + SELECT DISTINCT s.employee_id + FROM employees.salary s + WHERE s.to_date = DATE '9999-01-01' + ), + base AS ( + SELECT e.id, EXTRACT(YEAR FROM e.hire_date)::INT AS hire_year + FROM employees.employee e + WHERE e.hire_date IS NOT NULL + ) + SELECT + b.hire_year, + COUNT(*)::INT AS employees_hired, + COUNT(*) FILTER (WHERE ce.employee_id IS NOT NULL)::INT AS still_employed, + (COUNT(*) FILTER (WHERE ce.employee_id IS NOT NULL))::DECIMAL + / NULLIF(COUNT(*), 0) * 100 AS retention_rate + FROM base b + LEFT JOIN current_emp ce ON ce.employee_id = b.id + GROUP BY b.hire_year + ORDER BY b.hire_year; + """) + expected_results = cur.fetchall() + + if len(actual_results) != len(expected_results): + print(f"āŒ Expected {len(expected_results)} hiring year results, got {len(actual_results)}") + return False + 
+ mismatches = 0 + for i, (actual, expected) in enumerate(zip(actual_results, expected_results)): + if not rows_match(actual, expected): + if mismatches < 5: # Only show first 5 mismatches + print(f"āŒ Row {i+1} mismatch: expected {expected}, got {actual}") + mismatches += 1 + + if mismatches > 0: + print(f"āŒ Total mismatches: {mismatches}") + return False + + print(f"āœ… Hiring year summary results are correct ({len(actual_results)} records)") + return True + +def main(): + """Main verification function.""" + print("=" * 50) + + # Get connection parameters + conn_params = get_connection_params() + + if not conn_params["database"]: + print("āŒ No database specified") + sys.exit(1) + + try: + # Connect to database + conn = psycopg2.connect(**conn_params) + + # Verify all four analysis results + success = verify_hiring_year_results(conn) + + conn.close() + + if success: + print("\nšŸŽ‰ Task verification: PASS") + sys.exit(0) + else: + print("\nāŒ Task verification: FAIL") + sys.exit(1) + + except psycopg2.Error as e: + print(f"āŒ Database error: {e}") + sys.exit(1) + except Exception as e: + print(f"āŒ Verification error: {e}") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tasks/postgres/easy/lego/basic_security_setup/description.md b/tasks/postgres/easy/lego/basic_security_setup/description.md new file mode 100644 index 00000000..26c4b8e7 --- /dev/null +++ b/tasks/postgres/easy/lego/basic_security_setup/description.md @@ -0,0 +1,34 @@ +Set up basic database security with role-based access control and Row-Level Security (RLS) for the LEGO database. + +## Your Tasks: + +### 1. 
Create Database Role and Permissions + +Create a new database role called `theme_analyst` with the following permissions: + +* `SELECT` permissions on all reference tables: `lego_themes`, `lego_colors`, `lego_parts`, `lego_part_categories` +* `SELECT` permissions on main data tables: `lego_sets`, `lego_inventories`, `lego_inventory_parts` +* No `INSERT`, `UPDATE`, or `DELETE` permissions on any tables + +### 2. Enable Row-Level Security + +Enable RLS on the following tables: + +* `lego_sets` +* `lego_inventories` +* `lego_inventory_parts` + +## Requirements: + +- Use `CREATE ROLE` to create the `theme_analyst` role +- Use `GRANT SELECT` statements to assign the appropriate permissions +- Use `ALTER TABLE ... ENABLE ROW LEVEL SECURITY` to enable RLS on each table + +## Expected Outcome: + +After completing these tasks: +- The `theme_analyst` role should exist with read-only access to specified tables +- Row-Level Security should be enabled (but not yet enforced with policies) on the three main data tables +- The role should have no write permissions on any table + +This sets up the foundation for implementing theme-based data isolation policies. 
diff --git a/tasks/postgres/easy/lego/basic_security_setup/meta.json b/tasks/postgres/easy/lego/basic_security_setup/meta.json new file mode 100644 index 00000000..acd57a85 --- /dev/null +++ b/tasks/postgres/easy/lego/basic_security_setup/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "basic_security_setup", + "task_name": "Basic Security Setup", + "category_id": "lego", + "category_name": "Lego", + "description": "Create the read-only theme_analyst role with SELECT rights on LEGO reference tables and enable row-level security on sets and inventory tables.", + "author": "Lingxiao Du", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "security", + "access control" + ], + "mcp": [ + "postgres" + ], + "meta_data": { + "stateType": "text", + "stateContent": "Table \"lego_colors\" {\n \"id\" int4 [pk, not null, increment]\n \"name\" varchar(255) [not null]\n \"rgb\" varchar(6) [not null]\n \"is_trans\" bpchar(1) [not null]\n}\n\nTable \"lego_inventories\" {\n \"id\" int4 [pk, not null, increment]\n \"version\" int4 [not null]\n \"set_num\" varchar(255) [not null]\n}\n\nTable \"lego_inventory_parts\" {\n \"inventory_id\" int4 [not null]\n \"part_num\" varchar(255) [not null]\n \"color_id\" int4 [not null]\n \"quantity\" int4 [not null]\n \"is_spare\" bool [not null]\n}\n\nTable \"lego_inventory_sets\" {\n \"inventory_id\" int4 [not null]\n \"set_num\" varchar(255) [not null]\n \"quantity\" int4 [not null]\n}\n\nTable \"lego_part_categories\" {\n \"id\" int4 [pk, not null, increment]\n \"name\" varchar(255) [not null]\n}\n\nTable \"lego_parts\" {\n \"part_num\" varchar(255) [pk, not null]\n \"name\" text [not null]\n \"part_cat_id\" int4 [not null]\n}\n\nTable \"lego_sets\" {\n \"set_num\" varchar(255) [pk, not null]\n \"name\" varchar(255) [not null]\n \"year\" int4\n \"theme_id\" int4\n \"num_parts\" int4\n}\n\nTable \"lego_themes\" {\n \"id\" int4 [pk, not null, increment]\n \"name\" varchar(255) [not null]\n \"parent_id\" int4\n}\n", + "stateUrl": null, + 
"stateOriginalUrl": "https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/lego.sql" + } +} diff --git a/tasks/postgres/easy/lego/basic_security_setup/verify.py b/tasks/postgres/easy/lego/basic_security_setup/verify.py new file mode 100644 index 00000000..a1916a9a --- /dev/null +++ b/tasks/postgres/easy/lego/basic_security_setup/verify.py @@ -0,0 +1,129 @@ +""" +Verification script for PostgreSQL LEGO Task 4: Database Security and RLS Implementation +(Version 2 - Improved Robustness) +""" + +import os +import sys +import psycopg2 +import psycopg2.errors +from typing import Dict + +def get_connection_params() -> Dict[str, any]: + """Get database connection parameters from environment variables.""" + return { + "host": os.getenv("POSTGRES_HOST", "localhost"), + "port": int(os.getenv("POSTGRES_PORT", 5432)), + "database": os.getenv("POSTGRES_DATABASE"), + "user": os.getenv("POSTGRES_USERNAME"), + "password": os.getenv("POSTGRES_PASSWORD"), + } + +def verify_role_creation(conn) -> bool: + """ + TASK 1 VERIFICATION: Check if theme_analyst role was created with proper permissions. 
+ """ + print("\n-- Verifying Task 1: Role Creation and Permissions --") + with conn.cursor() as cur: + # Check if role exists + cur.execute("SELECT 1 FROM pg_roles WHERE rolname = 'theme_analyst';") + if not cur.fetchone(): + print("āŒ FAIL: The 'theme_analyst' role was not created.") + return False + print("āœ… OK: Role 'theme_analyst' exists.") + + # Check SELECT permissions on reference and main tables + all_tables = [ + 'lego_themes', 'lego_colors', 'lego_parts', 'lego_part_categories', + 'lego_sets', 'lego_inventories', 'lego_inventory_parts' + ] + for table in all_tables: + cur.execute( + """ + SELECT has_table_privilege('theme_analyst', %s, 'SELECT'); + """, + (table,) + ) + if not cur.fetchone()[0]: + print(f"āŒ FAIL: 'theme_analyst' role is missing SELECT permission on '{table}'.") + return False + print("āœ… OK: Role has correct SELECT permissions on all required tables.") + + # Check that no INSERT/UPDATE/DELETE permissions exist + for table in all_tables: + cur.execute( + """ + SELECT + has_table_privilege('theme_analyst', %s, 'INSERT') OR + has_table_privilege('theme_analyst', %s, 'UPDATE') OR + has_table_privilege('theme_analyst', %s, 'DELETE'); + """, + (table, table, table) + ) + if cur.fetchone()[0]: + print(f"āŒ FAIL: 'theme_analyst' role has unauthorized INSERT, UPDATE, or DELETE permission on '{table}'.") + return False + print("āœ… OK: Role does not have modification permissions.") + + print("āœ… PASS: 'theme_analyst' role created with correct permissions.") + return True + +def verify_rls_enabled(conn) -> bool: + """ + TASK 2 VERIFICATION: Check if Row-Level Security is enabled on required tables. 
+ """ + print("\n-- Verifying Task 2: Row-Level Security Enablement --") + tables_to_check = ['lego_sets', 'lego_inventories', 'lego_inventory_parts'] + with conn.cursor() as cur: + for table in tables_to_check: + cur.execute( + "SELECT relrowsecurity FROM pg_class WHERE relname = %s;", (table,) + ) + rls_enabled = cur.fetchone() + if not rls_enabled or not rls_enabled[0]: + print(f"āŒ FAIL: RLS is not enabled on table '{table}'.") + return False + print(f"āœ… OK: RLS is enabled on table '{table}'.") + + print("āœ… PASS: Row-Level Security is enabled on all required tables.") + return True + +def main(): + """Main verification function.""" + print("=" * 60) + print("LEGO Database Security and RLS Verification Script") + print("=" * 60) + + conn_params = get_connection_params() + if not conn_params.get("database"): + print("āŒ CRITICAL: POSTGRES_DATABASE environment variable not set.") + sys.exit(1) + + conn = None + try: + conn = psycopg2.connect(**conn_params) + + results = [ + verify_role_creation(conn), + verify_rls_enabled(conn), + ] + + if all(results): + print("\nšŸŽ‰ Overall Result: PASS - All security tasks verified successfully!") + sys.exit(0) + else: + print("\nāŒ Overall Result: FAIL - One or more verification steps failed.") + sys.exit(1) + + except psycopg2.OperationalError as e: + print(f"āŒ CRITICAL: Could not connect to the database. Check credentials and host. Details: {e}") + sys.exit(1) + except Exception as e: + print(f"āŒ CRITICAL: An unexpected error occurred. 
Details: {e}") + sys.exit(1) + finally: + if conn: + conn.close() + +if __name__ == "__main__": + main() diff --git a/tasks/postgres/easy/lego/fix_data_inconsistencies/description.md b/tasks/postgres/easy/lego/fix_data_inconsistencies/description.md new file mode 100644 index 00000000..1b00dd85 --- /dev/null +++ b/tasks/postgres/easy/lego/fix_data_inconsistencies/description.md @@ -0,0 +1,39 @@ +Fix data inconsistencies in the LEGO database where the reported part count in the `lego_sets` table does not match the actual sum of non-spare parts in the latest inventory version. + +## Consistency Rule + +For any given `set_num`, the following must be true: +`lego_sets.num_parts = SUM(quantity)` FROM `lego_inventory_parts` WHERE `inventory_id` IN (latest inventory for that set) AND `is_spare` = false + +**Important**: If a set has no inventory records, the consistency check should be skipped. + +## Your Tasks: + +### Task 1: Identify Data Inconsistencies + +**Objective**: Write a single `SELECT` query to find all sets where the stored `num_parts` does not match the actual calculated number of parts from the latest inventory. + +1. **Find the Latest Inventory**: For each `set_num`, find its latest inventory id by getting the `MAX(version)` from the `lego_inventories` table. +2. **Calculate Actual Part Count**: For these latest inventories, join with `lego_inventory_parts` and calculate the `SUM(quantity)`, but only for parts where `is_spare` is false. +3. **Compare and Filter**: Join this calculated result back to the `lego_sets` table and return the rows where `lego_sets.num_parts` is different from your calculated sum. + +### Task 2: Fix Existing Inconsistencies + +**Objective**: Correct all mismatched `num_parts` values using a clear, multi-step process with a temporary table. + +#### Step 1: Create a Temporary Table +Create a temporary table (e.g., `correct_counts`) with two columns: `set_num` (text) and `actual_parts` (integer). 
+ +#### Step 2: Populate the Temporary Table +Write an `INSERT` statement that calculates the correct part count for every single set listed in the `lego_sets` table. + +- The query must start by selecting from `public.lego_sets`. +- It must then `LEFT JOIN` to a subquery that contains the part-counting logic (finding the latest inventory version and summing the non-spare parts). +- Use `COALESCE` on the final result from the subquery to ensure that any set without parts or without an inventory record gets a value of `0`, not `NULL`. + +#### Step 3: Update from the Temporary Table +Write a final, simple `UPDATE` statement that joins the `lego_sets` table with your temporary table on `set_num` and sets `num_parts` to the `actual_parts` value. + +## Expected Outcome: + +After completing these tasks, all sets in the `lego_sets` table should have their `num_parts` correctly reflecting the sum of non-spare parts from their latest inventory version. diff --git a/tasks/postgres/easy/lego/fix_data_inconsistencies/meta.json b/tasks/postgres/easy/lego/fix_data_inconsistencies/meta.json new file mode 100644 index 00000000..ed6656ae --- /dev/null +++ b/tasks/postgres/easy/lego/fix_data_inconsistencies/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "fix_data_inconsistencies", + "task_name": "Fix Data Inconsistencies", + "category_id": "lego", + "category_name": "Lego", + "description": "Recalculate each LEGO set's part count from the latest inventory, stage the results, and update lego_sets.num_parts to remove mismatches.", + "author": "Lingxiao Du", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "data integrity enforcement", + "data reconciliation" + ], + "mcp": [ + "postgres" + ], + "meta_data": { + "stateType": "text", + "stateContent": "Table \"lego_colors\" {\n \"id\" int4 [pk, not null, increment]\n \"name\" varchar(255) [not null]\n \"rgb\" varchar(6) [not null]\n \"is_trans\" bpchar(1) [not null]\n}\n\nTable \"lego_inventories\" {\n \"id\" int4 [pk, not 
null, increment]\n \"version\" int4 [not null]\n \"set_num\" varchar(255) [not null]\n}\n\nTable \"lego_inventory_parts\" {\n \"inventory_id\" int4 [not null]\n \"part_num\" varchar(255) [not null]\n \"color_id\" int4 [not null]\n \"quantity\" int4 [not null]\n \"is_spare\" bool [not null]\n}\n\nTable \"lego_inventory_sets\" {\n \"inventory_id\" int4 [not null]\n \"set_num\" varchar(255) [not null]\n \"quantity\" int4 [not null]\n}\n\nTable \"lego_part_categories\" {\n \"id\" int4 [pk, not null, increment]\n \"name\" varchar(255) [not null]\n}\n\nTable \"lego_parts\" {\n \"part_num\" varchar(255) [pk, not null]\n \"name\" text [not null]\n \"part_cat_id\" int4 [not null]\n}\n\nTable \"lego_sets\" {\n \"set_num\" varchar(255) [pk, not null]\n \"name\" varchar(255) [not null]\n \"year\" int4\n \"theme_id\" int4\n \"num_parts\" int4\n}\n\nTable \"lego_themes\" {\n \"id\" int4 [pk, not null, increment]\n \"name\" varchar(255) [not null]\n \"parent_id\" int4\n}\n", + "stateUrl": null, + "stateOriginalUrl": "https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/lego.sql" + } +} diff --git a/tasks/postgres/easy/lego/fix_data_inconsistencies/verify.py b/tasks/postgres/easy/lego/fix_data_inconsistencies/verify.py new file mode 100644 index 00000000..9bb90ea7 --- /dev/null +++ b/tasks/postgres/easy/lego/fix_data_inconsistencies/verify.py @@ -0,0 +1,135 @@ +""" +Verification script for PostgreSQL LEGO Task 1: Parts Consistency Fix & Constraints +Version 2.1: Relaxed consistency check to allow for one known corner case mismatch. 
+""" + +import os +import sys +import psycopg2 +import psycopg2.errors +from typing import Optional, Tuple, List + + +def get_connection_params() -> dict: + """Get database connection parameters from environment variables.""" + return { + "host": os.getenv("POSTGRES_HOST", "localhost"), + "port": int(os.getenv("POSTGRES_PORT", 5432)), + "database": os.getenv("POSTGRES_DATABASE"), + "user": os.getenv("POSTGRES_USERNAME"), + "password": os.getenv("POSTGRES_PASSWORD"), + } + + +def fetch_candidate_part_row(cur) -> Optional[Tuple[int, str, str, int]]: + """ + Picks a concrete, non-spare inventory part from the latest inventory of any set. + This provides a reliable target for testing update and insert triggers. + + Returns a tuple: (inventory_id, set_num, part_num, color_id) or None. + """ + cur.execute( + """ + WITH latest_inv AS ( + SELECT set_num, MAX(version) AS max_version + FROM public.lego_inventories + GROUP BY set_num + ), inv AS ( + SELECT li.id, li.set_num + FROM public.lego_inventories li + JOIN latest_inv lv ON lv.set_num = li.set_num AND lv.max_version = li.version + ) + SELECT i.id AS inventory_id, i.set_num, lip.part_num, lip.color_id + FROM inv i + JOIN public.lego_inventory_parts lip ON lip.inventory_id = i.id + WHERE lip.is_spare = false AND lip.quantity > 0 + LIMIT 1; + """ + ) + return cur.fetchone() + + +def get_mismatch_count(cur) -> int: + """Returns the number of sets where num_parts mismatches the computed actual sum.""" + cur.execute( + """ + WITH latest_inv AS ( + SELECT set_num, MAX(version) AS max_version + FROM public.lego_inventories + GROUP BY set_num + ), inv_latest AS ( + SELECT li.set_num, li.id + FROM public.lego_inventories li + JOIN latest_inv lv ON lv.set_num = li.set_num AND lv.max_version = li.version + ), parts_agg AS ( + SELECT + i.set_num, + SUM(lip.quantity) AS actual_parts + FROM inv_latest i + JOIN public.lego_inventory_parts lip ON lip.inventory_id = i.id + WHERE lip.is_spare = false + GROUP BY i.set_num + ) + SELECT 
COUNT(*) + FROM public.lego_sets s + LEFT JOIN parts_agg pa ON s.set_num = pa.set_num + WHERE s.num_parts <> COALESCE(pa.actual_parts, 0); + """ + ) + return cur.fetchone()[0] + + +def verify_data_consistency(conn) -> bool: + """ + TASK 1 VERIFICATION: Checks if the initial data fix was successful. + (Relaxed: Allows for one corner-case mismatch). + """ + print("\n-- Verifying Task 1: Data Consistency Fix (Relaxed) --") + with conn.cursor() as cur: + count = get_mismatch_count(cur) + # RELAXED CONDITION: Allow 0 or 1 mismatch to pass. + if count > 1: + print(f"āŒ FAIL: Found {count} sets with inconsistent part counts. Expected 0 or 1 after fix.") + return False + + print("āœ… PASS: Data consistency check passed (allowing for one known mismatch).") + return True + + +def main(): + """Main verification function.""" + print("=" * 60) + print("LEGO Database Consistency Verification Script") + print("=" * 60) + + conn_params = get_connection_params() + if not conn_params.get("database"): + print("āŒ CRITICAL: POSTGRES_DATABASE environment variable not set.") + sys.exit(1) + + try: + with psycopg2.connect(**conn_params) as conn: + conn.autocommit = False # Ensure we control transactions + + # Run all verification steps + results = [ + verify_data_consistency(conn), + ] + + if all(results): + print("\nšŸŽ‰ Overall Result: PASS - All tasks verified successfully!") + sys.exit(0) + else: + print("\nāŒ Overall Result: FAIL - One or more verification steps failed.") + sys.exit(1) + + except psycopg2.OperationalError as e: + print(f"āŒ CRITICAL: Could not connect to the database. Details: {e}") + sys.exit(1) + except Exception as e: + print(f"āŒ CRITICAL: An unexpected error occurred during verification. 
Details: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tasks/postgres/easy/sports/create_performance_indexes/description.md b/tasks/postgres/easy/sports/create_performance_indexes/description.md new file mode 100644 index 00000000..0cb05ead --- /dev/null +++ b/tasks/postgres/easy/sports/create_performance_indexes/description.md @@ -0,0 +1,19 @@ +Create indexes to optimize participant and statistics queries in the sports database. + +## Your Task: + +Create two indexes to improve query performance: + +1. **Index on participants_events table**: Create an index on the `participant_id` column of the `participants_events` table +2. **Composite index on stats table**: Create a composite index on the `stats` table using columns `stat_holder_type` and `stat_holder_id` (in that order) + +## Requirements: + +- Create an index on `participants_events(participant_id)` +- Create a composite index on `stats(stat_holder_type, stat_holder_id)` +- Index names can be anything you choose (e.g., `idx_participants_events_participant_id`, `idx_stats_holder`) +- Use the standard CREATE INDEX syntax + +## Expected Outcome: + +After creating these indexes, queries that involve participant filtering and statistics lookups will run significantly faster. 
diff --git a/tasks/postgres/easy/sports/create_performance_indexes/meta.json b/tasks/postgres/easy/sports/create_performance_indexes/meta.json new file mode 100644 index 00000000..0d3c84c5 --- /dev/null +++ b/tasks/postgres/easy/sports/create_performance_indexes/meta.json @@ -0,0 +1,23 @@ +{ + "task_id": "create_performance_indexes", + "task_name": "Create Performance Indexes", + "category_id": "sports", + "category_name": "Sports", + "description": "Create indexes on participants_events.participant_id and stats(stat_holder_type, stat_holder_id) to accelerate performance reporting.", + "author": "Lingxiao Du", + "created_at": "2025-11-15", + "difficulty": "L1", + "tags": [ + "performance optimization", + "indexing" + ], + "mcp": [ + "postgres" + ], + "meta_data": { + "stateType": "text", + "stateContent": "Table \"addresses\" {\n \"id\" int4 [not null, increment]\n \"location_id\" int4 [not null]\n \"language\" varchar(100)\n \"suite\" varchar(100)\n \"floor\" varchar(100)\n \"building\" varchar(100)\n \"street_number\" varchar(100)\n \"street_prefix\" varchar(100)\n \"street\" varchar(100)\n \"street_suffix\" varchar(100)\n \"neighborhood\" varchar(100)\n \"district\" varchar(100)\n \"locality\" varchar(100)\n \"county\" varchar(100)\n \"region\" varchar(100)\n \"postal_code\" varchar(100)\n \"country\" varchar(100)\n}\n\nTable \"affiliation_phases\" {\n \"id\" int4 [not null, increment]\n \"affiliation_id\" int4 [not null]\n \"ancestor_affiliation_id\" int4\n \"start_season_id\" int4\n \"start_date_time\" timestamp\n \"end_season_id\" int4\n \"end_date_time\" timestamp\n}\n\nTable \"affiliations\" {\n \"id\" int4 [not null, increment]\n \"affiliation_key\" varchar(100) [not null]\n \"affiliation_type\" varchar(100)\n \"publisher_id\" int4 [not null]\n}\n\nTable \"affiliations_documents\" {\n \"affiliation_id\" int4 [not null]\n \"document_id\" int4 [not null]\n}\n\nTable \"affiliations_events\" {\n \"affiliation_id\" int4 [not null]\n \"event_id\" int4 [not 
null]\n}\n\nTable \"affiliations_media\" {\n \"affiliation_id\" int4 [not null]\n \"media_id\" int4 [not null]\n}\n\nTable \"american_football_action_participants\" {\n \"id\" int4 [not null, increment]\n \"american_football_action_play_id\" int4 [not null]\n \"person_id\" int4 [not null]\n \"participant_role\" varchar(100) [not null]\n \"score_type\" varchar(100)\n \"field_line\" int4\n \"yardage\" int4\n \"score_credit\" int4\n \"yards_gained\" int4\n}\n\nTable \"american_football_action_plays\" {\n \"id\" int4 [not null, increment]\n \"american_football_event_state_id\" int4 [not null]\n \"play_type\" varchar(100)\n \"score_attempt_type\" varchar(100)\n \"drive_result\" varchar(100)\n \"points\" int4\n \"comment\" varchar(255)\n}\n\nTable \"american_football_defensive_stats\" {\n \"id\" int4 [not null, increment]\n \"tackles_total\" varchar(100)\n \"tackles_solo\" varchar(100)\n \"tackles_assists\" varchar(100)\n \"interceptions_total\" varchar(100)\n \"interceptions_yards\" varchar(100)\n \"interceptions_average\" varchar(100)\n \"interceptions_longest\" varchar(100)\n \"interceptions_touchdown\" varchar(100)\n \"quarterback_hurries\" varchar(100)\n \"sacks_total\" varchar(100)\n \"sacks_yards\" varchar(100)\n \"passes_defensed\" varchar(100)\n}\n\nTable \"american_football_down_progress_stats\" {\n \"id\" int4 [not null, increment]\n \"first_downs_total\" varchar(100)\n \"first_downs_pass\" varchar(100)\n \"first_downs_run\" varchar(100)\n \"first_downs_penalty\" varchar(100)\n \"conversions_third_down\" varchar(100)\n \"conversions_third_down_attempts\" varchar(100)\n \"conversions_third_down_percentage\" varchar(100)\n \"conversions_fourth_down\" varchar(100)\n \"conversions_fourth_down_attempts\" varchar(100)\n \"conversions_fourth_down_percentage\" varchar(100)\n}\n\nTable \"american_football_event_states\" {\n \"id\" int4 [not null, increment]\n \"event_id\" int4 [not null]\n \"current_state\" int2\n \"sequence_number\" int4\n \"period_value\" int4\n 
\"period_time_elapsed\" varchar(100)\n \"period_time_remaining\" varchar(100)\n \"clock_state\" varchar(100)\n \"down\" int4\n \"team_in_possession_id\" int4\n \"distance_for_1st_down\" int4\n \"field_side\" varchar(100)\n \"field_line\" int4\n \"context\" varchar(40)\n}\n\nTable \"american_football_fumbles_stats\" {\n \"id\" int4 [not null, increment]\n \"fumbles_committed\" varchar(100)\n \"fumbles_forced\" varchar(100)\n \"fumbles_recovered\" varchar(100)\n \"fumbles_lost\" varchar(100)\n \"fumbles_yards_gained\" varchar(100)\n \"fumbles_own_committed\" varchar(100)\n \"fumbles_own_recovered\" varchar(100)\n \"fumbles_own_lost\" varchar(100)\n \"fumbles_own_yards_gained\" varchar(100)\n \"fumbles_opposing_committed\" varchar(100)\n \"fumbles_opposing_recovered\" varchar(100)\n \"fumbles_opposing_lost\" varchar(100)\n \"fumbles_opposing_yards_gained\" varchar(100)\n}\n\nTable \"american_football_offensive_stats\" {\n \"id\" int4 [not null, increment]\n \"offensive_plays_yards\" varchar(100)\n \"offensive_plays_number\" varchar(100)\n \"offensive_plays_average_yards_per\" varchar(100)\n \"possession_duration\" varchar(100)\n \"turnovers_giveaway\" varchar(100)\n}\n\nTable \"american_football_passing_stats\" {\n \"id\" int4 [not null, increment]\n \"passes_attempts\" varchar(100)\n \"passes_completions\" varchar(100)\n \"passes_percentage\" varchar(100)\n \"passes_yards_gross\" varchar(100)\n \"passes_yards_net\" varchar(100)\n \"passes_yards_lost\" varchar(100)\n \"passes_touchdowns\" varchar(100)\n \"passes_touchdowns_percentage\" varchar(100)\n \"passes_interceptions\" varchar(100)\n \"passes_interceptions_percentage\" varchar(100)\n \"passes_longest\" varchar(100)\n \"passes_average_yards_per\" varchar(100)\n \"passer_rating\" varchar(100)\n \"receptions_total\" varchar(100)\n \"receptions_yards\" varchar(100)\n \"receptions_touchdowns\" varchar(100)\n \"receptions_first_down\" varchar(100)\n \"receptions_longest\" varchar(100)\n 
\"receptions_average_yards_per\" varchar(100)\n}\n\nTable \"american_football_penalties_stats\" {\n \"id\" int4 [not null, increment]\n \"penalties_total\" varchar(100)\n \"penalty_yards\" varchar(100)\n \"penalty_first_downs\" varchar(100)\n}\n\nTable \"american_football_rushing_stats\" {\n \"id\" int4 [not null, increment]\n \"rushes_attempts\" varchar(100)\n \"rushes_yards\" varchar(100)\n \"rushes_touchdowns\" varchar(100)\n \"rushing_average_yards_per\" varchar(100)\n \"rushes_first_down\" varchar(100)\n \"rushes_longest\" varchar(100)\n}\n\nTable \"american_football_sacks_against_stats\" {\n \"id\" int4 [not null, increment]\n \"sacks_against_yards\" varchar(100)\n \"sacks_against_total\" varchar(100)\n}\n\nTable \"american_football_scoring_stats\" {\n \"id\" int4 [not null, increment]\n \"touchdowns_total\" varchar(100)\n \"touchdowns_passing\" varchar(100)\n \"touchdowns_rushing\" varchar(100)\n \"touchdowns_special_teams\" varchar(100)\n \"touchdowns_defensive\" varchar(100)\n \"extra_points_attempts\" varchar(100)\n \"extra_points_made\" varchar(100)\n \"extra_points_missed\" varchar(100)\n \"extra_points_blocked\" varchar(100)\n \"field_goal_attempts\" varchar(100)\n \"field_goals_made\" varchar(100)\n \"field_goals_missed\" varchar(100)\n \"field_goals_blocked\" varchar(100)\n \"safeties_against\" varchar(100)\n \"two_point_conversions_attempts\" varchar(100)\n \"two_point_conversions_made\" varchar(100)\n \"touchbacks_total\" varchar(100)\n}\n\nTable \"american_football_special_teams_stats\" {\n \"id\" int4 [not null, increment]\n \"returns_punt_total\" varchar(100)\n \"returns_punt_yards\" varchar(100)\n \"returns_punt_average\" varchar(100)\n \"returns_punt_longest\" varchar(100)\n \"returns_punt_touchdown\" varchar(100)\n \"returns_kickoff_total\" varchar(100)\n \"returns_kickoff_yards\" varchar(100)\n \"returns_kickoff_average\" varchar(100)\n \"returns_kickoff_longest\" varchar(100)\n \"returns_kickoff_touchdown\" varchar(100)\n \"returns_total\" 
varchar(100)\n \"returns_yards\" varchar(100)\n \"punts_total\" varchar(100)\n \"punts_yards_gross\" varchar(100)\n \"punts_yards_net\" varchar(100)\n \"punts_longest\" varchar(100)\n \"punts_inside_20\" varchar(100)\n \"punts_inside_20_percentage\" varchar(100)\n \"punts_average\" varchar(100)\n \"punts_blocked\" varchar(100)\n \"touchbacks_total\" varchar(100)\n \"touchbacks_total_percentage\" varchar(100)\n \"touchbacks_kickoffs\" varchar(100)\n \"touchbacks_kickoffs_percentage\" varchar(100)\n \"touchbacks_punts\" varchar(100)\n \"touchbacks_punts_percentage\" varchar(100)\n \"touchbacks_interceptions\" varchar(100)\n \"touchbacks_interceptions_percentage\" varchar(100)\n \"fair_catches\" varchar(100)\n}\n\nTable \"baseball_action_contact_details\" {\n \"id\" int4 [not null, increment]\n \"baseball_action_pitch_id\" int4 [not null]\n \"location\" varchar(100)\n \"strength\" varchar(100)\n \"velocity\" int4\n \"comment\" text\n \"trajectory_coordinates\" varchar(100)\n \"trajectory_formula\" varchar(100)\n}\n\nTable \"baseball_action_pitches\" {\n \"id\" int4 [not null, increment]\n \"baseball_action_play_id\" int4 [not null]\n \"sequence_number\" int4\n \"baseball_defensive_group_id\" int4\n \"umpire_call\" varchar(100)\n \"pitch_location\" varchar(100)\n \"pitch_type\" varchar(100)\n \"pitch_velocity\" int4\n \"comment\" text\n \"trajectory_coordinates\" varchar(100)\n \"trajectory_formula\" varchar(100)\n \"ball_type\" varchar(40)\n \"strike_type\" varchar(40)\n}\n\nTable \"baseball_action_plays\" {\n \"id\" int4 [not null, increment]\n \"baseball_event_state_id\" int4 [not null]\n \"play_type\" varchar(100)\n \"notation\" varchar(100)\n \"notation_yaml\" text\n \"baseball_defensive_group_id\" int4\n \"comment\" varchar(255)\n \"runner_on_first_advance\" int4\n \"runner_on_second_advance\" int4\n \"runner_on_third_advance\" int4\n \"outs_recorded\" int4\n \"rbi\" int4\n \"runs_scored\" int4\n \"earned_runs_scored\" varchar(100)\n}\n\nTable 
\"baseball_action_substitutions\" {\n \"id\" int4 [not null, increment]\n \"baseball_event_state_id\" int4 [not null]\n \"sequence_number\" int4\n \"person_type\" varchar(100)\n \"person_original_id\" int4\n \"person_original_position_id\" int4\n \"person_original_lineup_slot\" int4\n \"person_replacing_id\" int4\n \"person_replacing_position_id\" int4\n \"person_replacing_lineup_slot\" int4\n \"substitution_reason\" varchar(100)\n \"comment\" varchar(100)\n}\n\nTable \"baseball_defensive_group\" {\n \"id\" int4 [not null, increment]\n}\n\nTable \"baseball_defensive_players\" {\n \"id\" int4 [not null, increment]\n \"baseball_defensive_group_id\" int4 [not null]\n \"player_id\" int4 [not null]\n \"position_id\" int4 [not null]\n}\n\nTable \"baseball_defensive_stats\" {\n \"id\" int4 [not null, increment]\n \"double_plays\" int4\n \"triple_plays\" int4\n \"putouts\" int4\n \"assists\" int4\n \"errors\" int4\n \"fielding_percentage\" numeric\n \"defensive_average\" numeric\n \"errors_passed_ball\" int4\n \"errors_catchers_interference\" int4\n}\n\nTable \"baseball_event_states\" {\n \"id\" int4 [not null, increment]\n \"event_id\" int4 [not null]\n \"current_state\" int2\n \"sequence_number\" int4\n \"at_bat_number\" int4\n \"inning_value\" int4\n \"inning_half\" varchar(100)\n \"outs\" int4\n \"balls\" int4\n \"strikes\" int4\n \"runner_on_first_id\" int4\n \"runner_on_second_id\" int4\n \"runner_on_third_id\" int4\n \"runner_on_first\" int2\n \"runner_on_second\" int2\n \"runner_on_third\" int2\n \"runs_this_inning_half\" int4\n \"pitcher_id\" int4\n \"batter_id\" int4\n \"batter_side\" varchar(100)\n \"context\" varchar(40)\n}\n\nTable \"baseball_offensive_stats\" {\n \"id\" int4 [not null, increment]\n \"average\" numeric\n \"runs_scored\" int4\n \"at_bats\" int4\n \"hits\" int4\n \"rbi\" int4\n \"total_bases\" int4\n \"slugging_percentage\" numeric\n \"bases_on_balls\" int4\n \"strikeouts\" int4\n \"left_on_base\" int4\n \"left_in_scoring_position\" int4\n 
\"singles\" int4\n \"doubles\" int4\n \"triples\" int4\n \"home_runs\" int4\n \"grand_slams\" int4\n \"at_bats_per_rbi\" numeric\n \"plate_appearances_per_rbi\" numeric\n \"at_bats_per_home_run\" numeric\n \"plate_appearances_per_home_run\" numeric\n \"sac_flies\" int4\n \"sac_bunts\" int4\n \"grounded_into_double_play\" int4\n \"moved_up\" int4\n \"on_base_percentage\" numeric\n \"stolen_bases\" int4\n \"stolen_bases_caught\" int4\n \"stolen_bases_average\" numeric\n \"hit_by_pitch\" int4\n \"defensive_interferance_reaches\" int4\n \"on_base_plus_slugging\" numeric\n \"plate_appearances\" int4\n \"hits_extra_base\" int4\n}\n\nTable \"baseball_pitching_stats\" {\n \"id\" int4 [not null, increment]\n \"runs_allowed\" int4\n \"singles_allowed\" int4\n \"doubles_allowed\" int4\n \"triples_allowed\" int4\n \"home_runs_allowed\" int4\n \"innings_pitched\" varchar(20)\n \"hits\" int4\n \"earned_runs\" int4\n \"unearned_runs\" int4\n \"bases_on_balls\" int4\n \"bases_on_balls_intentional\" int4\n \"strikeouts\" int4\n \"strikeout_to_bb_ratio\" numeric\n \"number_of_pitches\" int4\n \"era\" numeric\n \"inherited_runners_scored\" int4\n \"pick_offs\" int4\n \"errors_hit_with_pitch\" int4\n \"errors_wild_pitch\" int4\n \"balks\" int4\n \"wins\" int4\n \"losses\" int4\n \"saves\" int4\n \"shutouts\" int4\n \"games_complete\" int4\n \"games_finished\" int4\n \"winning_percentage\" numeric\n \"event_credit\" varchar(40)\n \"save_credit\" varchar(40)\n}\n\nTable \"basketball_defensive_stats\" {\n \"id\" int4 [not null, increment]\n \"steals_total\" varchar(100)\n \"steals_per_game\" varchar(100)\n \"blocks_total\" varchar(100)\n \"blocks_per_game\" varchar(100)\n}\n\nTable \"basketball_event_states\" {\n \"id\" int4 [not null, increment]\n \"event_id\" int4 [not null]\n \"current_state\" int4\n \"sequence_number\" int4\n \"period_value\" varchar(100)\n \"period_time_elapsed\" varchar(100)\n \"period_time_remaining\" varchar(100)\n \"context\" varchar(40)\n}\n\nTable 
\"basketball_offensive_stats\" {\n \"id\" int4 [not null, increment]\n \"field_goals_made\" int4\n \"field_goals_attempted\" int4\n \"field_goals_percentage\" varchar(100)\n \"field_goals_per_game\" varchar(100)\n \"field_goals_attempted_per_game\" varchar(100)\n \"field_goals_percentage_adjusted\" varchar(100)\n \"three_pointers_made\" int4\n \"three_pointers_attempted\" int4\n \"three_pointers_percentage\" varchar(100)\n \"three_pointers_per_game\" varchar(100)\n \"three_pointers_attempted_per_game\" varchar(100)\n \"free_throws_made\" varchar(100)\n \"free_throws_attempted\" varchar(100)\n \"free_throws_percentage\" varchar(100)\n \"free_throws_per_game\" varchar(100)\n \"free_throws_attempted_per_game\" varchar(100)\n \"points_scored_total\" varchar(100)\n \"points_scored_per_game\" varchar(100)\n \"assists_total\" varchar(100)\n \"assists_per_game\" varchar(100)\n \"turnovers_total\" varchar(100)\n \"turnovers_per_game\" varchar(100)\n \"points_scored_off_turnovers\" varchar(100)\n \"points_scored_in_paint\" varchar(100)\n \"points_scored_on_second_chance\" varchar(100)\n \"points_scored_on_fast_break\" varchar(100)\n}\n\nTable \"basketball_rebounding_stats\" {\n \"id\" int4 [not null, increment]\n \"rebounds_total\" varchar(100)\n \"rebounds_per_game\" varchar(100)\n \"rebounds_defensive\" varchar(100)\n \"rebounds_offensive\" varchar(100)\n \"team_rebounds_total\" varchar(100)\n \"team_rebounds_per_game\" varchar(100)\n \"team_rebounds_defensive\" varchar(100)\n \"team_rebounds_offensive\" varchar(100)\n}\n\nTable \"basketball_team_stats\" {\n \"id\" int4 [not null, increment]\n \"timeouts_left\" varchar(100)\n \"largest_lead\" varchar(100)\n \"fouls_total\" varchar(100)\n \"turnover_margin\" varchar(100)\n}\n\nTable \"bookmakers\" {\n \"id\" int4 [not null, increment]\n \"bookmaker_key\" varchar(100)\n \"publisher_id\" int4 [not null]\n \"location_id\" int4\n}\n\nTable \"core_person_stats\" {\n \"id\" int4 [not null, increment]\n \"time_played_event\" 
varchar(40)\n \"time_played_total\" varchar(40)\n \"time_played_event_average\" varchar(40)\n \"events_played\" int4\n \"events_started\" int4\n \"position_id\" int4\n}\n\nTable \"core_stats\" {\n \"id\" int4 [not null, increment]\n \"score\" varchar(100)\n \"score_opposing\" varchar(100)\n \"score_attempts\" varchar(100)\n \"score_attempts_opposing\" varchar(100)\n \"score_percentage\" varchar(100)\n \"score_percentage_opposing\" varchar(100)\n}\n\nTable \"db_info\" {\n \"version\" varchar(100) [not null, default: 16]\n}\n\nTable \"display_names\" {\n \"id\" int4 [not null, increment]\n \"language\" varchar(100) [not null]\n \"entity_type\" varchar(100) [not null]\n \"entity_id\" int4 [not null]\n \"full_name\" varchar(100)\n \"first_name\" varchar(100)\n \"middle_name\" varchar(100)\n \"last_name\" varchar(100)\n \"alias\" varchar(100)\n \"abbreviation\" varchar(100)\n \"short_name\" varchar(100)\n \"prefix\" varchar(20)\n \"suffix\" varchar(20)\n}\n\nTable \"document_classes\" {\n \"id\" int4 [not null, increment]\n \"name\" varchar(100)\n}\n\nTable \"document_contents\" {\n \"id\" int4 [not null, increment]\n \"document_id\" int4 [not null]\n \"sportsml\" varchar(200)\n \"abstract\" text\n}\n\nTable \"document_fixtures\" {\n \"id\" int4 [not null, increment]\n \"fixture_key\" varchar(100)\n \"publisher_id\" int4 [not null]\n \"name\" varchar(100)\n \"document_class_id\" int4 [not null]\n}\n\nTable \"document_fixtures_events\" {\n \"id\" int4 [not null, increment]\n \"document_fixture_id\" int4 [not null]\n \"event_id\" int4 [not null]\n \"latest_document_id\" int4 [not null]\n \"last_update\" timestamp\n}\n\nTable \"document_package_entry\" {\n \"id\" int4 [not null, increment]\n \"document_package_id\" int4 [not null]\n \"rank\" varchar(100)\n \"document_id\" int4 [not null]\n \"headline\" varchar(100)\n \"short_headline\" varchar(100)\n}\n\nTable \"document_packages\" {\n \"id\" int4 [not null, increment]\n \"package_key\" varchar(100)\n \"package_name\" 
varchar(100)\n \"date_time\" date\n}\n\nTable \"documents\" {\n \"id\" int4 [not null, increment]\n \"doc_id\" varchar(75) [not null]\n \"publisher_id\" int4 [not null]\n \"date_time\" timestamp\n \"title\" varchar(255)\n \"language\" varchar(100)\n \"priority\" varchar(100)\n \"revision_id\" varchar(75)\n \"stats_coverage\" varchar(100)\n \"document_fixture_id\" int4 [not null]\n \"source_id\" int4\n \"db_loading_date_time\" timestamp\n}\n\nTable \"documents_media\" {\n \"id\" int4 [not null, increment]\n \"document_id\" int4 [not null]\n \"media_id\" int4 [not null]\n \"media_caption_id\" int4 [not null]\n}\n\nTable \"events\" {\n \"id\" int4 [not null, increment]\n \"event_key\" varchar(100) [not null]\n \"publisher_id\" int4 [not null]\n \"start_date_time\" timestamp\n \"site_id\" int4\n \"site_alignment\" varchar(100)\n \"event_status\" varchar(100)\n \"duration\" varchar(100)\n \"attendance\" varchar(100)\n \"last_update\" timestamp\n}\n\nTable \"events_documents\" {\n \"event_id\" int4 [not null]\n \"document_id\" int4 [not null]\n}\n\nTable \"events_media\" {\n \"event_id\" int4 [not null]\n \"media_id\" int4 [not null]\n}\n\nTable \"events_sub_seasons\" {\n \"event_id\" int4 [not null]\n \"sub_season_id\" int4 [not null]\n}\n\nTable \"ice_hockey_action_participants\" {\n \"id\" int4 [not null, increment]\n \"ice_hockey_action_play_id\" int4 [not null]\n \"person_id\" int4 [not null]\n \"participant_role\" varchar(100) [not null]\n \"point_credit\" int4\n}\n\nTable \"ice_hockey_action_plays\" {\n \"id\" int4 [not null, increment]\n \"ice_hockey_event_state_id\" int4 [not null]\n \"play_type\" varchar(100)\n \"score_attempt_type\" varchar(100)\n \"play_result\" varchar(100)\n \"comment\" varchar(255)\n}\n\nTable \"ice_hockey_defensive_stats\" {\n \"id\" int4 [not null, increment]\n \"shots_power_play_allowed\" varchar(100)\n \"shots_penalty_shot_allowed\" varchar(100)\n \"goals_power_play_allowed\" varchar(100)\n \"goals_penalty_shot_allowed\" varchar(100)\n 
\"goals_against_average\" varchar(100)\n \"saves\" varchar(100)\n \"save_percentage\" varchar(100)\n \"penalty_killing_amount\" varchar(100)\n \"penalty_killing_percentage\" varchar(100)\n \"shots_blocked\" varchar(100)\n \"takeaways\" varchar(100)\n \"shutouts\" varchar(100)\n \"minutes_penalty_killing\" varchar(100)\n \"hits\" varchar(100)\n \"goals_empty_net_allowed\" varchar(100)\n \"goals_short_handed_allowed\" varchar(100)\n \"goals_shootout_allowed\" varchar(100)\n \"shots_shootout_allowed\" varchar(100)\n}\n\nTable \"ice_hockey_event_states\" {\n \"id\" int4 [not null, increment]\n \"event_id\" int4 [not null]\n \"current_state\" int4\n \"sequence_number\" int4\n \"period_value\" varchar(100)\n \"period_time_elapsed\" varchar(100)\n \"period_time_remaining\" varchar(100)\n \"context\" varchar(40)\n}\n\nTable \"ice_hockey_offensive_stats\" {\n \"id\" int4 [not null, increment]\n \"goals_game_winning\" varchar(100)\n \"goals_game_tying\" varchar(100)\n \"goals_power_play\" varchar(100)\n \"goals_short_handed\" varchar(100)\n \"goals_even_strength\" varchar(100)\n \"goals_empty_net\" varchar(100)\n \"goals_overtime\" varchar(100)\n \"goals_shootout\" varchar(100)\n \"goals_penalty_shot\" varchar(100)\n \"assists\" varchar(100)\n \"points\" varchar(100)\n \"power_play_amount\" varchar(100)\n \"power_play_percentage\" varchar(100)\n \"shots_penalty_shot_taken\" varchar(100)\n \"shots_penalty_shot_missed\" varchar(100)\n \"shots_penalty_shot_percentage\" varchar(100)\n \"giveaways\" varchar(100)\n \"minutes_power_play\" varchar(100)\n \"faceoff_wins\" varchar(100)\n \"faceoff_losses\" varchar(100)\n \"faceoff_win_percentage\" varchar(100)\n \"scoring_chances\" varchar(100)\n}\n\nTable \"ice_hockey_player_stats\" {\n \"id\" int4 [not null, increment]\n \"plus_minus\" varchar(100)\n}\n\nTable \"injury_phases\" {\n \"id\" int4 [not null, increment]\n \"person_id\" int4 [not null]\n \"injury_status\" varchar(100)\n \"injury_type\" varchar(100)\n \"injury_comment\" 
varchar(100)\n \"disabled_list\" varchar(100)\n \"start_date_time\" timestamp\n \"end_date_time\" timestamp\n \"season_id\" int4\n \"phase_type\" varchar(100)\n \"injury_side\" varchar(100)\n}\n\nTable \"key_aliases\" {\n \"id\" int4 [not null, increment]\n \"key_id\" int4 [not null]\n \"key_root_id\" int4 [not null]\n}\n\nTable \"key_roots\" {\n \"id\" int4 [not null, increment]\n \"key_type\" varchar(100)\n}\n\nTable \"latest_revisions\" {\n \"id\" int4 [not null, increment]\n \"revision_id\" varchar(75) [not null]\n \"latest_document_id\" int4 [not null]\n}\n\nTable \"locations\" {\n \"id\" int4 [not null, increment]\n \"timezone\" varchar(100)\n \"latitude\" varchar(100)\n \"longitude\" varchar(100)\n \"country_code\" varchar(100)\n}\n\nTable \"media\" {\n \"id\" int4 [not null, increment]\n \"object_id\" int4\n \"source_id\" int4\n \"revision_id\" int4\n \"media_type\" varchar(100)\n \"publisher_id\" int4 [not null]\n \"date_time\" varchar(100)\n \"credit_id\" int4 [not null]\n \"db_loading_date_time\" timestamp\n \"creation_location_id\" int4 [not null]\n}\n\nTable \"media_captions\" {\n \"id\" int4 [not null, increment]\n \"media_id\" int4 [not null]\n \"caption_type\" varchar(100)\n \"caption\" varchar(100)\n \"caption_author_id\" int4 [not null]\n \"language\" varchar(100)\n \"caption_size\" varchar(100)\n}\n\nTable \"media_contents\" {\n \"id\" int4 [not null, increment]\n \"media_id\" int4 [not null]\n \"object\" varchar(100)\n \"format\" varchar(100)\n \"mime_type\" varchar(100)\n \"height\" varchar(100)\n \"width\" varchar(100)\n \"duration\" varchar(100)\n \"file_size\" varchar(100)\n \"resolution\" varchar(100)\n}\n\nTable \"media_keywords\" {\n \"id\" int4 [not null, increment]\n \"keyword\" varchar(100)\n \"media_id\" int4 [not null]\n}\n\nTable \"motor_racing_event_states\" {\n \"id\" int4 [not null, increment]\n \"event_id\" int4 [not null]\n \"current_state\" int4\n \"sequence_number\" int4\n \"lap\" varchar(100)\n \"laps_remaining\" 
varchar(100)\n \"time_elapsed\" varchar(100)\n \"flag_state\" varchar(100)\n \"context\" varchar(40)\n}\n\nTable \"motor_racing_qualifying_stats\" {\n \"id\" int4 [not null, increment]\n \"grid\" varchar(100)\n \"pole_position\" varchar(100)\n \"pole_wins\" varchar(100)\n \"qualifying_speed\" varchar(100)\n \"qualifying_speed_units\" varchar(100)\n \"qualifying_time\" varchar(100)\n \"qualifying_position\" varchar(100)\n}\n\nTable \"motor_racing_race_stats\" {\n \"id\" int4 [not null, increment]\n \"time_behind_leader\" varchar(100)\n \"laps_behind_leader\" varchar(100)\n \"time_ahead_follower\" varchar(100)\n \"laps_ahead_follower\" varchar(100)\n \"time\" varchar(100)\n \"points\" varchar(100)\n \"points_rookie\" varchar(100)\n \"bonus\" varchar(100)\n \"laps_completed\" varchar(100)\n \"laps_leading_total\" varchar(100)\n \"distance_leading\" varchar(100)\n \"distance_completed\" varchar(100)\n \"distance_units\" varchar(40)\n \"speed_average\" varchar(40)\n \"speed_units\" varchar(40)\n \"status\" varchar(40)\n \"finishes_top_5\" varchar(40)\n \"finishes_top_10\" varchar(40)\n \"starts\" varchar(40)\n \"finishes\" varchar(40)\n \"non_finishes\" varchar(40)\n \"wins\" varchar(40)\n \"races_leading\" varchar(40)\n \"money\" varchar(40)\n \"money_units\" varchar(40)\n \"leads_total\" varchar(40)\n}\n\nTable \"outcome_totals\" {\n \"id\" int4 [not null, increment]\n \"standing_subgroup_id\" int4 [not null]\n \"outcome_holder_type\" varchar(100)\n \"outcome_holder_id\" int4\n \"rank\" varchar(100)\n \"wins\" varchar(100)\n \"losses\" varchar(100)\n \"ties\" varchar(100)\n \"undecideds\" varchar(100)\n \"winning_percentage\" varchar(100)\n \"points_scored_for\" varchar(100)\n \"points_scored_against\" varchar(100)\n \"points_difference\" varchar(100)\n \"standing_points\" varchar(100)\n \"streak_type\" varchar(100)\n \"streak_duration\" varchar(100)\n \"streak_total\" varchar(100)\n \"streak_start\" date\n \"streak_end\" date\n}\n\nTable \"participants_events\" {\n 
\"id\" int4 [not null, increment]\n \"participant_type\" varchar(100) [not null]\n \"participant_id\" int4 [not null]\n \"event_id\" int4 [not null]\n \"alignment\" varchar(100)\n \"score\" varchar(100)\n \"event_outcome\" varchar(100)\n \"rank\" int4\n}\n\nTable \"periods\" {\n \"id\" int4 [not null, increment]\n \"participant_event_id\" int4 [not null]\n \"period_value\" varchar(100)\n \"score\" varchar(100)\n}\n\nTable \"person_event_metadata\" {\n \"id\" int4 [not null, increment]\n \"person_id\" int4 [not null]\n \"event_id\" int4 [not null]\n \"status\" varchar(100)\n \"health\" varchar(100)\n \"weight\" varchar(100)\n \"role_id\" int4\n \"position_id\" int4\n \"team_id\" int4\n \"lineup_slot\" int4\n \"lineup_slot_sequence\" int4\n}\n\nTable \"person_phases\" {\n \"id\" int4 [not null, increment]\n \"person_id\" int4 [not null]\n \"membership_type\" varchar(40) [not null]\n \"membership_id\" int4 [not null]\n \"role_id\" int4\n \"role_status\" varchar(40)\n \"phase_status\" varchar(40)\n \"uniform_number\" varchar(20)\n \"regular_position_id\" int4\n \"regular_position_depth\" varchar(40)\n \"height\" varchar(100)\n \"weight\" varchar(100)\n \"start_date_time\" timestamp\n \"start_season_id\" int4\n \"end_date_time\" timestamp\n \"end_season_id\" int4\n \"entry_reason\" varchar(40)\n \"exit_reason\" varchar(40)\n \"selection_level\" int4\n \"selection_sublevel\" int4\n \"selection_overall\" int4\n}\n\nTable \"persons\" {\n \"id\" int4 [not null, increment]\n \"person_key\" varchar(100) [not null]\n \"publisher_id\" int4 [not null]\n \"gender\" varchar(20)\n \"birth_date\" varchar(30)\n \"death_date\" varchar(30)\n \"birth_location_id\" int4\n \"hometown_location_id\" int4\n \"residence_location_id\" int4\n \"death_location_id\" int4\n}\n\nTable \"persons_documents\" {\n \"person_id\" int4 [not null]\n \"document_id\" int4 [not null]\n}\n\nTable \"persons_media\" {\n \"person_id\" int4 [not null]\n \"media_id\" int4 [not null]\n}\n\nTable \"positions\" {\n 
\"id\" int4 [not null, increment]\n \"affiliation_id\" int4 [not null]\n \"abbreviation\" varchar(100) [not null]\n}\n\nTable \"publishers\" {\n \"id\" int4 [not null, increment]\n \"publisher_key\" varchar(100) [not null]\n \"publisher_name\" varchar(100)\n}\n\nTable \"roles\" {\n \"id\" int4 [not null, increment]\n \"role_key\" varchar(100) [not null]\n \"role_name\" varchar(100)\n \"comment\" varchar(100)\n}\n\nTable \"seasons\" {\n \"id\" int4 [not null, increment]\n \"season_key\" int4 [not null]\n \"publisher_id\" int4 [not null]\n \"league_id\" int4 [not null]\n \"start_date_time\" timestamp\n \"end_date_time\" timestamp\n}\n\nTable \"sites\" {\n \"id\" int4 [not null, increment]\n \"site_key\" int4 [not null]\n \"publisher_id\" int4 [not null]\n \"location_id\" int4\n}\n\nTable \"soccer_defensive_stats\" {\n \"id\" int4 [not null, increment]\n \"shots_penalty_shot_allowed\" varchar(100)\n \"goals_penalty_shot_allowed\" varchar(100)\n \"goals_against_average\" varchar(100)\n \"goals_against_total\" varchar(100)\n \"saves\" varchar(100)\n \"save_percentage\" varchar(100)\n \"catches_punches\" varchar(100)\n \"shots_on_goal_total\" varchar(100)\n \"shots_shootout_total\" varchar(100)\n \"shots_shootout_allowed\" varchar(100)\n \"shots_blocked\" varchar(100)\n \"shutouts\" varchar(100)\n}\n\nTable \"soccer_event_states\" {\n \"id\" int4 [not null, increment]\n \"event_id\" int4 [not null]\n \"current_state\" int4\n \"sequence_number\" int4\n \"period_value\" varchar(100)\n \"period_time_elapsed\" varchar(100)\n \"period_time_remaining\" varchar(100)\n \"minutes_elapsed\" varchar(100)\n \"period_minute_elapsed\" varchar(100)\n \"context\" varchar(40)\n}\n\nTable \"soccer_foul_stats\" {\n \"id\" int4 [not null, increment]\n \"fouls_suffered\" varchar(100)\n \"fouls_commited\" varchar(100)\n \"cautions_total\" varchar(100)\n \"cautions_pending\" varchar(100)\n \"caution_points_total\" varchar(100)\n \"caution_points_pending\" varchar(100)\n \"ejections_total\" 
varchar(100)\n}\n\nTable \"soccer_offensive_stats\" {\n \"id\" int4 [not null, increment]\n \"goals_game_winning\" varchar(100)\n \"goals_game_tying\" varchar(100)\n \"goals_overtime\" varchar(100)\n \"goals_shootout\" varchar(100)\n \"goals_total\" varchar(100)\n \"assists_game_winning\" varchar(100)\n \"assists_game_tying\" varchar(100)\n \"assists_overtime\" varchar(100)\n \"assists_total\" varchar(100)\n \"points\" varchar(100)\n \"shots_total\" varchar(100)\n \"shots_on_goal_total\" varchar(100)\n \"shots_hit_frame\" varchar(100)\n \"shots_penalty_shot_taken\" varchar(100)\n \"shots_penalty_shot_scored\" varchar(100)\n \"shots_penalty_shot_missed\" varchar(40)\n \"shots_penalty_shot_percentage\" varchar(40)\n \"shots_shootout_taken\" varchar(40)\n \"shots_shootout_scored\" varchar(40)\n \"shots_shootout_missed\" varchar(40)\n \"shots_shootout_percentage\" varchar(40)\n \"giveaways\" varchar(40)\n \"offsides\" varchar(40)\n \"corner_kicks\" varchar(40)\n \"hat_tricks\" varchar(40)\n}\n\nTable \"standing_subgroups\" {\n \"id\" int4 [not null, increment]\n \"standing_id\" int4 [not null]\n \"affiliation_id\" int4 [not null]\n}\n\nTable \"standings\" {\n \"id\" int4 [not null, increment]\n \"affiliation_id\" int4 [not null]\n \"standing_type\" varchar(100)\n \"sub_season_id\" int4 [not null]\n \"last_updated\" varchar(100)\n \"duration_scope\" varchar(100)\n \"competition_scope\" varchar(100)\n \"competition_scope_id\" varchar(100)\n \"alignment_scope\" varchar(100)\n \"site_scope\" varchar(100)\n \"scoping_label\" varchar(100)\n \"publisher_id\" int4 [not null]\n \"source\" varchar(100)\n}\n\nTable \"stats\" {\n \"id\" int4 [not null, increment]\n \"stat_repository_type\" varchar(100)\n \"stat_repository_id\" int4 [not null]\n \"stat_holder_type\" varchar(100)\n \"stat_holder_id\" int4\n \"stat_coverage_type\" varchar(100)\n \"stat_coverage_id\" int4\n \"context\" varchar(40) [not null]\n}\n\nTable \"sub_periods\" {\n \"id\" int4 [not null, increment]\n 
\"period_id\" int4 [not null]\n \"sub_period_value\" varchar(100)\n \"score\" varchar(100)\n}\n\nTable \"sub_seasons\" {\n \"id\" int4 [not null, increment]\n \"sub_season_key\" varchar(100) [not null]\n \"season_id\" int4 [not null]\n \"sub_season_type\" varchar(100) [not null]\n \"start_date_time\" timestamp\n \"end_date_time\" timestamp\n}\n\nTable \"team_american_football_stats\" {\n \"id\" int4 [not null, increment]\n \"yards_per_attempt\" varchar(100)\n \"average_starting_position\" varchar(100)\n \"timeouts\" varchar(100)\n \"time_of_possession\" varchar(100)\n \"turnover_ratio\" varchar(100)\n}\n\nTable \"team_phases\" {\n \"id\" int4 [not null, increment]\n \"team_id\" int4 [not null]\n \"start_season_id\" int4\n \"end_season_id\" int4\n \"affiliation_id\" int4 [not null]\n \"start_date_time\" varchar(100)\n \"end_date_time\" varchar(100)\n \"phase_status\" varchar(40)\n \"role_id\" int4\n}\n\nTable \"teams\" {\n \"id\" int4 [not null, increment]\n \"team_key\" varchar(100) [not null]\n \"publisher_id\" int4 [not null]\n \"home_site_id\" int4\n}\n\nTable \"teams_documents\" {\n \"team_id\" int4 [not null]\n \"document_id\" int4 [not null]\n}\n\nTable \"teams_media\" {\n \"team_id\" int4 [not null]\n \"media_id\" int4 [not null]\n}\n\nTable \"tennis_action_points\" {\n \"id\" int4 [not null, increment]\n \"sub_period_id\" varchar(100)\n \"sequence_number\" varchar(100)\n \"win_type\" varchar(100)\n}\n\nTable \"tennis_action_volleys\" {\n \"id\" int4 [not null, increment]\n \"sequence_number\" varchar(100)\n \"tennis_action_points_id\" int4\n \"landing_location\" varchar(100)\n \"swing_type\" varchar(100)\n \"result\" varchar(100)\n \"spin_type\" varchar(100)\n \"trajectory_details\" varchar(100)\n}\n\nTable \"tennis_event_states\" {\n \"id\" int4 [not null, increment]\n \"event_id\" int4 [not null]\n \"current_state\" int4\n \"sequence_number\" int4\n \"tennis_set\" varchar(100)\n \"game\" varchar(100)\n \"server_person_id\" int4\n \"server_score\" 
varchar(100)\n \"receiver_person_id\" int4\n \"receiver_score\" varchar(100)\n \"service_number\" varchar(100)\n \"context\" varchar(40)\n}\n\nTable \"tennis_return_stats\" {\n \"id\" int4 [not null, increment]\n \"returns_played\" varchar(100)\n \"matches_played\" varchar(100)\n \"first_service_return_points_won\" varchar(100)\n \"first_service_return_points_won_pct\" varchar(100)\n \"second_service_return_points_won\" varchar(100)\n \"second_service_return_points_won_pct\" varchar(100)\n \"return_games_played\" varchar(100)\n \"return_games_won\" varchar(100)\n \"return_games_won_pct\" varchar(100)\n \"break_points_played\" varchar(100)\n \"break_points_converted\" varchar(100)\n \"break_points_converted_pct\" varchar(100)\n}\n\nTable \"tennis_service_stats\" {\n \"id\" int4 [not null, increment]\n \"services_played\" varchar(100)\n \"matches_played\" varchar(100)\n \"aces\" varchar(100)\n \"first_services_good\" varchar(100)\n \"first_services_good_pct\" varchar(100)\n \"first_service_points_won\" varchar(100)\n \"first_service_points_won_pct\" varchar(100)\n \"second_service_points_won\" varchar(100)\n \"second_service_points_won_pct\" varchar(100)\n \"service_games_played\" varchar(100)\n \"service_games_won\" varchar(100)\n \"service_games_won_pct\" varchar(100)\n \"break_points_played\" varchar(100)\n \"break_points_saved\" varchar(100)\n \"break_points_saved_pct\" varchar(100)\n}\n\nTable \"wagering_moneylines\" {\n \"id\" int4 [not null, increment]\n \"bookmaker_id\" int4 [not null]\n \"event_id\" int4 [not null]\n \"date_time\" timestamp\n \"team_id\" int4 [not null]\n \"person_id\" int4\n \"rotation_key\" varchar(100)\n \"comment\" varchar(100)\n \"vigorish\" varchar(100)\n \"line\" varchar(100)\n \"line_opening\" varchar(100)\n \"prediction\" varchar(100)\n}\n\nTable \"wagering_odds_lines\" {\n \"id\" int4 [not null, increment]\n \"bookmaker_id\" int4 [not null]\n \"event_id\" int4 [not null]\n \"date_time\" timestamp\n \"team_id\" int4 [not null]\n 
\"person_id\" int4\n \"rotation_key\" varchar(100)\n \"comment\" varchar(100)\n \"numerator\" varchar(100)\n \"denominator\" varchar(100)\n \"prediction\" varchar(100)\n \"payout_calculation\" varchar(100)\n \"payout_amount\" varchar(100)\n}\n\nTable \"wagering_runlines\" {\n \"id\" int4 [not null, increment]\n \"bookmaker_id\" int4 [not null]\n \"event_id\" int4 [not null]\n \"date_time\" timestamp\n \"team_id\" int4 [not null]\n \"person_id\" int4\n \"rotation_key\" varchar(100)\n \"comment\" varchar(100)\n \"vigorish\" varchar(100)\n \"line\" varchar(100)\n \"line_opening\" varchar(100)\n \"line_value\" varchar(100)\n \"prediction\" varchar(100)\n}\n\nTable \"wagering_straight_spread_lines\" {\n \"id\" int4 [not null, increment]\n \"bookmaker_id\" int4 [not null]\n \"event_id\" int4 [not null]\n \"date_time\" timestamp\n \"team_id\" int4 [not null]\n \"person_id\" int4\n \"rotation_key\" varchar(100)\n \"comment\" varchar(100)\n \"vigorish\" varchar(100)\n \"line_value\" varchar(100)\n \"line_value_opening\" varchar(100)\n \"prediction\" varchar(100)\n}\n\nTable \"wagering_total_score_lines\" {\n \"id\" int4 [not null, increment]\n \"bookmaker_id\" int4 [not null]\n \"event_id\" int4 [not null]\n \"date_time\" timestamp\n \"team_id\" int4 [not null]\n \"person_id\" int4\n \"rotation_key\" varchar(100)\n \"comment\" varchar(100)\n \"vigorish\" varchar(100)\n \"line_over\" varchar(100)\n \"line_under\" varchar(100)\n \"total\" varchar(100)\n \"total_opening\" varchar(100)\n \"prediction\" varchar(100)\n}\n\nTable \"weather_conditions\" {\n \"id\" int4 [not null, increment]\n \"event_id\" int4 [not null]\n \"temperature\" varchar(100)\n \"temperature_units\" varchar(40)\n \"humidity\" varchar(100)\n \"clouds\" varchar(100)\n \"wind_direction\" varchar(100)\n \"wind_velocity\" varchar(100)\n \"weather_code\" varchar(100)\n}\n", + "stateUrl": null, + "stateOriginalUrl": "https://github.com/yugabyte/yugabyte-db/blob/master/sample/sportsdb_tables.sql" + } +} diff --git 
def rows_match(actual_row, expected_row):
    """Return True when two result rows are equivalent.

    Numeric fields of matching type (Decimal/Decimal or float/float) are
    compared with an absolute tolerance of 0.001; every other pair must
    be exactly equal. Rows of different lengths never match.
    """
    if len(actual_row) != len(expected_row):
        return False

    for got, want in zip(actual_row, expected_row):
        both_decimal = isinstance(got, Decimal) and isinstance(want, Decimal)
        both_float = isinstance(got, float) and isinstance(want, float)
        if both_decimal:
            # Compare through float so one tolerance applies uniformly.
            if abs(float(got) - float(want)) > 0.001:
                return False
        elif both_float:
            if abs(got - want) > 0.001:
                return False
        else:
            # Non-numeric (or mixed-type) fields must match exactly.
            if got != want:
                return False

    return True

def get_connection_params() -> dict:
    """Build psycopg2 connection kwargs from POSTGRES_* env vars.

    Falls back to a local 'sports' database with the default
    postgres/postgres credentials when a variable is unset.
    """
    env = os.getenv
    return {
        "host": env("POSTGRES_HOST", "localhost"),
        "port": int(env("POSTGRES_PORT", 5432)),
        "database": env("POSTGRES_DATABASE", "sports"),
        "user": env("POSTGRES_USERNAME", "postgres"),
        "password": env("POSTGRES_PASSWORD", "postgres")
    }
def verify_performance_optimization(conn) -> bool:
    """Verify that key performance optimization indexes have been implemented."""
    with conn.cursor() as cur:
        print("\nšŸ” Checking for critical performance indexes...")

        # Check 1: participants_events.participant_id index (critical for subqueries)
        cur.execute("""
            SELECT indexname, indexdef
            FROM pg_indexes
            WHERE schemaname = 'public'
            AND tablename = 'participants_events'
            AND indexdef LIKE '%participant_id%'
        """)
        has_participant_index = len(cur.fetchall()) > 0

        # Check 2: stats table optimization (critical for subquery filtering)
        cur.execute("""
            SELECT indexname, indexdef
            FROM pg_indexes
            WHERE schemaname = 'public'
            AND tablename = 'stats'
            AND indexdef LIKE '%stat_holder_type%'
            AND indexdef LIKE '%stat_holder_id%'
        """)
        has_stats_index = len(cur.fetchall()) > 0

        # Report each finding and tally how many critical indexes exist.
        found = 0
        if has_participant_index:
            print("āœ… Found participant filtering index on participants_events.participant_id")
            found += 1
        else:
            print("āŒ Missing critical index on participants_events.participant_id")

        if has_stats_index:
            print("āœ… Found subquery optimization index on stats table")
            found += 1
        else:
            print("āŒ Missing critical index on stats table")

        # Both indexes are required for this subquery-heavy query.
        if found >= 2:
            print(f"\nāœ… Performance optimization: PASS ({found}/2 critical indexes found)")
            return True

        print(f"\nāŒ Performance optimization: FAIL ({found}/2 critical indexes found)")
        print("   Create these critical indexes:")
        print("   - CREATE INDEX ON participants_events(participant_id);")
        print("   - CREATE INDEX ON stats(stat_holder_type, stat_holder_id);")
        return False

def main():
    """Connect to the sports database, run the index checks, and exit 0/1."""
    print("=" * 50)
    print("Verifying Sports Task 3: Query Performance Optimization")
    print("=" * 50)

    conn_params = get_connection_params()
    if not conn_params["database"]:
        print("āŒ No database specified")
        sys.exit(1)

    try:
        conn = psycopg2.connect(**conn_params)
        passed = verify_performance_optimization(conn)
        conn.close()

        # sys.exit raises SystemExit, which is a BaseException and is
        # therefore not swallowed by the handlers below.
        if passed:
            print("\nšŸŽ‰ Task verification: PASS")
            sys.exit(0)
        print("\nāŒ Task verification: FAIL")
        sys.exit(1)

    except psycopg2.Error as e:
        print(f"āŒ Database error: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"āŒ Verification error: {e}")
        sys.exit(1)
def get_connection_params() -> dict:
    """Get database connection parameters.

    Host and port default to localhost:5432; database, user and password
    are taken verbatim from the environment and are None when the
    corresponding POSTGRES_* variable is unset (main() rejects a missing
    database before connecting).
    """
    return {
        "host": os.getenv("POSTGRES_HOST", "localhost"),
        "port": int(os.getenv("POSTGRES_PORT", 5432)),
        "database": os.getenv("POSTGRES_DATABASE"),
        "user": os.getenv("POSTGRES_USERNAME"),
        "password": os.getenv("POSTGRES_PASSWORD")
    }

def load_expected_customers():
    """Load the expected customer data from pickle file.

    Reads customer_data.pkl from the directory containing this script.
    Returns the unpickled object (a list of customer dicts consumed by
    verify_migrated_customers), or None after printing an error when the
    file is missing or unreadable.

    Note: the redundant function-local ``import os`` was removed; ``os``
    is already imported at module level.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    pkl_path = os.path.join(script_dir, 'customer_data.pkl')

    try:
        with open(pkl_path, 'rb') as f:
            # NOTE(review): pickle is only safe here because the file is
            # generated by this repo, not untrusted input.
            return pickle.load(f)
    except FileNotFoundError:
        print(f"āŒ customer_data.pkl not found at {pkl_path}. Please generate customer data first.")
        return None
    except Exception as e:
        print(f"āŒ Error loading customer data: {e}")
        return None
def verify_migrated_customers(conn, expected_customers) -> bool:
    """Verify migrated customers by comparing with expected data as sets.

    conn: open psycopg2 connection to the target database.
    expected_customers: list of customer dicts loaded from customer_data.pkl.
    Returns True when the migrated rows (CustomerId > 59) match the expected
    records exactly, independent of row order.
    """
    with conn.cursor() as cur:
        # Get all customers with ID > 59 (the migrated ones)
        cur.execute('''
            SELECT "FirstName", "LastName", "Company", "Address", "City",
                   "State", "Country", "PostalCode", "Phone", "Email",
                   "SupportRepId", "Fax"
            FROM "Customer"
            WHERE "CustomerId" > 59
        ''')

        actual_customers = cur.fetchall()

        # Cheap cardinality check before the detailed set comparison.
        if len(actual_customers) != len(expected_customers):
            print(f"āŒ Expected {len(expected_customers)} migrated customers, found {len(actual_customers)}")
            return False

        # Convert expected customers to tuples for set comparison
        # (tuples are hashable, so order-independent comparison is just
        # set equality).
        expected_tuples = set()
        for expected in expected_customers:
            expected_tuple = (
                expected['FirstName'], expected['LastName'], expected['Company'],
                expected['Address'], expected['City'], expected['State'],
                expected['Country'], expected['PostalCode'], expected['Phone'],
                expected['Email'], 3, None  # SupportRepId=3, Fax=None
            )
            expected_tuples.add(expected_tuple)

        # Convert actual customers to set with proper type conversion
        actual_tuples = set()
        for row in actual_customers:
            # Convert all fields to strings for consistent comparison
            # (NULL text columns become '' so they line up with the expected
            # data; SupportRepId stays an int, Fax is expected to be None).
            actual_tuple = (
                str(row[0]) if row[0] is not None else '',  # FirstName
                str(row[1]) if row[1] is not None else '',  # LastName
                str(row[2]) if row[2] is not None else '',  # Company
                str(row[3]) if row[3] is not None else '',  # Address
                str(row[4]) if row[4] is not None else '',  # City
                str(row[5]) if row[5] is not None else '',  # State
                str(row[6]) if row[6] is not None else '',  # Country
                str(row[7]) if row[7] is not None else '',  # PostalCode
                str(row[8]) if row[8] is not None else '',  # Phone
                str(row[9]) if row[9] is not None else '',  # Email
                int(row[10]) if row[10] is not None else None,  # SupportRepId
                row[11]  # Fax (should be None)
            )
            actual_tuples.add(actual_tuple)

        # Check if sets are equal
        if actual_tuples != expected_tuples:
            # Report both directions of the mismatch, truncated to keep the
            # output readable.
            missing_in_actual = expected_tuples - actual_tuples
            extra_in_actual = actual_tuples - expected_tuples

            print(f"āŒ Customer data sets don't match!")
            if missing_in_actual:
                print(f"   Missing {len(missing_in_actual)} expected customers")
                for missing in list(missing_in_actual)[:3]:  # Show first 3
                    print(f"     Missing: {missing[0]} {missing[1]} - {missing[2]}")
                if len(missing_in_actual) > 3:
                    print(f"     ... and {len(missing_in_actual) - 3} more")

            if extra_in_actual:
                print(f"   Found {len(extra_in_actual)} unexpected customers")
                for extra in list(extra_in_actual)[:3]:  # Show first 3
                    print(f"     Extra: {extra[0]} {extra[1]} - {extra[2]}")
                if len(extra_in_actual) > 3:
                    print(f"     ... and {len(extra_in_actual) - 3} more")

            return False

        print(f"āœ… All {len(expected_customers)} customers migrated correctly")
        print(f"āœ… All customers assigned to SupportRepId 3")
        print(f"āœ… All customers have Fax field set to NULL")
        print(f"āœ… Customer data sets match exactly (order-independent)")

        return True

def main():
    """Main verification function.

    Loads the expected data, connects using env-derived parameters, runs
    the migration check, and exits 0 on success / 1 on any failure.
    """
    print("=" * 60)
    print("Verifying Customer Data Migration Task")
    print("=" * 60)

    # Load expected customer data
    expected_customers = load_expected_customers()
    if not expected_customers:
        sys.exit(1)

    print(f"Loaded {len(expected_customers)} expected customer records")

    # Get connection parameters
    conn_params = get_connection_params()

    if not conn_params["database"]:
        print("āŒ No database specified")
        sys.exit(1)

    try:
        # Connect to database
        conn = psycopg2.connect(**conn_params)

        # Verify migration
        success = verify_migrated_customers(conn, expected_customers)

        conn.close()

        if success:
            print("\nšŸŽ‰ Task verification: PASS")
            sys.exit(0)
        else:
            print("\nāŒ Task verification: FAIL")
            sys.exit(1)

    except psycopg2.Error as e:
        print(f"āŒ Database error: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"āŒ Verification error: {e}")
        sys.exit(1)
tasks/postgres/standard/chinook/employee_hierarchy_management/meta.json diff --git a/tasks/postgres/chinook/employee_hierarchy_management/verify.py b/tasks/postgres/standard/chinook/employee_hierarchy_management/verify.py similarity index 100% rename from tasks/postgres/chinook/employee_hierarchy_management/verify.py rename to tasks/postgres/standard/chinook/employee_hierarchy_management/verify.py diff --git a/tasks/postgres/chinook/sales_and_music_charts/description.md b/tasks/postgres/standard/chinook/sales_and_music_charts/description.md similarity index 100% rename from tasks/postgres/chinook/sales_and_music_charts/description.md rename to tasks/postgres/standard/chinook/sales_and_music_charts/description.md diff --git a/tasks/postgres/chinook/sales_and_music_charts/meta.json b/tasks/postgres/standard/chinook/sales_and_music_charts/meta.json similarity index 100% rename from tasks/postgres/chinook/sales_and_music_charts/meta.json rename to tasks/postgres/standard/chinook/sales_and_music_charts/meta.json diff --git a/tasks/postgres/chinook/sales_and_music_charts/verify.py b/tasks/postgres/standard/chinook/sales_and_music_charts/verify.py similarity index 100% rename from tasks/postgres/chinook/sales_and_music_charts/verify.py rename to tasks/postgres/standard/chinook/sales_and_music_charts/verify.py diff --git a/tasks/postgres/dvdrental/customer_analysis_fix/description.md b/tasks/postgres/standard/dvdrental/customer_analysis_fix/description.md similarity index 100% rename from tasks/postgres/dvdrental/customer_analysis_fix/description.md rename to tasks/postgres/standard/dvdrental/customer_analysis_fix/description.md diff --git a/tasks/postgres/dvdrental/customer_analysis_fix/meta.json b/tasks/postgres/standard/dvdrental/customer_analysis_fix/meta.json similarity index 100% rename from tasks/postgres/dvdrental/customer_analysis_fix/meta.json rename to tasks/postgres/standard/dvdrental/customer_analysis_fix/meta.json diff --git 
def get_connection_params() -> dict:
    """Assemble psycopg2 connection keyword arguments.

    Host and port fall back to localhost:5432; database, user and
    password come straight from the environment and are None when the
    corresponding POSTGRES_* variable is unset (main() treats a missing
    database as a fatal configuration error).
    """
    return dict(
        host=os.getenv("POSTGRES_HOST", "localhost"),
        port=int(os.getenv("POSTGRES_PORT", 5432)),
        database=os.getenv("POSTGRES_DATABASE"),
        user=os.getenv("POSTGRES_USERNAME"),
        password=os.getenv("POSTGRES_PASSWORD"),
    )
def check_payment_customer_id_index(conn) -> tuple:
    """Check if there's any index on payment.customer_id column.

    conn: open psycopg2 connection.

    Returns a (has_index, indexes) pair: has_index is True when at least
    one index on public.payment mentions customer_id in its definition,
    and indexes is the list of matching (indexname, indexdef) rows.

    Fixes: the previous annotation claimed ``-> bool`` although the
    function has always returned a 2-tuple (main() unpacks it), and a
    leftover debug ``print(indexes)`` has been removed.
    """
    with conn.cursor() as cur:
        cur.execute("""
            SELECT indexname, indexdef
            FROM pg_indexes
            WHERE schemaname = 'public'
            AND tablename = 'payment'
            AND indexdef LIKE '%customer_id%'
        """)
        indexes = cur.fetchall()
        return len(indexes) > 0, indexes

def main():
    """Main verification function: connect, check the index, exit 0/1."""
    print("=" * 60)
    print("PostgreSQL Task 1 Verification: Customer Payment Query Optimization")
    print("=" * 60)

    # Get connection parameters
    conn_params = get_connection_params()

    if not conn_params["database"]:
        print("āŒ No database specified")
        sys.exit(1)

    try:
        # Connect to database
        conn = psycopg2.connect(**conn_params)

        print("\nšŸ” Checking for customer_id index on payment table...")

        # Check if any index exists on payment.customer_id
        has_index, indexes = check_payment_customer_id_index(conn)

        if has_index:
            print("āœ… Found index(es) on payment.customer_id:")
            for index_name, index_def in indexes:
                print(f"   - {index_name}: {index_def}")
        else:
            print("āŒ No index found on payment.customer_id column")

        conn.close()

        if has_index:
            print(f"\nšŸŽ‰ Task verification: PASS")
            print(f"   - Index on payment.customer_id exists")
            sys.exit(0)
        else:
            print(f"\nāŒ Task verification: FAIL")
            print(f"   - No index found on payment.customer_id")
            print(f"   - Create an index on payment(customer_id) to optimize the queries")
            sys.exit(1)

    except psycopg2.Error as e:
        print(f"āŒ Database error: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"āŒ Verification error: {e}")
        sys.exit(1)
b/tasks/postgres/standard/dvdrental/film_inventory_management/meta.json similarity index 100% rename from tasks/postgres/dvdrental/film_inventory_management/meta.json rename to tasks/postgres/standard/dvdrental/film_inventory_management/meta.json diff --git a/tasks/postgres/dvdrental/film_inventory_management/verify.py b/tasks/postgres/standard/dvdrental/film_inventory_management/verify.py similarity index 100% rename from tasks/postgres/dvdrental/film_inventory_management/verify.py rename to tasks/postgres/standard/dvdrental/film_inventory_management/verify.py diff --git a/tasks/postgres/employees/employee_demographics_report/description.md b/tasks/postgres/standard/employees/employee_demographics_report/description.md similarity index 100% rename from tasks/postgres/employees/employee_demographics_report/description.md rename to tasks/postgres/standard/employees/employee_demographics_report/description.md diff --git a/tasks/postgres/employees/employee_demographics_report/meta.json b/tasks/postgres/standard/employees/employee_demographics_report/meta.json similarity index 100% rename from tasks/postgres/employees/employee_demographics_report/meta.json rename to tasks/postgres/standard/employees/employee_demographics_report/meta.json diff --git a/tasks/postgres/employees/employee_demographics_report/verify.py b/tasks/postgres/standard/employees/employee_demographics_report/verify.py similarity index 100% rename from tasks/postgres/employees/employee_demographics_report/verify.py rename to tasks/postgres/standard/employees/employee_demographics_report/verify.py diff --git a/tasks/postgres/employees/employee_performance_analysis/description.md b/tasks/postgres/standard/employees/employee_performance_analysis/description.md similarity index 100% rename from tasks/postgres/employees/employee_performance_analysis/description.md rename to tasks/postgres/standard/employees/employee_performance_analysis/description.md diff --git 
a/tasks/postgres/employees/employee_performance_analysis/meta.json b/tasks/postgres/standard/employees/employee_performance_analysis/meta.json similarity index 100% rename from tasks/postgres/employees/employee_performance_analysis/meta.json rename to tasks/postgres/standard/employees/employee_performance_analysis/meta.json diff --git a/tasks/postgres/employees/employee_performance_analysis/verify.py b/tasks/postgres/standard/employees/employee_performance_analysis/verify.py similarity index 100% rename from tasks/postgres/employees/employee_performance_analysis/verify.py rename to tasks/postgres/standard/employees/employee_performance_analysis/verify.py diff --git a/tasks/postgres/employees/employee_project_tracking/description.md b/tasks/postgres/standard/employees/employee_project_tracking/description.md similarity index 100% rename from tasks/postgres/employees/employee_project_tracking/description.md rename to tasks/postgres/standard/employees/employee_project_tracking/description.md diff --git a/tasks/postgres/employees/employee_project_tracking/meta.json b/tasks/postgres/standard/employees/employee_project_tracking/meta.json similarity index 100% rename from tasks/postgres/employees/employee_project_tracking/meta.json rename to tasks/postgres/standard/employees/employee_project_tracking/meta.json diff --git a/tasks/postgres/employees/employee_project_tracking/verify.py b/tasks/postgres/standard/employees/employee_project_tracking/verify.py similarity index 100% rename from tasks/postgres/employees/employee_project_tracking/verify.py rename to tasks/postgres/standard/employees/employee_project_tracking/verify.py diff --git a/tasks/postgres/employees/employee_retention_analysis/description.md b/tasks/postgres/standard/employees/employee_retention_analysis/description.md similarity index 100% rename from tasks/postgres/employees/employee_retention_analysis/description.md rename to tasks/postgres/standard/employees/employee_retention_analysis/description.md diff 
--git a/tasks/postgres/employees/employee_retention_analysis/meta.json b/tasks/postgres/standard/employees/employee_retention_analysis/meta.json similarity index 100% rename from tasks/postgres/employees/employee_retention_analysis/meta.json rename to tasks/postgres/standard/employees/employee_retention_analysis/meta.json diff --git a/tasks/postgres/employees/employee_retention_analysis/verify.py b/tasks/postgres/standard/employees/employee_retention_analysis/verify.py similarity index 100% rename from tasks/postgres/employees/employee_retention_analysis/verify.py rename to tasks/postgres/standard/employees/employee_retention_analysis/verify.py diff --git a/tasks/postgres/employees/executive_dashboard_automation/description.md b/tasks/postgres/standard/employees/executive_dashboard_automation/description.md similarity index 100% rename from tasks/postgres/employees/executive_dashboard_automation/description.md rename to tasks/postgres/standard/employees/executive_dashboard_automation/description.md diff --git a/tasks/postgres/employees/executive_dashboard_automation/meta.json b/tasks/postgres/standard/employees/executive_dashboard_automation/meta.json similarity index 100% rename from tasks/postgres/employees/executive_dashboard_automation/meta.json rename to tasks/postgres/standard/employees/executive_dashboard_automation/meta.json diff --git a/tasks/postgres/employees/executive_dashboard_automation/verify.py b/tasks/postgres/standard/employees/executive_dashboard_automation/verify.py similarity index 100% rename from tasks/postgres/employees/executive_dashboard_automation/verify.py rename to tasks/postgres/standard/employees/executive_dashboard_automation/verify.py diff --git a/tasks/postgres/employees/management_structure_analysis/description.md b/tasks/postgres/standard/employees/management_structure_analysis/description.md similarity index 100% rename from tasks/postgres/employees/management_structure_analysis/description.md rename to 
tasks/postgres/standard/employees/management_structure_analysis/description.md diff --git a/tasks/postgres/employees/management_structure_analysis/meta.json b/tasks/postgres/standard/employees/management_structure_analysis/meta.json similarity index 100% rename from tasks/postgres/employees/management_structure_analysis/meta.json rename to tasks/postgres/standard/employees/management_structure_analysis/meta.json diff --git a/tasks/postgres/employees/management_structure_analysis/verify.py b/tasks/postgres/standard/employees/management_structure_analysis/verify.py similarity index 100% rename from tasks/postgres/employees/management_structure_analysis/verify.py rename to tasks/postgres/standard/employees/management_structure_analysis/verify.py diff --git a/tasks/postgres/lego/consistency_enforcement/description.md b/tasks/postgres/standard/lego/consistency_enforcement/description.md similarity index 100% rename from tasks/postgres/lego/consistency_enforcement/description.md rename to tasks/postgres/standard/lego/consistency_enforcement/description.md diff --git a/tasks/postgres/lego/consistency_enforcement/meta.json b/tasks/postgres/standard/lego/consistency_enforcement/meta.json similarity index 100% rename from tasks/postgres/lego/consistency_enforcement/meta.json rename to tasks/postgres/standard/lego/consistency_enforcement/meta.json diff --git a/tasks/postgres/lego/consistency_enforcement/verify.py b/tasks/postgres/standard/lego/consistency_enforcement/verify.py similarity index 100% rename from tasks/postgres/lego/consistency_enforcement/verify.py rename to tasks/postgres/standard/lego/consistency_enforcement/verify.py diff --git a/tasks/postgres/lego/database_security_policies/description.md b/tasks/postgres/standard/lego/database_security_policies/description.md similarity index 100% rename from tasks/postgres/lego/database_security_policies/description.md rename to tasks/postgres/standard/lego/database_security_policies/description.md diff --git 
a/tasks/postgres/lego/database_security_policies/meta.json b/tasks/postgres/standard/lego/database_security_policies/meta.json similarity index 100% rename from tasks/postgres/lego/database_security_policies/meta.json rename to tasks/postgres/standard/lego/database_security_policies/meta.json diff --git a/tasks/postgres/lego/database_security_policies/verify.py b/tasks/postgres/standard/lego/database_security_policies/verify.py similarity index 100% rename from tasks/postgres/lego/database_security_policies/verify.py rename to tasks/postgres/standard/lego/database_security_policies/verify.py diff --git a/tasks/postgres/lego/transactional_inventory_transfer/description.md b/tasks/postgres/standard/lego/transactional_inventory_transfer/description.md similarity index 100% rename from tasks/postgres/lego/transactional_inventory_transfer/description.md rename to tasks/postgres/standard/lego/transactional_inventory_transfer/description.md diff --git a/tasks/postgres/lego/transactional_inventory_transfer/meta.json b/tasks/postgres/standard/lego/transactional_inventory_transfer/meta.json similarity index 100% rename from tasks/postgres/lego/transactional_inventory_transfer/meta.json rename to tasks/postgres/standard/lego/transactional_inventory_transfer/meta.json diff --git a/tasks/postgres/lego/transactional_inventory_transfer/verify.py b/tasks/postgres/standard/lego/transactional_inventory_transfer/verify.py similarity index 100% rename from tasks/postgres/lego/transactional_inventory_transfer/verify.py rename to tasks/postgres/standard/lego/transactional_inventory_transfer/verify.py diff --git a/tasks/postgres/security/rls_business_access/description.md b/tasks/postgres/standard/security/rls_business_access/description.md similarity index 100% rename from tasks/postgres/security/rls_business_access/description.md rename to tasks/postgres/standard/security/rls_business_access/description.md diff --git a/tasks/postgres/security/rls_business_access/ground_truth.sql 
b/tasks/postgres/standard/security/rls_business_access/ground_truth.sql similarity index 100% rename from tasks/postgres/security/rls_business_access/ground_truth.sql rename to tasks/postgres/standard/security/rls_business_access/ground_truth.sql diff --git a/tasks/postgres/security/rls_business_access/meta.json b/tasks/postgres/standard/security/rls_business_access/meta.json similarity index 100% rename from tasks/postgres/security/rls_business_access/meta.json rename to tasks/postgres/standard/security/rls_business_access/meta.json diff --git a/tasks/postgres/security/rls_business_access/prepare_environment.py b/tasks/postgres/standard/security/rls_business_access/prepare_environment.py similarity index 100% rename from tasks/postgres/security/rls_business_access/prepare_environment.py rename to tasks/postgres/standard/security/rls_business_access/prepare_environment.py diff --git a/tasks/postgres/security/rls_business_access/verify.py b/tasks/postgres/standard/security/rls_business_access/verify.py similarity index 100% rename from tasks/postgres/security/rls_business_access/verify.py rename to tasks/postgres/standard/security/rls_business_access/verify.py diff --git a/tasks/postgres/security/user_permission_audit/description.md b/tasks/postgres/standard/security/user_permission_audit/description.md similarity index 100% rename from tasks/postgres/security/user_permission_audit/description.md rename to tasks/postgres/standard/security/user_permission_audit/description.md diff --git a/tasks/postgres/security/user_permission_audit/ground_truth.sql b/tasks/postgres/standard/security/user_permission_audit/ground_truth.sql similarity index 100% rename from tasks/postgres/security/user_permission_audit/ground_truth.sql rename to tasks/postgres/standard/security/user_permission_audit/ground_truth.sql diff --git a/tasks/postgres/security/user_permission_audit/meta.json b/tasks/postgres/standard/security/user_permission_audit/meta.json similarity index 100% rename from 
tasks/postgres/security/user_permission_audit/meta.json rename to tasks/postgres/standard/security/user_permission_audit/meta.json diff --git a/tasks/postgres/security/user_permission_audit/prepare_environment.py b/tasks/postgres/standard/security/user_permission_audit/prepare_environment.py similarity index 100% rename from tasks/postgres/security/user_permission_audit/prepare_environment.py rename to tasks/postgres/standard/security/user_permission_audit/prepare_environment.py diff --git a/tasks/postgres/security/user_permission_audit/verify.py b/tasks/postgres/standard/security/user_permission_audit/verify.py similarity index 100% rename from tasks/postgres/security/user_permission_audit/verify.py rename to tasks/postgres/standard/security/user_permission_audit/verify.py diff --git a/tasks/postgres/sports/baseball_player_analysis/description.md b/tasks/postgres/standard/sports/baseball_player_analysis/description.md similarity index 100% rename from tasks/postgres/sports/baseball_player_analysis/description.md rename to tasks/postgres/standard/sports/baseball_player_analysis/description.md diff --git a/tasks/postgres/sports/baseball_player_analysis/meta.json b/tasks/postgres/standard/sports/baseball_player_analysis/meta.json similarity index 100% rename from tasks/postgres/sports/baseball_player_analysis/meta.json rename to tasks/postgres/standard/sports/baseball_player_analysis/meta.json diff --git a/tasks/postgres/sports/baseball_player_analysis/verify.py b/tasks/postgres/standard/sports/baseball_player_analysis/verify.py similarity index 100% rename from tasks/postgres/sports/baseball_player_analysis/verify.py rename to tasks/postgres/standard/sports/baseball_player_analysis/verify.py diff --git a/tasks/postgres/sports/participant_report_optimization/description.md b/tasks/postgres/standard/sports/participant_report_optimization/description.md similarity index 100% rename from tasks/postgres/sports/participant_report_optimization/description.md rename to 
tasks/postgres/standard/sports/participant_report_optimization/description.md diff --git a/tasks/postgres/sports/participant_report_optimization/meta.json b/tasks/postgres/standard/sports/participant_report_optimization/meta.json similarity index 100% rename from tasks/postgres/sports/participant_report_optimization/meta.json rename to tasks/postgres/standard/sports/participant_report_optimization/meta.json diff --git a/tasks/postgres/sports/participant_report_optimization/verify.py b/tasks/postgres/standard/sports/participant_report_optimization/verify.py similarity index 100% rename from tasks/postgres/sports/participant_report_optimization/verify.py rename to tasks/postgres/standard/sports/participant_report_optimization/verify.py diff --git a/tasks/postgres/sports/team_roster_management/description.md b/tasks/postgres/standard/sports/team_roster_management/description.md similarity index 100% rename from tasks/postgres/sports/team_roster_management/description.md rename to tasks/postgres/standard/sports/team_roster_management/description.md diff --git a/tasks/postgres/sports/team_roster_management/meta.json b/tasks/postgres/standard/sports/team_roster_management/meta.json similarity index 100% rename from tasks/postgres/sports/team_roster_management/meta.json rename to tasks/postgres/standard/sports/team_roster_management/meta.json diff --git a/tasks/postgres/sports/team_roster_management/verify.py b/tasks/postgres/standard/sports/team_roster_management/verify.py similarity index 100% rename from tasks/postgres/sports/team_roster_management/verify.py rename to tasks/postgres/standard/sports/team_roster_management/verify.py diff --git a/tasks/postgres/vectors/dba_vector_analysis/description.md b/tasks/postgres/standard/vectors/dba_vector_analysis/description.md similarity index 100% rename from tasks/postgres/vectors/dba_vector_analysis/description.md rename to tasks/postgres/standard/vectors/dba_vector_analysis/description.md diff --git 
a/tasks/postgres/vectors/dba_vector_analysis/ground_truth.sql b/tasks/postgres/standard/vectors/dba_vector_analysis/ground_truth.sql similarity index 100% rename from tasks/postgres/vectors/dba_vector_analysis/ground_truth.sql rename to tasks/postgres/standard/vectors/dba_vector_analysis/ground_truth.sql diff --git a/tasks/postgres/vectors/dba_vector_analysis/meta.json b/tasks/postgres/standard/vectors/dba_vector_analysis/meta.json similarity index 100% rename from tasks/postgres/vectors/dba_vector_analysis/meta.json rename to tasks/postgres/standard/vectors/dba_vector_analysis/meta.json diff --git a/tasks/postgres/vectors/dba_vector_analysis/prepare_environment.py b/tasks/postgres/standard/vectors/dba_vector_analysis/prepare_environment.py similarity index 100% rename from tasks/postgres/vectors/dba_vector_analysis/prepare_environment.py rename to tasks/postgres/standard/vectors/dba_vector_analysis/prepare_environment.py diff --git a/tasks/postgres/vectors/dba_vector_analysis/verify.py b/tasks/postgres/standard/vectors/dba_vector_analysis/verify.py similarity index 100% rename from tasks/postgres/vectors/dba_vector_analysis/verify.py rename to tasks/postgres/standard/vectors/dba_vector_analysis/verify.py diff --git a/tasks/postgres/vectors/vectors_setup.py b/tasks/postgres/standard/vectors/vectors_setup.py similarity index 100% rename from tasks/postgres/vectors/vectors_setup.py rename to tasks/postgres/standard/vectors/vectors_setup.py From 5011af17f3ec3ed938ec386bf88a05eaaf33e5fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Czjwu0522=E2=80=9D?= Date: Tue, 2 Dec 2025 14:40:02 +0000 Subject: [PATCH 2/2] fix: fix aggr for easy task suite, fix overcomplicated error standarlize --- src/aggregators/aggregate_results.py | 34 ++++++++++++++++++++-------- src/errors.py | 4 ---- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/aggregators/aggregate_results.py b/src/aggregators/aggregate_results.py index 88c99346..30b54e3d 100755 --- 
a/src/aggregators/aggregate_results.py +++ b/src/aggregators/aggregate_results.py @@ -84,14 +84,27 @@ def collect_results(exp_dir: Path, k: int) -> Dict[str, Dict[str, Any]]: results = defaultdict(lambda: defaultdict(lambda: defaultdict(dict))) # Current layout: results//__/run-N/__/ + # Some pipelines include task-set suffix in service dir (e.g., "filesystem-easy"). + # Normalize such names back to canonical service keys used by tasks/ (filesystem, github, notion, playwright, postgres). + + def normalize_service_name(name: str) -> str: + # Strip known task-set suffixes like "-easy" or "-standard" + if name.endswith("-easy") or name.endswith("-standard"): + base = name.rsplit("-", 1)[0] + else: + base = name + + # Map variant names to canonical service + if base == "playwright_webarena": + return "playwright" + return base for model_service_dir in exp_dir.iterdir(): if not model_service_dir.is_dir() or "__" not in model_service_dir.name: continue model, service = model_service_dir.name.split("__", 1) - # Normalize service name: treat playwright_webarena as playwright - if service == "playwright_webarena": - service = "playwright" + # Normalize service name (handles playwright_webarena and *-easy/*-standard suffixes) + service = normalize_service_name(service) for run_idx in range(1, k + 1): run_dir = model_service_dir / f"run-{run_idx}" @@ -906,6 +919,7 @@ def main(): help="Which task subset to aggregate (default: standard)" ) parser.add_argument("--push", action="store_true", help="Push to GitHub (default to main)") + # Note: we intentionally do not expose flags that include invalid models by default args = parser.parse_args() @@ -942,13 +956,15 @@ def main(): # Print validation report with summary table print_validation_report(complete_models, incomplete_models, invalid_models, all_tasks, args.k, single_run_models, results) - - if not complete_models: + + # Determine which models to include in output (strict: only complete models) + models_for_output = 
dict(complete_models) + if not models_for_output: return 1 # Calculate metrics print("\nšŸ“Š Calculating metrics...") - summary = calculate_metrics(complete_models, all_tasks, args.k, single_run_models) + summary = calculate_metrics(models_for_output, all_tasks, args.k, single_run_models) summary["experiment_name"] = args.exp_name summary["task_set"] = args.task_set @@ -960,12 +976,12 @@ def main(): # Generate model_results print("šŸ“ Generating model_results...") - generate_model_results(exp_dir, complete_models, all_tasks) - print(f" Created {len(complete_models)} model directories") + generate_model_results(exp_dir, models_for_output, all_tasks) + print(f" Created {len(models_for_output)} model directories") # Generate task_results print("šŸ“ Generating task_results...") - generate_task_results(exp_dir, complete_models, all_tasks) + generate_task_results(exp_dir, models_for_output, all_tasks) print(f" Created {total_tasks} task files") # Generate README diff --git a/src/errors.py b/src/errors.py index d39d5f05..759ba5b9 100644 --- a/src/errors.py +++ b/src/errors.py @@ -46,14 +46,10 @@ def standardize_error_message(error: str, mcp_service: Optional[str] = None) -> "connection refused" in error_str.lower() or "econnrefused" in error_str.lower() ): base_msg = "Connection refused" - elif "authentication" in error_str.lower() or "unauthorized" in error_str.lower(): - base_msg = "Authentication failed" elif "not found" in error_str.lower(): base_msg = "Resource not found" elif "already exists" in error_str.lower(): base_msg = "Resource already exists" - elif "mcp" in error_str.lower() and "error" in error_str.lower(): - base_msg = "MCP service error" else: # Return original message if no standardization applies return error_str