From 084e29ee45b58cf78c4354eeb3b03f6496dae7b4 Mon Sep 17 00:00:00 2001 From: splimon Date: Fri, 3 Apr 2026 14:59:58 -1000 Subject: [PATCH 1/8] Add Documentation Notebooks: Connect GitHub Gold Standard to GHTorrent and download via Kaiaulu Adds five notebooks that build a pipeline to contextualize the GitHub Gold Standard sentiment dataset (7,122 comments) with GHTorrent project context and re-download comment data via Kaiaulu: - Notebook 1: Load the GitHub Gold Standard sentiment CSV into a GHTorrent MySQL database - Notebook 2: Explore GHTorrent tables to map sentiment comments to main project repos - Notebook 3: Auto-generate Kaiaulu .yml config files for 82 main project repos - Notebook 4: Download and parse commit comments via Kaiaulu - Notebook 5: Download and parse PR inline comments via Kaiaulu --- 01_load_sentiment_csv_to_mysql.ipynb | 187 +++++++++++++ 02_explore_gh_torrent_tables.ipynb | 237 ++++++++++++++++ 03_scale_config_files.ipynb | 403 +++++++++++++++++++++++++++ 04_download_commit_comments.ipynb | 162 +++++++++++ 05_download_PR_inline_comments.ipynb | 186 +++++++++++++ 5 files changed, 1175 insertions(+) create mode 100644 01_load_sentiment_csv_to_mysql.ipynb create mode 100644 02_explore_gh_torrent_tables.ipynb create mode 100644 03_scale_config_files.ipynb create mode 100644 04_download_commit_comments.ipynb create mode 100644 05_download_PR_inline_comments.ipynb diff --git a/01_load_sentiment_csv_to_mysql.ipynb b/01_load_sentiment_csv_to_mysql.ipynb new file mode 100644 index 0000000..78548ab --- /dev/null +++ b/01_load_sentiment_csv_to_mysql.ipynb @@ -0,0 +1,187 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0b0b299a", + "metadata": {}, + "source": [ + "# Load Sentiment CSV into GHTorrent MySQL (Notebook 1)\n", + "\n", + "This notebook shows how to load the GitHub Gold Standard sentiment CSV into a MySQL database that already has the GHTorrent 2014 dump. It also includes quick checks to make sure the data loaded correctly." 
+ ] + }, + { + "cell_type": "markdown", + "id": "bd368c02", + "metadata": {}, + "source": [ + "### Planned Output\n", + "By the end of this notebook, you should have:\n", + "1. A `comment_sentiment` table in MySQL\n", + "2. All rows from `comment_sentiment.csv` loaded\n", + "3. Query results that confirm row counts and valid joins to GHTorrent project data" + ] + }, + { + "cell_type": "markdown", + "id": "0acf37b4", + "metadata": {}, + "source": [ + "### Step 1: Get the data ready\n", + "\n", + "1. Download the [GitHub Gold Standard dataset](https://figshare.com/articles/dataset/A_gold_standard_for_polarity_of_emotions_of_software_developers_in_GitHub/11604597?file=21001260).\n", + "2. Rename the file to `comment_sentiment.csv`.\n", + "3. Download the [GHTorrent 2014 MySQL Database Dump](https://web.archive.org/web/20150206005357/http://ghtorrent.org/msr14.html) and make sure it is already loaded in your MySQL database (example: `github`).\n", + "4. Make sure MySQL can read your CSV file path (e.g., `~/Desktop/github/sentiment_github_dataset/comment_sentiment.csv`).\n", + "\n", + "Optional reference: [GHTorrent schema diagram](https://web.archive.org/web/20150206005412/http://ghtorrent.org/relational.html)." + ] + }, + { + "cell_type": "markdown", + "id": "038f5498", + "metadata": {}, + "source": [ + "### Step 2: Create the table, load the CSV, and run the original join queries\n", + "\n", + "Use these copy-ready blocks one at a time.\n", + "\n", + "Start MySQL with local file loading turned on. 
Run on bash:\n", + "\n", + "```bash\n", + "mysql --local-infile=1 -u root -p\n", + "```\n", + "---\n", + "\n", + "Select your database (e.g., `github`):\n", + "\n", + "```sql\n", + "USE github;\n", + "```\n", + "---\n", + "\n", + "Drop old table if it exists (safe to re-run):\n", + "\n", + "```sql\n", + "DROP TABLE IF EXISTS comment_sentiment;\n", + "```\n", + "\n", + "---\n", + "\n", + "Create table:\n", + "\n", + "```sql\n", + "CREATE TABLE comment_sentiment (\n", + " ID INT NULL,\n", + " Polarity VARCHAR(256) NULL,\n", + " Text TEXT NULL\n", + ");\n", + "```\n", + "\n", + "---\n", + "\n", + "Load CSV (replace with your absolute path if needed):\n", + "\n", + "```sql\n", + "LOAD DATA LOCAL INFILE 'comment_sentiment.csv'\n", + "INTO TABLE comment_sentiment\n", + "FIELDS TERMINATED BY ';'\n", + "ENCLOSED BY '\"'\n", + "LINES TERMINATED BY '\\n'\n", + "IGNORE 1 LINES\n", + "(ID, Polarity, Text);\n", + "```\n", + "\n", + "---\n", + "\n", + "Query 1 — show joined sentiment + commit + project rows (sample view):\n", + "\n", + "```sql\n", + "-- Returns joined rows from sentiment comments to commit/project data\n", + "SELECT * FROM comment_sentiment s\n", + "INNER JOIN commit_comments cc ON s.ID = cc.comment_id\n", + "INNER JOIN commits c ON c.id = cc.commit_id\n", + "INNER JOIN projects p ON c.project_id = p.id;\n", + "```\n", + "\n", + "---\n", + "\n", + "Query 2 — count sentiment-linked comments by project name:\n", + "\n", + "```sql\n", + "-- Aggregates joined rows by project name and sorts by largest counts\n", + "SELECT name, count(name) as count FROM comment_sentiment s\n", + "INNER JOIN commit_comments cc ON s.ID = cc.comment_id\n", + "INNER JOIN commits c ON c.id = cc.commit_id\n", + "INNER JOIN projects p ON c.project_id = p.id\n", + "GROUP BY name\n", + "ORDER BY count desc;\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "f20c3d16", + "metadata": {}, + "source": [ + "### Step 3: Run validation checks\n", + "\n", + "Use these checks to confirm the 
load worked correctly.\n", + "\n", + "Check 1 — total rows (expected: 7122):\n", + "\n", + "```sql\n", + "SELECT COUNT(*) AS total_rows FROM comment_sentiment;\n", + "```\n", + "\n", + "---\n", + "\n", + "Check 2 — distinct comment IDs (expected: 7122):\n", + "\n", + "```sql\n", + "SELECT COUNT(DISTINCT ID) AS distinct_comment_ids FROM comment_sentiment;\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "65b9dc8c", + "metadata": {}, + "source": [ + "### Optional troubleshooting\n", + "\n", + "If `LOAD DATA LOCAL INFILE` fails or the row count is too low:\n", + "\n", + "1. Check the row count. If it is below 7,122 rows, try the fixes below:\n", + "\n", + "```sql\n", + "SELECT COUNT(*) AS total_rows FROM comment_sentiment;\n", + "```\n", + "\n", + "2. Try these fixes:\n", + "- Use an absolute file path in `LOAD DATA LOCAL INFILE`\n", + "- Make sure `--local-infile=1` is enabled\n", + "- Make sure the file format matches your settings (`;` delimiter and quoted text)\n", + "\n", + "3. If needed, use the following Python CSV loader script ([import_csv_to_mysql.py](https://github.com/user-attachments/files/25094159/import_csv_to_mysql.py)), then run the same checks again. This option uses Python's CSV parser and requires `mysql-connector-python` to be installed." + ] + }, + { + "cell_type": "markdown", + "id": "55c20287", + "metadata": {}, + "source": [ + "### When to move to Notebook 2\n", + "\n", + "Move to Notebook 2 only after `total_rows = 7122` and the Step 2 join queries return at least one row." 
+ ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/02_explore_gh_torrent_tables.ipynb b/02_explore_gh_torrent_tables.ipynb new file mode 100644 index 0000000..b352660 --- /dev/null +++ b/02_explore_gh_torrent_tables.ipynb @@ -0,0 +1,237 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1b3dd1e0", + "metadata": {}, + "source": [ + "# Explore GHTorrent Tables for Sentiment Mapping (Notebook 2)\n", + "\n", + "This notebook helps you understand where sentiment-labeled comments are stored in GHTorrent and how they connect to projects. These checks are for exploration and validation. You do not need to run every query to run the end-to-end workflow." + ] + }, + { + "cell_type": "markdown", + "id": "57b61bad", + "metadata": {}, + "source": [ + "### Planned Output\n", + "By the end of this notebook, you should have:\n", + "1. A clear view of how sentiment comments are split across commit vs PR comment tables\n", + "2. A ranked list of projects with sentiment-labeled commit comments\n", + "3. A ranked list of projects with sentiment-labeled PR comments\n", + "4. 
A global summary of comments reachable from canonical repos vs forks" + ] + }, + { + "cell_type": "markdown", + "id": "ac09f08e", + "metadata": {}, + "source": [ + "### Check 1: How sentiment comments are distributed\n", + "\n", + "Use these queries to see how many sentiment comments are in commit comments, PR comments, and both tables.\n", + "\n", + "Count sentiment comments in `commit_comments` (expected: 4317):\n", + "\n", + "```sql\n", + "SELECT COUNT(*)\n", + "FROM comment_sentiment s\n", + "INNER JOIN commit_comments cc ON s.ID = cc.comment_id;\n", + "```\n", + "\n", + "---\n", + "\n", + "Count sentiment comments in `pull_request_comments` (expected: 2890):\n", + "\n", + "```sql\n", + "SELECT COUNT(*)\n", + "FROM comment_sentiment s\n", + "INNER JOIN pull_request_comments prc ON s.ID = prc.comment_id;\n", + "```\n", + "\n", + "---\n", + "\n", + "Count overlap that appears in both tables (expected: 85):\n", + "\n", + "```sql\n", + "SELECT COUNT(*) AS both_tables\n", + "FROM comment_sentiment s\n", + "INNER JOIN commit_comments cc ON s.ID = cc.comment_id\n", + "INNER JOIN pull_request_comments prc ON s.ID = prc.comment_id;\n", + "```\n", + "\n", + "Quick interpretation:\n", + "- Commit-only = 4317 - 85 = 4232\n", + "- PR-only = 2890 - 85 = 2805\n", + "- Both = 85\n", + "- Total unique comments = 7122" + ] + }, + { + "cell_type": "markdown", + "id": "f6cee0f8", + "metadata": {}, + "source": [ + "### Check 2: Projects with the most sentiment-labeled commit comments\n", + "\n", + "Use this to rank projects by number of labeled commit comments.\n", + "\n", + "```sql\n", + "SELECT p.id, p.name, p.url, COUNT(DISTINCT s.ID) AS labeled_comment_count\n", + "FROM projects p\n", + "INNER JOIN commits c ON p.id = c.project_id\n", + "INNER JOIN commit_comments cc ON c.id = cc.commit_id\n", + "INNER JOIN comment_sentiment s ON cc.comment_id = s.ID\n", + "GROUP BY p.id, p.name, p.url\n", + "ORDER BY labeled_comment_count DESC;\n", + "```\n", + "\n", + "---\n", + "\n", + "Use 
this to inspect example rows for one project (replace `{owner}` and `{repo}`):\n", + "\n", + "```sql\n", + "SELECT c.sha, p.url, p.name, s.ID AS comment_id, s.Text AS comment_text\n", + "FROM commits c\n", + "INNER JOIN projects p ON c.project_id = p.id\n", + "INNER JOIN commit_comments cc ON c.id = cc.commit_id\n", + "INNER JOIN comment_sentiment s ON cc.comment_id = s.ID\n", + "WHERE p.url = 'https://api.github.com/repos/{owner}/{repo}';\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a37a5ebc", + "metadata": {}, + "source": [ + "### Check 3: Projects with the most sentiment-labeled PR comments\n", + "\n", + "Use this to rank projects by number of labeled PR comments.\n", + "\n", + "```sql\n", + "SELECT p.id, p.name, p.url, COUNT(DISTINCT s.ID) AS labeled_comment_count\n", + "FROM projects p\n", + "INNER JOIN pull_requests pr ON p.id = pr.base_repo_id\n", + "INNER JOIN pull_request_comments prc ON pr.id = prc.pull_request_id\n", + "INNER JOIN comment_sentiment s ON prc.comment_id = s.ID\n", + "GROUP BY p.id, p.name, p.url\n", + "ORDER BY labeled_comment_count DESC;\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a6a740e8", + "metadata": {}, + "source": [ + "### Check 4: Canonical repo vs fork accessibility summary\n", + "\n", + "This query estimates how many sentiment comments are reachable from canonical repos vs only from forks.\n", + "\n", + "```sql\n", + "WITH RECURSIVE project_root AS (\n", + " SELECT p.id AS project_id, p.id AS root_id\n", + " FROM projects p\n", + " WHERE p.forked_from IS NULL\n", + " UNION ALL\n", + " SELECT c.id AS project_id, pr.root_id\n", + " FROM projects c\n", + " JOIN project_root pr ON c.forked_from = pr.project_id\n", + "),\n", + "comment_project_rows AS (\n", + " SELECT cs.ID AS comment_id, c.project_id, 'commit_comment' AS source_tag\n", + " FROM comment_sentiment cs\n", + " JOIN commit_comments cc ON cs.ID = cc.comment_id\n", + " JOIN commits c ON cc.commit_id = c.id\n", + "\n", + " UNION ALL\n", + 
"\n", + " SELECT cs.ID AS comment_id, pr.base_repo_id AS project_id, 'pr_comment' AS source_tag\n", + " FROM comment_sentiment cs\n", + " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", + " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", + "\n", + " UNION ALL\n", + "\n", + " SELECT cs.ID AS comment_id, pr.head_repo_id AS project_id, 'pr_comment' AS source_tag\n", + " FROM comment_sentiment cs\n", + " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", + " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", + "),\n", + "labeled AS (\n", + " SELECT\n", + " cpr.comment_id,\n", + " cpr.source_tag,\n", + " pr.root_id,\n", + " (cpr.project_id = pr.root_id) AS is_canonical\n", + " FROM comment_project_rows cpr\n", + " JOIN project_root pr ON pr.project_id = cpr.project_id\n", + "),\n", + "comment_flags AS (\n", + " SELECT\n", + " root_id,\n", + " source_tag,\n", + " comment_id,\n", + " MAX(CASE WHEN is_canonical THEN 1 ELSE 0 END) AS has_canonical,\n", + " MAX(CASE WHEN NOT is_canonical THEN 1 ELSE 0 END) AS has_fork\n", + " FROM labeled\n", + " GROUP BY root_id, source_tag, comment_id\n", + "),\n", + "global_counts AS (\n", + " SELECT\n", + " COUNT(*) AS mapped_comment_ids,\n", + " SUM(CASE WHEN has_canonical = 1 THEN 1 ELSE 0 END) AS canonical_accessible,\n", + " SUM(CASE WHEN has_fork = 1 THEN 1 ELSE 0 END) AS fork_accessible,\n", + " SUM(CASE WHEN has_canonical = 1 AND has_fork = 0 THEN 1 ELSE 0 END) AS canonical_only,\n", + " SUM(CASE WHEN has_canonical = 0 AND has_fork = 1 THEN 1 ELSE 0 END) AS fork_only,\n", + " SUM(CASE WHEN has_canonical = 1 AND has_fork = 1 THEN 1 ELSE 0 END) AS both_sides\n", + " FROM comment_flags\n", + ")\n", + "SELECT\n", + " mapped_comment_ids,\n", + " canonical_accessible,\n", + " fork_accessible,\n", + " canonical_only,\n", + " fork_only,\n", + " both_sides,\n", + " ROUND(100 * fork_only / NULLIF(mapped_comment_ids, 0), 2) AS fork_only_pct,\n", + " ROUND(100 * canonical_only / NULLIF(mapped_comment_ids, 
0), 2) AS canonical_only_pct,\n", + " ROUND(100 * (canonical_only + both_sides) / NULLIF(mapped_comment_ids, 0), 2) AS canonical_reachable_pct\n", + "FROM global_counts;\n", + "```\n", + "\n", + "Expected values from prior runs:\n", + "- `canonical_only`: 4555\n", + "- `fork_only`: 569\n", + "- `both_sides`: 2083\n", + "- Canonical reachable rate: about 93.2% when 6,638 (`canonical_only` + `both_sides`) is divided by the 7,122 labeled comments; the query's `canonical_reachable_pct` column divides by `mapped_comment_ids` (7,207 = 4317 + 2890) instead, so it reports about 92.1%" + ] + }, + { + "cell_type": "markdown", + "id": "52c9ee7e", + "metadata": {}, + "source": [ + "### When to move on to Notebook 3\n", + "\n", + "You can move to Notebook 3 when all of these are true:\n", + "\n", + "1. Check 1 totals are consistent (commit + PR - overlap = 7122).\n", + "2. Check 2 returns project rows for commit-comment mappings (not empty).\n", + "3. Check 3 returns project rows for PR-comment mappings (not empty).\n", + "4. Check 4 runs successfully and shows non-zero canonical reachability.\n", + "\n", + "If any check is empty or fails, fix the data/join issue first before moving on." + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/03_scale_config_files.ipynb b/03_scale_config_files.ipynb new file mode 100644 index 0000000..2c82d59 --- /dev/null +++ b/03_scale_config_files.ipynb @@ -0,0 +1,403 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6fa72e9d", + "metadata": {}, + "source": [ + "# Scale and Automate Config Generation (Notebook 3)\n", + "\n", + "This notebook generates Kaiaulu config files for each main project repo in the GHTorrent database.\n", + "\n", + "**What this notebook does:**\n", + "1. Queries MySQL/GHTorrent to identify canonical repos with sentiment-labeled comments\n", + "2. 
Generates a `.yml` config file per repo (using `trinitycore.yml` as a template) and writes them to Kaiaulu's `conf/` directory\n", + "\n", + "**What comes next** — once configs are written, use these Kaiaulu vignettes to download and parse comments:\n", + "- `vignettes/download_github_events.Rmd` → commit comments\n", + "- `vignettes/download_github_pull_request_comments.Rmd` → PR inline comments" + ] + }, + { + "cell_type": "markdown", + "id": "a18a63e8", + "metadata": {}, + "source": [ + "### Planned Output\n", + "\n", + "1. One `.yml` config file per main project repo in the GHTorrent database, written to Kaiaulu's `conf/` directory." + ] + }, + { + "cell_type": "markdown", + "id": "622cb929", + "metadata": {}, + "source": [ + "### Step 1: Import Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1bc36cfe", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import subprocess\n", + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "import yaml\n", + "from sqlalchemy import create_engine, text" + ] + }, + { + "cell_type": "markdown", + "id": "c2d1ae1f", + "metadata": {}, + "source": [ + "### Step 2: Set Paths and Configuration\n", + "\n", + "Update the variables below before running:\n", + "- **`KAIAULU_REPO`** — path to your local Kaiaulu repo\n", + "- **`MYSQL_DB`** / **`MYSQL_PASSWORD`** — your database credentials\n", + "- **`MAX_REPOS`** — set to an integer to limit the number of repos processed, or `None` to process all\n", + "- **`WRITE_CONFIGS`** — set to `False` to do a dry run without writing any files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20fb9b60", + "metadata": {}, + "outputs": [], + "source": [ + "# Paths\n", + "KAIAULU_REPO = (Path(\".\").resolve() / \"..\" / \"kaiaulu\").resolve()\n", + "\n", + "# Kaiaulu-owned inputs/outputs\n", + "CONF_DIR = KAIAULU_REPO / \"conf\"\n", + "TEMPLATE_PATH = CONF_DIR / \"trinitycore.yml\"\n", + "\n", + "# Repo 
selection cap (None = all main project repos)\n", + "MAX_REPOS = None\n", + "\n", + "# MySQL connection (override with env vars if needed)\n", + "MYSQL_HOST = os.getenv(\"MYSQL_HOST\", \"localhost\")\n", + "MYSQL_PORT = int(os.getenv(\"MYSQL_PORT\", \"3306\"))\n", + "MYSQL_DB = os.getenv(\"MYSQL_DB\", \"ADD_DB_NAME_HERE\")\n", + "MYSQL_USER = os.getenv(\"MYSQL_USER\", \"root\")\n", + "MYSQL_PASSWORD = os.getenv(\"MYSQL_PASSWORD\", \"ADD_PASSWORD_HERE\")\n", + "\n", + "# Toggle writing config files to Kaiaulu conf/\n", + "WRITE_CONFIGS = True" + ] + }, + { + "cell_type": "markdown", + "id": "aa923139", + "metadata": {}, + "source": [ + "### Step 3: Query Canonical Repos from GHTorrent\n", + "\n", + "Queries MySQL to find main (non-fork) repos that have at least one sentiment-labeled comment (commit or PR). Results are loaded into `repos`.\n", + "\n", + "Expected output (~82 repos):\n", + "\n", + "| | owner | repo |\n", + "|---|---|---|\n", + "| 0 | akka | akka |\n", + "| 1 | antirez | redis |\n", + "| 2 | ariya | phantomjs |\n", + "| 3 | automapper | automapper |\n", + "| 4 | bartaz | impress.js |" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5641db76", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "canonical repos found: 82\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ownerrepo
0akkaakka
1antirezredis
2ariyaphantomjs
3automapperautomapper
4bartazimpress.js
\n", + "
" + ], + "text/plain": [ + " owner repo\n", + "0 akka akka\n", + "1 antirez redis\n", + "2 ariya phantomjs\n", + "3 automapper automapper\n", + "4 bartaz impress.js" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Query canonical repos that have sentiment-labeled comments\n", + "engine = create_engine(\n", + " f'mysql+pymysql://{MYSQL_USER}:{MYSQL_PASSWORD}@{MYSQL_HOST}:{MYSQL_PORT}/{MYSQL_DB}'\n", + " )\n", + "\n", + "sql = \"\"\"\n", + "WITH RECURSIVE project_root AS (\n", + " SELECT p.id AS project_id, p.id AS root_id\n", + " FROM projects p\n", + " WHERE p.forked_from IS NULL\n", + " UNION ALL\n", + " SELECT c.id AS project_id, pr.root_id\n", + " FROM projects c\n", + " JOIN project_root pr ON c.forked_from = pr.project_id\n", + "),\n", + "comment_project_rows AS (\n", + " SELECT cs.ID AS comment_id, c.project_id\n", + " FROM comment_sentiment cs\n", + " JOIN commit_comments cc ON cs.ID = cc.comment_id\n", + " JOIN commits c ON cc.commit_id = c.id\n", + " UNION ALL\n", + " SELECT cs.ID AS comment_id, pr.base_repo_id AS project_id\n", + " FROM comment_sentiment cs\n", + " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", + " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", + " UNION ALL\n", + " SELECT cs.ID AS comment_id, pr.head_repo_id AS project_id\n", + " FROM comment_sentiment cs\n", + " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", + " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", + ")\n", + "SELECT DISTINCT LOWER(u.login) AS owner, LOWER(p.name) AS repo\n", + "FROM comment_project_rows cpr\n", + "JOIN project_root pr ON pr.project_id = cpr.project_id\n", + "JOIN projects p ON p.id = pr.root_id\n", + "JOIN users u ON u.id = p.owner_id\n", + "ORDER BY owner, repo\n", + "\"\"\"\n", + "\n", + "repos = pd.read_sql(text(sql), con=engine)\n", + "print('repos found:', len(repos))\n", + "repos.head()" + ] + }, + { + "cell_type": "markdown", + "id": 
"46104941", + "metadata": {}, + "source": [ + "### Step 4: Generate and Write Config Files\n", + "\n", + "Builds a `.yml` config file for each repo using `trinitycore.yml` as a template and writes it to Kaiaulu's `conf/` directory.\n", + "\n", + "Each config follows this structure:\n", + "```yaml\n", + "project:\n", + " website: https://github.com/{owner}/{repo}\n", + "issue_tracker:\n", + " github:\n", + " project_key_1:\n", + " owner: {owner}\n", + " repo: {repo}\n", + " issue_or_pr_comment: rawdata/github/{owner}/{repo}/issue_or_pr_comment/\n", + " issue_event: rawdata/github/{owner}/{repo}/issue_event/\n", + " commit: rawdata/github/{owner}/{repo}/commit/\n", + " commit_comments: rawdata/github/{owner}/{repo}/commit_comments/\n", + " pr_comments: rawdata/github/{owner}/{repo}/pr_comments/\n", + "```\n", + "\n", + "Expected output: a list of written `.yml` filenames, e.g. `['akka.yml', 'redis.yml', ...]`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a926ed1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "repos selected for config generation: 82\n", + "written configs: ['akka.yml', 'redis.yml', 'phantomjs.yml', 'automapper.yml', 'impress.js.yml', 'bitcoin.yml', 'boto.yml', 'craftbukkit.yml', 'cakephp.yml', 'compass.yml', 'clojure.yml', 'slim.yml', 'diaspora.yml', 'django-cms.yml', 'django.yml', 'django-debug-toolbar.yml', 'elasticsearch.yml', 'codeigniter.yml', 'facebook-android-sdk.yml', 'folly.yml', 'hiphop-php.yml', 'php-sdk.yml', 'tornado.yml', 'thinkup.yml', 'android.yml', 'gitlabhq.yml', 'html5-boilerplate.yml', 'devtools.yml', 'chosen.yml', 'sparkleshare.yml', 'octopress.yml', 'actionbarsherlock.yml', 'blueprint-css.yml', 'http-parser.yml', 'libuv.yml', 'node.yml', 'jquery.yml', 'requests.yml', 'beanstalkd.yml', 'libgit2.yml', 'ccv.yml', 'mangos.yml', 'd3.yml', 'memcached.yml', 'sick-beard.yml', 'flask.yml', 'jekyll.yml', 'mongo.yml', 'mono.yml', 'plupload.yml', 
'three.js.yml', 'homebrew.yml', 'nancy.yml', 'storm.yml', 'netty.yml', 'openframeworks.yml', 'devise.yml', 'rails.yml', 'reddit.yml', 'restsharp.yml', 'kestrel.yml', 'shiny.yml', 'miniprofiler.yml', 'sbt.yml', 'scala.yml', 'scalatra.yml', 'phpunit.yml', 'servicestack.yml', 'signalr.yml', 'symfony.yml', 'paperclip.yml', 'trinitycore.yml', 'finagle.yml', 'flockdb.yml', 'gizzard.yml', 'zipkin.yml', 'redcarpet.yml', 'xbmc.yml', 'symfony.yml', 'knitr.yml', 'zf2.yml', 'foundation.yml']\n" + ] + } + ], + "source": [ + "# Build YAML configs for 82 project repos using trinitycore.yml as the base template\n", + "header_lines = [\n", + " \"# -*- yaml -*-\",\n", + " \"# https://github.com/sailuh/kaiaulu\",\n", + " \"#\",\n", + " \"# Copying and distribution of this file, with or without modification,\",\n", + " \"# are permitted in any medium without royalty provided the copyright\",\n", + " \"# notice and this notice are preserved. This file is offered as-is,\",\n", + " \"# without any warranty.\",\n", + " \"\",\n", + " \"# Project Configuration File #\",\n", + " \"#\",\n", + " \"# To perform analysis on open source projects, you need to manually\",\n", + " \"# collect some information from the project's website. As there is\",\n", + " \"# no standardized website format, this file serves to distill\",\n", + " \"# important data source information so it can be reused by others\",\n", + " \"# and understood by Kaiaulu.\",\n", + " \"#\",\n", + " \"# Please check https://github.com/sailuh/kaiaulu/tree/master/conf to\",\n", + " \"# see if a project configuration file already exists. Otherwise, we\",\n", + " \"# would appreciate if you share your curated file with us by sending a\",\n", + " \"# Pull Request: https://github.com/sailuh/kaiaulu/pulls\",\n", + " \"#\",\n", + " \"# Note, you do NOT need to specify this entire file to conduct analysis.\",\n", + " \"# Each R Notebook uses a different portion of this file. 
To know what\",\n", + " \"# information is used, see the project configuration file section at\",\n", + " \"# the start of each R Notebook.\",\n", + " \"#\",\n", + " \"# Please comment unused parameters instead of deleting them for clarity.\",\n", + " \"# If you have questions, please open a discussion:\",\n", + " \"# https://github.com/sailuh/kaiaulu/discussions\",\n", + " \"\",\n", + "]\n", + "\n", + "def build_conf(template, owner, repo):\n", + " conf = template.copy()\n", + " conf.setdefault(\"project\", {})\n", + " conf[\"project\"][\"website\"] = f\"https://github.com/{owner}/{repo}\"\n", + "\n", + " conf.setdefault(\"issue_tracker\", {})\n", + " conf[\"issue_tracker\"].setdefault(\"github\", {})\n", + " conf[\"issue_tracker\"][\"github\"].setdefault(\"project_key_1\", {})\n", + " conf[\"issue_tracker\"][\"github\"][\"project_key_1\"][\"owner\"] = owner\n", + " conf[\"issue_tracker\"][\"github\"][\"project_key_1\"][\"repo\"] = repo\n", + "\n", + " # Keep relative paths so data lands under backend cwd (sentiment_github_dataset)\n", + " base_path = f\"rawdata/github/{owner}/{repo}\"\n", + " conf[\"issue_tracker\"][\"github\"][\"project_key_1\"][\"issue_or_pr_comment\"] = f\"{base_path}/issue_or_pr_comment/\"\n", + " conf[\"issue_tracker\"][\"github\"][\"project_key_1\"][\"issue_event\"] = f\"{base_path}/issue_event/\"\n", + " conf[\"issue_tracker\"][\"github\"][\"project_key_1\"][\"commit\"] = f\"{base_path}/commit/\"\n", + " conf[\"issue_tracker\"][\"github\"][\"project_key_1\"][\"commit_comments\"] = f\"{base_path}/commit_comments/\"\n", + " conf[\"issue_tracker\"][\"github\"][\"project_key_1\"][\"pr_comments\"] = f\"{base_path}/pr_comments/\"\n", + " return conf\n", + "\n", + "with open(TEMPLATE_PATH, \"r\", encoding=\"utf-8\") as f:\n", + " template_conf = yaml.safe_load(f)\n", + "\n", + "if MAX_REPOS is None:\n", + " pilot = repos.copy()\n", + "else:\n", + " pilot = repos.head(MAX_REPOS)\n", + "\n", + "print(f\"repos selected for config generation: 
{len(pilot)}\")\n", + "\n", + "written = []\n", + "for row in pilot.itertuples(index=False):\n", + " owner = row.owner\n", + " repo = row.repo\n", + " target_path = CONF_DIR / f\"{repo}.yml\"\n", + " conf = build_conf(template_conf, owner, repo)\n", + " yaml_body = yaml.safe_dump(conf, sort_keys=False)\n", + " if WRITE_CONFIGS:\n", + " with open(target_path, \"w\", encoding=\"utf-8\") as out:\n", + " out.write(\"\\n\".join(header_lines))\n", + " out.write(\"\\n\")\n", + " out.write(yaml_body)\n", + " written.append(target_path.name)\n", + "print(\"written configs:\", written)" + ] + }, + { + "cell_type": "markdown", + "id": "oiibwri0k6", + "metadata": {}, + "source": [ + "### When to Move On to Notebook 4\n", + "\n", + "Move to Notebook 4 after all of the following are true:\n", + "\n", + "1. The `.yml` files generated from Step 4 exist in Kaiaulu's `conf/` directory. Step 4 processes 82 repos but names each file `{repo}.yml`, so repos that share a name write to the same file (`symfony.yml` is listed twice in the Step 4 output); expect slightly fewer than 82 distinct files.\n", + "2. Spot-check a few configs to confirm the `owner`, `repo`, and `rawdata/` paths are populated correctly and follow the formatting indicated in Step 4." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/04_download_commit_comments.ipynb b/04_download_commit_comments.ipynb new file mode 100644 index 0000000..b26da40 --- /dev/null +++ b/04_download_commit_comments.ipynb @@ -0,0 +1,162 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c9604aaa", + "metadata": {}, + "source": [ + "# Download Commit Comments with Kaiaulu (Notebook 4)\n", + "\n", + "This notebook shows how to download GitHub commit comments using Kaiaulu’s `download_github_events.Rmd` notebook in the `/vignettes` folder." 
+ ] + }, + { + "cell_type": "markdown", + "id": "a8369dad", + "metadata": {}, + "source": [ + "### Planned Output\n", + "\n", + "1. A parsed commit-comments CSV saved to `rawdata/github/{owner}/{repo}/{owner}_{repo}_commit_comments.csv` in Kaiaulu" + ] + }, + { + "cell_type": "markdown", + "id": "b5ad8e21", + "metadata": {}, + "source": [ + "### Step 1: Confirm your working directory\n", + "\n", + "1. Open the Kaiaulu project in RStudio.\n", + "2. Run `getwd()` in the R console to check your current working directory.\n", + "3. If the directory is not Kaiaulu, set it with `setwd()` (for example, `setwd(\"~/Desktop/github/kaiaulu\")`)." + ] + }, + { + "cell_type": "markdown", + "id": "049737cf", + "metadata": {}, + "source": [ + "### Step 2: Create a personal access token\n", + "\n", + "This workflow makes many GitHub API requests, so you need a personal access token.\n", + "\n", + "Follow the [GitHub documentation](https://docs.github.com/en/free-pro-team@latest/github/authenticating-to-github/creating-a-personal-access-token#:~:text=Creating%20a%20token.%201%20Verify%20your%20email%20address%2C,able%20to%20see%20the%20token%20again.%20More%20items) and create a **classic** token:\n", + "\n", + "1. Go to **GitHub → Settings → Developer settings → Personal access tokens → Tokens (classic)**.\n", + "2. Select **Generate new token (classic)**.\n", + "3. Add a note (for example, \"Download GitHub commit + PR comments via Kaiaulu\").\n", + "4. Enable the `public_repo` scope for public repositories.\n", + "5. Generate the token, then copy and store it securely.\n", + "\n", + "Save the token in `~/.ssh/github_token` on your local machine." + ] + }, + { + "cell_type": "markdown", + "id": "976db067", + "metadata": {}, + "source": [ + "### Step 3: Run `download_github_events.Rmd` chunks in RStudio\n", + "\n", + "Run the following chunks in **RStudio**. 
These chunks should already exist in `download_github_events.Rmd`.\n", + "\n", + "### Chunk 1: Set up dependencies\n", + "\n", + "---\n", + "```{r warning=FALSE,message=FALSE}\n", + "rm(list = ls())\n", + "require(kaiaulu)\n", + "require(data.table)\n", + "require(jsonlite)\n", + "require(knitr)\n", + "```\n", + "---\n", + "\n", + "### Chunk 2: Set required parameters\n", + "\n", + "Replace `kaiaulu.yml` with the `.yml` file for the project you want to process. You created these files in Step 4 of `03_scale_config_files.ipynb`.\n", + "\n", + "---\n", + "```{r}\n", + "conf <- parse_config(\"../conf/kaiaulu.yml\")\n", + "owner <- get_github_owner(conf, \"project_key_1\") # Has to match github organization (e.g. github.com/sailuh)\n", + "repo <- get_github_repo(conf, \"project_key_1\") # Has to match github repository (e.g. github.com/sailuh/perceive)\n", + "save_path_issue_or_pr_comments <- path.expand(get_github_issue_or_pr_comment_path(conf, \"project_key_1\"))\n", + "save_path_issue_event <- get_github_issue_event_path(conf, \"project_key_1\")\n", + "save_path_commit <- get_github_commit_path(conf, \"project_key_1\")\n", + "save_path_commit_comments <- get_github_commit_comment_path(conf, \"project_key_1\")\n", + "\n", + "# your file github_token contains the GitHub token API obtained in the steps above\n", + "token <- scan(\"~/.ssh/github_token\",what=\"character\",quiet=TRUE)\n", + "```\n", + "---\n", + "\n", + "### Chunk 3: Download Commit Comments\n", + "\n", + "This downloads commit-comment JSON files into `rawdata` in your current working directory. The runtime depends on how many comments the project has.\n", + "\n", + "**IMPORTANT:** This chunk uses `gh_next()` to fetch paginated results and expects `gh` version 1.2.0. 
If you see a `gh_next()` paging bug (for example, repeated writes to the same page), downgrade to `gh` 1.2.0.\n", + "\n", + "---\n", + "\n", + "```{r Collect all project commit comments, eval = FALSE}\n", + "dir.create(save_path_commit_comments, recursive = TRUE, showWarnings = FALSE)\n", + "gh_response <- github_api_project_commit_comments(owner,repo,token)\n", + "github_api_iterate_pages(token,gh_response,save_path_commit_comments,prefix=\"commit_comments\")\n", + "```\n", + "\n", + "---\n", + "\n", + "### Chunk 4: Parse Commit Comments\n", + "\n", + "After all JSON files are downloaded, run the **Parsing Raw Data to Csv** chunk for commit comments. You should see a table named `all_commit_comments` in your R environment with columns such as `comment_id`, `commit_id`, `author_login`, `author_id`, `line`, `created_at`, and `updated_at`.\n", + "\n", + "---\n", + "\n", + "```{r}\n", + "all_commit_comments <- lapply(list.files(save_path_commit_comments,full.names = TRUE),read_json)\n", + "all_commit_comments <- lapply(all_commit_comments,github_parse_project_commit_comments)\n", + "all_commit_comments <- rbindlist(all_commit_comments,fill=TRUE)\n", + "\n", + "kable(head(all_commit_comments))\n", + "\n", + "# Save the data table for commit comments as a CSV\n", + "out_csv <- file.path(dirname(save_path_commit_comments), paste0(owner, \"_\", repo, \"_commit_comments.csv\"))\n", + "data.table::fwrite(all_commit_comments, out_csv)\n", + "cat(\"Saved:\", out_csv, \"\\n\")\n", + "```\n", + "\n", + "---\n", + "\n", + "### Final Output\n", + "\n", + "Final output path:\n", + "`rawdata/github/{owner}/{repo}/{owner}_{repo}_commit_comments.csv`" + ] + }, + { + "cell_type": "markdown", + "id": "99981420", + "metadata": {}, + "source": [ + "### When to move on to Notebook 5\n", + "\n", + "Move to Notebook 5 after all of the following are true:\n", + "\n", + "1. The commit-comment JSON files have been downloaded successfully.\n", + "2. 
The parsed table `all_commit_comments` looks correct in RStudio.\n", + "3. The CSV file exists at:\n", + " `rawdata/github/{owner}/{repo}/{owner}_{repo}_commit_comments.csv`\n", + "4. Spot-check a few rows to confirm key fields (such as `comment_id`, `commit_id`, and `author_login`) are populated as expected." + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/05_download_PR_inline_comments.ipynb b/05_download_PR_inline_comments.ipynb new file mode 100644 index 0000000..3cbcbfe --- /dev/null +++ b/05_download_PR_inline_comments.ipynb @@ -0,0 +1,186 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "79eaeb30", + "metadata": {}, + "source": [ + "# Download PR Inline Comments with Kaiaulu (Notebook 5)\n", + "\n", + "This notebook shows how to download pull request inline comments using Kaiaulu’s `download_github_pull_request_comments.Rmd` notebook in the `/vignettes` folder." + ] + }, + { + "cell_type": "markdown", + "id": "edcea498", + "metadata": {}, + "source": [ + "### Planned Output\n", + "\n", + "1. A parsed commit-comments CSV saved to `rawdata/github/{owner}/{repo}/{owner}_{repo}_pr_inline_comments.csv` in Kaiaulu" + ] + }, + { + "cell_type": "markdown", + "id": "4f30bc9f", + "metadata": {}, + "source": [ + "Before starting, complete Steps 1 and 2 in `04_download_commit_comments.ipynb` (confirm working directory and create a GitHub personal access token).\n", + "\n", + "### Step 1: Run download_github_pull_request_comments.Rmd chunks in RStudio\n", + "\n", + "Run the following chunks in **RStudio**. 
These chunks should already exist in `download_github_pull_request_comments.Rmd`.\n", + "\n", + "### Chunk 1: Set up dependencies\n", + "\n", + "---\n", + "\n", + "```{r warning=FALSE,message=FALSE}\n", + "rm(list = ls())\n", + "require(kaiaulu)\n", + "require(data.table)\n", + "require(jsonlite)\n", + "require(knitr)\n", + "require(magrittr)\n", + "require(gt)\n", + "require(lubridate)\n", + "```\n", + "\n", + "--- \n", + "\n", + "### Chunk 2: Set required parameters\n", + "\n", + "Replace `kaiaulu.yml` with the `.yml` file for the project you want to process. You created these files in Step 4 of Notebook 3 (`03_scale_config_files.ipynb`).\n", + "\n", + "---\n", + "\n", + "```{r warning=FALSE}\n", + "conf <- parse_config(\"../conf/kaiaulu.yml\")\n", + "owner <- get_github_owner(conf, \"project_key_1\") # Has to match github organization (e.g. github.com/sailuh)\n", + "repo <- get_github_repo(conf, \"project_key_1\") # Has to match github repository (e.g. github.com/sailuh/perceive)\n", + "\n", + "# Path you wish to save all raw data.\n", + "save_path_pull_request <- get_github_pull_request_path(conf, \"project_key_1\")\n", + "save_path_pr_comments <- get_github_pr_comments_path(conf, \"project_key_1\")\n", + "save_path_issue_or_pr_comments <- get_github_issue_or_pr_comment_path(conf, \"project_key_1\")\n", + "save_path_pr_reviews <- get_github_pr_review_path(conf, \"project_key_1\")\n", + "\n", + "# Lower API \n", + "save_path_pull_request <- get_github_pull_request_path(conf, \"project_key_1\")\n", + "save_path_pr_commits <- get_github_pr_commits_path(conf, \"project_key_1\")\n", + "save_path_pr_files <- get_github_pr_files_path(conf, \"project_key_1\")\n", + "save_path_pr_reviews <- get_github_pr_review_path(conf, \"project_key_1\")\n", + "save_path_pr_comments <- get_github_pr_comments_path(conf, \"project_key_1\")\n", + "\n", + "# Create all folder directories\n", + "#create_file_directory(conf)\n", + "```\n", + "\n", + "---\n", + "\n", + "### Chunk 3: 
Personal Access Token\n", + "\n", + "Point to the GitHub token created in Step 2 of Notebook 4.\n", + "\n", + "---\n", + "\n", + "```{r Scan GitHub Token}\n", + "# your file github_token (a text file) contains the GitHub token API\n", + "token <- scan(\"~/.ssh/github_token\",what=\"character\",quiet=TRUE)\n", + "```\n", + "\n", + "---\n", + "\n", + "### Chunk 4: Download Pull Request In-Line Code Comments\n", + "\n", + "This chunk downloads PR inline-comment JSON files into `rawdata` in your current working directory. The runtime depends on how many comments the project has.\n", + "\n", + "**IMPORTANT:** This chunk uses `gh_next()` to fetch paginated results and expects `gh` version 1.2.0. If you see a `gh_next()` paging bug (for example, repeated writes to the same page), downgrade to `gh` 1.2.0.\n", + "\n", + "--- \n", + "\n", + "```{r Collect Comments from Pull Requests, eval = FALSE}\n", + "dir.create(save_path_pr_comments, recursive = TRUE, showWarnings = FALSE)\n", + "gh_response <- github_api_project_pull_request_inline_comments_refresh(owner, repo, token, save_path_pr_comments)\n", + "github_api_iterate_pages(token, gh_response, save_path_pr_comments, prefix=\"pr_comments\")\n", + "```\n", + "\n", + "---\n", + "\n", + "### Chunk 5: Parse PR Inline Comments\n", + "\n", + "After all JSON files are downloaded, run the parse chunk for PR inline comments. 
You should see a table named `inline_comments` in your R environment with columns such as `review_id`, `comment_id`, `html_url`, `created_at`, `updated_at`, `comment_user_login`, `author_association`, `file_path`, `start_line`, `line`, `original_start_line`, `original_line`, `position`, `diff_hunk`, `body`, and `commit_id`.\n", + "\n", + "---\n", + "\n", + "```{r Parse Comments from Pull Requests}\n", + "inline_comments <- lapply(list.files(save_path_pr_comments, full.names = TRUE), read_json)\n", + "inline_comments <- lapply(inline_comments, github_parse_project_pull_request_inline_comments)\n", + "inline_comments <- rbindlist(inline_comments, fill = TRUE)\n", + "head(inline_comments,2) %>%\n", + " gt(auto_align = FALSE) \n", + "```\n", + "\n", + "---\n", + "\n", + "If `fwrite` complains about list/`NULL` columns (common for line/position fields), copy this chunk and run it right after the parse chunk:\n", + "\n", + "```{r Create CSV for Parsed Comments}\n", + "as_char_or_na <- function(x) {\n", + " if (is.null(x) || length(x) == 0) return(NA_character_)\n", + " if (is.list(x)) {\n", + " return(vapply(x, function(e) {\n", + " if (is.null(e) || length(e) == 0) NA_character_ else as.character(e[[1]])\n", + " }, character(1)))\n", + " }\n", + " as.character(x)\n", + "}\n", + "as_int_or_na <- function(x) {\n", + " if (is.null(x) || length(x) == 0) return(NA_integer_)\n", + " if (is.list(x)) {\n", + " return(vapply(x, function(e) {\n", + " if (is.null(e) || length(e) == 0) NA_integer_ else suppressWarnings(as.integer(e[[1]]))\n", + " }, integer(1)))\n", + " }\n", + " suppressWarnings(as.integer(x))\n", + "}\n", + "\n", + "for (nm in intersect(c(\"file_path\",\"diff_hunk\",\"body\",\"html_url\",\"created_at\",\"updated_at\",\"comment_user_login\",\"author_association\",\"commit_id\"), names(inline_comments))) {\n", + " if (is.list(inline_comments[[nm]])) inline_comments[[nm]] <- as_char_or_na(inline_comments[[nm]])\n", + "}\n", + "for (nm in 
intersect(c(\"review_id\",\"comment_id\",\"start_line\",\"line\",\"original_start_line\",\"original_line\",\"position\"), names(inline_comments))) {\n", + " if (is.list(inline_comments[[nm]])) inline_comments[[nm]] <- as_int_or_na(inline_comments[[nm]])\n", + "}\n", + "\n", + "out_csv <- file.path(dirname(save_path_pr_comments), paste0(owner, \"_\", repo, \"_pr_inline_comments.csv\"))\n", + "data.table::fwrite(inline_comments, out_csv)\n", + "cat(\"Saved:\", out_csv, \"\\n\")\n", + "```\n", + "\n", + "### Final Output\n", + "\n", + "Final output path:\n", + "`rawdata/github/{owner}/{repo}/{owner}_{repo}_pr_inline_comments.csv`" + ] + }, + { + "cell_type": "markdown", + "id": "1e3807c9", + "metadata": {}, + "source": [ + "### Next Steps\n", + "\n", + "1. Run Notebooks 4 and 5 for each project configuration (`.yml`) you want to process.\n", + "2. Confirm that each run generates the expected commit-comment and PR inline-comment CSV outputs.\n", + "3. Use `comment_id` as the join key to transfer sentiment labels to both commit comments and PR inline comments." 
+ ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 357ac9af2d2709e55def4e1e88fb0a1d32275724 Mon Sep 17 00:00:00 2001 From: splimon Date: Fri, 10 Apr 2026 14:24:25 -1000 Subject: [PATCH 2/8] Update Documentation Notebooks Revise 3 notebooks for sentiment dataset documentation: - Notebook 1: Load the GitHub Gold Standard sentiment CSV into a GHTorrent MySQL database - Notebook 2: Explore GHTorrent tables to map sentiment comments to canonical project repos - Notebook 3: Auto-generate Kaiaulu .yml config files for 82 canonical project repos --- 01_load_sentiment_csv_to_mysql.ipynb | 187 ----- 02_explore_gh_torrent_tables.ipynb | 237 ------ 04_download_commit_comments.ipynb | 162 ----- 05_download_PR_inline_comments.ipynb | 186 ----- .../01_load_sentiment_csv_to_mysql.ipynb | 677 ++++++++++++++++++ notebooks/02_explore_gh_torrent_tables.ipynb | 413 +++++++++++ .../03_scale_config_files.ipynb | 60 +- 7 files changed, 1122 insertions(+), 800 deletions(-) delete mode 100644 01_load_sentiment_csv_to_mysql.ipynb delete mode 100644 02_explore_gh_torrent_tables.ipynb delete mode 100644 04_download_commit_comments.ipynb delete mode 100644 05_download_PR_inline_comments.ipynb create mode 100644 notebooks/01_load_sentiment_csv_to_mysql.ipynb create mode 100644 notebooks/02_explore_gh_torrent_tables.ipynb rename 03_scale_config_files.ipynb => notebooks/03_scale_config_files.ipynb (79%) diff --git a/01_load_sentiment_csv_to_mysql.ipynb b/01_load_sentiment_csv_to_mysql.ipynb deleted file mode 100644 index 78548ab..0000000 --- a/01_load_sentiment_csv_to_mysql.ipynb +++ /dev/null @@ -1,187 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0b0b299a", - "metadata": {}, - "source": [ - "# Load Sentiment CSV into GHTorrent MySQL (Notebook 1)\n", - "\n", - "This notebook shows how to load the GitHub Gold Standard sentiment CSV into a MySQL database that already has the GHTorrent 2004 dump. 
It also includes quick checks to make sure the data loaded correctly." - ] - }, - { - "cell_type": "markdown", - "id": "bd368c02", - "metadata": {}, - "source": [ - "### Planned Output\n", - "By the end of this notebook, you should have:\n", - "1. A `comment_sentiment` table in MySQL\n", - "2. All rows from `comment_sentiment.csv` loaded\n", - "3. Query results that confirm row counts and valid joins to GHTorrent project data" - ] - }, - { - "cell_type": "markdown", - "id": "0acf37b4", - "metadata": {}, - "source": [ - "### Step 1: Get the data ready\n", - "\n", - "1. Download the [GitHub Gold Standard dataset](https://figshare.com/articles/dataset/A_gold_standard_for_polarity_of_emotions_of_software_developers_in_GitHub/11604597?file=21001260).\n", - "2. Rename the file to `comment_sentiment.csv`.\n", - "3. Download the [GHTorrent 2004 MySQL Database Dump](https://web.archive.org/web/20150206005357/http://ghtorrent.org/msr14.html) and make sure it is already loaded in your MySQL database (example: `github`).\n", - "4. Make sure MySQL can read your CSV file path (e.g., `~/Desktop/github/sentiment_github_dataset/comment_sentiment.csv`)\n", - "\n", - "Optional reference: [GHTorrent schema diagram](https://web.archive.org/web/20150206005412/http://ghtorrent.org/relational.html)." - ] - }, - { - "cell_type": "markdown", - "id": "038f5498", - "metadata": {}, - "source": [ - "### Step 2: Create the table, load the CSV, and run the original join queries\n", - "\n", - "Use these copy-ready blocks one at a time.\n", - "\n", - "Start MySQL with local file loading turned on. 
Run on bash:\n", - "\n", - "```bash\n", - "mysql --local-infile=1 -u root -p\n", - "```\n", - "---\n", - "\n", - "Select your database (e.g., `github`):\n", - "\n", - "```sql\n", - "USE github;\n", - "```\n", - "---\n", - "\n", - "Drop old table if it exists (safe to re-run):\n", - "\n", - "```sql\n", - "DROP TABLE IF EXISTS comment_sentiment;\n", - "```\n", - "\n", - "---\n", - "\n", - "Create table:\n", - "\n", - "```sql\n", - "CREATE TABLE comment_sentiment (\n", - " ID INT NULL,\n", - " Polarity VARCHAR(256) NULL,\n", - " Text TEXT NULL\n", - ");\n", - "```\n", - "\n", - "---\n", - "\n", - "Load CSV (replace with your absolute path if needed):\n", - "\n", - "```sql\n", - "LOAD DATA LOCAL INFILE 'comment_sentiment.csv'\n", - "INTO TABLE comment_sentiment\n", - "FIELDS TERMINATED BY ';'\n", - "ENCLOSED BY '\"'\n", - "LINES TERMINATED BY '\\n'\n", - "IGNORE 1 LINES\n", - "(ID, Polarity, Text);\n", - "```\n", - "\n", - "---\n", - "\n", - "Query 1 — show joined sentiment + commit + project rows (sample view):\n", - "\n", - "```sql\n", - "-- Returns joined rows from sentiment comments to commit/project data\n", - "SELECT * FROM comment_sentiment s\n", - "INNER JOIN commit_comments cc ON s.ID = cc.comment_id\n", - "INNER JOIN commits c ON c.id = cc.commit_id\n", - "INNER JOIN projects p ON c.project_id = p.id;\n", - "```\n", - "\n", - "---\n", - "\n", - "Query 2 — count sentiment-linked comments by project name:\n", - "\n", - "```sql\n", - "-- Aggregates joined rows by project name and sorts by largest counts\n", - "SELECT name, count(name) as count FROM comment_sentiment s\n", - "INNER JOIN commit_comments cc ON s.ID = cc.comment_id\n", - "INNER JOIN commits c ON c.id = cc.commit_id\n", - "INNER JOIN projects p ON c.project_id = p.id\n", - "GROUP BY name\n", - "ORDER BY count desc;\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "f20c3d16", - "metadata": {}, - "source": [ - "### Step 3: Run validation checks\n", - "\n", - "Use these checks to confirm the 
load worked correctly.\n", - "\n", - "Check 1 — total rows (expected: 7122):\n", - "\n", - "```sql\n", - "SELECT COUNT(*) AS total_rows FROM comment_sentiment;\n", - "```\n", - "\n", - "---\n", - "\n", - "Check 2 — distinct comment IDs (expected: 7122):\n", - "\n", - "```sql\n", - "SELECT COUNT(DISTINCT ID) AS distinct_comment_ids FROM comment_sentiment;\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "65b9dc8c", - "metadata": {}, - "source": [ - "### Optional troubleshooting\n", - "\n", - "If `LOAD DATA LOCAL INFILE` fails or the row count is too low:\n", - "\n", - "1. Check the row count. If it is below 7,122 comments, try the fixes below:\n", - "\n", - "```sql\n", - "SELECT COUNT(*) AS total_rows FROM comment_sentiment;\n", - "```\n", - "\n", - "2. Try these fixes:\n", - "- Use an absolute file path in `LOAD DATA LOCAL INFILE`\n", - "- Make sure `--local-infile=1` is enabled\n", - "- Make sure the file format matches your settings (`;` delimiter and quoted text)\n", - "\n", - "3. If needed, use the following Python CSV loader script (([import_csv_to_mysql.py](https://github.com/user-attachments/files/25094159/import_csv_to_mysql.py))), then run the same checks again. This option uses Python's CSV parser and requires the installation of `mysql-connector-python`." - ] - }, - { - "cell_type": "markdown", - "id": "55c20287", - "metadata": {}, - "source": [ - "### When to move to Notebook 2\n", - "\n", - "Move to Notebook 2 only after `total_rows = 7122` and join results are greater than zero." 
- ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/02_explore_gh_torrent_tables.ipynb b/02_explore_gh_torrent_tables.ipynb deleted file mode 100644 index b352660..0000000 --- a/02_explore_gh_torrent_tables.ipynb +++ /dev/null @@ -1,237 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "1b3dd1e0", - "metadata": {}, - "source": [ - "# Explore GHTorrent Tables for Sentiment Mapping (Notebook 2)\n", - "\n", - "This notebook helps you understand where sentiment-labeled comments are stored in GHTorrent and how they connect to projects. These checks are for exploration and validation. You do not need to run every query to run the end-to-end workflow." - ] - }, - { - "cell_type": "markdown", - "id": "57b61bad", - "metadata": {}, - "source": [ - "### Planned Output\n", - "By the end of this notebook, you should have:\n", - "1. A clear view of how sentiment comments are split across commit vs PR comment tables\n", - "2. A ranked list of projects with sentiment-labeled commit comments\n", - "3. A ranked list of projects with sentiment-labeled PR comments\n", - "4. 
A global summary of comments reachable from canonical repos vs forks" - ] - }, - { - "cell_type": "markdown", - "id": "ac09f08e", - "metadata": {}, - "source": [ - "### Check 1: How sentiment comments are distributed\n", - "\n", - "Use these queries to see how many sentiment comments are in commit comments, PR comments, and both tables.\n", - "\n", - "Count sentiment comments in `commit_comments` (expected: 4317):\n", - "\n", - "```sql\n", - "SELECT COUNT(*)\n", - "FROM comment_sentiment s\n", - "INNER JOIN commit_comments cc ON s.ID = cc.comment_id;\n", - "```\n", - "\n", - "---\n", - "\n", - "Count sentiment comments in `pull_request_comments` (expected: 2890):\n", - "\n", - "```sql\n", - "SELECT COUNT(*)\n", - "FROM comment_sentiment s\n", - "INNER JOIN pull_request_comments prc ON s.ID = prc.comment_id;\n", - "```\n", - "\n", - "---\n", - "\n", - "Count overlap that appears in both tables (expected: 85):\n", - "\n", - "```sql\n", - "SELECT COUNT(*) AS both_tables\n", - "FROM comment_sentiment s\n", - "INNER JOIN commit_comments cc ON s.ID = cc.comment_id\n", - "INNER JOIN pull_request_comments prc ON s.ID = prc.comment_id;\n", - "```\n", - "\n", - "Quick interpretation:\n", - "- Commit-only = 4317 - 85 = 4232\n", - "- PR-only = 2890 - 85 = 2805\n", - "- Both = 85\n", - "- Total unique comments = 7122" - ] - }, - { - "cell_type": "markdown", - "id": "f6cee0f8", - "metadata": {}, - "source": [ - "### Check 2: Projects with the most sentiment-labeled commit comments\n", - "\n", - "Use this to rank projects by number of labeled commit comments.\n", - "\n", - "```sql\n", - "SELECT p.id, p.name, p.url, COUNT(DISTINCT s.ID) AS labeled_comment_count\n", - "FROM projects p\n", - "INNER JOIN commits c ON p.id = c.project_id\n", - "INNER JOIN commit_comments cc ON c.id = cc.commit_id\n", - "INNER JOIN comment_sentiment s ON cc.comment_id = s.ID\n", - "GROUP BY p.id, p.name, p.url\n", - "ORDER BY labeled_comment_count DESC;\n", - "```\n", - "\n", - "---\n", - "\n", - "Use 
this to inspect example rows for one project (replace `{owner}` and `{repo}`):\n", - "\n", - "```sql\n", - "SELECT c.sha, p.url, p.name, s.ID AS comment_id, s.Text AS comment_text\n", - "FROM commits c\n", - "INNER JOIN projects p ON c.project_id = p.id\n", - "INNER JOIN commit_comments cc ON c.id = cc.commit_id\n", - "INNER JOIN comment_sentiment s ON cc.comment_id = s.ID\n", - "WHERE p.url = 'https://api.github.com/repos/{owner}/{repo}';\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "a37a5ebc", - "metadata": {}, - "source": [ - "### Check 3: Projects with the most sentiment-labeled PR comments\n", - "\n", - "Use this to rank projects by number of labeled PR comments.\n", - "\n", - "```sql\n", - "SELECT p.id, p.name, p.url, COUNT(DISTINCT s.ID) AS labeled_comment_count\n", - "FROM projects p\n", - "INNER JOIN pull_requests pr ON p.id = pr.base_repo_id\n", - "INNER JOIN pull_request_comments prc ON pr.id = prc.pull_request_id\n", - "INNER JOIN comment_sentiment s ON prc.comment_id = s.ID\n", - "GROUP BY p.id, p.name, p.url\n", - "ORDER BY labeled_comment_count DESC;\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "a6a740e8", - "metadata": {}, - "source": [ - "### Check 4: Canonical repo vs fork accessibility summary\n", - "\n", - "This query estimates how many sentiment comments are reachable from canonical repos vs only from forks.\n", - "\n", - "```sql\n", - "WITH RECURSIVE project_root AS (\n", - " SELECT p.id AS project_id, p.id AS root_id\n", - " FROM projects p\n", - " WHERE p.forked_from IS NULL\n", - " UNION ALL\n", - " SELECT c.id AS project_id, pr.root_id\n", - " FROM projects c\n", - " JOIN project_root pr ON c.forked_from = pr.project_id\n", - "),\n", - "comment_project_rows AS (\n", - " SELECT cs.ID AS comment_id, c.project_id, 'commit_comment' AS source_tag\n", - " FROM comment_sentiment cs\n", - " JOIN commit_comments cc ON cs.ID = cc.comment_id\n", - " JOIN commits c ON cc.commit_id = c.id\n", - "\n", - " UNION ALL\n", - 
"\n", - " SELECT cs.ID AS comment_id, pr.base_repo_id AS project_id, 'pr_comment' AS source_tag\n", - " FROM comment_sentiment cs\n", - " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", - " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", - "\n", - " UNION ALL\n", - "\n", - " SELECT cs.ID AS comment_id, pr.head_repo_id AS project_id, 'pr_comment' AS source_tag\n", - " FROM comment_sentiment cs\n", - " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", - " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", - "),\n", - "labeled AS (\n", - " SELECT\n", - " cpr.comment_id,\n", - " cpr.source_tag,\n", - " pr.root_id,\n", - " (cpr.project_id = pr.root_id) AS is_canonical\n", - " FROM comment_project_rows cpr\n", - " JOIN project_root pr ON pr.project_id = cpr.project_id\n", - "),\n", - "comment_flags AS (\n", - " SELECT\n", - " root_id,\n", - " source_tag,\n", - " comment_id,\n", - " MAX(CASE WHEN is_canonical THEN 1 ELSE 0 END) AS has_canonical,\n", - " MAX(CASE WHEN NOT is_canonical THEN 1 ELSE 0 END) AS has_fork\n", - " FROM labeled\n", - " GROUP BY root_id, source_tag, comment_id\n", - "),\n", - "global_counts AS (\n", - " SELECT\n", - " COUNT(*) AS mapped_comment_ids,\n", - " SUM(CASE WHEN has_canonical = 1 THEN 1 ELSE 0 END) AS canonical_accessible,\n", - " SUM(CASE WHEN has_fork = 1 THEN 1 ELSE 0 END) AS fork_accessible,\n", - " SUM(CASE WHEN has_canonical = 1 AND has_fork = 0 THEN 1 ELSE 0 END) AS canonical_only,\n", - " SUM(CASE WHEN has_canonical = 0 AND has_fork = 1 THEN 1 ELSE 0 END) AS fork_only,\n", - " SUM(CASE WHEN has_canonical = 1 AND has_fork = 1 THEN 1 ELSE 0 END) AS both_sides\n", - " FROM comment_flags\n", - ")\n", - "SELECT\n", - " mapped_comment_ids,\n", - " canonical_accessible,\n", - " fork_accessible,\n", - " canonical_only,\n", - " fork_only,\n", - " both_sides,\n", - " ROUND(100 * fork_only / NULLIF(mapped_comment_ids, 0), 2) AS fork_only_pct,\n", - " ROUND(100 * canonical_only / NULLIF(mapped_comment_ids, 
0), 2) AS canonical_only_pct,\n", - " ROUND(100 * (canonical_only + both_sides) / NULLIF(mapped_comment_ids, 0), 2) AS canonical_reachable_pct\n", - "FROM global_counts;\n", - "```\n", - "\n", - "Expected values from prior runs:\n", - "- `canonical_only`: 4555\n", - "- `fork_only`: 569\n", - "- `both_sides`: 2083\n", - "- Canonical reachable rate: about 93.2%" - ] - }, - { - "cell_type": "markdown", - "id": "52c9ee7e", - "metadata": {}, - "source": [ - "### When to move on to Notebook 3\n", - "\n", - "You can move to Notebook 3 when all of these are true:\n", - "\n", - "1. Check 1 totals are consistent (commit + PR - overlap = 7122).\n", - "2. Check 2 returns project rows for commit-comment mappings (not empty).\n", - "3. Check 3 returns project rows for PR-comment mappings (not empty).\n", - "4. Check 4 runs successfully and shows non-zero canonical reachability.\n", - "\n", - "If any check is empty or fails, fix the data/join issue first before moving on." - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/04_download_commit_comments.ipynb b/04_download_commit_comments.ipynb deleted file mode 100644 index b26da40..0000000 --- a/04_download_commit_comments.ipynb +++ /dev/null @@ -1,162 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "c9604aaa", - "metadata": {}, - "source": [ - "# Download Commit Comments with Kaiaulu (Notebook 4)\n", - "\n", - "This notebook shows how to download GitHub commit comments using Kaiaulu’s `download_github_events.Rmd` notebook in the `/vignettes` folder." - ] - }, - { - "cell_type": "markdown", - "id": "a8369dad", - "metadata": {}, - "source": [ - "### Planned Output\n", - "\n", - "1. 
A parsed commit-comments CSV saved to `rawdata/github/{owner}/{repo}/{owner}_{repo}_commit_comments.csv` in Kaiaulu" - ] - }, - { - "cell_type": "markdown", - "id": "b5ad8e21", - "metadata": {}, - "source": [ - "### Step 1: Confirm your working directory\n", - "\n", - "1. Open the Kaiaulu project in RStudio.\n", - "2. Run `getwd()` in the R console to check your current working directory.\n", - "3. If the directory is not Kaiaulu, set it with `setwd()` (for example, `setwd(\"~/Desktop/github/kaiaulu\")`)." - ] - }, - { - "cell_type": "markdown", - "id": "049737cf", - "metadata": {}, - "source": [ - "### Step 2: Create a personal access token\n", - "\n", - "This workflow makes many GitHub API requests, so you need a personal access token.\n", - "\n", - "Follow the [GitHub documentation](https://docs.github.com/en/free-pro-team@latest/github/authenticating-to-github/creating-a-personal-access-token#:~:text=Creating%20a%20token.%201%20Verify%20your%20email%20address%2C,able%20to%20see%20the%20token%20again.%20More%20items) and create a **classic** token:\n", - "\n", - "1. Go to **GitHub → Settings → Developer settings → Personal access tokens → Tokens (classic)**.\n", - "2. Select **Generate new token (classic)**.\n", - "3. Add a note (for example, \"Download GitHub commit + PR comments via Kaiaulu\").\n", - "4. Enable the `public_repo` scope for public repositories.\n", - "5. Generate the token, then copy and store it securely.\n", - "\n", - "Save the token in `~/.ssh/github_token` on your local machine." - ] - }, - { - "cell_type": "markdown", - "id": "976db067", - "metadata": {}, - "source": [ - "### Step 3: Run `download_github_events.Rmd` chunks in RStudio\n", - "\n", - "Run the following chunks in **RStudio**. 
These chunks should already exist in `download_github_events.Rmd`.\n", - "\n", - "### Chunk 1: Set up dependencies\n", - "\n", - "---\n", - "```{r warning=FALSE,message=FALSE}\n", - "rm(list = ls())\n", - "require(kaiaulu)\n", - "require(data.table)\n", - "require(jsonlite)\n", - "require(knitr)\n", - "```\n", - "---\n", - "\n", - "### Chunk 2: Set required parameters\n", - "\n", - "Replace `kaiaulu.yml` with the `.yml` file for the project you want to process. You created these files in Step 4 of `03_scale_config_files.ipynb`.\n", - "\n", - "---\n", - "```{r}\n", - "conf <- parse_config(\"../conf/kaiaulu.yml\")\n", - "owner <- get_github_owner(conf, \"project_key_1\") # Has to match github organization (e.g. github.com/sailuh)\n", - "repo <- get_github_repo(conf, \"project_key_1\") # Has to match github repository (e.g. github.com/sailuh/perceive)\n", - "save_path_issue_or_pr_comments <- path.expand(get_github_issue_or_pr_comment_path(conf, \"project_key_1\"))\n", - "save_path_issue_event <- get_github_issue_event_path(conf, \"project_key_1\")\n", - "save_path_commit <- get_github_commit_path(conf, \"project_key_1\")\n", - "save_path_commit_comments <- get_github_commit_comment_path(conf, \"project_key_1\")\n", - "\n", - "# your file github_token contains the GitHub token API obtained in the steps above\n", - "token <- scan(\"~/.ssh/github_token\",what=\"character\",quiet=TRUE)\n", - "```\n", - "---\n", - "\n", - "### Chunk 3: Download Commit Comments\n", - "\n", - "This downloads commit-comment JSON files into `rawdata` in your current working directory. The runtime depends on how many comments the project has.\n", - "\n", - "**IMPORTANT:** This chunk uses `gh_next()` to fetch paginated results and expects `gh` version 1.2.0. 
If you see a `gh_next()` paging bug (for example, repeated writes to the same page), downgrade to `gh` 1.2.0.\n", - "\n", - "---\n", - "\n", - "```{r Collect all project commit comments, eval = FALSE}\n", - "dir.create(save_path_commit_comments, recursive = TRUE, showWarnings = FALSE)\n", - "gh_response <- github_api_project_commit_comments(owner,repo,token)\n", - "github_api_iterate_pages(token,gh_response,save_path_commit_comments,prefix=\"commit_comments\")\n", - "```\n", - "\n", - "---\n", - "\n", - "### Chunk 4: Parse Commit Comments\n", - "\n", - "After all JSON files are downloaded, run the **Parsing Raw Data to Csv** chunk for commit comments. You should see a table named `all_commit_comments` in your R environment with columns such as `comment_id`, `commit_id`, `author_login`, `author_id`, `line`, `created_at`, and `updated_at`.\n", - "\n", - "---\n", - "\n", - "```{r}\n", - "all_commit_comments <- lapply(list.files(save_path_commit_comments,full.names = TRUE),read_json)\n", - "all_commit_comments <- lapply(all_commit_comments,github_parse_project_commit_comments)\n", - "all_commit_comments <- rbindlist(all_commit_comments,fill=TRUE)\n", - "\n", - "kable(head(all_commit_comments))\n", - "\n", - "# Save the data table for commit comments as a CSV\n", - "out_csv <- file.path(dirname(save_path_commit_comments), paste0(owner, \"_\", repo, \"_commit_comments.csv\"))\n", - "data.table::fwrite(all_commit_comments, out_csv)\n", - "cat(\"Saved:\", out_csv, \"\\n\")\n", - "```\n", - "\n", - "---\n", - "\n", - "### Final Output\n", - "\n", - "Final output path:\n", - "`rawdata/github/{owner}/{repo}/{owner}_{repo}_commit_comments.csv`" - ] - }, - { - "cell_type": "markdown", - "id": "99981420", - "metadata": {}, - "source": [ - "### When to move on to Notebook 5\n", - "\n", - "Move to Notebook 5 after all of the following are true:\n", - "\n", - "1. The commit-comment JSON files have been downloaded successfully.\n", - "2. 
The parsed table `all_commit_comments` looks correct in RStudio.\n", - "3. The CSV file exists at:\n", - " `rawdata/github/{owner}/{repo}/{owner}_{repo}_commit_comments.csv`\n", - "4. Spot-check a few rows to confirm key fields (such as `comment_id`, `commit_id`, and `author_login`) are populated as expected." - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/05_download_PR_inline_comments.ipynb b/05_download_PR_inline_comments.ipynb deleted file mode 100644 index 3cbcbfe..0000000 --- a/05_download_PR_inline_comments.ipynb +++ /dev/null @@ -1,186 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "79eaeb30", - "metadata": {}, - "source": [ - "# Download PR Inline Comments with Kaiaulu (Notebook 5)\n", - "\n", - "This notebook shows how to download pull request inline comments using Kaiaulu’s `download_github_pull_request_comments.Rmd` notebook in the `/vignettes` folder." - ] - }, - { - "cell_type": "markdown", - "id": "edcea498", - "metadata": {}, - "source": [ - "### Planned Output\n", - "\n", - "1. A parsed commit-comments CSV saved to `rawdata/github/{owner}/{repo}/{owner}_{repo}_pr_inline_comments.csv` in Kaiaulu" - ] - }, - { - "cell_type": "markdown", - "id": "4f30bc9f", - "metadata": {}, - "source": [ - "Before starting, complete Steps 1 and 2 in `04_download_commit_comments.ipynb` (confirm working directory and create a GitHub personal access token).\n", - "\n", - "### Step 1: Run download_github_pull_request_comments.Rmd chunks in RStudio\n", - "\n", - "Run the following chunks in **RStudio**. 
These chunks should already exist in `download_github_pull_request_comments.Rmd`.\n", - "\n", - "### Chunk 1: Set up dependencies\n", - "\n", - "---\n", - "\n", - "```{r warning=FALSE,message=FALSE}\n", - "rm(list = ls())\n", - "require(kaiaulu)\n", - "require(data.table)\n", - "require(jsonlite)\n", - "require(knitr)\n", - "require(magrittr)\n", - "require(gt)\n", - "require(lubridate)\n", - "```\n", - "\n", - "--- \n", - "\n", - "### Chunk 2: Set required parameters\n", - "\n", - "Replace `kaiaulu.yml` with the `.yml` file for the project you want to process. You created these files in Step 4 of Notebook 3 (`03_scale_config_files.ipynb`).\n", - "\n", - "---\n", - "\n", - "```{r warning=FALSE}\n", - "conf <- parse_config(\"../conf/kaiaulu.yml\")\n", - "owner <- get_github_owner(conf, \"project_key_1\") # Has to match github organization (e.g. github.com/sailuh)\n", - "repo <- get_github_repo(conf, \"project_key_1\") # Has to match github repository (e.g. github.com/sailuh/perceive)\n", - "\n", - "# Path you wish to save all raw data.\n", - "save_path_pull_request <- get_github_pull_request_path(conf, \"project_key_1\")\n", - "save_path_pr_comments <- get_github_pr_comments_path(conf, \"project_key_1\")\n", - "save_path_issue_or_pr_comments <- get_github_issue_or_pr_comment_path(conf, \"project_key_1\")\n", - "save_path_pr_reviews <- get_github_pr_review_path(conf, \"project_key_1\")\n", - "\n", - "# Lower API \n", - "save_path_pull_request <- get_github_pull_request_path(conf, \"project_key_1\")\n", - "save_path_pr_commits <- get_github_pr_commits_path(conf, \"project_key_1\")\n", - "save_path_pr_files <- get_github_pr_files_path(conf, \"project_key_1\")\n", - "save_path_pr_reviews <- get_github_pr_review_path(conf, \"project_key_1\")\n", - "save_path_pr_comments <- get_github_pr_comments_path(conf, \"project_key_1\")\n", - "\n", - "# Create all folder directories\n", - "#create_file_directory(conf)\n", - "```\n", - "\n", - "---\n", - "\n", - "### Chunk 3: 
Personal Access Token\n", - "\n", - "Point to the GitHub token created in Step 2 of Notebook 4.\n", - "\n", - "---\n", - "\n", - "```{r Scan GitHub Token}\n", - "# your file github_token (a text file) contains the GitHub token API\n", - "token <- scan(\"~/.ssh/github_token\",what=\"character\",quiet=TRUE)\n", - "```\n", - "\n", - "---\n", - "\n", - "### Chunk 4: Download Pull Request In-Line Code Comments\n", - "\n", - "This chunk downloads PR inline-comment JSON files into `rawdata` in your current working directory. The runtime depends on how many comments the project has.\n", - "\n", - "**IMPORTANT:** This chunk uses `gh_next()` to fetch paginated results and expects `gh` version 1.2.0. If you see a `gh_next()` paging bug (for example, repeated writes to the same page), downgrade to `gh` 1.2.0.\n", - "\n", - "--- \n", - "\n", - "```{r Collect Comments from Pull Requests, eval = FALSE}\n", - "dir.create(save_path_pr_comments, recursive = TRUE, showWarnings = FALSE)\n", - "gh_response <- github_api_project_pull_request_inline_comments_refresh(owner, repo, token, save_path_pr_comments)\n", - "github_api_iterate_pages(token, gh_response, save_path_pr_comments, prefix=\"pr_comments\")\n", - "```\n", - "\n", - "---\n", - "\n", - "### Chunk 5: Parse PR Inline Comments\n", - "\n", - "After all JSON files are downloaded, run the parse chunk for PR inline comments. 
You should see a table named `inline_comments` in your R environment with columns such as `review_id`, `comment_id`, `html_url`, `created_at`, `updated_at`, `comment_user_login`, `author_association`, `file_path`, `start_line`, `line`, `original_start_line`, `original_line`, `position`, `diff_hunk`, `body`, and `commit_id`.\n", - "\n", - "---\n", - "\n", - "```{r Parse Comments from Pull Requests}\n", - "inline_comments <- lapply(list.files(save_path_pr_comments, full.names = TRUE), read_json)\n", - "inline_comments <- lapply(inline_comments, github_parse_project_pull_request_inline_comments)\n", - "inline_comments <- rbindlist(inline_comments, fill = TRUE)\n", - "head(inline_comments,2) %>%\n", - " gt(auto_align = FALSE) \n", - "```\n", - "\n", - "---\n", - "\n", - "If `fwrite` complains about list/`NULL` columns (common for line/position fields), copy this chunk and run it right after the parse chunk:\n", - "\n", - "```{r Create CSV for Parsed Comments}\n", - "as_char_or_na <- function(x) {\n", - " if (is.null(x) || length(x) == 0) return(NA_character_)\n", - " if (is.list(x)) {\n", - " return(vapply(x, function(e) {\n", - " if (is.null(e) || length(e) == 0) NA_character_ else as.character(e[[1]])\n", - " }, character(1)))\n", - " }\n", - " as.character(x)\n", - "}\n", - "as_int_or_na <- function(x) {\n", - " if (is.null(x) || length(x) == 0) return(NA_integer_)\n", - " if (is.list(x)) {\n", - " return(vapply(x, function(e) {\n", - " if (is.null(e) || length(e) == 0) NA_integer_ else suppressWarnings(as.integer(e[[1]]))\n", - " }, integer(1)))\n", - " }\n", - " suppressWarnings(as.integer(x))\n", - "}\n", - "\n", - "for (nm in intersect(c(\"file_path\",\"diff_hunk\",\"body\",\"html_url\",\"created_at\",\"updated_at\",\"comment_user_login\",\"author_association\",\"commit_id\"), names(inline_comments))) {\n", - " if (is.list(inline_comments[[nm]])) inline_comments[[nm]] <- as_char_or_na(inline_comments[[nm]])\n", - "}\n", - "for (nm in 
intersect(c(\"review_id\",\"comment_id\",\"start_line\",\"line\",\"original_start_line\",\"original_line\",\"position\"), names(inline_comments))) {\n", - " if (is.list(inline_comments[[nm]])) inline_comments[[nm]] <- as_int_or_na(inline_comments[[nm]])\n", - "}\n", - "\n", - "out_csv <- file.path(dirname(save_path_pr_comments), paste0(owner, \"_\", repo, \"_pr_inline_comments.csv\"))\n", - "data.table::fwrite(inline_comments, out_csv)\n", - "cat(\"Saved:\", out_csv, \"\\n\")\n", - "```\n", - "\n", - "### Final Output\n", - "\n", - "Final output path:\n", - "`rawdata/github/{owner}/{repo}/{owner}_{repo}_pr_inline_comments.csv`" - ] - }, - { - "cell_type": "markdown", - "id": "1e3807c9", - "metadata": {}, - "source": [ - "### Next Steps\n", - "\n", - "1. Run Notebooks 4 and 5 for each project configuration (`.yml`) you want to process.\n", - "2. Confirm that each run generates the expected commit-comment and PR inline-comment CSV outputs.\n", - "3. Use `comment_id` as the join key to transfer sentiment labels to both commit comments and PR inline comments." - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/01_load_sentiment_csv_to_mysql.ipynb b/notebooks/01_load_sentiment_csv_to_mysql.ipynb new file mode 100644 index 0000000..05b5841 --- /dev/null +++ b/notebooks/01_load_sentiment_csv_to_mysql.ipynb @@ -0,0 +1,677 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cell-title", + "metadata": {}, + "source": [ + "# Load Sentiment CSV into GHTorrent MySQL (Notebook 1)\n", + "\n", + "This notebook loads the GitHub Gold Standard sentiment CSV into a MySQL database that contains the GHTorrent 2004 dump. \n", + "\n", + "The Gold Standard dataset (`github_gold.csv`) contains 7,122 GitHub pull request and commit comments that were manually annotated by researchers with sentiment polarity labels (positive, negative, or neutral). It was published by Novielli et al. 
2020 in [\"Can We Use SE-specific Sentiment Analysis Tools in a Cross-Platform Setting?\"](https://doi.org/10.1145/3379597.3387446).\n", + "\n", + "The GHTorrent 2014 MySQL database contains the contextual project and commit data from GitHub (e.g., author, timestamp, etc.). By joining the Gold Standard sentiment CSV to this database, we recover the context that is missing from the CSV. Specifically, which project each comment belongs to, who wrote it, and when. \n", + "\n", + "That context enables two types of dataset expansion using Kaiaulu:\n", + "- **Temporal expansion**: re-download the same projects' comments from 2014 through 2025. This captures comments that were posted after the Gold Standard was originally collected\n", + "- **Horizontal expansion**: download additional data sources for those same projects (e.g., source code, version control history) that can be linked back to sentiment labels\n", + "\n", + "### Planned Output\n", + "By the end of this notebook, you should have:\n", + "1. A `comment_sentiment` table in MySQL with all 7,122 rows from the CSV\n", + "2. Query results confirming each sentiment comment joins to a commit and project in GHTorrent\n", + "3. A ranked view of which projects have the most sentiment-labeled comments" + ] + }, + { + "cell_type": "markdown", + "id": "cell-prereqs", + "metadata": {}, + "source": [ + "### Step 1: Get the data ready\n", + "\n", + "Before running any code in this notebook:\n", + "\n", + "1. Download the [GitHub Gold Standard dataset](https://figshare.com/articles/dataset/A_gold_standard_for_polarity_of_emotions_of_software_developers_in_GitHub/11604597?file=21001260) (`github_gold.csv`).\n", + "2. Download the [GHTorrent 2014 MySQL Database Dump](https://web.archive.org/web/20150206005357/http://ghtorrent.org/msr14.html) (use the MySQL dump). Once downloaded, load it into your local MySQL instance\n", + "- Option A: MySQL Workbench\n", + " 1. Open MySQL Workbench and connect to your local server\n", + " 2. 
Go to Server → Data Import\n", + " 3. Select Import from Self-Contained File and choose the downloaded `.sql` file\n", + " 4. Under Default Target Schema, type a name for the database (e.g. `github`). Create it first if needed via File → New Query Tab → `CREATE DATABASE github;`\n", + " 5. Click 'Start Import' and wait for it to finish (this may take several minutes)\n", + "\n", + "- Option B: terminal\n", + " ```bash\n", + " # Create the database first\n", + " mysql -u root -p -e \"CREATE DATABASE github;\"\n", + "\n", + " # Load the dump (replace the path with wherever you saved the file)\n", + " mysql -u root -p github < /path/to/msr14-mysql.sql\n", + " ```\n", + "\n", + "After loading, verify the import worked by running the following in Workbench or the terminal, you should see tables like `projects`, `commits`, `commit_comments`, `pull_requests`, and `users`:\n", + "\n", + "```sql\n", + "USE github;\n", + "SHOW TABLES;\n", + "```\n", + "\n", + "Optional reference: [GHTorrent schema diagram](https://web.archive.org/web/20150206005412/http://ghtorrent.org/relational.html)." + ] + }, + { + "cell_type": "markdown", + "id": "cell-step2-header", + "metadata": {}, + "source": [ + "### Step 2: Install and import dependencies\n", + "\n", + "This notebook connects to MySQL directly from Python using `mysql-connector-python`, which lets every query run inline and display results as DataFrames. `pandas` and `sqlalchemy` handle query results and the DataFrame display." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cell-imports", + "metadata": {}, + "outputs": [], + "source": [ + "import mysql.connector\n", + "import pandas as pd\n", + "from sqlalchemy import create_engine, text" + ] + }, + { + "cell_type": "markdown", + "id": "cell-step3-header", + "metadata": {}, + "source": [ + "### Step 3: Configure your MySQL connection\n", + "\n", + "Update the variables below to match your local MySQL setup. 
`CSV_PATH` should point to the location where you saved `github_gold.csv`." + ] + }, + { + "cell_type": "markdown", + "id": "23c4b3f0", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cell-config", + "metadata": {}, + "outputs": [], + "source": [ + "MYSQL_HOST = \"localhost\"\n", + "MYSQL_PORT = 3306\n", + "MYSQL_USER = \"root\"\n", + "MYSQL_PASSWORD = \"ADD_PASSWORD_HERE\"\n", + "MYSQL_DB = \"github\" # name of the database where GHTorrent was loaded\n", + "\n", + "# Absolute path to github_gold.csv on your machine\n", + "CSV_PATH = \"/Users/sheilalimon/Downloads/github_gold.csv\"" + ] + }, + { + "cell_type": "markdown", + "id": "cell-step4-header", + "metadata": {}, + "source": [ + "### Step 4: Create the `comment_sentiment` table\n", + "\n", + "Create a fresh table with three columns matching the CSV structure: `ID` (GitHub comment ID), `Polarity` (positive/negative/neutral), and `Text` (the comment body).\n", + "\n", + "The `ID` column is what we use to join sentiment labels to commit comments and PR comments in GHTorrent. The `comment_id` field in those tables corresponds directly to this `ID`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cell-create-table", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dropped existing comment_sentiment table (if any).\n", + "Created comment_sentiment table.\n" + ] + } + ], + "source": [ + "conn = mysql.connector.connect(\n", + " host=MYSQL_HOST,\n", + " port=MYSQL_PORT,\n", + " user=MYSQL_USER,\n", + " password=MYSQL_PASSWORD,\n", + " database=MYSQL_DB\n", + ")\n", + "cursor = conn.cursor()\n", + "\n", + "cursor.execute(\"DROP TABLE IF EXISTS comment_sentiment;\")\n", + "print(\"Dropped existing comment_sentiment table (if any).\")\n", + "\n", + "cursor.execute(\"\"\"\n", + " CREATE TABLE comment_sentiment (\n", + " ID INT NULL,\n", + " Polarity VARCHAR(256) NULL,\n", + " Text TEXT NULL\n", + " );\n", + "\"\"\")\n", + "conn.commit()\n", + "print(\"Created comment_sentiment table.\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-step5-header", + "metadata": {}, + "source": [ + "### Step 5: Load the CSV into MySQL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-load-csv", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Inserted 7122 rows into comment_sentiment.\n" + ] + } + ], + "source": [ + "import csv\n", + "\n", + "insert_sql = \"INSERT INTO comment_sentiment (ID, Polarity, Text) VALUES (%s, %s, %s)\"\n", + "\n", + "rows_inserted = 0\n", + "with open(CSV_PATH, newline='', encoding='utf-8') as f:\n", + " reader = csv.reader(f, delimiter=';', quotechar='\"')\n", + " next(reader) # skip header row\n", + " for row in reader:\n", + " if len(row) >= 3:\n", + " cursor.execute(insert_sql, (row[0] or None, row[1] or None, row[2] or None))\n", + " rows_inserted += 1\n", + "\n", + "conn.commit()\n", + "print(f\"Inserted {rows_inserted} rows into comment_sentiment.\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-step6-header", + "metadata": {}, + "source": 
[ + "### Step 6: Validate the load\n", + "\n", + "We expect 7,122 rows (one per annotated comment in the Gold Standard dataset).\n", + "\n", + "We also check that each row has a unique `ID`, since these IDs are what we use to join to the GHTorrent MySQL database dump." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-validate", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total rows : 7122 (expected 7122)\n", + "Distinct IDs : 7122 (expected 7122)\n", + "PASS: all rows loaded and all IDs are unique.\n" + ] + } + ], + "source": [ + "engine = create_engine(\n", + " f\"mysql+mysqlconnector://{MYSQL_USER}:{MYSQL_PASSWORD}@{MYSQL_HOST}:{MYSQL_PORT}/{MYSQL_DB}\"\n", + ")\n", + "\n", + "with engine.connect() as con:\n", + " total_rows = pd.read_sql(text(\"SELECT COUNT(*) AS total_rows FROM comment_sentiment;\"), con)\n", + " distinct_ids = pd.read_sql(text(\"SELECT COUNT(DISTINCT ID) AS distinct_ids FROM comment_sentiment;\"), con)\n", + "\n", + "total = total_rows['total_rows'].iloc[0]\n", + "unique = distinct_ids['distinct_ids'].iloc[0]\n", + "\n", + "print(f\"Total rows : {total} (expected 7122)\")\n", + "print(f\"Distinct IDs : {unique} (expected 7122)\")\n", + "\n", + "if total == 7122 and unique == 7122:\n", + " print(\"PASS: all rows loaded and all IDs are unique.\")\n", + "else:\n", + " print(\"WARNING: counts do not match expected values. 
Re-check CSV path and delimiter.\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-step7-header", + "metadata": {}, + "source": [ + "### Step 7: Query 1 - join sentiment comments to commits and projects\n", + "\n", + "This query joins `comment_sentiment` → `commit_comments` → `commits` → `projects` to confirm that the sentiment IDs match to commit comments in GHTorrent.\n", + "\n", + "If the result is empty, the IDs in the CSV do not match the `comment_id` values in GHTorrent's `commit_comments` table, which means either the wrong dump was loaded or the database name is incorrect." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-query1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Rows returned (showing up to 10): 10\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sentiment_idpolarityproject_nameproject_urlcommit_shacomment_text
04063186neutraljekyllhttps://api.github.com/repos/mojombo/jekyllcb521b7f9a6887051b982a2053cd402ff019594eNo. I still see the wrong twins.  * https://gi...
13894703neutraljqueryhttps://api.github.com/repos/jquery/jqueryf6e86c3ca4d527d5453a0b5b9591ef38b5d3c000Reverted.\"
21971084neutralMaNGOShttps://api.github.com/repos/mangos/MaNGOSabfc99ef522b8b6353d051a002d026530ec7d253You can leave a queue while in queue ? (before...
31827828positiveMaNGOShttps://api.github.com/repos/mangos/MaNGOS915b77339711ec1278ac06ec80d206133bdb427aDidn't look at SpellTargetRestrictions XD\"
4232603neutralclojurehttps://api.github.com/repos/clojure/clojureb43bf20e1ba864c817ada237042cfdc8922831c0Not sure about what kind of line lengths the p...
53565454positivenettyhttps://api.github.com/repos/netty/netty1fee1ef74ed8ac515c19a7f8eebd16f41a37b7b6@normanmaurer Nice catch ! Did you make the sa...
63504879neutralnettyhttps://api.github.com/repos/netty/nettycfd514d099fb41b2a467ca208fe1334bb04f8f6cThat's why I didn't close after sending the cl...
73413199neutralnettyhttps://api.github.com/repos/netty/netty78d8f05c218cab107255c4dc1a1344aef138d379Build result for 78d8f05c218cab107255c4dc1a134...
83404541neutralnettyhttps://api.github.com/repos/netty/nettyfd0084ecfa254bc5f619f50ec50a8cb8e3cc083eWhy you think using ImmediateEventExecutor is ...
92290082neutraljqueryhttps://api.github.com/repos/jquery/jquerycef044d82ec0d338b2b69756d3ba08692fb80ae4These are the ones we currently hardcode in Te...
\n", + "
" + ], + "text/plain": [ + " sentiment_id polarity project_name \\\n", + "0 4063186 neutral jekyll \n", + "1 3894703 neutral jquery \n", + "2 1971084 neutral MaNGOS \n", + "3 1827828 positive MaNGOS \n", + "4 232603 neutral clojure \n", + "5 3565454 positive netty \n", + "6 3504879 neutral netty \n", + "7 3413199 neutral netty \n", + "8 3404541 neutral netty \n", + "9 2290082 neutral jquery \n", + "\n", + " project_url \\\n", + "0 https://api.github.com/repos/mojombo/jekyll \n", + "1 https://api.github.com/repos/jquery/jquery \n", + "2 https://api.github.com/repos/mangos/MaNGOS \n", + "3 https://api.github.com/repos/mangos/MaNGOS \n", + "4 https://api.github.com/repos/clojure/clojure \n", + "5 https://api.github.com/repos/netty/netty \n", + "6 https://api.github.com/repos/netty/netty \n", + "7 https://api.github.com/repos/netty/netty \n", + "8 https://api.github.com/repos/netty/netty \n", + "9 https://api.github.com/repos/jquery/jquery \n", + "\n", + " commit_sha \\\n", + "0 cb521b7f9a6887051b982a2053cd402ff019594e \n", + "1 f6e86c3ca4d527d5453a0b5b9591ef38b5d3c000 \n", + "2 abfc99ef522b8b6353d051a002d026530ec7d253 \n", + "3 915b77339711ec1278ac06ec80d206133bdb427a \n", + "4 b43bf20e1ba864c817ada237042cfdc8922831c0 \n", + "5 1fee1ef74ed8ac515c19a7f8eebd16f41a37b7b6 \n", + "6 cfd514d099fb41b2a467ca208fe1334bb04f8f6c \n", + "7 78d8f05c218cab107255c4dc1a1344aef138d379 \n", + "8 fd0084ecfa254bc5f619f50ec50a8cb8e3cc083e \n", + "9 cef044d82ec0d338b2b69756d3ba08692fb80ae4 \n", + "\n", + " comment_text \n", + "0 No. I still see the wrong twins. * https://gi... \n", + "1 Reverted.\" \n", + "2 You can leave a queue while in queue ? (before... \n", + "3 Didn't look at SpellTargetRestrictions XD\" \n", + "4 Not sure about what kind of line lengths the p... \n", + "5 @normanmaurer Nice catch ! Did you make the sa... \n", + "6 That's why I didn't close after sending the cl... \n", + "7 Build result for 78d8f05c218cab107255c4dc1a134... 
\n", + "8 Why you think using ImmediateEventExecutor is ... \n", + "9 These are the ones we currently hardcode in Te... " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query1 = \"\"\"\n", + "SELECT\n", + " s.ID AS sentiment_id,\n", + " s.Polarity AS polarity,\n", + " p.name AS project_name,\n", + " p.url AS project_url,\n", + " c.sha AS commit_sha,\n", + " s.Text AS comment_text\n", + "FROM comment_sentiment s\n", + "INNER JOIN commit_comments cc ON s.ID = cc.comment_id\n", + "INNER JOIN commits c ON c.id = cc.commit_id\n", + "INNER JOIN projects p ON c.project_id = p.id\n", + "LIMIT 10;\n", + "\"\"\"\n", + "\n", + "with engine.connect() as con:\n", + " result1 = pd.read_sql(text(query1), con)\n", + "\n", + "print(f\"Rows returned (showing up to 10): {len(result1)}\")\n", + "result1" + ] + }, + { + "cell_type": "markdown", + "id": "cell-step8-header", + "metadata": {}, + "source": [ + "### Step 8: Query 2 - count sentiment-labeled comments by project\n", + "\n", + "This query ranks projects by how many of their commit comments were included in the Gold Standard Dataset. This tells us which projects are most represented in the labeled dataset. \n", + "\n", + "These are the same projects we will later generate Kaiaulu config files for in Notebook 3, so we can re-download their comment history and expand the dataset through temporal or horizontal expansion." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-query2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Projects with sentiment-labeled commit comments: 85\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
project_namelabeled_comment_count
0TrinityCore813
1MaNGOS623
2rails557
3CraftBukkit363
4jquery246
.........
80impress.js1
81ccv1
82hiphop-php1
83uwom-server1
84jquery-nodom1
\n", + "

85 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " project_name labeled_comment_count\n", + "0 TrinityCore 813\n", + "1 MaNGOS 623\n", + "2 rails 557\n", + "3 CraftBukkit 363\n", + "4 jquery 246\n", + ".. ... ...\n", + "80 impress.js 1\n", + "81 ccv 1\n", + "82 hiphop-php 1\n", + "83 uwom-server 1\n", + "84 jquery-nodom 1\n", + "\n", + "[85 rows x 2 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query2 = \"\"\"\n", + "SELECT\n", + " p.name AS project_name,\n", + " COUNT(s.ID) AS labeled_comment_count\n", + "FROM comment_sentiment s\n", + "INNER JOIN commit_comments cc ON s.ID = cc.comment_id\n", + "INNER JOIN commits c ON c.id = cc.commit_id\n", + "INNER JOIN projects p ON c.project_id = p.id\n", + "GROUP BY p.name\n", + "ORDER BY labeled_comment_count DESC;\n", + "\"\"\"\n", + "\n", + "with engine.connect() as con:\n", + " result2 = pd.read_sql(text(query2), con)\n", + "\n", + "print(f\"Projects with sentiment-labeled commit comments: {len(result2)}\")\n", + "result2" + ] + }, + { + "cell_type": "markdown", + "id": "cell-transition", + "metadata": {}, + "source": [ + "### When to move to Notebook 2\n", + "\n", + "Move to Notebook 2 only after all of the following are true:\n", + "\n", + "1. `total_rows = 7122` and `distinct_ids = 7122` (Step 6 shows PASS)\n", + "2. Query 1 (Step 7) returns at least one row. This means sentiment IDs are successfully joining to GHTorrent commits and projects\n", + "3. Query 2 (Step 8) returns a non-empty project ranking\n", + "\n", + "If Query 1 or 2 return zero rows but validation passed, the most likely cause is that the wrong GHTorrent dump was loaded (the dump must be the MSR 2014 version linked in Step 1)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/02_explore_gh_torrent_tables.ipynb b/notebooks/02_explore_gh_torrent_tables.ipynb new file mode 100644 index 0000000..4d14460 --- /dev/null +++ b/notebooks/02_explore_gh_torrent_tables.ipynb @@ -0,0 +1,413 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1b3dd1e0", + "metadata": {}, + "source": [ + "# Explore GHTorrent Tables for Sentiment Mapping (Notebook 2)\n", + "\n", + "This notebook explores the GHTorrent database to understand where the 7,122 Gold Standard sentiment comments are stored and how they connect to real projects.\n", + "\n", + "The goal is to answer which projects have sentiment-labeled comments and whether those comments are reachable from a canonical (non-fork) repository.\n", + "\n", + "### Planned Output\n", + "By the end of this notebook, you should have:\n", + "1. A breakdown of how sentiment comments are split between commit comment and PR comment tables\n", + "2. A ranked list of projects with the most sentiment-labeled commit comments\n", + "3. A ranked list of projects with the most sentiment-labeled PR comments\n", + "4. A global summary of how many sentiment comments are reachable via canonical repos vs. forks only" + ] + }, + { + "cell_type": "markdown", + "id": "cell-step1-header", + "metadata": {}, + "source": [ + "### Step 1: Import dependencies and connect to MySQL\n", + "\n", + "We reuse the same connection pattern as Notebook 1. Update the credentials below to match your local MySQL setup." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cell-imports", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sqlalchemy import create_engine, text" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cell-config", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connected to MySQL.\n" + ] + } + ], + "source": [ + "MYSQL_HOST = \"localhost\"\n", + "MYSQL_PORT = 3306\n", + "MYSQL_USER = \"root\"\n", + "MYSQL_PASSWORD = \"password\"\n", + "MYSQL_DB = \"github\"\n", + "\n", + "engine = create_engine(\n", + " f\"mysql+mysqlconnector://{MYSQL_USER}:{MYSQL_PASSWORD}@{MYSQL_HOST}:{MYSQL_PORT}/{MYSQL_DB}\"\n", + ")\n", + "print(\"Connected to MySQL.\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-check1-header", + "metadata": {}, + "source": [ + "### Check 1: How sentiment comments are distributed across tables\n", + "\n", + "There are two kinds of GitHub comments in GHTorrent: commit comments (discussions posted about commits) and PR inline comments (left on a specific line of code in a pull request).\n", + "\n", + "The Gold Standard dataset includes both types. The same `ID` value maps to `comment_id` in both `commit_comments` and `pull_request_comments`.\n", + "\n", + "This check tells us how many sentiment IDs join to each table. Some IDs appear in both tables (overlap = 85), meaning a small number of comments were captured under both endpoints in GHTorrent. 
The total unique IDs should sum to 7,122.\n", + "\n", + "Expected values:\n", + "- Commit comment matches: ~4,317\n", + "- PR comment matches: ~2,890\n", + "- Overlap (both): ~85\n", + "- Commit-only: 4,232 | PR-only: 2,805 | Total unique: 7,122" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-check1", + "metadata": {}, + "outputs": [], + "source": [ + "with engine.connect() as con:\n", + " commit_count = pd.read_sql(text(\"\"\"\n", + " SELECT COUNT(*) AS commit_comment_matches\n", + " FROM comment_sentiment s\n", + " INNER JOIN commit_comments cc ON s.ID = cc.comment_id;\n", + " \"\"\"), con).iloc[0, 0]\n", + "\n", + " pr_count = pd.read_sql(text(\"\"\"\n", + " SELECT COUNT(*) AS pr_comment_matches\n", + " FROM comment_sentiment s\n", + " INNER JOIN pull_request_comments prc ON s.ID = prc.comment_id;\n", + " \"\"\"), con).iloc[0, 0]\n", + "\n", + " overlap = pd.read_sql(text(\"\"\"\n", + " SELECT COUNT(*) AS overlap\n", + " FROM comment_sentiment s\n", + " INNER JOIN commit_comments cc ON s.ID = cc.comment_id\n", + " INNER JOIN pull_request_comments prc ON s.ID = prc.comment_id;\n", + " \"\"\"), con).iloc[0, 0]\n", + "\n", + "commit_only = commit_count - overlap\n", + "pr_only = pr_count - overlap\n", + "total_unique = commit_only + pr_only + overlap\n", + "\n", + "summary = pd.DataFrame({\n", + " 'Category': ['Commit matches', 'PR matches', 'Overlap (both)', 'Commit-only', 'PR-only', 'Total unique'],\n", + " 'Count': [commit_count, pr_count, overlap, commit_only, pr_only, total_unique],\n", + " 'Expected': [4317, 2890, 85, 4232, 2805, 7122]\n", + "})\n", + "display(summary)\n", + "\n", + "if total_unique == 7122:\n", + " print(\"PASS: total unique IDs = 7122.\")\n", + "else:\n", + " print(f\"WARNING: total unique IDs = {total_unique}, expected 7122.\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-check2-header", + "metadata": {}, + "source": [ + "### Check 2: Projects with the most sentiment-labeled commit comments\n", + 
"\n", + "This query ranks projects by how many of their commit comments are in the Gold Standard. This tells us which projects are most heavily represented in the labeled data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-check2", + "metadata": {}, + "outputs": [], + "source": [ + "query_check2 = \"\"\"\n", + "SELECT\n", + " p.id AS project_id,\n", + " p.name AS project_name,\n", + " p.url AS project_url,\n", + " COUNT(DISTINCT s.ID) AS labeled_comment_count\n", + "FROM projects p\n", + "INNER JOIN commits c ON p.id = c.project_id\n", + "INNER JOIN commit_comments cc ON c.id = cc.commit_id\n", + "INNER JOIN comment_sentiment s ON cc.comment_id = s.ID\n", + "GROUP BY p.id, p.name, p.url\n", + "ORDER BY labeled_comment_count DESC;\n", + "\"\"\"\n", + "\n", + "with engine.connect() as con:\n", + " check2 = pd.read_sql(text(query_check2), con)\n", + "\n", + "print(f\"Projects with sentiment-labeled commit comments: {len(check2)}\")\n", + "display(check2)" + ] + }, + { + "cell_type": "markdown", + "id": "cell-check3-header", + "metadata": {}, + "source": [ + "### Check 3: Projects with the most sentiment-labeled PR comments\n", + "\n", + "Same ranking, but for pull request inline comments. PR inline comments are joined through `pull_requests` via `base_repo_id`. The base repo is the canoonical project the PR was opened in." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-check3", + "metadata": {}, + "outputs": [], + "source": [ + "query_check3 = \"\"\"\n", + "SELECT\n", + " p.id AS project_id,\n", + " p.name AS project_name,\n", + " p.url AS project_url,\n", + " COUNT(DISTINCT s.ID) AS labeled_comment_count\n", + "FROM projects p\n", + "INNER JOIN pull_requests pr ON p.id = pr.base_repo_id\n", + "INNER JOIN pull_request_comments prc ON pr.id = prc.pull_request_id\n", + "INNER JOIN comment_sentiment s ON prc.comment_id = s.ID\n", + "GROUP BY p.id, p.name, p.url\n", + "ORDER BY labeled_comment_count DESC;\n", + "\"\"\"\n", + "\n", + "with engine.connect() as con:\n", + " check3 = pd.read_sql(text(query_check3), con)\n", + "\n", + "print(f\"Projects with sentiment-labeled PR comments: {len(check3)}\")\n", + "display(check3)" + ] + }, + { + "cell_type": "markdown", + "id": "cell-check4-header", + "metadata": {}, + "source": [ + "### Check 4: Canonical repo vs fork accessibility\n", + "\n", + "GitHub projects get forked frequently. A fork shares the same commit history as its upstream repo, which means the same comment IDs can appear under both the original (canonical) project and one or more forks in GHTorrent.\n", + "\n", + "This matters for Notebook 3. When we generate Kaiaulu config files, we only want to target canonical repos. 
Downloading from a fork is redundant since the canonical repo already contains all the same commits.\n", + "\n", + "Expected values:\n", + "- `canonical_only`: ~4,555\n", + "- `fork_only`: ~569 (these will be missed when targeting canonical repos only)\n", + "- `both_sides`: ~2,083\n", + "- Canonical reachable rate: ~92.1%" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cell-check4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Canonical vs fork accessibility summary:\n", + " canonical_only : 4555.0 (expected ~4555)\n", + " fork_only : 569.0 (expected ~569)\n", + " both_sides : 2083.0 (expected ~2083)\n", + " fork_only_pct : 7.9% (expected ~7.9%)\n", + " canonical_reachable % : 92.1% (expected ~92.1%)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canonical_onlyfork_onlyboth_sidesfork_only_pctcanonical_reachable_pct
04555.0569.02083.07.992.1
\n", + "
" + ], + "text/plain": [ + " canonical_only fork_only both_sides fork_only_pct \\\n", + "0 4555.0 569.0 2083.0 7.9 \n", + "\n", + " canonical_reachable_pct \n", + "0 92.1 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "query_check4 = \"\"\"\n", + "WITH RECURSIVE project_root AS (\n", + " SELECT p.id AS project_id, p.id AS root_id\n", + " FROM projects p\n", + " WHERE p.forked_from IS NULL\n", + " UNION ALL\n", + " SELECT c.id AS project_id, pr.root_id\n", + " FROM projects c\n", + " JOIN project_root pr ON c.forked_from = pr.project_id\n", + "),\n", + "comment_project_rows AS (\n", + " SELECT cs.ID AS comment_id, c.project_id, 'commit_comment' AS source_tag\n", + " FROM comment_sentiment cs\n", + " JOIN commit_comments cc ON cs.ID = cc.comment_id\n", + " JOIN commits c ON cc.commit_id = c.id\n", + " UNION ALL\n", + " SELECT cs.ID AS comment_id, pr.base_repo_id AS project_id, 'pr_comment' AS source_tag\n", + " FROM comment_sentiment cs\n", + " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", + " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", + " UNION ALL\n", + " SELECT cs.ID AS comment_id, pr.head_repo_id AS project_id, 'pr_comment' AS source_tag\n", + " FROM comment_sentiment cs\n", + " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", + " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", + "),\n", + "labeled AS (\n", + " SELECT\n", + " cpr.comment_id,\n", + " cpr.source_tag,\n", + " pr.root_id,\n", + " (cpr.project_id = pr.root_id) AS is_canonical\n", + " FROM comment_project_rows cpr\n", + " JOIN project_root pr ON pr.project_id = cpr.project_id\n", + "),\n", + "comment_flags AS (\n", + " SELECT\n", + " root_id, source_tag, comment_id,\n", + " MAX(CASE WHEN is_canonical THEN 1 ELSE 0 END) AS has_canonical,\n", + " MAX(CASE WHEN NOT is_canonical THEN 1 ELSE 0 END) AS has_fork\n", + " FROM labeled\n", + " GROUP BY root_id, source_tag, comment_id\n", + "),\n", + "global_counts AS (\n", + 
" SELECT\n", + " COUNT(*) AS mapped_comment_ids,\n", + " SUM(CASE WHEN has_canonical = 1 AND has_fork = 0 THEN 1 ELSE 0 END) AS canonical_only,\n", + " SUM(CASE WHEN has_canonical = 0 AND has_fork = 1 THEN 1 ELSE 0 END) AS fork_only,\n", + " SUM(CASE WHEN has_canonical = 1 AND has_fork = 1 THEN 1 ELSE 0 END) AS both_sides\n", + " FROM comment_flags\n", + ")\n", + "SELECT\n", + " canonical_only,\n", + " fork_only,\n", + " both_sides,\n", + " ROUND(100 * fork_only / NULLIF(mapped_comment_ids, 0), 2) AS fork_only_pct,\n", + " ROUND(100 * (canonical_only + both_sides) / NULLIF(mapped_comment_ids, 0), 2) AS canonical_reachable_pct\n", + "FROM global_counts;\n", + "\"\"\"\n", + "\n", + "with engine.connect() as con:\n", + " check4 = pd.read_sql(text(query_check4), con)\n", + "\n", + "print(\"Canonical vs fork accessibility summary:\")\n", + "print(f\" canonical_only : {check4['canonical_only'].iloc[0]} (expected ~4555)\")\n", + "print(f\" fork_only : {check4['fork_only'].iloc[0]} (expected ~569)\")\n", + "print(f\" both_sides : {check4['both_sides'].iloc[0]} (expected ~2083)\")\n", + "print(f\" fork_only_pct : {check4['fork_only_pct'].iloc[0]}% (expected ~7.9%)\")\n", + "print(f\" canonical_reachable % : {check4['canonical_reachable_pct'].iloc[0]}% (expected ~92.1%)\")\n", + "display(check4)" + ] + }, + { + "cell_type": "markdown", + "id": "52c9ee7e", + "metadata": {}, + "source": [ + "### When to move on to Notebook 3\n", + "\n", + "Move to Notebook 3 when all of the following are true:\n", + "\n", + "1. Check 1: PASS printed and total unique IDs = 7,122\n", + "2. Check 2: returns a non-empty DataFrame of projects with commit comment matches\n", + "3. Check 3: returns a non-empty DataFrame of projects with PR comment matches\n", + "4. Check 4: runs without error and shows a non-zero `canonical_reachable_pct`\n", + "\n", + "If any check returns zero rows, the most likely cause is that `comment_sentiment` was not loaded correctly in Notebook 1. 
Re-run Notebook 1 first.\n", + "\n", + "The ~7.9% of comments that are `fork_only` will be missed when we target only canonical repos in Notebook 3. This is an acceptable tradeoff. We document it here so the limitation is visible." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/03_scale_config_files.ipynb b/notebooks/03_scale_config_files.ipynb similarity index 79% rename from 03_scale_config_files.ipynb rename to notebooks/03_scale_config_files.ipynb index 2c82d59..104c0fb 100644 --- a/03_scale_config_files.ipynb +++ b/notebooks/03_scale_config_files.ipynb @@ -7,11 +7,15 @@ "source": [ "# Scale and Automate Config Generation (Notebook 3)\n", "\n", - "This notebook generates Kaiaulu config files for each main project repo in the GHTorrent database.\n", + "This notebook generates Kaiaulu config files for each canonical project repo that has sentiment-labeled comments in GHTorrent.\n", + "\n", + "Kaiaulu uses `.yml` config files to know which GitHub projects to download data from, where to save the raw JSON files, and how to structure the output. Without a config file for a project, Kaiaulu has no way to download it. In this notebook, we will automatically generate one config per canonical repo. Anyone can re-run the same config against the live GitHub API and get fresh data.\n", + "\n", + "**Why canonical repos only?** As shown in Notebook 2 (Check 4), ~92.1% of sentiment comments are reachable from canonical (non-fork) repos. 
Since the comment coverage from forked repos is minimal, we target only canonical repos since they provide most of the sentiment-labeled coverage.\n", "\n", "**What this notebook does:**\n", - "1. Queries MySQL/GHTorrent to identify canonical repos with sentiment-labeled comments\n", - "2. Generates a `.yml` config file per repo (using `trinitycore.yml` as a template) and writes them to Kaiaulu's `conf/` directory\n", + "1. Queries MySQL/GHTorrent to identify canonical repos with sentiment-labeled comments (~82 repos)\n", + "2. Generates a `.yml` config file per repo using `trinitycore.yml` as a template and writes them to Kaiaulu's `conf/` directory\n", "\n", "**What comes next** — once configs are written, use these Kaiaulu vignettes to download and parse comments:\n", "- `vignettes/download_github_events.Rmd` → commit comments\n", @@ -38,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "1bc36cfe", "metadata": {}, "outputs": [], @@ -60,15 +64,15 @@ "### Step 2: Set Paths and Configuration\n", "\n", "Update the variables below before running:\n", - "- **`KAIAULU_REPO`** — path to your local Kaiaulu repo\n", - "- **`MYSQL_DB`** / **`MYSQL_PASSWORD`** — your database credentials\n", - "- **`MAX_REPOS`** — set to an integer to limit the number of repos processed, or `None` to process all\n", - "- **`WRITE_CONFIGS`** — set to `False` to do a dry run without writing any files" + "- **`KAIAULU_REPO`** - path to your local Kaiaulu repo\n", + "- **`MYSQL_PASSWORD`** / **`MYSQL_DB`** - must match what you used in Notebooks 1 and 2\n", + "- **`MAX_REPOS`** - set to an integer to limit the number of repos processed (useful for a dry run), or `None` to process all 82 project repos\n", + "- **`WRITE_CONFIGS`** - set to `False` to preview without writing any files to disk" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "20fb9b60", "metadata": {}, "outputs": [], @@ -86,9 +90,9 @@ "# MySQL 
connection (override with env vars if needed)\n", "MYSQL_HOST = os.getenv(\"MYSQL_HOST\", \"localhost\")\n", "MYSQL_PORT = int(os.getenv(\"MYSQL_PORT\", \"3306\"))\n", - "MYSQL_DB = os.getenv(\"MYSQL_DB\", \"ADD_DB_NAME_HERE\")\n", + "MYSQL_DB = os.getenv(\"MYSQL_DB\", \"github\")\n", "MYSQL_USER = os.getenv(\"MYSQL_USER\", \"root\")\n", - "MYSQL_PASSWORD = os.getenv(\"MYSQL_PASSWORD\", \"ADD_PASSWORD_HERE\")\n", + "MYSQL_PASSWORD = os.getenv(\"MYSQL_PASSWORD\", \"password\")\n", "\n", "# Toggle writing config files to Kaiaulu conf/\n", "WRITE_CONFIGS = True" @@ -101,17 +105,9 @@ "source": [ "### Step 3: Query Canonical Repos from GHTorrent\n", "\n", - "Queries MySQL to find main (non-fork) repos that have at least one sentiment-labeled comment (commit or PR). Results are loaded into `repos`.\n", + "This query shows canonical repos that have at least one sentiment-labeled commit or PR comment.\n", "\n", - "Expected output (~82 repos):\n", - "\n", - "| | owner | repo |\n", - "|---|---|---|\n", - "| 0 | akka | akka |\n", - "| 1 | antirez | redis |\n", - "| 2 | ariya | phantomjs |\n", - "| 3 | automapper | automapper |\n", - "| 4 | bartaz | impress.js |" + "The expected output is 82 repos." 
] }, { @@ -248,9 +244,13 @@ "source": [ "### Step 4: Generate and Write Config Files\n", "\n", - "Builds a `.yml` config file for each repo using `trinitycore.yml` as a template and writes it to Kaiaulu's `conf/` directory.\n", + "Each config file tells Kaiaulu:\n", + "- Which GitHub owner and repo to target\n", + "- Where to save raw JSON downloads (under `rawdata/github/{owner}/{repo}/`)\n", + "\n", + "Before running this cell, download [trinitycore.yml](https://raw.githubusercontent.com/splimon/kaiaulu-sentiment/refs/heads/378-github-commit-comments-downloader-function/conf/trinitycore.yml) and place it in the `conf/` folder of your local Kaiaulu repo.\n", "\n", - "Each config follows this structure:\n", + "The config files should follow this structure:\n", "```yaml\n", "project:\n", " website: https://github.com/{owner}/{repo}\n", @@ -266,7 +266,7 @@ " pr_comments: rawdata/github/{owner}/{repo}/pr_comments/\n", "```\n", "\n", - "Expected output: a list of written `.yml` filenames, e.g. `['akka.yml', 'redis.yml', ...]`" + "Expected output: a list of 82 written `.yml` filenames, e.g. `['akka.yml', 'redis.yml', ...]`" ] }, { @@ -370,12 +370,16 @@ "id": "oiibwri0k6", "metadata": {}, "source": [ - "### When to Move On to Notebook 4\n", + "### Next Steps\n", + "\n", + "Config files have been written to `kaiaulu/conf/`. 
From here, the workflow moves entirely into Kaiaulu.\n", + "\n", + "To download commit and PR inline comments for each project, follow the Kaiaulu vignettes:\n", "\n", - "Move to Notebook 4 after all of the following are true:\n", + "- **`vignettes/download_github_events.Rmd`** — downloads commit comments via `/repos/{owner}/{repo}/comments` and parses them into a data.table with columns: `comment_id`, `commit_id`, `author_login`, `author_id`, `line`, `created_at`, `updated_at`\n", + "- **`vignettes/download_github_pull_request_comments.Rmd`** — downloads PR inline comments via `/repos/{owner}/{repo}/pulls/comments` and parses them into a data.table with columns: `review_id`, `comment_id`, `html_url`, `created_at`, `updated_at`, `comment_user_login`, `author_association`, `file_path`, `start_line`, `line`, `original_start_line`, `original_line`, `position`, `diff_hunk`, `body`, `commit_id`\n", "\n", - "1. The 82 `.yml` files generated from Step 4 exist in Kaiaulu's `conf/` directory.\n", - "4. Spot-check a few configs to confirm the `owner`, `repo`, and `rawdata/` paths are populated correctly and follow the formatting indicated in Step 4." + "Run both vignettes for each of the 82 configs generated above. Once the CSVs exist at `rawdata/github/{owner}/{repo}/`, proceed to `vignettes/sentiment_analysis.Rmd` to train the sentiment model and generate polarity predictions." 
] } ], From 3b28ffb69a524818a6aa5688434084be0744108f Mon Sep 17 00:00:00 2001 From: splimon Date: Thu, 16 Apr 2026 23:55:18 -1000 Subject: [PATCH 3/8] Add Notebook 4: Inner Join Kaiaulu Comments --- .../01_load_sentiment_csv_to_mysql.ipynb | 233 +- ...2_contextualize_github_gold_standard.ipynb | 5606 +++++++++++++++++ notebooks/02_explore_gh_torrent_tables.ipynb | 413 -- notebooks/03_scale_config_files.ipynb | 132 +- .../04_inner_join_kaiaulu_comments.ipynb | 245 + 5 files changed, 5952 insertions(+), 677 deletions(-) create mode 100644 notebooks/02_contextualize_github_gold_standard.ipynb delete mode 100644 notebooks/02_explore_gh_torrent_tables.ipynb create mode 100644 notebooks/04_inner_join_kaiaulu_comments.ipynb diff --git a/notebooks/01_load_sentiment_csv_to_mysql.ipynb b/notebooks/01_load_sentiment_csv_to_mysql.ipynb index 05b5841..457763a 100644 --- a/notebooks/01_load_sentiment_csv_to_mysql.ipynb +++ b/notebooks/01_load_sentiment_csv_to_mysql.ipynb @@ -5,23 +5,19 @@ "id": "cell-title", "metadata": {}, "source": [ - "# Load Sentiment CSV into GHTorrent MySQL (Notebook 1)\n", + "# Notebook 1: Load the GitHub Gold Standard into MySQL\n", "\n", - "This notebook loads the GitHub Gold Standard sentiment CSV into a MySQL database that contains the GHTorrent 2004 dump. \n", + "The [Gold Standard dataset](https://figshare.com/articles/dataset/A_gold_standard_for_polarity_of_emotions_of_software_developers_in_GitHub/11604597?file=21001260) contains 7,122 GitHub pull request and commit comments that were manually annotated by researchers with sentiment polarity labels (positive, negative, or neutral). It was published by Novielli et al. 
2020 in [\"Can We Use SE-specific Sentiment Analysis Tools in a Cross-Platform Setting?\"](https://doi.org/10.1145/3379597.3387446).\n", "\n", - "The Gold Standard dataset (`github_gold.csv`) contains 7,122 GitHub pull request and commit comments that were manually annotated by researchers with sentiment polarity labels (positive, negative, or neutral). It was published by Novielli et al. 2020 in [\"Can We Use SE-specific Sentiment Analysis Tools in a Cross-Platform Setting?\"](https://doi.org/10.1145/3379597.3387446).\n", + "**Here's the problem**: this dataset contains no contextual information about a comment. We don't know where the comment came from, who wrote it, or when.\n", "\n", - "The GHTorrent 2004 MySQL database contains the contextual project and commit data from GitHub (e.g., author, timestamp, etc.). By joining the Gold Standard sentiment CSV to this database, we recover the context that is missing from the CSV. Specifically, which project each comment belongs to, who wrote it, and when. \n", + "To fix that, we're going to load the Gold Standard CSV into the same MySQL database as the [GHTorrent MSR 2014 dump](https://web.archive.org/web/20150206005357/http://ghtorrent.org/msr14.html). The GHTorrent dump contains the contextual project and commit data from GitHub (e.g., author, timestamp, etc.). By joining the Gold Standard sentiment CSV to this database, we recover the context that is missing from the CSV. Once both datasets are in the same database, we can JOIN them together on the comment ID (`ID` in `comment_sentiment`, `comment_id` in the GHTorrent comment tables).\n", "\n", - "That context enables two types of dataset expansion using Kaiaulu:\n", - "- **Temporal expansion**: re-download the same projects' comments from 2004 through 2025. 
This captures comments that were posted after the Gold Standard was originally collected\n", - "- **Horizontal expansion**: download additional data sources for those same projects (e.g., source code, version control history) that can be linked back to sentiment labels\n", + "This is the first step in a pipeline that ends with sentiment-labeled comment data that Kaiaulu can use for analysis.\n", "\n", - "### Planned Output\n", - "By the end of this notebook, you should have:\n", - "1. A `comment_sentiment` table in MySQL with all 7,122 rows from the CSV\n", - "2. Query results confirming each sentiment comment joins to a commit and project in GHTorrent\n", - "3. A ranked view of which projects have the most sentiment-labeled comments" + "That context enables two types of dataset expansion using Kaiaulu:\n", + "1. **Temporal expansion**: re-download the same projects' comments from 2004 through 2025. This captures comments that were posted after the Gold Standard was originally collected\n", + "2. **Horizontal expansion**: download additional data sources for those same projects (e.g., source code, version control history) that can be linked back to sentiment labels" ] }, { @@ -66,14 +62,12 @@ "id": "cell-step2-header", "metadata": {}, "source": [ - "### Step 2: Install and import dependencies\n", - "\n", - "This notebook connects to MySQL directly from Python using `mysql-connector-python`, which lets every query run inline and display results as DataFrames. `pandas` and `sqlalchemy` handle query results and the DataFrame display." + "### Step 2: Import dependencies" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 20, "id": "cell-imports", "metadata": {}, "outputs": [], @@ -88,20 +82,14 @@ "id": "cell-step3-header", "metadata": {}, "source": [ - "### Step 3: Configure your MySQL connection\n", + "### Step 3: Set your MySQL connection details\n", "\n", "Update the variables below to match your local MySQL setup. 
`CSV_PATH` should point to the path you saved `github_gold.csv`." ] }, - { - "cell_type": "markdown", - "id": "23c4b3f0", - "metadata": {}, - "source": [] - }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "cell-config", "metadata": {}, "outputs": [], @@ -110,10 +98,10 @@ "MYSQL_PORT = 3306\n", "MYSQL_USER = \"root\"\n", "MYSQL_PASSWORD = \"ADD_PASSWORD_HERE\"\n", - "MYSQL_DB = \"github\" # name of the database where GHTorrent was loaded\n", + "MYSQL_DB = \"github\" # name of the database where GHTorrent was loaded\n", "\n", - "# Absolute path to comment_sentiment.csv on your machine\n", - "CSV_PATH = \"/Users/sheilalimon/Downloads/github_gold.csv\"" + "# Path to github_gold.csv on your local machine\n", + "CSV_PATH = \"PATH_TO/github_gold.csv\"" ] }, { @@ -125,12 +113,12 @@ "\n", "Create a fresh table with three columns matching the CSV structure: `ID` (GitHub comment ID), `Polarity` (positive/negative/neutral), and `Text` (the comment body).\n", "\n", - "The `ID` column is what we use to join sentiment labels to commit comments and PR comments in GHTorrent. The `comment_id` field in those tables corresponds directly to this `ID`." + "The `ID` column is what we use to join each sentiment label to the matching comment in GHTorrent's `commit_comments` and `pull_request_comments` tables." ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 22, "id": "cell-create-table", "metadata": {}, "outputs": [ @@ -177,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "cell-load-csv", "metadata": {}, "outputs": [ @@ -212,16 +200,16 @@ "id": "cell-step6-header", "metadata": {}, "source": [ - "### Step 6: Validate the load\n", + "### Step 6: Verify the load\n", "\n", "We expect 7,122 rows (one per annotated comment in the Gold Standard dataset).\n", "\n", - "We also check that each row has a unique `ID`, since these IDs are what we use to join to the GHTorrent MySQL database dump." 
+ "We also check that each row has a unique `ID`." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "cell-validate", "metadata": {}, "outputs": [ @@ -261,16 +249,16 @@ "id": "cell-step7-header", "metadata": {}, "source": [ - "### Step 7: Query 1 - join sentiment comments to commits and projects\n", + "### Step 7: Check that comment IDs join to GHTorrent\n", "\n", - "This query joins `comment_sentiment` → `commit_comments` → `commits` → `projects` to confirm that the sentiment IDs match to commit comments in GHTorrent.\n", + "Now let's do a quick sanity check. Do the `ID` values in our new table actually match `comment_id` values in GHTorrent's `commit_comments` table?\n", "\n", - "If the result is empty, the IDs in the CSV do not match the `comment_id` values in GHTorrent's `commit_comments` table, which means either the wrong dump was loaded or the database name is incorrect." + "If you get zero rows here, the IDs aren't matching up. The most likely cause is that the wrong GHTorrent dump was loaded. Make sure you used the MSR 2014 version linked in Step 1." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "cell-query1", "metadata": {}, "outputs": [ @@ -455,7 +443,7 @@ "9 These are the ones we currently hardcode in Te... " ] }, - "execution_count": 10, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -482,175 +470,6 @@ "print(f\"Rows returned (showing up to 10): {len(result1)}\")\n", "result1" ] - }, - { - "cell_type": "markdown", - "id": "cell-step8-header", - "metadata": {}, - "source": [ - "### Step 8: Query 2 - count sentiment-labeled comments by project\n", - "\n", - "This query ranks projects by how many of their commit comments were included in the Gold Standard Dataset. This tells us which projects are most represented in the labeled dataset. 
\n", - "\n", - "These are the same projects we will later generate Kaiaulu config files for in Notebook 3, so we can re-download their comment history and expand the dataset through temporal or horizonal expansion." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cell-query2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Projects with sentiment-labeled commit comments: 85\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
project_namelabeled_comment_count
0TrinityCore813
1MaNGOS623
2rails557
3CraftBukkit363
4jquery246
.........
80impress.js1
81ccv1
82hiphop-php1
83uwom-server1
84jquery-nodom1
\n", - "

85 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " project_name labeled_comment_count\n", - "0 TrinityCore 813\n", - "1 MaNGOS 623\n", - "2 rails 557\n", - "3 CraftBukkit 363\n", - "4 jquery 246\n", - ".. ... ...\n", - "80 impress.js 1\n", - "81 ccv 1\n", - "82 hiphop-php 1\n", - "83 uwom-server 1\n", - "84 jquery-nodom 1\n", - "\n", - "[85 rows x 2 columns]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "query2 = \"\"\"\n", - "SELECT\n", - " p.name AS project_name,\n", - " COUNT(s.ID) AS labeled_comment_count\n", - "FROM comment_sentiment s\n", - "INNER JOIN commit_comments cc ON s.ID = cc.comment_id\n", - "INNER JOIN commits c ON c.id = cc.commit_id\n", - "INNER JOIN projects p ON c.project_id = p.id\n", - "GROUP BY p.name\n", - "ORDER BY labeled_comment_count DESC;\n", - "\"\"\"\n", - "\n", - "with engine.connect() as con:\n", - " result2 = pd.read_sql(text(query2), con)\n", - "\n", - "print(f\"Projects with sentiment-labeled commit comments: {len(result2)}\")\n", - "result2" - ] - }, - { - "cell_type": "markdown", - "id": "cell-transition", - "metadata": {}, - "source": [ - "### When to move to Notebook 2\n", - "\n", - "Move to Notebook 2 only after all of the following are true:\n", - "\n", - "1. `total_rows = 7122` and `distinct_ids = 7122` (Step 6 shows PASS)\n", - "2. Query 1 (Step 7) returns at least one row. This means sentiment IDs are successfully joining to GHTorrent commits and projects\n", - "3. Query 2 (Step 8) returns a non-empty project ranking\n", - "\n", - "If Query 1 or 2 return zero rows but validation passed, the most likely cause is that the wrong GHTorrent dump was loaded (the dump must be the MSR 2014 version linked in Step 1)." 
- ] } ], "metadata": { diff --git a/notebooks/02_contextualize_github_gold_standard.ipynb b/notebooks/02_contextualize_github_gold_standard.ipynb new file mode 100644 index 0000000..f5d5312 --- /dev/null +++ b/notebooks/02_contextualize_github_gold_standard.ipynb @@ -0,0 +1,5606 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1b3dd1e0", + "metadata": {}, + "source": [ + "# Notebook 2: Contextualize the Github Gold Standard Dataset\n", + "\n", + "By this point, you should have the Gold Standard and GHTorrent 2004 dump loaded into MySQL. Since both datasets share the same comment IDs, we can join them to add contextual columns (e.g., project, author, timestamp) to the Gold Standard's three columns (`ID`, `polarity`, `text`).\n", + "\n", + "But, before we create the contextualized Github Gold Standard dataset, we need to understand what we're working with. \n", + "\n", + "How are the 7,122 IDs split between commit comments and PR comments? Which projects show up the most? And are these comments reachable from canonical (non-fork) repos, or contained in forks?\n", + "\n", + "The answers to these questions inform how we handle the data and which projects we target when generating project config files in Notebook 3." 
+ ] + }, + { + "cell_type": "markdown", + "id": "cell-step1-header", + "metadata": {}, + "source": [ + "### Step 1: Import dependencies and connect to MySQL" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "cell-imports", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sqlalchemy import create_engine, text\n", + "\n", + "pd.set_option('display.max_rows', None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-config", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connected to MySQL.\n" + ] + } + ], + "source": [ + "MYSQL_HOST = \"localhost\"\n", + "MYSQL_PORT = 3306\n", + "MYSQL_USER = \"root\"\n", + "MYSQL_PASSWORD = \"ADD_PASSWORD_HERE\"\n", + "MYSQL_DB = \"github\" # name of the database where GHTorrent was loaded\n", + "\n", + "engine = create_engine(\n", + " f\"mysql+mysqlconnector://{MYSQL_USER}:{MYSQL_PASSWORD}@{MYSQL_HOST}:{MYSQL_PORT}/{MYSQL_DB}\"\n", + ")\n", + "print(\"Connected to MySQL.\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-check1-header", + "metadata": {}, + "source": [ + "### Check 1: How are the sentiment comments distributed?\n", + "\n", + "GHTorrent stores two kinds of GitHub comments: commit comments (discussions on a specific commit) and PR inline comments (left on a line of code in a pull request).\n", + "\n", + "To understand what commit comments look like, [here](https://github.com/openssl/openssl/commit/4817504d069b4c5082161b02a22116ad75f822b1#commitcomment-5942359) are examples of commit comments under a commit that introduced a popular software vulnerability. To understand what PR inline comments look like, refer to the [GitHub Pull Requests Cheatsheet](https://github.com/sailuh/kaiaulu_cheatsheet/blob/main/cheatsheets/github-comments-cheatsheet.pdf).\n", + "\n", + "The Gold Standard includes both types. 
The same `ID` maps to `comment_id` in both `commit_comments` and `pull_request_comments`. So, the first thing to figure out is which table each sentiment ID lands in. Some IDs appear in both tables (overlap = 85), meaning a small number of comments were captured under both endpoints in GHTorrent. The total unique IDs should sum to 7,122.\n", + "\n", + "Expected values:\n", + "- Commit comment matches: ~4,317\n", + "- PR comment matches: ~2,890\n", + "- Overlap (both): ~85\n", + "- Commit-only: 4,232 | PR-only: 2,805 | Total unique: 7,122" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "cell-check1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CategoryCountExpected
0Commit matches43174317
1PR matches28902890
2Overlap (both)8585
3Commit-only42324232
4PR-only28052805
5Total unique71227122
\n", + "
" + ], + "text/plain": [ + " Category Count Expected\n", + "0 Commit matches 4317 4317\n", + "1 PR matches 2890 2890\n", + "2 Overlap (both) 85 85\n", + "3 Commit-only 4232 4232\n", + "4 PR-only 2805 2805\n", + "5 Total unique 7122 7122" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PASS: total unique IDs = 7122.\n" + ] + } + ], + "source": [ + "with engine.connect() as con:\n", + " commit_count = pd.read_sql(text(\"\"\"\n", + " SELECT COUNT(*) AS commit_comment_matches\n", + " FROM comment_sentiment s\n", + " INNER JOIN commit_comments cc ON s.ID = cc.comment_id;\n", + " \"\"\"), con).iloc[0, 0]\n", + "\n", + " pr_count = pd.read_sql(text(\"\"\"\n", + " SELECT COUNT(*) AS pr_comment_matches\n", + " FROM comment_sentiment s\n", + " INNER JOIN pull_request_comments prc ON s.ID = prc.comment_id;\n", + " \"\"\"), con).iloc[0, 0]\n", + "\n", + " overlap = pd.read_sql(text(\"\"\"\n", + " SELECT COUNT(*) AS overlap\n", + " FROM comment_sentiment s\n", + " INNER JOIN commit_comments cc ON s.ID = cc.comment_id\n", + " INNER JOIN pull_request_comments prc ON s.ID = prc.comment_id;\n", + " \"\"\"), con).iloc[0, 0]\n", + "\n", + "commit_only = commit_count - overlap\n", + "pr_only = pr_count - overlap\n", + "total_unique = commit_only + pr_only + overlap\n", + "\n", + "summary = pd.DataFrame({\n", + " 'Category': ['Commit matches', 'PR matches', 'Overlap (both)', 'Commit-only', 'PR-only', 'Total unique'],\n", + " 'Count': [commit_count, pr_count, overlap, commit_only, pr_only, total_unique],\n", + " 'Expected': [4317, 2890, 85, 4232, 2805, 7122]\n", + "})\n", + "display(summary)\n", + "\n", + "if total_unique == 7122:\n", + " print(\"PASS: total unique IDs = 7122.\")\n", + "else:\n", + " print(f\"WARNING: total unique IDs = {total_unique}, expected 7122.\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-check2-header", + "metadata": {}, + "source": [ + "### Check 2: Which projects have 
the most labeled commit comments?\n", + "\n", + "Let's see which projects' commit comments are most heavily represented in the Gold Standard. This is a preview of which projects we'll be generating Kaiaulu config files for in Notebook 3." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "cell-check2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Projects with sentiment-labeled commit comments: 453\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
project_idproject_nameproject_urllabeled_comment_count
012TrinityCorehttps://api.github.com/repos/TrinityCore/Trini...800
1289MaNGOShttps://api.github.com/repos/mangos/MaNGOS622
278852railshttps://api.github.com/repos/rails/rails448
322980CraftBukkithttps://api.github.com/repos/Bukkit/CraftBukkit357
425875jqueryhttps://api.github.com/repos/jquery/jquery235
591331diasporahttps://api.github.com/repos/diaspora/diaspora160
63583xbmchttps://api.github.com/repos/xbmc/xbmc130
722981nettyhttps://api.github.com/repos/netty/netty80
826388html5-boilerplatehttps://api.github.com/repos/h5bp/html5-boiler...80
924292nodehttps://api.github.com/repos/joyent/node65
1050618three.jshttps://api.github.com/repos/mrdoob/three.js65
111akkahttps://api.github.com/repos/akka/akka62
1279163homebrewhttps://api.github.com/repos/mxcl/homebrew57
1391020gitlabhqhttps://api.github.com/repos/gitlabhq/gitlabhq55
1451671symfonyhttps://api.github.com/repos/symfony/symfony46
159215openFrameworkshttps://api.github.com/repos/openframeworks/op...40
1663250cakephphttps://api.github.com/repos/cakephp/cakephp33
1710593libuvhttps://api.github.com/repos/joyent/libuv29
1895385devisehttps://api.github.com/repos/plataformatec/devise22
1974914requestshttps://api.github.com/repos/kennethreitz/requ...20
2069158djangohttps://api.github.com/repos/django/django19
2179166jekyllhttps://api.github.com/repos/mojombo/jekyll18
2259607CodeIgniterhttps://api.github.com/repos/EllisLab/CodeIgniter15
2377319Sick-Beardhttps://api.github.com/repos/midgetspy/Sick-Beard15
24104307papercliphttps://api.github.com/repos/thoughtbot/paperclip14
259636libgit2https://api.github.com/repos/libgit2/libgit213
265214xbmchttps://api.github.com/repos/jmarshallnz/xbmc12
2751669foundationhttps://api.github.com/repos/zurb/foundation12
2864176ThinkUphttps://api.github.com/repos/ginatrapani/ThinkUp12
2981335railshttps://api.github.com/repos/flooose/rails12
3010380redcarpethttps://api.github.com/repos/vmg/redcarpet11
3164918phpunithttps://api.github.com/repos/sebastianbergmann...11
32105378compasshttps://api.github.com/repos/chriseppstein/com...11
33107534scalahttps://api.github.com/repos/scala/scala11
3462501php-sdkhttps://api.github.com/repos/facebook/php-sdk10
3582328railshttps://api.github.com/repos/pinetops/rails9
3610629SignalRhttps://api.github.com/repos/SignalR/SignalR8
3714327Nancyhttps://api.github.com/repos/NancyFx/Nancy8
3817515elasticsearchhttps://api.github.com/repos/elasticsearch/ela...8
3919580facebook-android-sdkhttps://api.github.com/repos/facebook/facebook...8
4081320railshttps://api.github.com/repos/zetter/rails8
4111phantomjshttps://api.github.com/repos/ariya/phantomjs7
425164xbmchttps://api.github.com/repos/theuni/xbmc7
437242redishttps://api.github.com/repos/antirez/redis7
4426101nodehttps://api.github.com/repos/indutny/node7
4574915symfonyhttps://api.github.com/repos/xphere-forks/symfony7
4675984reddithttps://api.github.com/repos/reddit/reddit7
4776945botohttps://api.github.com/repos/boto/boto7
4876946django-debug-toolbarhttps://api.github.com/repos/django-debug-tool...7
4978835django-cmshttps://api.github.com/repos/divio/django-cms7
5080514railshttps://api.github.com/repos/andhapp/rails7
515176xbmchttps://api.github.com/repos/davilla/xbmc6
5212976pluploadhttps://api.github.com/repos/moxiecode/plupload6
5317566ActionBarSherlockhttps://api.github.com/repos/JakeWharton/Actio...6
5465250thinkuphttps://api.github.com/repos/mwilkie/thinkup6
5579948railshttps://api.github.com/repos/bjeanes/rails6
56107186gizzardhttps://api.github.com/repos/twitter/gizzard6
57784bitcoin-githttps://api.github.com/repos/gavinandresen/bit...5
585215xbmchttps://api.github.com/repos/Montellese/xbmc5
595232xbmchttps://api.github.com/repos/elupus/xbmc5
6014328ServiceStackhttps://api.github.com/repos/ServiceStack/Serv...5
6147382chosenhttps://api.github.com/repos/harvesthq/chosen5
6259683CodeIgniterhttps://api.github.com/repos/dchill42/CodeIgniter5
6376118reddithttps://api.github.com/repos/andre-d/reddit5
64107302xsbthttps://api.github.com/repos/snowplow/xsbt5
65107823scalahttps://api.github.com/repos/paulp/scala5
66454bitcoinhttps://api.github.com/repos/bitcoin/bitcoin4
675438xbmchttps://api.github.com/repos/0wing/xbmc4
687241xbmchttps://api.github.com/repos/FernetMenta/xbmc4
6942644d3https://api.github.com/repos/mbostock/d34
7053712symfonyhttps://api.github.com/repos/vicb/symfony4
7154148symfonyhttps://api.github.com/repos/schmittjoh/symfony4
7280581railshttps://api.github.com/repos/SAP-Oxygen/rails4
7384869railshttps://api.github.com/repos/slave-but-free/rails4
746knitrhttps://api.github.com/repos/yihui/knitr3
757shinyhttps://api.github.com/repos/rstudio/shiny3
769mongohttps://api.github.com/repos/mongodb/mongo3
773750http-parserhttps://api.github.com/repos/joyent/http-parser3
785182xbmchttps://api.github.com/repos/Voyager1/xbmc3
795230xbmchttps://api.github.com/repos/jimfcarroll/xbmc3
8013509TrinityCorehttps://api.github.com/repos/Havenard/TrinityCore3
8113597monohttps://api.github.com/repos/Unity-Technologie...3
8216134MiniProfilerhttps://api.github.com/repos/SamSaffron/MiniPr...3
8319786clojurehttps://api.github.com/repos/clojure/clojure3
8453713symfonyhttps://api.github.com/repos/bschussek/symfony3
8564197zf2https://api.github.com/repos/bakura10/zf23
8665107Slimhttps://api.github.com/repos/codeguy/Slim3
8771786tornadohttps://api.github.com/repos/facebook/tornado3
8880231railshttps://api.github.com/repos/sikachu/rails3
8980618railshttps://api.github.com/repos/steveklabnik/rails3
9084052railshttps://api.github.com/repos/bbenezech/rails3
9188617Homebrewhttps://api.github.com/repos/MindTooth/Homebrew3
9292167homebrewhttps://api.github.com/repos/axelsteiner/homebrew3
9392243homebrewhttps://api.github.com/repos/rgov/homebrew3
94101997octopresshttps://api.github.com/repos/imathis/octopress3
95107187sbthttps://api.github.com/repos/sbt/sbt3
96107811scalahttps://api.github.com/repos/adriaanm/scala3
97108589TC-Eluna-3.3.5ahttps://api.github.com/repos/ElunaLuaEngine/TC...3
982devtoolshttps://api.github.com/repos/hadley/devtools2
995162xbmchttps://api.github.com/repos/opdenkamp/xbmc2
1005202xbmchttps://api.github.com/repos/cptspiff/xbmc2
1015240xbmchttps://api.github.com/repos/bobo1on1/xbmc2
1025283xbmchttps://api.github.com/repos/Memphiz/xbmc2
1035326beanstalkdhttps://api.github.com/repos/kr/beanstalkd2
1045494xbmchttps://api.github.com/repos/koying/xbmc2
1056244xbmchttps://api.github.com/repos/jpsdr/xbmc2
1068255redishttps://api.github.com/repos/charsyam/redis2
1078948redishttps://api.github.com/repos/evilsocket/redis2
10811582openFrameworkshttps://api.github.com/repos/bilderbuchi/openF...2
10912554SparkleSharehttps://api.github.com/repos/hbons/SparkleShare2
11013197TrinityCorehttps://api.github.com/repos/kandera/TrinityCore2
11115018RestSharphttps://api.github.com/repos/restsharp/RestSharp2
11216402stormhttps://api.github.com/repos/nathanmarz/storm2
11323311CraftBukkithttps://api.github.com/repos/ScoreUnder/CraftB...2
11425879nodehttps://api.github.com/repos/isaacs/node2
11526240nodehttps://api.github.com/repos/kuebk/node2
11626766nodehttps://api.github.com/repos/mattrobenolt/node2
11729190html5-boilerplatehttps://api.github.com/repos/bentruyman/html5-...2
11853743symfonyhttps://api.github.com/repos/hhamon/symfony2
11954403symfonyhttps://api.github.com/repos/drak/symfony2
12054691symfonyhttps://api.github.com/repos/Tobion/symfony2
12161016CodeIgniterhttps://api.github.com/repos/and-ers/CodeIgniter2
12263690zf2https://api.github.com/repos/weierophinney/zf22
12365762cakephphttps://api.github.com/repos/markstory/cakephp2
12465946cakephphttps://api.github.com/repos/ADmad/cakephp2
12565983ThinkUphttps://api.github.com/repos/brandonroberts/Th...2
12667281cakephphttps://api.github.com/repos/schrolli/cakephp2
12768893phpunithttps://api.github.com/repos/fizzka/phpunit2
12869177djangohttps://api.github.com/repos/andrewgodwin/django2
12969349djangohttps://api.github.com/repos/carljm/django2
13069897djangohttps://api.github.com/repos/melinath/django2
13179642railshttps://api.github.com/repos/drogus/rails2
13280002railshttps://api.github.com/repos/joshk/rails2
13380498railshttps://api.github.com/repos/bratish/rails2
13480535railshttps://api.github.com/repos/anildigital/rails2
13580559railshttps://api.github.com/repos/sishen/rails2
13680818railshttps://api.github.com/repos/goncalossilva/rails2
13781332railshttps://api.github.com/repos/jenseng/rails2
13881621railshttps://api.github.com/repos/jenslukowski/rails2
13982863railshttps://api.github.com/repos/gururuby/rails2
14088593homebrewhttps://api.github.com/repos/docwhat/homebrew2
14188732homebrewhttps://api.github.com/repos/mbrunthaler/homebrew2
14290945homebrewhttps://api.github.com/repos/clusty/homebrew2
14391892homebrewhttps://api.github.com/repos/semaperepelitsa/h...2
14492348homebrewhttps://api.github.com/repos/alastairandrew/ho...2
14592859homebrewhttps://api.github.com/repos/danielb2/homebrew2
14693198homebrewhttps://api.github.com/repos/simleo/homebrew2
14794288homebrewhttps://api.github.com/repos/vhbit/homebrew2
148100124diasporahttps://api.github.com/repos/christophe-de/dia...2
149102734octopresshttps://api.github.com/repos/andrewreid/octopress2
150106160finaglehttps://api.github.com/repos/twitter/finagle2
151107312sbthttps://api.github.com/repos/gkossakowski/sbt2
152107535scalatrahttps://api.github.com/repos/scalatra/scalatra2
153107829scalahttps://api.github.com/repos/magarciaEPFL/scala2
1548follyhttps://api.github.com/repos/facebook/folly1
155893bitcoinhttps://api.github.com/repos/TheBlueMatt/bitcoin1
156935devtoolshttps://api.github.com/repos/yoni/devtools1
1571024bitcoinhttps://api.github.com/repos/laanwj/bitcoin1
1582710phantomjshttps://api.github.com/repos/dburrows/phantomjs1
1593814mongohttps://api.github.com/repos/guanqun/mongo1
1603840mongohttps://api.github.com/repos/stulentsev/mongo1
1613897http-parserhttps://api.github.com/repos/bnoordhuis/http-p...1
1624262mongohttps://api.github.com/repos/amcfague/mongo1
1635208xbmchttps://api.github.com/repos/Fneufneu/xbmc1
1645228xbmchttps://api.github.com/repos/anssih/xbmc1
1655234xbmchttps://api.github.com/repos/pieh/xbmc1
1665236xbmchttps://api.github.com/repos/llyzs/xbmc1
1675273XBMChttps://api.github.com/repos/HarryMuscle/XBMC1
1685297xbmchttps://api.github.com/repos/garbear/xbmc1
1695422xbmchttps://api.github.com/repos/doozan/xbmc1
1705428xbmchttps://api.github.com/repos/cbxbiker61/xbmc1
1715448xbmchttps://api.github.com/repos/vdrfan/xbmc1
1725485xbmchttps://api.github.com/repos/malard/xbmc1
1735715mangoshttps://api.github.com/repos/vermie/mangos1
1745828xbmchttps://api.github.com/repos/xbmcfanboy/xbmc1
1755832xbmchttps://api.github.com/repos/taxigps/xbmc1
1765837xbmchttps://api.github.com/repos/aviksil/xbmc1
1775858xbmchttps://api.github.com/repos/MichaelAnders/xbmc1
1785887xbmchttps://api.github.com/repos/PSyton/xbmc1
1795962xbmchttps://api.github.com/repos/adam-aph/xbmc1
1806250xbmchttps://api.github.com/repos/herrnst/xbmc1
1816258xbmchttps://api.github.com/repos/xhaggi/xbmc1
1826349xbmchttps://api.github.com/repos/manio/xbmc1
1836472xbmchttps://api.github.com/repos/dragonflight/xbmc1
1846505xbmchttps://api.github.com/repos/maheus/xbmc1
1856609xbmchttps://api.github.com/repos/elbeardmorez/xbmc1
1866790xbmchttps://api.github.com/repos/DigitalDJ/xbmc1
1876819xbmchttps://api.github.com/repos/FlyingRat/xbmc1
1887180xbmchttps://api.github.com/repos/Jalle19/xbmc1
1897404redishttps://api.github.com/repos/mrb/redis1
1907427ccvhttps://api.github.com/repos/liuliu/ccv1
1917457xbmchttps://api.github.com/repos/phil65/xbmc1
1928478redishttps://api.github.com/repos/marcelaraujo/redis1
1938605redishttps://api.github.com/repos/jumping/redis1
1948634redishttps://api.github.com/repos/melvyn-sopacua/redis1
1958955memcachedhttps://api.github.com/repos/memcached/memcached1
1969014hiphop-phphttps://api.github.com/repos/fungos/hiphop-php1
19710468xbmca10https://api.github.com/repos/npeacock/xbmca101
19810617libgit2https://api.github.com/repos/phkelley/libgit21
19910622libgit2https://api.github.com/repos/martinwoodward/li...1
20010632libgit2https://api.github.com/repos/carlosmn/libgit21
20111494openFrameworkshttps://api.github.com/repos/obviousjim/openFr...1
20212102openFrameworkshttps://api.github.com/repos/openFrameworks-Ra...1
20312529libgit2https://api.github.com/repos/glesserd/libgit21
20412934SparkleSharehttps://api.github.com/repos/serras/SparkleShare1
20512941TrinityCorehttps://api.github.com/repos/0omega/TrinityCore1
20612943TrinityCorehttps://api.github.com/repos/blipi/TrinityCore1
20712964TrinityCorehttps://api.github.com/repos/zorix/TrinityCore1
20812972TrinityCorehttps://api.github.com/repos/Naervin/TrinityCore1
20912973TrinityCorehttps://api.github.com/repos/4m1g0/TrinityCore1
21013410pluploadhttps://api.github.com/repos/dtmax/plupload1
21113514uwom-serverhttps://api.github.com/repos/WarHead/uwom-server1
21213516TrinityCorehttps://api.github.com/repos/hacknowledge/Trin...1
21313526TrinityCorehttps://api.github.com/repos/johnholiver/Trini...1
21413558TrinityCorehttps://api.github.com/repos/Baeumchen/Trinity...1
21513688monohttps://api.github.com/repos/cyplo/mono1
21613724monohttps://api.github.com/repos/QuickJack/mono1
21713765monohttps://api.github.com/repos/ermshiperete/mono1
21813775monohttps://api.github.com/repos/ukplc/mono1
21914092monohttps://api.github.com/repos/killabytenow/mono1
22014523ServiceStackhttps://api.github.com/repos/jeffgabhart/Servi...1
22114593ServiceStackhttps://api.github.com/repos/leon-andria/Servi...1
22214912AutoMapperhttps://api.github.com/repos/AutoMapper/AutoMa...1
22314993Nancyhttps://api.github.com/repos/thedersen/Nancy1
22415521RestSharphttps://api.github.com/repos/crdeutsch/RestSharp1
22515699RestSharphttps://api.github.com/repos/Haacked/RestSharp1
22615836Nancyhttps://api.github.com/repos/mat-mcloughlin/Nancy1
22717907elasticsearchhttps://api.github.com/repos/imotov/elasticsearch1
22818328elasticsearchhttps://api.github.com/repos/javanna/elasticse...1
22918863elasticsearchhttps://api.github.com/repos/brwe/elasticsearch1
23023040CraftBukkithttps://api.github.com/repos/Zaraza107/CraftBu...1
23123203CraftBukkithttps://api.github.com/repos/cyberdudedk/Craft...1
23223245CraftBukkithttps://api.github.com/repos/TheEliteFour/Craf...1
23323411CraftBukkithttps://api.github.com/repos/dumptruckman/Craf...1
23424886nettyhttps://api.github.com/repos/jpinner/netty1
23525148nettyhttps://api.github.com/repos/CruzBishop/netty1
23625358nettyhttps://api.github.com/repos/zcourts/netty1
23725520nettyhttps://api.github.com/repos/Melon1017/netty1
23825884nodehttps://api.github.com/repos/felixge/node1
23926062nodehttps://api.github.com/repos/TooTallNate/node1
24026508nodehttps://api.github.com/repos/pixelglow/node1
24127020jqueryhttps://api.github.com/repos/louisremi/jquery1
24227040nodehttps://api.github.com/repos/interruptz/node1
24327075jqueryhttps://api.github.com/repos/rwaldron/jquery1
24427190jqueryhttps://api.github.com/repos/gnarf/jquery1
24527236jqueryhttps://api.github.com/repos/SlexAxton/jquery1
24627269jqueryhttps://api.github.com/repos/alexisabril/jquery1
24727330nodehttps://api.github.com/repos/laverdet/node1
24827379nodehttps://api.github.com/repos/JSBizon/node1
24927504impress.jshttps://api.github.com/repos/bartaz/impress.js1
25027570jqueryhttps://api.github.com/repos/mikesherov/jquery1
25127761jquery-nodomhttps://api.github.com/repos/kpozin/jquery-nodom1
25227796jqueryhttps://api.github.com/repos/orkel/jquery1
25327906jqueryhttps://api.github.com/repos/joelbirchler/jquery1
25428346jqueryhttps://api.github.com/repos/gibson042/jquery1
25529698html5-boilerplatehttps://api.github.com/repos/cleanforestco/htm...1
25632974nodehttps://api.github.com/repos/mcurcio/node1
25734354nodehttps://api.github.com/repos/iizukanao/node1
25834632jqueryhttps://api.github.com/repos/danilsomsikov/jquery1
25935620jqueryhttps://api.github.com/repos/shalecraig/jquery1
26035652nodehttps://api.github.com/repos/WebReflection/node1
26136228nodehttps://api.github.com/repos/hueniverse/node1
26242719d3https://api.github.com/repos/jasondavies/d31
26343107d3https://api.github.com/repos/GerHobbelt/d31
26449358chosenhttps://api.github.com/repos/Sikwan/chosen1
26552432foundationhttps://api.github.com/repos/jvivs/foundation1
26653485symfonyhttps://api.github.com/repos/fabpot/symfony1
26753494symfonyhttps://api.github.com/repos/usefulthink/symfony1
26853548symfonyhttps://api.github.com/repos/jwage/symfony1
26953598symfonyhttps://api.github.com/repos/weaverryan/symfony1
27053694symfonyhttps://api.github.com/repos/francisbesset/sym...1
27153715symfonyhttps://api.github.com/repos/mvrhov/symfony1
27253792symfonyhttps://api.github.com/repos/lsmith77/symfony1
27353993symfonyhttps://api.github.com/repos/stloyd/symfony1
27454713symfonyhttps://api.github.com/repos/tacman/symfony1
27555185symfonyhttps://api.github.com/repos/jfsimon/symfony1
27655564three.jshttps://api.github.com/repos/AddictArts/three.js1
27756114symfonyhttps://api.github.com/repos/dlsniper/symfony1
27856501three.jshttps://api.github.com/repos/tapio/three.js1
27957041symfonyhttps://api.github.com/repos/ircmaxell/symfony1
28057142three.jshttps://api.github.com/repos/bhouston/three.js1
28158640symfonyhttps://api.github.com/repos/gnugat/symfony1
28258697three.jshttps://api.github.com/repos/kevinoe/three.js1
28358762three.jshttps://api.github.com/repos/lminko/three.js1
28459627CodeIgniterhttps://api.github.com/repos/darkwhispering/Co...1
28559890CodeIgniterhttps://api.github.com/repos/zechdc/CodeIgniter1
28660294CodeIgniterhttps://api.github.com/repos/vlakoff/CodeIgniter1
28760784CodeIgniterhttps://api.github.com/repos/BillHeaton/CodeIg...1
28861226CodeIgniterhttps://api.github.com/repos/chrispassas/CodeI...1
28962502zf2https://api.github.com/repos/zendframework/zf21
29063700zf2https://api.github.com/repos/DASPRiD/zf21
29163898zf2https://api.github.com/repos/ezimuel/zf21
29264184zf2https://api.github.com/repos/Maks3w/zf21
29364379zf2https://api.github.com/repos/davidwindell/zf21
29464412zf2https://api.github.com/repos/jacobkiers/zf21
29564680zf2https://api.github.com/repos/samsonasik/zf21
29665260thinkuphttps://api.github.com/repos/mithaler/thinkup1
29765317ThinkUphttps://api.github.com/repos/samwho/ThinkUp1
29865338ThinkUphttps://api.github.com/repos/anildash/ThinkUp1
29965349ThinkUphttps://api.github.com/repos/kylehase/ThinkUp1
30065530ThinkUphttps://api.github.com/repos/rgroves/ThinkUp1
30165796cakephphttps://api.github.com/repos/ceeram/cakephp1
30265889ThinkUphttps://api.github.com/repos/bleything/ThinkUp1
30365980cakephphttps://api.github.com/repos/zoghal/cakephp1
30469254djangohttps://api.github.com/repos/niwibe/django1
30569304djangohttps://api.github.com/repos/pvanderlinden/django1
30672032flaskhttps://api.github.com/repos/mitsuhiko/flask1
30775330requestshttps://api.github.com/repos/dandrzejewski/req...1
30875722requestshttps://api.github.com/repos/nicoddemus/requests1
30976156reddithttps://api.github.com/repos/k21/reddit1
31077048django-debug-toolbarhttps://api.github.com/repos/msaelices/django-...1
31177422botohttps://api.github.com/repos/irskep/boto1
31277458botohttps://api.github.com/repos/goura/boto1
31377717botohttps://api.github.com/repos/fayazkhan/boto1
31478347Sick-Beardhttps://api.github.com/repos/bshep/Sick-Beard1
31578356Sick-Beardhttps://api.github.com/repos/ozeraser/Sick-Beard1
31678461Sick-Beardhttps://api.github.com/repos/jorgenpt/Sick-Beard1
31778634Sick-Beardhttps://api.github.com/repos/EchelonFour/Sick-...1
31878808Sick-Beardhttps://api.github.com/repos/mozvip/Sick-Beard1
31978942Sick-Beardhttps://api.github.com/repos/Prinz23/Sick-Beard1
32079408railshttps://api.github.com/repos/coderrr/rails1
32179648railshttps://api.github.com/repos/zires/rails1
32279876railshttps://api.github.com/repos/bigfix/rails1
32379958railshttps://api.github.com/repos/rafaelfranca/rails1
32479972railshttps://api.github.com/repos/github/rails1
32580096railshttps://api.github.com/repos/cldwalker/rails1
32680105railshttps://api.github.com/repos/indirect/rails1
32780289railshttps://api.github.com/repos/arunagw/rails1
32880307railshttps://api.github.com/repos/raysrashmi/rails1
32980322railshttps://api.github.com/repos/smartinez87/rails1
33080363railshttps://api.github.com/repos/castlerock/rails1
33180437railshttps://api.github.com/repos/senny/rails1
33280670railshttps://api.github.com/repos/acroca/rails1
33380682railshttps://api.github.com/repos/gazay/rails1
33480790django-cmshttps://api.github.com/repos/powderflask/djang...1
33580961railshttps://api.github.com/repos/kennyj/rails1
33680974rails-1https://api.github.com/repos/tanin47/rails-11
33781033railshttps://api.github.com/repos/nashby/rails1
33881294railshttps://api.github.com/repos/larskanis/rails1
33981346railshttps://api.github.com/repos/kielkowicz/rails1
34081764railshttps://api.github.com/repos/bogdan/rails1
34181842railshttps://api.github.com/repos/tigrish/rails1
34282391railshttps://api.github.com/repos/homakov/rails1
34382577railshttps://api.github.com/repos/dylanahsmith/rails1
34482701railshttps://api.github.com/repos/revans/rails1
34583262railshttps://api.github.com/repos/blowmage/rails1
34683426jekyllhttps://api.github.com/repos/robru/jekyll1
34783454railshttps://api.github.com/repos/Grandrath/rails1
34883533railshttps://api.github.com/repos/gsphanikumar/rails1
34983737railshttps://api.github.com/repos/gaurish/rails1
35084573railshttps://api.github.com/repos/morgancurrie/rails1
35184757railshttps://api.github.com/repos/Jiebour/rails1
35285511railshttps://api.github.com/repos/frodsan/rails1
35386503railshttps://api.github.com/repos/ankit8898/rails1
35486821railshttps://api.github.com/repos/versioncontrol/rails1
35587060homebrewhttps://api.github.com/repos/godfat/homebrew1
35687169homebrewhttps://api.github.com/repos/wright/homebrew1
35787351homebrewhttps://api.github.com/repos/bdd/homebrew1
35887973homebrewhttps://api.github.com/repos/greedy/homebrew1
35988010jekyllhttps://api.github.com/repos/edeustace/jekyll1
36088104homebrewhttps://api.github.com/repos/jlcapps/homebrew1
36188197homebrewhttps://api.github.com/repos/justinclift/homebrew1
36288203homebrewhttps://api.github.com/repos/tusbar/homebrew1
36388479homebrewhttps://api.github.com/repos/neglectedvalue/ho...1
36488666homebrewhttps://api.github.com/repos/losmuertos/homebrew1
36588681homebrewhttps://api.github.com/repos/larseggert/homebrew1
36688705homebrewhttps://api.github.com/repos/donspaulding/home...1
36788715jekyllhttps://api.github.com/repos/metamatt/jekyll1
36888751homebrewhttps://api.github.com/repos/jacknagel/homebrew1
36988845homebrewhttps://api.github.com/repos/vibrog/homebrew1
37088884homebrewhttps://api.github.com/repos/catsby/homebrew1
37188975homebrewhttps://api.github.com/repos/thoughtpolice/hom...1
37289039homebrewhttps://api.github.com/repos/jedi4ever/homebrew1
37389091homebrewhttps://api.github.com/repos/dch/homebrew1
37489305homebrewhttps://api.github.com/repos/mistydemeo/homebrew1
37589327homebrewhttps://api.github.com/repos/tonit/homebrew1
37689384homebrewhttps://api.github.com/repos/svenax/homebrew1
37789541homebrewhttps://api.github.com/repos/jcupitt/homebrew1
37889949homebrewhttps://api.github.com/repos/wesen/homebrew1
37990205homebrewhttps://api.github.com/repos/vertis/homebrew1
38090433homebrewhttps://api.github.com/repos/2bits/homebrew1
38190568homebrewhttps://api.github.com/repos/jwilkins/homebrew1
38290602homebrewhttps://api.github.com/repos/anatol/homebrew1
38390714homebrewhttps://api.github.com/repos/msabramo/homebrew1
38490813homebrewhttps://api.github.com/repos/azarbayejani/home...1
38590961homebrewhttps://api.github.com/repos/nicolasdespres/ho...1
38691026homebrewhttps://api.github.com/repos/yllan/homebrew1
38791062homebrewhttps://api.github.com/repos/samueljohn/homebrew1
38891113homebrewhttps://api.github.com/repos/cartazio/homebrew1
38991159homebrewhttps://api.github.com/repos/fish2000/homebrew1
39091240homebrewhttps://api.github.com/repos/glejeune/homebrew1
39191270homebrewhttps://api.github.com/repos/bpiwowar/homebrew1
39291294homebrewhttps://api.github.com/repos/ingmar/homebrew1
39391326homebrewhttps://api.github.com/repos/funnymanva/homebrew1
39491496homebrewhttps://api.github.com/repos/nmadura/homebrew1
39591611homebrewhttps://api.github.com/repos/sandeep048/homebrew1
39691638homebrewhttps://api.github.com/repos/lifepillar/homebrew1
39791741homebrewhttps://api.github.com/repos/ummels/homebrew1
39891854homebrewhttps://api.github.com/repos/nevir/homebrew1
39991864homebrewhttps://api.github.com/repos/mrjbq7/homebrew1
40091916homebrewhttps://api.github.com/repos/zhangcheng/homebrew1
40191918homebrewhttps://api.github.com/repos/rmndk/homebrew1
40291967homebrewhttps://api.github.com/repos/rhysd/homebrew1
40392060homebrewhttps://api.github.com/repos/mattyr/homebrew1
40492372homebrewhttps://api.github.com/repos/AstonJ/homebrew1
40592399homebrewhttps://api.github.com/repos/raedwulf/homebrew1
40692749homebrewhttps://api.github.com/repos/rays/homebrew1
40792789homebrewhttps://api.github.com/repos/OldCrow/homebrew1
40892830homebrewhttps://api.github.com/repos/vogonistic/homebrew1
40992839homebrewhttps://api.github.com/repos/ashirazi/homebrew1
41092866homebrewhttps://api.github.com/repos/sheerun/homebrew1
41192978homebrewhttps://api.github.com/repos/marr/homebrew1
41293004homebrewhttps://api.github.com/repos/wix/homebrew1
41393049homebrewhttps://api.github.com/repos/andriytyurnikov/h...1
41493112homebrewhttps://api.github.com/repos/tinystatemachine/...1
41593180homebrewhttps://api.github.com/repos/mashtizadeh/homebrew1
41693250homebrewhttps://api.github.com/repos/peabody124/homebrew1
41793295homebrewhttps://api.github.com/repos/handlename/homebrew1
41893310homebrewhttps://api.github.com/repos/crishoj/homebrew1
41993377homebrewhttps://api.github.com/repos/eladg/homebrew1
42093428homebrewhttps://api.github.com/repos/chrmoritz/homebrew1
42193913homebrewhttps://api.github.com/repos/natritmeyer/homebrew1
42294037homebrewhttps://api.github.com/repos/chenpc/homebrew1
42395895gitlabhqhttps://api.github.com/repos/zzet/gitlabhq1
42496401gitlabhqhttps://api.github.com/repos/mikew/gitlabhq1
42596669gitlabhqhttps://api.github.com/repos/proverbface/gitlabhq1
42696736gitlabhqhttps://api.github.com/repos/drahamim/gitlabhq1
42796815gitlabhqhttps://api.github.com/repos/senny/gitlabhq1
42897857devisehttps://api.github.com/repos/rahearn/devise1
429100090diasporahttps://api.github.com/repos/Gonzih/diaspora1
430100137diasporahttps://api.github.com/repos/Raven24/diaspora1
431101472blueprint-csshttps://api.github.com/repos/joshuaclayton/blu...1
432102063octopresshttps://api.github.com/repos/pilif/octopress1
433104578papercliphttps://api.github.com/repos/yar/paperclip1
434105484compasshttps://api.github.com/repos/gmclelland/compass1
435105965compasshttps://api.github.com/repos/cimmanon/compass1
436106161kestrelhttps://api.github.com/repos/robey/kestrel1
437106447finaglehttps://api.github.com/repos/benpence/finagle1
438106680akkahttps://api.github.com/repos/paulpach/akka1
439106686akkahttps://api.github.com/repos/metamorph/akka1
440106779akkahttps://api.github.com/repos/scullxbones/akka1
441106793akkahttps://api.github.com/repos/drewhk/akka1
442107188xsbthttps://api.github.com/repos/retronym/xsbt1
443107255xsbthttps://api.github.com/repos/vigdorchik/xsbt1
444107317xsbthttps://api.github.com/repos/ebowman/xsbt1
445107377sbthttps://api.github.com/repos/ezh/sbt1
446107809scalahttps://api.github.com/repos/odersky/scala1
447107815scalahttps://api.github.com/repos/lrytz/scala1
448107819scalahttps://api.github.com/repos/soc/scala1
449107824scalahttps://api.github.com/repos/phaller/scala1
450108259scalahttps://api.github.com/repos/Ichoran/scala1
451108348mongohttps://api.github.com/repos/idning/mongo1
452108646djangohttps://api.github.com/repos/zbenjamin/django1
\n", + "
" + ], + "text/plain": [ + " project_id project_name \\\n", + "0 12 TrinityCore \n", + "1 289 MaNGOS \n", + "2 78852 rails \n", + "3 22980 CraftBukkit \n", + "4 25875 jquery \n", + "5 91331 diaspora \n", + "6 3583 xbmc \n", + "7 22981 netty \n", + "8 26388 html5-boilerplate \n", + "9 24292 node \n", + "10 50618 three.js \n", + "11 1 akka \n", + "12 79163 homebrew \n", + "13 91020 gitlabhq \n", + "14 51671 symfony \n", + "15 9215 openFrameworks \n", + "16 63250 cakephp \n", + "17 10593 libuv \n", + "18 95385 devise \n", + "19 74914 requests \n", + "20 69158 django \n", + "21 79166 jekyll \n", + "22 59607 CodeIgniter \n", + "23 77319 Sick-Beard \n", + "24 104307 paperclip \n", + "25 9636 libgit2 \n", + "26 5214 xbmc \n", + "27 51669 foundation \n", + "28 64176 ThinkUp \n", + "29 81335 rails \n", + "30 10380 redcarpet \n", + "31 64918 phpunit \n", + "32 105378 compass \n", + "33 107534 scala \n", + "34 62501 php-sdk \n", + "35 82328 rails \n", + "36 10629 SignalR \n", + "37 14327 Nancy \n", + "38 17515 elasticsearch \n", + "39 19580 facebook-android-sdk \n", + "40 81320 rails \n", + "41 11 phantomjs \n", + "42 5164 xbmc \n", + "43 7242 redis \n", + "44 26101 node \n", + "45 74915 symfony \n", + "46 75984 reddit \n", + "47 76945 boto \n", + "48 76946 django-debug-toolbar \n", + "49 78835 django-cms \n", + "50 80514 rails \n", + "51 5176 xbmc \n", + "52 12976 plupload \n", + "53 17566 ActionBarSherlock \n", + "54 65250 thinkup \n", + "55 79948 rails \n", + "56 107186 gizzard \n", + "57 784 bitcoin-git \n", + "58 5215 xbmc \n", + "59 5232 xbmc \n", + "60 14328 ServiceStack \n", + "61 47382 chosen \n", + "62 59683 CodeIgniter \n", + "63 76118 reddit \n", + "64 107302 xsbt \n", + "65 107823 scala \n", + "66 454 bitcoin \n", + "67 5438 xbmc \n", + "68 7241 xbmc \n", + "69 42644 d3 \n", + "70 53712 symfony \n", + "71 54148 symfony \n", + "72 80581 rails \n", + "73 84869 rails \n", + "74 6 knitr \n", + "75 7 shiny \n", + "76 9 mongo \n", + "77 3750 http-parser \n", + "78 5182 
xbmc \n", + "79 5230 xbmc \n", + "80 13509 TrinityCore \n", + "81 13597 mono \n", + "82 16134 MiniProfiler \n", + "83 19786 clojure \n", + "84 53713 symfony \n", + "85 64197 zf2 \n", + "86 65107 Slim \n", + "87 71786 tornado \n", + "88 80231 rails \n", + "89 80618 rails \n", + "90 84052 rails \n", + "91 88617 Homebrew \n", + "92 92167 homebrew \n", + "93 92243 homebrew \n", + "94 101997 octopress \n", + "95 107187 sbt \n", + "96 107811 scala \n", + "97 108589 TC-Eluna-3.3.5a \n", + "98 2 devtools \n", + "99 5162 xbmc \n", + "100 5202 xbmc \n", + "101 5240 xbmc \n", + "102 5283 xbmc \n", + "103 5326 beanstalkd \n", + "104 5494 xbmc \n", + "105 6244 xbmc \n", + "106 8255 redis \n", + "107 8948 redis \n", + "108 11582 openFrameworks \n", + "109 12554 SparkleShare \n", + "110 13197 TrinityCore \n", + "111 15018 RestSharp \n", + "112 16402 storm \n", + "113 23311 CraftBukkit \n", + "114 25879 node \n", + "115 26240 node \n", + "116 26766 node \n", + "117 29190 html5-boilerplate \n", + "118 53743 symfony \n", + "119 54403 symfony \n", + "120 54691 symfony \n", + "121 61016 CodeIgniter \n", + "122 63690 zf2 \n", + "123 65762 cakephp \n", + "124 65946 cakephp \n", + "125 65983 ThinkUp \n", + "126 67281 cakephp \n", + "127 68893 phpunit \n", + "128 69177 django \n", + "129 69349 django \n", + "130 69897 django \n", + "131 79642 rails \n", + "132 80002 rails \n", + "133 80498 rails \n", + "134 80535 rails \n", + "135 80559 rails \n", + "136 80818 rails \n", + "137 81332 rails \n", + "138 81621 rails \n", + "139 82863 rails \n", + "140 88593 homebrew \n", + "141 88732 homebrew \n", + "142 90945 homebrew \n", + "143 91892 homebrew \n", + "144 92348 homebrew \n", + "145 92859 homebrew \n", + "146 93198 homebrew \n", + "147 94288 homebrew \n", + "148 100124 diaspora \n", + "149 102734 octopress \n", + "150 106160 finagle \n", + "151 107312 sbt \n", + "152 107535 scalatra \n", + "153 107829 scala \n", + "154 8 folly \n", + "155 893 bitcoin \n", + "156 935 devtools \n", + "157 
1024 bitcoin \n", + "158 2710 phantomjs \n", + "159 3814 mongo \n", + "160 3840 mongo \n", + "161 3897 http-parser \n", + "162 4262 mongo \n", + "163 5208 xbmc \n", + "164 5228 xbmc \n", + "165 5234 xbmc \n", + "166 5236 xbmc \n", + "167 5273 XBMC \n", + "168 5297 xbmc \n", + "169 5422 xbmc \n", + "170 5428 xbmc \n", + "171 5448 xbmc \n", + "172 5485 xbmc \n", + "173 5715 mangos \n", + "174 5828 xbmc \n", + "175 5832 xbmc \n", + "176 5837 xbmc \n", + "177 5858 xbmc \n", + "178 5887 xbmc \n", + "179 5962 xbmc \n", + "180 6250 xbmc \n", + "181 6258 xbmc \n", + "182 6349 xbmc \n", + "183 6472 xbmc \n", + "184 6505 xbmc \n", + "185 6609 xbmc \n", + "186 6790 xbmc \n", + "187 6819 xbmc \n", + "188 7180 xbmc \n", + "189 7404 redis \n", + "190 7427 ccv \n", + "191 7457 xbmc \n", + "192 8478 redis \n", + "193 8605 redis \n", + "194 8634 redis \n", + "195 8955 memcached \n", + "196 9014 hiphop-php \n", + "197 10468 xbmca10 \n", + "198 10617 libgit2 \n", + "199 10622 libgit2 \n", + "200 10632 libgit2 \n", + "201 11494 openFrameworks \n", + "202 12102 openFrameworks \n", + "203 12529 libgit2 \n", + "204 12934 SparkleShare \n", + "205 12941 TrinityCore \n", + "206 12943 TrinityCore \n", + "207 12964 TrinityCore \n", + "208 12972 TrinityCore \n", + "209 12973 TrinityCore \n", + "210 13410 plupload \n", + "211 13514 uwom-server \n", + "212 13516 TrinityCore \n", + "213 13526 TrinityCore \n", + "214 13558 TrinityCore \n", + "215 13688 mono \n", + "216 13724 mono \n", + "217 13765 mono \n", + "218 13775 mono \n", + "219 14092 mono \n", + "220 14523 ServiceStack \n", + "221 14593 ServiceStack \n", + "222 14912 AutoMapper \n", + "223 14993 Nancy \n", + "224 15521 RestSharp \n", + "225 15699 RestSharp \n", + "226 15836 Nancy \n", + "227 17907 elasticsearch \n", + "228 18328 elasticsearch \n", + "229 18863 elasticsearch \n", + "230 23040 CraftBukkit \n", + "231 23203 CraftBukkit \n", + "232 23245 CraftBukkit \n", + "233 23411 CraftBukkit \n", + "234 24886 netty \n", + "235 25148 netty 
\n", + "236 25358 netty \n", + "237 25520 netty \n", + "238 25884 node \n", + "239 26062 node \n", + "240 26508 node \n", + "241 27020 jquery \n", + "242 27040 node \n", + "243 27075 jquery \n", + "244 27190 jquery \n", + "245 27236 jquery \n", + "246 27269 jquery \n", + "247 27330 node \n", + "248 27379 node \n", + "249 27504 impress.js \n", + "250 27570 jquery \n", + "251 27761 jquery-nodom \n", + "252 27796 jquery \n", + "253 27906 jquery \n", + "254 28346 jquery \n", + "255 29698 html5-boilerplate \n", + "256 32974 node \n", + "257 34354 node \n", + "258 34632 jquery \n", + "259 35620 jquery \n", + "260 35652 node \n", + "261 36228 node \n", + "262 42719 d3 \n", + "263 43107 d3 \n", + "264 49358 chosen \n", + "265 52432 foundation \n", + "266 53485 symfony \n", + "267 53494 symfony \n", + "268 53548 symfony \n", + "269 53598 symfony \n", + "270 53694 symfony \n", + "271 53715 symfony \n", + "272 53792 symfony \n", + "273 53993 symfony \n", + "274 54713 symfony \n", + "275 55185 symfony \n", + "276 55564 three.js \n", + "277 56114 symfony \n", + "278 56501 three.js \n", + "279 57041 symfony \n", + "280 57142 three.js \n", + "281 58640 symfony \n", + "282 58697 three.js \n", + "283 58762 three.js \n", + "284 59627 CodeIgniter \n", + "285 59890 CodeIgniter \n", + "286 60294 CodeIgniter \n", + "287 60784 CodeIgniter \n", + "288 61226 CodeIgniter \n", + "289 62502 zf2 \n", + "290 63700 zf2 \n", + "291 63898 zf2 \n", + "292 64184 zf2 \n", + "293 64379 zf2 \n", + "294 64412 zf2 \n", + "295 64680 zf2 \n", + "296 65260 thinkup \n", + "297 65317 ThinkUp \n", + "298 65338 ThinkUp \n", + "299 65349 ThinkUp \n", + "300 65530 ThinkUp \n", + "301 65796 cakephp \n", + "302 65889 ThinkUp \n", + "303 65980 cakephp \n", + "304 69254 django \n", + "305 69304 django \n", + "306 72032 flask \n", + "307 75330 requests \n", + "308 75722 requests \n", + "309 76156 reddit \n", + "310 77048 django-debug-toolbar \n", + "311 77422 boto \n", + "312 77458 boto \n", + "313 77717 boto \n", + 
"314 78347 Sick-Beard \n", + "315 78356 Sick-Beard \n", + "316 78461 Sick-Beard \n", + "317 78634 Sick-Beard \n", + "318 78808 Sick-Beard \n", + "319 78942 Sick-Beard \n", + "320 79408 rails \n", + "321 79648 rails \n", + "322 79876 rails \n", + "323 79958 rails \n", + "324 79972 rails \n", + "325 80096 rails \n", + "326 80105 rails \n", + "327 80289 rails \n", + "328 80307 rails \n", + "329 80322 rails \n", + "330 80363 rails \n", + "331 80437 rails \n", + "332 80670 rails \n", + "333 80682 rails \n", + "334 80790 django-cms \n", + "335 80961 rails \n", + "336 80974 rails-1 \n", + "337 81033 rails \n", + "338 81294 rails \n", + "339 81346 rails \n", + "340 81764 rails \n", + "341 81842 rails \n", + "342 82391 rails \n", + "343 82577 rails \n", + "344 82701 rails \n", + "345 83262 rails \n", + "346 83426 jekyll \n", + "347 83454 rails \n", + "348 83533 rails \n", + "349 83737 rails \n", + "350 84573 rails \n", + "351 84757 rails \n", + "352 85511 rails \n", + "353 86503 rails \n", + "354 86821 rails \n", + "355 87060 homebrew \n", + "356 87169 homebrew \n", + "357 87351 homebrew \n", + "358 87973 homebrew \n", + "359 88010 jekyll \n", + "360 88104 homebrew \n", + "361 88197 homebrew \n", + "362 88203 homebrew \n", + "363 88479 homebrew \n", + "364 88666 homebrew \n", + "365 88681 homebrew \n", + "366 88705 homebrew \n", + "367 88715 jekyll \n", + "368 88751 homebrew \n", + "369 88845 homebrew \n", + "370 88884 homebrew \n", + "371 88975 homebrew \n", + "372 89039 homebrew \n", + "373 89091 homebrew \n", + "374 89305 homebrew \n", + "375 89327 homebrew \n", + "376 89384 homebrew \n", + "377 89541 homebrew \n", + "378 89949 homebrew \n", + "379 90205 homebrew \n", + "380 90433 homebrew \n", + "381 90568 homebrew \n", + "382 90602 homebrew \n", + "383 90714 homebrew \n", + "384 90813 homebrew \n", + "385 90961 homebrew \n", + "386 91026 homebrew \n", + "387 91062 homebrew \n", + "388 91113 homebrew \n", + "389 91159 homebrew \n", + "390 91240 homebrew \n", + "391 
91270 homebrew \n", + "392 91294 homebrew \n", + "393 91326 homebrew \n", + "394 91496 homebrew \n", + "395 91611 homebrew \n", + "396 91638 homebrew \n", + "397 91741 homebrew \n", + "398 91854 homebrew \n", + "399 91864 homebrew \n", + "400 91916 homebrew \n", + "401 91918 homebrew \n", + "402 91967 homebrew \n", + "403 92060 homebrew \n", + "404 92372 homebrew \n", + "405 92399 homebrew \n", + "406 92749 homebrew \n", + "407 92789 homebrew \n", + "408 92830 homebrew \n", + "409 92839 homebrew \n", + "410 92866 homebrew \n", + "411 92978 homebrew \n", + "412 93004 homebrew \n", + "413 93049 homebrew \n", + "414 93112 homebrew \n", + "415 93180 homebrew \n", + "416 93250 homebrew \n", + "417 93295 homebrew \n", + "418 93310 homebrew \n", + "419 93377 homebrew \n", + "420 93428 homebrew \n", + "421 93913 homebrew \n", + "422 94037 homebrew \n", + "423 95895 gitlabhq \n", + "424 96401 gitlabhq \n", + "425 96669 gitlabhq \n", + "426 96736 gitlabhq \n", + "427 96815 gitlabhq \n", + "428 97857 devise \n", + "429 100090 diaspora \n", + "430 100137 diaspora \n", + "431 101472 blueprint-css \n", + "432 102063 octopress \n", + "433 104578 paperclip \n", + "434 105484 compass \n", + "435 105965 compass \n", + "436 106161 kestrel \n", + "437 106447 finagle \n", + "438 106680 akka \n", + "439 106686 akka \n", + "440 106779 akka \n", + "441 106793 akka \n", + "442 107188 xsbt \n", + "443 107255 xsbt \n", + "444 107317 xsbt \n", + "445 107377 sbt \n", + "446 107809 scala \n", + "447 107815 scala \n", + "448 107819 scala \n", + "449 107824 scala \n", + "450 108259 scala \n", + "451 108348 mongo \n", + "452 108646 django \n", + "\n", + " project_url labeled_comment_count \n", + "0 https://api.github.com/repos/TrinityCore/Trini... 
800 \n", + "1 https://api.github.com/repos/mangos/MaNGOS 622 \n", + "2 https://api.github.com/repos/rails/rails 448 \n", + "3 https://api.github.com/repos/Bukkit/CraftBukkit 357 \n", + "4 https://api.github.com/repos/jquery/jquery 235 \n", + "5 https://api.github.com/repos/diaspora/diaspora 160 \n", + "6 https://api.github.com/repos/xbmc/xbmc 130 \n", + "7 https://api.github.com/repos/netty/netty 80 \n", + "8 https://api.github.com/repos/h5bp/html5-boiler... 80 \n", + "9 https://api.github.com/repos/joyent/node 65 \n", + "10 https://api.github.com/repos/mrdoob/three.js 65 \n", + "11 https://api.github.com/repos/akka/akka 62 \n", + "12 https://api.github.com/repos/mxcl/homebrew 57 \n", + "13 https://api.github.com/repos/gitlabhq/gitlabhq 55 \n", + "14 https://api.github.com/repos/symfony/symfony 46 \n", + "15 https://api.github.com/repos/openframeworks/op... 40 \n", + "16 https://api.github.com/repos/cakephp/cakephp 33 \n", + "17 https://api.github.com/repos/joyent/libuv 29 \n", + "18 https://api.github.com/repos/plataformatec/devise 22 \n", + "19 https://api.github.com/repos/kennethreitz/requ... 20 \n", + "20 https://api.github.com/repos/django/django 19 \n", + "21 https://api.github.com/repos/mojombo/jekyll 18 \n", + "22 https://api.github.com/repos/EllisLab/CodeIgniter 15 \n", + "23 https://api.github.com/repos/midgetspy/Sick-Beard 15 \n", + "24 https://api.github.com/repos/thoughtbot/paperclip 14 \n", + "25 https://api.github.com/repos/libgit2/libgit2 13 \n", + "26 https://api.github.com/repos/jmarshallnz/xbmc 12 \n", + "27 https://api.github.com/repos/zurb/foundation 12 \n", + "28 https://api.github.com/repos/ginatrapani/ThinkUp 12 \n", + "29 https://api.github.com/repos/flooose/rails 12 \n", + "30 https://api.github.com/repos/vmg/redcarpet 11 \n", + "31 https://api.github.com/repos/sebastianbergmann... 11 \n", + "32 https://api.github.com/repos/chriseppstein/com... 
11 \n", + "33 https://api.github.com/repos/scala/scala 11 \n", + "34 https://api.github.com/repos/facebook/php-sdk 10 \n", + "35 https://api.github.com/repos/pinetops/rails 9 \n", + "36 https://api.github.com/repos/SignalR/SignalR 8 \n", + "37 https://api.github.com/repos/NancyFx/Nancy 8 \n", + "38 https://api.github.com/repos/elasticsearch/ela... 8 \n", + "39 https://api.github.com/repos/facebook/facebook... 8 \n", + "40 https://api.github.com/repos/zetter/rails 8 \n", + "41 https://api.github.com/repos/ariya/phantomjs 7 \n", + "42 https://api.github.com/repos/theuni/xbmc 7 \n", + "43 https://api.github.com/repos/antirez/redis 7 \n", + "44 https://api.github.com/repos/indutny/node 7 \n", + "45 https://api.github.com/repos/xphere-forks/symfony 7 \n", + "46 https://api.github.com/repos/reddit/reddit 7 \n", + "47 https://api.github.com/repos/boto/boto 7 \n", + "48 https://api.github.com/repos/django-debug-tool... 7 \n", + "49 https://api.github.com/repos/divio/django-cms 7 \n", + "50 https://api.github.com/repos/andhapp/rails 7 \n", + "51 https://api.github.com/repos/davilla/xbmc 6 \n", + "52 https://api.github.com/repos/moxiecode/plupload 6 \n", + "53 https://api.github.com/repos/JakeWharton/Actio... 6 \n", + "54 https://api.github.com/repos/mwilkie/thinkup 6 \n", + "55 https://api.github.com/repos/bjeanes/rails 6 \n", + "56 https://api.github.com/repos/twitter/gizzard 6 \n", + "57 https://api.github.com/repos/gavinandresen/bit... 5 \n", + "58 https://api.github.com/repos/Montellese/xbmc 5 \n", + "59 https://api.github.com/repos/elupus/xbmc 5 \n", + "60 https://api.github.com/repos/ServiceStack/Serv... 
5 \n", + "61 https://api.github.com/repos/harvesthq/chosen 5 \n", + "62 https://api.github.com/repos/dchill42/CodeIgniter 5 \n", + "63 https://api.github.com/repos/andre-d/reddit 5 \n", + "64 https://api.github.com/repos/snowplow/xsbt 5 \n", + "65 https://api.github.com/repos/paulp/scala 5 \n", + "66 https://api.github.com/repos/bitcoin/bitcoin 4 \n", + "67 https://api.github.com/repos/0wing/xbmc 4 \n", + "68 https://api.github.com/repos/FernetMenta/xbmc 4 \n", + "69 https://api.github.com/repos/mbostock/d3 4 \n", + "70 https://api.github.com/repos/vicb/symfony 4 \n", + "71 https://api.github.com/repos/schmittjoh/symfony 4 \n", + "72 https://api.github.com/repos/SAP-Oxygen/rails 4 \n", + "73 https://api.github.com/repos/slave-but-free/rails 4 \n", + "74 https://api.github.com/repos/yihui/knitr 3 \n", + "75 https://api.github.com/repos/rstudio/shiny 3 \n", + "76 https://api.github.com/repos/mongodb/mongo 3 \n", + "77 https://api.github.com/repos/joyent/http-parser 3 \n", + "78 https://api.github.com/repos/Voyager1/xbmc 3 \n", + "79 https://api.github.com/repos/jimfcarroll/xbmc 3 \n", + "80 https://api.github.com/repos/Havenard/TrinityCore 3 \n", + "81 https://api.github.com/repos/Unity-Technologie... 3 \n", + "82 https://api.github.com/repos/SamSaffron/MiniPr... 
3 \n", + "83 https://api.github.com/repos/clojure/clojure 3 \n", + "84 https://api.github.com/repos/bschussek/symfony 3 \n", + "85 https://api.github.com/repos/bakura10/zf2 3 \n", + "86 https://api.github.com/repos/codeguy/Slim 3 \n", + "87 https://api.github.com/repos/facebook/tornado 3 \n", + "88 https://api.github.com/repos/sikachu/rails 3 \n", + "89 https://api.github.com/repos/steveklabnik/rails 3 \n", + "90 https://api.github.com/repos/bbenezech/rails 3 \n", + "91 https://api.github.com/repos/MindTooth/Homebrew 3 \n", + "92 https://api.github.com/repos/axelsteiner/homebrew 3 \n", + "93 https://api.github.com/repos/rgov/homebrew 3 \n", + "94 https://api.github.com/repos/imathis/octopress 3 \n", + "95 https://api.github.com/repos/sbt/sbt 3 \n", + "96 https://api.github.com/repos/adriaanm/scala 3 \n", + "97 https://api.github.com/repos/ElunaLuaEngine/TC... 3 \n", + "98 https://api.github.com/repos/hadley/devtools 2 \n", + "99 https://api.github.com/repos/opdenkamp/xbmc 2 \n", + "100 https://api.github.com/repos/cptspiff/xbmc 2 \n", + "101 https://api.github.com/repos/bobo1on1/xbmc 2 \n", + "102 https://api.github.com/repos/Memphiz/xbmc 2 \n", + "103 https://api.github.com/repos/kr/beanstalkd 2 \n", + "104 https://api.github.com/repos/koying/xbmc 2 \n", + "105 https://api.github.com/repos/jpsdr/xbmc 2 \n", + "106 https://api.github.com/repos/charsyam/redis 2 \n", + "107 https://api.github.com/repos/evilsocket/redis 2 \n", + "108 https://api.github.com/repos/bilderbuchi/openF... 2 \n", + "109 https://api.github.com/repos/hbons/SparkleShare 2 \n", + "110 https://api.github.com/repos/kandera/TrinityCore 2 \n", + "111 https://api.github.com/repos/restsharp/RestSharp 2 \n", + "112 https://api.github.com/repos/nathanmarz/storm 2 \n", + "113 https://api.github.com/repos/ScoreUnder/CraftB... 
2 \n", + "114 https://api.github.com/repos/isaacs/node 2 \n", + "115 https://api.github.com/repos/kuebk/node 2 \n", + "116 https://api.github.com/repos/mattrobenolt/node 2 \n", + "117 https://api.github.com/repos/bentruyman/html5-... 2 \n", + "118 https://api.github.com/repos/hhamon/symfony 2 \n", + "119 https://api.github.com/repos/drak/symfony 2 \n", + "120 https://api.github.com/repos/Tobion/symfony 2 \n", + "121 https://api.github.com/repos/and-ers/CodeIgniter 2 \n", + "122 https://api.github.com/repos/weierophinney/zf2 2 \n", + "123 https://api.github.com/repos/markstory/cakephp 2 \n", + "124 https://api.github.com/repos/ADmad/cakephp 2 \n", + "125 https://api.github.com/repos/brandonroberts/Th... 2 \n", + "126 https://api.github.com/repos/schrolli/cakephp 2 \n", + "127 https://api.github.com/repos/fizzka/phpunit 2 \n", + "128 https://api.github.com/repos/andrewgodwin/django 2 \n", + "129 https://api.github.com/repos/carljm/django 2 \n", + "130 https://api.github.com/repos/melinath/django 2 \n", + "131 https://api.github.com/repos/drogus/rails 2 \n", + "132 https://api.github.com/repos/joshk/rails 2 \n", + "133 https://api.github.com/repos/bratish/rails 2 \n", + "134 https://api.github.com/repos/anildigital/rails 2 \n", + "135 https://api.github.com/repos/sishen/rails 2 \n", + "136 https://api.github.com/repos/goncalossilva/rails 2 \n", + "137 https://api.github.com/repos/jenseng/rails 2 \n", + "138 https://api.github.com/repos/jenslukowski/rails 2 \n", + "139 https://api.github.com/repos/gururuby/rails 2 \n", + "140 https://api.github.com/repos/docwhat/homebrew 2 \n", + "141 https://api.github.com/repos/mbrunthaler/homebrew 2 \n", + "142 https://api.github.com/repos/clusty/homebrew 2 \n", + "143 https://api.github.com/repos/semaperepelitsa/h... 2 \n", + "144 https://api.github.com/repos/alastairandrew/ho... 
2 \n", + "145 https://api.github.com/repos/danielb2/homebrew 2 \n", + "146 https://api.github.com/repos/simleo/homebrew 2 \n", + "147 https://api.github.com/repos/vhbit/homebrew 2 \n", + "148 https://api.github.com/repos/christophe-de/dia... 2 \n", + "149 https://api.github.com/repos/andrewreid/octopress 2 \n", + "150 https://api.github.com/repos/twitter/finagle 2 \n", + "151 https://api.github.com/repos/gkossakowski/sbt 2 \n", + "152 https://api.github.com/repos/scalatra/scalatra 2 \n", + "153 https://api.github.com/repos/magarciaEPFL/scala 2 \n", + "154 https://api.github.com/repos/facebook/folly 1 \n", + "155 https://api.github.com/repos/TheBlueMatt/bitcoin 1 \n", + "156 https://api.github.com/repos/yoni/devtools 1 \n", + "157 https://api.github.com/repos/laanwj/bitcoin 1 \n", + "158 https://api.github.com/repos/dburrows/phantomjs 1 \n", + "159 https://api.github.com/repos/guanqun/mongo 1 \n", + "160 https://api.github.com/repos/stulentsev/mongo 1 \n", + "161 https://api.github.com/repos/bnoordhuis/http-p... 
1 \n", + "162 https://api.github.com/repos/amcfague/mongo 1 \n", + "163 https://api.github.com/repos/Fneufneu/xbmc 1 \n", + "164 https://api.github.com/repos/anssih/xbmc 1 \n", + "165 https://api.github.com/repos/pieh/xbmc 1 \n", + "166 https://api.github.com/repos/llyzs/xbmc 1 \n", + "167 https://api.github.com/repos/HarryMuscle/XBMC 1 \n", + "168 https://api.github.com/repos/garbear/xbmc 1 \n", + "169 https://api.github.com/repos/doozan/xbmc 1 \n", + "170 https://api.github.com/repos/cbxbiker61/xbmc 1 \n", + "171 https://api.github.com/repos/vdrfan/xbmc 1 \n", + "172 https://api.github.com/repos/malard/xbmc 1 \n", + "173 https://api.github.com/repos/vermie/mangos 1 \n", + "174 https://api.github.com/repos/xbmcfanboy/xbmc 1 \n", + "175 https://api.github.com/repos/taxigps/xbmc 1 \n", + "176 https://api.github.com/repos/aviksil/xbmc 1 \n", + "177 https://api.github.com/repos/MichaelAnders/xbmc 1 \n", + "178 https://api.github.com/repos/PSyton/xbmc 1 \n", + "179 https://api.github.com/repos/adam-aph/xbmc 1 \n", + "180 https://api.github.com/repos/herrnst/xbmc 1 \n", + "181 https://api.github.com/repos/xhaggi/xbmc 1 \n", + "182 https://api.github.com/repos/manio/xbmc 1 \n", + "183 https://api.github.com/repos/dragonflight/xbmc 1 \n", + "184 https://api.github.com/repos/maheus/xbmc 1 \n", + "185 https://api.github.com/repos/elbeardmorez/xbmc 1 \n", + "186 https://api.github.com/repos/DigitalDJ/xbmc 1 \n", + "187 https://api.github.com/repos/FlyingRat/xbmc 1 \n", + "188 https://api.github.com/repos/Jalle19/xbmc 1 \n", + "189 https://api.github.com/repos/mrb/redis 1 \n", + "190 https://api.github.com/repos/liuliu/ccv 1 \n", + "191 https://api.github.com/repos/phil65/xbmc 1 \n", + "192 https://api.github.com/repos/marcelaraujo/redis 1 \n", + "193 https://api.github.com/repos/jumping/redis 1 \n", + "194 https://api.github.com/repos/melvyn-sopacua/redis 1 \n", + "195 https://api.github.com/repos/memcached/memcached 1 \n", + "196 
https://api.github.com/repos/fungos/hiphop-php 1 \n", + "197 https://api.github.com/repos/npeacock/xbmca10 1 \n", + "198 https://api.github.com/repos/phkelley/libgit2 1 \n", + "199 https://api.github.com/repos/martinwoodward/li... 1 \n", + "200 https://api.github.com/repos/carlosmn/libgit2 1 \n", + "201 https://api.github.com/repos/obviousjim/openFr... 1 \n", + "202 https://api.github.com/repos/openFrameworks-Ra... 1 \n", + "203 https://api.github.com/repos/glesserd/libgit2 1 \n", + "204 https://api.github.com/repos/serras/SparkleShare 1 \n", + "205 https://api.github.com/repos/0omega/TrinityCore 1 \n", + "206 https://api.github.com/repos/blipi/TrinityCore 1 \n", + "207 https://api.github.com/repos/zorix/TrinityCore 1 \n", + "208 https://api.github.com/repos/Naervin/TrinityCore 1 \n", + "209 https://api.github.com/repos/4m1g0/TrinityCore 1 \n", + "210 https://api.github.com/repos/dtmax/plupload 1 \n", + "211 https://api.github.com/repos/WarHead/uwom-server 1 \n", + "212 https://api.github.com/repos/hacknowledge/Trin... 1 \n", + "213 https://api.github.com/repos/johnholiver/Trini... 1 \n", + "214 https://api.github.com/repos/Baeumchen/Trinity... 1 \n", + "215 https://api.github.com/repos/cyplo/mono 1 \n", + "216 https://api.github.com/repos/QuickJack/mono 1 \n", + "217 https://api.github.com/repos/ermshiperete/mono 1 \n", + "218 https://api.github.com/repos/ukplc/mono 1 \n", + "219 https://api.github.com/repos/killabytenow/mono 1 \n", + "220 https://api.github.com/repos/jeffgabhart/Servi... 1 \n", + "221 https://api.github.com/repos/leon-andria/Servi... 1 \n", + "222 https://api.github.com/repos/AutoMapper/AutoMa... 
1 \n", + "223 https://api.github.com/repos/thedersen/Nancy 1 \n", + "224 https://api.github.com/repos/crdeutsch/RestSharp 1 \n", + "225 https://api.github.com/repos/Haacked/RestSharp 1 \n", + "226 https://api.github.com/repos/mat-mcloughlin/Nancy 1 \n", + "227 https://api.github.com/repos/imotov/elasticsearch 1 \n", + "228 https://api.github.com/repos/javanna/elasticse... 1 \n", + "229 https://api.github.com/repos/brwe/elasticsearch 1 \n", + "230 https://api.github.com/repos/Zaraza107/CraftBu... 1 \n", + "231 https://api.github.com/repos/cyberdudedk/Craft... 1 \n", + "232 https://api.github.com/repos/TheEliteFour/Craf... 1 \n", + "233 https://api.github.com/repos/dumptruckman/Craf... 1 \n", + "234 https://api.github.com/repos/jpinner/netty 1 \n", + "235 https://api.github.com/repos/CruzBishop/netty 1 \n", + "236 https://api.github.com/repos/zcourts/netty 1 \n", + "237 https://api.github.com/repos/Melon1017/netty 1 \n", + "238 https://api.github.com/repos/felixge/node 1 \n", + "239 https://api.github.com/repos/TooTallNate/node 1 \n", + "240 https://api.github.com/repos/pixelglow/node 1 \n", + "241 https://api.github.com/repos/louisremi/jquery 1 \n", + "242 https://api.github.com/repos/interruptz/node 1 \n", + "243 https://api.github.com/repos/rwaldron/jquery 1 \n", + "244 https://api.github.com/repos/gnarf/jquery 1 \n", + "245 https://api.github.com/repos/SlexAxton/jquery 1 \n", + "246 https://api.github.com/repos/alexisabril/jquery 1 \n", + "247 https://api.github.com/repos/laverdet/node 1 \n", + "248 https://api.github.com/repos/JSBizon/node 1 \n", + "249 https://api.github.com/repos/bartaz/impress.js 1 \n", + "250 https://api.github.com/repos/mikesherov/jquery 1 \n", + "251 https://api.github.com/repos/kpozin/jquery-nodom 1 \n", + "252 https://api.github.com/repos/orkel/jquery 1 \n", + "253 https://api.github.com/repos/joelbirchler/jquery 1 \n", + "254 https://api.github.com/repos/gibson042/jquery 1 \n", + "255 https://api.github.com/repos/cleanforestco/htm... 
1 \n", + "256 https://api.github.com/repos/mcurcio/node 1 \n", + "257 https://api.github.com/repos/iizukanao/node 1 \n", + "258 https://api.github.com/repos/danilsomsikov/jquery 1 \n", + "259 https://api.github.com/repos/shalecraig/jquery 1 \n", + "260 https://api.github.com/repos/WebReflection/node 1 \n", + "261 https://api.github.com/repos/hueniverse/node 1 \n", + "262 https://api.github.com/repos/jasondavies/d3 1 \n", + "263 https://api.github.com/repos/GerHobbelt/d3 1 \n", + "264 https://api.github.com/repos/Sikwan/chosen 1 \n", + "265 https://api.github.com/repos/jvivs/foundation 1 \n", + "266 https://api.github.com/repos/fabpot/symfony 1 \n", + "267 https://api.github.com/repos/usefulthink/symfony 1 \n", + "268 https://api.github.com/repos/jwage/symfony 1 \n", + "269 https://api.github.com/repos/weaverryan/symfony 1 \n", + "270 https://api.github.com/repos/francisbesset/sym... 1 \n", + "271 https://api.github.com/repos/mvrhov/symfony 1 \n", + "272 https://api.github.com/repos/lsmith77/symfony 1 \n", + "273 https://api.github.com/repos/stloyd/symfony 1 \n", + "274 https://api.github.com/repos/tacman/symfony 1 \n", + "275 https://api.github.com/repos/jfsimon/symfony 1 \n", + "276 https://api.github.com/repos/AddictArts/three.js 1 \n", + "277 https://api.github.com/repos/dlsniper/symfony 1 \n", + "278 https://api.github.com/repos/tapio/three.js 1 \n", + "279 https://api.github.com/repos/ircmaxell/symfony 1 \n", + "280 https://api.github.com/repos/bhouston/three.js 1 \n", + "281 https://api.github.com/repos/gnugat/symfony 1 \n", + "282 https://api.github.com/repos/kevinoe/three.js 1 \n", + "283 https://api.github.com/repos/lminko/three.js 1 \n", + "284 https://api.github.com/repos/darkwhispering/Co... 1 \n", + "285 https://api.github.com/repos/zechdc/CodeIgniter 1 \n", + "286 https://api.github.com/repos/vlakoff/CodeIgniter 1 \n", + "287 https://api.github.com/repos/BillHeaton/CodeIg... 1 \n", + "288 https://api.github.com/repos/chrispassas/CodeI... 
1 \n", + "289 https://api.github.com/repos/zendframework/zf2 1 \n", + "290 https://api.github.com/repos/DASPRiD/zf2 1 \n", + "291 https://api.github.com/repos/ezimuel/zf2 1 \n", + "292 https://api.github.com/repos/Maks3w/zf2 1 \n", + "293 https://api.github.com/repos/davidwindell/zf2 1 \n", + "294 https://api.github.com/repos/jacobkiers/zf2 1 \n", + "295 https://api.github.com/repos/samsonasik/zf2 1 \n", + "296 https://api.github.com/repos/mithaler/thinkup 1 \n", + "297 https://api.github.com/repos/samwho/ThinkUp 1 \n", + "298 https://api.github.com/repos/anildash/ThinkUp 1 \n", + "299 https://api.github.com/repos/kylehase/ThinkUp 1 \n", + "300 https://api.github.com/repos/rgroves/ThinkUp 1 \n", + "301 https://api.github.com/repos/ceeram/cakephp 1 \n", + "302 https://api.github.com/repos/bleything/ThinkUp 1 \n", + "303 https://api.github.com/repos/zoghal/cakephp 1 \n", + "304 https://api.github.com/repos/niwibe/django 1 \n", + "305 https://api.github.com/repos/pvanderlinden/django 1 \n", + "306 https://api.github.com/repos/mitsuhiko/flask 1 \n", + "307 https://api.github.com/repos/dandrzejewski/req... 1 \n", + "308 https://api.github.com/repos/nicoddemus/requests 1 \n", + "309 https://api.github.com/repos/k21/reddit 1 \n", + "310 https://api.github.com/repos/msaelices/django-... 1 \n", + "311 https://api.github.com/repos/irskep/boto 1 \n", + "312 https://api.github.com/repos/goura/boto 1 \n", + "313 https://api.github.com/repos/fayazkhan/boto 1 \n", + "314 https://api.github.com/repos/bshep/Sick-Beard 1 \n", + "315 https://api.github.com/repos/ozeraser/Sick-Beard 1 \n", + "316 https://api.github.com/repos/jorgenpt/Sick-Beard 1 \n", + "317 https://api.github.com/repos/EchelonFour/Sick-... 
1 \n", + "318 https://api.github.com/repos/mozvip/Sick-Beard 1 \n", + "319 https://api.github.com/repos/Prinz23/Sick-Beard 1 \n", + "320 https://api.github.com/repos/coderrr/rails 1 \n", + "321 https://api.github.com/repos/zires/rails 1 \n", + "322 https://api.github.com/repos/bigfix/rails 1 \n", + "323 https://api.github.com/repos/rafaelfranca/rails 1 \n", + "324 https://api.github.com/repos/github/rails 1 \n", + "325 https://api.github.com/repos/cldwalker/rails 1 \n", + "326 https://api.github.com/repos/indirect/rails 1 \n", + "327 https://api.github.com/repos/arunagw/rails 1 \n", + "328 https://api.github.com/repos/raysrashmi/rails 1 \n", + "329 https://api.github.com/repos/smartinez87/rails 1 \n", + "330 https://api.github.com/repos/castlerock/rails 1 \n", + "331 https://api.github.com/repos/senny/rails 1 \n", + "332 https://api.github.com/repos/acroca/rails 1 \n", + "333 https://api.github.com/repos/gazay/rails 1 \n", + "334 https://api.github.com/repos/powderflask/djang... 1 \n", + "335 https://api.github.com/repos/kennyj/rails 1 \n", + "336 https://api.github.com/repos/tanin47/rails-1 1 \n", + "337 https://api.github.com/repos/nashby/rails 1 \n", + "338 https://api.github.com/repos/larskanis/rails 1 \n", + "339 https://api.github.com/repos/kielkowicz/rails 1 \n", + "340 https://api.github.com/repos/bogdan/rails 1 \n", + "341 https://api.github.com/repos/tigrish/rails 1 \n", + "342 https://api.github.com/repos/homakov/rails 1 \n", + "343 https://api.github.com/repos/dylanahsmith/rails 1 \n", + "344 https://api.github.com/repos/revans/rails 1 \n", + "345 https://api.github.com/repos/blowmage/rails 1 \n", + "346 https://api.github.com/repos/robru/jekyll 1 \n", + "347 https://api.github.com/repos/Grandrath/rails 1 \n", + "348 https://api.github.com/repos/gsphanikumar/rails 1 \n", + "349 https://api.github.com/repos/gaurish/rails 1 \n", + "350 https://api.github.com/repos/morgancurrie/rails 1 \n", + "351 https://api.github.com/repos/Jiebour/rails 1 \n", + "352 
https://api.github.com/repos/frodsan/rails 1 \n", + "353 https://api.github.com/repos/ankit8898/rails 1 \n", + "354 https://api.github.com/repos/versioncontrol/rails 1 \n", + "355 https://api.github.com/repos/godfat/homebrew 1 \n", + "356 https://api.github.com/repos/wright/homebrew 1 \n", + "357 https://api.github.com/repos/bdd/homebrew 1 \n", + "358 https://api.github.com/repos/greedy/homebrew 1 \n", + "359 https://api.github.com/repos/edeustace/jekyll 1 \n", + "360 https://api.github.com/repos/jlcapps/homebrew 1 \n", + "361 https://api.github.com/repos/justinclift/homebrew 1 \n", + "362 https://api.github.com/repos/tusbar/homebrew 1 \n", + "363 https://api.github.com/repos/neglectedvalue/ho... 1 \n", + "364 https://api.github.com/repos/losmuertos/homebrew 1 \n", + "365 https://api.github.com/repos/larseggert/homebrew 1 \n", + "366 https://api.github.com/repos/donspaulding/home... 1 \n", + "367 https://api.github.com/repos/metamatt/jekyll 1 \n", + "368 https://api.github.com/repos/jacknagel/homebrew 1 \n", + "369 https://api.github.com/repos/vibrog/homebrew 1 \n", + "370 https://api.github.com/repos/catsby/homebrew 1 \n", + "371 https://api.github.com/repos/thoughtpolice/hom... 1 \n", + "372 https://api.github.com/repos/jedi4ever/homebrew 1 \n", + "373 https://api.github.com/repos/dch/homebrew 1 \n", + "374 https://api.github.com/repos/mistydemeo/homebrew 1 \n", + "375 https://api.github.com/repos/tonit/homebrew 1 \n", + "376 https://api.github.com/repos/svenax/homebrew 1 \n", + "377 https://api.github.com/repos/jcupitt/homebrew 1 \n", + "378 https://api.github.com/repos/wesen/homebrew 1 \n", + "379 https://api.github.com/repos/vertis/homebrew 1 \n", + "380 https://api.github.com/repos/2bits/homebrew 1 \n", + "381 https://api.github.com/repos/jwilkins/homebrew 1 \n", + "382 https://api.github.com/repos/anatol/homebrew 1 \n", + "383 https://api.github.com/repos/msabramo/homebrew 1 \n", + "384 https://api.github.com/repos/azarbayejani/home... 
1 \n", + "385 https://api.github.com/repos/nicolasdespres/ho... 1 \n", + "386 https://api.github.com/repos/yllan/homebrew 1 \n", + "387 https://api.github.com/repos/samueljohn/homebrew 1 \n", + "388 https://api.github.com/repos/cartazio/homebrew 1 \n", + "389 https://api.github.com/repos/fish2000/homebrew 1 \n", + "390 https://api.github.com/repos/glejeune/homebrew 1 \n", + "391 https://api.github.com/repos/bpiwowar/homebrew 1 \n", + "392 https://api.github.com/repos/ingmar/homebrew 1 \n", + "393 https://api.github.com/repos/funnymanva/homebrew 1 \n", + "394 https://api.github.com/repos/nmadura/homebrew 1 \n", + "395 https://api.github.com/repos/sandeep048/homebrew 1 \n", + "396 https://api.github.com/repos/lifepillar/homebrew 1 \n", + "397 https://api.github.com/repos/ummels/homebrew 1 \n", + "398 https://api.github.com/repos/nevir/homebrew 1 \n", + "399 https://api.github.com/repos/mrjbq7/homebrew 1 \n", + "400 https://api.github.com/repos/zhangcheng/homebrew 1 \n", + "401 https://api.github.com/repos/rmndk/homebrew 1 \n", + "402 https://api.github.com/repos/rhysd/homebrew 1 \n", + "403 https://api.github.com/repos/mattyr/homebrew 1 \n", + "404 https://api.github.com/repos/AstonJ/homebrew 1 \n", + "405 https://api.github.com/repos/raedwulf/homebrew 1 \n", + "406 https://api.github.com/repos/rays/homebrew 1 \n", + "407 https://api.github.com/repos/OldCrow/homebrew 1 \n", + "408 https://api.github.com/repos/vogonistic/homebrew 1 \n", + "409 https://api.github.com/repos/ashirazi/homebrew 1 \n", + "410 https://api.github.com/repos/sheerun/homebrew 1 \n", + "411 https://api.github.com/repos/marr/homebrew 1 \n", + "412 https://api.github.com/repos/wix/homebrew 1 \n", + "413 https://api.github.com/repos/andriytyurnikov/h... 1 \n", + "414 https://api.github.com/repos/tinystatemachine/... 
1 \n", + "415 https://api.github.com/repos/mashtizadeh/homebrew 1 \n", + "416 https://api.github.com/repos/peabody124/homebrew 1 \n", + "417 https://api.github.com/repos/handlename/homebrew 1 \n", + "418 https://api.github.com/repos/crishoj/homebrew 1 \n", + "419 https://api.github.com/repos/eladg/homebrew 1 \n", + "420 https://api.github.com/repos/chrmoritz/homebrew 1 \n", + "421 https://api.github.com/repos/natritmeyer/homebrew 1 \n", + "422 https://api.github.com/repos/chenpc/homebrew 1 \n", + "423 https://api.github.com/repos/zzet/gitlabhq 1 \n", + "424 https://api.github.com/repos/mikew/gitlabhq 1 \n", + "425 https://api.github.com/repos/proverbface/gitlabhq 1 \n", + "426 https://api.github.com/repos/drahamim/gitlabhq 1 \n", + "427 https://api.github.com/repos/senny/gitlabhq 1 \n", + "428 https://api.github.com/repos/rahearn/devise 1 \n", + "429 https://api.github.com/repos/Gonzih/diaspora 1 \n", + "430 https://api.github.com/repos/Raven24/diaspora 1 \n", + "431 https://api.github.com/repos/joshuaclayton/blu... 
1 \n", + "432 https://api.github.com/repos/pilif/octopress 1 \n", + "433 https://api.github.com/repos/yar/paperclip 1 \n", + "434 https://api.github.com/repos/gmclelland/compass 1 \n", + "435 https://api.github.com/repos/cimmanon/compass 1 \n", + "436 https://api.github.com/repos/robey/kestrel 1 \n", + "437 https://api.github.com/repos/benpence/finagle 1 \n", + "438 https://api.github.com/repos/paulpach/akka 1 \n", + "439 https://api.github.com/repos/metamorph/akka 1 \n", + "440 https://api.github.com/repos/scullxbones/akka 1 \n", + "441 https://api.github.com/repos/drewhk/akka 1 \n", + "442 https://api.github.com/repos/retronym/xsbt 1 \n", + "443 https://api.github.com/repos/vigdorchik/xsbt 1 \n", + "444 https://api.github.com/repos/ebowman/xsbt 1 \n", + "445 https://api.github.com/repos/ezh/sbt 1 \n", + "446 https://api.github.com/repos/odersky/scala 1 \n", + "447 https://api.github.com/repos/lrytz/scala 1 \n", + "448 https://api.github.com/repos/soc/scala 1 \n", + "449 https://api.github.com/repos/phaller/scala 1 \n", + "450 https://api.github.com/repos/Ichoran/scala 1 \n", + "451 https://api.github.com/repos/idning/mongo 1 \n", + "452 https://api.github.com/repos/zbenjamin/django 1 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "query_check2 = \"\"\"\n", + "SELECT\n", + " p.id AS project_id,\n", + " p.name AS project_name,\n", + " p.url AS project_url,\n", + " COUNT(DISTINCT s.ID) AS labeled_comment_count\n", + "FROM projects p\n", + "INNER JOIN commits c ON p.id = c.project_id\n", + "INNER JOIN commit_comments cc ON c.id = cc.commit_id\n", + "INNER JOIN comment_sentiment s ON cc.comment_id = s.ID\n", + "GROUP BY p.id, p.name, p.url\n", + "ORDER BY labeled_comment_count DESC;\n", + "\"\"\"\n", + "\n", + "with engine.connect() as con:\n", + " check2 = pd.read_sql(text(query_check2), con)\n", + "\n", + "print(f\"Projects with sentiment-labeled commit comments: {len(check2)}\")\n", + "display(check2)" + ] + }, + { + 
"cell_type": "markdown", + "id": "cell-check3-header", + "metadata": {}, + "source": [ + "### Check 3: Which projects have the most labeled PR comments?\n", + "\n", + "Now, let's do the same project ranking for the pull request inline comments most heavily represented in the Gold Standard." + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "cell-check3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Projects with sentiment-labeled PR comments: 64\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
project_idproject_nameproject_urllabeled_comment_count
01akkahttps://api.github.com/repos/akka/akka404
151671symfonyhttps://api.github.com/repos/symfony/symfony315
278852railshttps://api.github.com/repos/rails/rails177
324292nodehttps://api.github.com/repos/joyent/node163
462502zf2https://api.github.com/repos/zendframework/zf2154
525875jqueryhttps://api.github.com/repos/jquery/jquery150
679163homebrewhttps://api.github.com/repos/mxcl/homebrew138
79636libgit2https://api.github.com/repos/libgit2/libgit2122
863250cakephphttps://api.github.com/repos/cakephp/cakephp111
9107534scalahttps://api.github.com/repos/scala/scala110
1010629SignalRhttps://api.github.com/repos/SignalR/SignalR90
1169158djangohttps://api.github.com/repos/django/django87
123583xbmchttps://api.github.com/repos/xbmc/xbmc85
1322981nettyhttps://api.github.com/repos/netty/netty82
14454bitcoinhttps://api.github.com/repos/bitcoin/bitcoin77
1591020gitlabhqhttps://api.github.com/repos/gitlabhq/gitlabhq54
1659607CodeIgniterhttps://api.github.com/repos/EllisLab/CodeIgniter50
1779166jekyllhttps://api.github.com/repos/mojombo/jekyll42
1812TrinityCorehttps://api.github.com/repos/TrinityCore/Trini...38
1922980CraftBukkithttps://api.github.com/repos/Bukkit/CraftBukkit38
2014327Nancyhttps://api.github.com/repos/NancyFx/Nancy37
2142644d3https://api.github.com/repos/mbostock/d331
2278835django-cmshttps://api.github.com/repos/divio/django-cms25
239215openFrameworkshttps://api.github.com/repos/openframeworks/op...24
2491331diasporahttps://api.github.com/repos/diaspora/diaspora23
2510593libuvhttps://api.github.com/repos/joyent/libuv19
26106160finaglehttps://api.github.com/repos/twitter/finagle16
27107672zipkinhttps://api.github.com/repos/twitter/zipkin15
2875984reddithttps://api.github.com/repos/reddit/reddit14
296knitrhttps://api.github.com/repos/yihui/knitr14
3095385devisehttps://api.github.com/repos/plataformatec/devise14
3147382chosenhttps://api.github.com/repos/harvesthq/chosen13
3216402stormhttps://api.github.com/repos/nathanmarz/storm13
3376945botohttps://api.github.com/repos/boto/boto13
3417515elasticsearchhttps://api.github.com/repos/elasticsearch/ela...12
3574914requestshttps://api.github.com/repos/kennethreitz/requ...12
3664176ThinkUphttps://api.github.com/repos/ginatrapani/ThinkUp11
37104307papercliphttps://api.github.com/repos/thoughtbot/paperclip11
3871786tornadohttps://api.github.com/repos/facebook/tornado9
3913566monohttps://api.github.com/repos/mono/mono8
4026388html5-boilerplatehttps://api.github.com/repos/h5bp/html5-boiler...8
4111phantomjshttps://api.github.com/repos/ariya/phantomjs7
4215018RestSharphttps://api.github.com/repos/restsharp/RestSharp6
4310380redcarpethttps://api.github.com/repos/vmg/redcarpet6
4477319Sick-Beardhttps://api.github.com/repos/midgetspy/Sick-Beard4
452devtoolshttps://api.github.com/repos/hadley/devtools4
46105378compasshttps://api.github.com/repos/chriseppstein/com...4
4723781androidhttps://api.github.com/repos/github/android3
483750http-parserhttps://api.github.com/repos/joyent/http-parser3
4917566ActionBarSherlockhttps://api.github.com/repos/JakeWharton/Actio...3
509mongohttps://api.github.com/repos/mongodb/mongo3
5116134MiniProfilerhttps://api.github.com/repos/SamSaffron/MiniPr...2
52101997octopresshttps://api.github.com/repos/imathis/octopress2
5362501php-sdkhttps://api.github.com/repos/facebook/php-sdk2
5414328ServiceStackhttps://api.github.com/repos/ServiceStack/Serv...2
55107085flockdbhttps://api.github.com/repos/twitter/flockdb2
5651669foundationhttps://api.github.com/repos/zurb/foundation1
577242redishttps://api.github.com/repos/antirez/redis1
5827504impress.jshttps://api.github.com/repos/bartaz/impress.js1
5950618three.jshttps://api.github.com/repos/mrdoob/three.js1
60107186gizzardhttps://api.github.com/repos/twitter/gizzard1
61107187sbthttps://api.github.com/repos/sbt/sbt1
6265107Slimhttps://api.github.com/repos/codeguy/Slim1
6314912AutoMapperhttps://api.github.com/repos/AutoMapper/AutoMa...1
\n", + "
" + ], + "text/plain": [ + " project_id project_name \\\n", + "0 1 akka \n", + "1 51671 symfony \n", + "2 78852 rails \n", + "3 24292 node \n", + "4 62502 zf2 \n", + "5 25875 jquery \n", + "6 79163 homebrew \n", + "7 9636 libgit2 \n", + "8 63250 cakephp \n", + "9 107534 scala \n", + "10 10629 SignalR \n", + "11 69158 django \n", + "12 3583 xbmc \n", + "13 22981 netty \n", + "14 454 bitcoin \n", + "15 91020 gitlabhq \n", + "16 59607 CodeIgniter \n", + "17 79166 jekyll \n", + "18 12 TrinityCore \n", + "19 22980 CraftBukkit \n", + "20 14327 Nancy \n", + "21 42644 d3 \n", + "22 78835 django-cms \n", + "23 9215 openFrameworks \n", + "24 91331 diaspora \n", + "25 10593 libuv \n", + "26 106160 finagle \n", + "27 107672 zipkin \n", + "28 75984 reddit \n", + "29 6 knitr \n", + "30 95385 devise \n", + "31 47382 chosen \n", + "32 16402 storm \n", + "33 76945 boto \n", + "34 17515 elasticsearch \n", + "35 74914 requests \n", + "36 64176 ThinkUp \n", + "37 104307 paperclip \n", + "38 71786 tornado \n", + "39 13566 mono \n", + "40 26388 html5-boilerplate \n", + "41 11 phantomjs \n", + "42 15018 RestSharp \n", + "43 10380 redcarpet \n", + "44 77319 Sick-Beard \n", + "45 2 devtools \n", + "46 105378 compass \n", + "47 23781 android \n", + "48 3750 http-parser \n", + "49 17566 ActionBarSherlock \n", + "50 9 mongo \n", + "51 16134 MiniProfiler \n", + "52 101997 octopress \n", + "53 62501 php-sdk \n", + "54 14328 ServiceStack \n", + "55 107085 flockdb \n", + "56 51669 foundation \n", + "57 7242 redis \n", + "58 27504 impress.js \n", + "59 50618 three.js \n", + "60 107186 gizzard \n", + "61 107187 sbt \n", + "62 65107 Slim \n", + "63 14912 AutoMapper \n", + "\n", + " project_url labeled_comment_count \n", + "0 https://api.github.com/repos/akka/akka 404 \n", + "1 https://api.github.com/repos/symfony/symfony 315 \n", + "2 https://api.github.com/repos/rails/rails 177 \n", + "3 https://api.github.com/repos/joyent/node 163 \n", + "4 https://api.github.com/repos/zendframework/zf2 154 \n", + 
"5 https://api.github.com/repos/jquery/jquery 150 \n", + "6 https://api.github.com/repos/mxcl/homebrew 138 \n", + "7 https://api.github.com/repos/libgit2/libgit2 122 \n", + "8 https://api.github.com/repos/cakephp/cakephp 111 \n", + "9 https://api.github.com/repos/scala/scala 110 \n", + "10 https://api.github.com/repos/SignalR/SignalR 90 \n", + "11 https://api.github.com/repos/django/django 87 \n", + "12 https://api.github.com/repos/xbmc/xbmc 85 \n", + "13 https://api.github.com/repos/netty/netty 82 \n", + "14 https://api.github.com/repos/bitcoin/bitcoin 77 \n", + "15 https://api.github.com/repos/gitlabhq/gitlabhq 54 \n", + "16 https://api.github.com/repos/EllisLab/CodeIgniter 50 \n", + "17 https://api.github.com/repos/mojombo/jekyll 42 \n", + "18 https://api.github.com/repos/TrinityCore/Trini... 38 \n", + "19 https://api.github.com/repos/Bukkit/CraftBukkit 38 \n", + "20 https://api.github.com/repos/NancyFx/Nancy 37 \n", + "21 https://api.github.com/repos/mbostock/d3 31 \n", + "22 https://api.github.com/repos/divio/django-cms 25 \n", + "23 https://api.github.com/repos/openframeworks/op... 24 \n", + "24 https://api.github.com/repos/diaspora/diaspora 23 \n", + "25 https://api.github.com/repos/joyent/libuv 19 \n", + "26 https://api.github.com/repos/twitter/finagle 16 \n", + "27 https://api.github.com/repos/twitter/zipkin 15 \n", + "28 https://api.github.com/repos/reddit/reddit 14 \n", + "29 https://api.github.com/repos/yihui/knitr 14 \n", + "30 https://api.github.com/repos/plataformatec/devise 14 \n", + "31 https://api.github.com/repos/harvesthq/chosen 13 \n", + "32 https://api.github.com/repos/nathanmarz/storm 13 \n", + "33 https://api.github.com/repos/boto/boto 13 \n", + "34 https://api.github.com/repos/elasticsearch/ela... 12 \n", + "35 https://api.github.com/repos/kennethreitz/requ... 
12 \n", + "36 https://api.github.com/repos/ginatrapani/ThinkUp 11 \n", + "37 https://api.github.com/repos/thoughtbot/paperclip 11 \n", + "38 https://api.github.com/repos/facebook/tornado 9 \n", + "39 https://api.github.com/repos/mono/mono 8 \n", + "40 https://api.github.com/repos/h5bp/html5-boiler... 8 \n", + "41 https://api.github.com/repos/ariya/phantomjs 7 \n", + "42 https://api.github.com/repos/restsharp/RestSharp 6 \n", + "43 https://api.github.com/repos/vmg/redcarpet 6 \n", + "44 https://api.github.com/repos/midgetspy/Sick-Beard 4 \n", + "45 https://api.github.com/repos/hadley/devtools 4 \n", + "46 https://api.github.com/repos/chriseppstein/com... 4 \n", + "47 https://api.github.com/repos/github/android 3 \n", + "48 https://api.github.com/repos/joyent/http-parser 3 \n", + "49 https://api.github.com/repos/JakeWharton/Actio... 3 \n", + "50 https://api.github.com/repos/mongodb/mongo 3 \n", + "51 https://api.github.com/repos/SamSaffron/MiniPr... 2 \n", + "52 https://api.github.com/repos/imathis/octopress 2 \n", + "53 https://api.github.com/repos/facebook/php-sdk 2 \n", + "54 https://api.github.com/repos/ServiceStack/Serv... 2 \n", + "55 https://api.github.com/repos/twitter/flockdb 2 \n", + "56 https://api.github.com/repos/zurb/foundation 1 \n", + "57 https://api.github.com/repos/antirez/redis 1 \n", + "58 https://api.github.com/repos/bartaz/impress.js 1 \n", + "59 https://api.github.com/repos/mrdoob/three.js 1 \n", + "60 https://api.github.com/repos/twitter/gizzard 1 \n", + "61 https://api.github.com/repos/sbt/sbt 1 \n", + "62 https://api.github.com/repos/codeguy/Slim 1 \n", + "63 https://api.github.com/repos/AutoMapper/AutoMa... 
1 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "query_check3 = \"\"\"\n", + "SELECT\n", + " p.id AS project_id,\n", + " p.name AS project_name,\n", + " p.url AS project_url,\n", + " COUNT(DISTINCT s.ID) AS labeled_comment_count\n", + "FROM projects p\n", + "INNER JOIN pull_requests pr ON p.id = pr.base_repo_id\n", + "INNER JOIN pull_request_comments prc ON pr.id = prc.pull_request_id\n", + "INNER JOIN comment_sentiment s ON prc.comment_id = s.ID\n", + "GROUP BY p.id, p.name, p.url\n", + "ORDER BY labeled_comment_count DESC;\n", + "\"\"\"\n", + "\n", + "with engine.connect() as con:\n", + " check3 = pd.read_sql(text(query_check3), con)\n", + "\n", + "print(f\"Projects with sentiment-labeled PR comments: {len(check3)}\")\n", + "display(check3)" + ] + }, + { + "cell_type": "markdown", + "id": "cell-check4-header", + "metadata": {}, + "source": [ + "### Check 4: Are the labeled comments reachable from canonical repos?\n", + "\n", + "Projects on GitHub get forked all the time. Since forks share commit history with their upstream, the same comment IDs can appear under multiple projects in GHTorrent. This matters for Notebook 3 (config file generation). We want to know: if we only generate Kaiaulu configs for canonical (non-fork) repos, how much labeled data will we miss? The purpose of this query is to inform our coverage strategy going into Notebook 3.\n", + "\n", + "Expected values:\n", + "- `canonical_only`: ~4,555\n", + "- `fork_only`: ~569 (these will be missed when targeting canonical repos only)\n", + "- `both_sides`: ~2,083\n", + "- Fork-only rate: ~7.9%\n", + "- Canonical-reachable rate: ~92.1%\n", + "\n", + "From these values, we can see that ~92.1% of comments are reachable from canonical repos. The ~7.9% that are fork-only will be skipped when we generate project config files in Notebook 3. This is an acceptable tradeoff. We document it here so the limitation is visible." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "cell-check4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Canonical vs fork accessibility summary:\n", + " canonical_only : 4555.0 (expected ~4555)\n", + " fork_only : 569.0 (expected ~569)\n", + " both_sides : 2083.0 (expected ~2083)\n", + " fork_only_pct : 7.9% (expected ~7.9%)\n", + " canonical_reachable % : 92.1% (expected ~92.1%)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canonical_onlyfork_onlyboth_sidesfork_only_pctcanonical_reachable_pct
04555.0569.02083.07.992.1
\n", + "
" + ], + "text/plain": [ + " canonical_only fork_only both_sides fork_only_pct \\\n", + "0 4555.0 569.0 2083.0 7.9 \n", + "\n", + " canonical_reachable_pct \n", + "0 92.1 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "query_check4 = \"\"\"\n", + "WITH RECURSIVE project_root AS (\n", + " SELECT p.id AS project_id, p.id AS root_id\n", + " FROM projects p\n", + " WHERE p.forked_from IS NULL\n", + " UNION ALL\n", + " SELECT c.id AS project_id, pr.root_id\n", + " FROM projects c\n", + " JOIN project_root pr ON c.forked_from = pr.project_id\n", + "),\n", + "comment_project_rows AS (\n", + " SELECT cs.ID AS comment_id, c.project_id, 'commit_comment' AS source_tag\n", + " FROM comment_sentiment cs\n", + " JOIN commit_comments cc ON cs.ID = cc.comment_id\n", + " JOIN commits c ON cc.commit_id = c.id\n", + " UNION ALL\n", + " SELECT cs.ID AS comment_id, pr.base_repo_id AS project_id, 'pr_comment' AS source_tag\n", + " FROM comment_sentiment cs\n", + " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", + " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", + " UNION ALL\n", + " SELECT cs.ID AS comment_id, pr.head_repo_id AS project_id, 'pr_comment' AS source_tag\n", + " FROM comment_sentiment cs\n", + " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", + " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", + "),\n", + "labeled AS (\n", + " SELECT\n", + " cpr.comment_id,\n", + " cpr.source_tag,\n", + " pr.root_id,\n", + " (cpr.project_id = pr.root_id) AS is_canonical\n", + " FROM comment_project_rows cpr\n", + " JOIN project_root pr ON pr.project_id = cpr.project_id\n", + "),\n", + "comment_flags AS (\n", + " SELECT\n", + " root_id, source_tag, comment_id,\n", + " MAX(CASE WHEN is_canonical THEN 1 ELSE 0 END) AS has_canonical,\n", + " MAX(CASE WHEN NOT is_canonical THEN 1 ELSE 0 END) AS has_fork\n", + " FROM labeled\n", + " GROUP BY root_id, source_tag, comment_id\n", + "),\n", + "global_counts AS (\n", + 
" SELECT\n", + " COUNT(*) AS mapped_comment_ids,\n", + " SUM(CASE WHEN has_canonical = 1 AND has_fork = 0 THEN 1 ELSE 0 END) AS canonical_only,\n", + " SUM(CASE WHEN has_canonical = 0 AND has_fork = 1 THEN 1 ELSE 0 END) AS fork_only,\n", + " SUM(CASE WHEN has_canonical = 1 AND has_fork = 1 THEN 1 ELSE 0 END) AS both_sides\n", + " FROM comment_flags\n", + ")\n", + "SELECT\n", + " canonical_only,\n", + " fork_only,\n", + " both_sides,\n", + " ROUND(100 * fork_only / NULLIF(mapped_comment_ids, 0), 2) AS fork_only_pct,\n", + " ROUND(100 * (canonical_only + both_sides) / NULLIF(mapped_comment_ids, 0), 2) AS canonical_reachable_pct\n", + "FROM global_counts;\n", + "\"\"\"\n", + "\n", + "with engine.connect() as con:\n", + " check4 = pd.read_sql(text(query_check4), con)\n", + "\n", + "print(\"Canonical vs fork accessibility summary:\")\n", + "print(f\" canonical_only : {check4['canonical_only'].iloc[0]} (expected ~4555)\")\n", + "print(f\" fork_only : {check4['fork_only'].iloc[0]} (expected ~569)\")\n", + "print(f\" both_sides : {check4['both_sides'].iloc[0]} (expected ~2083)\")\n", + "print(f\" fork_only_pct : {check4['fork_only_pct'].iloc[0]}% (expected ~7.9%)\")\n", + "print(f\" canonical_reachable % : {check4['canonical_reachable_pct'].iloc[0]}% (expected ~92.1%)\")\n", + "display(check4)" + ] + }, + { + "cell_type": "markdown", + "id": "1af5516e", + "metadata": {}, + "source": [ + "### Step 5: Build the contextualized dataset\n", + "\n", + "Now that we know which projects have sentiment-labeled comments and how they map across tables, we can build the contextualized dataset.\n", + "\n", + "As mentioned, the Gold Standard currently has three columns (`ID`, `polarity`, `text`). We're going to add six more columns from GHTorrent so that Notebook 4 can INNER JOIN on `comment_id` against Kaiaulu's downloaded comment data:\n", + "\n", + "1. `created_at` - Comment timestamp\n", + "2. `author_login` - Author username\n", + "3. 
`author_name` - Author First Name & Last Name\n", + "4. `author_email` - Author email\n", + "5. `owner` - Project owner\n", + "6. `repo` - Project repo name" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "77ab4187", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total rows: 7122 (expected 7122)\n", + " From commit comments : 4317\n", + " From PR comments only: 2805\n" + ] + } + ], + "source": [ + "# Pull context for commit comments\n", + "commit_context_sql = \"\"\"\n", + "SELECT\n", + " cs.ID AS comment_id,\n", + " cs.Polarity AS polarity,\n", + " cs.Text AS text,\n", + " cc.created_at AS created_at,\n", + " u.login AS author_login,\n", + " u.name AS author_name,\n", + " u.email AS author_email,\n", + " u_owner.login AS owner,\n", + " p.name AS repo\n", + "FROM comment_sentiment cs\n", + "JOIN commit_comments cc ON cs.ID = cc.comment_id\n", + "JOIN users u ON cc.user_id = u.id\n", + "JOIN commits c ON cc.commit_id = c.id\n", + "JOIN projects p ON c.project_id = p.id\n", + "JOIN users u_owner ON p.owner_id = u_owner.id\n", + "\"\"\"\n", + "\n", + "# Pull context for PR comments\n", + "pr_context_sql = \"\"\"\n", + "SELECT\n", + " cs.ID AS comment_id,\n", + " cs.Polarity AS polarity,\n", + " cs.Text AS text,\n", + " prc.created_at AS created_at,\n", + " u.login AS author_login,\n", + " u.name AS author_name,\n", + " u.email AS author_email,\n", + " u_owner.login AS owner,\n", + " p.name AS repo\n", + "FROM comment_sentiment cs\n", + "JOIN pull_request_comments prc ON cs.ID = CAST(prc.comment_id AS UNSIGNED)\n", + "JOIN users u ON prc.user_id = u.id\n", + "JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", + "JOIN projects p ON pr.base_repo_id = p.id\n", + "JOIN users u_owner ON p.owner_id = u_owner.id\n", + "\"\"\"\n", + "\n", + "with engine.connect() as con:\n", + " commit_ctx = pd.read_sql(text(commit_context_sql), con)\n", + " pr_ctx = pd.read_sql(text(pr_context_sql), con)\n", + 
"\n", + "# Deduplicate within each source: keep first match per comment_id\n", + "commit_ctx = commit_ctx.drop_duplicates(subset='comment_id', keep='first')\n", + "pr_ctx = pr_ctx.drop_duplicates(subset='comment_id', keep='first')\n", + "\n", + "# Merge: prefer commit comment rows; fill in PR-only rows for IDs not in commit set\n", + "commit_ids = set(commit_ctx['comment_id'])\n", + "pr_only = pr_ctx[~pr_ctx['comment_id'].isin(commit_ids)]\n", + "contextualized = (\n", + " pd.concat([commit_ctx, pr_only], ignore_index=True)\n", + " .sort_values('comment_id')\n", + " .reset_index(drop=True)\n", + ")\n", + "\n", + "print(f\"Total rows: {len(contextualized)} (expected 7122)\")\n", + "print(f\" From commit comments : {len(commit_ctx)}\")\n", + "print(f\" From PR comments only: {len(pr_only)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "9e04de22", + "metadata": {}, + "source": [ + "### Step 6: Compare original vs. contextualized dataset\n", + "\n", + "Let's look at a quick before/after to see what columns we added. The original Gold Standard has three columns, while the new contextualized version we created has nine." + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "2850d211", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original GitHub Gold Standard (first 5 rows):\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDPolarityText
04063186neutralNo. I still see the wrong twins.  * https://gi...
13894703neutralReverted.\"
21971084neutralYou can leave a queue while in queue ? (before...
31827828positiveDidn't look at SpellTargetRestrictions XD\"
4232603neutralNot sure about what kind of line lengths the p...
\n", + "
" + ], + "text/plain": [ + " ID Polarity Text\n", + "0 4063186 neutral No. I still see the wrong twins. * https://gi...\n", + "1 3894703 neutral Reverted.\"\n", + "2 1971084 neutral You can leave a queue while in queue ? (before...\n", + "3 1827828 positive Didn't look at SpellTargetRestrictions XD\"\n", + "4 232603 neutral Not sure about what kind of line lengths the p..." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Contextualized dataset with additional GHTorrent columns (first 5 rows):\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
comment_idpolaritytextcreated_atauthor_loginauthor_nameauthor_emailownerrepo
0135negativeIf you mean #any_instance, you were better off...2008-04-12 04:54:27tomafroTom Wardtom@popdog.netrailsrails
1138negativeYou're a nasty code smell.\"2008-04-12 06:00:58joevandykJoe Van Dykjoe@tanga.comrailsrails
2196negativeMost users has winrar installed, which afaik h...2008-04-13 04:36:06augustlAugust Lilleaasaugust@augustl.comrailsrails
3318negative(apologies for the weird formatting there, i d...2008-04-15 08:47:51lazyatomJames Adamjames@lazyatom.comrailsrails
4919neutralAre there any other ivars I missed ?\"2008-05-06 02:16:34lifoPratikpratiknaik@gmail.comrailsrails
\n", + "
" + ], + "text/plain": [ + " comment_id polarity text \\\n", + "0 135 negative If you mean #any_instance, you were better off... \n", + "1 138 negative You're a nasty code smell.\" \n", + "2 196 negative Most users has winrar installed, which afaik h... \n", + "3 318 negative (apologies for the weird formatting there, i d... \n", + "4 919 neutral Are there any other ivars I missed ?\" \n", + "\n", + " created_at author_login author_name author_email \\\n", + "0 2008-04-12 04:54:27 tomafro Tom Ward tom@popdog.net \n", + "1 2008-04-12 06:00:58 joevandyk Joe Van Dyk joe@tanga.com \n", + "2 2008-04-13 04:36:06 augustl August Lilleaas august@augustl.com \n", + "3 2008-04-15 08:47:51 lazyatom James Adam james@lazyatom.com \n", + "4 2008-05-06 02:16:34 lifo Pratik pratiknaik@gmail.com \n", + "\n", + " owner repo \n", + "0 rails rails \n", + "1 rails rails \n", + "2 rails rails \n", + "3 rails rails \n", + "4 rails rails " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Note: 784 of 7122 rows have a NULL author_email (11.0%). GitHub stopped exposing emails in the API, so this is expected.\n" + ] + } + ], + "source": [ + "with engine.connect() as con:\n", + " original = pd.read_sql(text(\"SELECT ID, Polarity, Text FROM comment_sentiment LIMIT 5;\"), con)\n", + "\n", + "print(\"Original GitHub Gold Standard (first 5 rows):\")\n", + "display(original)\n", + "\n", + "print(\"\\nContextualized dataset with additional GHTorrent columns (first 5 rows):\")\n", + "display(contextualized.head())\n", + "\n", + "null_emails = contextualized['author_email'].isna().sum()\n", + "print(f\"\\nNote: {null_emails} of {len(contextualized)} rows have a NULL author_email ({round(100*null_emails/len(contextualized), 1)}%). 
GitHub stopped exposing emails in the API, so this is expected.\")" + ] + }, + { + "cell_type": "markdown", + "id": "a04e860a", + "metadata": {}, + "source": [ + "### Step 7: Save the contextualized dataset\n", + "\n", + "This CSV is the output of Notebook 2 and the input to Notebook 4. We'll save it to `data/github_gold_standard_contextualized.csv`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d4e8585", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "output_path = os.path.join(os.path.dirname(os.getcwd()), \"data\", \"github_gold_standard_contextualized.csv\")\n", + "os.makedirs(os.path.dirname(output_path), exist_ok=True)\n", + "\n", + "contextualized.to_csv(output_path, index=False)\n", + "print(f\"Saved: {output_path}\")\n", + "print(f\"Rows: {len(contextualized)}, Columns: {list(contextualized.columns)}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/02_explore_gh_torrent_tables.ipynb b/notebooks/02_explore_gh_torrent_tables.ipynb deleted file mode 100644 index 4d14460..0000000 --- a/notebooks/02_explore_gh_torrent_tables.ipynb +++ /dev/null @@ -1,413 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "1b3dd1e0", - "metadata": {}, - "source": [ - "# Explore GHTorrent Tables for Sentiment Mapping (Notebook 2)\n", - "\n", - "This notebook explores the GHTorrent database to understand where the 7,122 Gold Standard sentiment comments are stored and how they connect to real projects.\n", - "\n", - "The goal is to answer which projects have sentiment-labeled comments and whether those comments are 
reachable from a canonical (non-fork) repository.\n", - "\n", - "### Planned Output\n", - "By the end of this notebook, you should have:\n", - "1. A breakdown of how sentiment comments are split between commit comment and PR comment tables\n", - "2. A ranked list of projects with the most sentiment-labeled commit comments\n", - "3. A ranked list of projects with the most sentiment-labeled PR comments\n", - "4. A global summary of how many sentiment comments are reachable via canonical repos vs. forks only" - ] - }, - { - "cell_type": "markdown", - "id": "cell-step1-header", - "metadata": {}, - "source": [ - "### Step 1: Import dependencies and connect to MySQL\n", - "\n", - "We reuse the same connection pattern as Notebook 1. Update the credentials below to match your local MySQL setup." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "cell-imports", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "from sqlalchemy import create_engine, text" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "cell-config", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Connected to MySQL.\n" - ] - } - ], - "source": [ - "MYSQL_HOST = \"localhost\"\n", - "MYSQL_PORT = 3306\n", - "MYSQL_USER = \"root\"\n", - "MYSQL_PASSWORD = \"password\"\n", - "MYSQL_DB = \"github\"\n", - "\n", - "engine = create_engine(\n", - " f\"mysql+mysqlconnector://{MYSQL_USER}:{MYSQL_PASSWORD}@{MYSQL_HOST}:{MYSQL_PORT}/{MYSQL_DB}\"\n", - ")\n", - "print(\"Connected to MySQL.\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-check1-header", - "metadata": {}, - "source": [ - "### Check 1: How sentiment comments are distributed across tables\n", - "\n", - "There are two kinds of Github comments in GHTorrent: commit comments (discussions posted about commits) and PR inline comments (left on a specific line of code in a pull request).\n", - "\n", - "The Gold Standard dataset includes both 
types. The same `ID` value maps to `comment_id` in both `commit_comments` and `pull_request_comments`.\n", - "\n", - "This check tells us how many sentiment IDs join to each table. Some IDs appear in both tables (overlap = 85), meaning a small number of comments were captured under both endpoints in GHTorrent. The total unique IDs should sum to 7,122.\n", - "\n", - "Expected values:\n", - "- Commit comment matches: ~4,317\n", - "- PR comment matches: ~2,890\n", - "- Overlap (both): ~85\n", - "- Commit-only: 4,232 | PR-only: 2,805 | Total unique: 7,122" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cell-check1", - "metadata": {}, - "outputs": [], - "source": [ - "with engine.connect() as con:\n", - " commit_count = pd.read_sql(text(\"\"\"\n", - " SELECT COUNT(*) AS commit_comment_matches\n", - " FROM comment_sentiment s\n", - " INNER JOIN commit_comments cc ON s.ID = cc.comment_id;\n", - " \"\"\"), con).iloc[0, 0]\n", - "\n", - " pr_count = pd.read_sql(text(\"\"\"\n", - " SELECT COUNT(*) AS pr_comment_matches\n", - " FROM comment_sentiment s\n", - " INNER JOIN pull_request_comments prc ON s.ID = prc.comment_id;\n", - " \"\"\"), con).iloc[0, 0]\n", - "\n", - " overlap = pd.read_sql(text(\"\"\"\n", - " SELECT COUNT(*) AS overlap\n", - " FROM comment_sentiment s\n", - " INNER JOIN commit_comments cc ON s.ID = cc.comment_id\n", - " INNER JOIN pull_request_comments prc ON s.ID = prc.comment_id;\n", - " \"\"\"), con).iloc[0, 0]\n", - "\n", - "commit_only = commit_count - overlap\n", - "pr_only = pr_count - overlap\n", - "total_unique = commit_only + pr_only + overlap\n", - "\n", - "summary = pd.DataFrame({\n", - " 'Category': ['Commit matches', 'PR matches', 'Overlap (both)', 'Commit-only', 'PR-only', 'Total unique'],\n", - " 'Count': [commit_count, pr_count, overlap, commit_only, pr_only, total_unique],\n", - " 'Expected': [4317, 2890, 85, 4232, 2805, 7122]\n", - "})\n", - "display(summary)\n", - "\n", - "if total_unique == 7122:\n", - " 
print(\"PASS: total unique IDs = 7122.\")\n", - "else:\n", - " print(f\"WARNING: total unique IDs = {total_unique}, expected 7122.\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-check2-header", - "metadata": {}, - "source": [ - "### Check 2: Projects with the most sentiment-labeled commit comments\n", - "\n", - "This query ranks projects by how many of their commit comments are in the Gold Standard. This tells us which projects are most heavily represented in the labeled data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cell-check2", - "metadata": {}, - "outputs": [], - "source": [ - "query_check2 = \"\"\"\n", - "SELECT\n", - " p.id AS project_id,\n", - " p.name AS project_name,\n", - " p.url AS project_url,\n", - " COUNT(DISTINCT s.ID) AS labeled_comment_count\n", - "FROM projects p\n", - "INNER JOIN commits c ON p.id = c.project_id\n", - "INNER JOIN commit_comments cc ON c.id = cc.commit_id\n", - "INNER JOIN comment_sentiment s ON cc.comment_id = s.ID\n", - "GROUP BY p.id, p.name, p.url\n", - "ORDER BY labeled_comment_count DESC;\n", - "\"\"\"\n", - "\n", - "with engine.connect() as con:\n", - " check2 = pd.read_sql(text(query_check2), con)\n", - "\n", - "print(f\"Projects with sentiment-labeled commit comments: {len(check2)}\")\n", - "display(check2)" - ] - }, - { - "cell_type": "markdown", - "id": "cell-check3-header", - "metadata": {}, - "source": [ - "### Check 3: Projects with the most sentiment-labeled PR comments\n", - "\n", - "Same ranking, but for pull request inline comments. PR inline comments are joined through `pull_requests` via `base_repo_id`. The base repo is the canoonical project the PR was opened in." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cell-check3", - "metadata": {}, - "outputs": [], - "source": [ - "query_check3 = \"\"\"\n", - "SELECT\n", - " p.id AS project_id,\n", - " p.name AS project_name,\n", - " p.url AS project_url,\n", - " COUNT(DISTINCT s.ID) AS labeled_comment_count\n", - "FROM projects p\n", - "INNER JOIN pull_requests pr ON p.id = pr.base_repo_id\n", - "INNER JOIN pull_request_comments prc ON pr.id = prc.pull_request_id\n", - "INNER JOIN comment_sentiment s ON prc.comment_id = s.ID\n", - "GROUP BY p.id, p.name, p.url\n", - "ORDER BY labeled_comment_count DESC;\n", - "\"\"\"\n", - "\n", - "with engine.connect() as con:\n", - " check3 = pd.read_sql(text(query_check3), con)\n", - "\n", - "print(f\"Projects with sentiment-labeled PR comments: {len(check3)}\")\n", - "display(check3)" - ] - }, - { - "cell_type": "markdown", - "id": "cell-check4-header", - "metadata": {}, - "source": [ - "### Check 4: Canonical repo vs fork accessibility\n", - "\n", - "GitHub projects get forked frequently. A fork shares the same commit history as its upstream repo, which means the same comment IDs can appear under both the original (canonical) project and one or more forks in GHTorrent.\n", - "\n", - "This matters for Notebook 3. When we generate Kaiaulu config files, we only want to target canonical repos. 
Downloading from a fork is redundant since the canonical repo already contains all the same commits.\n", - "\n", - "Expected values:\n", - "- `canonical_only`: ~4,555\n", - "- `fork_only`: ~569 (these will be missed when targeting canonical repos only)\n", - "- `both_sides`: ~2,083\n", - "- Canonical reachable rate: ~92.1%" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "cell-check4", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Canonical vs fork accessibility summary:\n", - " canonical_only : 4555.0 (expected ~4555)\n", - " fork_only : 569.0 (expected ~569)\n", - " both_sides : 2083.0 (expected ~2083)\n", - " fork_only_pct : 7.9% (expected ~7.9%)\n", - " canonical_reachable % : 92.1% (expected ~92.1%)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
canonical_onlyfork_onlyboth_sidesfork_only_pctcanonical_reachable_pct
04555.0569.02083.07.992.1
\n", - "
" - ], - "text/plain": [ - " canonical_only fork_only both_sides fork_only_pct \\\n", - "0 4555.0 569.0 2083.0 7.9 \n", - "\n", - " canonical_reachable_pct \n", - "0 92.1 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "query_check4 = \"\"\"\n", - "WITH RECURSIVE project_root AS (\n", - " SELECT p.id AS project_id, p.id AS root_id\n", - " FROM projects p\n", - " WHERE p.forked_from IS NULL\n", - " UNION ALL\n", - " SELECT c.id AS project_id, pr.root_id\n", - " FROM projects c\n", - " JOIN project_root pr ON c.forked_from = pr.project_id\n", - "),\n", - "comment_project_rows AS (\n", - " SELECT cs.ID AS comment_id, c.project_id, 'commit_comment' AS source_tag\n", - " FROM comment_sentiment cs\n", - " JOIN commit_comments cc ON cs.ID = cc.comment_id\n", - " JOIN commits c ON cc.commit_id = c.id\n", - " UNION ALL\n", - " SELECT cs.ID AS comment_id, pr.base_repo_id AS project_id, 'pr_comment' AS source_tag\n", - " FROM comment_sentiment cs\n", - " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", - " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", - " UNION ALL\n", - " SELECT cs.ID AS comment_id, pr.head_repo_id AS project_id, 'pr_comment' AS source_tag\n", - " FROM comment_sentiment cs\n", - " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", - " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", - "),\n", - "labeled AS (\n", - " SELECT\n", - " cpr.comment_id,\n", - " cpr.source_tag,\n", - " pr.root_id,\n", - " (cpr.project_id = pr.root_id) AS is_canonical\n", - " FROM comment_project_rows cpr\n", - " JOIN project_root pr ON pr.project_id = cpr.project_id\n", - "),\n", - "comment_flags AS (\n", - " SELECT\n", - " root_id, source_tag, comment_id,\n", - " MAX(CASE WHEN is_canonical THEN 1 ELSE 0 END) AS has_canonical,\n", - " MAX(CASE WHEN NOT is_canonical THEN 1 ELSE 0 END) AS has_fork\n", - " FROM labeled\n", - " GROUP BY root_id, source_tag, comment_id\n", - "),\n", - "global_counts AS (\n", - 
" SELECT\n", - " COUNT(*) AS mapped_comment_ids,\n", - " SUM(CASE WHEN has_canonical = 1 AND has_fork = 0 THEN 1 ELSE 0 END) AS canonical_only,\n", - " SUM(CASE WHEN has_canonical = 0 AND has_fork = 1 THEN 1 ELSE 0 END) AS fork_only,\n", - " SUM(CASE WHEN has_canonical = 1 AND has_fork = 1 THEN 1 ELSE 0 END) AS both_sides\n", - " FROM comment_flags\n", - ")\n", - "SELECT\n", - " canonical_only,\n", - " fork_only,\n", - " both_sides,\n", - " ROUND(100 * fork_only / NULLIF(mapped_comment_ids, 0), 2) AS fork_only_pct,\n", - " ROUND(100 * (canonical_only + both_sides) / NULLIF(mapped_comment_ids, 0), 2) AS canonical_reachable_pct\n", - "FROM global_counts;\n", - "\"\"\"\n", - "\n", - "with engine.connect() as con:\n", - " check4 = pd.read_sql(text(query_check4), con)\n", - "\n", - "print(\"Canonical vs fork accessibility summary:\")\n", - "print(f\" canonical_only : {check4['canonical_only'].iloc[0]} (expected ~4555)\")\n", - "print(f\" fork_only : {check4['fork_only'].iloc[0]} (expected ~569)\")\n", - "print(f\" both_sides : {check4['both_sides'].iloc[0]} (expected ~2083)\")\n", - "print(f\" fork_only_pct : {check4['fork_only_pct'].iloc[0]}% (expected ~7.9%)\")\n", - "print(f\" canonical_reachable % : {check4['canonical_reachable_pct'].iloc[0]}% (expected ~92.1%)\")\n", - "display(check4)" - ] - }, - { - "cell_type": "markdown", - "id": "52c9ee7e", - "metadata": {}, - "source": [ - "### When to move on to Notebook 3\n", - "\n", - "Move to Notebook 3 when all of the following are true:\n", - "\n", - "1. Check 1: PASS printed and total unique IDs = 7,122\n", - "2. Check 2: returns a non-empty DataFrame of projects with commit comment matches\n", - "3. Check 3: returns a non-empty DataFrame of projects with PR comment matches\n", - "4. Check 4: runs without error and shows a non-zero `canonical_reachable_pct`\n", - "\n", - "If any check returns zero rows, the most likely cause is that `comment_sentiment` was not loaded correctly in Notebook 1. 
Re-run Notebook 1 first.\n", - "\n", - "The ~7.9% of comments that are `fork_only` will be missed when we target only canonical repos in Notebook 3. This is an acceptable tradeoff. We document it here so the limitation is visible." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.2" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/03_scale_config_files.ipynb b/notebooks/03_scale_config_files.ipynb index 104c0fb..1ec5d7c 100644 --- a/notebooks/03_scale_config_files.ipynb +++ b/notebooks/03_scale_config_files.ipynb @@ -5,21 +5,13 @@ "id": "6fa72e9d", "metadata": {}, "source": [ - "# Scale and Automate Config Generation (Notebook 3)\n", + "# Notebook 3: Generate Kaiaulu Config Files\n", "\n", - "This notebook generates Kaiaulu config files for each canonical project repo that has sentiment-labeled comments in GHTorrent.\n", + "At this point we have a contextualized CSV with 7,122 labeled comments mapped to 82 canonical GitHub repos. The next step is to actually download the comment data for those repos from GitHub. This is where Kaiaulu comes in.\n", "\n", - "Kaiaulu uses `.yml` config files to know which GitHub projects to download data from, where to save the raw JSON files, and how to structure the output. Without a config file for a project, Kaiaulu has no way to download it. In this notebook, we will automatically generate one config per canonical repo. Anyone can re-run the same config against the live GitHub API and get fresh data.\n", + "Kaiaulu downloads and parses GitHub comment data using `.yml` config files. 
Each config tells Kaiaulu which project to target and where to save the output.\n", "\n", - "**Why canonical repos only?** As shown in Notebook 2 (Check 4), ~93.2% of sentiment comments are reachable from canonical (non-fork) repos. Since the comment coverage from forked repos is minimal, we target only canonical repos since they provide most of the sentiment-labeled coverage.\n", - "\n", - "**What this notebook does:**\n", - "1. Queries MySQL/GHTorrent to identify canonical repos with sentiment-labeled comments (~82 repos)\n", - "2. Generates a `.yml` config file per repo using `trinitycore.yml` as a template and writes them to Kaiaulu's `conf/` directory\n", - "\n", - "**What comes next** — once configs are written, use these Kaiaulu vignettes to download and parse comments:\n", - "- `vignettes/download_github_events.Rmd` → commit comments\n", - "- `vignettes/download_github_pull_request_comments.Rmd` → PR inline comments" + "Writing 82 config files by hand would be tedious, so we're going to generate them automatically." ] }, { @@ -27,9 +19,9 @@ "id": "a18a63e8", "metadata": {}, "source": [ - "### Planned Output\n", + "### Before you start\n", "\n", - "1. One `.yml` config file per main project repo in the GHTorrent database, written to Kaiaulu's `conf/` directory." + "Download [trinitycore.yml](https://raw.githubusercontent.com/splimon/kaiaulu-sentiment/refs/heads/378-github-commit-comments-downloader-function/conf/trinitycore.yml) and place it in the `conf/` folder of your local Kaiaulu repo. This is the template every generated config is based on. If it's missing, Step 4 of this notebook will fail." 
] }, { @@ -37,12 +29,12 @@ "id": "622cb929", "metadata": {}, "source": [ - "### Step 1: Import Dependencies" + "### Step 1: Import dependencies" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 17, "id": "1bc36cfe", "metadata": {}, "outputs": [], @@ -61,24 +53,20 @@ "id": "c2d1ae1f", "metadata": {}, "source": [ - "### Step 2: Set Paths and Configuration\n", + "### Step 2: Set your paths and configuration\n", "\n", - "Update the variables below before running:\n", - "- **`KAIAULU_REPO`** - path to your local Kaiaulu repo\n", - "- **`MYSQL_PASSWORD`** / **`MYSQL_DB`** - must match what you used in Notebooks 1 and 2\n", - "- **`MAX_REPOS`** - set to an integer to limit the number of repos processed (useful for a dry run), or `None` to process all 82 project repos\n", - "- **`WRITE_CONFIGS`** - set to `False` to preview without writing any files to disk" + "Update the variables below before running. The most important one is `KAIAULU_REPO`. Point it to your local Kaiaulu directory. MySQL credentials should match what you used in Notebooks 1 and 2." 
] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "20fb9b60", "metadata": {}, "outputs": [], "source": [ "# Paths\n", - "KAIAULU_REPO = (Path(\".\").resolve() / \"..\" / \"kaiaulu\").resolve()\n", + "KAIAULU_REPO = Path(\"PATH_TO/kaiaulu\")\n", "\n", "# Kaiaulu-owned inputs/outputs\n", "CONF_DIR = KAIAULU_REPO / \"conf\"\n", @@ -87,12 +75,12 @@ "# Repo selection cap (None = all main project repos)\n", "MAX_REPOS = None\n", "\n", - "# MySQL connection (override with env vars if needed)\n", + "# MySQL connection\n", "MYSQL_HOST = os.getenv(\"MYSQL_HOST\", \"localhost\")\n", "MYSQL_PORT = int(os.getenv(\"MYSQL_PORT\", \"3306\"))\n", - "MYSQL_DB = os.getenv(\"MYSQL_DB\", \"github\")\n", + "MYSQL_DB = os.getenv(\"MYSQL_DB\", \"github\") # name of the database where GHTorrent was loaded\n", "MYSQL_USER = os.getenv(\"MYSQL_USER\", \"root\")\n", - "MYSQL_PASSWORD = os.getenv(\"MYSQL_PASSWORD\", \"password\")\n", + "MYSQL_PASSWORD = os.getenv(\"MYSQL_PASSWORD\", \"ADD_PASSWORD_HERE\")\n", "\n", "# Toggle writing config files to Kaiaulu conf/\n", "WRITE_CONFIGS = True" @@ -103,16 +91,16 @@ "id": "aa923139", "metadata": {}, "source": [ - "### Step 3: Query Canonical Repos from GHTorrent\n", + "### Step 3: Query canonical repos from GHTorrent\n", "\n", - "This query shows canonical repos that have at least one sentiment-labeled commit or PR comment.\n", + "This query finds all canonical repos in GHTorrent that have at least one commit or PR inline sentiment-labeled comment.\n", "\n", "The expected output is 82 repos." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "5641db76", "metadata": {}, "outputs": [ @@ -120,7 +108,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "canonical repos found: 82\n" + "repos found: 82\n" ] }, { @@ -174,22 +162,60 @@ " bartaz\n", " impress.js\n", " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 77\n", + " xbmc\n", + " xbmc\n", + " \n", + " \n", + " 78\n", + " xphere-forks\n", + " symfony\n", + " \n", + " \n", + " 79\n", + " yihui\n", + " knitr\n", + " \n", + " \n", + " 80\n", + " zendframework\n", + " zf2\n", + " \n", + " \n", + " 81\n", + " zurb\n", + " foundation\n", + " \n", " \n", "\n", + "

82 rows × 2 columns

\n", "" ], "text/plain": [ - " owner repo\n", - "0 akka akka\n", - "1 antirez redis\n", - "2 ariya phantomjs\n", - "3 automapper automapper\n", - "4 bartaz impress.js" + " owner repo\n", + "0 akka akka\n", + "1 antirez redis\n", + "2 ariya phantomjs\n", + "3 automapper automapper\n", + "4 bartaz impress.js\n", + ".. ... ...\n", + "77 xbmc xbmc\n", + "78 xphere-forks symfony\n", + "79 yihui knitr\n", + "80 zendframework zf2\n", + "81 zurb foundation\n", + "\n", + "[82 rows x 2 columns]" ] }, - "execution_count": 69, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ @@ -234,7 +260,7 @@ "\n", "repos = pd.read_sql(text(sql), con=engine)\n", "print('repos found:', len(repos))\n", - "repos.head()" + "display(repos)" ] }, { @@ -242,15 +268,10 @@ "id": "46104941", "metadata": {}, "source": [ - "### Step 4: Generate and Write Config Files\n", - "\n", - "Each config file tells Kaiaulu:\n", - "- Which GitHub owner and repo to target\n", - "- Where to save raw JSON downloads (under `rawdata/github/{owner}/{repo}/`)\n", + "### Step 4: Generate and write config files\n", "\n", - "Before running this cell, download [trinitycore.yml](https://raw.githubusercontent.com/splimon/kaiaulu-sentiment/refs/heads/378-github-commit-comments-downloader-function/conf/trinitycore.yml) and place it in the `conf/` folder of your local Kaiaulu repo.\n", + "For each repo in the list above, we substitute `{owner}` and `{repo}` into the `trinitycore.yml` template and write the result to `kaiaulu/conf/{repo}.yml`. Each config looks like this:\n", "\n", - "The config files should follow this structure:\n", "```yaml\n", "project:\n", " website: https://github.com/{owner}/{repo}\n", @@ -266,7 +287,7 @@ " pr_comments: rawdata/github/{owner}/{repo}/pr_comments/\n", "```\n", "\n", - "Expected output: a list of 82 written `.yml` filenames, e.g. `['akka.yml', 'redis.yml', ...]`" + "Expected output: A list of 82 written `.yml` filenames, e.g. 
`['akka.yml', 'redis.yml', ...]`" ] }, { @@ -367,19 +388,16 @@ }, { "cell_type": "markdown", - "id": "oiibwri0k6", + "id": "411dc661", "metadata": {}, "source": [ - "### Next Steps\n", - "\n", - "Config files have been written to `kaiaulu/conf/`. From here, the workflow moves entirely into Kaiaulu.\n", - "\n", - "To download commit and PR inline comments for each project, follow the Kaiaulu vignettes:\n", + "### What comes next\n", "\n", - "- **`vignettes/download_github_events.Rmd`** — downloads commit comments via `/repos/{owner}/{repo}/comments` and parses them into a data.table with columns: `comment_id`, `commit_id`, `author_login`, `author_id`, `line`, `created_at`, `updated_at`\n", - "- **`vignettes/download_github_pull_request_comments.Rmd`** — downloads PR inline comments via `/repos/{owner}/{repo}/pulls/comments` and parses them into a data.table with columns: `review_id`, `comment_id`, `html_url`, `created_at`, `updated_at`, `comment_user_login`, `author_association`, `file_path`, `start_line`, `line`, `original_start_line`, `original_line`, `position`, `diff_hunk`, `body`, `commit_id`\n", + "Once the configs are written, you'll run two Kaiaulu vignettes to download and parse comments for each project:\n", + "- **`vignettes/download_github_events.Rmd`** → downloads commit comments, outputs `{repo}_commit_comments.csv`\n", + "- **`vignettes/download_github_pull_request_comments.Rmd`** → downloads PR inline comments, outputs `{repo}_pr_inline_comments.csv`\n", "\n", - "Run both vignettes for each of the 82 configs generated above. Once the CSVs exist at `rawdata/github/{owner}/{repo}/`, proceed to `vignettes/sentiment_analysis.Rmd` to train the sentiment model and generate polarity predictions." + "Copy the output CSVs into this repo's `data/` folder, then open Notebook 4 to INNER JOIN them with the contextualized Gold Standard dataset." 
] } ], diff --git a/notebooks/04_inner_join_kaiaulu_comments.ipynb b/notebooks/04_inner_join_kaiaulu_comments.ipynb new file mode 100644 index 0000000..d4caec3 --- /dev/null +++ b/notebooks/04_inner_join_kaiaulu_comments.ipynb @@ -0,0 +1,245 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cell-nb4-title", + "metadata": {}, + "source": [ + "# Notebook 4: Inner Join Kaiaulu Comments with Sentiment Labels\n", + "\n", + "By now, you should have:\n", + "1. The contextualized Gold Standard dataset from Notebook 2 (`github_gold_standard_contextualized.csv`)\n", + "2. Comment data freshly downloaded from GitHub via Kaiaulu (`{repo}_commit_comments.csv` and `{repo}_pr_inline_comments.csv`)\n", + "\n", + "Neither is complete on its own. The Gold Standard has sentiment labels, but no fresh GitHub metadata. Kaiaulu's output has GitHub metadata, but no sentiment labels. INNER JOINing them on `comment_id` gives us both." + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-prereqs", + "metadata": {}, + "source": [ + "### Before you start\n", + "\n", + "Three files need to be in `data/` before running any cells:\n", + "\n", + "| File | Where it comes from |\n", + "|---|---|\n", + "| `data/github_gold_standard_contextualized.csv` | Output of Notebook 2, Step 7 |\n", + "| `data/{repo}_commit_comments.csv` | Copy from `rawdata/github/{owner}/{repo}/commit_comments/` after running `vignettes/download_github_events.Rmd` |\n", + "| `data/{repo}_pr_inline_comments.csv` | Copy from `rawdata/github/{owner}/{repo}/pr_comments/` after running `vignettes/download_github_pull_request_comments.Rmd` |\n", + "\n", + "If either Kaiaulu CSV is missing, go back and run the corresponding vignette in Notebook 3 first." 
+ ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-step1-header", + "metadata": {}, + "source": [ + "### Step 1: Import dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cell-nb4-step1-code", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-step2-header", + "metadata": {}, + "source": [ + "### Step 2: Configure Project\n", + "\n", + "Set `OWNER` and `REPO` to match the project you ran the Kaiaulu vignettes for. These must match the `owner` and `repo` columns in `github_gold_standard_contextualized.csv` and the filenames of the Kaiaulu output CSVs in `data/`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbfd4324", + "metadata": {}, + "outputs": [], + "source": [ + "# Configure these before running\n", + "OWNER = \"ADD_OWNER_HERE\" # GitHub repo owner (must match 'owner' column in contextualized CSV)\n", + "REPO = \"ADD_REPO_HERE\" # GitHub repo name (must match 'repo' column in contextualized CSV)\n", + "\n", + "DATA_DIR = os.path.join(os.path.dirname(os.getcwd()), \"data\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-step3-header", + "metadata": {}, + "source": [ + "### Step 3: Load the contextualized dataset\n", + "\n", + "Load the full contextualized Gold Standard dataset from Notebook 2." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-nb4-step3-code", + "metadata": {}, + "outputs": [], + "source": [ + "contextualized = pd.read_csv(os.path.join(DATA_DIR, \"github_gold_standard_contextualized.csv\"))\n", + "print(f\"Full contextualized dataset: {len(contextualized)} rows\")\n", + "\n", + "project_ctx = contextualized[(contextualized['owner'] == OWNER) & (contextualized['repo'] == REPO)].copy()\n", + "print(f\"{OWNER}/{REPO} rows: {len(project_ctx)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-step4-header", + "metadata": {}, + "source": [ + "### Step 4: Load the Kaiaulu output CSVs\n", + "\n", + "Load the two CSVs you copied into `data/` after running the Kaiaulu vignettes." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cell-nb4-step4-code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Kaiaulu commit comments : 1569 rows, columns: ['comment_id', 'commit_id', 'author_login', 'author_id', 'body', 'created_at', 'updated_at']\n", + "Kaiaulu PR inline comments: 6100 rows, columns: ['review_id', 'comment_id', 'html_url', 'created_at', 'updated_at', 'comment_user_login', 'author_association', 'file_path', 'start_line', 'line', 'original_start_line', 'original_line', 'position', 'diff_hunk', 'body', 'commit_id']\n" + ] + } + ], + "source": [ + "kaiaulu_commit = pd.read_csv(os.path.join(DATA_DIR, f\"{REPO}_commit_comments.csv\"))\n", + "kaiaulu_pr = pd.read_csv(os.path.join(DATA_DIR, f\"{REPO}_pr_inline_comments.csv\"))\n", + "\n", + "print(f\"Kaiaulu commit comments : {len(kaiaulu_commit)} rows, columns: {list(kaiaulu_commit.columns)}\")\n", + "print(f\"Kaiaulu PR inline comments: {len(kaiaulu_pr)} rows, columns: {list(kaiaulu_pr.columns)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-step5-header", + "metadata": {}, + "source": [ + "### Step 5: INNER JOIN - Commit Comments\n", + "\n", + "Join the project's contextualized 
Gold Standard rows against Kaiaulu's commit comments on `comment_id`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-nb4-step5-code", + "metadata": {}, + "outputs": [], + "source": [ + "commit_joined = project_ctx.merge(\n", + " kaiaulu_commit,\n", + " on='comment_id',\n", + " how='inner', # INNER JOIN\n", + " suffixes=('_gold', '_kaiaulu')\n", + ")\n", + "\n", + "commit_dropped = len(project_ctx) - len(commit_joined)\n", + "print(f\"{OWNER}/{REPO} rows in contextualized dataset : {len(project_ctx)}\")\n", + "print(f\"Rows matched in Kaiaulu commit comments : {len(commit_joined)}\")\n", + "print(f\"Rows not found in Kaiaulu download : {commit_dropped}\")\n", + "if commit_dropped > 0:\n", + " print(f\" These {commit_dropped} comment IDs exist in the Gold Standard but were not found in the Kaiaulu download.\")\n", + "\n", + "print(\"\\nJoined commit comments (first 5 rows):\")\n", + "display(commit_joined.head())\n", + "\n", + "out_path = os.path.join(DATA_DIR, f\"{REPO}_sentiment_commit_comments_joined.csv\")\n", + "commit_joined.to_csv(out_path, index=False)\n", + "print(f\"\\nSaved: {out_path}\")" + ] + }, + { + "cell_type": "markdown", + "id": "daef76ca", + "metadata": {}, + "source": [ + "### Step 6: INNER JOIN - PR inline comments\n", + "\n", + "Same join as step 5, but against Kaiaulu's PR inline comments." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8ba23d8", + "metadata": {}, + "outputs": [], + "source": [ + "pr_joined = project_ctx.merge(\n", + " kaiaulu_pr,\n", + " on='comment_id',\n", + " how='inner', # INNER JOIN\n", + " suffixes=('_gold', '_kaiaulu')\n", + ")\n", + "\n", + "pr_dropped = len(project_ctx) - len(pr_joined)\n", + "print(f\"{OWNER}/{REPO} rows in contextualized dataset : {len(project_ctx)}\")\n", + "print(f\"Rows matched in Kaiaulu PR inline comments : {len(pr_joined)}\")\n", + "print(f\"Rows not found in Kaiaulu download : {pr_dropped}\")\n", + "if pr_dropped > 0:\n", + " print(f\" These {pr_dropped} comment IDs exist in the Gold Standard but were not found in the Kaiaulu download.\")\n", + "\n", + "print(\"\\nJoined PR inline comments (first 5 rows):\")\n", + "display(pr_joined.head())\n", + "\n", + "out_path = os.path.join(DATA_DIR, f\"{REPO}_sentiment_pr_inline_comments_joined.csv\")\n", + "pr_joined.to_csv(out_path, index=False)\n", + "print(f\"\\nSaved: {out_path}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-next-steps", + "metadata": {}, + "source": [ + "### You're done!\n", + "\n", + "**To run for a different project:** update `OWNER` and `REPO` in Step 2, copy the new Kaiaulu CSVs into `data/`, and re-run Steps 3–6." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From dc8b17c96b624e7e072ac79174ea7c45fa2c01f9 Mon Sep 17 00:00:00 2001 From: splimon Date: Fri, 17 Apr 2026 01:45:13 -1000 Subject: [PATCH 4/8] Add Notebook 4: Join Sentiment Labels with Kaiaulu Output - Rewrites Notebook 4 to query sentiment labels directly from MySQL and INNER JOIN them against Kaiaulu-downloaded comment data. Writes the output back into Kaiaulu's `rawdata` directory - Updates Notebooks 2 and 3 to align with the revised pipeline. --- .../01_load_sentiment_csv_to_mysql.ipynb | 34 +- ...2_contextualize_github_gold_standard.ipynb | 97 +- notebooks/03_scale_config_files.ipynb | 10 +- notebooks/04_add_sentiment_to_kaiaulu.ipynb | 911 ++++++++++++++++++ .../04_inner_join_kaiaulu_comments.ipynb | 245 ----- 5 files changed, 968 insertions(+), 329 deletions(-) create mode 100644 notebooks/04_add_sentiment_to_kaiaulu.ipynb delete mode 100644 notebooks/04_inner_join_kaiaulu_comments.ipynb diff --git a/notebooks/01_load_sentiment_csv_to_mysql.ipynb b/notebooks/01_load_sentiment_csv_to_mysql.ipynb index 457763a..7e1370f 100644 --- a/notebooks/01_load_sentiment_csv_to_mysql.ipynb +++ b/notebooks/01_load_sentiment_csv_to_mysql.ipynb @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "cell-create-table", "metadata": {}, "outputs": [ @@ -146,9 +146,9 @@ "\n", "cursor.execute(\"\"\"\n", " CREATE TABLE comment_sentiment (\n", - " ID INT NULL,\n", + " ID INT NULL,\n", " Polarity VARCHAR(256) NULL,\n", - " Text TEXT NULL\n", + " Text TEXT NULL\n", " );\n", "\"\"\")\n", "conn.commit()\n", @@ -209,7 
+209,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "cell-validate", "metadata": {}, "outputs": [ @@ -229,14 +229,14 @@ ")\n", "\n", "with engine.connect() as con:\n", - " total_rows = pd.read_sql(text(\"SELECT COUNT(*) AS total_rows FROM comment_sentiment;\"), con)\n", - " distinct_ids = pd.read_sql(text(\"SELECT COUNT(DISTINCT ID) AS distinct_ids FROM comment_sentiment;\"), con)\n", + " total_rows = pd.read_sql(text(\"SELECT COUNT(*) AS total_rows FROM comment_sentiment;\"), con)\n", + " distinct_ids = pd.read_sql(text(\"SELECT COUNT(DISTINCT ID) AS distinct_ids FROM comment_sentiment;\"), con)\n", "\n", "total = total_rows['total_rows'].iloc[0]\n", "unique = distinct_ids['distinct_ids'].iloc[0]\n", "\n", - "print(f\"Total rows : {total} (expected 7122)\")\n", - "print(f\"Distinct IDs : {unique} (expected 7122)\")\n", + "print(f\"Total rows: {total} (expected 7122)\")\n", + "print(f\"Distinct IDs: {unique} (expected 7122)\")\n", "\n", "if total == 7122 and unique == 7122:\n", " print(\"PASS: all rows loaded and all IDs are unique.\")\n", @@ -258,7 +258,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "cell-query1", "metadata": {}, "outputs": [ @@ -451,16 +451,16 @@ "source": [ "query1 = \"\"\"\n", "SELECT\n", - " s.ID AS sentiment_id,\n", - " s.Polarity AS polarity,\n", - " p.name AS project_name,\n", - " p.url AS project_url,\n", - " c.sha AS commit_sha,\n", - " s.Text AS comment_text\n", + " s.ID AS sentiment_id,\n", + " s.Polarity AS polarity,\n", + " p.name AS project_name,\n", + " p.url AS project_url,\n", + " c.sha AS commit_sha,\n", + " s.Text AS comment_text\n", "FROM comment_sentiment s\n", "INNER JOIN commit_comments cc ON s.ID = cc.comment_id\n", - "INNER JOIN commits c ON c.id = cc.commit_id\n", - "INNER JOIN projects p ON c.project_id = p.id\n", + "INNER JOIN commits c ON c.id = cc.commit_id\n", + "INNER JOIN projects p ON c.project_id = p.id\n", "LIMIT 10;\n", 
"\"\"\"\n", "\n", diff --git a/notebooks/02_contextualize_github_gold_standard.ipynb b/notebooks/02_contextualize_github_gold_standard.ipynb index f5d5312..87b19a8 100644 --- a/notebooks/02_contextualize_github_gold_standard.ipynb +++ b/notebooks/02_contextualize_github_gold_standard.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 2, "id": "cell-imports", "metadata": {}, "outputs": [], @@ -55,7 +55,7 @@ "MYSQL_HOST = \"localhost\"\n", "MYSQL_PORT = 3306\n", "MYSQL_USER = \"root\"\n", - "MYSQL_PASSWORD = \"ADD_PASSWORD_HERE\"\n", + "MYSQL_PASSWORD = \"ADD_YOUR_PASSWORD_HERE\"\n", "MYSQL_DB = \"github\" # name of the database where GHTorrent was loaded\n", "\n", "engine = create_engine(\n", @@ -86,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 4, "id": "cell-check1", "metadata": {}, "outputs": [ @@ -228,7 +228,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 5, "id": "cell-check2", "metadata": {}, "outputs": [ @@ -4361,14 +4361,14 @@ "source": [ "query_check2 = \"\"\"\n", "SELECT\n", - " p.id AS project_id,\n", - " p.name AS project_name,\n", - " p.url AS project_url,\n", - " COUNT(DISTINCT s.ID) AS labeled_comment_count\n", + " p.id AS project_id,\n", + " p.name AS project_name,\n", + " p.url AS project_url,\n", + " COUNT(DISTINCT s.ID) AS labeled_comment_count\n", "FROM projects p\n", - "INNER JOIN commits c ON p.id = c.project_id\n", - "INNER JOIN commit_comments cc ON c.id = cc.commit_id\n", - "INNER JOIN comment_sentiment s ON cc.comment_id = s.ID\n", + "INNER JOIN commits c ON p.id = c.project_id\n", + "INNER JOIN commit_comments cc ON c.id = cc.commit_id\n", + "INNER JOIN comment_sentiment s ON cc.comment_id = s.ID\n", "GROUP BY p.id, p.name, p.url\n", "ORDER BY labeled_comment_count DESC;\n", "\"\"\"\n", @@ -4392,7 +4392,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 6, "id": "cell-check3", "metadata": {}, "outputs": [ 
@@ -5024,14 +5024,14 @@ "source": [ "query_check3 = \"\"\"\n", "SELECT\n", - " p.id AS project_id,\n", - " p.name AS project_name,\n", - " p.url AS project_url,\n", - " COUNT(DISTINCT s.ID) AS labeled_comment_count\n", + " p.id AS project_id,\n", + " p.name AS project_name,\n", + " p.url AS project_url,\n", + " COUNT(DISTINCT s.ID) AS labeled_comment_count\n", "FROM projects p\n", - "INNER JOIN pull_requests pr ON p.id = pr.base_repo_id\n", + "INNER JOIN pull_requests pr ON p.id = pr.base_repo_id\n", "INNER JOIN pull_request_comments prc ON pr.id = prc.pull_request_id\n", - "INNER JOIN comment_sentiment s ON prc.comment_id = s.ID\n", + "INNER JOIN comment_sentiment s ON prc.comment_id = s.ID\n", "GROUP BY p.id, p.name, p.url\n", "ORDER BY labeled_comment_count DESC;\n", "\"\"\"\n", @@ -5064,7 +5064,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 8, "id": "cell-check4", "metadata": {}, "outputs": [ @@ -5073,11 +5073,11 @@ "output_type": "stream", "text": [ "Canonical vs fork accessibility summary:\n", - " canonical_only : 4555.0 (expected ~4555)\n", - " fork_only : 569.0 (expected ~569)\n", - " both_sides : 2083.0 (expected ~2083)\n", - " fork_only_pct : 7.9% (expected ~7.9%)\n", - " canonical_reachable % : 92.1% (expected ~92.1%)\n" + " canonical_only: 4555.0 (expected ~4555)\n", + " fork_only: 569.0 (expected ~569)\n", + " both_sides: 2083.0 (expected ~2083)\n", + " fork_only %: 7.9% (expected ~7.9%)\n", + " canonical_reachable %: 92.1% (expected ~92.1%)\n" ] }, { @@ -5172,14 +5172,14 @@ "comment_flags AS (\n", " SELECT\n", " root_id, source_tag, comment_id,\n", - " MAX(CASE WHEN is_canonical THEN 1 ELSE 0 END) AS has_canonical,\n", + " MAX(CASE WHEN is_canonical THEN 1 ELSE 0 END) AS has_canonical,\n", " MAX(CASE WHEN NOT is_canonical THEN 1 ELSE 0 END) AS has_fork\n", " FROM labeled\n", " GROUP BY root_id, source_tag, comment_id\n", "),\n", "global_counts AS (\n", " SELECT\n", - " COUNT(*) AS mapped_comment_ids,\n", + " COUNT(*) AS 
mapped_comment_ids,\n", " SUM(CASE WHEN has_canonical = 1 AND has_fork = 0 THEN 1 ELSE 0 END) AS canonical_only,\n", " SUM(CASE WHEN has_canonical = 0 AND has_fork = 1 THEN 1 ELSE 0 END) AS fork_only,\n", " SUM(CASE WHEN has_canonical = 1 AND has_fork = 1 THEN 1 ELSE 0 END) AS both_sides\n", @@ -5189,7 +5189,7 @@ " canonical_only,\n", " fork_only,\n", " both_sides,\n", - " ROUND(100 * fork_only / NULLIF(mapped_comment_ids, 0), 2) AS fork_only_pct,\n", + " ROUND(100 * fork_only / NULLIF(mapped_comment_ids, 0), 2) AS fork_only_pct,\n", " ROUND(100 * (canonical_only + both_sides) / NULLIF(mapped_comment_ids, 0), 2) AS canonical_reachable_pct\n", "FROM global_counts;\n", "\"\"\"\n", @@ -5198,11 +5198,11 @@ " check4 = pd.read_sql(text(query_check4), con)\n", "\n", "print(\"Canonical vs fork accessibility summary:\")\n", - "print(f\" canonical_only : {check4['canonical_only'].iloc[0]} (expected ~4555)\")\n", - "print(f\" fork_only : {check4['fork_only'].iloc[0]} (expected ~569)\")\n", - "print(f\" both_sides : {check4['both_sides'].iloc[0]} (expected ~2083)\")\n", - "print(f\" fork_only_pct : {check4['fork_only_pct'].iloc[0]}% (expected ~7.9%)\")\n", - "print(f\" canonical_reachable % : {check4['canonical_reachable_pct'].iloc[0]}% (expected ~92.1%)\")\n", + "print(f\" canonical_only: {check4['canonical_only'].iloc[0]} (expected ~4555)\")\n", + "print(f\" fork_only: {check4['fork_only'].iloc[0]} (expected ~569)\")\n", + "print(f\" both_sides: {check4['both_sides'].iloc[0]} (expected ~2083)\")\n", + "print(f\" fork_only %: {check4['fork_only_pct'].iloc[0]}% (expected ~7.9%)\")\n", + "print(f\" canonical_reachable %: {check4['canonical_reachable_pct'].iloc[0]}% (expected ~92.1%)\")\n", "display(check4)" ] }, @@ -5215,7 +5215,7 @@ "\n", "Now that we know which projects have sentiment-labeled comments and how they map across tables, we can build the contextualized dataset.\n", "\n", - "As mentioned, the Gold Standard currently has three columns (`ID`, `polarity`, `text`). 
We're going to add six more columns from GHTorrent so that Notebook 4 can INNER JOIN on `comment_id` against Kaiaulu's downloaded comment data:\n", + "The Gold Standard currently has three columns (`ID`, `polarity`, `text`). We're going to add six more from GHTorrent:\n", "\n", "1. `created_at` - Comment timestamp\n", "2. `author_login` - Author username\n", @@ -5227,7 +5227,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "id": "77ab4187", "metadata": {}, "outputs": [ @@ -5300,7 +5300,7 @@ ")\n", "\n", "print(f\"Total rows: {len(contextualized)} (expected 7122)\")\n", - "print(f\" From commit comments : {len(commit_ctx)}\")\n", + "print(f\" From commit comments: {len(commit_ctx)}\")\n", "print(f\" From PR comments only: {len(pr_only)}\")" ] }, @@ -5553,33 +5553,6 @@ "null_emails = contextualized['author_email'].isna().sum()\n", "print(f\"\\nNote: {null_emails} of {len(contextualized)} rows have a NULL author_email ({round(100*null_emails/len(contextualized), 1)}%). GitHub stopped exposing emails in the API, so this is expected.\")" ] - }, - { - "cell_type": "markdown", - "id": "a04e860a", - "metadata": {}, - "source": [ - "### Step 7: Save the contextualized dataset\n", - "\n", - "This CSV is the output of Notebook 2 and the input to Notebook 4. We'll save it to `data/github_gold_standard_contextualized.csv`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8d4e8585", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "output_path = os.path.join(os.path.dirname(os.getcwd()), \"data\", \"github_gold_standard_contextualized.csv\")\n", - "os.makedirs(os.path.dirname(output_path), exist_ok=True)\n", - "\n", - "contextualized.to_csv(output_path, index=False)\n", - "print(f\"Saved: {output_path}\")\n", - "print(f\"Rows: {len(contextualized)}, Columns: {list(contextualized.columns)}\")" - ] } ], "metadata": { diff --git a/notebooks/03_scale_config_files.ipynb b/notebooks/03_scale_config_files.ipynb index 1ec5d7c..0184c72 100644 --- a/notebooks/03_scale_config_files.ipynb +++ b/notebooks/03_scale_config_files.ipynb @@ -7,7 +7,7 @@ "source": [ "# Notebook 3: Generate Kaiaulu Config Files\n", "\n", - "At this point we have a contextualized CSV with 7,122 labeled comments mapped to 82 canonical GitHub repos. The next step is to actually download the comment data for those repos from GitHub. This is where Kaiaulu comes in.\n", + "At this point we know which 82 canonical repos have sentiment-labeled comments. The next step is to actually download the comment data for those repos from GitHub. This is where Kaiaulu comes in.\n", "\n", "Kaiaulu downloads and parses GitHub comment data using `.yml` config files. 
Each config tells Kaiaulu which project to target and where to save the output.\n", "\n", @@ -393,11 +393,11 @@ "source": [ "### What comes next\n", "\n", - "Once the configs are written, you'll run two Kaiaulu vignettes to download and parse comments for each project:\n", - "- **`vignettes/download_github_events.Rmd`** → downloads commit comments, outputs `{repo}_commit_comments.csv`\n", - "- **`vignettes/download_github_pull_request_comments.Rmd`** → downloads PR inline comments, outputs `{repo}_pr_inline_comments.csv`\n", + "Before opening Notebook 4, run these Kaiaulu vignettes to download and parse comments for your project(s) of choice:\n", + "- **`vignettes/download_github_events.Rmd`** → outputs `{repo}_commit_comments.csv` to `vignettes/rawdata/github/{owner}/{repo}/`\n", + "- **`vignettes/download_github_pull_request_comments.Rmd`** → outputs `{repo}_pr_inline_comments.csv` to `vignettes/rawdata/github/{owner}/{repo}/`\n", "\n", - "Copy the output CSVs into this repo's `data/` folder, then open Notebook 4 to INNER JOIN them with the contextualized Gold Standard dataset." + "Once the vignettes have run, head to Notebook 4. It will read the Kaiaulu output directly from `vignettes/rawdata/`." ] } ], diff --git a/notebooks/04_add_sentiment_to_kaiaulu.ipynb b/notebooks/04_add_sentiment_to_kaiaulu.ipynb new file mode 100644 index 0000000..ed1aa1a --- /dev/null +++ b/notebooks/04_add_sentiment_to_kaiaulu.ipynb @@ -0,0 +1,911 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cell-nb4-title", + "metadata": {}, + "source": [ + "# Notebook 4: Add Sentiment Labels to Kaiaulu\n", + "\n", + "By now, you should have:\n", + "1. Sentiment labels in MySQL (7,122 GitHub comments labeled positive, negative, or neutral)\n", + "2. Comment data freshly downloaded from GitHub via Kaiaulu (e.g., file paths, commit SHAs, review IDs, timestamps)\n", + "\n", + "Neither is complete on its own. The Gold Standard has polarity labels but no GitHub data. 
Kaiaulu's output has data but no sentiment labels. We'll query the labels from MySQL, INNER JOIN them against Kaiaulu's downloaded comment data on `comment_id`, and write the result back into Kaiaulu's directory so `sentiment_analysis.Rmd` can use it directly." + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-prereqs", + "metadata": {}, + "source": [ + "### Before you start\n", + "\n", + "Two things need to be in place before running any cells:\n", + "\n", + "| What | Where it comes from |\n", + "|---|---|\n", + "| MySQL database with `comment_sentiment` table | Output of Notebook 1 |\n", + "| Kaiaulu rawdata for your selected project | Output of running `vignettes/download_github_events.Rmd` and `vignettes/download_github_pull_request_comments.Rmd` in Kaiaulu |\n", + "\n", + "The Kaiaulu vignettes write their output to `vignettes/rawdata/github/{owner}/{repo}/` inside your local Kaiaulu directory. That's where this notebook reads from.\n", + "\n", + "If those CSVs are missing, go back and run the Kaiaulu vignettes listed at the end of Notebook 3 first." + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-step1-header", + "metadata": {}, + "source": [ + "### Step 1: Import dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "cell-nb4-step1-code", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "from sqlalchemy import create_engine, text" + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-step2-header", + "metadata": {}, + "source": [ + "### Step 2: Configure Project\n", + "\n", + "Set `OWNER` and `REPO` to match the project you ran the Kaiaulu vignettes for. Set `KAIAULU_REPO` to your local Kaiaulu directory. This is where the notebook will read Kaiaulu's downloaded CSVs from and where it will write the joined output. MySQL credentials should match what you used in Notebooks 1-3." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbfd4324", + "metadata": {}, + "outputs": [], + "source": [ + "# Configure these before running\n", + "OWNER = \"ADD_OWNER_HERE\" # GitHub repo owner\n", + "REPO = \"ADD_REPO_HERE\" # GitHub repo name\n", + "\n", + "# Path to your local Kaiaulu directory\n", + "KAIAULU_REPO = Path(\"PATH_TO/kaiaulu\")\n", + "\n", + "# Kaiaulu rawdata directory for this project\n", + "KAIAULU_DATA_DIR = KAIAULU_REPO / \"vignettes\" / \"rawdata\" / \"github\" / OWNER / REPO\n", + "\n", + "# MySQL connection\n", + "MYSQL_HOST = os.getenv(\"MYSQL_HOST\", \"localhost\")\n", + "MYSQL_PORT = int(os.getenv(\"MYSQL_PORT\", \"3306\"))\n", + "MYSQL_DB = os.getenv(\"MYSQL_DB\", \"github\")\n", + "MYSQL_USER = os.getenv(\"MYSQL_USER\", \"root\")\n", + "MYSQL_PASSWORD = os.getenv(\"MYSQL_PASSWORD\", \"ADD_PASSWORD_HERE\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-step3-header", + "metadata": {}, + "source": [ + "### Step 3: Query sentiment labels from MySQL\n", + "\n", + "Pull the sentiment labels for your project directly from the `comment_sentiment` table. We join through GHTorrent to filter down to just the comments belonging to `OWNER/REPO`, and grab a few context columns." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "cell-nb4-step3-code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Commit comment sentiment labels: 33\n", + "PR inline sentiment labels: 111\n", + "Total sentiment labels for cakephp: 144\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
comment_idpolaritytextcreated_atauthor_loginownerrepo
03411503neutralThis is causing the year to return always with...2013-06-12 09:21:03luksmcakephpcakephp
12245783neutral@Scottymeuk Read the associated ticket [#3283]...2012-12-03 11:25:13ADmadcakephpcakephp
21040482neutralhttps://github.com/petteyg/code_check\"2012-03-04 07:12:51josegonzalezcakephpcakephp
3998908positiveI'm an idiot, I don't know how I missed that t...2012-02-22 14:22:03markstorycakephpcakephp
4744111negativeSorry, guys. Yes individually tests pass. I am...2011-11-24 01:02:20ceeramcakephpcakephp
\n", + "
" + ], + "text/plain": [ + " comment_id polarity text \\\n", + "0 3411503 neutral This is causing the year to return always with... \n", + "1 2245783 neutral @Scottymeuk Read the associated ticket [#3283]... \n", + "2 1040482 neutral https://github.com/petteyg/code_check\" \n", + "3 998908 positive I'm an idiot, I don't know how I missed that t... \n", + "4 744111 negative Sorry, guys. Yes individually tests pass. I am... \n", + "\n", + " created_at author_login owner repo \n", + "0 2013-06-12 09:21:03 luksm cakephp cakephp \n", + "1 2012-12-03 11:25:13 ADmad cakephp cakephp \n", + "2 2012-03-04 07:12:51 josegonzalez cakephp cakephp \n", + "3 2012-02-22 14:22:03 markstory cakephp cakephp \n", + "4 2011-11-24 01:02:20 ceeram cakephp cakephp " + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "engine = create_engine(\n", + " f\"mysql+pymysql://{MYSQL_USER}:{MYSQL_PASSWORD}@{MYSQL_HOST}:{MYSQL_PORT}/{MYSQL_DB}\"\n", + ")\n", + "\n", + "commit_sql = \"\"\"\n", + "SELECT\n", + " cs.ID AS comment_id,\n", + " cs.Polarity AS polarity,\n", + " cs.Text AS text,\n", + " cc.created_at AS created_at,\n", + " u.login AS author_login,\n", + " u_owner.login AS owner,\n", + " p.name AS repo\n", + "FROM comment_sentiment cs\n", + "JOIN commit_comments cc ON cs.ID = cc.comment_id\n", + "JOIN commits c ON cc.commit_id = c.id\n", + "JOIN projects p ON c.project_id = p.id\n", + "JOIN users u ON cc.user_id = u.id\n", + "JOIN users u_owner ON p.owner_id = u_owner.id\n", + "WHERE LOWER(u_owner.login) = :owner\n", + " AND LOWER(p.name) = :repo\n", + "\"\"\"\n", + "\n", + "pr_sql = \"\"\"\n", + "SELECT\n", + " cs.ID AS comment_id,\n", + " cs.Polarity AS polarity,\n", + " cs.Text AS text,\n", + " prc.created_at AS created_at,\n", + " u.login AS author_login,\n", + " u_owner.login AS owner,\n", + " p.name AS repo\n", + "FROM comment_sentiment cs\n", + "JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", + "JOIN pull_requests 
pr ON prc.pull_request_id = pr.id\n", + "JOIN projects p ON pr.base_repo_id = p.id\n", + "JOIN users u ON prc.user_id = u.id\n", + "JOIN users u_owner ON p.owner_id = u_owner.id\n", + "WHERE LOWER(u_owner.login) = :owner\n", + " AND LOWER(p.name) = :repo\n", + "\"\"\"\n", + "\n", + "params = {\"owner\": OWNER.lower(), \"repo\": REPO.lower()}\n", + "\n", + "with engine.connect() as con:\n", + " commit_labels = pd.read_sql(text(commit_sql), con, params=params)\n", + " pr_labels = pd.read_sql(text(pr_sql), con, params=params)\n", + "\n", + "# Deduplicate 85 comment IDs that appear in both commit_comments and pull_request_comments GHTorrent tables\n", + "combined = pd.concat([commit_labels, pr_labels], ignore_index=True)\n", + "project_ctx = combined.drop_duplicates(subset=\"comment_id\", keep=\"first\").copy()\n", + "\n", + "dupes_dropped = len(combined) - len(project_ctx)\n", + "print(f\"Commit comment sentiment labels: {len(commit_labels)}\")\n", + "print(f\"PR inline sentiment labels: {len(pr_labels)}\")\n", + "if dupes_dropped > 0:\n", + " print(f\"Duplicate IDs removed: {dupes_dropped} (appeared in both tables)\")\n", + "print(f\"Total sentiment labels for {REPO}: {len(project_ctx)}\")\n", + "project_ctx.head()" + ] + }, + { + "cell_type": "markdown", + "id": "4xul75ers8r", + "metadata": {}, + "source": [ + "### Step 4: Remap polarity labels to integers\n", + "\n", + "The Gold Standard uses strings (`\"positive\"`, `\"negative\"`, `\"neutral\"`). Kaiaulu's `sentiment_analysis.Rmd` expects integers: `0` = neutral, `1` = positive, `2` = negative. We remap here so the output is ready to use directly." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "40fgc3k5q2l", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "All polarity labels mapped successfully.\n", + "polarity\n", + "neutral 82\n", + "negative 45\n", + "positive 17\n" + ] + } + ], + "source": [ + "polarity_map = {\"neutral\": 0, \"positive\": 1, \"negative\": 2}\n", + "\n", + "if project_ctx[\"polarity\"].dtype == object:\n", + " project_ctx[\"polarity\"] = project_ctx[\"polarity\"].str.lower().map(polarity_map)\n", + "\n", + "unmapped = project_ctx[\"polarity\"].isna().sum()\n", + "if unmapped > 0:\n", + " print(f\"WARNING: {unmapped} rows could not be mapped. Check for unexpected polarity strings\")\n", + "else:\n", + " counts = project_ctx[\"polarity\"].value_counts().rename({0: \"neutral\", 1: \"positive\", 2: \"negative\"})\n", + " print(\"All polarity labels mapped successfully.\")\n", + " print(counts.to_string())" + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-step4-header", + "metadata": {}, + "source": [ + "### Step 5: Load the Kaiaulu output CSVs\n", + "\n", + "Read the two CSVs that Kaiaulu's vignettes wrote into the `rawdata/` directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "cell-nb4-step4-code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Kaiaulu commit comments: 1569 rows, columns: ['comment_id', 'commit_id', 'author_login', 'author_id', 'body', 'created_at', 'updated_at']\n", + "Kaiaulu PR inline comments: 6100 rows, columns: ['review_id', 'comment_id', 'html_url', 'created_at', 'updated_at', 'comment_user_login', 'author_association', 'file_path', 'start_line', 'line', 'original_start_line', 'original_line', 'position', 'diff_hunk', 'body', 'commit_id']\n" + ] + } + ], + "source": [ + "commit_csv_path = KAIAULU_DATA_DIR / f\"{REPO}_commit_comments.csv\"\n", + "pr_csv_path = KAIAULU_DATA_DIR / f\"{REPO}_pr_inline_comments.csv\"\n", + "\n", + "kaiaulu_commit = pd.read_csv(commit_csv_path)\n", + "kaiaulu_pr = pd.read_csv(pr_csv_path)\n", + "\n", + "print(f\"Kaiaulu commit comments: {len(kaiaulu_commit)} rows, columns: {list(kaiaulu_commit.columns)}\")\n", + "print(f\"Kaiaulu PR inline comments: {len(kaiaulu_pr)} rows, columns: {list(kaiaulu_pr.columns)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-step5-header", + "metadata": {}, + "source": [ + "### Step 6: INNER JOIN - Commit Comments\n", + "\n", + "Join the MySQL sentiment labels against Kaiaulu's commit comments on `comment_id`." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "cell-nb4-step5-code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cakephp rows in sentiment labels: 144\n", + "Rows matched in Kaiaulu commit comments: 33\n", + "\n", + "Joined commit comments (first 5 rows):\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
comment_idpolaritytextcreated_at_goldauthor_login_goldownerrepocommit_idauthor_login_kaiauluauthor_idbodycreated_at_kaiauluupdated_at
03411503neutralThis is causing the year to return always with...2013-06-12 09:21:03luksmcakephpcakephpfd72f894ad091bbe3c10314091ee3ab34769afa2luksm868687This is causing the year to return always with...2013-06-12T21:21:03Z2013-06-12T21:21:03Z
12245783neutral@Scottymeuk Read the associated ticket [#3283]...2012-12-03 11:25:13ADmadcakephpcakephpea467e72d72e9eb7cd140816ee8d7abd900b2629ADmad142658@Scottymeuk Read the associated ticket [#3283]...2012-12-03T22:25:13Z2012-12-03T22:25:13Z
21040482neutralhttps://github.com/petteyg/code_check\"2012-03-04 07:12:51josegonzalezcakephpcakephpa6da7361494b85411f1b93ea589e58405a77524bjosegonzalez65675https://github.com/petteyg/code_check\\n2012-03-04T18:12:51Z2012-03-04T18:12:51Z
3998908positiveI'm an idiot, I don't know how I missed that t...2012-02-22 14:22:03markstorycakephpcakephp89df484fc5a93fac7b01bdf086a395ecf284217dmarkstory24086I'm an idiot, I don't know how I missed that t...2012-02-23T01:22:03Z2012-02-23T01:22:03Z
4744111negativeSorry, guys. Yes individually tests pass. I am...2011-11-24 01:02:20ceeramcakephpcakephp05940ae1ec703a23714ff815e4e2e19cd1c6b5b7ceeram111448Sorry, guys. Yes individually tests pass. I am...2011-11-24T12:02:20Z2011-11-24T12:02:20Z
\n", + "
" + ], + "text/plain": [ + " comment_id polarity text \\\n", + "0 3411503 neutral This is causing the year to return always with... \n", + "1 2245783 neutral @Scottymeuk Read the associated ticket [#3283]... \n", + "2 1040482 neutral https://github.com/petteyg/code_check\" \n", + "3 998908 positive I'm an idiot, I don't know how I missed that t... \n", + "4 744111 negative Sorry, guys. Yes individually tests pass. I am... \n", + "\n", + " created_at_gold author_login_gold owner repo \\\n", + "0 2013-06-12 09:21:03 luksm cakephp cakephp \n", + "1 2012-12-03 11:25:13 ADmad cakephp cakephp \n", + "2 2012-03-04 07:12:51 josegonzalez cakephp cakephp \n", + "3 2012-02-22 14:22:03 markstory cakephp cakephp \n", + "4 2011-11-24 01:02:20 ceeram cakephp cakephp \n", + "\n", + " commit_id author_login_kaiaulu author_id \\\n", + "0 fd72f894ad091bbe3c10314091ee3ab34769afa2 luksm 868687 \n", + "1 ea467e72d72e9eb7cd140816ee8d7abd900b2629 ADmad 142658 \n", + "2 a6da7361494b85411f1b93ea589e58405a77524b josegonzalez 65675 \n", + "3 89df484fc5a93fac7b01bdf086a395ecf284217d markstory 24086 \n", + "4 05940ae1ec703a23714ff815e4e2e19cd1c6b5b7 ceeram 111448 \n", + "\n", + " body created_at_kaiaulu \\\n", + "0 This is causing the year to return always with... 2013-06-12T21:21:03Z \n", + "1 @Scottymeuk Read the associated ticket [#3283]... 2012-12-03T22:25:13Z \n", + "2 https://github.com/petteyg/code_check\\n 2012-03-04T18:12:51Z \n", + "3 I'm an idiot, I don't know how I missed that t... 2012-02-23T01:22:03Z \n", + "4 Sorry, guys. Yes individually tests pass. I am... 
2011-11-24T12:02:20Z \n", + "\n", + " updated_at \n", + "0 2013-06-12T21:21:03Z \n", + "1 2012-12-03T22:25:13Z \n", + "2 2012-03-04T18:12:51Z \n", + "3 2012-02-23T01:22:03Z \n", + "4 2011-11-24T12:02:20Z " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Saved: /Users/sheilalimon/Desktop/github/kaiaulu-sentiment/vignettes/rawdata/github/cakephp/cakephp/cakephp_sentiment_commit_comments_joined.csv\n" + ] + } + ], + "source": [ + "commit_joined = project_ctx.merge(\n", + " kaiaulu_commit,\n", + " on='comment_id',\n", + " how='inner',\n", + " suffixes=('_gold', '_kaiaulu')\n", + ")\n", + "\n", + "commit_dropped = len(project_ctx) - len(commit_joined)\n", + "print(f\"{REPO} rows in sentiment labels: {len(project_ctx)}\")\n", + "print(f\"Rows matched in Kaiaulu commit comments: {len(commit_joined)}\")\n", + "\n", + "print(\"\\nJoined commit comments (first 5 rows):\")\n", + "display(commit_joined.head())\n", + "\n", + "out_path = KAIAULU_DATA_DIR / f\"{REPO}_sentiment_commit_comments_joined.csv\"\n", + "commit_joined.to_csv(out_path, index=False)\n", + "print(f\"\\nSaved: {out_path}\")" + ] + }, + { + "cell_type": "markdown", + "id": "daef76ca", + "metadata": {}, + "source": [ + "### Step 7: INNER JOIN - PR inline comments\n", + "\n", + "Same join as Step 6, but against Kaiaulu's PR inline comments." + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "d8ba23d8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cakephp rows in sentiment labels: 144\n", + "Rows matched in Kaiaulu PR inline comments: 111\n", + "\n", + "Joined PR inline comments (first 5 rows):\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
comment_idpolaritytextcreated_at_goldauthor_loginownerreporeview_idhtml_urlcreated_at_kaiaulu...author_associationfile_pathstart_linelineoriginal_start_lineoriginal_linepositiondiff_hunkbodycommit_id
06044242neutralI had it implemented that way originally. The ...2013-08-28 07:51:55markstorycakephpcakephpNaNhttps://github.com/cakephp/cakephp/pull/1568#d...2013-08-28T19:51:55Z...MEMBERlib/Cake/Utility/Security.phpNaNNaNNaNNaN1@@ -289,4 +289,69 @@ protected static function...I had it implemented that way originally. The ...13b870d7e183375822eea4ffd66aaacaeec760ff
15949747neutralThis block of code is repeated 3 times in Hash...2013-08-23 01:02:48markstorycakephpcakephpNaNhttps://github.com/cakephp/cakephp/pull/1549#d...2013-08-23T13:02:48Z...MEMBERlib/Cake/Utility/Hash.phpNaN149.0NaNNaN30@@ -222,16 +222,36 @@ protected static functio...This block of code is repeated 3 times in Hash...a0014e7a303067bb9c36d438de5a70fe819d22a7
24288367neutralThis looks good, but makes me think we should ...2013-05-18 04:31:19markstorycakephpcakephpNaNhttps://github.com/cakephp/cakephp/pull/1275#d...2013-05-18T16:31:19Z...MEMBERlib/Cake/Controller/Component/Auth/BlowfishPas...NaN44.0NaNNaN44@@ -0,0 +1,58 @@\\n+<?php\\n+/**\\n+ * PHP 5\\n+ *...This looks good, but makes me think we should ...dd2892ad8d0e3a0b09990b0a9ef26c320f1901fa
34288664neutralHmm, my thinking was all password hasher class...2013-05-18 07:00:03ADmadcakephpcakephpNaNhttps://github.com/cakephp/cakephp/pull/1275#d...2013-05-18T19:00:03Z...MEMBERlib/Cake/Controller/Component/Auth/BlowfishPas...NaNNaNNaNNaN1@@ -0,0 +1,58 @@\\n+<?php\\n+/**\\n+ * PHP 5\\n+ *...Hmm, my thinking was all password hasher class...dd2892ad8d0e3a0b09990b0a9ef26c320f1901fa
43122764negativeI totally missed that, my bad. I'll get that f...2013-02-22 10:12:40markstorycakephpcakephpNaNhttps://github.com/cakephp/cakephp/pull/1154#d...2013-02-22T21:12:40Z...MEMBERlib/Cake/Utility/ViewVarsTrait.phpNaNNaNNaNNaN1@@ -0,0 +1,55 @@\\n+<?php\\n+/**\\n+ * CakePHP(tm...I totally missed that, my bad. I'll get that f...955889c6c731a56f9cbe6f572cea4594fd887d3a
\n", + "

5 rows × 22 columns

\n", + "
" + ], + "text/plain": [ + " comment_id polarity text \\\n", + "0 6044242 neutral I had it implemented that way originally. The ... \n", + "1 5949747 neutral This block of code is repeated 3 times in Hash... \n", + "2 4288367 neutral This looks good, but makes me think we should ... \n", + "3 4288664 neutral Hmm, my thinking was all password hasher class... \n", + "4 3122764 negative I totally missed that, my bad. I'll get that f... \n", + "\n", + " created_at_gold author_login owner repo review_id \\\n", + "0 2013-08-28 07:51:55 markstory cakephp cakephp NaN \n", + "1 2013-08-23 01:02:48 markstory cakephp cakephp NaN \n", + "2 2013-05-18 04:31:19 markstory cakephp cakephp NaN \n", + "3 2013-05-18 07:00:03 ADmad cakephp cakephp NaN \n", + "4 2013-02-22 10:12:40 markstory cakephp cakephp NaN \n", + "\n", + " html_url created_at_kaiaulu \\\n", + "0 https://github.com/cakephp/cakephp/pull/1568#d... 2013-08-28T19:51:55Z \n", + "1 https://github.com/cakephp/cakephp/pull/1549#d... 2013-08-23T13:02:48Z \n", + "2 https://github.com/cakephp/cakephp/pull/1275#d... 2013-05-18T16:31:19Z \n", + "3 https://github.com/cakephp/cakephp/pull/1275#d... 2013-05-18T19:00:03Z \n", + "4 https://github.com/cakephp/cakephp/pull/1154#d... 2013-02-22T21:12:40Z \n", + "\n", + " ... author_association file_path \\\n", + "0 ... MEMBER lib/Cake/Utility/Security.php \n", + "1 ... MEMBER lib/Cake/Utility/Hash.php \n", + "2 ... MEMBER lib/Cake/Controller/Component/Auth/BlowfishPas... \n", + "3 ... MEMBER lib/Cake/Controller/Component/Auth/BlowfishPas... \n", + "4 ... MEMBER lib/Cake/Utility/ViewVarsTrait.php \n", + "\n", + " start_line line original_start_line original_line position \\\n", + "0 NaN NaN NaN NaN 1 \n", + "1 NaN 149.0 NaN NaN 30 \n", + "2 NaN 44.0 NaN NaN 44 \n", + "3 NaN NaN NaN NaN 1 \n", + "4 NaN NaN NaN NaN 1 \n", + "\n", + " diff_hunk \\\n", + "0 @@ -289,4 +289,69 @@ protected static function... \n", + "1 @@ -222,16 +222,36 @@ protected static functio... 
\n", + "2 @@ -0,0 +1,58 @@\\n+ 0:\n", - " print(f\" These {commit_dropped} comment IDs exist in the Gold Standard but were not found in the Kaiaulu download.\")\n", - "\n", - "print(\"\\nJoined commit comments (first 5 rows):\")\n", - "display(commit_joined.head())\n", - "\n", - "out_path = os.path.join(DATA_DIR, f\"{REPO}_sentiment_commit_comments_joined.csv\")\n", - "commit_joined.to_csv(out_path, index=False)\n", - "print(f\"\\nSaved: {out_path}\")" - ] - }, - { - "cell_type": "markdown", - "id": "daef76ca", - "metadata": {}, - "source": [ - "### Step 6: INNER JOIN - PR inline comments\n", - "\n", - "Same join as step 5, but against Kaiaulu's PR inline comments." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d8ba23d8", - "metadata": {}, - "outputs": [], - "source": [ - "pr_joined = project_ctx.merge(\n", - " kaiaulu_pr,\n", - " on='comment_id',\n", - " how='inner', # INNER JOIN\n", - " suffixes=('_gold', '_kaiaulu')\n", - ")\n", - "\n", - "pr_dropped = len(project_ctx) - len(pr_joined)\n", - "print(f\"{OWNER}/{REPO} rows in contextualized dataset : {len(project_ctx)}\")\n", - "print(f\"Rows matched in Kaiaulu PR inline comments : {len(pr_joined)}\")\n", - "print(f\"Rows not found in Kaiaulu download : {pr_dropped}\")\n", - "if pr_dropped > 0:\n", - " print(f\" These {pr_dropped} comment IDs exist in the Gold Standard but were not found in the Kaiaulu download.\")\n", - "\n", - "print(\"\\nJoined PR inline comments (first 5 rows):\")\n", - "display(pr_joined.head())\n", - "\n", - "out_path = os.path.join(DATA_DIR, f\"{REPO}_sentiment_pr_inline_comments_joined.csv\")\n", - "pr_joined.to_csv(out_path, index=False)\n", - "print(f\"\\nSaved: {out_path}\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-nb4-next-steps", - "metadata": {}, - "source": [ - "### You're done!\n", - "\n", - "**To run for a different project:** update `OWNER` and `REPO` in Step 2, copy the new Kaiaulu CSVs into `data/`, and re-run Steps 3–6." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.2" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 95a8f11c197ee13dbe2c7087f57c8903f8fb6094 Mon Sep 17 00:00:00 2001 From: splimon Date: Fri, 17 Apr 2026 09:37:05 -1000 Subject: [PATCH 5/8] Clear all cell outputs from notebooks --- ...2_contextualize_github_gold_standard.ipynb | 5579 ----------------- notebooks/04_add_sentiment_to_kaiaulu.ipynb | 911 --- ...nb => 1_load_sentiment_csv_to_mysql.ipynb} | 225 +- notebooks/2_explore_relevant_projects.ipynb | 437 ++ ...files.ipynb => 3_scale_config_files.ipynb} | 131 +- notebooks/4_add_sentiment_to_kaiaulu.ipynb | 352 ++ 6 files changed, 799 insertions(+), 6836 deletions(-) delete mode 100644 notebooks/02_contextualize_github_gold_standard.ipynb delete mode 100644 notebooks/04_add_sentiment_to_kaiaulu.ipynb rename notebooks/{01_load_sentiment_csv_to_mysql.ipynb => 1_load_sentiment_csv_to_mysql.ipynb} (54%) create mode 100644 notebooks/2_explore_relevant_projects.ipynb rename notebooks/{03_scale_config_files.ipynb => 3_scale_config_files.ipynb} (71%) create mode 100644 notebooks/4_add_sentiment_to_kaiaulu.ipynb diff --git a/notebooks/02_contextualize_github_gold_standard.ipynb b/notebooks/02_contextualize_github_gold_standard.ipynb deleted file mode 100644 index 87b19a8..0000000 --- a/notebooks/02_contextualize_github_gold_standard.ipynb +++ /dev/null @@ -1,5579 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "1b3dd1e0", - "metadata": {}, - "source": [ - "# Notebook 2: Contextualize the Github Gold Standard Dataset\n", - "\n", - "By this point, you should have the Gold Standard and GHTorrent 2004 dump loaded into MySQL. 
Since both datasets share the same comment IDs, we can join them to add contextual columns (e.g., project, author, timestamp) to the Gold Standard's three columns (`ID`, `polarity`, `text`).\n", - "\n", - "But, before we create the contextualized Github Gold Standard dataset, we need to understand what we're working with. \n", - "\n", - "How are the 7,122 IDs split between commit comments and PR comments? Which projects show up the most? And are these comments reachable from canonical (non-fork) repos, or contained in forks?\n", - "\n", - "The answers to these questions inform how we handle the data and which projects we target when generating project config files in Notebook 3." - ] - }, - { - "cell_type": "markdown", - "id": "cell-step1-header", - "metadata": {}, - "source": [ - "### Step 1: Import dependencies and connect to MySQL" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "cell-imports", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "from sqlalchemy import create_engine, text\n", - "\n", - "pd.set_option('display.max_rows', None)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cell-config", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Connected to MySQL.\n" - ] - } - ], - "source": [ - "MYSQL_HOST = \"localhost\"\n", - "MYSQL_PORT = 3306\n", - "MYSQL_USER = \"root\"\n", - "MYSQL_PASSWORD = \"ADD_YOUR_PASSWORD_HERE\"\n", - "MYSQL_DB = \"github\" # name of the database where GHTorrent was loaded\n", - "\n", - "engine = create_engine(\n", - " f\"mysql+mysqlconnector://{MYSQL_USER}:{MYSQL_PASSWORD}@{MYSQL_HOST}:{MYSQL_PORT}/{MYSQL_DB}\"\n", - ")\n", - "print(\"Connected to MySQL.\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-check1-header", - "metadata": {}, - "source": [ - "### Check 1: How are the sentiment comments distributed?\n", - "\n", - "GHTorrent stores two kinds of GitHub comments: commit comments 
(discussions on a specific commit) and PR inline comments (left on a line of code in a pull request).\n", - "\n", - "To understand what commit comments look like, [here](https://github.com/openssl/openssl/commit/4817504d069b4c5082161b02a22116ad75f822b1#commitcomment-5942359) are examples of commit comments under a commit that introduced a popular software vulnerability. To understand what PR inline comments look like, refer to the [GitHub Pull Requests Cheatsheet](https://github.com/sailuh/kaiaulu_cheatsheet/blob/main/cheatsheets/github-comments-cheatsheet.pdf).\n", - "\n", - "The Gold Standard includes both types. The same `ID` maps to `comment_id` in both `commit_comments` and `pull_request_comments`. So, the first thing to figure out is which table each sentiment ID lands in. Some IDs appear in both tables (overlap = 85), meaning a small number of comments were captured under both endpoints in GHTorrent. The total unique IDs should sum to 7,122.\n", - "\n", - "Expected values:\n", - "- Commit comment matches: ~4,317\n", - "- PR comment matches: ~2,890\n", - "- Overlap (both): ~85\n", - "- Commit-only: 4,232 | PR-only: 2,805 | Total unique: 7,122" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cell-check1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
CategoryCountExpected
0Commit matches43174317
1PR matches28902890
2Overlap (both)8585
3Commit-only42324232
4PR-only28052805
5Total unique71227122
\n", - "
" - ], - "text/plain": [ - " Category Count Expected\n", - "0 Commit matches 4317 4317\n", - "1 PR matches 2890 2890\n", - "2 Overlap (both) 85 85\n", - "3 Commit-only 4232 4232\n", - "4 PR-only 2805 2805\n", - "5 Total unique 7122 7122" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "PASS: total unique IDs = 7122.\n" - ] - } - ], - "source": [ - "with engine.connect() as con:\n", - " commit_count = pd.read_sql(text(\"\"\"\n", - " SELECT COUNT(*) AS commit_comment_matches\n", - " FROM comment_sentiment s\n", - " INNER JOIN commit_comments cc ON s.ID = cc.comment_id;\n", - " \"\"\"), con).iloc[0, 0]\n", - "\n", - " pr_count = pd.read_sql(text(\"\"\"\n", - " SELECT COUNT(*) AS pr_comment_matches\n", - " FROM comment_sentiment s\n", - " INNER JOIN pull_request_comments prc ON s.ID = prc.comment_id;\n", - " \"\"\"), con).iloc[0, 0]\n", - "\n", - " overlap = pd.read_sql(text(\"\"\"\n", - " SELECT COUNT(*) AS overlap\n", - " FROM comment_sentiment s\n", - " INNER JOIN commit_comments cc ON s.ID = cc.comment_id\n", - " INNER JOIN pull_request_comments prc ON s.ID = prc.comment_id;\n", - " \"\"\"), con).iloc[0, 0]\n", - "\n", - "commit_only = commit_count - overlap\n", - "pr_only = pr_count - overlap\n", - "total_unique = commit_only + pr_only + overlap\n", - "\n", - "summary = pd.DataFrame({\n", - " 'Category': ['Commit matches', 'PR matches', 'Overlap (both)', 'Commit-only', 'PR-only', 'Total unique'],\n", - " 'Count': [commit_count, pr_count, overlap, commit_only, pr_only, total_unique],\n", - " 'Expected': [4317, 2890, 85, 4232, 2805, 7122]\n", - "})\n", - "display(summary)\n", - "\n", - "if total_unique == 7122:\n", - " print(\"PASS: total unique IDs = 7122.\")\n", - "else:\n", - " print(f\"WARNING: total unique IDs = {total_unique}, expected 7122.\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-check2-header", - "metadata": {}, - "source": [ - "### Check 2: Which projects have 
the most labeled commit comments?\n", - "\n", - "Let's see which projects' commit comments are most heavily represented in the Gold Standard. This is a preview of which projects we'll be generating Kaiaulu config files for in Notebook 3." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "cell-check2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Projects with sentiment-labeled commit comments: 453\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
project_idproject_nameproject_urllabeled_comment_count
012TrinityCorehttps://api.github.com/repos/TrinityCore/Trini...800
1289MaNGOShttps://api.github.com/repos/mangos/MaNGOS622
278852railshttps://api.github.com/repos/rails/rails448
322980CraftBukkithttps://api.github.com/repos/Bukkit/CraftBukkit357
425875jqueryhttps://api.github.com/repos/jquery/jquery235
591331diasporahttps://api.github.com/repos/diaspora/diaspora160
63583xbmchttps://api.github.com/repos/xbmc/xbmc130
722981nettyhttps://api.github.com/repos/netty/netty80
826388html5-boilerplatehttps://api.github.com/repos/h5bp/html5-boiler...80
924292nodehttps://api.github.com/repos/joyent/node65
1050618three.jshttps://api.github.com/repos/mrdoob/three.js65
111akkahttps://api.github.com/repos/akka/akka62
1279163homebrewhttps://api.github.com/repos/mxcl/homebrew57
1391020gitlabhqhttps://api.github.com/repos/gitlabhq/gitlabhq55
1451671symfonyhttps://api.github.com/repos/symfony/symfony46
159215openFrameworkshttps://api.github.com/repos/openframeworks/op...40
1663250cakephphttps://api.github.com/repos/cakephp/cakephp33
1710593libuvhttps://api.github.com/repos/joyent/libuv29
1895385devisehttps://api.github.com/repos/plataformatec/devise22
1974914requestshttps://api.github.com/repos/kennethreitz/requ...20
2069158djangohttps://api.github.com/repos/django/django19
2179166jekyllhttps://api.github.com/repos/mojombo/jekyll18
2259607CodeIgniterhttps://api.github.com/repos/EllisLab/CodeIgniter15
2377319Sick-Beardhttps://api.github.com/repos/midgetspy/Sick-Beard15
24104307papercliphttps://api.github.com/repos/thoughtbot/paperclip14
259636libgit2https://api.github.com/repos/libgit2/libgit213
265214xbmchttps://api.github.com/repos/jmarshallnz/xbmc12
2751669foundationhttps://api.github.com/repos/zurb/foundation12
2864176ThinkUphttps://api.github.com/repos/ginatrapani/ThinkUp12
2981335railshttps://api.github.com/repos/flooose/rails12
3010380redcarpethttps://api.github.com/repos/vmg/redcarpet11
3164918phpunithttps://api.github.com/repos/sebastianbergmann...11
32105378compasshttps://api.github.com/repos/chriseppstein/com...11
33107534scalahttps://api.github.com/repos/scala/scala11
3462501php-sdkhttps://api.github.com/repos/facebook/php-sdk10
3582328railshttps://api.github.com/repos/pinetops/rails9
3610629SignalRhttps://api.github.com/repos/SignalR/SignalR8
3714327Nancyhttps://api.github.com/repos/NancyFx/Nancy8
3817515elasticsearchhttps://api.github.com/repos/elasticsearch/ela...8
3919580facebook-android-sdkhttps://api.github.com/repos/facebook/facebook...8
4081320railshttps://api.github.com/repos/zetter/rails8
4111phantomjshttps://api.github.com/repos/ariya/phantomjs7
425164xbmchttps://api.github.com/repos/theuni/xbmc7
437242redishttps://api.github.com/repos/antirez/redis7
4426101nodehttps://api.github.com/repos/indutny/node7
4574915symfonyhttps://api.github.com/repos/xphere-forks/symfony7
4675984reddithttps://api.github.com/repos/reddit/reddit7
4776945botohttps://api.github.com/repos/boto/boto7
4876946django-debug-toolbarhttps://api.github.com/repos/django-debug-tool...7
4978835django-cmshttps://api.github.com/repos/divio/django-cms7
5080514railshttps://api.github.com/repos/andhapp/rails7
515176xbmchttps://api.github.com/repos/davilla/xbmc6
5212976pluploadhttps://api.github.com/repos/moxiecode/plupload6
5317566ActionBarSherlockhttps://api.github.com/repos/JakeWharton/Actio...6
5465250thinkuphttps://api.github.com/repos/mwilkie/thinkup6
5579948railshttps://api.github.com/repos/bjeanes/rails6
56107186gizzardhttps://api.github.com/repos/twitter/gizzard6
57784bitcoin-githttps://api.github.com/repos/gavinandresen/bit...5
585215xbmchttps://api.github.com/repos/Montellese/xbmc5
595232xbmchttps://api.github.com/repos/elupus/xbmc5
6014328ServiceStackhttps://api.github.com/repos/ServiceStack/Serv...5
6147382chosenhttps://api.github.com/repos/harvesthq/chosen5
6259683CodeIgniterhttps://api.github.com/repos/dchill42/CodeIgniter5
6376118reddithttps://api.github.com/repos/andre-d/reddit5
64107302xsbthttps://api.github.com/repos/snowplow/xsbt5
65107823scalahttps://api.github.com/repos/paulp/scala5
66454bitcoinhttps://api.github.com/repos/bitcoin/bitcoin4
675438xbmchttps://api.github.com/repos/0wing/xbmc4
687241xbmchttps://api.github.com/repos/FernetMenta/xbmc4
6942644d3https://api.github.com/repos/mbostock/d34
7053712symfonyhttps://api.github.com/repos/vicb/symfony4
7154148symfonyhttps://api.github.com/repos/schmittjoh/symfony4
7280581railshttps://api.github.com/repos/SAP-Oxygen/rails4
7384869railshttps://api.github.com/repos/slave-but-free/rails4
746knitrhttps://api.github.com/repos/yihui/knitr3
757shinyhttps://api.github.com/repos/rstudio/shiny3
769mongohttps://api.github.com/repos/mongodb/mongo3
773750http-parserhttps://api.github.com/repos/joyent/http-parser3
785182xbmchttps://api.github.com/repos/Voyager1/xbmc3
795230xbmchttps://api.github.com/repos/jimfcarroll/xbmc3
8013509TrinityCorehttps://api.github.com/repos/Havenard/TrinityCore3
8113597monohttps://api.github.com/repos/Unity-Technologie...3
8216134MiniProfilerhttps://api.github.com/repos/SamSaffron/MiniPr...3
8319786clojurehttps://api.github.com/repos/clojure/clojure3
8453713symfonyhttps://api.github.com/repos/bschussek/symfony3
8564197zf2https://api.github.com/repos/bakura10/zf23
8665107Slimhttps://api.github.com/repos/codeguy/Slim3
8771786tornadohttps://api.github.com/repos/facebook/tornado3
8880231railshttps://api.github.com/repos/sikachu/rails3
8980618railshttps://api.github.com/repos/steveklabnik/rails3
9084052railshttps://api.github.com/repos/bbenezech/rails3
9188617Homebrewhttps://api.github.com/repos/MindTooth/Homebrew3
9292167homebrewhttps://api.github.com/repos/axelsteiner/homebrew3
9392243homebrewhttps://api.github.com/repos/rgov/homebrew3
94101997octopresshttps://api.github.com/repos/imathis/octopress3
95107187sbthttps://api.github.com/repos/sbt/sbt3
96107811scalahttps://api.github.com/repos/adriaanm/scala3
97108589TC-Eluna-3.3.5ahttps://api.github.com/repos/ElunaLuaEngine/TC...3
982devtoolshttps://api.github.com/repos/hadley/devtools2
995162xbmchttps://api.github.com/repos/opdenkamp/xbmc2
1005202xbmchttps://api.github.com/repos/cptspiff/xbmc2
1015240xbmchttps://api.github.com/repos/bobo1on1/xbmc2
1025283xbmchttps://api.github.com/repos/Memphiz/xbmc2
1035326beanstalkdhttps://api.github.com/repos/kr/beanstalkd2
1045494xbmchttps://api.github.com/repos/koying/xbmc2
1056244xbmchttps://api.github.com/repos/jpsdr/xbmc2
1068255redishttps://api.github.com/repos/charsyam/redis2
1078948redishttps://api.github.com/repos/evilsocket/redis2
10811582openFrameworkshttps://api.github.com/repos/bilderbuchi/openF...2
10912554SparkleSharehttps://api.github.com/repos/hbons/SparkleShare2
11013197TrinityCorehttps://api.github.com/repos/kandera/TrinityCore2
11115018RestSharphttps://api.github.com/repos/restsharp/RestSharp2
11216402stormhttps://api.github.com/repos/nathanmarz/storm2
11323311CraftBukkithttps://api.github.com/repos/ScoreUnder/CraftB...2
11425879nodehttps://api.github.com/repos/isaacs/node2
11526240nodehttps://api.github.com/repos/kuebk/node2
11626766nodehttps://api.github.com/repos/mattrobenolt/node2
11729190html5-boilerplatehttps://api.github.com/repos/bentruyman/html5-...2
11853743symfonyhttps://api.github.com/repos/hhamon/symfony2
11954403symfonyhttps://api.github.com/repos/drak/symfony2
12054691symfonyhttps://api.github.com/repos/Tobion/symfony2
12161016CodeIgniterhttps://api.github.com/repos/and-ers/CodeIgniter2
12263690zf2https://api.github.com/repos/weierophinney/zf22
12365762cakephphttps://api.github.com/repos/markstory/cakephp2
12465946cakephphttps://api.github.com/repos/ADmad/cakephp2
12565983ThinkUphttps://api.github.com/repos/brandonroberts/Th...2
12667281cakephphttps://api.github.com/repos/schrolli/cakephp2
12768893phpunithttps://api.github.com/repos/fizzka/phpunit2
12869177djangohttps://api.github.com/repos/andrewgodwin/django2
12969349djangohttps://api.github.com/repos/carljm/django2
13069897djangohttps://api.github.com/repos/melinath/django2
13179642railshttps://api.github.com/repos/drogus/rails2
13280002railshttps://api.github.com/repos/joshk/rails2
13380498railshttps://api.github.com/repos/bratish/rails2
13480535railshttps://api.github.com/repos/anildigital/rails2
13580559railshttps://api.github.com/repos/sishen/rails2
13680818railshttps://api.github.com/repos/goncalossilva/rails2
13781332railshttps://api.github.com/repos/jenseng/rails2
13881621railshttps://api.github.com/repos/jenslukowski/rails2
13982863railshttps://api.github.com/repos/gururuby/rails2
14088593homebrewhttps://api.github.com/repos/docwhat/homebrew2
14188732homebrewhttps://api.github.com/repos/mbrunthaler/homebrew2
14290945homebrewhttps://api.github.com/repos/clusty/homebrew2
14391892homebrewhttps://api.github.com/repos/semaperepelitsa/h...2
14492348homebrewhttps://api.github.com/repos/alastairandrew/ho...2
14592859homebrewhttps://api.github.com/repos/danielb2/homebrew2
14693198homebrewhttps://api.github.com/repos/simleo/homebrew2
14794288homebrewhttps://api.github.com/repos/vhbit/homebrew2
148100124diasporahttps://api.github.com/repos/christophe-de/dia...2
149102734octopresshttps://api.github.com/repos/andrewreid/octopress2
150106160finaglehttps://api.github.com/repos/twitter/finagle2
151107312sbthttps://api.github.com/repos/gkossakowski/sbt2
152107535scalatrahttps://api.github.com/repos/scalatra/scalatra2
153107829scalahttps://api.github.com/repos/magarciaEPFL/scala2
1548follyhttps://api.github.com/repos/facebook/folly1
155893bitcoinhttps://api.github.com/repos/TheBlueMatt/bitcoin1
156935devtoolshttps://api.github.com/repos/yoni/devtools1
1571024bitcoinhttps://api.github.com/repos/laanwj/bitcoin1
1582710phantomjshttps://api.github.com/repos/dburrows/phantomjs1
1593814mongohttps://api.github.com/repos/guanqun/mongo1
1603840mongohttps://api.github.com/repos/stulentsev/mongo1
1613897http-parserhttps://api.github.com/repos/bnoordhuis/http-p...1
1624262mongohttps://api.github.com/repos/amcfague/mongo1
1635208xbmchttps://api.github.com/repos/Fneufneu/xbmc1
1645228xbmchttps://api.github.com/repos/anssih/xbmc1
1655234xbmchttps://api.github.com/repos/pieh/xbmc1
1665236xbmchttps://api.github.com/repos/llyzs/xbmc1
1675273XBMChttps://api.github.com/repos/HarryMuscle/XBMC1
1685297xbmchttps://api.github.com/repos/garbear/xbmc1
1695422xbmchttps://api.github.com/repos/doozan/xbmc1
1705428xbmchttps://api.github.com/repos/cbxbiker61/xbmc1
1715448xbmchttps://api.github.com/repos/vdrfan/xbmc1
1725485xbmchttps://api.github.com/repos/malard/xbmc1
1735715mangoshttps://api.github.com/repos/vermie/mangos1
1745828xbmchttps://api.github.com/repos/xbmcfanboy/xbmc1
1755832xbmchttps://api.github.com/repos/taxigps/xbmc1
1765837xbmchttps://api.github.com/repos/aviksil/xbmc1
1775858xbmchttps://api.github.com/repos/MichaelAnders/xbmc1
1785887xbmchttps://api.github.com/repos/PSyton/xbmc1
1795962xbmchttps://api.github.com/repos/adam-aph/xbmc1
1806250xbmchttps://api.github.com/repos/herrnst/xbmc1
1816258xbmchttps://api.github.com/repos/xhaggi/xbmc1
1826349xbmchttps://api.github.com/repos/manio/xbmc1
1836472xbmchttps://api.github.com/repos/dragonflight/xbmc1
1846505xbmchttps://api.github.com/repos/maheus/xbmc1
1856609xbmchttps://api.github.com/repos/elbeardmorez/xbmc1
1866790xbmchttps://api.github.com/repos/DigitalDJ/xbmc1
1876819xbmchttps://api.github.com/repos/FlyingRat/xbmc1
1887180xbmchttps://api.github.com/repos/Jalle19/xbmc1
1897404redishttps://api.github.com/repos/mrb/redis1
1907427ccvhttps://api.github.com/repos/liuliu/ccv1
1917457xbmchttps://api.github.com/repos/phil65/xbmc1
1928478redishttps://api.github.com/repos/marcelaraujo/redis1
1938605redishttps://api.github.com/repos/jumping/redis1
1948634redishttps://api.github.com/repos/melvyn-sopacua/redis1
1958955memcachedhttps://api.github.com/repos/memcached/memcached1
1969014hiphop-phphttps://api.github.com/repos/fungos/hiphop-php1
19710468xbmca10https://api.github.com/repos/npeacock/xbmca101
19810617libgit2https://api.github.com/repos/phkelley/libgit21
19910622libgit2https://api.github.com/repos/martinwoodward/li...1
20010632libgit2https://api.github.com/repos/carlosmn/libgit21
20111494openFrameworkshttps://api.github.com/repos/obviousjim/openFr...1
20212102openFrameworkshttps://api.github.com/repos/openFrameworks-Ra...1
20312529libgit2https://api.github.com/repos/glesserd/libgit21
20412934SparkleSharehttps://api.github.com/repos/serras/SparkleShare1
20512941TrinityCorehttps://api.github.com/repos/0omega/TrinityCore1
20612943TrinityCorehttps://api.github.com/repos/blipi/TrinityCore1
20712964TrinityCorehttps://api.github.com/repos/zorix/TrinityCore1
20812972TrinityCorehttps://api.github.com/repos/Naervin/TrinityCore1
20912973TrinityCorehttps://api.github.com/repos/4m1g0/TrinityCore1
21013410pluploadhttps://api.github.com/repos/dtmax/plupload1
21113514uwom-serverhttps://api.github.com/repos/WarHead/uwom-server1
21213516TrinityCorehttps://api.github.com/repos/hacknowledge/Trin...1
21313526TrinityCorehttps://api.github.com/repos/johnholiver/Trini...1
21413558TrinityCorehttps://api.github.com/repos/Baeumchen/Trinity...1
21513688monohttps://api.github.com/repos/cyplo/mono1
21613724monohttps://api.github.com/repos/QuickJack/mono1
21713765monohttps://api.github.com/repos/ermshiperete/mono1
21813775monohttps://api.github.com/repos/ukplc/mono1
21914092monohttps://api.github.com/repos/killabytenow/mono1
22014523ServiceStackhttps://api.github.com/repos/jeffgabhart/Servi...1
22114593ServiceStackhttps://api.github.com/repos/leon-andria/Servi...1
22214912AutoMapperhttps://api.github.com/repos/AutoMapper/AutoMa...1
22314993Nancyhttps://api.github.com/repos/thedersen/Nancy1
22415521RestSharphttps://api.github.com/repos/crdeutsch/RestSharp1
22515699RestSharphttps://api.github.com/repos/Haacked/RestSharp1
22615836Nancyhttps://api.github.com/repos/mat-mcloughlin/Nancy1
22717907elasticsearchhttps://api.github.com/repos/imotov/elasticsearch1
22818328elasticsearchhttps://api.github.com/repos/javanna/elasticse...1
22918863elasticsearchhttps://api.github.com/repos/brwe/elasticsearch1
23023040CraftBukkithttps://api.github.com/repos/Zaraza107/CraftBu...1
23123203CraftBukkithttps://api.github.com/repos/cyberdudedk/Craft...1
23223245CraftBukkithttps://api.github.com/repos/TheEliteFour/Craf...1
23323411CraftBukkithttps://api.github.com/repos/dumptruckman/Craf...1
23424886nettyhttps://api.github.com/repos/jpinner/netty1
23525148nettyhttps://api.github.com/repos/CruzBishop/netty1
23625358nettyhttps://api.github.com/repos/zcourts/netty1
23725520nettyhttps://api.github.com/repos/Melon1017/netty1
23825884nodehttps://api.github.com/repos/felixge/node1
23926062nodehttps://api.github.com/repos/TooTallNate/node1
24026508nodehttps://api.github.com/repos/pixelglow/node1
24127020jqueryhttps://api.github.com/repos/louisremi/jquery1
24227040nodehttps://api.github.com/repos/interruptz/node1
24327075jqueryhttps://api.github.com/repos/rwaldron/jquery1
24427190jqueryhttps://api.github.com/repos/gnarf/jquery1
24527236jqueryhttps://api.github.com/repos/SlexAxton/jquery1
24627269jqueryhttps://api.github.com/repos/alexisabril/jquery1
24727330nodehttps://api.github.com/repos/laverdet/node1
24827379nodehttps://api.github.com/repos/JSBizon/node1
24927504impress.jshttps://api.github.com/repos/bartaz/impress.js1
25027570jqueryhttps://api.github.com/repos/mikesherov/jquery1
25127761jquery-nodomhttps://api.github.com/repos/kpozin/jquery-nodom1
25227796jqueryhttps://api.github.com/repos/orkel/jquery1
25327906jqueryhttps://api.github.com/repos/joelbirchler/jquery1
25428346jqueryhttps://api.github.com/repos/gibson042/jquery1
25529698html5-boilerplatehttps://api.github.com/repos/cleanforestco/htm...1
25632974nodehttps://api.github.com/repos/mcurcio/node1
25734354nodehttps://api.github.com/repos/iizukanao/node1
25834632jqueryhttps://api.github.com/repos/danilsomsikov/jquery1
25935620jqueryhttps://api.github.com/repos/shalecraig/jquery1
26035652nodehttps://api.github.com/repos/WebReflection/node1
26136228nodehttps://api.github.com/repos/hueniverse/node1
26242719d3https://api.github.com/repos/jasondavies/d31
26343107d3https://api.github.com/repos/GerHobbelt/d31
26449358chosenhttps://api.github.com/repos/Sikwan/chosen1
26552432foundationhttps://api.github.com/repos/jvivs/foundation1
26653485symfonyhttps://api.github.com/repos/fabpot/symfony1
26753494symfonyhttps://api.github.com/repos/usefulthink/symfony1
26853548symfonyhttps://api.github.com/repos/jwage/symfony1
26953598symfonyhttps://api.github.com/repos/weaverryan/symfony1
27053694symfonyhttps://api.github.com/repos/francisbesset/sym...1
27153715symfonyhttps://api.github.com/repos/mvrhov/symfony1
27253792symfonyhttps://api.github.com/repos/lsmith77/symfony1
27353993symfonyhttps://api.github.com/repos/stloyd/symfony1
27454713symfonyhttps://api.github.com/repos/tacman/symfony1
27555185symfonyhttps://api.github.com/repos/jfsimon/symfony1
27655564three.jshttps://api.github.com/repos/AddictArts/three.js1
27756114symfonyhttps://api.github.com/repos/dlsniper/symfony1
27856501three.jshttps://api.github.com/repos/tapio/three.js1
27957041symfonyhttps://api.github.com/repos/ircmaxell/symfony1
28057142three.jshttps://api.github.com/repos/bhouston/three.js1
28158640symfonyhttps://api.github.com/repos/gnugat/symfony1
28258697three.jshttps://api.github.com/repos/kevinoe/three.js1
28358762three.jshttps://api.github.com/repos/lminko/three.js1
28459627CodeIgniterhttps://api.github.com/repos/darkwhispering/Co...1
28559890CodeIgniterhttps://api.github.com/repos/zechdc/CodeIgniter1
28660294CodeIgniterhttps://api.github.com/repos/vlakoff/CodeIgniter1
28760784CodeIgniterhttps://api.github.com/repos/BillHeaton/CodeIg...1
28861226CodeIgniterhttps://api.github.com/repos/chrispassas/CodeI...1
28962502zf2https://api.github.com/repos/zendframework/zf21
29063700zf2https://api.github.com/repos/DASPRiD/zf21
29163898zf2https://api.github.com/repos/ezimuel/zf21
29264184zf2https://api.github.com/repos/Maks3w/zf21
29364379zf2https://api.github.com/repos/davidwindell/zf21
29464412zf2https://api.github.com/repos/jacobkiers/zf21
29564680zf2https://api.github.com/repos/samsonasik/zf21
29665260thinkuphttps://api.github.com/repos/mithaler/thinkup1
29765317ThinkUphttps://api.github.com/repos/samwho/ThinkUp1
29865338ThinkUphttps://api.github.com/repos/anildash/ThinkUp1
29965349ThinkUphttps://api.github.com/repos/kylehase/ThinkUp1
30065530ThinkUphttps://api.github.com/repos/rgroves/ThinkUp1
30165796cakephphttps://api.github.com/repos/ceeram/cakephp1
30265889ThinkUphttps://api.github.com/repos/bleything/ThinkUp1
30365980cakephphttps://api.github.com/repos/zoghal/cakephp1
30469254djangohttps://api.github.com/repos/niwibe/django1
30569304djangohttps://api.github.com/repos/pvanderlinden/django1
30672032flaskhttps://api.github.com/repos/mitsuhiko/flask1
30775330requestshttps://api.github.com/repos/dandrzejewski/req...1
30875722requestshttps://api.github.com/repos/nicoddemus/requests1
30976156reddithttps://api.github.com/repos/k21/reddit1
31077048django-debug-toolbarhttps://api.github.com/repos/msaelices/django-...1
31177422botohttps://api.github.com/repos/irskep/boto1
31277458botohttps://api.github.com/repos/goura/boto1
31377717botohttps://api.github.com/repos/fayazkhan/boto1
31478347Sick-Beardhttps://api.github.com/repos/bshep/Sick-Beard1
31578356Sick-Beardhttps://api.github.com/repos/ozeraser/Sick-Beard1
31678461Sick-Beardhttps://api.github.com/repos/jorgenpt/Sick-Beard1
31778634Sick-Beardhttps://api.github.com/repos/EchelonFour/Sick-...1
31878808Sick-Beardhttps://api.github.com/repos/mozvip/Sick-Beard1
31978942Sick-Beardhttps://api.github.com/repos/Prinz23/Sick-Beard1
32079408railshttps://api.github.com/repos/coderrr/rails1
32179648railshttps://api.github.com/repos/zires/rails1
32279876railshttps://api.github.com/repos/bigfix/rails1
32379958railshttps://api.github.com/repos/rafaelfranca/rails1
32479972railshttps://api.github.com/repos/github/rails1
32580096railshttps://api.github.com/repos/cldwalker/rails1
32680105railshttps://api.github.com/repos/indirect/rails1
32780289railshttps://api.github.com/repos/arunagw/rails1
32880307railshttps://api.github.com/repos/raysrashmi/rails1
32980322railshttps://api.github.com/repos/smartinez87/rails1
33080363railshttps://api.github.com/repos/castlerock/rails1
33180437railshttps://api.github.com/repos/senny/rails1
33280670railshttps://api.github.com/repos/acroca/rails1
33380682railshttps://api.github.com/repos/gazay/rails1
33480790django-cmshttps://api.github.com/repos/powderflask/djang...1
33580961railshttps://api.github.com/repos/kennyj/rails1
33680974rails-1https://api.github.com/repos/tanin47/rails-11
33781033railshttps://api.github.com/repos/nashby/rails1
33881294railshttps://api.github.com/repos/larskanis/rails1
33981346railshttps://api.github.com/repos/kielkowicz/rails1
34081764railshttps://api.github.com/repos/bogdan/rails1
34181842railshttps://api.github.com/repos/tigrish/rails1
34282391railshttps://api.github.com/repos/homakov/rails1
34382577railshttps://api.github.com/repos/dylanahsmith/rails1
34482701railshttps://api.github.com/repos/revans/rails1
34583262railshttps://api.github.com/repos/blowmage/rails1
34683426jekyllhttps://api.github.com/repos/robru/jekyll1
34783454railshttps://api.github.com/repos/Grandrath/rails1
34883533railshttps://api.github.com/repos/gsphanikumar/rails1
34983737railshttps://api.github.com/repos/gaurish/rails1
35084573railshttps://api.github.com/repos/morgancurrie/rails1
35184757railshttps://api.github.com/repos/Jiebour/rails1
35285511railshttps://api.github.com/repos/frodsan/rails1
35386503railshttps://api.github.com/repos/ankit8898/rails1
35486821railshttps://api.github.com/repos/versioncontrol/rails1
35587060homebrewhttps://api.github.com/repos/godfat/homebrew1
35687169homebrewhttps://api.github.com/repos/wright/homebrew1
35787351homebrewhttps://api.github.com/repos/bdd/homebrew1
35887973homebrewhttps://api.github.com/repos/greedy/homebrew1
35988010jekyllhttps://api.github.com/repos/edeustace/jekyll1
36088104homebrewhttps://api.github.com/repos/jlcapps/homebrew1
36188197homebrewhttps://api.github.com/repos/justinclift/homebrew1
36288203homebrewhttps://api.github.com/repos/tusbar/homebrew1
36388479homebrewhttps://api.github.com/repos/neglectedvalue/ho...1
36488666homebrewhttps://api.github.com/repos/losmuertos/homebrew1
36588681homebrewhttps://api.github.com/repos/larseggert/homebrew1
36688705homebrewhttps://api.github.com/repos/donspaulding/home...1
36788715jekyllhttps://api.github.com/repos/metamatt/jekyll1
36888751homebrewhttps://api.github.com/repos/jacknagel/homebrew1
36988845homebrewhttps://api.github.com/repos/vibrog/homebrew1
37088884homebrewhttps://api.github.com/repos/catsby/homebrew1
37188975homebrewhttps://api.github.com/repos/thoughtpolice/hom...1
37289039homebrewhttps://api.github.com/repos/jedi4ever/homebrew1
37389091homebrewhttps://api.github.com/repos/dch/homebrew1
37489305homebrewhttps://api.github.com/repos/mistydemeo/homebrew1
37589327homebrewhttps://api.github.com/repos/tonit/homebrew1
37689384homebrewhttps://api.github.com/repos/svenax/homebrew1
37789541homebrewhttps://api.github.com/repos/jcupitt/homebrew1
37889949homebrewhttps://api.github.com/repos/wesen/homebrew1
37990205homebrewhttps://api.github.com/repos/vertis/homebrew1
38090433homebrewhttps://api.github.com/repos/2bits/homebrew1
38190568homebrewhttps://api.github.com/repos/jwilkins/homebrew1
38290602homebrewhttps://api.github.com/repos/anatol/homebrew1
38390714homebrewhttps://api.github.com/repos/msabramo/homebrew1
38490813homebrewhttps://api.github.com/repos/azarbayejani/home...1
38590961homebrewhttps://api.github.com/repos/nicolasdespres/ho...1
38691026homebrewhttps://api.github.com/repos/yllan/homebrew1
38791062homebrewhttps://api.github.com/repos/samueljohn/homebrew1
38891113homebrewhttps://api.github.com/repos/cartazio/homebrew1
38991159homebrewhttps://api.github.com/repos/fish2000/homebrew1
39091240homebrewhttps://api.github.com/repos/glejeune/homebrew1
39191270homebrewhttps://api.github.com/repos/bpiwowar/homebrew1
39291294homebrewhttps://api.github.com/repos/ingmar/homebrew1
39391326homebrewhttps://api.github.com/repos/funnymanva/homebrew1
39491496homebrewhttps://api.github.com/repos/nmadura/homebrew1
39591611homebrewhttps://api.github.com/repos/sandeep048/homebrew1
39691638homebrewhttps://api.github.com/repos/lifepillar/homebrew1
39791741homebrewhttps://api.github.com/repos/ummels/homebrew1
39891854homebrewhttps://api.github.com/repos/nevir/homebrew1
39991864homebrewhttps://api.github.com/repos/mrjbq7/homebrew1
40091916homebrewhttps://api.github.com/repos/zhangcheng/homebrew1
40191918homebrewhttps://api.github.com/repos/rmndk/homebrew1
40291967homebrewhttps://api.github.com/repos/rhysd/homebrew1
40392060homebrewhttps://api.github.com/repos/mattyr/homebrew1
40492372homebrewhttps://api.github.com/repos/AstonJ/homebrew1
40592399homebrewhttps://api.github.com/repos/raedwulf/homebrew1
40692749homebrewhttps://api.github.com/repos/rays/homebrew1
40792789homebrewhttps://api.github.com/repos/OldCrow/homebrew1
40892830homebrewhttps://api.github.com/repos/vogonistic/homebrew1
40992839homebrewhttps://api.github.com/repos/ashirazi/homebrew1
41092866homebrewhttps://api.github.com/repos/sheerun/homebrew1
41192978homebrewhttps://api.github.com/repos/marr/homebrew1
41293004homebrewhttps://api.github.com/repos/wix/homebrew1
41393049homebrewhttps://api.github.com/repos/andriytyurnikov/h...1
41493112homebrewhttps://api.github.com/repos/tinystatemachine/...1
41593180homebrewhttps://api.github.com/repos/mashtizadeh/homebrew1
41693250homebrewhttps://api.github.com/repos/peabody124/homebrew1
41793295homebrewhttps://api.github.com/repos/handlename/homebrew1
41893310homebrewhttps://api.github.com/repos/crishoj/homebrew1
41993377homebrewhttps://api.github.com/repos/eladg/homebrew1
42093428homebrewhttps://api.github.com/repos/chrmoritz/homebrew1
42193913homebrewhttps://api.github.com/repos/natritmeyer/homebrew1
42294037homebrewhttps://api.github.com/repos/chenpc/homebrew1
42395895gitlabhqhttps://api.github.com/repos/zzet/gitlabhq1
42496401gitlabhqhttps://api.github.com/repos/mikew/gitlabhq1
42596669gitlabhqhttps://api.github.com/repos/proverbface/gitlabhq1
42696736gitlabhqhttps://api.github.com/repos/drahamim/gitlabhq1
42796815gitlabhqhttps://api.github.com/repos/senny/gitlabhq1
42897857devisehttps://api.github.com/repos/rahearn/devise1
429100090diasporahttps://api.github.com/repos/Gonzih/diaspora1
430100137diasporahttps://api.github.com/repos/Raven24/diaspora1
431101472blueprint-csshttps://api.github.com/repos/joshuaclayton/blu...1
432102063octopresshttps://api.github.com/repos/pilif/octopress1
433104578papercliphttps://api.github.com/repos/yar/paperclip1
434105484compasshttps://api.github.com/repos/gmclelland/compass1
435105965compasshttps://api.github.com/repos/cimmanon/compass1
436106161kestrelhttps://api.github.com/repos/robey/kestrel1
437106447finaglehttps://api.github.com/repos/benpence/finagle1
438106680akkahttps://api.github.com/repos/paulpach/akka1
439106686akkahttps://api.github.com/repos/metamorph/akka1
440106779akkahttps://api.github.com/repos/scullxbones/akka1
441106793akkahttps://api.github.com/repos/drewhk/akka1
442107188xsbthttps://api.github.com/repos/retronym/xsbt1
443107255xsbthttps://api.github.com/repos/vigdorchik/xsbt1
444107317xsbthttps://api.github.com/repos/ebowman/xsbt1
445107377sbthttps://api.github.com/repos/ezh/sbt1
446107809scalahttps://api.github.com/repos/odersky/scala1
447107815scalahttps://api.github.com/repos/lrytz/scala1
448107819scalahttps://api.github.com/repos/soc/scala1
449107824scalahttps://api.github.com/repos/phaller/scala1
450108259scalahttps://api.github.com/repos/Ichoran/scala1
451108348mongohttps://api.github.com/repos/idning/mongo1
452108646djangohttps://api.github.com/repos/zbenjamin/django1
\n", - "
" - ], - "text/plain": [ - " project_id project_name \\\n", - "0 12 TrinityCore \n", - "1 289 MaNGOS \n", - "2 78852 rails \n", - "3 22980 CraftBukkit \n", - "4 25875 jquery \n", - "5 91331 diaspora \n", - "6 3583 xbmc \n", - "7 22981 netty \n", - "8 26388 html5-boilerplate \n", - "9 24292 node \n", - "10 50618 three.js \n", - "11 1 akka \n", - "12 79163 homebrew \n", - "13 91020 gitlabhq \n", - "14 51671 symfony \n", - "15 9215 openFrameworks \n", - "16 63250 cakephp \n", - "17 10593 libuv \n", - "18 95385 devise \n", - "19 74914 requests \n", - "20 69158 django \n", - "21 79166 jekyll \n", - "22 59607 CodeIgniter \n", - "23 77319 Sick-Beard \n", - "24 104307 paperclip \n", - "25 9636 libgit2 \n", - "26 5214 xbmc \n", - "27 51669 foundation \n", - "28 64176 ThinkUp \n", - "29 81335 rails \n", - "30 10380 redcarpet \n", - "31 64918 phpunit \n", - "32 105378 compass \n", - "33 107534 scala \n", - "34 62501 php-sdk \n", - "35 82328 rails \n", - "36 10629 SignalR \n", - "37 14327 Nancy \n", - "38 17515 elasticsearch \n", - "39 19580 facebook-android-sdk \n", - "40 81320 rails \n", - "41 11 phantomjs \n", - "42 5164 xbmc \n", - "43 7242 redis \n", - "44 26101 node \n", - "45 74915 symfony \n", - "46 75984 reddit \n", - "47 76945 boto \n", - "48 76946 django-debug-toolbar \n", - "49 78835 django-cms \n", - "50 80514 rails \n", - "51 5176 xbmc \n", - "52 12976 plupload \n", - "53 17566 ActionBarSherlock \n", - "54 65250 thinkup \n", - "55 79948 rails \n", - "56 107186 gizzard \n", - "57 784 bitcoin-git \n", - "58 5215 xbmc \n", - "59 5232 xbmc \n", - "60 14328 ServiceStack \n", - "61 47382 chosen \n", - "62 59683 CodeIgniter \n", - "63 76118 reddit \n", - "64 107302 xsbt \n", - "65 107823 scala \n", - "66 454 bitcoin \n", - "67 5438 xbmc \n", - "68 7241 xbmc \n", - "69 42644 d3 \n", - "70 53712 symfony \n", - "71 54148 symfony \n", - "72 80581 rails \n", - "73 84869 rails \n", - "74 6 knitr \n", - "75 7 shiny \n", - "76 9 mongo \n", - "77 3750 http-parser \n", - "78 5182 
xbmc \n", - "79 5230 xbmc \n", - "80 13509 TrinityCore \n", - "81 13597 mono \n", - "82 16134 MiniProfiler \n", - "83 19786 clojure \n", - "84 53713 symfony \n", - "85 64197 zf2 \n", - "86 65107 Slim \n", - "87 71786 tornado \n", - "88 80231 rails \n", - "89 80618 rails \n", - "90 84052 rails \n", - "91 88617 Homebrew \n", - "92 92167 homebrew \n", - "93 92243 homebrew \n", - "94 101997 octopress \n", - "95 107187 sbt \n", - "96 107811 scala \n", - "97 108589 TC-Eluna-3.3.5a \n", - "98 2 devtools \n", - "99 5162 xbmc \n", - "100 5202 xbmc \n", - "101 5240 xbmc \n", - "102 5283 xbmc \n", - "103 5326 beanstalkd \n", - "104 5494 xbmc \n", - "105 6244 xbmc \n", - "106 8255 redis \n", - "107 8948 redis \n", - "108 11582 openFrameworks \n", - "109 12554 SparkleShare \n", - "110 13197 TrinityCore \n", - "111 15018 RestSharp \n", - "112 16402 storm \n", - "113 23311 CraftBukkit \n", - "114 25879 node \n", - "115 26240 node \n", - "116 26766 node \n", - "117 29190 html5-boilerplate \n", - "118 53743 symfony \n", - "119 54403 symfony \n", - "120 54691 symfony \n", - "121 61016 CodeIgniter \n", - "122 63690 zf2 \n", - "123 65762 cakephp \n", - "124 65946 cakephp \n", - "125 65983 ThinkUp \n", - "126 67281 cakephp \n", - "127 68893 phpunit \n", - "128 69177 django \n", - "129 69349 django \n", - "130 69897 django \n", - "131 79642 rails \n", - "132 80002 rails \n", - "133 80498 rails \n", - "134 80535 rails \n", - "135 80559 rails \n", - "136 80818 rails \n", - "137 81332 rails \n", - "138 81621 rails \n", - "139 82863 rails \n", - "140 88593 homebrew \n", - "141 88732 homebrew \n", - "142 90945 homebrew \n", - "143 91892 homebrew \n", - "144 92348 homebrew \n", - "145 92859 homebrew \n", - "146 93198 homebrew \n", - "147 94288 homebrew \n", - "148 100124 diaspora \n", - "149 102734 octopress \n", - "150 106160 finagle \n", - "151 107312 sbt \n", - "152 107535 scalatra \n", - "153 107829 scala \n", - "154 8 folly \n", - "155 893 bitcoin \n", - "156 935 devtools \n", - "157 
1024 bitcoin \n", - "158 2710 phantomjs \n", - "159 3814 mongo \n", - "160 3840 mongo \n", - "161 3897 http-parser \n", - "162 4262 mongo \n", - "163 5208 xbmc \n", - "164 5228 xbmc \n", - "165 5234 xbmc \n", - "166 5236 xbmc \n", - "167 5273 XBMC \n", - "168 5297 xbmc \n", - "169 5422 xbmc \n", - "170 5428 xbmc \n", - "171 5448 xbmc \n", - "172 5485 xbmc \n", - "173 5715 mangos \n", - "174 5828 xbmc \n", - "175 5832 xbmc \n", - "176 5837 xbmc \n", - "177 5858 xbmc \n", - "178 5887 xbmc \n", - "179 5962 xbmc \n", - "180 6250 xbmc \n", - "181 6258 xbmc \n", - "182 6349 xbmc \n", - "183 6472 xbmc \n", - "184 6505 xbmc \n", - "185 6609 xbmc \n", - "186 6790 xbmc \n", - "187 6819 xbmc \n", - "188 7180 xbmc \n", - "189 7404 redis \n", - "190 7427 ccv \n", - "191 7457 xbmc \n", - "192 8478 redis \n", - "193 8605 redis \n", - "194 8634 redis \n", - "195 8955 memcached \n", - "196 9014 hiphop-php \n", - "197 10468 xbmca10 \n", - "198 10617 libgit2 \n", - "199 10622 libgit2 \n", - "200 10632 libgit2 \n", - "201 11494 openFrameworks \n", - "202 12102 openFrameworks \n", - "203 12529 libgit2 \n", - "204 12934 SparkleShare \n", - "205 12941 TrinityCore \n", - "206 12943 TrinityCore \n", - "207 12964 TrinityCore \n", - "208 12972 TrinityCore \n", - "209 12973 TrinityCore \n", - "210 13410 plupload \n", - "211 13514 uwom-server \n", - "212 13516 TrinityCore \n", - "213 13526 TrinityCore \n", - "214 13558 TrinityCore \n", - "215 13688 mono \n", - "216 13724 mono \n", - "217 13765 mono \n", - "218 13775 mono \n", - "219 14092 mono \n", - "220 14523 ServiceStack \n", - "221 14593 ServiceStack \n", - "222 14912 AutoMapper \n", - "223 14993 Nancy \n", - "224 15521 RestSharp \n", - "225 15699 RestSharp \n", - "226 15836 Nancy \n", - "227 17907 elasticsearch \n", - "228 18328 elasticsearch \n", - "229 18863 elasticsearch \n", - "230 23040 CraftBukkit \n", - "231 23203 CraftBukkit \n", - "232 23245 CraftBukkit \n", - "233 23411 CraftBukkit \n", - "234 24886 netty \n", - "235 25148 netty 
\n", - "236 25358 netty \n", - "237 25520 netty \n", - "238 25884 node \n", - "239 26062 node \n", - "240 26508 node \n", - "241 27020 jquery \n", - "242 27040 node \n", - "243 27075 jquery \n", - "244 27190 jquery \n", - "245 27236 jquery \n", - "246 27269 jquery \n", - "247 27330 node \n", - "248 27379 node \n", - "249 27504 impress.js \n", - "250 27570 jquery \n", - "251 27761 jquery-nodom \n", - "252 27796 jquery \n", - "253 27906 jquery \n", - "254 28346 jquery \n", - "255 29698 html5-boilerplate \n", - "256 32974 node \n", - "257 34354 node \n", - "258 34632 jquery \n", - "259 35620 jquery \n", - "260 35652 node \n", - "261 36228 node \n", - "262 42719 d3 \n", - "263 43107 d3 \n", - "264 49358 chosen \n", - "265 52432 foundation \n", - "266 53485 symfony \n", - "267 53494 symfony \n", - "268 53548 symfony \n", - "269 53598 symfony \n", - "270 53694 symfony \n", - "271 53715 symfony \n", - "272 53792 symfony \n", - "273 53993 symfony \n", - "274 54713 symfony \n", - "275 55185 symfony \n", - "276 55564 three.js \n", - "277 56114 symfony \n", - "278 56501 three.js \n", - "279 57041 symfony \n", - "280 57142 three.js \n", - "281 58640 symfony \n", - "282 58697 three.js \n", - "283 58762 three.js \n", - "284 59627 CodeIgniter \n", - "285 59890 CodeIgniter \n", - "286 60294 CodeIgniter \n", - "287 60784 CodeIgniter \n", - "288 61226 CodeIgniter \n", - "289 62502 zf2 \n", - "290 63700 zf2 \n", - "291 63898 zf2 \n", - "292 64184 zf2 \n", - "293 64379 zf2 \n", - "294 64412 zf2 \n", - "295 64680 zf2 \n", - "296 65260 thinkup \n", - "297 65317 ThinkUp \n", - "298 65338 ThinkUp \n", - "299 65349 ThinkUp \n", - "300 65530 ThinkUp \n", - "301 65796 cakephp \n", - "302 65889 ThinkUp \n", - "303 65980 cakephp \n", - "304 69254 django \n", - "305 69304 django \n", - "306 72032 flask \n", - "307 75330 requests \n", - "308 75722 requests \n", - "309 76156 reddit \n", - "310 77048 django-debug-toolbar \n", - "311 77422 boto \n", - "312 77458 boto \n", - "313 77717 boto \n", - 
"314 78347 Sick-Beard \n", - "315 78356 Sick-Beard \n", - "316 78461 Sick-Beard \n", - "317 78634 Sick-Beard \n", - "318 78808 Sick-Beard \n", - "319 78942 Sick-Beard \n", - "320 79408 rails \n", - "321 79648 rails \n", - "322 79876 rails \n", - "323 79958 rails \n", - "324 79972 rails \n", - "325 80096 rails \n", - "326 80105 rails \n", - "327 80289 rails \n", - "328 80307 rails \n", - "329 80322 rails \n", - "330 80363 rails \n", - "331 80437 rails \n", - "332 80670 rails \n", - "333 80682 rails \n", - "334 80790 django-cms \n", - "335 80961 rails \n", - "336 80974 rails-1 \n", - "337 81033 rails \n", - "338 81294 rails \n", - "339 81346 rails \n", - "340 81764 rails \n", - "341 81842 rails \n", - "342 82391 rails \n", - "343 82577 rails \n", - "344 82701 rails \n", - "345 83262 rails \n", - "346 83426 jekyll \n", - "347 83454 rails \n", - "348 83533 rails \n", - "349 83737 rails \n", - "350 84573 rails \n", - "351 84757 rails \n", - "352 85511 rails \n", - "353 86503 rails \n", - "354 86821 rails \n", - "355 87060 homebrew \n", - "356 87169 homebrew \n", - "357 87351 homebrew \n", - "358 87973 homebrew \n", - "359 88010 jekyll \n", - "360 88104 homebrew \n", - "361 88197 homebrew \n", - "362 88203 homebrew \n", - "363 88479 homebrew \n", - "364 88666 homebrew \n", - "365 88681 homebrew \n", - "366 88705 homebrew \n", - "367 88715 jekyll \n", - "368 88751 homebrew \n", - "369 88845 homebrew \n", - "370 88884 homebrew \n", - "371 88975 homebrew \n", - "372 89039 homebrew \n", - "373 89091 homebrew \n", - "374 89305 homebrew \n", - "375 89327 homebrew \n", - "376 89384 homebrew \n", - "377 89541 homebrew \n", - "378 89949 homebrew \n", - "379 90205 homebrew \n", - "380 90433 homebrew \n", - "381 90568 homebrew \n", - "382 90602 homebrew \n", - "383 90714 homebrew \n", - "384 90813 homebrew \n", - "385 90961 homebrew \n", - "386 91026 homebrew \n", - "387 91062 homebrew \n", - "388 91113 homebrew \n", - "389 91159 homebrew \n", - "390 91240 homebrew \n", - "391 
91270 homebrew \n", - "392 91294 homebrew \n", - "393 91326 homebrew \n", - "394 91496 homebrew \n", - "395 91611 homebrew \n", - "396 91638 homebrew \n", - "397 91741 homebrew \n", - "398 91854 homebrew \n", - "399 91864 homebrew \n", - "400 91916 homebrew \n", - "401 91918 homebrew \n", - "402 91967 homebrew \n", - "403 92060 homebrew \n", - "404 92372 homebrew \n", - "405 92399 homebrew \n", - "406 92749 homebrew \n", - "407 92789 homebrew \n", - "408 92830 homebrew \n", - "409 92839 homebrew \n", - "410 92866 homebrew \n", - "411 92978 homebrew \n", - "412 93004 homebrew \n", - "413 93049 homebrew \n", - "414 93112 homebrew \n", - "415 93180 homebrew \n", - "416 93250 homebrew \n", - "417 93295 homebrew \n", - "418 93310 homebrew \n", - "419 93377 homebrew \n", - "420 93428 homebrew \n", - "421 93913 homebrew \n", - "422 94037 homebrew \n", - "423 95895 gitlabhq \n", - "424 96401 gitlabhq \n", - "425 96669 gitlabhq \n", - "426 96736 gitlabhq \n", - "427 96815 gitlabhq \n", - "428 97857 devise \n", - "429 100090 diaspora \n", - "430 100137 diaspora \n", - "431 101472 blueprint-css \n", - "432 102063 octopress \n", - "433 104578 paperclip \n", - "434 105484 compass \n", - "435 105965 compass \n", - "436 106161 kestrel \n", - "437 106447 finagle \n", - "438 106680 akka \n", - "439 106686 akka \n", - "440 106779 akka \n", - "441 106793 akka \n", - "442 107188 xsbt \n", - "443 107255 xsbt \n", - "444 107317 xsbt \n", - "445 107377 sbt \n", - "446 107809 scala \n", - "447 107815 scala \n", - "448 107819 scala \n", - "449 107824 scala \n", - "450 108259 scala \n", - "451 108348 mongo \n", - "452 108646 django \n", - "\n", - " project_url labeled_comment_count \n", - "0 https://api.github.com/repos/TrinityCore/Trini... 
800 \n", - "1 https://api.github.com/repos/mangos/MaNGOS 622 \n", - "2 https://api.github.com/repos/rails/rails 448 \n", - "3 https://api.github.com/repos/Bukkit/CraftBukkit 357 \n", - "4 https://api.github.com/repos/jquery/jquery 235 \n", - "5 https://api.github.com/repos/diaspora/diaspora 160 \n", - "6 https://api.github.com/repos/xbmc/xbmc 130 \n", - "7 https://api.github.com/repos/netty/netty 80 \n", - "8 https://api.github.com/repos/h5bp/html5-boiler... 80 \n", - "9 https://api.github.com/repos/joyent/node 65 \n", - "10 https://api.github.com/repos/mrdoob/three.js 65 \n", - "11 https://api.github.com/repos/akka/akka 62 \n", - "12 https://api.github.com/repos/mxcl/homebrew 57 \n", - "13 https://api.github.com/repos/gitlabhq/gitlabhq 55 \n", - "14 https://api.github.com/repos/symfony/symfony 46 \n", - "15 https://api.github.com/repos/openframeworks/op... 40 \n", - "16 https://api.github.com/repos/cakephp/cakephp 33 \n", - "17 https://api.github.com/repos/joyent/libuv 29 \n", - "18 https://api.github.com/repos/plataformatec/devise 22 \n", - "19 https://api.github.com/repos/kennethreitz/requ... 20 \n", - "20 https://api.github.com/repos/django/django 19 \n", - "21 https://api.github.com/repos/mojombo/jekyll 18 \n", - "22 https://api.github.com/repos/EllisLab/CodeIgniter 15 \n", - "23 https://api.github.com/repos/midgetspy/Sick-Beard 15 \n", - "24 https://api.github.com/repos/thoughtbot/paperclip 14 \n", - "25 https://api.github.com/repos/libgit2/libgit2 13 \n", - "26 https://api.github.com/repos/jmarshallnz/xbmc 12 \n", - "27 https://api.github.com/repos/zurb/foundation 12 \n", - "28 https://api.github.com/repos/ginatrapani/ThinkUp 12 \n", - "29 https://api.github.com/repos/flooose/rails 12 \n", - "30 https://api.github.com/repos/vmg/redcarpet 11 \n", - "31 https://api.github.com/repos/sebastianbergmann... 11 \n", - "32 https://api.github.com/repos/chriseppstein/com... 
11 \n", - "33 https://api.github.com/repos/scala/scala 11 \n", - "34 https://api.github.com/repos/facebook/php-sdk 10 \n", - "35 https://api.github.com/repos/pinetops/rails 9 \n", - "36 https://api.github.com/repos/SignalR/SignalR 8 \n", - "37 https://api.github.com/repos/NancyFx/Nancy 8 \n", - "38 https://api.github.com/repos/elasticsearch/ela... 8 \n", - "39 https://api.github.com/repos/facebook/facebook... 8 \n", - "40 https://api.github.com/repos/zetter/rails 8 \n", - "41 https://api.github.com/repos/ariya/phantomjs 7 \n", - "42 https://api.github.com/repos/theuni/xbmc 7 \n", - "43 https://api.github.com/repos/antirez/redis 7 \n", - "44 https://api.github.com/repos/indutny/node 7 \n", - "45 https://api.github.com/repos/xphere-forks/symfony 7 \n", - "46 https://api.github.com/repos/reddit/reddit 7 \n", - "47 https://api.github.com/repos/boto/boto 7 \n", - "48 https://api.github.com/repos/django-debug-tool... 7 \n", - "49 https://api.github.com/repos/divio/django-cms 7 \n", - "50 https://api.github.com/repos/andhapp/rails 7 \n", - "51 https://api.github.com/repos/davilla/xbmc 6 \n", - "52 https://api.github.com/repos/moxiecode/plupload 6 \n", - "53 https://api.github.com/repos/JakeWharton/Actio... 6 \n", - "54 https://api.github.com/repos/mwilkie/thinkup 6 \n", - "55 https://api.github.com/repos/bjeanes/rails 6 \n", - "56 https://api.github.com/repos/twitter/gizzard 6 \n", - "57 https://api.github.com/repos/gavinandresen/bit... 5 \n", - "58 https://api.github.com/repos/Montellese/xbmc 5 \n", - "59 https://api.github.com/repos/elupus/xbmc 5 \n", - "60 https://api.github.com/repos/ServiceStack/Serv... 
5 \n", - "61 https://api.github.com/repos/harvesthq/chosen 5 \n", - "62 https://api.github.com/repos/dchill42/CodeIgniter 5 \n", - "63 https://api.github.com/repos/andre-d/reddit 5 \n", - "64 https://api.github.com/repos/snowplow/xsbt 5 \n", - "65 https://api.github.com/repos/paulp/scala 5 \n", - "66 https://api.github.com/repos/bitcoin/bitcoin 4 \n", - "67 https://api.github.com/repos/0wing/xbmc 4 \n", - "68 https://api.github.com/repos/FernetMenta/xbmc 4 \n", - "69 https://api.github.com/repos/mbostock/d3 4 \n", - "70 https://api.github.com/repos/vicb/symfony 4 \n", - "71 https://api.github.com/repos/schmittjoh/symfony 4 \n", - "72 https://api.github.com/repos/SAP-Oxygen/rails 4 \n", - "73 https://api.github.com/repos/slave-but-free/rails 4 \n", - "74 https://api.github.com/repos/yihui/knitr 3 \n", - "75 https://api.github.com/repos/rstudio/shiny 3 \n", - "76 https://api.github.com/repos/mongodb/mongo 3 \n", - "77 https://api.github.com/repos/joyent/http-parser 3 \n", - "78 https://api.github.com/repos/Voyager1/xbmc 3 \n", - "79 https://api.github.com/repos/jimfcarroll/xbmc 3 \n", - "80 https://api.github.com/repos/Havenard/TrinityCore 3 \n", - "81 https://api.github.com/repos/Unity-Technologie... 3 \n", - "82 https://api.github.com/repos/SamSaffron/MiniPr... 
3 \n", - "83 https://api.github.com/repos/clojure/clojure 3 \n", - "84 https://api.github.com/repos/bschussek/symfony 3 \n", - "85 https://api.github.com/repos/bakura10/zf2 3 \n", - "86 https://api.github.com/repos/codeguy/Slim 3 \n", - "87 https://api.github.com/repos/facebook/tornado 3 \n", - "88 https://api.github.com/repos/sikachu/rails 3 \n", - "89 https://api.github.com/repos/steveklabnik/rails 3 \n", - "90 https://api.github.com/repos/bbenezech/rails 3 \n", - "91 https://api.github.com/repos/MindTooth/Homebrew 3 \n", - "92 https://api.github.com/repos/axelsteiner/homebrew 3 \n", - "93 https://api.github.com/repos/rgov/homebrew 3 \n", - "94 https://api.github.com/repos/imathis/octopress 3 \n", - "95 https://api.github.com/repos/sbt/sbt 3 \n", - "96 https://api.github.com/repos/adriaanm/scala 3 \n", - "97 https://api.github.com/repos/ElunaLuaEngine/TC... 3 \n", - "98 https://api.github.com/repos/hadley/devtools 2 \n", - "99 https://api.github.com/repos/opdenkamp/xbmc 2 \n", - "100 https://api.github.com/repos/cptspiff/xbmc 2 \n", - "101 https://api.github.com/repos/bobo1on1/xbmc 2 \n", - "102 https://api.github.com/repos/Memphiz/xbmc 2 \n", - "103 https://api.github.com/repos/kr/beanstalkd 2 \n", - "104 https://api.github.com/repos/koying/xbmc 2 \n", - "105 https://api.github.com/repos/jpsdr/xbmc 2 \n", - "106 https://api.github.com/repos/charsyam/redis 2 \n", - "107 https://api.github.com/repos/evilsocket/redis 2 \n", - "108 https://api.github.com/repos/bilderbuchi/openF... 2 \n", - "109 https://api.github.com/repos/hbons/SparkleShare 2 \n", - "110 https://api.github.com/repos/kandera/TrinityCore 2 \n", - "111 https://api.github.com/repos/restsharp/RestSharp 2 \n", - "112 https://api.github.com/repos/nathanmarz/storm 2 \n", - "113 https://api.github.com/repos/ScoreUnder/CraftB... 
2 \n", - "114 https://api.github.com/repos/isaacs/node 2 \n", - "115 https://api.github.com/repos/kuebk/node 2 \n", - "116 https://api.github.com/repos/mattrobenolt/node 2 \n", - "117 https://api.github.com/repos/bentruyman/html5-... 2 \n", - "118 https://api.github.com/repos/hhamon/symfony 2 \n", - "119 https://api.github.com/repos/drak/symfony 2 \n", - "120 https://api.github.com/repos/Tobion/symfony 2 \n", - "121 https://api.github.com/repos/and-ers/CodeIgniter 2 \n", - "122 https://api.github.com/repos/weierophinney/zf2 2 \n", - "123 https://api.github.com/repos/markstory/cakephp 2 \n", - "124 https://api.github.com/repos/ADmad/cakephp 2 \n", - "125 https://api.github.com/repos/brandonroberts/Th... 2 \n", - "126 https://api.github.com/repos/schrolli/cakephp 2 \n", - "127 https://api.github.com/repos/fizzka/phpunit 2 \n", - "128 https://api.github.com/repos/andrewgodwin/django 2 \n", - "129 https://api.github.com/repos/carljm/django 2 \n", - "130 https://api.github.com/repos/melinath/django 2 \n", - "131 https://api.github.com/repos/drogus/rails 2 \n", - "132 https://api.github.com/repos/joshk/rails 2 \n", - "133 https://api.github.com/repos/bratish/rails 2 \n", - "134 https://api.github.com/repos/anildigital/rails 2 \n", - "135 https://api.github.com/repos/sishen/rails 2 \n", - "136 https://api.github.com/repos/goncalossilva/rails 2 \n", - "137 https://api.github.com/repos/jenseng/rails 2 \n", - "138 https://api.github.com/repos/jenslukowski/rails 2 \n", - "139 https://api.github.com/repos/gururuby/rails 2 \n", - "140 https://api.github.com/repos/docwhat/homebrew 2 \n", - "141 https://api.github.com/repos/mbrunthaler/homebrew 2 \n", - "142 https://api.github.com/repos/clusty/homebrew 2 \n", - "143 https://api.github.com/repos/semaperepelitsa/h... 2 \n", - "144 https://api.github.com/repos/alastairandrew/ho... 
2 \n", - "145 https://api.github.com/repos/danielb2/homebrew 2 \n", - "146 https://api.github.com/repos/simleo/homebrew 2 \n", - "147 https://api.github.com/repos/vhbit/homebrew 2 \n", - "148 https://api.github.com/repos/christophe-de/dia... 2 \n", - "149 https://api.github.com/repos/andrewreid/octopress 2 \n", - "150 https://api.github.com/repos/twitter/finagle 2 \n", - "151 https://api.github.com/repos/gkossakowski/sbt 2 \n", - "152 https://api.github.com/repos/scalatra/scalatra 2 \n", - "153 https://api.github.com/repos/magarciaEPFL/scala 2 \n", - "154 https://api.github.com/repos/facebook/folly 1 \n", - "155 https://api.github.com/repos/TheBlueMatt/bitcoin 1 \n", - "156 https://api.github.com/repos/yoni/devtools 1 \n", - "157 https://api.github.com/repos/laanwj/bitcoin 1 \n", - "158 https://api.github.com/repos/dburrows/phantomjs 1 \n", - "159 https://api.github.com/repos/guanqun/mongo 1 \n", - "160 https://api.github.com/repos/stulentsev/mongo 1 \n", - "161 https://api.github.com/repos/bnoordhuis/http-p... 
1 \n", - "162 https://api.github.com/repos/amcfague/mongo 1 \n", - "163 https://api.github.com/repos/Fneufneu/xbmc 1 \n", - "164 https://api.github.com/repos/anssih/xbmc 1 \n", - "165 https://api.github.com/repos/pieh/xbmc 1 \n", - "166 https://api.github.com/repos/llyzs/xbmc 1 \n", - "167 https://api.github.com/repos/HarryMuscle/XBMC 1 \n", - "168 https://api.github.com/repos/garbear/xbmc 1 \n", - "169 https://api.github.com/repos/doozan/xbmc 1 \n", - "170 https://api.github.com/repos/cbxbiker61/xbmc 1 \n", - "171 https://api.github.com/repos/vdrfan/xbmc 1 \n", - "172 https://api.github.com/repos/malard/xbmc 1 \n", - "173 https://api.github.com/repos/vermie/mangos 1 \n", - "174 https://api.github.com/repos/xbmcfanboy/xbmc 1 \n", - "175 https://api.github.com/repos/taxigps/xbmc 1 \n", - "176 https://api.github.com/repos/aviksil/xbmc 1 \n", - "177 https://api.github.com/repos/MichaelAnders/xbmc 1 \n", - "178 https://api.github.com/repos/PSyton/xbmc 1 \n", - "179 https://api.github.com/repos/adam-aph/xbmc 1 \n", - "180 https://api.github.com/repos/herrnst/xbmc 1 \n", - "181 https://api.github.com/repos/xhaggi/xbmc 1 \n", - "182 https://api.github.com/repos/manio/xbmc 1 \n", - "183 https://api.github.com/repos/dragonflight/xbmc 1 \n", - "184 https://api.github.com/repos/maheus/xbmc 1 \n", - "185 https://api.github.com/repos/elbeardmorez/xbmc 1 \n", - "186 https://api.github.com/repos/DigitalDJ/xbmc 1 \n", - "187 https://api.github.com/repos/FlyingRat/xbmc 1 \n", - "188 https://api.github.com/repos/Jalle19/xbmc 1 \n", - "189 https://api.github.com/repos/mrb/redis 1 \n", - "190 https://api.github.com/repos/liuliu/ccv 1 \n", - "191 https://api.github.com/repos/phil65/xbmc 1 \n", - "192 https://api.github.com/repos/marcelaraujo/redis 1 \n", - "193 https://api.github.com/repos/jumping/redis 1 \n", - "194 https://api.github.com/repos/melvyn-sopacua/redis 1 \n", - "195 https://api.github.com/repos/memcached/memcached 1 \n", - "196 
https://api.github.com/repos/fungos/hiphop-php 1 \n", - "197 https://api.github.com/repos/npeacock/xbmca10 1 \n", - "198 https://api.github.com/repos/phkelley/libgit2 1 \n", - "199 https://api.github.com/repos/martinwoodward/li... 1 \n", - "200 https://api.github.com/repos/carlosmn/libgit2 1 \n", - "201 https://api.github.com/repos/obviousjim/openFr... 1 \n", - "202 https://api.github.com/repos/openFrameworks-Ra... 1 \n", - "203 https://api.github.com/repos/glesserd/libgit2 1 \n", - "204 https://api.github.com/repos/serras/SparkleShare 1 \n", - "205 https://api.github.com/repos/0omega/TrinityCore 1 \n", - "206 https://api.github.com/repos/blipi/TrinityCore 1 \n", - "207 https://api.github.com/repos/zorix/TrinityCore 1 \n", - "208 https://api.github.com/repos/Naervin/TrinityCore 1 \n", - "209 https://api.github.com/repos/4m1g0/TrinityCore 1 \n", - "210 https://api.github.com/repos/dtmax/plupload 1 \n", - "211 https://api.github.com/repos/WarHead/uwom-server 1 \n", - "212 https://api.github.com/repos/hacknowledge/Trin... 1 \n", - "213 https://api.github.com/repos/johnholiver/Trini... 1 \n", - "214 https://api.github.com/repos/Baeumchen/Trinity... 1 \n", - "215 https://api.github.com/repos/cyplo/mono 1 \n", - "216 https://api.github.com/repos/QuickJack/mono 1 \n", - "217 https://api.github.com/repos/ermshiperete/mono 1 \n", - "218 https://api.github.com/repos/ukplc/mono 1 \n", - "219 https://api.github.com/repos/killabytenow/mono 1 \n", - "220 https://api.github.com/repos/jeffgabhart/Servi... 1 \n", - "221 https://api.github.com/repos/leon-andria/Servi... 1 \n", - "222 https://api.github.com/repos/AutoMapper/AutoMa... 
1 \n", - "223 https://api.github.com/repos/thedersen/Nancy 1 \n", - "224 https://api.github.com/repos/crdeutsch/RestSharp 1 \n", - "225 https://api.github.com/repos/Haacked/RestSharp 1 \n", - "226 https://api.github.com/repos/mat-mcloughlin/Nancy 1 \n", - "227 https://api.github.com/repos/imotov/elasticsearch 1 \n", - "228 https://api.github.com/repos/javanna/elasticse... 1 \n", - "229 https://api.github.com/repos/brwe/elasticsearch 1 \n", - "230 https://api.github.com/repos/Zaraza107/CraftBu... 1 \n", - "231 https://api.github.com/repos/cyberdudedk/Craft... 1 \n", - "232 https://api.github.com/repos/TheEliteFour/Craf... 1 \n", - "233 https://api.github.com/repos/dumptruckman/Craf... 1 \n", - "234 https://api.github.com/repos/jpinner/netty 1 \n", - "235 https://api.github.com/repos/CruzBishop/netty 1 \n", - "236 https://api.github.com/repos/zcourts/netty 1 \n", - "237 https://api.github.com/repos/Melon1017/netty 1 \n", - "238 https://api.github.com/repos/felixge/node 1 \n", - "239 https://api.github.com/repos/TooTallNate/node 1 \n", - "240 https://api.github.com/repos/pixelglow/node 1 \n", - "241 https://api.github.com/repos/louisremi/jquery 1 \n", - "242 https://api.github.com/repos/interruptz/node 1 \n", - "243 https://api.github.com/repos/rwaldron/jquery 1 \n", - "244 https://api.github.com/repos/gnarf/jquery 1 \n", - "245 https://api.github.com/repos/SlexAxton/jquery 1 \n", - "246 https://api.github.com/repos/alexisabril/jquery 1 \n", - "247 https://api.github.com/repos/laverdet/node 1 \n", - "248 https://api.github.com/repos/JSBizon/node 1 \n", - "249 https://api.github.com/repos/bartaz/impress.js 1 \n", - "250 https://api.github.com/repos/mikesherov/jquery 1 \n", - "251 https://api.github.com/repos/kpozin/jquery-nodom 1 \n", - "252 https://api.github.com/repos/orkel/jquery 1 \n", - "253 https://api.github.com/repos/joelbirchler/jquery 1 \n", - "254 https://api.github.com/repos/gibson042/jquery 1 \n", - "255 https://api.github.com/repos/cleanforestco/htm... 
1 \n", - "256 https://api.github.com/repos/mcurcio/node 1 \n", - "257 https://api.github.com/repos/iizukanao/node 1 \n", - "258 https://api.github.com/repos/danilsomsikov/jquery 1 \n", - "259 https://api.github.com/repos/shalecraig/jquery 1 \n", - "260 https://api.github.com/repos/WebReflection/node 1 \n", - "261 https://api.github.com/repos/hueniverse/node 1 \n", - "262 https://api.github.com/repos/jasondavies/d3 1 \n", - "263 https://api.github.com/repos/GerHobbelt/d3 1 \n", - "264 https://api.github.com/repos/Sikwan/chosen 1 \n", - "265 https://api.github.com/repos/jvivs/foundation 1 \n", - "266 https://api.github.com/repos/fabpot/symfony 1 \n", - "267 https://api.github.com/repos/usefulthink/symfony 1 \n", - "268 https://api.github.com/repos/jwage/symfony 1 \n", - "269 https://api.github.com/repos/weaverryan/symfony 1 \n", - "270 https://api.github.com/repos/francisbesset/sym... 1 \n", - "271 https://api.github.com/repos/mvrhov/symfony 1 \n", - "272 https://api.github.com/repos/lsmith77/symfony 1 \n", - "273 https://api.github.com/repos/stloyd/symfony 1 \n", - "274 https://api.github.com/repos/tacman/symfony 1 \n", - "275 https://api.github.com/repos/jfsimon/symfony 1 \n", - "276 https://api.github.com/repos/AddictArts/three.js 1 \n", - "277 https://api.github.com/repos/dlsniper/symfony 1 \n", - "278 https://api.github.com/repos/tapio/three.js 1 \n", - "279 https://api.github.com/repos/ircmaxell/symfony 1 \n", - "280 https://api.github.com/repos/bhouston/three.js 1 \n", - "281 https://api.github.com/repos/gnugat/symfony 1 \n", - "282 https://api.github.com/repos/kevinoe/three.js 1 \n", - "283 https://api.github.com/repos/lminko/three.js 1 \n", - "284 https://api.github.com/repos/darkwhispering/Co... 1 \n", - "285 https://api.github.com/repos/zechdc/CodeIgniter 1 \n", - "286 https://api.github.com/repos/vlakoff/CodeIgniter 1 \n", - "287 https://api.github.com/repos/BillHeaton/CodeIg... 1 \n", - "288 https://api.github.com/repos/chrispassas/CodeI... 
1 \n", - "289 https://api.github.com/repos/zendframework/zf2 1 \n", - "290 https://api.github.com/repos/DASPRiD/zf2 1 \n", - "291 https://api.github.com/repos/ezimuel/zf2 1 \n", - "292 https://api.github.com/repos/Maks3w/zf2 1 \n", - "293 https://api.github.com/repos/davidwindell/zf2 1 \n", - "294 https://api.github.com/repos/jacobkiers/zf2 1 \n", - "295 https://api.github.com/repos/samsonasik/zf2 1 \n", - "296 https://api.github.com/repos/mithaler/thinkup 1 \n", - "297 https://api.github.com/repos/samwho/ThinkUp 1 \n", - "298 https://api.github.com/repos/anildash/ThinkUp 1 \n", - "299 https://api.github.com/repos/kylehase/ThinkUp 1 \n", - "300 https://api.github.com/repos/rgroves/ThinkUp 1 \n", - "301 https://api.github.com/repos/ceeram/cakephp 1 \n", - "302 https://api.github.com/repos/bleything/ThinkUp 1 \n", - "303 https://api.github.com/repos/zoghal/cakephp 1 \n", - "304 https://api.github.com/repos/niwibe/django 1 \n", - "305 https://api.github.com/repos/pvanderlinden/django 1 \n", - "306 https://api.github.com/repos/mitsuhiko/flask 1 \n", - "307 https://api.github.com/repos/dandrzejewski/req... 1 \n", - "308 https://api.github.com/repos/nicoddemus/requests 1 \n", - "309 https://api.github.com/repos/k21/reddit 1 \n", - "310 https://api.github.com/repos/msaelices/django-... 1 \n", - "311 https://api.github.com/repos/irskep/boto 1 \n", - "312 https://api.github.com/repos/goura/boto 1 \n", - "313 https://api.github.com/repos/fayazkhan/boto 1 \n", - "314 https://api.github.com/repos/bshep/Sick-Beard 1 \n", - "315 https://api.github.com/repos/ozeraser/Sick-Beard 1 \n", - "316 https://api.github.com/repos/jorgenpt/Sick-Beard 1 \n", - "317 https://api.github.com/repos/EchelonFour/Sick-... 
1 \n", - "318 https://api.github.com/repos/mozvip/Sick-Beard 1 \n", - "319 https://api.github.com/repos/Prinz23/Sick-Beard 1 \n", - "320 https://api.github.com/repos/coderrr/rails 1 \n", - "321 https://api.github.com/repos/zires/rails 1 \n", - "322 https://api.github.com/repos/bigfix/rails 1 \n", - "323 https://api.github.com/repos/rafaelfranca/rails 1 \n", - "324 https://api.github.com/repos/github/rails 1 \n", - "325 https://api.github.com/repos/cldwalker/rails 1 \n", - "326 https://api.github.com/repos/indirect/rails 1 \n", - "327 https://api.github.com/repos/arunagw/rails 1 \n", - "328 https://api.github.com/repos/raysrashmi/rails 1 \n", - "329 https://api.github.com/repos/smartinez87/rails 1 \n", - "330 https://api.github.com/repos/castlerock/rails 1 \n", - "331 https://api.github.com/repos/senny/rails 1 \n", - "332 https://api.github.com/repos/acroca/rails 1 \n", - "333 https://api.github.com/repos/gazay/rails 1 \n", - "334 https://api.github.com/repos/powderflask/djang... 1 \n", - "335 https://api.github.com/repos/kennyj/rails 1 \n", - "336 https://api.github.com/repos/tanin47/rails-1 1 \n", - "337 https://api.github.com/repos/nashby/rails 1 \n", - "338 https://api.github.com/repos/larskanis/rails 1 \n", - "339 https://api.github.com/repos/kielkowicz/rails 1 \n", - "340 https://api.github.com/repos/bogdan/rails 1 \n", - "341 https://api.github.com/repos/tigrish/rails 1 \n", - "342 https://api.github.com/repos/homakov/rails 1 \n", - "343 https://api.github.com/repos/dylanahsmith/rails 1 \n", - "344 https://api.github.com/repos/revans/rails 1 \n", - "345 https://api.github.com/repos/blowmage/rails 1 \n", - "346 https://api.github.com/repos/robru/jekyll 1 \n", - "347 https://api.github.com/repos/Grandrath/rails 1 \n", - "348 https://api.github.com/repos/gsphanikumar/rails 1 \n", - "349 https://api.github.com/repos/gaurish/rails 1 \n", - "350 https://api.github.com/repos/morgancurrie/rails 1 \n", - "351 https://api.github.com/repos/Jiebour/rails 1 \n", - "352 
https://api.github.com/repos/frodsan/rails 1 \n", - "353 https://api.github.com/repos/ankit8898/rails 1 \n", - "354 https://api.github.com/repos/versioncontrol/rails 1 \n", - "355 https://api.github.com/repos/godfat/homebrew 1 \n", - "356 https://api.github.com/repos/wright/homebrew 1 \n", - "357 https://api.github.com/repos/bdd/homebrew 1 \n", - "358 https://api.github.com/repos/greedy/homebrew 1 \n", - "359 https://api.github.com/repos/edeustace/jekyll 1 \n", - "360 https://api.github.com/repos/jlcapps/homebrew 1 \n", - "361 https://api.github.com/repos/justinclift/homebrew 1 \n", - "362 https://api.github.com/repos/tusbar/homebrew 1 \n", - "363 https://api.github.com/repos/neglectedvalue/ho... 1 \n", - "364 https://api.github.com/repos/losmuertos/homebrew 1 \n", - "365 https://api.github.com/repos/larseggert/homebrew 1 \n", - "366 https://api.github.com/repos/donspaulding/home... 1 \n", - "367 https://api.github.com/repos/metamatt/jekyll 1 \n", - "368 https://api.github.com/repos/jacknagel/homebrew 1 \n", - "369 https://api.github.com/repos/vibrog/homebrew 1 \n", - "370 https://api.github.com/repos/catsby/homebrew 1 \n", - "371 https://api.github.com/repos/thoughtpolice/hom... 1 \n", - "372 https://api.github.com/repos/jedi4ever/homebrew 1 \n", - "373 https://api.github.com/repos/dch/homebrew 1 \n", - "374 https://api.github.com/repos/mistydemeo/homebrew 1 \n", - "375 https://api.github.com/repos/tonit/homebrew 1 \n", - "376 https://api.github.com/repos/svenax/homebrew 1 \n", - "377 https://api.github.com/repos/jcupitt/homebrew 1 \n", - "378 https://api.github.com/repos/wesen/homebrew 1 \n", - "379 https://api.github.com/repos/vertis/homebrew 1 \n", - "380 https://api.github.com/repos/2bits/homebrew 1 \n", - "381 https://api.github.com/repos/jwilkins/homebrew 1 \n", - "382 https://api.github.com/repos/anatol/homebrew 1 \n", - "383 https://api.github.com/repos/msabramo/homebrew 1 \n", - "384 https://api.github.com/repos/azarbayejani/home... 
1 \n", - "385 https://api.github.com/repos/nicolasdespres/ho... 1 \n", - "386 https://api.github.com/repos/yllan/homebrew 1 \n", - "387 https://api.github.com/repos/samueljohn/homebrew 1 \n", - "388 https://api.github.com/repos/cartazio/homebrew 1 \n", - "389 https://api.github.com/repos/fish2000/homebrew 1 \n", - "390 https://api.github.com/repos/glejeune/homebrew 1 \n", - "391 https://api.github.com/repos/bpiwowar/homebrew 1 \n", - "392 https://api.github.com/repos/ingmar/homebrew 1 \n", - "393 https://api.github.com/repos/funnymanva/homebrew 1 \n", - "394 https://api.github.com/repos/nmadura/homebrew 1 \n", - "395 https://api.github.com/repos/sandeep048/homebrew 1 \n", - "396 https://api.github.com/repos/lifepillar/homebrew 1 \n", - "397 https://api.github.com/repos/ummels/homebrew 1 \n", - "398 https://api.github.com/repos/nevir/homebrew 1 \n", - "399 https://api.github.com/repos/mrjbq7/homebrew 1 \n", - "400 https://api.github.com/repos/zhangcheng/homebrew 1 \n", - "401 https://api.github.com/repos/rmndk/homebrew 1 \n", - "402 https://api.github.com/repos/rhysd/homebrew 1 \n", - "403 https://api.github.com/repos/mattyr/homebrew 1 \n", - "404 https://api.github.com/repos/AstonJ/homebrew 1 \n", - "405 https://api.github.com/repos/raedwulf/homebrew 1 \n", - "406 https://api.github.com/repos/rays/homebrew 1 \n", - "407 https://api.github.com/repos/OldCrow/homebrew 1 \n", - "408 https://api.github.com/repos/vogonistic/homebrew 1 \n", - "409 https://api.github.com/repos/ashirazi/homebrew 1 \n", - "410 https://api.github.com/repos/sheerun/homebrew 1 \n", - "411 https://api.github.com/repos/marr/homebrew 1 \n", - "412 https://api.github.com/repos/wix/homebrew 1 \n", - "413 https://api.github.com/repos/andriytyurnikov/h... 1 \n", - "414 https://api.github.com/repos/tinystatemachine/... 
1 \n", - "415 https://api.github.com/repos/mashtizadeh/homebrew 1 \n", - "416 https://api.github.com/repos/peabody124/homebrew 1 \n", - "417 https://api.github.com/repos/handlename/homebrew 1 \n", - "418 https://api.github.com/repos/crishoj/homebrew 1 \n", - "419 https://api.github.com/repos/eladg/homebrew 1 \n", - "420 https://api.github.com/repos/chrmoritz/homebrew 1 \n", - "421 https://api.github.com/repos/natritmeyer/homebrew 1 \n", - "422 https://api.github.com/repos/chenpc/homebrew 1 \n", - "423 https://api.github.com/repos/zzet/gitlabhq 1 \n", - "424 https://api.github.com/repos/mikew/gitlabhq 1 \n", - "425 https://api.github.com/repos/proverbface/gitlabhq 1 \n", - "426 https://api.github.com/repos/drahamim/gitlabhq 1 \n", - "427 https://api.github.com/repos/senny/gitlabhq 1 \n", - "428 https://api.github.com/repos/rahearn/devise 1 \n", - "429 https://api.github.com/repos/Gonzih/diaspora 1 \n", - "430 https://api.github.com/repos/Raven24/diaspora 1 \n", - "431 https://api.github.com/repos/joshuaclayton/blu... 
1 \n", - "432 https://api.github.com/repos/pilif/octopress 1 \n", - "433 https://api.github.com/repos/yar/paperclip 1 \n", - "434 https://api.github.com/repos/gmclelland/compass 1 \n", - "435 https://api.github.com/repos/cimmanon/compass 1 \n", - "436 https://api.github.com/repos/robey/kestrel 1 \n", - "437 https://api.github.com/repos/benpence/finagle 1 \n", - "438 https://api.github.com/repos/paulpach/akka 1 \n", - "439 https://api.github.com/repos/metamorph/akka 1 \n", - "440 https://api.github.com/repos/scullxbones/akka 1 \n", - "441 https://api.github.com/repos/drewhk/akka 1 \n", - "442 https://api.github.com/repos/retronym/xsbt 1 \n", - "443 https://api.github.com/repos/vigdorchik/xsbt 1 \n", - "444 https://api.github.com/repos/ebowman/xsbt 1 \n", - "445 https://api.github.com/repos/ezh/sbt 1 \n", - "446 https://api.github.com/repos/odersky/scala 1 \n", - "447 https://api.github.com/repos/lrytz/scala 1 \n", - "448 https://api.github.com/repos/soc/scala 1 \n", - "449 https://api.github.com/repos/phaller/scala 1 \n", - "450 https://api.github.com/repos/Ichoran/scala 1 \n", - "451 https://api.github.com/repos/idning/mongo 1 \n", - "452 https://api.github.com/repos/zbenjamin/django 1 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "query_check2 = \"\"\"\n", - "SELECT\n", - " p.id AS project_id,\n", - " p.name AS project_name,\n", - " p.url AS project_url,\n", - " COUNT(DISTINCT s.ID) AS labeled_comment_count\n", - "FROM projects p\n", - "INNER JOIN commits c ON p.id = c.project_id\n", - "INNER JOIN commit_comments cc ON c.id = cc.commit_id\n", - "INNER JOIN comment_sentiment s ON cc.comment_id = s.ID\n", - "GROUP BY p.id, p.name, p.url\n", - "ORDER BY labeled_comment_count DESC;\n", - "\"\"\"\n", - "\n", - "with engine.connect() as con:\n", - " check2 = pd.read_sql(text(query_check2), con)\n", - "\n", - "print(f\"Projects with sentiment-labeled commit comments: {len(check2)}\")\n", - "display(check2)" - ] - }, - { - 
"cell_type": "markdown", - "id": "cell-check3-header", - "metadata": {}, - "source": [ - "### Check 3: Which projects have the most labeled PR comments?\n", - "\n", - "Now, let's do the same project ranking for the pull request inline comments most heavily represented in the Gold Standard." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "cell-check3", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Projects with sentiment-labeled PR comments: 64\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
project_idproject_nameproject_urllabeled_comment_count
01akkahttps://api.github.com/repos/akka/akka404
151671symfonyhttps://api.github.com/repos/symfony/symfony315
278852railshttps://api.github.com/repos/rails/rails177
324292nodehttps://api.github.com/repos/joyent/node163
462502zf2https://api.github.com/repos/zendframework/zf2154
525875jqueryhttps://api.github.com/repos/jquery/jquery150
679163homebrewhttps://api.github.com/repos/mxcl/homebrew138
79636libgit2https://api.github.com/repos/libgit2/libgit2122
863250cakephphttps://api.github.com/repos/cakephp/cakephp111
9107534scalahttps://api.github.com/repos/scala/scala110
1010629SignalRhttps://api.github.com/repos/SignalR/SignalR90
1169158djangohttps://api.github.com/repos/django/django87
123583xbmchttps://api.github.com/repos/xbmc/xbmc85
1322981nettyhttps://api.github.com/repos/netty/netty82
14454bitcoinhttps://api.github.com/repos/bitcoin/bitcoin77
1591020gitlabhqhttps://api.github.com/repos/gitlabhq/gitlabhq54
1659607CodeIgniterhttps://api.github.com/repos/EllisLab/CodeIgniter50
1779166jekyllhttps://api.github.com/repos/mojombo/jekyll42
1812TrinityCorehttps://api.github.com/repos/TrinityCore/Trini...38
1922980CraftBukkithttps://api.github.com/repos/Bukkit/CraftBukkit38
2014327Nancyhttps://api.github.com/repos/NancyFx/Nancy37
2142644d3https://api.github.com/repos/mbostock/d331
2278835django-cmshttps://api.github.com/repos/divio/django-cms25
239215openFrameworkshttps://api.github.com/repos/openframeworks/op...24
2491331diasporahttps://api.github.com/repos/diaspora/diaspora23
2510593libuvhttps://api.github.com/repos/joyent/libuv19
26106160finaglehttps://api.github.com/repos/twitter/finagle16
27107672zipkinhttps://api.github.com/repos/twitter/zipkin15
2875984reddithttps://api.github.com/repos/reddit/reddit14
296knitrhttps://api.github.com/repos/yihui/knitr14
3095385devisehttps://api.github.com/repos/plataformatec/devise14
3147382chosenhttps://api.github.com/repos/harvesthq/chosen13
3216402stormhttps://api.github.com/repos/nathanmarz/storm13
3376945botohttps://api.github.com/repos/boto/boto13
3417515elasticsearchhttps://api.github.com/repos/elasticsearch/ela...12
3574914requestshttps://api.github.com/repos/kennethreitz/requ...12
3664176ThinkUphttps://api.github.com/repos/ginatrapani/ThinkUp11
37104307papercliphttps://api.github.com/repos/thoughtbot/paperclip11
3871786tornadohttps://api.github.com/repos/facebook/tornado9
3913566monohttps://api.github.com/repos/mono/mono8
4026388html5-boilerplatehttps://api.github.com/repos/h5bp/html5-boiler...8
4111phantomjshttps://api.github.com/repos/ariya/phantomjs7
4215018RestSharphttps://api.github.com/repos/restsharp/RestSharp6
4310380redcarpethttps://api.github.com/repos/vmg/redcarpet6
4477319Sick-Beardhttps://api.github.com/repos/midgetspy/Sick-Beard4
452devtoolshttps://api.github.com/repos/hadley/devtools4
46105378compasshttps://api.github.com/repos/chriseppstein/com...4
4723781androidhttps://api.github.com/repos/github/android3
483750http-parserhttps://api.github.com/repos/joyent/http-parser3
4917566ActionBarSherlockhttps://api.github.com/repos/JakeWharton/Actio...3
509mongohttps://api.github.com/repos/mongodb/mongo3
5116134MiniProfilerhttps://api.github.com/repos/SamSaffron/MiniPr...2
52101997octopresshttps://api.github.com/repos/imathis/octopress2
5362501php-sdkhttps://api.github.com/repos/facebook/php-sdk2
5414328ServiceStackhttps://api.github.com/repos/ServiceStack/Serv...2
55107085flockdbhttps://api.github.com/repos/twitter/flockdb2
5651669foundationhttps://api.github.com/repos/zurb/foundation1
577242redishttps://api.github.com/repos/antirez/redis1
5827504impress.jshttps://api.github.com/repos/bartaz/impress.js1
5950618three.jshttps://api.github.com/repos/mrdoob/three.js1
60107186gizzardhttps://api.github.com/repos/twitter/gizzard1
61107187sbthttps://api.github.com/repos/sbt/sbt1
6265107Slimhttps://api.github.com/repos/codeguy/Slim1
6314912AutoMapperhttps://api.github.com/repos/AutoMapper/AutoMa...1
\n", - "
" - ], - "text/plain": [ - " project_id project_name \\\n", - "0 1 akka \n", - "1 51671 symfony \n", - "2 78852 rails \n", - "3 24292 node \n", - "4 62502 zf2 \n", - "5 25875 jquery \n", - "6 79163 homebrew \n", - "7 9636 libgit2 \n", - "8 63250 cakephp \n", - "9 107534 scala \n", - "10 10629 SignalR \n", - "11 69158 django \n", - "12 3583 xbmc \n", - "13 22981 netty \n", - "14 454 bitcoin \n", - "15 91020 gitlabhq \n", - "16 59607 CodeIgniter \n", - "17 79166 jekyll \n", - "18 12 TrinityCore \n", - "19 22980 CraftBukkit \n", - "20 14327 Nancy \n", - "21 42644 d3 \n", - "22 78835 django-cms \n", - "23 9215 openFrameworks \n", - "24 91331 diaspora \n", - "25 10593 libuv \n", - "26 106160 finagle \n", - "27 107672 zipkin \n", - "28 75984 reddit \n", - "29 6 knitr \n", - "30 95385 devise \n", - "31 47382 chosen \n", - "32 16402 storm \n", - "33 76945 boto \n", - "34 17515 elasticsearch \n", - "35 74914 requests \n", - "36 64176 ThinkUp \n", - "37 104307 paperclip \n", - "38 71786 tornado \n", - "39 13566 mono \n", - "40 26388 html5-boilerplate \n", - "41 11 phantomjs \n", - "42 15018 RestSharp \n", - "43 10380 redcarpet \n", - "44 77319 Sick-Beard \n", - "45 2 devtools \n", - "46 105378 compass \n", - "47 23781 android \n", - "48 3750 http-parser \n", - "49 17566 ActionBarSherlock \n", - "50 9 mongo \n", - "51 16134 MiniProfiler \n", - "52 101997 octopress \n", - "53 62501 php-sdk \n", - "54 14328 ServiceStack \n", - "55 107085 flockdb \n", - "56 51669 foundation \n", - "57 7242 redis \n", - "58 27504 impress.js \n", - "59 50618 three.js \n", - "60 107186 gizzard \n", - "61 107187 sbt \n", - "62 65107 Slim \n", - "63 14912 AutoMapper \n", - "\n", - " project_url labeled_comment_count \n", - "0 https://api.github.com/repos/akka/akka 404 \n", - "1 https://api.github.com/repos/symfony/symfony 315 \n", - "2 https://api.github.com/repos/rails/rails 177 \n", - "3 https://api.github.com/repos/joyent/node 163 \n", - "4 https://api.github.com/repos/zendframework/zf2 154 \n", - 
"5 https://api.github.com/repos/jquery/jquery 150 \n", - "6 https://api.github.com/repos/mxcl/homebrew 138 \n", - "7 https://api.github.com/repos/libgit2/libgit2 122 \n", - "8 https://api.github.com/repos/cakephp/cakephp 111 \n", - "9 https://api.github.com/repos/scala/scala 110 \n", - "10 https://api.github.com/repos/SignalR/SignalR 90 \n", - "11 https://api.github.com/repos/django/django 87 \n", - "12 https://api.github.com/repos/xbmc/xbmc 85 \n", - "13 https://api.github.com/repos/netty/netty 82 \n", - "14 https://api.github.com/repos/bitcoin/bitcoin 77 \n", - "15 https://api.github.com/repos/gitlabhq/gitlabhq 54 \n", - "16 https://api.github.com/repos/EllisLab/CodeIgniter 50 \n", - "17 https://api.github.com/repos/mojombo/jekyll 42 \n", - "18 https://api.github.com/repos/TrinityCore/Trini... 38 \n", - "19 https://api.github.com/repos/Bukkit/CraftBukkit 38 \n", - "20 https://api.github.com/repos/NancyFx/Nancy 37 \n", - "21 https://api.github.com/repos/mbostock/d3 31 \n", - "22 https://api.github.com/repos/divio/django-cms 25 \n", - "23 https://api.github.com/repos/openframeworks/op... 24 \n", - "24 https://api.github.com/repos/diaspora/diaspora 23 \n", - "25 https://api.github.com/repos/joyent/libuv 19 \n", - "26 https://api.github.com/repos/twitter/finagle 16 \n", - "27 https://api.github.com/repos/twitter/zipkin 15 \n", - "28 https://api.github.com/repos/reddit/reddit 14 \n", - "29 https://api.github.com/repos/yihui/knitr 14 \n", - "30 https://api.github.com/repos/plataformatec/devise 14 \n", - "31 https://api.github.com/repos/harvesthq/chosen 13 \n", - "32 https://api.github.com/repos/nathanmarz/storm 13 \n", - "33 https://api.github.com/repos/boto/boto 13 \n", - "34 https://api.github.com/repos/elasticsearch/ela... 12 \n", - "35 https://api.github.com/repos/kennethreitz/requ... 
12 \n", - "36 https://api.github.com/repos/ginatrapani/ThinkUp 11 \n", - "37 https://api.github.com/repos/thoughtbot/paperclip 11 \n", - "38 https://api.github.com/repos/facebook/tornado 9 \n", - "39 https://api.github.com/repos/mono/mono 8 \n", - "40 https://api.github.com/repos/h5bp/html5-boiler... 8 \n", - "41 https://api.github.com/repos/ariya/phantomjs 7 \n", - "42 https://api.github.com/repos/restsharp/RestSharp 6 \n", - "43 https://api.github.com/repos/vmg/redcarpet 6 \n", - "44 https://api.github.com/repos/midgetspy/Sick-Beard 4 \n", - "45 https://api.github.com/repos/hadley/devtools 4 \n", - "46 https://api.github.com/repos/chriseppstein/com... 4 \n", - "47 https://api.github.com/repos/github/android 3 \n", - "48 https://api.github.com/repos/joyent/http-parser 3 \n", - "49 https://api.github.com/repos/JakeWharton/Actio... 3 \n", - "50 https://api.github.com/repos/mongodb/mongo 3 \n", - "51 https://api.github.com/repos/SamSaffron/MiniPr... 2 \n", - "52 https://api.github.com/repos/imathis/octopress 2 \n", - "53 https://api.github.com/repos/facebook/php-sdk 2 \n", - "54 https://api.github.com/repos/ServiceStack/Serv... 2 \n", - "55 https://api.github.com/repos/twitter/flockdb 2 \n", - "56 https://api.github.com/repos/zurb/foundation 1 \n", - "57 https://api.github.com/repos/antirez/redis 1 \n", - "58 https://api.github.com/repos/bartaz/impress.js 1 \n", - "59 https://api.github.com/repos/mrdoob/three.js 1 \n", - "60 https://api.github.com/repos/twitter/gizzard 1 \n", - "61 https://api.github.com/repos/sbt/sbt 1 \n", - "62 https://api.github.com/repos/codeguy/Slim 1 \n", - "63 https://api.github.com/repos/AutoMapper/AutoMa... 
1 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "query_check3 = \"\"\"\n", - "SELECT\n", - " p.id AS project_id,\n", - " p.name AS project_name,\n", - " p.url AS project_url,\n", - " COUNT(DISTINCT s.ID) AS labeled_comment_count\n", - "FROM projects p\n", - "INNER JOIN pull_requests pr ON p.id = pr.base_repo_id\n", - "INNER JOIN pull_request_comments prc ON pr.id = prc.pull_request_id\n", - "INNER JOIN comment_sentiment s ON prc.comment_id = s.ID\n", - "GROUP BY p.id, p.name, p.url\n", - "ORDER BY labeled_comment_count DESC;\n", - "\"\"\"\n", - "\n", - "with engine.connect() as con:\n", - " check3 = pd.read_sql(text(query_check3), con)\n", - "\n", - "print(f\"Projects with sentiment-labeled PR comments: {len(check3)}\")\n", - "display(check3)" - ] - }, - { - "cell_type": "markdown", - "id": "cell-check4-header", - "metadata": {}, - "source": [ - "### Check 4: Are the labeled comments reachable from canonical repos?\n", - "\n", - "Projects on GitHub get forked all the time. Since forks share commit history with their upstream, the same comment IDs can appear under multiple projects in GHTorrent. This matters for Notebook 3 (config files generation). We want to know: if we only generate Kaiaulu configs for canonical (non-fork) repos, how much labeled data will we miss? The purpose of this query is to inform our coverage strategy going into Notebook 3.\n", - "\n", - "Expected values:\n", - "- `canonical_only`: ~4,555\n", - "- `fork_only`: ~569 (these will be missed when targeting canonical repos only)\n", - "- `both_sides`: ~2,083\n", - "- Fork only rate: ~7.9%\n", - "- Canonical reachable rate: ~92.1%\n", - "\n", - "From these values, we can see that ~92.1% of comments are reachable from canonical repos. The ~7.9% that are fork-only will be skipped when we generate project config files in Notebook 3. This is an acceptable tradeoff. We document it here so the limitation is visible." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "cell-check4", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Canonical vs fork accessibility summary:\n", - " canonical_only: 4555.0 (expected ~4555)\n", - " fork_only: 569.0 (expected ~569)\n", - " both_sides: 2083.0 (expected ~2083)\n", - " fork_only %: 7.9% (expected ~7.9%)\n", - " canonical_reachable %: 92.1% (expected ~92.1%)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
canonical_onlyfork_onlyboth_sidesfork_only_pctcanonical_reachable_pct
04555.0569.02083.07.992.1
\n", - "
" - ], - "text/plain": [ - " canonical_only fork_only both_sides fork_only_pct \\\n", - "0 4555.0 569.0 2083.0 7.9 \n", - "\n", - " canonical_reachable_pct \n", - "0 92.1 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "query_check4 = \"\"\"\n", - "WITH RECURSIVE project_root AS (\n", - " SELECT p.id AS project_id, p.id AS root_id\n", - " FROM projects p\n", - " WHERE p.forked_from IS NULL\n", - " UNION ALL\n", - " SELECT c.id AS project_id, pr.root_id\n", - " FROM projects c\n", - " JOIN project_root pr ON c.forked_from = pr.project_id\n", - "),\n", - "comment_project_rows AS (\n", - " SELECT cs.ID AS comment_id, c.project_id, 'commit_comment' AS source_tag\n", - " FROM comment_sentiment cs\n", - " JOIN commit_comments cc ON cs.ID = cc.comment_id\n", - " JOIN commits c ON cc.commit_id = c.id\n", - " UNION ALL\n", - " SELECT cs.ID AS comment_id, pr.base_repo_id AS project_id, 'pr_comment' AS source_tag\n", - " FROM comment_sentiment cs\n", - " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", - " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", - " UNION ALL\n", - " SELECT cs.ID AS comment_id, pr.head_repo_id AS project_id, 'pr_comment' AS source_tag\n", - " FROM comment_sentiment cs\n", - " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", - " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", - "),\n", - "labeled AS (\n", - " SELECT\n", - " cpr.comment_id,\n", - " cpr.source_tag,\n", - " pr.root_id,\n", - " (cpr.project_id = pr.root_id) AS is_canonical\n", - " FROM comment_project_rows cpr\n", - " JOIN project_root pr ON pr.project_id = cpr.project_id\n", - "),\n", - "comment_flags AS (\n", - " SELECT\n", - " root_id, source_tag, comment_id,\n", - " MAX(CASE WHEN is_canonical THEN 1 ELSE 0 END) AS has_canonical,\n", - " MAX(CASE WHEN NOT is_canonical THEN 1 ELSE 0 END) AS has_fork\n", - " FROM labeled\n", - " GROUP BY root_id, source_tag, comment_id\n", - "),\n", - "global_counts AS (\n", - 
" SELECT\n", - " COUNT(*) AS mapped_comment_ids,\n", - " SUM(CASE WHEN has_canonical = 1 AND has_fork = 0 THEN 1 ELSE 0 END) AS canonical_only,\n", - " SUM(CASE WHEN has_canonical = 0 AND has_fork = 1 THEN 1 ELSE 0 END) AS fork_only,\n", - " SUM(CASE WHEN has_canonical = 1 AND has_fork = 1 THEN 1 ELSE 0 END) AS both_sides\n", - " FROM comment_flags\n", - ")\n", - "SELECT\n", - " canonical_only,\n", - " fork_only,\n", - " both_sides,\n", - " ROUND(100 * fork_only / NULLIF(mapped_comment_ids, 0), 2) AS fork_only_pct,\n", - " ROUND(100 * (canonical_only + both_sides) / NULLIF(mapped_comment_ids, 0), 2) AS canonical_reachable_pct\n", - "FROM global_counts;\n", - "\"\"\"\n", - "\n", - "with engine.connect() as con:\n", - " check4 = pd.read_sql(text(query_check4), con)\n", - "\n", - "print(\"Canonical vs fork accessibility summary:\")\n", - "print(f\" canonical_only: {check4['canonical_only'].iloc[0]} (expected ~4555)\")\n", - "print(f\" fork_only: {check4['fork_only'].iloc[0]} (expected ~569)\")\n", - "print(f\" both_sides: {check4['both_sides'].iloc[0]} (expected ~2083)\")\n", - "print(f\" fork_only %: {check4['fork_only_pct'].iloc[0]}% (expected ~7.9%)\")\n", - "print(f\" canonical_reachable %: {check4['canonical_reachable_pct'].iloc[0]}% (expected ~92.1%)\")\n", - "display(check4)" - ] - }, - { - "cell_type": "markdown", - "id": "1af5516e", - "metadata": {}, - "source": [ - "### Step 5: Build the contextualized dataset\n", - "\n", - "Now that we know which projects have sentiment-labeled comments and how they map across tables, we can build the contextualized dataset.\n", - "\n", - "The Gold Standard currently has three columns (`ID`, `polarity`, `text`). We're going to add six more from GHTorrent:\n", - "\n", - "1. `created_at` - Comment timestamp\n", - "2. `author_login` - Author username\n", - "3. `author_name` - Author First Name & Last Name\n", - "4. `author_email` - Author email\n", - "5. `owner` - Project owner\n", - "6. 
`repo` - Project repo name" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "77ab4187", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total rows: 7122 (expected 7122)\n", - " From commit comments : 4317\n", - " From PR comments only: 2805\n" - ] - } - ], - "source": [ - "# Pull context for commit comments\n", - "commit_context_sql = \"\"\"\n", - "SELECT\n", - " cs.ID AS comment_id,\n", - " cs.Polarity AS polarity,\n", - " cs.Text AS text,\n", - " cc.created_at AS created_at,\n", - " u.login AS author_login,\n", - " u.name AS author_name,\n", - " u.email AS author_email,\n", - " u_owner.login AS owner,\n", - " p.name AS repo\n", - "FROM comment_sentiment cs\n", - "JOIN commit_comments cc ON cs.ID = cc.comment_id\n", - "JOIN users u ON cc.user_id = u.id\n", - "JOIN commits c ON cc.commit_id = c.id\n", - "JOIN projects p ON c.project_id = p.id\n", - "JOIN users u_owner ON p.owner_id = u_owner.id\n", - "\"\"\"\n", - "\n", - "# Pull context for PR comments\n", - "pr_context_sql = \"\"\"\n", - "SELECT\n", - " cs.ID AS comment_id,\n", - " cs.Polarity AS polarity,\n", - " cs.Text AS text,\n", - " prc.created_at AS created_at,\n", - " u.login AS author_login,\n", - " u.name AS author_name,\n", - " u.email AS author_email,\n", - " u_owner.login AS owner,\n", - " p.name AS repo\n", - "FROM comment_sentiment cs\n", - "JOIN pull_request_comments prc ON cs.ID = CAST(prc.comment_id AS UNSIGNED)\n", - "JOIN users u ON prc.user_id = u.id\n", - "JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", - "JOIN projects p ON pr.base_repo_id = p.id\n", - "JOIN users u_owner ON p.owner_id = u_owner.id\n", - "\"\"\"\n", - "\n", - "with engine.connect() as con:\n", - " commit_ctx = pd.read_sql(text(commit_context_sql), con)\n", - " pr_ctx = pd.read_sql(text(pr_context_sql), con)\n", - "\n", - "# Deduplicate within each source: keep first match per comment_id\n", - "commit_ctx = 
commit_ctx.drop_duplicates(subset='comment_id', keep='first')\n", - "pr_ctx = pr_ctx.drop_duplicates(subset='comment_id', keep='first')\n", - "\n", - "# Merge: prefer commit comment rows; fill in PR-only rows for IDs not in commit set\n", - "commit_ids = set(commit_ctx['comment_id'])\n", - "pr_only = pr_ctx[~pr_ctx['comment_id'].isin(commit_ids)]\n", - "contextualized = (\n", - " pd.concat([commit_ctx, pr_only], ignore_index=True)\n", - " .sort_values('comment_id')\n", - " .reset_index(drop=True)\n", - ")\n", - "\n", - "print(f\"Total rows: {len(contextualized)} (expected 7122)\")\n", - "print(f\" From commit comments: {len(commit_ctx)}\")\n", - "print(f\" From PR comments only: {len(pr_only)}\")" - ] - }, - { - "cell_type": "markdown", - "id": "9e04de22", - "metadata": {}, - "source": [ - "### Step 6: Compare original vs. contextualized dataset\n", - "\n", - "Let's look at a quick before/after to see which columns we added. The original Gold Standard has three columns, while the new contextualized version we created has nine (the original three plus the six GHTorrent columns added in Step 5)." - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "2850d211", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Original GitHub Gold Standard (first 5 rows):\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
IDPolarityText
04063186neutralNo. I still see the wrong twins.  * https://gi...
13894703neutralReverted.\"
21971084neutralYou can leave a queue while in queue ? (before...
31827828positiveDidn't look at SpellTargetRestrictions XD\"
4232603neutralNot sure about what kind of line lengths the p...
\n", - "
" - ], - "text/plain": [ - " ID Polarity Text\n", - "0 4063186 neutral No. I still see the wrong twins. * https://gi...\n", - "1 3894703 neutral Reverted.\"\n", - "2 1971084 neutral You can leave a queue while in queue ? (before...\n", - "3 1827828 positive Didn't look at SpellTargetRestrictions XD\"\n", - "4 232603 neutral Not sure about what kind of line lengths the p..." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Contextualized dataset with additional GHTorrent columns (first 5 rows):\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
comment_idpolaritytextcreated_atauthor_loginauthor_nameauthor_emailownerrepo
0135negativeIf you mean #any_instance, you were better off...2008-04-12 04:54:27tomafroTom Wardtom@popdog.netrailsrails
1138negativeYou're a nasty code smell.\"2008-04-12 06:00:58joevandykJoe Van Dykjoe@tanga.comrailsrails
2196negativeMost users has winrar installed, which afaik h...2008-04-13 04:36:06augustlAugust Lilleaasaugust@augustl.comrailsrails
3318negative(apologies for the weird formatting there, i d...2008-04-15 08:47:51lazyatomJames Adamjames@lazyatom.comrailsrails
4919neutralAre there any other ivars I missed ?\"2008-05-06 02:16:34lifoPratikpratiknaik@gmail.comrailsrails
\n", - "
" - ], - "text/plain": [ - " comment_id polarity text \\\n", - "0 135 negative If you mean #any_instance, you were better off... \n", - "1 138 negative You're a nasty code smell.\" \n", - "2 196 negative Most users has winrar installed, which afaik h... \n", - "3 318 negative (apologies for the weird formatting there, i d... \n", - "4 919 neutral Are there any other ivars I missed ?\" \n", - "\n", - " created_at author_login author_name author_email \\\n", - "0 2008-04-12 04:54:27 tomafro Tom Ward tom@popdog.net \n", - "1 2008-04-12 06:00:58 joevandyk Joe Van Dyk joe@tanga.com \n", - "2 2008-04-13 04:36:06 augustl August Lilleaas august@augustl.com \n", - "3 2008-04-15 08:47:51 lazyatom James Adam james@lazyatom.com \n", - "4 2008-05-06 02:16:34 lifo Pratik pratiknaik@gmail.com \n", - "\n", - " owner repo \n", - "0 rails rails \n", - "1 rails rails \n", - "2 rails rails \n", - "3 rails rails \n", - "4 rails rails " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Note: 784 of 7122 rows have a NULL author_email (11.0%). GitHub stopped exposing emails in the API, so this is expected.\n" - ] - } - ], - "source": [ - "with engine.connect() as con:\n", - " original = pd.read_sql(text(\"SELECT ID, Polarity, Text FROM comment_sentiment LIMIT 5;\"), con)\n", - "\n", - "print(\"Original GitHub Gold Standard (first 5 rows):\")\n", - "display(original)\n", - "\n", - "print(\"\\nContextualized dataset with additional GHTorrent columns (first 5 rows):\")\n", - "display(contextualized.head())\n", - "\n", - "null_emails = contextualized['author_email'].isna().sum()\n", - "print(f\"\\nNote: {null_emails} of {len(contextualized)} rows have a NULL author_email ({round(100*null_emails/len(contextualized), 1)}%). 
GitHub stopped exposing emails in the API, so this is expected.\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.2" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/04_add_sentiment_to_kaiaulu.ipynb b/notebooks/04_add_sentiment_to_kaiaulu.ipynb deleted file mode 100644 index ed1aa1a..0000000 --- a/notebooks/04_add_sentiment_to_kaiaulu.ipynb +++ /dev/null @@ -1,911 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "cell-nb4-title", - "metadata": {}, - "source": [ - "# Notebook 4: Add Sentiment Labels to Kaiaulu\n", - "\n", - "By now, you should have:\n", - "1. Sentiment labels in MySQL (7,122 GitHub comments labeled positive, negative, or neutral)\n", - "2. Comment data freshly downloaded from GitHub via Kaiaulu (e.g., file paths, commit SHAs, review IDs, timestamps)\n", - "\n", - "Neither is complete on its own. The Gold Standard has polarity labels but no GitHub data. Kaiaulu's output has data but no sentiment labels. We'll query the labels from MySQL, INNER JOIN them against Kaiaulu's downloaded comment data on `comment_id`, and write the result back into Kaiaulu's directory so `sentiment_analysis.Rmd` can use it directly." 
- ] - }, - { - "cell_type": "markdown", - "id": "cell-nb4-prereqs", - "metadata": {}, - "source": [ - "### Before you start\n", - "\n", - "Two things need to be in place before running any cells:\n", - "\n", - "| What | Where it comes from |\n", - "|---|---|\n", - "| MySQL database with `comment_sentiment` table | Output of Notebook 1 |\n", - "| Kaiaulu rawdata for your selected project | Output of running `vignettes/download_github_events.Rmd` and `vignettes/download_github_pull_request_comments.Rmd` in Kaiaulu |\n", - "\n", - "The Kaiaulu vignettes write their output to `vignettes/rawdata/github/{owner}/{repo}/` inside your local Kaiaulu directory. That's where this notebook reads from.\n", - "\n", - "If those CSVs are missing, go back and run the corresponding vignettes in Notebook 3 first." - ] - }, - { - "cell_type": "markdown", - "id": "cell-nb4-step1-header", - "metadata": {}, - "source": [ - "### Step 1: Import dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "cell-nb4-step1-code", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from pathlib import Path\n", - "\n", - "import pandas as pd\n", - "from sqlalchemy import create_engine, text" - ] - }, - { - "cell_type": "markdown", - "id": "cell-nb4-step2-header", - "metadata": {}, - "source": [ - "### Step 2: Configure Project\n", - "\n", - "Set `OWNER` and `REPO` to match the project you ran the Kaiaulu vignettes for. Set `KAIAULU_REPO` to your local Kaiaulu directory. This is where the notebook will read Kaiaulu's downloaded CSVs from and where it will write the joined output. MySQL credentials should match what you used in Notebooks 1-3." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dbfd4324", - "metadata": {}, - "outputs": [], - "source": [ - "# Configure these before running\n", - "OWNER = \"ADD_OWNER_HERE\" # GitHub repo owner\n", - "REPO = \"ADD_REPO_HERE\" # GitHub repo name\n", - "\n", - "# Path to your local Kaiaulu directory\n", - "KAIAULU_REPO = Path(\"PATH_TO/kaiaulu\")\n", - "\n", - "# Kaiaulu rawdata directory for this project\n", - "KAIAULU_DATA_DIR = KAIAULU_REPO / \"vignettes\" / \"rawdata\" / \"github\" / OWNER / REPO\n", - "\n", - "# MySQL connection\n", - "MYSQL_HOST = os.getenv(\"MYSQL_HOST\", \"localhost\")\n", - "MYSQL_PORT = int(os.getenv(\"MYSQL_PORT\", \"3306\"))\n", - "MYSQL_DB = os.getenv(\"MYSQL_DB\", \"github\")\n", - "MYSQL_USER = os.getenv(\"MYSQL_USER\", \"root\")\n", - "MYSQL_PASSWORD = os.getenv(\"MYSQL_PASSWORD\", \"ADD_PASSWORD_HERE\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-nb4-step3-header", - "metadata": {}, - "source": [ - "### Step 3: Query sentiment labels from MySQL\n", - "\n", - "Pull the sentiment labels for your project directly from the `comment_sentiment` table. We join through GHTorrent to filter down to just the comments belonging to `OWNER/REPO`, and grab a context columns." - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "cell-nb4-step3-code", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Commit comment sentiment labels: 33\n", - "PR inline sentiment labels: 111\n", - "Total sentiment labels for cakephp: 144\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
comment_idpolaritytextcreated_atauthor_loginownerrepo
03411503neutralThis is causing the year to return always with...2013-06-12 09:21:03luksmcakephpcakephp
12245783neutral@Scottymeuk Read the associated ticket [#3283]...2012-12-03 11:25:13ADmadcakephpcakephp
21040482neutralhttps://github.com/petteyg/code_check\"2012-03-04 07:12:51josegonzalezcakephpcakephp
3998908positiveI'm an idiot, I don't know how I missed that t...2012-02-22 14:22:03markstorycakephpcakephp
4744111negativeSorry, guys. Yes individually tests pass. I am...2011-11-24 01:02:20ceeramcakephpcakephp
\n", - "
" - ], - "text/plain": [ - " comment_id polarity text \\\n", - "0 3411503 neutral This is causing the year to return always with... \n", - "1 2245783 neutral @Scottymeuk Read the associated ticket [#3283]... \n", - "2 1040482 neutral https://github.com/petteyg/code_check\" \n", - "3 998908 positive I'm an idiot, I don't know how I missed that t... \n", - "4 744111 negative Sorry, guys. Yes individually tests pass. I am... \n", - "\n", - " created_at author_login owner repo \n", - "0 2013-06-12 09:21:03 luksm cakephp cakephp \n", - "1 2012-12-03 11:25:13 ADmad cakephp cakephp \n", - "2 2012-03-04 07:12:51 josegonzalez cakephp cakephp \n", - "3 2012-02-22 14:22:03 markstory cakephp cakephp \n", - "4 2011-11-24 01:02:20 ceeram cakephp cakephp " - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "engine = create_engine(\n", - " f\"mysql+pymysql://{MYSQL_USER}:{MYSQL_PASSWORD}@{MYSQL_HOST}:{MYSQL_PORT}/{MYSQL_DB}\"\n", - ")\n", - "\n", - "commit_sql = \"\"\"\n", - "SELECT\n", - " cs.ID AS comment_id,\n", - " cs.Polarity AS polarity,\n", - " cs.Text AS text,\n", - " cc.created_at AS created_at,\n", - " u.login AS author_login,\n", - " u_owner.login AS owner,\n", - " p.name AS repo\n", - "FROM comment_sentiment cs\n", - "JOIN commit_comments cc ON cs.ID = cc.comment_id\n", - "JOIN commits c ON cc.commit_id = c.id\n", - "JOIN projects p ON c.project_id = p.id\n", - "JOIN users u ON cc.user_id = u.id\n", - "JOIN users u_owner ON p.owner_id = u_owner.id\n", - "WHERE LOWER(u_owner.login) = :owner\n", - " AND LOWER(p.name) = :repo\n", - "\"\"\"\n", - "\n", - "pr_sql = \"\"\"\n", - "SELECT\n", - " cs.ID AS comment_id,\n", - " cs.Polarity AS polarity,\n", - " cs.Text AS text,\n", - " prc.created_at AS created_at,\n", - " u.login AS author_login,\n", - " u_owner.login AS owner,\n", - " p.name AS repo\n", - "FROM comment_sentiment cs\n", - "JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", - "JOIN pull_requests 
pr ON prc.pull_request_id = pr.id\n", - "JOIN projects p ON pr.base_repo_id = p.id\n", - "JOIN users u ON prc.user_id = u.id\n", - "JOIN users u_owner ON p.owner_id = u_owner.id\n", - "WHERE LOWER(u_owner.login) = :owner\n", - " AND LOWER(p.name) = :repo\n", - "\"\"\"\n", - "\n", - "params = {\"owner\": OWNER.lower(), \"repo\": REPO.lower()}\n", - "\n", - "with engine.connect() as con:\n", - " commit_labels = pd.read_sql(text(commit_sql), con, params=params)\n", - " pr_labels = pd.read_sql(text(pr_sql), con, params=params)\n", - "\n", - "# Deduplicate 85 comment IDs that appear in both commit_comments and pull_request_comments GHTorrent tables\n", - "combined = pd.concat([commit_labels, pr_labels], ignore_index=True)\n", - "project_ctx = combined.drop_duplicates(subset=\"comment_id\", keep=\"first\").copy()\n", - "\n", - "dupes_dropped = len(combined) - len(project_ctx)\n", - "print(f\"Commit comment sentiment labels: {len(commit_labels)}\")\n", - "print(f\"PR inline sentiment labels: {len(pr_labels)}\")\n", - "if dupes_dropped > 0:\n", - " print(f\"Duplicate IDs removed: {dupes_dropped} (appeared in both tables)\")\n", - "print(f\"Total sentiment labels for {REPO}: {len(project_ctx)}\")\n", - "project_ctx.head()" - ] - }, - { - "cell_type": "markdown", - "id": "4xul75ers8r", - "metadata": {}, - "source": [ - "### Step 4: Remap polarity labels to integers\n", - "\n", - "The Gold Standard uses strings (`\"positive\"`, `\"negative\"`, `\"neutral\"`). Kaiaulu's `sentiment_analysis.Rmd` expects integers: `0` = neutral, `1` = positive, `2` = negative. We remap here so the output is ready to use directly." 
- ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "40fgc3k5q2l", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "All polarity labels mapped successfully.\n", - "polarity\n", - "neutral 82\n", - "negative 45\n", - "positive 17\n" - ] - } - ], - "source": [ - "polarity_map = {\"neutral\": 0, \"positive\": 1, \"negative\": 2}\n", - "\n", - "if project_ctx[\"polarity\"].dtype == object:\n", - " project_ctx[\"polarity\"] = project_ctx[\"polarity\"].str.lower().map(polarity_map)\n", - "\n", - "unmapped = project_ctx[\"polarity\"].isna().sum()\n", - "if unmapped > 0:\n", - " print(f\"WARNING: {unmapped} rows could not be mapped. Check for unexpected polarity strings\")\n", - "else:\n", - " counts = project_ctx[\"polarity\"].value_counts().rename({0: \"neutral\", 1: \"positive\", 2: \"negative\"})\n", - " print(\"All polarity labels mapped successfully.\")\n", - " print(counts.to_string())" - ] - }, - { - "cell_type": "markdown", - "id": "cell-nb4-step4-header", - "metadata": {}, - "source": [ - "### Step 5: Load the Kaiaulu output CSVs\n", - "\n", - "Read the two CSVs that Kaiaulu's vignettes wrote into the `rawdata/` directory." 
- ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "cell-nb4-step4-code", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Kaiaulu commit comments: 1569 rows, columns: ['comment_id', 'commit_id', 'author_login', 'author_id', 'body', 'created_at', 'updated_at']\n", - "Kaiaulu PR inline comments: 6100 rows, columns: ['review_id', 'comment_id', 'html_url', 'created_at', 'updated_at', 'comment_user_login', 'author_association', 'file_path', 'start_line', 'line', 'original_start_line', 'original_line', 'position', 'diff_hunk', 'body', 'commit_id']\n" - ] - } - ], - "source": [ - "commit_csv_path = KAIAULU_DATA_DIR / f\"{REPO}_commit_comments.csv\"\n", - "pr_csv_path = KAIAULU_DATA_DIR / f\"{REPO}_pr_inline_comments.csv\"\n", - "\n", - "kaiaulu_commit = pd.read_csv(commit_csv_path)\n", - "kaiaulu_pr = pd.read_csv(pr_csv_path)\n", - "\n", - "print(f\"Kaiaulu commit comments: {len(kaiaulu_commit)} rows, columns: {list(kaiaulu_commit.columns)}\")\n", - "print(f\"Kaiaulu PR inline comments: {len(kaiaulu_pr)} rows, columns: {list(kaiaulu_pr.columns)}\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-nb4-step5-header", - "metadata": {}, - "source": [ - "### Step 6: INNER JOIN - Commit Comments\n", - "\n", - "Join the MySQL sentiment labels against Kaiaulu's commit comments on `comment_id`." - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "cell-nb4-step5-code", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cakephp rows in sentiment labels: 144\n", - "Rows matched in Kaiaulu commit comments: 33\n", - "\n", - "Joined commit comments (first 5 rows):\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
comment_idpolaritytextcreated_at_goldauthor_login_goldownerrepocommit_idauthor_login_kaiauluauthor_idbodycreated_at_kaiauluupdated_at
03411503neutralThis is causing the year to return always with...2013-06-12 09:21:03luksmcakephpcakephpfd72f894ad091bbe3c10314091ee3ab34769afa2luksm868687This is causing the year to return always with...2013-06-12T21:21:03Z2013-06-12T21:21:03Z
12245783neutral@Scottymeuk Read the associated ticket [#3283]...2012-12-03 11:25:13ADmadcakephpcakephpea467e72d72e9eb7cd140816ee8d7abd900b2629ADmad142658@Scottymeuk Read the associated ticket [#3283]...2012-12-03T22:25:13Z2012-12-03T22:25:13Z
21040482neutralhttps://github.com/petteyg/code_check\"2012-03-04 07:12:51josegonzalezcakephpcakephpa6da7361494b85411f1b93ea589e58405a77524bjosegonzalez65675https://github.com/petteyg/code_check\\n2012-03-04T18:12:51Z2012-03-04T18:12:51Z
3998908positiveI'm an idiot, I don't know how I missed that t...2012-02-22 14:22:03markstorycakephpcakephp89df484fc5a93fac7b01bdf086a395ecf284217dmarkstory24086I'm an idiot, I don't know how I missed that t...2012-02-23T01:22:03Z2012-02-23T01:22:03Z
4744111negativeSorry, guys. Yes individually tests pass. I am...2011-11-24 01:02:20ceeramcakephpcakephp05940ae1ec703a23714ff815e4e2e19cd1c6b5b7ceeram111448Sorry, guys. Yes individually tests pass. I am...2011-11-24T12:02:20Z2011-11-24T12:02:20Z
\n", - "
" - ], - "text/plain": [ - " comment_id polarity text \\\n", - "0 3411503 neutral This is causing the year to return always with... \n", - "1 2245783 neutral @Scottymeuk Read the associated ticket [#3283]... \n", - "2 1040482 neutral https://github.com/petteyg/code_check\" \n", - "3 998908 positive I'm an idiot, I don't know how I missed that t... \n", - "4 744111 negative Sorry, guys. Yes individually tests pass. I am... \n", - "\n", - " created_at_gold author_login_gold owner repo \\\n", - "0 2013-06-12 09:21:03 luksm cakephp cakephp \n", - "1 2012-12-03 11:25:13 ADmad cakephp cakephp \n", - "2 2012-03-04 07:12:51 josegonzalez cakephp cakephp \n", - "3 2012-02-22 14:22:03 markstory cakephp cakephp \n", - "4 2011-11-24 01:02:20 ceeram cakephp cakephp \n", - "\n", - " commit_id author_login_kaiaulu author_id \\\n", - "0 fd72f894ad091bbe3c10314091ee3ab34769afa2 luksm 868687 \n", - "1 ea467e72d72e9eb7cd140816ee8d7abd900b2629 ADmad 142658 \n", - "2 a6da7361494b85411f1b93ea589e58405a77524b josegonzalez 65675 \n", - "3 89df484fc5a93fac7b01bdf086a395ecf284217d markstory 24086 \n", - "4 05940ae1ec703a23714ff815e4e2e19cd1c6b5b7 ceeram 111448 \n", - "\n", - " body created_at_kaiaulu \\\n", - "0 This is causing the year to return always with... 2013-06-12T21:21:03Z \n", - "1 @Scottymeuk Read the associated ticket [#3283]... 2012-12-03T22:25:13Z \n", - "2 https://github.com/petteyg/code_check\\n 2012-03-04T18:12:51Z \n", - "3 I'm an idiot, I don't know how I missed that t... 2012-02-23T01:22:03Z \n", - "4 Sorry, guys. Yes individually tests pass. I am... 
2011-11-24T12:02:20Z \n", - "\n", - " updated_at \n", - "0 2013-06-12T21:21:03Z \n", - "1 2012-12-03T22:25:13Z \n", - "2 2012-03-04T18:12:51Z \n", - "3 2012-02-23T01:22:03Z \n", - "4 2011-11-24T12:02:20Z " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Saved: /Users/sheilalimon/Desktop/github/kaiaulu-sentiment/vignettes/rawdata/github/cakephp/cakephp/cakephp_sentiment_commit_comments_joined.csv\n" - ] - } - ], - "source": [ - "commit_joined = project_ctx.merge(\n", - " kaiaulu_commit,\n", - " on='comment_id',\n", - " how='inner',\n", - " suffixes=('_gold', '_kaiaulu')\n", - ")\n", - "\n", - "commit_dropped = len(project_ctx) - len(commit_joined)\n", - "print(f\"{REPO} rows in sentiment labels: {len(project_ctx)}\")\n", - "print(f\"Rows matched in Kaiaulu commit comments: {len(commit_joined)}\")\n", - "\n", - "print(\"\\nJoined commit comments (first 5 rows):\")\n", - "display(commit_joined.head())\n", - "\n", - "out_path = KAIAULU_DATA_DIR / f\"{REPO}_sentiment_commit_comments_joined.csv\"\n", - "commit_joined.to_csv(out_path, index=False)\n", - "print(f\"\\nSaved: {out_path}\")" - ] - }, - { - "cell_type": "markdown", - "id": "daef76ca", - "metadata": {}, - "source": [ - "### Step 7: INNER JOIN - PR inline comments\n", - "\n", - "Same join as Step 6, but against Kaiaulu's PR inline comments." - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "d8ba23d8", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cakephp rows in sentiment labels: 144\n", - "Rows matched in Kaiaulu PR inline comments: 111\n", - "\n", - "Joined PR inline comments (first 5 rows):\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
comment_idpolaritytextcreated_at_goldauthor_loginownerreporeview_idhtml_urlcreated_at_kaiaulu...author_associationfile_pathstart_linelineoriginal_start_lineoriginal_linepositiondiff_hunkbodycommit_id
06044242neutralI had it implemented that way originally. The ...2013-08-28 07:51:55markstorycakephpcakephpNaNhttps://github.com/cakephp/cakephp/pull/1568#d...2013-08-28T19:51:55Z...MEMBERlib/Cake/Utility/Security.phpNaNNaNNaNNaN1@@ -289,4 +289,69 @@ protected static function...I had it implemented that way originally. The ...13b870d7e183375822eea4ffd66aaacaeec760ff
15949747neutralThis block of code is repeated 3 times in Hash...2013-08-23 01:02:48markstorycakephpcakephpNaNhttps://github.com/cakephp/cakephp/pull/1549#d...2013-08-23T13:02:48Z...MEMBERlib/Cake/Utility/Hash.phpNaN149.0NaNNaN30@@ -222,16 +222,36 @@ protected static functio...This block of code is repeated 3 times in Hash...a0014e7a303067bb9c36d438de5a70fe819d22a7
24288367neutralThis looks good, but makes me think we should ...2013-05-18 04:31:19markstorycakephpcakephpNaNhttps://github.com/cakephp/cakephp/pull/1275#d...2013-05-18T16:31:19Z...MEMBERlib/Cake/Controller/Component/Auth/BlowfishPas...NaN44.0NaNNaN44@@ -0,0 +1,58 @@\\n+<?php\\n+/**\\n+ * PHP 5\\n+ *...This looks good, but makes me think we should ...dd2892ad8d0e3a0b09990b0a9ef26c320f1901fa
34288664neutralHmm, my thinking was all password hasher class...2013-05-18 07:00:03ADmadcakephpcakephpNaNhttps://github.com/cakephp/cakephp/pull/1275#d...2013-05-18T19:00:03Z...MEMBERlib/Cake/Controller/Component/Auth/BlowfishPas...NaNNaNNaNNaN1@@ -0,0 +1,58 @@\\n+<?php\\n+/**\\n+ * PHP 5\\n+ *...Hmm, my thinking was all password hasher class...dd2892ad8d0e3a0b09990b0a9ef26c320f1901fa
43122764negativeI totally missed that, my bad. I'll get that f...2013-02-22 10:12:40markstorycakephpcakephpNaNhttps://github.com/cakephp/cakephp/pull/1154#d...2013-02-22T21:12:40Z...MEMBERlib/Cake/Utility/ViewVarsTrait.phpNaNNaNNaNNaN1@@ -0,0 +1,55 @@\\n+<?php\\n+/**\\n+ * CakePHP(tm...I totally missed that, my bad. I'll get that f...955889c6c731a56f9cbe6f572cea4594fd887d3a
\n", - "

5 rows × 22 columns

\n", - "
" - ], - "text/plain": [ - " comment_id polarity text \\\n", - "0 6044242 neutral I had it implemented that way originally. The ... \n", - "1 5949747 neutral This block of code is repeated 3 times in Hash... \n", - "2 4288367 neutral This looks good, but makes me think we should ... \n", - "3 4288664 neutral Hmm, my thinking was all password hasher class... \n", - "4 3122764 negative I totally missed that, my bad. I'll get that f... \n", - "\n", - " created_at_gold author_login owner repo review_id \\\n", - "0 2013-08-28 07:51:55 markstory cakephp cakephp NaN \n", - "1 2013-08-23 01:02:48 markstory cakephp cakephp NaN \n", - "2 2013-05-18 04:31:19 markstory cakephp cakephp NaN \n", - "3 2013-05-18 07:00:03 ADmad cakephp cakephp NaN \n", - "4 2013-02-22 10:12:40 markstory cakephp cakephp NaN \n", - "\n", - " html_url created_at_kaiaulu \\\n", - "0 https://github.com/cakephp/cakephp/pull/1568#d... 2013-08-28T19:51:55Z \n", - "1 https://github.com/cakephp/cakephp/pull/1549#d... 2013-08-23T13:02:48Z \n", - "2 https://github.com/cakephp/cakephp/pull/1275#d... 2013-05-18T16:31:19Z \n", - "3 https://github.com/cakephp/cakephp/pull/1275#d... 2013-05-18T19:00:03Z \n", - "4 https://github.com/cakephp/cakephp/pull/1154#d... 2013-02-22T21:12:40Z \n", - "\n", - " ... author_association file_path \\\n", - "0 ... MEMBER lib/Cake/Utility/Security.php \n", - "1 ... MEMBER lib/Cake/Utility/Hash.php \n", - "2 ... MEMBER lib/Cake/Controller/Component/Auth/BlowfishPas... \n", - "3 ... MEMBER lib/Cake/Controller/Component/Auth/BlowfishPas... \n", - "4 ... MEMBER lib/Cake/Utility/ViewVarsTrait.php \n", - "\n", - " start_line line original_start_line original_line position \\\n", - "0 NaN NaN NaN NaN 1 \n", - "1 NaN 149.0 NaN NaN 30 \n", - "2 NaN 44.0 NaN NaN 44 \n", - "3 NaN NaN NaN NaN 1 \n", - "4 NaN NaN NaN NaN 1 \n", - "\n", - " diff_hunk \\\n", - "0 @@ -289,4 +289,69 @@ protected static function... \n", - "1 @@ -222,16 +222,36 @@ protected static functio... 
\n", - "2 @@ -0,0 +1,58 @@\\n+\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sentiment_idpolarityproject_nameproject_urlcommit_shacomment_text
04063186neutraljekyllhttps://api.github.com/repos/mojombo/jekyllcb521b7f9a6887051b982a2053cd402ff019594eNo. I still see the wrong twins.  * https://gi...
13894703neutraljqueryhttps://api.github.com/repos/jquery/jqueryf6e86c3ca4d527d5453a0b5b9591ef38b5d3c000Reverted.\"
21971084neutralMaNGOShttps://api.github.com/repos/mangos/MaNGOSabfc99ef522b8b6353d051a002d026530ec7d253You can leave a queue while in queue ? (before...
31827828positiveMaNGOShttps://api.github.com/repos/mangos/MaNGOS915b77339711ec1278ac06ec80d206133bdb427aDidn't look at SpellTargetRestrictions XD\"
4232603neutralclojurehttps://api.github.com/repos/clojure/clojureb43bf20e1ba864c817ada237042cfdc8922831c0Not sure about what kind of line lengths the p...
53565454positivenettyhttps://api.github.com/repos/netty/netty1fee1ef74ed8ac515c19a7f8eebd16f41a37b7b6@normanmaurer Nice catch ! Did you make the sa...
63504879neutralnettyhttps://api.github.com/repos/netty/nettycfd514d099fb41b2a467ca208fe1334bb04f8f6cThat's why I didn't close after sending the cl...
73413199neutralnettyhttps://api.github.com/repos/netty/netty78d8f05c218cab107255c4dc1a1344aef138d379Build result for 78d8f05c218cab107255c4dc1a134...
83404541neutralnettyhttps://api.github.com/repos/netty/nettyfd0084ecfa254bc5f619f50ec50a8cb8e3cc083eWhy you think using ImmediateEventExecutor is ...
92290082neutraljqueryhttps://api.github.com/repos/jquery/jquerycef044d82ec0d338b2b69756d3ba08692fb80ae4These are the ones we currently hardcode in Te...
\n", - "" - ], - "text/plain": [ - " sentiment_id polarity project_name \\\n", - "0 4063186 neutral jekyll \n", - "1 3894703 neutral jquery \n", - "2 1971084 neutral MaNGOS \n", - "3 1827828 positive MaNGOS \n", - "4 232603 neutral clojure \n", - "5 3565454 positive netty \n", - "6 3504879 neutral netty \n", - "7 3413199 neutral netty \n", - "8 3404541 neutral netty \n", - "9 2290082 neutral jquery \n", - "\n", - " project_url \\\n", - "0 https://api.github.com/repos/mojombo/jekyll \n", - "1 https://api.github.com/repos/jquery/jquery \n", - "2 https://api.github.com/repos/mangos/MaNGOS \n", - "3 https://api.github.com/repos/mangos/MaNGOS \n", - "4 https://api.github.com/repos/clojure/clojure \n", - "5 https://api.github.com/repos/netty/netty \n", - "6 https://api.github.com/repos/netty/netty \n", - "7 https://api.github.com/repos/netty/netty \n", - "8 https://api.github.com/repos/netty/netty \n", - "9 https://api.github.com/repos/jquery/jquery \n", - "\n", - " commit_sha \\\n", - "0 cb521b7f9a6887051b982a2053cd402ff019594e \n", - "1 f6e86c3ca4d527d5453a0b5b9591ef38b5d3c000 \n", - "2 abfc99ef522b8b6353d051a002d026530ec7d253 \n", - "3 915b77339711ec1278ac06ec80d206133bdb427a \n", - "4 b43bf20e1ba864c817ada237042cfdc8922831c0 \n", - "5 1fee1ef74ed8ac515c19a7f8eebd16f41a37b7b6 \n", - "6 cfd514d099fb41b2a467ca208fe1334bb04f8f6c \n", - "7 78d8f05c218cab107255c4dc1a1344aef138d379 \n", - "8 fd0084ecfa254bc5f619f50ec50a8cb8e3cc083e \n", - "9 cef044d82ec0d338b2b69756d3ba08692fb80ae4 \n", - "\n", - " comment_text \n", - "0 No. I still see the wrong twins. * https://gi... \n", - "1 Reverted.\" \n", - "2 You can leave a queue while in queue ? (before... \n", - "3 Didn't look at SpellTargetRestrictions XD\" \n", - "4 Not sure about what kind of line lengths the p... \n", - "5 @normanmaurer Nice catch ! Did you make the sa... \n", - "6 That's why I didn't close after sending the cl... \n", - "7 Build result for 78d8f05c218cab107255c4dc1a134... 
\n", - "8 Why you think using ImmediateEventExecutor is ... \n", - "9 These are the ones we currently hardcode in Te... " - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "query1 = \"\"\"\n", "SELECT\n", diff --git a/notebooks/2_explore_relevant_projects.ipynb b/notebooks/2_explore_relevant_projects.ipynb new file mode 100644 index 0000000..131abd6 --- /dev/null +++ b/notebooks/2_explore_relevant_projects.ipynb @@ -0,0 +1,437 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1b3dd1e0", + "metadata": {}, + "source": [ + "# Notebook 2: Explore Relevant Projects in the GHTorrent Database\n", + "\n", + "By this point, you should have the Gold Standard and GHTorrent 2004 dump loaded into MySQL. Since both datasets share the same comment IDs, we can join them to add contextual columns (e.g., project, author, timestamp) to the Gold Standard's three columns (`ID`, `polarity`, `text`).\n", + "\n", + "But before we create the contextualized GitHub Gold Standard dataset, we need to understand what we're working with and which projects have relevant sentiment-labeled comments that we want to use with Kaiaulu.\n", + "\n", + "How are the 7,122 IDs split between commit comments and PR comments? Which projects show up the most? And are these comments reachable from canonical (non-fork) repos, or contained in forks?\n", + "\n", + "The answers to these questions inform how we handle the data and which projects we target when generating project config files in Notebook 3." 
+ ] + }, + { + "cell_type": "markdown", + "id": "cell-step1-header", + "metadata": {}, + "source": [ + "### Step 1: Import dependencies and connect to MySQL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-imports", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sqlalchemy import create_engine, text\n", + "\n", + "pd.set_option('display.max_rows', None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-config", + "metadata": {}, + "outputs": [], + "source": [ + "MYSQL_HOST = \"localhost\"\n", + "MYSQL_PORT = 3306\n", + "MYSQL_USER = \"root\"\n", + "MYSQL_PASSWORD = \"ADD_YOUR_PASSWORD_HERE\"\n", + "MYSQL_DB = \"github\" # name of the database where GHTorrent was loaded\n", + "\n", + "engine = create_engine(\n", + " f\"mysql+mysqlconnector://{MYSQL_USER}:{MYSQL_PASSWORD}@{MYSQL_HOST}:{MYSQL_PORT}/{MYSQL_DB}\"\n", + ")\n", + "print(\"Connected to MySQL.\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-check1-header", + "metadata": {}, + "source": [ + "### Check 1: How are the sentiment comments distributed?\n", + "\n", + "GHTorrent stores two kinds of GitHub comments: commit comments (discussions on a specific commit) and PR inline comments (left on a line of code in a pull request).\n", + "\n", + "To understand what commit comments look like, [here](https://github.com/openssl/openssl/commit/4817504d069b4c5082161b02a22116ad75f822b1#commitcomment-5942359) are examples of commit comments under a commit that introduced a popular software vulnerability. To understand what PR inline comments look like, refer to the [GitHub Pull Requests Cheatsheet](https://github.com/sailuh/kaiaulu_cheatsheet/blob/main/cheatsheets/github-comments-cheatsheet.pdf).\n", + "\n", + "The Gold Standard includes both types. The same `ID` maps to `comment_id` in both `commit_comments` and `pull_request_comments`. So, the first thing to figure out is which table each sentiment ID lands in. 
Some IDs appear in both tables (overlap = 85), meaning a small number of comments were captured under both endpoints in GHTorrent. The total unique IDs should sum to 7,122.\n", + "\n", + "Expected values:\n", + "- Commit comment matches: ~4,317\n", + "- PR comment matches: ~2,890\n", + "- Overlap (both): ~85\n", + "- Commit-only: 4,232 | PR-only: 2,805 | Total unique: 7,122" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-check1", + "metadata": {}, + "outputs": [], + "source": [ + "with engine.connect() as con:\n", + " commit_count = pd.read_sql(text(\"\"\"\n", + " SELECT COUNT(*) AS commit_comment_matches\n", + " FROM comment_sentiment s\n", + " INNER JOIN commit_comments cc ON s.ID = cc.comment_id;\n", + " \"\"\"), con).iloc[0, 0]\n", + "\n", + " pr_count = pd.read_sql(text(\"\"\"\n", + " SELECT COUNT(*) AS pr_comment_matches\n", + " FROM comment_sentiment s\n", + " INNER JOIN pull_request_comments prc ON s.ID = prc.comment_id;\n", + " \"\"\"), con).iloc[0, 0]\n", + "\n", + " overlap = pd.read_sql(text(\"\"\"\n", + " SELECT COUNT(*) AS overlap\n", + " FROM comment_sentiment s\n", + " INNER JOIN commit_comments cc ON s.ID = cc.comment_id\n", + " INNER JOIN pull_request_comments prc ON s.ID = prc.comment_id;\n", + " \"\"\"), con).iloc[0, 0]\n", + "\n", + "commit_only = commit_count - overlap\n", + "pr_only = pr_count - overlap\n", + "total_unique = commit_only + pr_only + overlap\n", + "\n", + "summary = pd.DataFrame({\n", + " 'Category': ['Commit matches', 'PR matches', 'Overlap (both)', 'Commit-only', 'PR-only', 'Total unique'],\n", + " 'Count': [commit_count, pr_count, overlap, commit_only, pr_only, total_unique],\n", + " 'Expected': [4317, 2890, 85, 4232, 2805, 7122]\n", + "})\n", + "display(summary)\n", + "\n", + "if total_unique == 7122:\n", + " print(\"PASS: total unique IDs = 7122.\")\n", + "else:\n", + " print(f\"WARNING: total unique IDs = {total_unique}, expected 7122.\")" + ] + }, + { + "cell_type": "markdown", + "id": 
"cell-check2-header", + "metadata": {}, + "source": [ + "### Check 2: Which projects have the most labeled commit comments?\n", + "\n", + "Let's see which projects' commit comments are most heavily represented in the Gold Standard. This is a preview of which projects we'll be generating Kaiaulu config files for in Notebook 3." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-check2", + "metadata": {}, + "outputs": [], + "source": [ + "query_check2 = \"\"\"\n", + "SELECT\n", + " p.id AS project_id,\n", + " p.name AS project_name,\n", + " p.url AS project_url,\n", + " COUNT(DISTINCT s.ID) AS labeled_comment_count\n", + "FROM projects p\n", + "INNER JOIN commits c ON p.id = c.project_id\n", + "INNER JOIN commit_comments cc ON c.id = cc.commit_id\n", + "INNER JOIN comment_sentiment s ON cc.comment_id = s.ID\n", + "GROUP BY p.id, p.name, p.url\n", + "ORDER BY labeled_comment_count DESC;\n", + "\"\"\"\n", + "\n", + "with engine.connect() as con:\n", + " check2 = pd.read_sql(text(query_check2), con)\n", + "\n", + "print(f\"Projects with sentiment-labeled commit comments: {len(check2)}\")\n", + "display(check2)" + ] + }, + { + "cell_type": "markdown", + "id": "cell-check3-header", + "metadata": {}, + "source": [ + "### Check 3: Which projects have the most labeled PR comments?\n", + "\n", + "Now, let's do the same project ranking for the pull request inline comments most heavily represented in the Gold Standard." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-check3", + "metadata": {}, + "outputs": [], + "source": [ + "query_check3 = \"\"\"\n", + "SELECT\n", + " p.id AS project_id,\n", + " p.name AS project_name,\n", + " p.url AS project_url,\n", + " COUNT(DISTINCT s.ID) AS labeled_comment_count\n", + "FROM projects p\n", + "INNER JOIN pull_requests pr ON p.id = pr.base_repo_id\n", + "INNER JOIN pull_request_comments prc ON pr.id = prc.pull_request_id\n", + "INNER JOIN comment_sentiment s ON prc.comment_id = s.ID\n", + "GROUP BY p.id, p.name, p.url\n", + "ORDER BY labeled_comment_count DESC;\n", + "\"\"\"\n", + "\n", + "with engine.connect() as con:\n", + " check3 = pd.read_sql(text(query_check3), con)\n", + "\n", + "print(f\"Projects with sentiment-labeled PR comments: {len(check3)}\")\n", + "display(check3)" + ] + }, + { + "cell_type": "markdown", + "id": "cell-check4-header", + "metadata": {}, + "source": [ + "### Check 4: Are the labeled comments reachable from canonical repos?\n", + "\n", + "Projects on GitHub get forked all the time. Since forks share commit history with their upstream, the same comment IDs can appear under multiple projects in GHTorrent. This matters for Notebook 3 (config files generation). We want to know: if we only generate Kaiaulu configs for canonical (non-fork) repos, how much labeled data will we miss? The purpose of this query is to inform our coverage strategy going into Notebook 3.\n", + "\n", + "Expected values:\n", + "- `canonical_only`: ~4,555\n", + "- `fork_only`: ~569 (these will be missed when targeting canonical repos only)\n", + "- `both_sides`: ~2,083\n", + "- Fork only rate: ~7.9%\n", + "- Canonical reachable rate: ~92.1%\n", + "\n", + "From these values, we can see that ~92.1% of comments are reachable from canonical repos. The ~7.9% that are fork-only will be skipped when we generate project config files in Notebook 3. This is an acceptable tradeoff. 
We document it here so the limitation is visible." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-check4", + "metadata": {}, + "outputs": [], + "source": [ + "query_check4 = \"\"\"\n", + "WITH RECURSIVE project_root AS (\n", + " SELECT p.id AS project_id, p.id AS root_id\n", + " FROM projects p\n", + " WHERE p.forked_from IS NULL\n", + " UNION ALL\n", + " SELECT c.id AS project_id, pr.root_id\n", + " FROM projects c\n", + " JOIN project_root pr ON c.forked_from = pr.project_id\n", + "),\n", + "comment_project_rows AS (\n", + " SELECT cs.ID AS comment_id, c.project_id, 'commit_comment' AS source_tag\n", + " FROM comment_sentiment cs\n", + " JOIN commit_comments cc ON cs.ID = cc.comment_id\n", + " JOIN commits c ON cc.commit_id = c.id\n", + " UNION ALL\n", + " SELECT cs.ID AS comment_id, pr.base_repo_id AS project_id, 'pr_comment' AS source_tag\n", + " FROM comment_sentiment cs\n", + " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", + " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", + " UNION ALL\n", + " SELECT cs.ID AS comment_id, pr.head_repo_id AS project_id, 'pr_comment' AS source_tag\n", + " FROM comment_sentiment cs\n", + " JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", + " JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", + "),\n", + "labeled AS (\n", + " SELECT\n", + " cpr.comment_id,\n", + " cpr.source_tag,\n", + " pr.root_id,\n", + " (cpr.project_id = pr.root_id) AS is_canonical\n", + " FROM comment_project_rows cpr\n", + " JOIN project_root pr ON pr.project_id = cpr.project_id\n", + "),\n", + "comment_flags AS (\n", + " SELECT\n", + " root_id, source_tag, comment_id,\n", + " MAX(CASE WHEN is_canonical THEN 1 ELSE 0 END) AS has_canonical,\n", + " MAX(CASE WHEN NOT is_canonical THEN 1 ELSE 0 END) AS has_fork\n", + " FROM labeled\n", + " GROUP BY root_id, source_tag, comment_id\n", + "),\n", + "global_counts AS (\n", + " SELECT\n", + " COUNT(*) AS mapped_comment_ids,\n", + " SUM(CASE 
WHEN has_canonical = 1 AND has_fork = 0 THEN 1 ELSE 0 END) AS canonical_only,\n", + " SUM(CASE WHEN has_canonical = 0 AND has_fork = 1 THEN 1 ELSE 0 END) AS fork_only,\n", + " SUM(CASE WHEN has_canonical = 1 AND has_fork = 1 THEN 1 ELSE 0 END) AS both_sides\n", + " FROM comment_flags\n", + ")\n", + "SELECT\n", + " canonical_only,\n", + " fork_only,\n", + " both_sides,\n", + " ROUND(100 * fork_only / NULLIF(mapped_comment_ids, 0), 2) AS fork_only_pct,\n", + " ROUND(100 * (canonical_only + both_sides) / NULLIF(mapped_comment_ids, 0), 2) AS canonical_reachable_pct\n", + "FROM global_counts;\n", + "\"\"\"\n", + "\n", + "with engine.connect() as con:\n", + " check4 = pd.read_sql(text(query_check4), con)\n", + "\n", + "print(\"Canonical vs fork accessibility summary:\")\n", + "print(f\" canonical_only: {check4['canonical_only'].iloc[0]} (expected ~4555)\")\n", + "print(f\" fork_only: {check4['fork_only'].iloc[0]} (expected ~569)\")\n", + "print(f\" both_sides: {check4['both_sides'].iloc[0]} (expected ~2083)\")\n", + "print(f\" fork_only %: {check4['fork_only_pct'].iloc[0]}% (expected ~7.9%)\")\n", + "print(f\" canonical_reachable %: {check4['canonical_reachable_pct'].iloc[0]}% (expected ~92.1%)\")\n", + "display(check4)" + ] + }, + { + "cell_type": "markdown", + "id": "1af5516e", + "metadata": {}, + "source": [ + "### Step 5: Build the contextualized dataset\n", + "\n", + "Now that we know which projects have sentiment-labeled comments and how they map across tables, we can build the contextualized dataset.\n", + "\n", + "The Gold Standard currently has three columns (`ID`, `polarity`, `text`). We're going to add six more from GHTorrent:\n", + "\n", + "1. `created_at` - Comment timestamp\n", + "2. `author_login` - Author username\n", + "3. `author_name` - Author First Name & Last Name\n", + "4. `author_email` - Author email\n", + "5. `owner` - Project owner\n", + "6. 
`repo` - Project repo name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ab4187", + "metadata": {}, + "outputs": [], + "source": [ + "# Pull context for commit comments\n", + "commit_context_sql = \"\"\"\n", + "SELECT\n", + " cs.ID AS comment_id,\n", + " cs.Polarity AS polarity,\n", + " cs.Text AS text,\n", + " cc.created_at AS created_at,\n", + " u.login AS author_login,\n", + " u.name AS author_name,\n", + " u.email AS author_email,\n", + " u_owner.login AS owner,\n", + " p.name AS repo\n", + "FROM comment_sentiment cs\n", + "JOIN commit_comments cc ON cs.ID = cc.comment_id\n", + "JOIN users u ON cc.user_id = u.id\n", + "JOIN commits c ON cc.commit_id = c.id\n", + "JOIN projects p ON c.project_id = p.id\n", + "JOIN users u_owner ON p.owner_id = u_owner.id\n", + "\"\"\"\n", + "\n", + "# Pull context for PR comments\n", + "pr_context_sql = \"\"\"\n", + "SELECT\n", + " cs.ID AS comment_id,\n", + " cs.Polarity AS polarity,\n", + " cs.Text AS text,\n", + " prc.created_at AS created_at,\n", + " u.login AS author_login,\n", + " u.name AS author_name,\n", + " u.email AS author_email,\n", + " u_owner.login AS owner,\n", + " p.name AS repo\n", + "FROM comment_sentiment cs\n", + "JOIN pull_request_comments prc ON cs.ID = CAST(prc.comment_id AS UNSIGNED)\n", + "JOIN users u ON prc.user_id = u.id\n", + "JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", + "JOIN projects p ON pr.base_repo_id = p.id\n", + "JOIN users u_owner ON p.owner_id = u_owner.id\n", + "\"\"\"\n", + "\n", + "with engine.connect() as con:\n", + " commit_ctx = pd.read_sql(text(commit_context_sql), con)\n", + " pr_ctx = pd.read_sql(text(pr_context_sql), con)\n", + "\n", + "# Deduplicate within each source: keep first match per comment_id\n", + "commit_ctx = commit_ctx.drop_duplicates(subset='comment_id', keep='first')\n", + "pr_ctx = pr_ctx.drop_duplicates(subset='comment_id', keep='first')\n", + "\n", + "# Merge: prefer commit comment rows; fill in PR-only rows for IDs not 
in commit set\n", + "commit_ids = set(commit_ctx['comment_id'])\n", + "pr_only = pr_ctx[~pr_ctx['comment_id'].isin(commit_ids)]\n", + "contextualized = (\n", + " pd.concat([commit_ctx, pr_only], ignore_index=True)\n", + " .sort_values('comment_id')\n", + " .reset_index(drop=True)\n", + ")\n", + "\n", + "print(f\"Total rows: {len(contextualized)} (expected 7122)\")\n", + "print(f\" From commit comments: {len(commit_ctx)}\")\n", + "print(f\" From PR comments only: {len(pr_only)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "9e04de22", + "metadata": {}, + "source": [ + "### Step 6: Compare original vs. contextualized dataset\n", + "\n", + "Let's see a quick before/after to see what columns we added. The original Gold Standard has three columns, while the new contextualized version we created has nine." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2850d211", + "metadata": {}, + "outputs": [], + "source": [ + "with engine.connect() as con:\n", + " original = pd.read_sql(text(\"SELECT ID, Polarity, Text FROM comment_sentiment LIMIT 5;\"), con)\n", + "\n", + "print(\"Original GitHub Gold Standard (first 5 rows):\")\n", + "display(original)\n", + "\n", + "print(\"\\nContextualized dataset with additional GHTorrent columns (first 5 rows):\")\n", + "display(contextualized.head())\n", + "\n", + "null_emails = contextualized['author_email'].isna().sum()\n", + "print(f\"\\nNote: {null_emails} of {len(contextualized)} rows have a NULL author_email ({round(100*null_emails/len(contextualized), 1)}%). 
GitHub stopped exposing emails in the API, so this is expected.\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/03_scale_config_files.ipynb b/notebooks/3_scale_config_files.ipynb similarity index 71% rename from notebooks/03_scale_config_files.ipynb rename to notebooks/3_scale_config_files.ipynb index 0184c72..c026bc4 100644 --- a/notebooks/03_scale_config_files.ipynb +++ b/notebooks/3_scale_config_files.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "1bc36cfe", "metadata": {}, "outputs": [], @@ -100,124 +100,10 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "5641db76", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "repos found: 82\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ownerrepo
0akkaakka
1antirezredis
2ariyaphantomjs
3automapperautomapper
4bartazimpress.js
.........
77xbmcxbmc
78xphere-forkssymfony
79yihuiknitr
80zendframeworkzf2
81zurbfoundation
\n", - "

82 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " owner repo\n", - "0 akka akka\n", - "1 antirez redis\n", - "2 ariya phantomjs\n", - "3 automapper automapper\n", - "4 bartaz impress.js\n", - ".. ... ...\n", - "77 xbmc xbmc\n", - "78 xphere-forks symfony\n", - "79 yihui knitr\n", - "80 zendframework zf2\n", - "81 zurb foundation\n", - "\n", - "[82 rows x 2 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Query canonical repos that have sentiment-labeled comments\n", "engine = create_engine(\n", @@ -295,16 +181,7 @@ "execution_count": null, "id": "7a926ed1", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "repos selected for config generation: 82\n", - "written configs: ['akka.yml', 'redis.yml', 'phantomjs.yml', 'automapper.yml', 'impress.js.yml', 'bitcoin.yml', 'boto.yml', 'craftbukkit.yml', 'cakephp.yml', 'compass.yml', 'clojure.yml', 'slim.yml', 'diaspora.yml', 'django-cms.yml', 'django.yml', 'django-debug-toolbar.yml', 'elasticsearch.yml', 'codeigniter.yml', 'facebook-android-sdk.yml', 'folly.yml', 'hiphop-php.yml', 'php-sdk.yml', 'tornado.yml', 'thinkup.yml', 'android.yml', 'gitlabhq.yml', 'html5-boilerplate.yml', 'devtools.yml', 'chosen.yml', 'sparkleshare.yml', 'octopress.yml', 'actionbarsherlock.yml', 'blueprint-css.yml', 'http-parser.yml', 'libuv.yml', 'node.yml', 'jquery.yml', 'requests.yml', 'beanstalkd.yml', 'libgit2.yml', 'ccv.yml', 'mangos.yml', 'd3.yml', 'memcached.yml', 'sick-beard.yml', 'flask.yml', 'jekyll.yml', 'mongo.yml', 'mono.yml', 'plupload.yml', 'three.js.yml', 'homebrew.yml', 'nancy.yml', 'storm.yml', 'netty.yml', 'openframeworks.yml', 'devise.yml', 'rails.yml', 'reddit.yml', 'restsharp.yml', 'kestrel.yml', 'shiny.yml', 'miniprofiler.yml', 'sbt.yml', 'scala.yml', 'scalatra.yml', 'phpunit.yml', 'servicestack.yml', 'signalr.yml', 'symfony.yml', 'paperclip.yml', 'trinitycore.yml', 'finagle.yml', 'flockdb.yml', 'gizzard.yml', 'zipkin.yml', 'redcarpet.yml', 
'xbmc.yml', 'symfony.yml', 'knitr.yml', 'zf2.yml', 'foundation.yml']\n" - ] - } - ], + "outputs": [], "source": [ "# Build YAML configs for 82 project repos using trinitycore.yml as the base template\n", "header_lines = [\n", diff --git a/notebooks/4_add_sentiment_to_kaiaulu.ipynb b/notebooks/4_add_sentiment_to_kaiaulu.ipynb new file mode 100644 index 0000000..55eb61c --- /dev/null +++ b/notebooks/4_add_sentiment_to_kaiaulu.ipynb @@ -0,0 +1,352 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cell-nb4-title", + "metadata": {}, + "source": [ + "# Notebook 4: Add Sentiment Labels to Kaiaulu\n", + "\n", + "By now, you should have:\n", + "1. Sentiment labels in MySQL (7,122 GitHub comments labeled positive, negative, or neutral)\n", + "2. Comment data freshly downloaded from GitHub via Kaiaulu (e.g., file paths, commit SHAs, review IDs, timestamps)\n", + "\n", + "Neither is complete on its own. The Gold Standard has polarity labels but no GitHub data. Kaiaulu's output has data but no sentiment labels. We'll query the labels from MySQL, INNER JOIN them against Kaiaulu's downloaded comment data on `comment_id`, and write the result back into Kaiaulu's directory so `sentiment_analysis.Rmd` can use it directly." + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-prereqs", + "metadata": {}, + "source": [ + "### Before you start\n", + "\n", + "Two things need to be in place before running any cells:\n", + "\n", + "| What | Where it comes from |\n", + "|---|---|\n", + "| MySQL database with `comment_sentiment` table | Output of Notebook 1 |\n", + "| Kaiaulu data for your selected project | Output of running `vignettes/download_github_events.Rmd` and `vignettes/download_github_pull_request_comments.Rmd` in Kaiaulu |\n", + "\n", + "The Kaiaulu vignettes write their output to `vignettes/rawdata/github/{owner}/{repo}/` inside your local Kaiaulu directory. 
That's where this notebook reads from.\n", + "\n", + "If those CSVs are missing, go back and run the corresponding vignettes in Notebook 3 first." + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-step1-header", + "metadata": {}, + "source": [ + "### Step 1: Import dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "cell-nb4-step1-code", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "from sqlalchemy import create_engine, text" + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-step2-header", + "metadata": {}, + "source": [ + "### Step 2: Configure Project\n", + "\n", + "Set `OWNER` and `REPO` to match the project you ran the Kaiaulu vignettes for. Set `KAIAULU_REPO` to your local Kaiaulu directory. This is where the notebook will read Kaiaulu's downloaded CSVs from and where it will write the joined output. MySQL credentials should match what you used in Notebooks 1-3." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbfd4324", + "metadata": {}, + "outputs": [], + "source": [ + "# Configure these before running\n", + "OWNER = \"ADD_OWNER_HERE\" # GitHub repo owner\n", + "REPO = \"ADD_REPO_HERE\" # GitHub repo name\n", + "\n", + "# Path to your local Kaiaulu directory\n", + "KAIAULU_REPO = Path(\"PATH_TO/kaiaulu\")\n", + "\n", + "# Kaiaulu rawdata directory for this project\n", + "KAIAULU_DATA_DIR = KAIAULU_REPO / \"vignettes\" / \"rawdata\" / \"github\" / OWNER / REPO\n", + "\n", + "# MySQL connection\n", + "MYSQL_HOST = os.getenv(\"MYSQL_HOST\", \"localhost\")\n", + "MYSQL_PORT = int(os.getenv(\"MYSQL_PORT\", \"3306\"))\n", + "MYSQL_DB = os.getenv(\"MYSQL_DB\", \"github\")\n", + "MYSQL_USER = os.getenv(\"MYSQL_USER\", \"root\")\n", + "MYSQL_PASSWORD = os.getenv(\"MYSQL_PASSWORD\", \"ADD_PASSWORD_HERE\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-step3-header", + "metadata": {}, + "source": [ + "### Step 3: Query sentiment labels from MySQL\n", + "\n", + "Pull the sentiment labels for your project directly from the `comment_sentiment` table. We join through GHTorrent to filter down to just the comments belonging to `OWNER/REPO`, and grab a few context columns." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-nb4-step3-code", + "metadata": {}, + "outputs": [], + "source": [ + "engine = create_engine(\n", + " f\"mysql+pymysql://{MYSQL_USER}:{MYSQL_PASSWORD}@{MYSQL_HOST}:{MYSQL_PORT}/{MYSQL_DB}\"\n", + ")\n", + "\n", + "commit_sql = \"\"\"\n", + "SELECT\n", + " cs.ID AS comment_id,\n", + " cs.Polarity AS polarity,\n", + " cs.Text AS text,\n", + " cc.created_at AS created_at,\n", + " u.login AS author_login,\n", + " u_owner.login AS owner,\n", + " p.name AS repo\n", + "FROM comment_sentiment cs\n", + "JOIN commit_comments cc ON cs.ID = cc.comment_id\n", + "JOIN commits c ON cc.commit_id = c.id\n", + "JOIN projects p ON c.project_id = p.id\n", + "JOIN users u ON cc.user_id = u.id\n", + "JOIN users u_owner ON p.owner_id = u_owner.id\n", + "WHERE LOWER(u_owner.login) = :owner\n", + " AND LOWER(p.name) = :repo\n", + "\"\"\"\n", + "\n", + "pr_sql = \"\"\"\n", + "SELECT\n", + " cs.ID AS comment_id,\n", + " cs.Polarity AS polarity,\n", + " cs.Text AS text,\n", + " prc.created_at AS created_at,\n", + " u.login AS author_login,\n", + " u_owner.login AS owner,\n", + " p.name AS repo\n", + "FROM comment_sentiment cs\n", + "JOIN pull_request_comments prc ON cs.ID = prc.comment_id\n", + "JOIN pull_requests pr ON prc.pull_request_id = pr.id\n", + "JOIN projects p ON pr.base_repo_id = p.id\n", + "JOIN users u ON prc.user_id = u.id\n", + "JOIN users u_owner ON p.owner_id = u_owner.id\n", + "WHERE LOWER(u_owner.login) = :owner\n", + " AND LOWER(p.name) = :repo\n", + "\"\"\"\n", + "\n", + "params = {\"owner\": OWNER.lower(), \"repo\": REPO.lower()}\n", + "\n", + "with engine.connect() as con:\n", + " commit_labels = pd.read_sql(text(commit_sql), con, params=params)\n", + " pr_labels = pd.read_sql(text(pr_sql), con, params=params)\n", + "\n", + "# Deduplicate 85 comment IDs that appear in both commit_comments and pull_request_comments GHTorrent tables\n", + "combined = pd.concat([commit_labels, pr_labels], 
ignore_index=True)\n", + "project_ctx = combined.drop_duplicates(subset=\"comment_id\", keep=\"first\").copy()\n", + "\n", + "dupes_dropped = len(combined) - len(project_ctx)\n", + "print(f\"Commit comment sentiment labels: {len(commit_labels)}\")\n", + "print(f\"PR inline sentiment labels: {len(pr_labels)}\")\n", + "if dupes_dropped > 0:\n", + " print(f\"Duplicate IDs removed: {dupes_dropped} (appeared in both tables)\")\n", + "print(f\"Total sentiment labels for {REPO}: {len(project_ctx)}\")\n", + "project_ctx.head()" + ] + }, + { + "cell_type": "markdown", + "id": "4xul75ers8r", + "metadata": {}, + "source": [ + "### Step 4: Remap polarity labels to integers\n", + "\n", + "The Gold Standard uses strings (`\"positive\"`, `\"negative\"`, `\"neutral\"`). Kaiaulu's `sentiment_analysis.Rmd` expects integers: `0` = neutral, `1` = positive, `2` = negative. We remap here so the output is ready to use directly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40fgc3k5q2l", + "metadata": {}, + "outputs": [], + "source": [ + "polarity_map = {\"neutral\": 0, \"positive\": 1, \"negative\": 2}\n", + "\n", + "if project_ctx[\"polarity\"].dtype == object:\n", + " project_ctx[\"polarity\"] = project_ctx[\"polarity\"].str.lower().map(polarity_map)\n", + "\n", + "unmapped = project_ctx[\"polarity\"].isna().sum()\n", + "if unmapped > 0:\n", + " print(f\"WARNING: {unmapped} rows could not be mapped. Check for unexpected polarity strings\")\n", + "else:\n", + " counts = project_ctx[\"polarity\"].value_counts().rename({0: \"neutral\", 1: \"positive\", 2: \"negative\"})\n", + " print(\"All polarity labels mapped successfully.\")\n", + " print(counts.to_string())" + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-step4-header", + "metadata": {}, + "source": [ + "### Step 5: Load the Kaiaulu output CSVs\n", + "\n", + "Read the two CSVs that Kaiaulu's vignettes wrote into the `rawdata/` directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "cell-nb4-step4-code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Kaiaulu commit comments: 1569 rows, columns: ['comment_id', 'commit_id', 'author_login', 'author_id', 'body', 'created_at', 'updated_at']\n", + "Kaiaulu PR inline comments: 6100 rows, columns: ['review_id', 'comment_id', 'html_url', 'created_at', 'updated_at', 'comment_user_login', 'author_association', 'file_path', 'start_line', 'line', 'original_start_line', 'original_line', 'position', 'diff_hunk', 'body', 'commit_id']\n" + ] + } + ], + "source": [ + "commit_csv_path = KAIAULU_DATA_DIR / f\"{REPO}_commit_comments.csv\"\n", + "pr_csv_path = KAIAULU_DATA_DIR / f\"{REPO}_pr_inline_comments.csv\"\n", + "\n", + "kaiaulu_commit = pd.read_csv(commit_csv_path)\n", + "kaiaulu_pr = pd.read_csv(pr_csv_path)\n", + "\n", + "print(f\"Kaiaulu commit comments: {len(kaiaulu_commit)} rows, columns: {list(kaiaulu_commit.columns)}\")\n", + "print(f\"Kaiaulu PR inline comments: {len(kaiaulu_pr)} rows, columns: {list(kaiaulu_pr.columns)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-step5-header", + "metadata": {}, + "source": [ + "### Step 6: INNER JOIN - Commit Comments\n", + "\n", + "Join the MySQL sentiment labels against Kaiaulu's commit comments on `comment_id`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-nb4-step5-code", + "metadata": {}, + "outputs": [], + "source": [ + "commit_joined = project_ctx.merge(\n", + " kaiaulu_commit,\n", + " on='comment_id',\n", + " how='inner',\n", + " suffixes=('_gold', '_kaiaulu')\n", + ")\n", + "\n", + "commit_dropped = len(project_ctx) - len(commit_joined)\n", + "print(f\"{REPO} rows in sentiment labels: {len(project_ctx)}\")\n", + "print(f\"Rows matched in Kaiaulu commit comments: {len(commit_joined)}\")\n", + "\n", + "print(\"\\nJoined commit comments (first 5 rows):\")\n", + "display(commit_joined.head())\n", + "\n", + "out_path = KAIAULU_DATA_DIR / f\"{REPO}_sentiment_commit_comments_joined.csv\"\n", + "commit_joined.to_csv(out_path, index=False)\n", + "print(f\"\\nSaved: {out_path}\")" + ] + }, + { + "cell_type": "markdown", + "id": "daef76ca", + "metadata": {}, + "source": [ + "### Step 7: INNER JOIN - PR inline comments\n", + "\n", + "Same join as Step 6, but against Kaiaulu's PR inline comments." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8ba23d8", + "metadata": {}, + "outputs": [], + "source": [ + "pr_joined = project_ctx.merge(\n", + " kaiaulu_pr,\n", + " on='comment_id',\n", + " how='inner',\n", + " suffixes=('_gold', '_kaiaulu')\n", + ")\n", + "\n", + "pr_dropped = len(project_ctx) - len(pr_joined)\n", + "print(f\"{REPO} rows in sentiment labels: {len(project_ctx)}\")\n", + "print(f\"Rows matched in Kaiaulu PR inline comments: {len(pr_joined)}\")\n", + "\n", + "print(\"\\nJoined PR inline comments (first 5 rows):\")\n", + "display(pr_joined.head())\n", + "\n", + "out_path = KAIAULU_DATA_DIR / f\"{REPO}_sentiment_pr_inline_comments_joined.csv\"\n", + "pr_joined.to_csv(out_path, index=False)\n", + "print(f\"\\nSaved: {out_path}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-nb4-next-steps", + "metadata": {}, + "source": [ + "### You're done!\n", + "\n", + "The joined CSVs are saved into Kaiaulu's `vignettes/rawdata/` directory, right next to the raw downloads they came from:\n", + "\n", + "```\n", + "vignettes/rawdata/github/{owner}/{repo}/\n", + " {repo}_sentiment_commit_comments_joined.csv\n", + " {repo}_sentiment_pr_inline_comments_joined.csv\n", + "```\n", + "\n", + "Each row has polarity as an integer (`0` = neutral, `1` = positive, `2` = negative) and all the data Kaiaulu's `sentiment_analysis.Rmd` needs (comment body, author, timestamp, file path, commit SHA).\n", + "\n", + "**To run for a different project:** update `OWNER` and `REPO` in Step 2, make sure the Kaiaulu vignettes have run for that project, and re-run Steps 3-7." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 4ad05b680d7870308539b6e41e916d6b9077c16f Mon Sep 17 00:00:00 2001 From: splimon Date: Fri, 8 May 2026 01:44:16 -1000 Subject: [PATCH 6/8] Add CSVs from Github dataset to /data folder --- data/PR inline comments/.DS_Store | Bin 0 -> 6148 bytes ...ck_sentiment_pr_inline_comments_joined.csv | 19 + ...ka_sentiment_pr_inline_comments_joined.csv | 6514 +++++++ ...id_sentiment_pr_inline_comments_joined.csv | 81 + ...er_sentiment_pr_inline_comments_joined.csv | 18 + ...in_sentiment_pr_inline_comments_joined.csv | 800 + ...to_sentiment_pr_inline_comments_joined.csv | 189 + ...hp_sentiment_pr_inline_comments_joined.csv | 4243 +++++ ...en_sentiment_pr_inline_comments_joined.csv | 272 + ...er_sentiment_pr_inline_comments_joined.csv | 39 + ...ss_sentiment_pr_inline_comments_joined.csv | 45 + ...d3_sentiment_pr_inline_comments_joined.csv | 1136 ++ ...se_sentiment_pr_inline_comments_joined.csv | 269 + ...ls_sentiment_pr_inline_comments_joined.csv | 61 + ...ms_sentiment_pr_inline_comments_joined.csv | 135 + ...go_sentiment_pr_inline_comments_joined.csv | 1204 ++ ...ch_sentiment_pr_inline_comments_joined.csv | 1542 ++ ...le_sentiment_pr_inline_comments_joined.csv | 1002 ++ ...db_sentiment_pr_inline_comments_joined.csv | 96 + ...on_sentiment_pr_inline_comments_joined.csv | 132 + ...hq_sentiment_pr_inline_comments_joined.csv | 1304 ++ ...rd_sentiment_pr_inline_comments_joined.csv | 71 + ...te_sentiment_pr_inline_comments_joined.csv | 473 + ...er_sentiment_pr_inline_comments_joined.csv | 33 + ...js_sentiment_pr_inline_comments_joined.csv | 54 + 
...ry_sentiment_pr_inline_comments_joined.csv | 4431 +++++ ...tr_sentiment_pr_inline_comments_joined.csv | 222 + ...t2_sentiment_pr_inline_comments_joined.csv | 6717 ++++++++ ...uv_sentiment_pr_inline_comments_joined.csv | 564 + ...er_sentiment_pr_inline_comments_joined.csv | 30 + ...go_sentiment_pr_inline_comments_joined.csv | 78 + ...no_sentiment_pr_inline_comments_joined.csv | 132 + ...ty_sentiment_pr_inline_comments_joined.csv | 4481 +++++ ...de_sentiment_pr_inline_comments_joined.csv | 1718 ++ ...ss_sentiment_pr_inline_comments_joined.csv | 21 + ...ip_sentiment_pr_inline_comments_joined.csv | 147 + ...js_sentiment_pr_inline_comments_joined.csv | 143 + ...ls_sentiment_pr_inline_comments_joined.csv | 123 + ...et_sentiment_pr_inline_comments_joined.csv | 69 + ...it_sentiment_pr_inline_comments_joined.csv | 344 + ...rp_sentiment_pr_inline_comments_joined.csv | 84 + ...bt_sentiment_pr_inline_comments_joined.csv | 13 + ...la_sentiment_pr_inline_comments_joined.csv | 1836 ++ ...ck_sentiment_pr_inline_comments_joined.csv | 85 + ...rd_sentiment_pr_inline_comments_joined.csv | 34 + ...lr_sentiment_pr_inline_comments_joined.csv | 1831 ++ ...rm_sentiment_pr_inline_comments_joined.csv | 444 + ...up_sentiment_pr_inline_comments_joined.csv | 293 + ...js_sentiment_pr_inline_comments_joined.csv | 8 + ...do_sentiment_pr_inline_comments_joined.csv | 155 + ...re_sentiment_pr_inline_comments_joined.csv | 1476 ++ ...mc_sentiment_pr_inline_comments_joined.csv | 2914 ++++ ...f2_sentiment_pr_inline_comments_joined.csv | 1503 ++ ...in_sentiment_pr_inline_comments_joined.csv | 862 + ...rlock_sentiment_commit_comments_joined.csv | 19 + data/commit comments/akka_commit_comments.csv | 14089 ++++++++++++++++ ...apper_sentiment_commit_comments_joined.csv | 3 + ...talkd_sentiment_commit_comments_joined.csv | 5 + ...tcoin_sentiment_commit_comments_joined.csv | 10 + ...t-css_sentiment_commit_comments_joined.csv | 3 + .../boto_sentiment_commit_comments_joined.csv | 15 + 
...kephp_sentiment_commit_comments_joined.csv | 81 + .../ccv_sentiment_commit_comments_joined.csv | 3 + ...hosen_sentiment_commit_comments_joined.csv | 12 + ...ojure_sentiment_commit_comments_joined.csv | 7 + ...niter_sentiment_commit_comments_joined.csv | 51 + ...mpass_sentiment_commit_comments_joined.csv | 29 + .../d3_sentiment_commit_comments_joined.csv | 9 + ...evise_sentiment_commit_comments_joined.csv | 81 + ...tools_sentiment_commit_comments_joined.csv | 5 + ...spora_sentiment_commit_comments_joined.csv | 430 + ...o-cms_sentiment_commit_comments_joined.csv | 15 + ...olbar_sentiment_commit_comments_joined.csv | 16 + ...jango_sentiment_commit_comments_joined.csv | 115 + ...earch_sentiment_commit_comments_joined.csv | 21 + ...d-sdk_sentiment_commit_comments_joined.csv | 28 + ...nagle_sentiment_commit_comments_joined.csv | 5 + ...ockdb_sentiment_commit_comments_joined.csv | 1 + ...folly_sentiment_commit_comments_joined.csv | 3 + ...ation_sentiment_commit_comments_joined.csv | 29 + ...labhq_sentiment_commit_comments_joined.csv | 151 + ...zzard_sentiment_commit_comments_joined.csv | 23 + ...ebrew_sentiment_commit_comments_joined.csv | 232 + ...plate_sentiment_commit_comments_joined.csv | 269 + ...arser_sentiment_commit_comments_joined.csv | 14 + ...ss.js_sentiment_commit_comments_joined.csv | 3 + ...ekyll_sentiment_commit_comments_joined.csv | 63 + ...query_sentiment_commit_comments_joined.csv | 627 + ...strel_sentiment_commit_comments_joined.csv | 3 + ...knitr_sentiment_commit_comments_joined.csv | 7 + ...bgit2_sentiment_commit_comments_joined.csv | 38 + ...libuv_sentiment_commit_comments_joined.csv | 109 + ...angos_sentiment_commit_comments_joined.csv | 1643 ++ ...ached_sentiment_commit_comments_joined.csv | 3 + ...filer_sentiment_commit_comments_joined.csv | 5 + ...mongo_sentiment_commit_comments_joined.csv | 7 + ...nancy_sentiment_commit_comments_joined.csv | 17 + ...netty_sentiment_commit_comments_joined.csv | 378 + 
.../node_sentiment_commit_comments_joined.csv | 333 + ...press_sentiment_commit_comments_joined.csv | 7 + ...works_sentiment_commit_comments_joined.csv | 120 + ...rclip_sentiment_commit_comments_joined.csv | 67 + ...tomjs_sentiment_commit_comments_joined.csv | 27 + ...punit_sentiment_commit_comments_joined.csv | 54 + ...pload_sentiment_commit_comments_joined.csv | 18 + ...rails_sentiment_commit_comments_joined.csv | 1601 ++ ...arpet_sentiment_commit_comments_joined.csv | 35 + ...eddit_sentiment_commit_comments_joined.csv | 15 + ...sharp_sentiment_commit_comments_joined.csv | 5 + .../sbt_sentiment_commit_comments_joined.csv | 7 + ...scala_sentiment_commit_comments_joined.csv | 32 + ...latra_sentiment_commit_comments_joined.csv | 9 + ...stack_sentiment_commit_comments_joined.csv | 19 + ...shiny_sentiment_commit_comments_joined.csv | 9 + ...beard_sentiment_commit_comments_joined.csv | 63 + ...gnalr_sentiment_commit_comments_joined.csv | 17 + ...share_sentiment_commit_comments_joined.csv | 7 + ...storm_sentiment_commit_comments_joined.csv | 5 + ...inkup_sentiment_commit_comments_joined.csv | 104 + ...ee.js_sentiment_commit_comments_joined.csv | 317 + ...rnado_sentiment_commit_comments_joined.csv | 7 + ...ycore_sentiment_commit_comments_joined.csv | 2256 +++ .../xbmc_sentiment_commit_comments_joined.csv | 508 + .../zf2_sentiment_commit_comments_joined.csv | 3 + 124 files changed, 74812 insertions(+) create mode 100644 data/PR inline comments/.DS_Store create mode 100644 data/PR inline comments/actionbarsherlock_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/akka_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/android_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/automapper_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/bitcoin_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline 
comments/boto_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/cakephp_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/chosen_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/codeigniter_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/compass_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/d3_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/devise_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/devtools_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/django-cms_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/django_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/elasticsearch_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/finagle_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/flockdb_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/foundation_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/gitlabhq_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/gizzard_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/html5-boilerplate_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/http-parser_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/impress.js_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/jquery_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/knitr_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/libgit2_sentiment_pr_inline_comments_joined.csv create 
mode 100644 data/PR inline comments/libuv_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/miniprofiler_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/mongo_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/mono_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/netty_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/node_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/octopress_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/paperclip_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/phantomjs_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/rails_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/redcarpet_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/reddit_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/restsharp_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/sbt_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/scala_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/servicestack_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/sick-beard_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/signalr_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/storm_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/thinkup_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/three.js_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/tornado_sentiment_pr_inline_comments_joined.csv 
create mode 100644 data/PR inline comments/trinitycore_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/xbmc_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/zf2_sentiment_pr_inline_comments_joined.csv create mode 100644 data/PR inline comments/zipkin_sentiment_pr_inline_comments_joined.csv create mode 100644 data/commit comments/actionbarsherlock_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/akka_commit_comments.csv create mode 100644 data/commit comments/automapper_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/beanstalkd_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/bitcoin_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/blueprint-css_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/boto_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/cakephp_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/ccv_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/chosen_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/clojure_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/codeigniter_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/compass_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/d3_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/devise_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/devtools_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/diaspora_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/django-cms_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/django-debug-toolbar_sentiment_commit_comments_joined.csv create mode 
100644 data/commit comments/django_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/elasticsearch_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/facebook-android-sdk_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/finagle_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/flockdb_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/folly_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/foundation_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/gitlabhq_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/gizzard_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/homebrew_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/html5-boilerplate_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/http-parser_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/impress.js_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/jekyll_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/jquery_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/kestrel_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/knitr_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/libgit2_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/libuv_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/mangos_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/memcached_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/miniprofiler_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/mongo_sentiment_commit_comments_joined.csv create mode 100644 
data/commit comments/nancy_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/netty_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/node_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/octopress_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/openframeworks_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/paperclip_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/phantomjs_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/phpunit_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/plupload_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/rails_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/redcarpet_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/reddit_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/restsharp_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/sbt_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/scala_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/scalatra_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/servicestack_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/shiny_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/sick-beard_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/signalr_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/sparkleshare_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/storm_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/thinkup_sentiment_commit_comments_joined.csv create mode 100644 data/commit 
comments/three.js_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/tornado_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/trinitycore_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/xbmc_sentiment_commit_comments_joined.csv create mode 100644 data/commit comments/zf2_sentiment_commit_comments_joined.csv diff --git a/data/PR inline comments/.DS_Store b/data/PR inline comments/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 ++ */ ++package akka.remote.netty ++ ++import java.net.InetSocketAddress ++import org.jboss.netty.util.HashedWheelTimer ++import org.jboss.netty.bootstrap.ClientBootstrap ++import org.jboss.netty.channel.group.DefaultChannelGroup ++import org.jboss.netty.channel.{ ChannelHandler, StaticChannelPipeline, SimpleChannelUpstreamHandler, MessageEvent, ExceptionEvent, ChannelStateEvent, ChannelPipelineFactory, ChannelPipeline, ChannelHandlerContext, ChannelFuture, Channel } ++import org.jboss.netty.handler.codec.frame.{ LengthFieldPrepender, LengthFieldBasedFrameDecoder } ++import org.jboss.netty.handler.execution.ExecutionHandler ++import org.jboss.netty.handler.timeout.{ ReadTimeoutHandler, ReadTimeoutException } ++import akka.remote.RemoteProtocol.{ RemoteControlProtocol, CommandType, AkkaRemoteProtocol } ++import akka.remote.{ RemoteProtocol, RemoteMessage, RemoteLifeCycleEvent, RemoteClientStarted, RemoteClientShutdown, RemoteClientException, RemoteClientError, RemoteClientDisconnected, RemoteClientConnected } ++import akka.actor.{ simpleName, Address } ++import akka.AkkaException ++import akka.event.Logging ++import akka.util.Switch ++import akka.actor.ActorRef ++import 
org.jboss.netty.channel.ChannelFutureListener ++import akka.remote.RemoteClientWriteFailed ++import java.net.InetAddress ++import org.jboss.netty.util.TimerTask ++import org.jboss.netty.util.Timeout ++import java.util.concurrent.TimeUnit ++ ++class RemoteClientMessageBufferException(message: String, cause: Throwable) extends AkkaException(message, cause) { ++ def this(msg: String) = this(msg, null) ++} ++ ++/** ++ * This is the abstract baseclass for netty remote clients, currently there's only an ++ * ActiveRemoteClient, but others could be feasible, like a PassiveRemoteClient that ++ * reuses an already established connection. ++ */ ++abstract class RemoteClient private[akka] ( ++ val netty: NettyRemoteTransport, ++ val remoteAddress: Address) { ++ ++ val log = Logging(netty.system, ""RemoteClient"") ++ ++ val name = simpleName(this) + ""@"" + remoteAddress ++ ++ private[remote] val runSwitch = new Switch() ++ ++ private[remote] def isRunning = runSwitch.isOn ++ ++ protected def currentChannel: Channel ++ ++ def connect(reconnectIfAlreadyConnected: Boolean = false): Boolean ++ ++ def shutdown(): Boolean ++ ++ def isBoundTo(address: Address): Boolean = remoteAddress == address ++ ++ /** ++ * Converts the message to the wireprotocol and sends the message across the wire ++ */ ++ def send(message: Any, senderOption: Option[ActorRef], recipient: ActorRef): Unit = if (isRunning) { ++ if (netty.remoteSettings.LogSend) log.debug(""Sending message {} from {} to {}"", message, senderOption, recipient) ++ send((message, senderOption, recipient)) ++ } else { ++ val exception = new RemoteClientException(""RemoteModule client is not running, make sure you have invoked 'RemoteClient.connect()' before using it."", netty, remoteAddress) ++ netty.notifyListeners(RemoteClientError(exception, netty, remoteAddress)) ++ throw exception ++ } ++ ++ /** ++ * Sends the message across the wire ++ */ ++ private def send(request: (Any, Option[ActorRef], ActorRef)): Unit = { ++ try { ++ val 
channel = currentChannel ++ val f = channel.write(request) ++ f.addListener( ++ new ChannelFutureListener { ++ def operationComplete(future: ChannelFuture) { ++ if (future.isCancelled || !future.isSuccess) { ++ netty.notifyListeners(RemoteClientWriteFailed(request, future.getCause, netty, remoteAddress)) ++ } ++ } ++ }) ++ // Check if we should back off ++ if (!channel.isWritable) { ++ val backoff = netty.settings.BackoffTimeout ++ if (backoff.length > 0 && !f.await(backoff.length, backoff.unit)) f.cancel() //Waited as long as we could, now back off ++ } ++ } catch { ++ case e: Exception ⇒ netty.notifyListeners(RemoteClientError(e, netty, remoteAddress)) ++ } ++ } ++ ++ override def toString = name ++} ++ ++/** ++ * RemoteClient represents a connection to an Akka node. Is used to send messages to remote actors on the node. ++ */ ++class ActiveRemoteClient private[akka] ( ++ netty: NettyRemoteTransport, ++ remoteAddress: Address, ++ localAddress: Address) ++ extends RemoteClient(netty, remoteAddress) { ++ ++ import netty.settings ++ ++ //TODO rewrite to a wrapper object (minimize volatile access and maximize encapsulation) ++ @volatile ++ private var bootstrap: ClientBootstrap = _ ++ @volatile ++ private var connection: ChannelFuture = _ ++ @volatile ++ private[remote] var openChannels: DefaultChannelGroup = _ ++ @volatile ++ private var executionHandler: ExecutionHandler = _ ++ ++ @volatile ++ private var reconnectionTimeWindowStart = 0L ++ ++ def notifyListeners(msg: RemoteLifeCycleEvent): Unit = netty.notifyListeners(msg) ++ ++ def currentChannel = connection.getChannel ++ ++ /** ++ * Connect to remote server. 
++ */ ++ def connect(reconnectIfAlreadyConnected: Boolean = false): Boolean = { ++ ++ def sendSecureCookie(connection: ChannelFuture) { ++ val handshake = RemoteControlProtocol.newBuilder.setCommandType(CommandType.CONNECT) ++ if (settings.SecureCookie.nonEmpty) handshake.setCookie(settings.SecureCookie.get) ++ handshake.setOrigin(RemoteProtocol.AddressProtocol.newBuilder ++ .setSystem(localAddress.system) ++ .setHostname(localAddress.host.get) ++ .setPort(localAddress.port.get) ++ .build) ++ connection.getChannel.write(netty.createControlEnvelope(handshake.build)) ++ } ++ ++ def attemptReconnect(): Boolean = { ++ val remoteIP = InetAddress.getByName(remoteAddress.host.get) ++ log.debug(""Remote client reconnecting to [{}|{}]"", remoteAddress, remoteIP) ++ connection = bootstrap.connect(new InetSocketAddress(remoteIP, remoteAddress.port.get)) ++ openChannels.add(connection.awaitUninterruptibly.getChannel) // Wait until the connection attempt succeeds or fails. ++ ++ if (!connection.isSuccess) { ++ notifyListeners(RemoteClientError(connection.getCause, netty, remoteAddress)) ++ false ++ } else { ++ sendSecureCookie(connection) ++ true ++ } ++ } ++ ++ runSwitch switchOn { ++ openChannels = new DefaultDisposableChannelGroup(classOf[RemoteClient].getName) ++ ++ executionHandler = new ExecutionHandler(netty.executor) ++ ++ bootstrap = new ClientBootstrap(netty.clientChannelFactory) ++ bootstrap.setPipelineFactory(new ActiveRemoteClientPipelineFactory(name, bootstrap, executionHandler, remoteAddress, this)) ++ bootstrap.setOption(""tcpNoDelay"", true) ++ bootstrap.setOption(""keepAlive"", true) ++ bootstrap.setOption(""connectTimeoutMillis"", settings.ConnectionTimeout.toMillis) ++ ++ val remoteIP = InetAddress.getByName(remoteAddress.host.get) ++ log.debug(""Starting remote client connection to [{}|{}]"", remoteAddress, remoteIP) ++ ++ connection = bootstrap.connect(new InetSocketAddress(remoteIP, remoteAddress.port.get)) ++ ++ 
openChannels.add(connection.awaitUninterruptibly.getChannel) // Wait until the connection attempt succeeds or fails. ++ ++ if (!connection.isSuccess) { ++ notifyListeners(RemoteClientError(connection.getCause, netty, remoteAddress)) ++ false ++ } else { ++ sendSecureCookie(connection) ++ notifyListeners(RemoteClientStarted(netty, remoteAddress)) ++ true ++ } ++ } match { ++ case true ⇒ true ++ case false if reconnectIfAlreadyConnected ⇒ ++ connection.getChannel.close() ++ openChannels.remove(connection.getChannel) ++ ++ log.debug(""Remote client reconnecting to [{}]"", remoteAddress) ++ attemptReconnect() ++ ++ case false ⇒ false ++ } ++ } ++ ++ // Please note that this method does _not_ remove the ARC from the NettyRemoteClientModule's map of clients ++ def shutdown() = runSwitch switchOff { ++ log.debug(""Shutting down remote client [{}]"", name) ++ ++ notifyListeners(RemoteClientShutdown(netty, remoteAddress)) ++ try { ++ if ((connection ne null) && (connection.getChannel ne null)) ++ connection.getChannel.close() ++ } finally { ++ try { ++ if (openChannels ne null) openChannels.close.awaitUninterruptibly() ++ } finally { ++ connection = null ++ executionHandler = null ++ } ++ } ++ ++ log.debug(""[{}] has been shut down"", name) ++ } ++ ++ private[akka] def isWithinReconnectionTimeWindow: Boolean = { ++ if (reconnectionTimeWindowStart == 0L) { ++ reconnectionTimeWindowStart = System.currentTimeMillis ++ true ++ } else { ++ val timeLeft = (settings.ReconnectionTimeWindow.toMillis - (System.currentTimeMillis - reconnectionTimeWindowStart)) > 0 ++ if (timeLeft) ++ log.info(""Will try to reconnect to remote server for another [{}] milliseconds"", timeLeft) ++ ++ timeLeft ++ } ++ } ++ ++ private[akka] def resetReconnectionTimeWindow = reconnectionTimeWindowStart = 0L ++} ++ ++@ChannelHandler.Sharable ++class ActiveRemoteClientHandler( ++ val name: String, ++ val bootstrap: ClientBootstrap, ++ val remoteAddress: Address, ++ val timer: HashedWheelTimer, ++ val client: 
ActiveRemoteClient) ++ extends SimpleChannelUpstreamHandler { ++ ++ def runOnceNow(thunk: ⇒ Unit): Unit = timer.newTimeout(new TimerTask() { ++ def run(timeout: Timeout) = try { thunk } finally { timeout.cancel() } ++ }, 0, TimeUnit.MILLISECONDS) ++ ++ override def messageReceived(ctx: ChannelHandlerContext, event: MessageEvent) { ++ try { ++ event.getMessage match { ++ case arp: AkkaRemoteProtocol if arp.hasInstruction ⇒ ++ val rcp = arp.getInstruction ++ rcp.getCommandType match { ++ case CommandType.SHUTDOWN ⇒ runOnceNow { client.netty.shutdownClientConnection(remoteAddress) } ++ case _ ⇒ //Ignore others ++ } ++ ++ case arp: AkkaRemoteProtocol if arp.hasMessage ⇒ ++ client.netty.receiveMessage(new RemoteMessage(arp.getMessage, client.netty.system)) ++ ++ case other ⇒ ++ throw new RemoteClientException(""Unknown message received in remote client handler: "" + other, client.netty, client.remoteAddress) ++ } ++ } catch { ++ case e: Exception ⇒ client.notifyListeners(RemoteClientError(e, client.netty, client.remoteAddress)) ++ } ++ } ++ ++ override def channelClosed(ctx: ChannelHandlerContext, event: ChannelStateEvent) = client.runSwitch ifOn { ++ if (client.isWithinReconnectionTimeWindow) { ++ timer.newTimeout(new TimerTask() { ++ def run(timeout: Timeout) = ++ if (client.isRunning) { ++ client.openChannels.remove(event.getChannel) ++ client.connect(reconnectIfAlreadyConnected = true) ++ } ++ }, client.netty.settings.ReconnectDelay.toMillis, TimeUnit.MILLISECONDS) ++ } else runOnceNow { ++ client.netty.shutdownClientConnection(remoteAddress) // spawn in another thread ++ } ++ } ++ ++ override def channelConnected(ctx: ChannelHandlerContext, event: ChannelStateEvent) = { ++ try { ++ client.notifyListeners(RemoteClientConnected(client.netty, client.remoteAddress)) ++ client.resetReconnectionTimeWindow ++ } catch { ++ case e: Exception ⇒ client.notifyListeners(RemoteClientError(e, client.netty, client.remoteAddress)) ++ } ++ } ++ ++ override def 
channelDisconnected(ctx: ChannelHandlerContext, event: ChannelStateEvent) = { ++ client.notifyListeners(RemoteClientDisconnected(client.netty, client.remoteAddress)) ++ } ++ ++ override def exceptionCaught(ctx: ChannelHandlerContext, event: ExceptionEvent) = { ++ val cause = event.getCause ++ if (cause ne null) { ++ client.notifyListeners(RemoteClientError(cause, client.netty, client.remoteAddress)) ++ cause match { ++ case e: ReadTimeoutException ⇒ ++ runOnceNow { ++ client.netty.shutdownClientConnection(remoteAddress) // spawn in another thread ++ } ++ case e: Exception ⇒ event.getChannel.close() ++ } ++ ++ } else client.notifyListeners(RemoteClientError(new Exception(""Unknown cause""), client.netty, client.remoteAddress)) ++ } ++} ++ ++class ActiveRemoteClientPipelineFactory( ++ name: String, ++ bootstrap: ClientBootstrap, ++ executionHandler: ExecutionHandler, ++ remoteAddress: Address, ++ client: ActiveRemoteClient) extends ChannelPipelineFactory { ++ ++ import client.netty.settings ++ ++ def getPipeline: ChannelPipeline = { ++ val timeout = new ReadTimeoutHandler(client.netty.timer, settings.ReadTimeout.length, settings.ReadTimeout.unit) ++ val lenDec = new LengthFieldBasedFrameDecoder(settings.MessageFrameSize, 0, 4, 0, 4) ++ val lenPrep = new LengthFieldPrepender(4) ++ val messageDec = new RemoteMessageDecoder ++ val messageEnc = new RemoteMessageEncoder(client.netty) ++ val remoteClient = new ActiveRemoteClientHandler(name, bootstrap, remoteAddress, client.netty.timer, client) ++ ++ new StaticChannelPipeline(timeout, lenDec, messageDec, lenPrep, messageEnc, executionHandler, remoteClient) ++ } ++} ++ ++class PassiveRemoteClient(val currentChannel: Channel, ++ netty: NettyRemoteTransport, ++ remoteAddress: Address) ++ extends RemoteClient(netty, remoteAddress) { ++ ++ def connect(reconnectIfAlreadyConnected: Boolean = false): Boolean = runSwitch switchOn { ++ netty.notifyListeners(RemoteClientStarted(netty, remoteAddress)) ++ log.debug(""Starting remote 
client connection to [{}]"", remoteAddress) ++ } ++ ++ def shutdown() = runSwitch switchOff { ++ log.debug(""Shutting down remote client [{}]"", name) ++ ++ netty.notifyListeners(RemoteClientShutdown(netty, remoteAddress)) ++ log.debug(""[{}] has been shut down"", name) ++ } ++}","Is anything of the file above changed? I don't like these sweeping changes :p +",4fb0858e557232e5f4a7e5b364d1697c0e21bdd1 +367173,0,"If you add features to the view, please always add a corresponding test to AbstractLayoutTest to make sure the same functionality is supported by all rendering engines. I will add this test now.""",2012-01-19 04:04:51,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/223#discussion_r367173,2012-01-19T15:04:51Z,2012-01-19T22:37:20Z,viktorklang,CONTRIBUTOR,akka-transactor/src/main/scala/akka/transactor/Coordinated.scala,,129.0,,,15,"@@ -125,7 +126,7 @@ class Coordinated(val message: Any, member: CommitBarrier.Member) { + * + * @throws CoordinatedTransactionException if the coordinated transaction fails. + */ +- def atomic[T](body: InTxn ⇒ T): T = { ++ def atomic[A](body: InTxn ⇒ A): A = {","A is much better than T +",eecb2a624745f1d3f45c4fd99f0b442c0390f606 +506731,0,"Because I needed a specific apply method to create the Node instance. 
Same signature as the generated one but different impl: def apply(name: String): Node = new Node(hash(name))""",2012-03-01 02:50:39,jboner,akka,akka,,https://github.com/akka/akka-core/pull/329#discussion_r506731,2012-03-01T13:50:39Z,2012-03-12T18:22:17Z,jboner,CONTRIBUTOR,akka-cluster/src/main/scala/akka/cluster/VectorClock.scala,,,,,1,"@@ -5,138 +5,199 @@ + package akka.cluster + + import akka.AkkaException ++import akka.event.Logging ++import akka.actor.ActorSystem ++ ++import System.{ currentTimeMillis ⇒ newTimestamp } ++import java.security.MessageDigest ++import java.util.concurrent.atomic.AtomicLong + + class VectorClockException(message: String) extends AkkaException(message) + + /** + * Trait to be extended by classes that wants to be versioned using a VectorClock. + */ +-trait Versioned { ++trait Versioned[T] { + def version: VectorClock ++ def +(node: VectorClock.Node): T + } + + /** + * Utility methods for comparing Versioned instances. + */ + object Versioned { +- def latestVersionOf[T <: Versioned](versioned1: T, versioned2: T): T = { +- (versioned1.version compare versioned2.version) match { +- case VectorClock.Before ⇒ versioned2 // version 1 is BEFORE (older), use version 2 +- case VectorClock.After ⇒ versioned1 // version 1 is AFTER (newer), use version 1 +- case VectorClock.Concurrent ⇒ versioned1 // can't establish a causal relationship between versions => conflict - keeping version 1 ++ ++ /** ++ * The result of comparing two Versioned objects. ++ * Either: ++ * {{{ ++ * 1) v1 is BEFORE v2 => Before ++ * 2) v1 is AFTER t2 => After ++ * 3) v1 happens CONCURRENTLY to v2 => Concurrent ++ * }}} ++ */ ++ sealed trait Ordering ++ case object Before extends Ordering ++ case object After extends Ordering ++ case object Concurrent extends Ordering ++ ++ /** ++ * Returns or 'Ordering' for the two 'Versioned' instances. 
++ */ ++ def compare[T <: Versioned[T]](versioned1: Versioned[T], versioned2: Versioned[T]): Ordering = { ++ if (versioned1.version <> versioned2.version) Concurrent ++ else if (versioned1.version < versioned2.version) Before ++ else After ++ } ++ ++ /** ++ * Returns the Versioned that have the latest version. ++ */ ++ def latestVersionOf[T <: Versioned[T]](versioned1: T, versioned2: T): T = { ++ compare(versioned1, versioned2) match { ++ case Concurrent ⇒ versioned2 ++ case Before ⇒ versioned2 ++ case After ⇒ versioned1 + } + } + } + + /** +- * Representation of a Vector-based clock (counting clock), inspired by Lamport logical clocks. +- * {{ +- * Reference: +- * 1) Leslie Lamport (1978). ""Time, clocks, and the ordering of events in a distributed system"". Communications of the ACM 21 (7): 558-565. +- * 2) Friedemann Mattern (1988). ""Virtual Time and Global States of Distributed Systems"". Workshop on Parallel and Distributed Algorithms: pp. 215-226 +- * }} ++ * VectorClock module with helper classes and methods. ++ * ++ * Based on code from the 'vlock' VectorClock library by Coda Hale. + */ +-case class VectorClock( +- versions: Vector[VectorClock.Entry] = Vector.empty[VectorClock.Entry], +- timestamp: Long = System.currentTimeMillis) { +- import VectorClock._ ++object VectorClock { + +- def compare(other: VectorClock): Ordering = VectorClock.compare(this, other) ++ /** ++ * Hash representation of a versioned node name. ++ */ ++ class Node private (val name: String) extends Serializable {","Because I needed a specific apply method to create the Node instance. Same signature as the generated one but different impl: + def apply(name: String): Node = new Node(hash(name)) +",cf3fa9fa3ce9e9312db0922370c93ce1af9db7c8 +419666,0,"Who do you handle capacity violations? 
(In case of bounded Deque)""",2012-02-06 06:23:31,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/307#discussion_r419666,2012-02-06T17:23:31Z,2012-02-28T09:22:30Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/Stash.scala,,,,,1,"@@ -0,0 +1,97 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. ++ */ ++package akka.actor ++ ++import akka.dispatch.{ Envelope, DequeBasedMessageQueue } ++ ++/** ++ * The `Stash` trait enables an actor to temporarily stash away messages that can not or ++ * should not be handled using the actor's current behavior. ++ *

++ * Example: ++ *

++ *    class ActorWithProtocol extends Actor with Stash {
++ *      def receive = {
++ *        case ""open"" ⇒
++ *          unstashAll {
++ *            case ""write"" ⇒ // do writing...
++ *            case ""close"" ⇒
++ *              unstashAll()
++ *              context.unbecome()
++ *            case msg ⇒ stash()
++ *          }
++ *        case ""done"" ⇒ // done
++ *        case msg    ⇒ stash()
++ *      }
++ *    }
++ *  
++ * ++ * Note that the `Stash` trait can only be used together with actors that have a deque-based ++ * mailbox. Actors can be configured to use a deque-based mailbox using a configuration like ++ * the following: ++ *
++ *  akka {
++ *    actor {
++ *      default-dispatcher {
++ *        mailboxType = ""akka.dispatch.UnboundedDequeBasedMailbox""
++ *      }
++ *    }
++ *  }
++ *  
++ */ ++trait Stash { ++ thisActor: Actor ⇒ ++ ++ /* The private stash of the actor. It is only accessible using `stash()` and ++ * `unstashAll()`. ++ */ ++ private[this] var theStash = Vector.empty[Envelope] ++ ++ /* The actor's deque-based message queue. ++ * `mailbox.queue` is the underlying `Deque`. ++ */ ++ private[this] val mailbox: DequeBasedMessageQueue = { ++ context.asInstanceOf[ActorCell].mailbox match { ++ case queue: DequeBasedMessageQueue ⇒ queue ++ case other ⇒ throw new ActorInitializationException(self, ""UnboundedDequeBasedMailbox required, got: "" + other.getClass()) ++ } ++ } ++ ++ /** ++ * Adds the current message (the message that the actor received last) to the ++ * actor's stash. ++ */ ++ def stash(): Unit = theStash :+= context.asInstanceOf[ActorCell].currentMessage ++ ++ /** ++ * Prepends all messages in the stash to the mailbox, and then clears the stash. ++ */ ++ def unstashAll(): Unit = { ++ theStash.reverseIterator foreach mailbox.queue.addFirst","Who do you handle capacity violations? 
(In case of bounded Deque) +",8ea949857b39388ac25bdc08df7feee3757f3ee9 +362698,2,"Still don't like this one""",2012-01-18 02:06:53,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/217#discussion_r362698,2012-01-18T13:06:53Z,2012-01-18T16:57:19Z,viktorklang,CONTRIBUTOR,akka-camel/src/main/scala/akka/camel/internal/component/ActorComponent.scala,,,,,1,"@@ -0,0 +1,303 @@ ++package akka.camel.internal.component ++ ++/** ++ * Copyright (C) 2009-2010 Scalable Solutions AB ++ */ ++ ++import java.util.{Map => JMap} ++ ++import org.apache.camel._ ++import org.apache.camel.impl.{DefaultProducer, DefaultEndpoint, DefaultComponent} ++ ++import akka.actor._ ++ ++import scala.reflect.BeanProperty ++import akka.dispatch.Await ++import akka.util.{Duration, Timeout} ++import akka.util.duration._ ++import akka.camel.{Camel, CamelExchangeAdapter, Ack, Failure, Message, BlockingOrNot, Blocking, NonBlocking} ++import java.util.concurrent.TimeoutException ++ ++private[camel] case class Path(actorPath: String) { ++ require(actorPath != null) ++ require(actorPath.length() > 0) ++ def toCamelPath = ""actor://path:%s"" format actorPath ++} ++ ++private[camel] object Path{ ++ def apply(actorRef: ActorRef) = new Path(actorRef.path.toString) ++ def fromCamelPath(camelPath : String) = camelPath match { ++ case id if id startsWith ""path:"" => new Path(id substring 5) ++ case _ => throw new IllegalArgumentException(""Invalid path: [%s] - should be path:"" format camelPath) ++ } ++} ++ ++ ++ ++/** ++ * Camel component for sending messages to and receiving replies from (untyped) actors. 
++ * ++ * @see akka.camel.component.ActorEndpoint ++ * @see akka.camel.component.ActorProducer ++ * ++ * @author Martin Krasser ++ */ ++class ActorComponent(camel : Camel) extends DefaultComponent { ++ def createEndpoint(uri: String, remaining: String, parameters: JMap[String, Object]): ActorEndpoint = { ++ val path = Path.fromCamelPath(remaining) ++ new ActorEndpoint(uri, this, path, camel) ++ } ++} ++ ++ ++/** ++ * TODO fix the doc to be consistent with implementation ++ * Camel endpoint for sending messages to and receiving replies from (untyped) actors. Actors ++ * are referenced using actor endpoint URIs of the following format: ++ * actor:, ++ * actor:id:[] and ++ * actor:uuid:[], ++ * where refers to ActorRef.id and ++ * refers to the String-representation od ActorRef.uuid. In URIs that contain ++ * id: or uuid:, an actor identifier (id or uuid) is optional. In this ++ * case, the in-message of an exchange produced to this endpoint must contain a message header ++ * with name CamelActorIdentifier and a value that is the target actor's identifier. ++ * If the URI contains an actor identifier, a message with a CamelActorIdentifier ++ * header overrides the identifier in the endpoint URI. ++ * ++ * @see akka.camel.component.ActorComponent ++ * @see akka.camel.component.ActorProducer ++ ++ * @author Martin Krasser ++ */ ++class ActorEndpoint(uri: String, ++ comp: ActorComponent, ++ val path: Path, ++ camel : Camel) extends DefaultEndpoint(uri, comp) with ActorEndpointConfig{ ++ ++ ++ ++ /** ++ * @throws UnsupportedOperationException ++ */ ++ def createConsumer(processor: Processor): org.apache.camel.Consumer = ++ throw new UnsupportedOperationException(""actor consumer not supported yet"") ++ ++ /** ++ * Creates a new ActorProducer instance initialized with this endpoint. ++ */ ++ def createProducer: ActorProducer = new ActorProducer(this, camel) ++ ++ /** ++ * Returns true. 
++ */ ++ def isSingleton: Boolean = true ++} ++ ++trait ActorEndpointConfig{ ++ def getEndpointUri : String ++ def path : Path ++ /** ++ * When endpoint is outCapable (can produce responses) outTimeout is the maximum time ++ * the endpoint can take to send the response back. It defaults to Int.MaxValue seconds. ++ * It can be overwritten by setting @see blocking property ++ */ ++ @BeanProperty var outTimeout: Duration = Int.MaxValue seconds ++ ++ ++ /** ++ * Whether to block caller thread during two-way message exchanges with (untyped) actors. This is ++ * set via the blocking=true|false endpoint URI parameter. Default value is ++ * false. ++ */ ++ @BeanProperty var blocking: BlockingOrNot = NonBlocking","Still don't like this one +",457354db8ba6b42dbacc246c11f0ff15e38ebab4 +362858,1,"to be cool:)""",2012-01-18 03:06:15,piotrga,akka,akka,,https://github.com/akka/akka-core/pull/217#discussion_r362858,2012-01-18T14:06:15Z,2012-01-18T16:57:21Z,piotrga,CONTRIBUTOR,akka-camel/src/main/scala/akka/camelexamples/_2_SupervisedConsumers.scala,,26.0,,,26,"@@ -0,0 +1,37 @@ ++package akka.camelexamples ++ ++import akka.actor.{PoisonPill, Terminated, Props, ActorSystem, Actor} ++import ExamplesSupport._ ++import RichString._ ++ ++ ++object SupervisedConsumersExample extends App{ ++ ++ val system = ActorSystem(""test1"") ++ ++ system.actorOf(Props(new Actor{ ++ context.watch(context.actorOf(Props(faultHandler = retry3xWithin1s, creator = () => new EndpointManager))) ++ protected def receive = { ++ case Terminated(ref) => system.shutdown() ++ } ++ })) ++ ++ ++ ""data/input/CamelConsumer/file1.txt"" << ""test data ""+math.random ++} ++ ++class EndpointManager extends Actor { ++ ++ override def preStart() { ++ self ! 
Props[SysOutConsumer]","to be cool:) +",457354db8ba6b42dbacc246c11f0ff15e38ebab4 +363036,0,"Like something you put in your configuration file""",2012-01-18 03:57:59,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/217#discussion_r363036,2012-01-18T14:57:59Z,2012-01-18T16:57:21Z,viktorklang,CONTRIBUTOR,akka-camel/src/main/scala/akka/camel/Consumer.scala,,33.0,,,1,"@@ -1,144 +1,71 @@ + /** +- * Copyright (C) 2009-2010 Typesafe Inc. ++ * Copyright (C) 2009-2010 Scalable Solutions AB + */ + + package akka.camel + +-import org.apache.camel.model.{ RouteDefinition, ProcessorDefinition } ++import org.apache.camel.model.{RouteDefinition, ProcessorDefinition} + + import akka.actor._ ++import akka.util.Duration ++import akka.util.duration._ + + /** + * Mixed in by Actor implementations that consume message from Camel endpoints. + * + * @author Martin Krasser + */ +-trait Consumer { this: Actor ⇒ +- import RouteDefinitionHandler._ ++trait Consumer extends Actor with ConsumerConfig{ + +- /** +- * The default route definition handler is the identity function +- */ +- private[camel] var routeDefinitionHandler: RouteDefinitionHandler = identity ++ def endpointUri : String + +- /** +- * Returns the Camel endpoint URI to consume messages from. +- */ +- def endpointUri: String +- +- /** +- * Determines whether two-way communications between an endpoint and this consumer actor +- * should be done in blocking or non-blocking mode (default is non-blocking). This method +- * doesn't have any effect on one-way communications (they'll never block). +- */ +- def blocking = false ++ CamelExtension(context.system).registerConsumer(endpointUri, this, activationTimeout) ++} + +- /** +- * Determines whether one-way communications between an endpoint and this consumer actor +- * should be auto-acknowledged or system-acknowledged. +- */ +- def autoack = true + +- /** +- * Sets the route definition handler for creating a custom route to this consumer instance. 
+- */ +- def onRouteDefinition(h: RouteDefinition ⇒ ProcessorDefinition[_]): Unit = onRouteDefinition(from(h)) ++trait ConsumerConfig{ ++ //TODO: Explain the parameters better with some examples! + + /** +- * Sets the route definition handler for creating a custom route to this consumer instance. +- *

+- * Java API. ++ * How long should the actor wait for activation before it fails. + */ +- def onRouteDefinition(h: RouteDefinitionHandler): Unit = routeDefinitionHandler = h +-} +- +-/** +- * Java-friendly Consumer. +- * +- * Subclass this abstract class to create an MDB-style untyped consumer actor. This +- * class is meant to be used from Java. +- * +- * @author Martin Krasser +- */ +-abstract class UntypedConsumerActor extends UntypedActor with Consumer { +- final override def endpointUri = getEndpointUri +- final override def blocking = isBlocking +- final override def autoack = isAutoack ++ def activationTimeout: Duration = 10 seconds","Like something you put in your configuration file +",457354db8ba6b42dbacc246c11f0ff15e38ebab4 +363247,0,"If there are no references to it, it means that it's been terminated, if it has been terminated then DeathWAtch has been triggered. No need for WeakHashMap""",2012-01-18 04:39:46,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/217#discussion_r363247,2012-01-18T15:39:46Z,2012-01-18T16:57:22Z,viktorklang,CONTRIBUTOR,akka-camel/src/main/scala/akka/camel/internal/ActivationTracker.scala,,80.0,,,80,"@@ -0,0 +1,80 @@ ++package akka.camel.internal ++ ++import akka.actor._ ++import akka.camel._ ++import collection.mutable.WeakHashMap ++import akka.event.Logging.Warning ++ ++ ++ ++class ActivationTracker extends Actor{ ++ ++ val activations = new WeakHashMap[ActorRef, ActivationStateMachine] ++ ++ class ActivationStateMachine { ++ private[this] var awaitingActivation : List[ActorRef] = Nil ++ private[this] var awaitingDeActivation : List[ActorRef] = Nil ++ private[this] var activationFailure : Option[Throwable] = None ++ ++ var receive : Receive = notActivated ++ ++ def notActivated : Receive = { ++ case AwaitActivation(ref) => awaitingActivation ::= sender ++ case AwaitDeActivation(ref) => awaitingDeActivation ::= sender ++ ++ case msg @ EndpointActivated(ref) => { ++ migration.Migration.EventHandler.debug(ref+"" 
activated"") ++ awaitingActivation.foreach(_ ! msg) ++ awaitingActivation = Nil ++ receive = activated ++ } ++ ++ case EndpointFailedToActivate(ref, cause) => { ++ migration.Migration.EventHandler.debug(ref+"" failed to activate"") ++ activationFailure = Option(cause) ++ awaitingActivation.foreach(_ ! EndpointFailedToActivate(ref, cause)) ++ awaitingActivation = Nil ++ receive = failedToActivate ++ } ++ } ++ ++ def activated : Receive = { ++ case AwaitActivation(ref) => sender ! EndpointActivated(ref) ++ case AwaitDeActivation(ref) => awaitingDeActivation ::= sender ++ case EndpointDeActivated(ref) => { ++ awaitingDeActivation foreach (_ ! EndpointDeActivated(ref)) ++ awaitingDeActivation = Nil ++ context.stop(self) ++ } ++ case msg : EndpointFailedToDeActivate => { ++ awaitingDeActivation foreach (_ ! msg) ++ awaitingDeActivation = Nil ++ } ++ } ++ ++ def failedToActivate : Receive = { ++ case AwaitActivation(ref) => sender ! EndpointFailedToActivate(ref, activationFailure.get) ++ case AwaitDeActivation(ref) => sender ! EndpointFailedToActivate(ref, activationFailure.get) ++ } ++ ++ } ++ ++ override def preStart() { ++ context.system.eventStream.subscribe(self, classOf[ActivationMessage]) ++ } ++ ++ override def receive = { ++ case msg @ ActivationMessage(ref) =>{ ++ try{ ++ activations.getOrElseUpdate(ref, new ActivationStateMachine).receive(msg)","If there are no references to it, it means that it's been terminated, if it has been terminated then DeathWAtch has been triggered. 
No need for WeakHashMap +",457354db8ba6b42dbacc246c11f0ff15e38ebab4 +314470,0,"Perhaps a ref to the remoting docs could be of interest here.""",2011-12-25 04:25:52,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/187#discussion_r314470,2011-12-25T15:25:52Z,2011-12-25T15:25:52Z,viktorklang,CONTRIBUTOR,akka-docs/java/untyped-actors.rst,,222.0,,,27,"@@ -205,9 +210,16 @@ result:: + getContext().actorFor(""/user/serviceA/aggregator"") // will look up this absolute path + getContext().actorFor(""../joe"") // will look up sibling beneath same supervisor + ++The supplied path is parsed as a :class:`java.net.URI`, which basically means ++that it is split on ``/`` into path elements. If the path starts with ``/``, it ++is absolute and the look-up starts at the root guardian (which is the parent of ++``""/user""``); otherwise it starts at the current actor. If a path element equals ++``..``, the look-up will take a step “up” towards the supervisor of the ++currently traversed actor, otherwise it will step “down” to the named child. + It should be noted that the ``..`` in actor paths here always means the logical +-structure, i.e. the supervisor. Remote actor addresses may also be looked up, +-if remoting is enabled:: ++structure, i.e. the supervisor. ++ ++Remote actor addresses may also be looked up, if remoting is enabled::","Perhaps a ref to the remoting docs could be of interest here. 
+",1be9c1d99db3db0384b5ba10790b2de70db30147 +308613,0,"Is this threadsafe?""",2011-12-21 02:19:34,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/182#discussion_r308613,2011-12-21T13:19:34Z,2011-12-21T22:47:21Z,viktorklang,CONTRIBUTOR,akka-actor-tests/src/test/scala/akka/actor/dispatch/ActorModelSpec.scala,,,,,1,"@@ -422,23 +423,48 @@ abstract class ActorModelSpec extends AkkaSpec with DefaultTimeout { + } + } + ++object DispatcherModelSpec { ++ val config = """""" ++ dispatcher { ++ type = Dispatcher ++ } ++ boss { ++ type = PinnedDispatcher ++ } ++ """""" ++} ++ + @org.junit.runner.RunWith(classOf[org.scalatest.junit.JUnitRunner]) +-class DispatcherModelSpec extends ActorModelSpec { ++class DispatcherModelSpec extends ActorModelSpec(DispatcherModelSpec.config) { + import ActorModelSpec._ + +- def newInterceptedDispatcher = ThreadPoolConfigDispatcherBuilder(config ⇒ +- new Dispatcher(system.dispatcherFactory.prerequisites, ""foo"", system.settings.DispatcherThroughput, +- system.settings.DispatcherThroughputDeadlineTime, system.dispatcherFactory.MailboxType, +- config, system.settings.DispatcherDefaultShutdown) with MessageDispatcherInterceptor, +- ThreadPoolConfig()).build.asInstanceOf[MessageDispatcherInterceptor] ++ var dispatcherCount = 0 ++ ++ override def registerInterceptedDispatcher(): MessageDispatcherInterceptor = { ++ // use new key for each invocation, since the MessageDispatcherInterceptor holds state ++ dispatcherCount += 1","Is this threadsafe? +",ed2fb14dcf0120a6c126fe9c99038d897dbcdf0d +295443,0,"Fixed""",2011-12-14 13:37:48,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/160#discussion_r295443,2011-12-15T00:37:48Z,2011-12-15T16:19:19Z,viktorklang,CONTRIBUTOR,akka-docs/scala/typed-actors.rst,,,,,1,"@@ -4,186 +4,160 @@ Typed Actors (Scala) + .. sidebar:: Contents + + .. contents:: :local: +- +-The Typed Actors are implemented through `Typed Actors `_. 
It uses AOP through `AspectWerkz `_ to turn regular POJOs into asynchronous non-blocking Actors with semantics of the Actor Model. Each method dispatch is turned into a message that is put on a queue to be processed by the Typed Actor sequentially one by one. + +-If you are using the `Spring Framework `_ then take a look at Akka's `Spring integration `_. ++Akka Typed Actors is an implementation of the `Active Objects `_ pattern. ++Essentially turning method invocations into asynchronous dispatch instead of synchronous that has been the default way since Smalltalk came out. + +-**WARNING:** Do not configure to use a ``BalancingDispatcher`` with your ``TypedActors``, it just isn't safe with how ``TypedActors`` currently are implemented. This limitation will most likely be removed in the future. ++Typed Actors consist of 2 ""parts"", a public interface and an implementation, and if you've done any work in ""enterprise"" Java, this will be very familiar to you. As with normal Actors you have an external API (the public interface instance) that will delegate methodcalls asynchronously to ++a private instance of the implementation. + +-Creating Typed Actors +---------------------- +- +-**IMPORTANT:** The Typed Actors class must have access modifier 'public' (which is default) and can't be an inner class (unless it is an inner class in an 'object'). ++The advantage of Typed Actors vs. Actors is that with TypedActors you have a static contract, and don't need to define your own messages, the downside is that it places some limitations on what you can do and what you can't, i.e. you can't use become/unbecome. + +-Akka turns POJOs with interface and implementation into asynchronous (Typed) Actors. Akka is using `AspectWerkz’s Proxy `_ implementation, which is the `most performant `_ proxy implementation there exists. ++Typed Actors are implemented using `JDK Proxies `_ which provide a pretty easy-worked API to intercept method calls. 
+ +-In order to create a Typed Actor you have to subclass the ``TypedActor`` base class. + +-Here is an example. ++The tools of the trade ++---------------------- + +-If you have a POJO with an interface implementation separation like this: ++Before we create our first Typed Actor we should first go through the tools that we have at our disposal, ++it's located in ``akka.actor.TypedActor``. + +-.. code-block:: scala ++.. includecode:: code/TypedActorDocSpec.scala ++ :include: typed-actor-extension-tools + +- import akka.actor.TypedActor ++.. warning:: ++ ++ Same as not exposing ``this`` of an Akka Actor, it's important not to expose ``this`` of a Typed Actor, ++ instead you should pass the external proxy reference, which is obtained from within your Typed Actor as ++ ``TypedActor.self``, this is your external identity, as the ``ActorRef`` is the external identity of ++ and Akka Actor.","Fixed +",9d2ab2e7145ed1e200ed29aee42f9a5734f108ab +297170,0,"Not only that, but I don't believe that Lift uses akka futures anyway. """"or"""" is definitely better than """"orElse"""" though I still like """"race."""" """,2011-12-15 06:41:39,nuttycom,akka,akka,,https://github.com/akka/akka-core/pull/147#discussion_r297170,2011-12-15T17:41:39Z,2011-12-15T17:41:39Z,nuttycom,NONE,akka-actor/src/main/scala/akka/dispatch/Future.scala,,398.0,,,615,"@@ -524,67 +339,63 @@ sealed trait Future[+T] extends japi.Future[T] { + def value: Option[Either[Throwable, T]] + + /** +- * Returns the successful result of this Future if it exists. +- */ +- final def result: Option[T] = value match { +- case Some(Right(r)) ⇒ Some(r) +- case _ ⇒ None +- } +- +- /** +- * Returns the contained exception of this Future if it exists. +- */ +- final def exception: Option[Throwable] = value match { +- case Some(Left(e)) ⇒ Some(e) +- case _ ⇒ None +- } +- +- /** + * When this Future is completed, apply the provided function to the + * Future. 
If the Future has already been completed, this will apply +- * immediately. Will not be called in case of a timeout, which also holds if +- * corresponding Promise is attempted to complete after expiry. Multiple ++ * immediately. Multiple + * callbacks may be registered; there is no guarantee that they will be + * executed in a particular order. + */ +- def onComplete(func: Future[T] ⇒ Unit): this.type ++ def onComplete(func: Either[Throwable, T] ⇒ Unit): this.type + + /** + * When the future is completed with a valid result, apply the provided + * PartialFunction to the result. See `onComplete` for more details. + *

+-   *   future onResult {
++   *   future onSuccess {
+    *     case Foo ⇒ target ! ""foo""
+    *     case Bar ⇒ target ! ""bar""
+    *   }
+    * 
+ */ +- final def onResult(pf: PartialFunction[T, Unit]): this.type = onComplete { +- _.value match { +- case Some(Right(r)) if pf isDefinedAt r ⇒ pf(r) +- case _ ⇒ +- } ++ final def onSuccess[U](pf: PartialFunction[T, U]): this.type = onComplete { ++ case Right(r) if pf isDefinedAt r ⇒ pf(r) ++ case _ ⇒ + } + + /** + * When the future is completed with an exception, apply the provided + * PartialFunction to the exception. See `onComplete` for more details. + *
+-   *   future onException {
++   *   future onFailure {
+    *     case NumberFormatException ⇒ target ! ""wrong format""
+    *   }
+    * 
+ */ +- final def onException(pf: PartialFunction[Throwable, Unit]): this.type = onComplete { +- _.value match { +- case Some(Left(ex)) if pf isDefinedAt ex ⇒ pf(ex) +- case _ ⇒ +- } ++ final def onFailure[U](pf: PartialFunction[Throwable, U]): this.type = onComplete { ++ case Left(ex) if pf isDefinedAt ex ⇒ pf(ex) ++ case _ ⇒ + } + +- def onTimeout(func: Future[T] ⇒ Unit): this.type ++ /** ++ * Returns a failure projection of this Future ++ * If `this` becomes completed with a failure, that failure will be the success of the returned Future ++ * If `this` becomes completed with a result, then the returned future will fail with a NoSuchElementException ++ */ ++ final def failed: Future[Throwable] = { ++ val p = Promise[Throwable]() ++ this.onComplete { ++ case Left(t) ⇒ p success t ++ case Right(r) ⇒ p failure new NoSuchElementException(""Future.failed not completed with a throwable. Instead completed with: "" + r) ++ } ++ p ++ } + +- def orElse[A >: T](fallback: ⇒ A): Future[A] ++ /** ++ * Creates a Future that will be the result of the first completed Future of this and the Future that was passed into this. ++ * This is semantically the same as: Future.firstCompletedOf(Seq(this, that)) ++ */ ++ //FIXME implement as The result of any of the Futures, or if oth failed, the first failure ++ def orElse[A >: T](that: Future[A]): Future[A] = Future.firstCompletedOf(List(this, that)) //TODO Optimize","Not only that, but I don't believe that Lift uses akka futures anyway. 
""or"" is definitely better than ""orElse"" though I still like ""race."" +",0af92f24400f1b05d1919be54dfd822037f0076f +284870,0,"Null-object pattern"""" sounds more academic""",2011-12-10 06:40:39,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/144#discussion_r284870,2011-12-10T17:40:39Z,2011-12-13T15:06:06Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/routing/Routing.scala,,,,,1,"@@ -73,62 +76,105 @@ object RouterType { + */ + case class RoutedProps private[akka] ( + routerFactory: () ⇒ Router, +- connectionManager: ConnectionManager, +- timeout: Timeout = RoutedProps.defaultTimeout, +- localOnly: Boolean = RoutedProps.defaultLocalOnly) { ++ connectionManager: ConnectionManager) { + + // Java API +- def this(creator: Creator[Router], connectionManager: ConnectionManager, timeout: Timeout, localOnly: Boolean) { +- this(() ⇒ creator.create(), connectionManager, timeout, localOnly) ++ def this(creator: Creator[Router], connectionManager: ConnectionManager) { ++ this(() ⇒ creator.create(), connectionManager) + } ++} ++ ++///** ++// * The Router is responsible for sending a message to one (or more) of its connections. Connections are stored in the ++// * {@link FailureDetector} and each Router should be linked to only one {@link FailureDetector}. ++// * ++// * @author Jonas Bonér ++// */ ++//trait Router { ++// ++// /** ++// * Initializes this Router with a given set of Connections. The Router can use this datastructure to ask for ++// * the current connections, signal that there were problems with one of the connections and see if there have ++// * been changes in the connections. ++// * ++// * This method is not threadsafe, and should only be called once ++// * ++// * JMM Guarantees: ++// * This method guarantees that all changes made in this method, are visible before one of the routing methods is called. ++// */ ++// def init(connectionManager: ConnectionManager) ++// ++// /** ++// * Routes the message to one of the connections. 
++// * ++// * @throws RoutingException if something goes wrong while routing the message ++// */ ++// def route(message: Any)(implicit sender: ActorRef) ++// ++// /** ++// * Routes the message using a timeout to one of the connections and returns a Future to synchronize on the ++// * completion of the processing of the message. ++// * ++// * @throws RoutingExceptionif something goes wrong while routing the message. ++// */ ++// def route[T](message: Any, timeout: Timeout): Future[T] ++//} ++// ++///** ++// * An {@link AkkaException} thrown when something goes wrong while routing a message ++// */ ++//class RoutingException(message: String) extends AkkaException(message) ++// + ++/** ++ * A RoutedActorRef is an ActorRef that has a set of connected ActorRef and it uses a Router to send a message to ++ * on (or more) of these actors. ++ */ ++private[akka] class RoutedActorRef(_system: ActorSystemImpl, _props: Props, _supervisor: InternalActorRef, _path: ActorPath) ++ extends LocalActorRef( ++ _system, ++ _props.copy(creator = _props.routerConfig), ++ _supervisor, ++ _path) { ++ ++ val route: Routing.Route = _props.routerConfig.createRoute(_props.creator, actorContext) ++ ++ override def !(message: Any)(implicit sender: ActorRef = null) { ++ route(message) match { ++ case null ⇒ super.!(message)(sender) ++ case ref: ActorRef ⇒ ref.!(message)(sender) ++ case refs: Traversable[ActorRef] ⇒ refs foreach (_.!(message)(sender)) ++ } ++ } + } + +-object RoutedProps { +- final val defaultTimeout = Timeout(Duration.MinusInf) +- final val defaultLocalOnly = false ++trait RouterConfig extends Function0[Actor] { ++ def adaptFromDeploy(deploy: Option[Deploy]): RouterConfig ++ def createRoute(creator: () ⇒ Actor, actorContext: ActorContext): Routing.Route + } + + /** +- * The Router is responsible for sending a message to one (or more) of its connections. Connections are stored in the +- * {@link FailureDetector} and each Router should be linked to only one {@link FailureDetector}. 
+- * +- * @author Jonas Bonér ++ * Routing configuration that indicates no routing. ++ * Oxymoron style.","""Null-object pattern"" sounds more academic +",134fac4bfe8d6f8b4e9b96dfbfdf532d48ae3c86 +269700,2,"DO NOT USE THIS EXCEPT INTERNALLY IN AKKA""",2011-12-05 02:03:00,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/119#discussion_r269700,2011-12-05T13:03:00Z,2011-12-05T21:48:29Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorRef.scala,,163.0,,,102,"@@ -118,14 +111,76 @@ abstract class ActorRef extends java.lang.Comparable[ActorRef] with Serializable + */ + def isTerminated: Boolean + +- override def hashCode: Int = HashCode.hash(HashCode.SEED, address) ++ // FIXME RK check if we should scramble the bits or whether they can stay the same ++ override def hashCode: Int = path.hashCode + +- override def equals(that: Any): Boolean = { +- that.isInstanceOf[ActorRef] && +- that.asInstanceOf[ActorRef].address == address ++ override def equals(that: Any): Boolean = that match { ++ case other: ActorRef ⇒ path == other.path ++ case _ ⇒ false + } + +- override def toString = ""Actor[%s]"".format(address) ++ override def toString = ""Actor[%s]"".format(path) ++} ++ ++/** ++ * This trait represents the Scala Actor API ++ * There are implicit conversions in ../actor/Implicits.scala ++ * from ActorRef -> ScalaActorRef and back ++ */ ++trait ScalaActorRef { ref: ActorRef ⇒ ++ ++ /** ++ * Sends a one-way asynchronous message. E.g. fire-and-forget semantics. ++ *

++ * ++ * If invoked from within an actor then the actor reference is implicitly passed on as the implicit 'sender' argument. ++ *

++ * ++ * This actor 'sender' reference is then available in the receiving actor in the 'sender' member variable, ++ * if invoked from within an Actor. If not then no sender is available. ++ *

++   *   actor ! message
++   * 
++ *

++ */ ++ def !(message: Any)(implicit sender: ActorRef = null): Unit ++ ++ /** ++ * Sends a message asynchronously, returning a future which may eventually hold the reply. ++ */ ++ def ?(message: Any)(implicit timeout: Timeout): Future[Any] ++ ++ /** ++ * Sends a message asynchronously, returning a future which may eventually hold the reply. ++ * The implicit parameter with the default value is just there to disambiguate it from the version that takes the ++ * implicit timeout ++ */ ++ def ?(message: Any, timeout: Timeout)(implicit ignore: Int = 0): Future[Any] = ?(message)(timeout) ++} ++ ++/** ++ * Internal trait for assembling all the functionality needed internally on ++ * ActorRefs. NOTE THAT THIS IS NOT A STABLE EXTERNAL INTERFACE!","DO NOT USE THIS EXCEPT INTERNALLY IN AKKA +",9d7597c7282711889d74bd9b4d7bdae5ea254104 +479831,0,"Can such a scenario exist such that both connection and reconnectionFuture is Some(...) ? If not, you might want to encode it as Option[Either[Connection, Cancellable]]""",2012-02-22 23:26:23,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/350#discussion_r479831,2012-02-23T10:26:23Z,2012-03-06T19:30:30Z,viktorklang,CONTRIBUTOR,akka-amqp/src/main/scala/akka/amqp/FaultTolerantConnectionActor.scala,,,,,1,"@@ -0,0 +1,160 @@ ++/** ++ * Copyright (C) 2009-2010 Scalable Solutions AB ++ */ ++ ++package akka.amqp ++ ++import java.io.IOException ++import com.rabbitmq.client._ ++import akka.event.Logging ++import akka.actor._ ++import akka.util.duration._ ++import akka.amqp.AMQP.{ ConsumerParameters, ProducerParameters, ConnectionParameters } ++import java.util.UUID ++ ++private[amqp] class FaultTolerantConnectionActor(connectionParameters: ConnectionParameters) extends Actor { ++ import connectionParameters._ ++ ++ val log = Logging(context.system, this) ++ ++ val connectionFactory: ConnectionFactory = new ConnectionFactory() ++ connectionFactory.setUsername(username) ++ connectionFactory.setPassword(password) ++ 
connectionFactory.setVirtualHost(virtualHost) ++ ++ var connection: Option[Connection] = None ++ var reconnectionFuture: Option[Cancellable] = None","Can such a scenario exist such that both connection and reconnectionFuture is Some(...) ? If not, you might want to encode it as Option[Either[Connection, Cancellable]] +",e039c380c868b28285dab6fc313025d981ef022b +490636,0,"This should most definitely go into test/resources and not main/resources""",2012-02-27 00:03:53,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/350#discussion_r490636,2012-02-27T11:03:53Z,2012-03-06T19:30:33Z,viktorklang,CONTRIBUTOR,akka-amqp/src/main/resources/application.conf,,,,,1,"@@ -0,0 +1,15 @@ ++example { ++ akka { ++ loglevel = INFO ++ actor { ++ debug { ++ receive = off ++ autoreceive = off ++ lifecycle = off ++ } ++ } ++ amqp { ++ timeout = 2000 ++ } ++ } ++}","This should most definitely go into test/resources and not main/resources +",e039c380c868b28285dab6fc313025d981ef022b +503277,1,"This is so badass I don't even know where to begin""",2012-02-29 09:01:24,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/329#discussion_r503277,2012-02-29T20:01:24Z,2012-03-12T18:22:14Z,viktorklang,CONTRIBUTOR,akka-cluster/src/main/scala/akka/cluster/Node.scala,,,,,1,"@@ -0,0 +1,803 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. 
++ */ ++ ++package akka.cluster ++ ++import akka.actor._ ++import akka.actor.Status._ ++import akka.remote._ ++import akka.routing._ ++import akka.event.Logging ++import akka.dispatch.Await ++import akka.pattern.ask ++import akka.util._ ++import akka.config.ConfigurationException ++ ++import java.util.concurrent.atomic.{ AtomicReference, AtomicBoolean } ++import java.util.concurrent.TimeUnit._ ++import java.util.concurrent.TimeoutException ++import java.security.SecureRandom ++ ++import scala.collection.immutable.{ Map, SortedSet } ++import scala.annotation.tailrec ++ ++import com.google.protobuf.ByteString ++ ++/** ++ * Interface for membership change listener. ++ */ ++trait MembershipChangeListener { ++ def notify(members: SortedSet[Member]): Unit ++} ++ ++/** ++ * Interface for meta data change listener. ++ */ ++trait MetaDataChangeListener { // FIXME add management and notification for MetaDataChangeListener ++ def notify(meta: Map[String, Array[Byte]]): Unit ++} ++ ++// FIXME create Protobuf messages out of all the Gossip stuff - but wait until the prototol is fully stablized. ++ ++/** ++ * Base trait for all cluster messages. All ClusterMessage's are serializable. ++ */ ++sealed trait ClusterMessage extends Serializable ++ ++/** ++ * Cluster commands sent by the USER. ++ */ ++object ClusterAction { ++ ++ /** ++ * Command to join the cluster. Sent when a node (reprsesented by 'address') ++ * wants to join another node (the receiver). ++ */ ++ case class Join(address: Address) extends ClusterMessage ++ ++ /** ++ * Command to set a node to Up (from Joining). ++ */ ++ case object Up extends ClusterMessage ++ ++ /** ++ * Command to leave the cluster. ++ */ ++ case object Leave extends ClusterMessage ++ ++ /** ++ * Command to mark node as temporary down. ++ */ ++ case object Down extends ClusterMessage ++ ++ /** ++ * Command to mark a node to be removed from the cluster immediately. 
++ */ ++ case object Exit extends ClusterMessage ++ ++ /** ++ * Command to remove a node from the cluster immediately. ++ */ ++ case object Remove extends ClusterMessage ++} ++ ++/** ++ * Represents the address and the current status of a cluster member node. ++ */ ++case class Member(address: Address, status: MemberStatus) extends ClusterMessage ++ ++/** ++ * Envelope adding a sender address to the gossip. ++ */ ++case class GossipEnvelope(sender: Member, gossip: Gossip) extends ClusterMessage ++ ++/** ++ * Defines the current status of a cluster member node ++ * ++ * Can be one of: Joining, Up, Leaving, Exiting and Down. ++ */ ++sealed trait MemberStatus extends ClusterMessage ++object MemberStatus { ++ case object Joining extends MemberStatus ++ case object Up extends MemberStatus ++ case object Leaving extends MemberStatus ++ case object Exiting extends MemberStatus ++ case object Down extends MemberStatus ++ case object Removed extends MemberStatus ++} ++ ++// sealed trait PartitioningStatus ++// object PartitioningStatus { ++// case object Complete extends PartitioningStatus ++// case object Awaiting extends PartitioningStatus ++// } ++ ++// case class PartitioningChange( ++// from: Address, ++// to: Address, ++// path: PartitionPath, ++// status: PartitioningStatus) ++ ++/** ++ * Represents the overview of the cluster, holds the cluster convergence table and set with unreachable nodes. ++ */ ++case class GossipOverview( ++ seen: Map[Address, VectorClock] = Map.empty[Address, VectorClock], ++ unreachable: Set[Address] = Set.empty[Address]) { ++ ++ override def toString = ++ ""GossipOverview(seen = ["" + seen.mkString("", "") + ++ ""], unreachable = ["" + unreachable.mkString("", "") + ++ ""])"" ++} ++ ++/** ++ * Represents the state of the cluster; cluster ring membership, ring convergence, meta data - all versioned by a vector clock. 
++ */ ++case class Gossip( ++ overview: GossipOverview = GossipOverview(), ++ members: SortedSet[Member], // sorted set of members with their status, sorted by name ++ //partitions: Tree[PartitionPath, Node] = Tree.empty[PartitionPath, Node], // name/partition service ++ //pending: Set[PartitioningChange] = Set.empty[PartitioningChange], ++ meta: Map[String, Array[Byte]] = Map.empty[String, Array[Byte]], ++ version: VectorClock = VectorClock()) // vector clock version ++ extends ClusterMessage // is a serializable cluster message ++ with Versioned[Gossip] { ++ ++ /** ++ * Increments the version for this 'Node'. ++ */ ++ def +(node: VectorClock.Node): Gossip = copy(version = version + node) ++ ++ def +(member: Member): Gossip = { ++ if (members contains member) this ++ else this copy (members = members + member) ++ } ++ ++ /** ++ * Marks the gossip as seen by this node (remoteAddress) by updating the address entry in the 'gossip.overview.seen' ++ * Map with the VectorClock for the new gossip. ++ */ ++ def seen(address: Address): Gossip = ++ this copy (overview = overview copy (seen = overview.seen + (address -> version))) ++ ++ override def toString = ++ ""Gossip("" + ++ ""overview = "" + overview + ++ "", members = ["" + members.mkString("", "") + ++ ""], meta = ["" + meta.mkString("", "") + ++ ""], version = "" + version + ++ "")"" ++} ++ ++/** ++ * FSM actor managing the different cluster nodes states. ++ * Single instance - e.g. serialized access to Node - message after message. 
++ */ ++final class ClusterCommandDaemon(system: ActorSystem, node: Node) extends Actor with FSM[MemberStatus, Unit] { ++ ++ // start in JOINING ++ startWith(MemberStatus.Joining, Unit) ++ ++ // ======================== ++ // === IN JOINING === ++ when(MemberStatus.Joining) { ++ case Event(ClusterAction.Up, _) ⇒ ++ node.up() ++ goto(MemberStatus.Up) ++ } ++ ++ // ======================== ++ // === IN UP === ++ when(MemberStatus.Up) { ++ case Event(ClusterAction.Down, _) ⇒ ++ node.downing() ++ goto(MemberStatus.Down) ++ ++ case Event(ClusterAction.Leave, _) ⇒ ++ node.leaving() ++ goto(MemberStatus.Leaving) ++ ++ case Event(ClusterAction.Exit, _) ⇒ ++ node.exiting() ++ goto(MemberStatus.Exiting) ++ ++ case Event(ClusterAction.Remove, _) ⇒ ++ node.removing() ++ goto(MemberStatus.Removed) ++ } ++ ++ // ======================== ++ // === IN LEAVING === ++ when(MemberStatus.Leaving) { ++ case Event(ClusterAction.Down, _) ⇒ ++ node.downing() ++ goto(MemberStatus.Down) ++ ++ case Event(ClusterAction.Remove, _) ⇒ ++ node.removing() ++ goto(MemberStatus.Removed) ++ } ++ ++ // ======================== ++ // === IN EXITING === ++ when(MemberStatus.Exiting) { ++ case Event(ClusterAction.Remove, _) ⇒ ++ node.removing() ++ goto(MemberStatus.Removed) ++ } ++ ++ // ======================== ++ // === IN DOWN === ++ when(MemberStatus.Down) { ++ // FIXME How to transition from DOWN => JOINING when node comes back online. Can't just listen to Gossip message since it is received be another actor. How to fix this? 
++ case Event(ClusterAction.Remove, _) ⇒ ++ node.removing() ++ goto(MemberStatus.Removed) ++ } ++ ++ // ======================== ++ // === IN REMOVED === ++ when(MemberStatus.Removed) { ++ case command ⇒ ++ log.warning(""Removed node [{}] received cluster command [{}]"", system.name, command) ++ stay ++ } ++ ++ // ======================== ++ // === GENERIC AND UNHANDLED COMMANDS === ++ whenUnhandled { ++ // should be able to handle Join in any state ++ case Event(ClusterAction.Join(address), _) ⇒ ++ node.joining(address) ++ stay ++ ++ case Event(command, _) ⇒ { ++ log.warning(""Unhandled command [{}] in state [{}]"", command, stateName) ++ stay ++ } ++ } ++} ++ ++/** ++ * Pooled and routed wit N number of configurable instances. ++ * Concurrent access to Node. ++ */ ++final class ClusterGossipDaemon(system: ActorSystem, node: Node) extends Actor { ++ val log = Logging(system, ""ClusterGossipDaemon"")","This is so badass I don't even know where to begin +",cf3fa9fa3ce9e9312db0922370c93ce1af9db7c8 +427299,0,"maybe mention which thing is changed, `m.copy(name = …)`""",2012-02-08 00:10:50,rkuhn,akka,akka,,https://github.com/akka/akka-core/pull/315#discussion_r427299,2012-02-08T11:10:50Z,2012-02-08T13:03:45Z,rkuhn,CONTRIBUTOR,akka-actor/src/main/scala/akka/dispatch/AbstractDispatcher.scala,,,,,1,"@@ -420,8 +420,13 @@ class ThreadPoolExecutorConfigurator(config: Config, prerequisites: DispatcherPr + })(queueFactory ⇒ _.setQueueFactory(queueFactory))) + } + +- def createExecutorServiceFactory(name: String, threadFactory: ThreadFactory): ExecutorServiceFactory = +- threadPoolConfig.createExecutorServiceFactory(name, threadFactory) ++ def createExecutorServiceFactory(id: String, threadFactory: ThreadFactory): ExecutorServiceFactory = { ++ val tf = threadFactory match { ++ case m: MonitorableThreadFactory ⇒ m.copy(m.name + ""-"" + id)","maybe mention which thing is changed, `m.copy(name = …)` +",df1606a8eec75af73917072f27d7bf6a49bd4d7c +415676,0,"all interfaces must be 
resolved recursively as well, so that doesn't work. My first approach was to try to get collectFirst working but I didn't find a good way, and it is really only some extra HashMap lookups that are performed (initially). I would be""",2012-02-03 09:49:29,patriknw,akka,akka,,https://github.com/akka/akka-core/pull/300#discussion_r415676,2012-02-03T20:49:29Z,2012-02-04T16:38:25Z,patriknw,CONTRIBUTOR,akka-actor/src/main/scala/akka/serialization/Serialization.scala,,,,,1,"@@ -111,10 +114,45 @@ class Serialization(val system: ExtendedActorSystem) extends Extension { + } + + /** +- * Returns the configured Serializer for the given Class, falls back to the Serializer named ""default"" ++ * Returns the configured Serializer for the given Class, falls back to the Serializer named ""default"". ++ * It traverses interfaces and super classes to find any configured Serializer that match ++ * the class name. + */ +- def serializerFor(clazz: Class[_]): Serializer = //TODO fall back on BestMatchClass THEN default AND memoize the lookups +- serializerMap.get(clazz.getName).getOrElse(serializers(""default"")) ++ def serializerFor(clazz: Class[_]): Serializer = { ++ ++ def lookup(c: Class[_]): Option[Serializer] = { ++ val className = c.getName ++ serializerMap.get(className) match { ++ case null ⇒ bindings.get(className) map serializers ++ case serializer ⇒ Some(serializer) ++ } ++ } ++ ++ def resolve(c: Class[_]): Option[Serializer] = { ++ lookup(c) match { ++ case x @ Some(_) ⇒ x ++ case None ⇒ ++ val classes = c.getInterfaces.toList ::: Option(c.getSuperclass).toList ++ classes flatMap resolve headOption","all interfaces must be resolved recursively as well, so that doesn't work. + +My first approach was to try to get collectFirst working but I didn't find a good way, and it is really only some extra HashMap lookups that are performed (initially). 
+ +I would be glad to see a tailrec version, but I don't think it is necessary, since inheritance depth is limited +",bd3766d8a75d43f8a67066e0d3de87b882124aa8 +406347,0,"so, what’s the rationale for not suspending the actor in this case? Or should it in fact be implicitly Resume’d? (I mean in the Directive sense)""",2012-02-01 03:42:11,rkuhn,akka,akka,,https://github.com/akka/akka-core/pull/282#discussion_r406347,2012-02-01T14:42:11Z,2012-02-03T13:05:16Z,rkuhn,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorCell.scala,,486.0,,,1,"@@ -476,31 +475,29 @@ private[akka] class ActorCell( + cancelReceiveTimeout() // FIXME: leave this here??? + messageHandle.message match { + case msg: AutoReceivedMessage ⇒ autoReceiveMessage(messageHandle) ++ // actor can be null when creation fails fatal error ++ case msg if actor == null ⇒ + case msg ⇒ actor(msg) + } + currentMessage = null // reset current message after successful invocation + } catch { +- case e ⇒ ++ case e: InterruptedException ⇒","so, what’s the rationale for not suspending the actor in this case? Or should it in fact be implicitly Resume’d? 
(I mean in the Directive sense) +",09e13e271b430949b45192f0f28737fbfde56c9d +374470,0,"just verified that the bytecode generated with and without `final` is identical … (vals are always private final fields)""",2012-01-21 21:42:18,rkuhn,akka,akka,,https://github.com/akka/akka-core/pull/242#discussion_r374470,2012-01-22T08:42:18Z,2012-01-22T08:42:18Z,rkuhn,CONTRIBUTOR,akka-remote/src/main/scala/akka/remote/RemoteSettings.scala,,99.0,,,114,"@@ -74,26 +76,26 @@ class RemoteSettings(val config: Config, val systemName: String) { + case other ⇒ other + } + +- val Backlog = getInt(""akka.remote.server.backlog"") ++ final val Backlog = getInt(""akka.remote.server.backlog"") + +- val ExecutionPoolKeepAlive = Duration(getMilliseconds(""akka.remote.server.execution-pool-keepalive""), MILLISECONDS) ++ final val ExecutionPoolKeepAlive = Duration(getMilliseconds(""akka.remote.server.execution-pool-keepalive""), MILLISECONDS) + +- val ExecutionPoolSize = getInt(""akka.remote.server.execution-pool-size"") match { ++ final val ExecutionPoolSize = getInt(""akka.remote.server.execution-pool-size"") match { + case sz if sz < 1 ⇒ throw new IllegalArgumentException(""akka.remote.server.execution-pool-size is less than 1"") + case sz ⇒ sz + } + +- val MaxChannelMemorySize = getBytes(""akka.remote.server.max-channel-memory-size"") match { ++ final val MaxChannelMemorySize = getBytes(""akka.remote.server.max-channel-memory-size"") match { + case sz if sz < 0 ⇒ throw new IllegalArgumentException(""akka.remote.server.max-channel-memory-size is less than 0 bytes"") + case sz ⇒ sz + } + +- val MaxTotalMemorySize = getBytes(""akka.remote.server.max-total-memory-size"") match { ++ final val MaxTotalMemorySize = getBytes(""akka.remote.server.max-total-memory-size"") match { + case sz if sz < 0 ⇒ throw new IllegalArgumentException(""akka.remote.server.max-total-memory-size is less than 0 bytes"") + case sz ⇒ sz + } + + // TODO handle the system name right and move this to config file syntax +- val 
URI = ""akka://sys@"" + Hostname + "":"" + Port ++ final val URI = ""akka://sys@"" + Hostname + "":"" + Port","just verified that the bytecode generated with and without `final` is identical … (vals are always private final fields) +",f0bc2c7435c8898b695996b067d754407ab01f56 +376553,0,"Public class?""",2012-01-23 05:41:29,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/241#discussion_r376553,2012-01-23T16:41:29Z,2012-01-23T17:25:54Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/Locker.scala,,,,,1,"@@ -9,7 +9,12 @@ import akka.util.duration._ + import java.util.concurrent.ConcurrentHashMap + import akka.event.DeathWatch + +-class Locker(scheduler: Scheduler, period: Duration, val path: ActorPath, val deathWatch: DeathWatch) extends MinimalActorRef { ++class Locker(","Public class? +",9d7ed5eba16152d15269bf4f626260432359c0d4 +351948,0,"I think I prefer: import org.jboss.netty.akka.util.{ Timer, TimerTask, HashedWheelTimer, Timeout ⇒ HWTimeout }""",2012-01-13 08:27:39,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/214#discussion_r351948,2012-01-13T19:27:39Z,2012-01-16T09:16:09Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorRefProvider.scala,,,,,1,"@@ -5,7 +5,10 @@ + package akka.actor + + import java.util.concurrent.atomic.AtomicLong +-import org.jboss.netty.akka.util.{ TimerTask, HashedWheelTimer } ++import org.jboss.netty.akka.util.HashedWheelTimer ++import org.jboss.netty.akka.util.TimerTask ++import org.jboss.netty.akka.util.Timer ++import org.jboss.netty.akka.util.{ Timeout ⇒ HWTimeout }","I think I prefer: import org.jboss.netty.akka.util.{ Timer, TimerTask, HashedWheelTimer, Timeout ⇒ HWTimeout } +",a26876f5c6cc70fab15cc9f46620391d131c536f +309889,0,"changed to id""",2011-12-21 
09:26:48,patriknw,akka,akka,,https://github.com/akka/akka-core/pull/182#discussion_r309889,2011-12-21T20:26:48Z,2011-12-21T22:47:22Z,patriknw,CONTRIBUTOR,akka-actor-tests/src/test/scala/akka/actor/dispatch/ActorModelSpec.scala,,,,,1,"@@ -224,21 +225,21 @@ object ActorModelSpec { + } + } + +-abstract class ActorModelSpec extends AkkaSpec with DefaultTimeout { ++abstract class ActorModelSpec(config: String) extends AkkaSpec(config) with DefaultTimeout { + + import ActorModelSpec._ + +- def newTestActor(dispatcher: MessageDispatcher) = system.actorOf(Props[DispatcherActor].withDispatcher(dispatcher)) ++ def newTestActor(dispatcher: String) = system.actorOf(Props[DispatcherActor].withDispatcher(dispatcher)) + +- protected def newInterceptedDispatcher: MessageDispatcherInterceptor ++ protected def registerInterceptedDispatcher(): MessageDispatcherInterceptor + protected def dispatcherType: String + + ""A "" + dispatcherType must { + + ""must dynamically handle its own life cycle"" in { +- implicit val dispatcher = newInterceptedDispatcher ++ implicit val dispatcher = registerInterceptedDispatcher() + assertDispatcher(dispatcher)(stops = 0) +- val a = newTestActor(dispatcher) ++ val a = newTestActor(dispatcher.key)","changed to id +",ed2fb14dcf0120a6c126fe9c99038d897dbcdf0d +292805,2,"hmmm, weird...""",2011-12-14 00:56:37,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/147#discussion_r292805,2011-12-14T11:56:37Z,2011-12-14T11:56:37Z,viktorklang,CONTRIBUTOR,akka-docs/.history,,1.0,,,1,"@@ -0,0 +1 @@ ++exit","hmmm, weird... +",0af92f24400f1b05d1919be54dfd822037f0076f +20221,2,"This impl is weird. 
Uses system identityHashCode for hashCode AND equals?""",2011-04-17 23:42:26,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/73#discussion_r20221,2011-04-18T11:42:26Z,2011-04-18T11:42:26Z,viktorklang,CONTRIBUTOR,akka-stm/src/main/scala/stm/TransactionalSet.scala,,81.0,,,81,"@@ -0,0 +1,88 @@ ++package akka.stm ++ ++/** ++ * TransactionalSet : completely based on TransactionalMap ++ * @author - Dhananjay Nene ++ */ ++ ++/* ++ * TODO: Change package names in imports. This has been compiled against akka_2.8.0-1.0-M1.zip ++ */ ++ ++import scala.collection.mutable.HashSet ++import se.scalablesolutions.akka.stm.{Transactional, Ref} ++import se.scalablesolutions.akka.actor.{newUuid} ++ ++/** ++ * Transactional set that implements the mutable Set interface with an underlying Ref and HashSet. ++ */ ++ ++object TransactionalSet { ++ def apply[K]() = new TransactionalSet[K]() ++ ++ def apply[K](elems: K*) = new TransactionalSet(HashSet(elems: _*)) ++} ++ ++/** ++ * Transactional Set that implements the mutable Set interface with an underlying Ref and HashSet. ++ * ++ * From Scala you can use TSet as a shorter alias for TransactionalSet. 
++ */ ++ ++class TransactionalSet[T](initialValue: HashSet[T]) extends Transactional with scala.collection.mutable.Set[T] { ++ def this() = this(HashSet[T]()) ++ ++ val uuid = newUuid.toString ++ ++ private[this] val ref = Ref(initialValue) ++ ++ override def -=(elem: T) = { ++ ref.set(ref.get - elem) ++ this ++ } ++ ++ override def +=(elem: T) = { ++ ref.set(ref.get + elem) ++ this ++ } ++ ++ override def += (elem1: T, elem2: T, elems: T*) = { ++ ref.set(ref.get + (elem1, elem2, elems: _*)) ++ this ++ } ++ ++ override def -= (elem1: T, elem2: T, elems: T*) = { ++ ref.set(ref.get - (elem1, elem2, elems: _*)) ++ this ++ } ++ ++ override def ++= (xs: TraversableOnce[T]) = { ++ ref.set(ref.get ++ xs) ++ this ++ } ++ ++ override def --= (xs: TraversableOnce[T]) = { ++ ref.set(ref.get -- xs) ++ this ++ } ++ ++ def iterator = ref.get.iterator ++ ++ override def elements: Iterator[T] = ref.get.iterator ++ ++ override def contains(elem: T): Boolean = ref.get.contains(elem) ++ ++ override def clear = ref.swap(HashSet[T]()) ++ ++ override def size: Int = ref.get.size ++ ++ override def hashCode: Int = System.identityHashCode(this); ++ ++ override def equals(other: Any): Boolean =","This impl is weird. Uses system identityHashCode for hashCode AND equals? +",95aff5ef78f4b8aca6f624f1402f3198acab056b +383593,2,"Really? You need to test that?""",2012-01-25 01:01:54,piotrga,akka,akka,,https://github.com/akka/akka-core/pull/245#discussion_r383593,2012-01-25T12:01:54Z,2012-01-25T12:01:54Z,piotrga,CONTRIBUTOR,akka-camel/src/test/scala/akka/camel/ProducerRegistryTest.scala,,22.0,,,22,"@@ -0,0 +1,59 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. 
++ */ ++ ++package akka.camel ++ ++import org.scalatest.matchers.MustMatchers ++import org.scalatest.WordSpec ++import akka.camel.TestSupport.SharedCamelSystem ++import akka.actor.Props ++import akka.util.duration._ ++ ++class ProducerRegistryTest extends WordSpec with MustMatchers with SharedCamelSystem { ++ ""A ProducerRegistry"" must { ++ ""register a started SendProcessor for the producer, which is stopped when the actor is stopped"" in { ++ val actorRef = system.actorOf(Props(behavior = ctx ⇒ { ++ case _ ⇒ {} ++ })) ++ val (endpoint, processor) = camel.registerProducer(actorRef, ""mock:mock"") ++ camel.awaitActivation(actorRef, 1 second) ++ processor.isStarted must be(true) ++ endpoint.getCamelContext must equal(camel.context)","Really? You need to test that? +",f6646051f79f6f3c7fb3214652bce46034efed9d +479638,1,"also, always have return types on your methods. Trust me, will save everyone involved a lot of headache and will add documentation.""",2012-02-22 22:41:28,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/350#discussion_r479638,2012-02-23T09:41:28Z,2012-03-06T19:30:29Z,viktorklang,CONTRIBUTOR,akka-amqp/src/main/scala/akka/amqp/AMQP.scala,,,,,1,"@@ -0,0 +1,461 @@ ++/** ++ * Copyright (C) 2009-2010 Scalable Solutions AB ++ */ ++ ++package akka.amqp ++ ++import com.rabbitmq.client.AMQP.BasicProperties ++import java.lang.{ String, IllegalArgumentException } ++import reflect.Manifest ++import akka.japi.Procedure ++import com.rabbitmq.client._ ++import ConnectionFactory._ ++import akka.actor.{ Address ⇒ _, _ } ++import akka.pattern.ask ++import akka.util.duration._ ++import akka.util.Timeout ++import akka.dispatch.Await ++import java.util.UUID ++ ++/** ++ * AMQP Actor API. Implements Connection, Producer and Consumer materialized as Actors. 
++ * ++ * @see akka.amqp.ExampleSession ++ * ++ * @author Irmo Manie ++ * @author John Stanford (migration to Akka 2.0) ++ */ ++ ++object AMQP { ++ ++ implicit val timeout = Timeout(5 seconds) ++ ++ private lazy val system = ActorSystem(""AMQPSystem"") ++ ++ /** ++ * Parameters used to make the connection to the amqp broker. Uses the rabbitmq defaults. ++ */ ++ case class ConnectionParameters( ++ addresses: Array[Address] = Array(new Address(DEFAULT_HOST, DEFAULT_AMQP_PORT)), ++ username: String = DEFAULT_USER, ++ password: String = DEFAULT_PASS, ++ virtualHost: String = DEFAULT_VHOST, ++ initReconnectDelay: Long = 5000, ++ connectionCallback: Option[ActorRef] = None) { ++ ++ // Needed for Java API usage ++ def this() = this(Array(new Address(DEFAULT_HOST, DEFAULT_AMQP_PORT)), DEFAULT_USER, DEFAULT_PASS, DEFAULT_VHOST, 5000, None) ++ ++ // Needed for Java API usage ++ def this(addresses: Array[Address], username: String, password: String, virtualHost: String) = ++ this(addresses, username, password, virtualHost, 5000, None) ++ ++ // Needed for Java API usage ++ def this(addresses: Array[Address], username: String, password: String, virtualHost: String, initReconnectDelay: Long, connectionCallback: ActorRef) = ++ this(addresses, username, password, virtualHost, initReconnectDelay, Some(connectionCallback)) ++ ++ // Needed for Java API usage ++ def this(connectionCallback: ActorRef) = ++ this(Array(new Address(DEFAULT_HOST, DEFAULT_AMQP_PORT)), DEFAULT_USER, DEFAULT_PASS, DEFAULT_VHOST, 5000, Some(connectionCallback)) ++ ++ } ++ ++ /** ++ * Additional parameters for the channel ++ */ ++ case class ChannelParameters( ++ shutdownListener: Option[ShutdownListener] = None, ++ channelCallback: Option[ActorRef] = None, ++ prefetchSize: Int = 0) { ++ ++ // Needed for Java API usage ++ def this() = this(None, None) ++ ++ // Needed for Java API usage ++ def this(channelCallback: ActorRef) = this(None, Some(channelCallback)) ++ ++ // Needed for Java API usage ++ def 
this(shutdownListener: ShutdownListener, channelCallback: ActorRef) = ++ this(Some(shutdownListener), Some(channelCallback)) ++ } ++ ++ /** ++ * Declaration type used for either exchange or queue declaration ++ */ ++ sealed trait Declaration ++ case object NoActionDeclaration extends Declaration { ++ def getInstance() = this // Needed for Java API usage ++ } ++ case object PassiveDeclaration extends Declaration { ++ def getInstance() = this // Needed for Java API usage ++ } ++ case class ActiveDeclaration(durable: Boolean = false, autoDelete: Boolean = true, exclusive: Boolean = false) extends Declaration { ++ ++ // Needed for Java API usage ++ def this() = this(false, true, false) ++ ++ // Needed for Java API usage ++ def this(durable: Boolean, autoDelete: Boolean) = this(durable, autoDelete, false) ++ } ++ ++ /** ++ * Exchange specific parameters ++ */ ++ case class ExchangeParameters( ++ exchangeName: String, ++ exchangeType: ExchangeType = Topic, ++ exchangeDeclaration: Declaration = ActiveDeclaration(), ++ configurationArguments: Map[String, AnyRef] = Map.empty) { ++ ++ // Needed for Java API usage ++ def this(exchangeName: String) = ++ this(exchangeName, Topic, ActiveDeclaration(), Map.empty) ++ ++ // Needed for Java API usage ++ def this(exchangeName: String, exchangeType: ExchangeType) = ++ this(exchangeName, exchangeType, ActiveDeclaration(), Map.empty) ++ ++ // Needed for Java API usage ++ def this(exchangeName: String, exchangeType: ExchangeType, exchangeDeclaration: Declaration) = ++ this(exchangeName, exchangeType, exchangeDeclaration, Map.empty) ++ } ++ ++ /** ++ * Producer specific parameters ++ */ ++ case class ProducerParameters( ++ exchangeParameters: Option[ExchangeParameters] = None, ++ returnListener: Option[ReturnListener] = None, ++ channelParameters: Option[ChannelParameters] = None, ++ errorCallbackActor: Option[ActorRef] = None) { ++ def this() = this(None, None, None, None) ++ ++ // Needed for Java API usage ++ def 
this(exchangeParameters: ExchangeParameters) = ++ this(Some(exchangeParameters), None, None, None) ++ ++ // Needed for Java API usage ++ def this(exchangeParameters: ExchangeParameters, returnListener: ReturnListener) = ++ this(Some(exchangeParameters), Some(returnListener), None, None) ++ ++ // Needed for Java API usage ++ def this(exchangeParameters: ExchangeParameters, channelParameters: ChannelParameters) = ++ this(Some(exchangeParameters), None, Some(channelParameters), None) ++ ++ // Needed for Java API usage ++ def this(exchangeParameters: ExchangeParameters, returnListener: ReturnListener, channelParameters: ChannelParameters) = ++ this(Some(exchangeParameters), Some(returnListener), Some(channelParameters), None) ++ ++ // Needed for Java API usage ++ def this(exchangeParameters: ExchangeParameters, returnListener: ReturnListener, channelParameters: ChannelParameters, errorCallbackActor: ActorRef) = ++ this(Some(exchangeParameters), Some(returnListener), Some(channelParameters), Some(errorCallbackActor)) ++ ++ // Needed for Java API usage ++ def this(exchangeParameters: ExchangeParameters, channelParameters: ChannelParameters, errorCallbackActor: ActorRef) = ++ this(Some(exchangeParameters), None, Some(channelParameters), Some(errorCallbackActor)) ++ } ++ ++ /** ++ * Consumer specific parameters ++ */ ++ case class ConsumerParameters( ++ routingKey: String, ++ deliveryHandler: ActorRef, ++ queueName: Option[String] = None, ++ exchangeParameters: Option[ExchangeParameters] = None, ++ queueDeclaration: Declaration = ActiveDeclaration(), ++ selfAcknowledging: Boolean = true, ++ channelParameters: Option[ChannelParameters] = None) { ++ if (queueName.isEmpty) { ++ queueDeclaration match { ++ case ActiveDeclaration(true, _, _) ⇒ ++ throw new IllegalArgumentException(""A queue name is required when requesting a durable queue."") ++ case PassiveDeclaration ⇒ ++ throw new IllegalArgumentException(""A queue name is required when requesting passive declaration."") ++ 
case _ ⇒ () // ignore ++ } ++ } ++ ++ // Needed for Java API usage ++ def this(routingKey: String, deliveryHandler: ActorRef) = ++ this(routingKey, deliveryHandler, None, None, ActiveDeclaration(), true, None) ++ ++ // Needed for Java API usage ++ def this(routingKey: String, deliveryHandler: ActorRef, channelParameters: ChannelParameters) = ++ this(routingKey, deliveryHandler, None, None, ActiveDeclaration(), true, Some(channelParameters)) ++ ++ // Needed for Java API usage ++ def this(routingKey: String, deliveryHandler: ActorRef, selfAcknowledging: Boolean) = ++ this(routingKey, deliveryHandler, None, None, ActiveDeclaration(), selfAcknowledging, None) ++ ++ // Needed for Java API usage ++ def this(routingKey: String, deliveryHandler: ActorRef, selfAcknowledging: Boolean, channelParameters: ChannelParameters) = ++ this(routingKey, deliveryHandler, None, None, ActiveDeclaration(), selfAcknowledging, Some(channelParameters)) ++ ++ // Needed for Java API usage ++ def this(routingKey: String, deliveryHandler: ActorRef, queueName: String) = ++ this(routingKey, deliveryHandler, Some(queueName), None, ActiveDeclaration(), true, None) ++ ++ // Needed for Java API usage ++ def this(routingKey: String, deliveryHandler: ActorRef, queueName: String, queueDeclaration: Declaration, selfAcknowledging: Boolean, channelParameters: ChannelParameters) = ++ this(routingKey, deliveryHandler, Some(queueName), None, queueDeclaration, selfAcknowledging, Some(channelParameters)) ++ ++ // Needed for Java API usage ++ def this(routingKey: String, deliveryHandler: ActorRef, exchangeParameters: ExchangeParameters) = ++ this(routingKey, deliveryHandler, None, Some(exchangeParameters), ActiveDeclaration(), true, None) ++ ++ // Needed for Java API usage ++ def this(routingKey: String, deliveryHandler: ActorRef, exchangeParameters: ExchangeParameters, channelParameters: ChannelParameters) = ++ this(routingKey, deliveryHandler, None, Some(exchangeParameters), ActiveDeclaration(), true, 
Some(channelParameters)) ++ ++ // Needed for Java API usage ++ def this(routingKey: String, deliveryHandler: ActorRef, exchangeParameters: ExchangeParameters, selfAcknowledging: Boolean) = ++ this(routingKey, deliveryHandler, None, Some(exchangeParameters), ActiveDeclaration(), selfAcknowledging, None) ++ ++ // Needed for Java API usage ++ def this(routingKey: String, deliveryHandler: ActorRef, queueName: String, exchangeParameters: ExchangeParameters) = ++ this(routingKey, deliveryHandler, Some(queueName), Some(exchangeParameters), ActiveDeclaration(), true, None) ++ ++ // Needed for Java API usage ++ def this(routingKey: String, deliveryHandler: ActorRef, queueName: String, exchangeParameters: ExchangeParameters, queueDeclaration: Declaration) = ++ this(routingKey, deliveryHandler, Some(queueName), Some(exchangeParameters), queueDeclaration, true, None) ++ ++ // Needed for Java API usage ++ def this(routingKey: String, deliveryHandler: ActorRef, queueName: String, exchangeParameters: ExchangeParameters, queueDeclaration: Declaration, selfAcknowledging: Boolean) = ++ this(routingKey, deliveryHandler, Some(queueName), Some(exchangeParameters), queueDeclaration, selfAcknowledging, None) ++ ++ // Needed for Java API usage ++ def this(routingKey: String, deliveryHandler: ActorRef, queueName: String, exchangeParameters: ExchangeParameters, queueDeclaration: Declaration, selfAcknowledging: Boolean, channelParameters: ChannelParameters) = ++ this(routingKey, deliveryHandler, Some(queueName), Some(exchangeParameters), queueDeclaration, selfAcknowledging, Some(channelParameters)) ++ ++ // How about that for some overloading... huh? :P (yes, I know, there are still possibilities left...sue me!) 
++ // Who said java is easy :( ++ } ++ ++ def newConnection(connectionParameters: ConnectionParameters = new ConnectionParameters()): ActorRef = { ++ ++ val connection = system.actorOf(Props(new FaultTolerantConnectionActor(connectionParameters)), ""amqp-connection-"" + UUID.randomUUID.toString) ++ connection ! Connect ++ connection ++ } ++ ++ def shutdownConnection(connection: ActorRef) = { ++ system.stop(connection) ++ } ++ ++ // Needed for Java API usage ++ def newConnection(): ActorRef = { ++ newConnection(new ConnectionParameters()) ++ } ++ ++ def newProducer(connection: ActorRef, producerParameters: ProducerParameters) = { ++ val p = connection ? ProducerRequest(producerParameters) ++ Await.result(p, timeout.duration).asInstanceOf[Option[ActorRef]]","also, always have return types on your methods. Trust me, will save everyone involved a lot of headache and will add documentation. +",e039c380c868b28285dab6fc313025d981ef022b +525841,0,"don't understand the above, why not just sender ! self""",2012-03-06 11:49:00,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/350#discussion_r525841,2012-03-06T22:49:00Z,2012-03-06T22:49:00Z,viktorklang,CONTRIBUTOR,akka-amqp/src/main/scala/akka/amqp/FaultTolerantChannelActor.scala,,69.0,,,69,"@@ -0,0 +1,188 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. 
++ */ ++ ++package akka.amqp ++ ++import collection.JavaConversions ++import java.lang.Throwable ++import akka.event.Logging ++import akka.pattern.{ ask, pipe } ++import com.rabbitmq.client.{ ShutdownSignalException, Channel, ShutdownListener } ++import scala.PartialFunction ++import akka.dispatch.{ Promise, ExecutionContext, Future } ++import akka.util.{ NonFatal, Timeout } ++import akka.actor.{ ActorRef, Status, Kill, Actor } ++ ++abstract private[amqp] class FaultTolerantChannelActor( ++ exchangeParameters: Option[ExchangeParameters], channelParameters: Option[ChannelParameters]) extends Actor { ++ ++ protected[amqp] var channel: Option[Future[Channel]] = None ++ implicit val sys = context.system ++ ++ val settings = Settings(context.system) ++ implicit val timeout = Timeout(settings.Timeout) ++ ++ val log = Logging(context.system, this) ++ ++ val shutdownListener = new ShutdownListener { ++ def shutdownCompleted(cause: ShutdownSignalException) = { ++ val replyTo = self ++ replyTo ! ChannelShutdown(cause) ++ } ++ } ++ ++ /** ++ * handle channel core and custom messages ++ */ ++ override def receive = channelMessageHandler orElse specificMessageHandler ++ ++ /** ++ * extending actors should implement custom message handling logic in their specificMessageHandler method ++ */ ++ def specificMessageHandler: PartialFunction[Any, Unit] ++ ++ /** ++ * defines the core channel message handlers. ++ */ ++ private def channelMessageHandler: PartialFunction[Any, Unit] = { ++ ++ /** ++ * a producer or consumer is requesting a channel, either because they are initially starting, or ++ * because the channel has unexpectedly shut down. the parent is the connection actor associated ++ * with this producer/consumer. it sends back a Future[Channel] or Failure if it can't be established. ++ */ ++ case Start ⇒ ++ val slf = context.self ++ val snd = context.sender ++ ++ context.parent ? ChannelRequest onComplete { ++ case Right(r) ⇒ slf ? r onComplete { ++ case Right(r) ⇒ snd ! 
r ++ case Left(f) ⇒ snd ! Status.Failure(f) ++ } ++ case Left(f) ⇒ slf ! Status.Failure(f) ++ } ++ case ch: Channel ⇒ ++ setupChannelInternal(ch) ++ val reply = self ++ sender ! reply","don't understand the above, why not just sender ! self +",e039c380c868b28285dab6fc313025d981ef022b +488570,0,"No need - as header is set by camel only if previous call failed""",2012-02-24 23:43:01,piotrga,akka,akka,,https://github.com/akka/akka-core/pull/344#discussion_r488570,2012-02-25T10:43:01Z,2012-03-20T12:17:26Z,piotrga,CONTRIBUTOR,akka-camel/src/test/scala/akka/camel/ConsumerIntegrationTest.scala,,153.0,,,1,"@@ -0,0 +1,159 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. ++ */ ++ ++package akka.camel ++ ++import akka.actor._ ++import org.scalatest.matchers.MustMatchers ++import akka.util.duration._ ++import java.util.concurrent.TimeUnit._ ++import TestSupport._ ++import org.scalatest.WordSpec ++import org.apache.camel.model.RouteDefinition ++import org.apache.camel.builder.Builder ++import org.apache.camel.{ FailedToCreateRouteException, CamelExecutionException } ++import java.util.concurrent.{ ExecutionException, TimeUnit, TimeoutException, CountDownLatch } ++ ++class ConsumerIntegrationTest extends WordSpec with MustMatchers with NonSharedCamelSystem { ++ ++ ""Consumer must throw FailedToCreateRouteException, while awaiting activation, if endpoint is invalid"" in { ++ val actorRef = system.actorOf(Props(new TestActor(uri = ""some invalid uri""))) ++ ++ intercept[FailedToCreateRouteException] { ++ camel.awaitActivation(actorRef, timeout = 1 second) ++ } ++ } ++ ++ ""Consumer must support in-out messaging"" in { ++ start(new Consumer { ++ def endpointUri = ""direct:a1"" ++ protected def receive = { ++ case m: Message ⇒ sender ! 
""received "" + m.bodyAs[String] ++ } ++ }) ++ camel.sendTo(""direct:a1"", msg = ""some message"") must be(""received some message"") ++ } ++ ++ ""Consumer must time-out if consumer is slow"" in { ++ val SHORT_TIMEOUT = 10 millis ++ val LONG_WAIT = 200 millis ++ ++ start(new Consumer { ++ override def replyTimeout = SHORT_TIMEOUT ++ ++ def endpointUri = ""direct:a3"" ++ protected def receive = { case _ ⇒ { Thread.sleep(LONG_WAIT.toMillis); sender ! ""done"" } } ++ }) ++ ++ val exception = intercept[CamelExecutionException] { ++ camel.sendTo(""direct:a3"", msg = ""some msg 3"") ++ } ++ exception.getCause.getClass must be(classOf[TimeoutException]) ++ } ++ ++ ""Consumer must process messages even after actor restart"" in { ++ val restarted = new CountDownLatch(1) ++ val consumer = start(new Consumer { ++ def endpointUri = ""direct:a2"" ++ ++ protected def receive = { ++ case ""throw"" ⇒ throw new Exception ++ case m: Message ⇒ sender ! ""received "" + m.bodyAs[String] ++ } ++ ++ override def postRestart(reason: Throwable) { ++ restarted.countDown() ++ } ++ }) ++ consumer ! 
""throw"" ++ if (!restarted.await(1, SECONDS)) fail(""Actor failed to restart!"") ++ ++ val response = camel.sendTo(""direct:a2"", msg = ""xyz"") ++ response must be(""received xyz"") ++ } ++ ++ ""Consumer must unregister itself when stopped"" in { ++ val consumer = start(new TestActor()) ++ camel.awaitActivation(consumer, 1 second) ++ ++ camel.routeCount must be > (0) ++ ++ system.stop(consumer) ++ camel.awaitDeactivation(consumer, 1 second) ++ ++ camel.routeCount must be(0) ++ } ++ ++ ""Error passing consumer supports error handling through route modification"" in { ++ start(new ErrorThrowingConsumer(""direct:error-handler-test"") with ErrorPassing { ++ override def onRouteDefinition(rd: RouteDefinition) = { ++ rd.onException(classOf[Exception]).handled(true).transform(Builder.exceptionMessage).end ++ } ++ }) ++ camel.sendTo(""direct:error-handler-test"", msg = ""hello"") must be(""error: hello"") ++ } ++ ++ ""Error passing consumer supports redelivery through route modification"" in { ++ start(new FailingOnceConsumer(""direct:failing-once-concumer"") with ErrorPassing { ++ override def onRouteDefinition(rd: RouteDefinition) = { ++ rd.onException(classOf[Exception]).maximumRedeliveries(1).end ++ } ++ }) ++ camel.sendTo(""direct:failing-once-concumer"", msg = ""hello"") must be(""accepted: hello"") ++ } ++ ++ ""Consumer supports manual Ack"" in { ++ start(new ManualAckConsumer() { ++ def endpointUri = ""direct:manual-ack"" ++ protected def receive = { case _ ⇒ sender ! Ack } ++ }) ++ camel.template.asyncSendBody(""direct:manual-ack"", ""some message"").get(1, TimeUnit.SECONDS) must be(null) //should not timeout ++ } ++ ++ ""Consumer handles manual Ack failure"" in { ++ val someException = new Exception(""e1"") ++ start(new ManualAckConsumer() { ++ def endpointUri = ""direct:manual-ack"" ++ protected def receive = { case _ ⇒ sender ! 
Failure(someException) } ++ }) ++ ++ intercept[ExecutionException] { ++ camel.template.asyncSendBody(""direct:manual-ack"", ""some message"").get(1, TimeUnit.SECONDS) ++ }.getCause.getCause must be(someException) ++ } ++ ++ ""Consumer should time-out, if manual Ack not received within replyTimeout and should give a human readable error message"" in { ++ start(new ManualAckConsumer() { ++ override def replyTimeout = 10 millis ++ def endpointUri = ""direct:manual-ack"" ++ protected def receive = { case _ ⇒ } ++ }) ++ ++ intercept[ExecutionException] { ++ camel.template.asyncSendBody(""direct:manual-ack"", ""some message"").get(1, TimeUnit.SECONDS) ++ }.getCause.getCause.getMessage must include(""Failed to get Ack"") ++ } ++} ++ ++class ErrorThrowingConsumer(override val endpointUri: String) extends Consumer { ++ def receive = { ++ case msg: Message ⇒ throw new Exception(""error: %s"" format msg.body) ++ } ++} ++ ++class FailingOnceConsumer(override val endpointUri: String) extends Consumer { ++ ++ def receive = { ++ case msg: Message ⇒ ++ if (msg.headerAs[Boolean](""CamelRedelivered"").getOrElse(false)) ++ sender ! (""accepted: %s"" format msg.body) ++ else ++ throw new Exception(""rejected: %s"" format msg.body)","No need - as header is set by camel only if previous call failed +",f74616f828d3e31724d768dd86ce05af85d97ade +438782,0,"what happens if negative or 0?""",2012-02-10 06:30:34,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/323#discussion_r438782,2012-02-10T17:30:34Z,2012-02-10T17:30:34Z,viktorklang,CONTRIBUTOR,akka-zeromq/src/main/resources/reference.conf,,16.0,,,15,"@@ -12,6 +12,9 @@ akka { + # The default timeout for a poll on the actual zeromq socket. + poll-timeout = 100ms + ++ # Timeout for creating a new socket ++ new-socket-timeout = 5s","what happens if negative or 0? 
+",e017aeef0826fc99722b74551f336d1751f0e884 +419031,0,"here, too""",2012-02-06 04:04:26,rkuhn,akka,akka,,https://github.com/akka/akka-core/pull/307#discussion_r419031,2012-02-06T15:04:26Z,2012-02-28T09:22:30Z,rkuhn,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/Stash.scala,,,,,1,"@@ -0,0 +1,97 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. ++ */ ++package akka.actor ++ ++import akka.dispatch.{ Envelope, DequeBasedMessageQueue } ++ ++/** ++ * The `Stash` trait enables an actor to temporarily stash away messages that can not or ++ * should not be handled using the actor's current behavior. ++ *

++ * Example: ++ *

++ *    class ActorWithProtocol extends Actor with Stash {
++ *      def receive = {
++ *        case ""open"" ⇒
++ *          unstashAll {
++ *            case ""write"" ⇒ // do writing...
++ *            case ""close"" ⇒
++ *              unstashAll()
++ *              context.unbecome()
++ *            case msg ⇒ stash()
++ *          }
++ *        case ""done"" ⇒ // done
++ *        case msg    ⇒ stash()
++ *      }
++ *    }
++ *  
++ * ++ * Note that the `Stash` trait can only be used together with actors that have a deque-based ++ * mailbox. Actors can be configured to use a deque-based mailbox using a configuration like ++ * the following: ++ *
++ *  akka {
++ *    actor {
++ *      default-dispatcher {
++ *        mailboxType = ""akka.dispatch.UnboundedDequeBasedMailbox""
++ *      }
++ *    }
++ *  }
++ *  
++ */ ++trait Stash { ++ thisActor: Actor ⇒ ++ ++ /* The private stash of the actor. It is only accessible using `stash()` and ++ * `unstashAll()`. ++ */ ++ private[this] var theStash = Vector.empty[Envelope] ++ ++ /* The actor's deque-based message queue. ++ * `mailbox.queue` is the underlying `Deque`. ++ */ ++ private[this] val mailbox: DequeBasedMessageQueue = {","here, too +",8ea949857b39388ac25bdc08df7feee3757f3ee9 +371286,0,"I'd still recommend having this final and calling the other with Duration.Inf, then the implementation of that can deal with Duration.Inf as it wants""",2012-01-20 03:41:46,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/238#discussion_r371286,2012-01-20T14:41:46Z,2012-01-20T17:39:49Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorSystem.scala,,256.0,,,79,"@@ -224,18 +229,39 @@ abstract class ActorSystem extends ActorRefFactory { + def dispatcher: MessageDispatcher + + /** +- * Register a block of code to run after all actors in this actor system have +- * been stopped. Multiple code blocks may be registered by calling this method multiple times; there is no +- * guarantee that they will be executed in a particular order. ++ * Register a block of code (callback) to run after all actors in this actor system have ++ * been stopped. Multiple code blocks may be registered by calling this method multiple times. ++ * The callbacks will be run sequentilly in reverse order of registration, i.e. ++ * last registration is run first. ++ * ++ * Callbacks registered after that the shutdown process has started will likely not be run. + */ +- def registerOnTermination[T](code: ⇒ T) ++ def registerOnTermination[T](code: ⇒ T): Unit + + /** +- * Register a block of code to run after all actors in this actor system have +- * been stopped. Multiple code blocks may be registered by calling this method multiple times; there is no +- * guarantee that they will be executed in a particular order (Java API). 
++ * Register a block of code (callback) to run after all actors in this actor system have ++ * been stopped. Multiple code blocks may be registered by calling this method multiple times. ++ * The callbacks will be run sequentilly in reverse order of registration, i.e. ++ * last registration is run first. ++ * ++ * Callbacks registered after that the shutdown process has started will likely not be run. ++ * ++ * Java API + */ +- def registerOnTermination(code: Runnable) ++ def registerOnTermination(code: Runnable): Unit ++ ++ /** ++ * Block current thread until the system has been shutdown, or the specified ++ * timeout has elapsed. This will block until after all on termination ++ * callbacks have been run. ++ */ ++ def awaitTermination(timeout: Duration): Unit ++ ++ /** ++ * Block current thread until the system has been shutdown. This will ++ * block until after all on termination callbacks have been run. ++ */ ++ def awaitTermination(): Unit","I'd still recommend having this final and calling the other with Duration.Inf, then the implementation of that can deal with Duration.Inf as it wants +",a4e2b5a51140c721034bb8b33f06d0213985deb1 +366612,1,"player handlers also? thanks, I'm happy""",2012-01-19 00:06:13,patriknw,akka,akka,,https://github.com/akka/akka-core/pull/225#discussion_r366612,2012-01-19T11:06:13Z,2012-01-20T09:27:03Z,patriknw,CONTRIBUTOR,akka-zeromq/src/main/scala/akka/zeromq/ConcurrentSocketActor.scala,,,,,1,"@@ -0,0 +1,232 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. 
++ */ ++package akka.zeromq ++ ++import org.zeromq.ZMQ.{ Socket, Poller } ++import org.zeromq.{ ZMQ ⇒ JZMQ } ++import akka.actor._ ++import akka.dispatch.{ Promise, Future } ++import akka.event.Logging ++import akka.util.duration._ ++ ++private[zeromq] sealed trait PollLifeCycle ++private[zeromq] case object NoResults extends PollLifeCycle ++private[zeromq] case object Results extends PollLifeCycle ++private[zeromq] case object Closing extends PollLifeCycle ++ ++private[zeromq] class ConcurrentSocketActor(params: Seq[SocketOption]) extends Actor { ++ ++ private val noBytes = Array[Byte]() ++ private val zmqContext = { ++ params find (_.isInstanceOf[Context]) map (_.asInstanceOf[Context]) getOrElse new Context(1) ++ } ++ private lazy val deserializer = deserializerFromParams ++ private lazy val socket: Socket = socketFromParams ++ private lazy val poller: Poller = zmqContext.poller ++ private val log = Logging(context.system, this) ++ ++ private case object Poll ++ private case object ReceiveFrames ++ private case object ClearPoll ++ private case class PollError(ex: Throwable) ++ ++ private def handleConnectionMessages: Receive = { ++ case Send(frames) ⇒ { ++ sendFrames(frames) ++ pollAndReceiveFrames() ++ } ++ case ZMQMessage(frames) ⇒ { ++ sendFrames(frames) ++ pollAndReceiveFrames() ++ } ++ case Connect(endpoint) ⇒ { ++ socket.connect(endpoint) ++ notifyListener(Connecting) ++ pollAndReceiveFrames() ++ } ++ case Bind(endpoint) ⇒ { ++ socket.bind(endpoint) ++ pollAndReceiveFrames() ++ } ++ case Subscribe(topic) ⇒ { ++ socket.subscribe(topic.toArray) ++ pollAndReceiveFrames() ++ } ++ case Unsubscribe(topic) ⇒ { ++ socket.unsubscribe(topic.toArray) ++ pollAndReceiveFrames() ++ } ++ } ++ ++ private def handleSocketOption: Receive = { ++ case Linger(value) ⇒ socket.setLinger(value) ++ case ReconnectIVL(value) ⇒ socket.setReconnectIVL(value) ++ case Backlog(value) ⇒ socket.setBacklog(value) ++ case ReconnectIVLMax(value) ⇒ socket.setReconnectIVLMax(value) ++ case 
MaxMsgSize(value) ⇒ socket.setMaxMsgSize(value) ++ case SndHWM(value) ⇒ socket.setSndHWM(value) ++ case RcvHWM(value) ⇒ socket.setRcvHWM(value) ++ case HWM(value) ⇒ socket.setHWM(value) ++ case Swap(value) ⇒ socket.setSwap(value) ++ case Affinity(value) ⇒ socket.setAffinity(value) ++ case Identity(value) ⇒ socket.setIdentity(value) ++ case Rate(value) ⇒ socket.setRate(value) ++ case RecoveryInterval(value) ⇒ socket.setRecoveryInterval(value) ++ case MulticastLoop(value) ⇒ socket.setMulticastLoop(value) ++ case MulticastHops(value) ⇒ socket.setMulticastHops(value) ++ case ReceiveTimeOut(value) ⇒ socket.setReceiveTimeOut(value) ++ case SendTimeOut(value) ⇒ socket.setSendTimeOut(value) ++ case SendBufferSize(value) ⇒ socket.setSendBufferSize(value) ++ case ReceiveBufferSize(value) ⇒ socket.setReceiveBufferSize(value) ++ case Linger ⇒ sender ! socket.getLinger ++ case ReconnectIVL ⇒ sender ! socket.getReconnectIVL ++ case Backlog ⇒ sender ! socket.getBacklog ++ case ReconnectIVLMax ⇒ sender ! socket.getReconnectIVLMax ++ case MaxMsgSize ⇒ sender ! socket.getMaxMsgSize ++ case SndHWM ⇒ sender ! socket.getSndHWM ++ case RcvHWM ⇒ sender ! socket.getRcvHWM ++ case Swap ⇒ sender ! socket.getSwap ++ case Affinity ⇒ sender ! socket.getAffinity ++ case Identity ⇒ sender ! socket.getIdentity ++ case Rate ⇒ sender ! socket.getRate ++ case RecoveryInterval ⇒ sender ! socket.getRecoveryInterval ++ case MulticastLoop ⇒ sender ! socket.hasMulticastLoop ++ case MulticastHops ⇒ sender ! socket.getMulticastHops ++ case ReceiveTimeOut ⇒ sender ! socket.getReceiveTimeOut ++ case SendTimeOut ⇒ sender ! socket.getSendTimeOut ++ case SendBufferSize ⇒ sender ! socket.getSendBufferSize ++ case ReceiveBufferSize ⇒ sender ! socket.getReceiveBufferSize ++ case ReceiveMore ⇒ sender ! socket.hasReceiveMore ++ case FileDescriptor ⇒ sender ! 
socket.getFD ++ } ++ ++ private def internalMessage: Receive = { ++ case Poll ⇒ { ++ currentPoll = None ++ pollAndReceiveFrames() ++ } ++ case ReceiveFrames ⇒ { ++ receiveFrames() match { ++ case Seq() ⇒ ++ case frames ⇒ notifyListener(deserializer(frames)) ++ } ++ self ! Poll ++ } ++ case ClearPoll ⇒ currentPoll = None ++ case PollError(ex) ⇒ { ++ log.error(ex, ""There was a problem polling the zeromq socket"") ++ self ! Poll ++ } ++ } ++ ++ override def receive: Receive = handleConnectionMessages orElse handleSocketOption orElse internalMessage ++ ++ override def preStart { ++ setupSocket() ++ poller.register(socket, Poller.POLLIN) ++ setupConnection() ++ } ++ ++ private def setupConnection() { ++ params filter (_.isInstanceOf[SocketConnectOption]) foreach { self ! _ } ++ params filter (_.isInstanceOf[PubSubOption]) foreach { self ! _ } ++ } ++ ++ private def socketFromParams() = { ++ require(ZeroMQExtension.check[SocketType.ZMQSocketType](params), ""A socket type is required"") ++ (params ++ find (_.isInstanceOf[SocketType.ZMQSocketType]) ++ map (t ⇒ zmqContext.socket(t.asInstanceOf[SocketType.ZMQSocketType])) get) ++ } ++ ++ private def deserializerFromParams = { ++ params find (_.isInstanceOf[Deserializer]) map (_.asInstanceOf[Deserializer]) getOrElse new ZMQMessageDeserializer ++ } ++ ++ private def setupSocket() = { ++ params foreach { ++ case _: SocketConnectOption | _: PubSubOption | _: SocketMeta ⇒ // ignore, handled differently ++ case m ⇒ self ! 
m ++ } ++ } ++ ++ override def postStop { ++ try { ++ poller.unregister(socket) ++ currentPoll foreach { _ complete Right(Closing) } ++ if (socket != null) socket.close ++ } finally { ++ notifyListener(Closed) ++ } ++ } ++ ++ private def sendFrames(frames: Seq[Frame]) { ++ def sendBytes(bytes: Seq[Byte], flags: Int) = socket.send(bytes.toArray, flags) ++ val iter = frames.iterator ++ while (iter.hasNext) { ++ val payload = iter.next.payload ++ val flags = if (iter.hasNext) JZMQ.SNDMORE else 0 ++ sendBytes(payload, flags) ++ } ++ } ++ ++ private var currentPoll: Option[Promise[PollLifeCycle]] = None ++ private def pollAndReceiveFrames() { ++ if (currentPoll.isEmpty) currentPoll = newEventLoop ++ } ++ ++ private lazy val eventLoopDispatcher = {","why lazy? +",05e4af750081aeb757dab23c5a9885949ad901ea +362298,0,"Might want to include some more debug info about which actorref didn't manage to activate within how long.""",2012-01-18 00:03:26,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/217#discussion_r362298,2012-01-18T11:03:26Z,2012-01-18T16:57:17Z,viktorklang,CONTRIBUTOR,akka-camel/src/main/scala/akka/camel/Activation.scala,,53.0,,,53,"@@ -0,0 +1,54 @@ ++package akka.camel ++ ++import internal._ ++import java.util.concurrent.TimeoutException ++import akka.util.{Timeout, Duration} ++import akka.dispatch.Future ++import akka.actor.{ActorSystem, Props, ActorRef} ++ ++trait Activation{ ++ import akka.dispatch.Await ++ ++ val actorSystem : ActorSystem ++ private[camel] val activationTracker = actorSystem.actorOf(Props[ActivationTracker]) ++ ++ def activationFutureFor(actor: ActorRef, timeout: Duration): Future[ActorRef] = { ++ (activationTracker ?(AwaitActivation(actor), Timeout(timeout))).map[ActorRef]{ ++ case EndpointActivated(_) => actor ++ case EndpointFailedToActivate(_, cause) => throw cause ++ } ++ } ++ ++ /** ++ * Awaits for actor to be activated. 
++ */ ++ def awaitActivation(actor: ActorRef, timeout: Duration): ActorRef = { ++ try{ ++ Await.result(activationFutureFor(actor, timeout), timeout) ++ }catch { ++ case e: TimeoutException => throw new ActivationTimeoutException ++ } ++ } ++ ++ def deactivationFutureFor(actor: ActorRef, timeout: Duration): Future[Unit] = { ++ (activationTracker ?(AwaitDeActivation(actor), Timeout(timeout))).map[Unit]{ ++ case EndpointDeActivated(_) => {} ++ case EndpointFailedToDeActivate(_, cause) => throw cause ++ } ++ } ++ ++ def awaitDeactivation(actor: ActorRef, timeout: Duration) { ++ try{ ++ Await.result(deactivationFutureFor(actor, timeout), timeout) ++ }catch { ++ case e: TimeoutException => throw new DeActivationTimeoutException ++ } ++ } ++ ++} ++ ++ ++ ++ ++class DeActivationTimeoutException extends RuntimeException(""Timed out while waiting for de-activation."")","Might want to include some more debug info about which actorref didn't manage to activate within how long. +",457354db8ba6b42dbacc246c11f0ff15e38ebab4 +362702,2,"This code above smells weird""",2012-01-18 02:07:57,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/217#discussion_r362702,2012-01-18T13:07:57Z,2012-01-18T16:57:19Z,viktorklang,CONTRIBUTOR,akka-camel/src/main/scala/akka/camel/internal/component/ActorComponent.scala,,170.0,,,1,"@@ -0,0 +1,303 @@ ++package akka.camel.internal.component ++ ++/** ++ * Copyright (C) 2009-2010 Scalable Solutions AB ++ */ ++ ++import java.util.{Map => JMap} ++ ++import org.apache.camel._ ++import org.apache.camel.impl.{DefaultProducer, DefaultEndpoint, DefaultComponent} ++ ++import akka.actor._ ++ ++import scala.reflect.BeanProperty ++import akka.dispatch.Await ++import akka.util.{Duration, Timeout} ++import akka.util.duration._ ++import akka.camel.{Camel, CamelExchangeAdapter, Ack, Failure, Message, BlockingOrNot, Blocking, NonBlocking} ++import java.util.concurrent.TimeoutException ++ ++private[camel] case class Path(actorPath: String) { ++ require(actorPath 
!= null) ++ require(actorPath.length() > 0) ++ def toCamelPath = ""actor://path:%s"" format actorPath ++} ++ ++private[camel] object Path{ ++ def apply(actorRef: ActorRef) = new Path(actorRef.path.toString) ++ def fromCamelPath(camelPath : String) = camelPath match { ++ case id if id startsWith ""path:"" => new Path(id substring 5) ++ case _ => throw new IllegalArgumentException(""Invalid path: [%s] - should be path:"" format camelPath) ++ } ++} ++ ++ ++ ++/** ++ * Camel component for sending messages to and receiving replies from (untyped) actors. ++ * ++ * @see akka.camel.component.ActorEndpoint ++ * @see akka.camel.component.ActorProducer ++ * ++ * @author Martin Krasser ++ */ ++class ActorComponent(camel : Camel) extends DefaultComponent { ++ def createEndpoint(uri: String, remaining: String, parameters: JMap[String, Object]): ActorEndpoint = { ++ val path = Path.fromCamelPath(remaining) ++ new ActorEndpoint(uri, this, path, camel) ++ } ++} ++ ++ ++/** ++ * TODO fix the doc to be consistent with implementation ++ * Camel endpoint for sending messages to and receiving replies from (untyped) actors. Actors ++ * are referenced using actor endpoint URIs of the following format: ++ * actor:, ++ * actor:id:[] and ++ * actor:uuid:[], ++ * where refers to ActorRef.id and ++ * refers to the String-representation od ActorRef.uuid. In URIs that contain ++ * id: or uuid:, an actor identifier (id or uuid) is optional. In this ++ * case, the in-message of an exchange produced to this endpoint must contain a message header ++ * with name CamelActorIdentifier and a value that is the target actor's identifier. ++ * If the URI contains an actor identifier, a message with a CamelActorIdentifier ++ * header overrides the identifier in the endpoint URI. 
++ * ++ * @see akka.camel.component.ActorComponent ++ * @see akka.camel.component.ActorProducer ++ ++ * @author Martin Krasser ++ */ ++class ActorEndpoint(uri: String, ++ comp: ActorComponent, ++ val path: Path, ++ camel : Camel) extends DefaultEndpoint(uri, comp) with ActorEndpointConfig{ ++ ++ ++ ++ /** ++ * @throws UnsupportedOperationException ++ */ ++ def createConsumer(processor: Processor): org.apache.camel.Consumer = ++ throw new UnsupportedOperationException(""actor consumer not supported yet"") ++ ++ /** ++ * Creates a new ActorProducer instance initialized with this endpoint. ++ */ ++ def createProducer: ActorProducer = new ActorProducer(this, camel) ++ ++ /** ++ * Returns true. ++ */ ++ def isSingleton: Boolean = true ++} ++ ++trait ActorEndpointConfig{ ++ def getEndpointUri : String ++ def path : Path ++ /** ++ * When endpoint is outCapable (can produce responses) outTimeout is the maximum time ++ * the endpoint can take to send the response back. It defaults to Int.MaxValue seconds. ++ * It can be overwritten by setting @see blocking property ++ */ ++ @BeanProperty var outTimeout: Duration = Int.MaxValue seconds ++ ++ ++ /** ++ * Whether to block caller thread during two-way message exchanges with (untyped) actors. This is ++ * set via the blocking=true|false endpoint URI parameter. Default value is ++ * false. ++ */ ++ @BeanProperty var blocking: BlockingOrNot = NonBlocking ++ ++ /** TODO fix it ++ * Whether to auto-acknowledge one-way message exchanges with (untyped) actors. This is ++ * set via the blocking=true|false endpoint URI parameter. Default value is ++ * true. When set to true consumer actors need to additionally ++ * call Consumer.ack within Actor.receive. ++ */ ++ @BeanProperty var autoack: Boolean = true ++} ++ ++/** ++ * Sends the in-message of an exchange to an (untyped) actor, identified by an ++ * actor endpoint URI or by a CamelActorIdentifier message header. ++ *
    ++ *
  • If the exchange pattern is out-capable and blocking is set to ++ * true then the producer waits for a reply, using the !! operator.
  • ++ *
  • If the exchange pattern is out-capable and blocking is set to ++ * false then the producer sends the message using the ! operator, together ++ * with a callback handler. The callback handler is an ActorRef that can be ++ * used by the receiving actor to asynchronously reply to the route that is sending the ++ * message.
  • ++ *
  • If the exchange pattern is in-only then the producer sends the message using the ++ * ! operator.
  • ++ *
++ * ++ * @see akka.camel.component.ActorComponent ++ * @see akka.camel.component.ActorEndpoint ++ * ++ * @author Martin Krasser ++ */ ++class ActorProducer(val ep: ActorEndpoint, camel: Camel) extends DefaultProducer(ep) with AsyncProcessor { ++ def process(exchange: Exchange) {new TestableProducer(ep, camel).process(new CamelExchangeAdapter(exchange))} ++ def process(exchange: Exchange, callback: AsyncCallback) = new TestableProducer(ep, camel).process(new CamelExchangeAdapter(exchange), callback) ++} ++ ++//TODO needs to know about ActorSystem instead of ConsumerRegistry. why is it called TestableProducer? ++// re: I'd rather keep the abstraction layer for now and let the Camel class delegate ++class TestableProducer(config : ActorEndpointConfig, camel : Camel) { ++ ++ private lazy val path = config.path ++ ++ def process(exchange: CamelExchangeAdapter) { ++ if (exchange.isOutCapable) ++ sendSync(exchange, config.outTimeout, forwardResponseTo(exchange)) ++ else ++ fireAndForget(exchange) ++ } ++ ++ def process(exchange: CamelExchangeAdapter, callback: AsyncCallback): Boolean = { ++ def notifyDoneSynchronously[A](a:A = null) = callback.done(true) ++ def notifyDoneAsynchronously[A](a:A = null) = callback.done(false) ++ val DoneSync = true ++ val DoneAsync = false","This code above smells weird +",457354db8ba6b42dbacc246c11f0ff15e38ebab4 +363051,0,"So why is it a WeakHashMap if you already use the DeathWatch?""",2012-01-18 04:00:41,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/217#discussion_r363051,2012-01-18T15:00:41Z,2012-01-18T16:57:21Z,viktorklang,CONTRIBUTOR,akka-camel/src/main/scala/akka/camel/internal/ActivationTracker.scala,,80.0,,,80,"@@ -0,0 +1,80 @@ ++package akka.camel.internal ++ ++import akka.actor._ ++import akka.camel._ ++import collection.mutable.WeakHashMap ++import akka.event.Logging.Warning ++ ++ ++ ++class ActivationTracker extends Actor{ ++ ++ val activations = new WeakHashMap[ActorRef, ActivationStateMachine] ++ ++ class 
ActivationStateMachine { ++ private[this] var awaitingActivation : List[ActorRef] = Nil ++ private[this] var awaitingDeActivation : List[ActorRef] = Nil ++ private[this] var activationFailure : Option[Throwable] = None ++ ++ var receive : Receive = notActivated ++ ++ def notActivated : Receive = { ++ case AwaitActivation(ref) => awaitingActivation ::= sender ++ case AwaitDeActivation(ref) => awaitingDeActivation ::= sender ++ ++ case msg @ EndpointActivated(ref) => { ++ migration.Migration.EventHandler.debug(ref+"" activated"") ++ awaitingActivation.foreach(_ ! msg) ++ awaitingActivation = Nil ++ receive = activated ++ } ++ ++ case EndpointFailedToActivate(ref, cause) => { ++ migration.Migration.EventHandler.debug(ref+"" failed to activate"") ++ activationFailure = Option(cause) ++ awaitingActivation.foreach(_ ! EndpointFailedToActivate(ref, cause)) ++ awaitingActivation = Nil ++ receive = failedToActivate ++ } ++ } ++ ++ def activated : Receive = { ++ case AwaitActivation(ref) => sender ! EndpointActivated(ref) ++ case AwaitDeActivation(ref) => awaitingDeActivation ::= sender ++ case EndpointDeActivated(ref) => { ++ awaitingDeActivation foreach (_ ! EndpointDeActivated(ref)) ++ awaitingDeActivation = Nil ++ context.stop(self) ++ } ++ case msg : EndpointFailedToDeActivate => { ++ awaitingDeActivation foreach (_ ! msg) ++ awaitingDeActivation = Nil ++ } ++ } ++ ++ def failedToActivate : Receive = { ++ case AwaitActivation(ref) => sender ! EndpointFailedToActivate(ref, activationFailure.get) ++ case AwaitDeActivation(ref) => sender ! EndpointFailedToActivate(ref, activationFailure.get) ++ } ++ ++ } ++ ++ override def preStart() { ++ context.system.eventStream.subscribe(self, classOf[ActivationMessage]) ++ } ++ ++ override def receive = { ++ case msg @ ActivationMessage(ref) =>{ ++ try{ ++ activations.getOrElseUpdate(ref, new ActivationStateMachine).receive(msg)","So why is it a WeakHashMap if you already use the DeathWatch? 
+",457354db8ba6b42dbacc246c11f0ff15e38ebab4 +335896,0,"What if the collections' size is Integer.MAX_VALUE?""",2012-01-08 21:56:02,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/204#discussion_r335896,2012-01-09T08:56:02Z,2012-01-09T08:56:02Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/routing/Routing.scala,,253.0,,,34,"@@ -229,10 +231,28 @@ trait RoundRobinLike { this: RouterConfig ⇒ + def createRoute(props: Props, context: ActorContext, ref: RoutedActorRef): Route = { + createAndRegisterRoutees(props, context, nrOfInstances, routees) + +- val next = new AtomicInteger(0) ++ val next = new AtomicInteger(-1) + + def getNext(): ActorRef = { +- ref.routees(next.getAndIncrement % ref.routees.size) ++ val _routees = ref.routees ++ val size = _routees.size ++ ++ @tailrec ++ def reduce(n: Int) { ++ val safetyValue = if (size >= 10000) size else (size * 100000) ++ if (n >= safetyValue) { ++ // decrease with multiple of the modulus, so that it doesn't change the modulus value ++ val newValue = n - safetyValue ++ next.compareAndSet(n, newValue) ++ reduce(next.get) ++ } ++ } ++ ++ val n = next.incrementAndGet() ++ // make sure we don't exceed Int.MaxValue ++ reduce(n)","What if the collections' size is Integer.MAX_VALUE? 
+",3099a074e5feec91733f475000f2e8ab03d6ebbb +309244,0,"On Wed, Dec 21, 2011 at 5:24 PM, viktorklang < reply@reply.github.com > wrote: > > > > // FIXME: Dispatchers registered here are are not removed, see ticket > #1494 > > - private val dispatchers = new ConcurrentHashMap[String, > MessageDispatcher] > """,2011-12-21 05:26:54,patriknw,akka,akka,,https://github.com/akka/akka-core/pull/182#discussion_r309244,2011-12-21T16:26:54Z,2011-12-21T22:47:22Z,patriknw,CONTRIBUTOR,akka-actor/src/main/scala/akka/dispatch/Dispatchers.scala,,65.0,,,1,"@@ -67,15 +68,18 @@ class Dispatchers(val settings: ActorSystem.Settings, val prerequisites: Dispatc + if (settings.MailboxCapacity < 1) UnboundedMailbox() + else BoundedMailbox(settings.MailboxCapacity, settings.MailboxPushTimeout) + +- val defaultDispatcherConfig = settings.config.getConfig(""akka.actor.default-dispatcher"") ++ val defaultDispatcherConfig = { ++ val key = ""akka.actor.default-dispatcher"" ++ keyConfig(key).withFallback(settings.config.getConfig(key)) ++ } + +- lazy val defaultGlobalDispatcher: MessageDispatcher = +- from(defaultDispatcherConfig) getOrElse { +- throw new ConfigurationException(""Wrong configuration [akka.actor.default-dispatcher]"") +- } ++ private lazy val defaultDispatcherConfigurator: MessageDispatcherConfigurator = ++ configuratorFrom(defaultDispatcherConfig) ++ ++ lazy val defaultGlobalDispatcher: MessageDispatcher = defaultDispatcherConfigurator.dispatcher() + + // FIXME: Dispatchers registered here are are not removed, see ticket #1494 +- private val dispatchers = new ConcurrentHashMap[String, MessageDispatcher] ++ private val dispatcherConfigurators = new ConcurrentHashMap[String, MessageDispatcherConfigurator] + + /** + * Returns a dispatcher as specified in configuration, or if not defined it uses","On Wed, Dec 21, 2011 at 5:24 PM, viktorklang < +reply@reply.github.com + +> wrote: +> +> > // FIXME: Dispatchers registered here are are not removed, see ticket +> > #1494 +> > - private val 
dispatchers = new ConcurrentHashMap[String, +> > MessageDispatcher] +> > - private val dispatcherConfigurators = new ConcurrentHashMap[String, +> > MessageDispatcherConfigurator] +> > +> > /** +> > - Returns a dispatcher as specified in configuration, or if not +> > defined it uses +> +> No, my point was, if I say that ""foo"" should be my dispatcher, and there +> is no ""foo"" registered, and it falls back to the default, I want a WARNING +> +> Alright, I have another point, but I'll change it to Warning. +> +> --- +> +> Reply to this email directly or view it on GitHub: +> https://github.com/jboner/akka/pull/182/files#r309229 + +## + +Patrik Nordwall +Typesafe http://typesafe.com/ - Enterprise-Grade Scala from the Experts +Twitter: @patriknw +",ed2fb14dcf0120a6c126fe9c99038d897dbcdf0d +269632,0,"no, the only thing which must be protected against concurrent enqueue() (which is a read) is shutdown().""",2011-12-05 01:29:19,rkuhn,akka,akka,,https://github.com/akka/akka-core/pull/119#discussion_r269632,2011-12-05T12:29:19Z,2011-12-05T21:48:28Z,rkuhn,CONTRIBUTOR,akka-actor/src/main/java/org/jboss/netty/akka/util/HashedWheelTimer.java,,184.0,,,16,"@@ -181,12 +181,17 @@ private static int normalizeTicksPerWheel(int ticksPerWheel) { + * {@linkplain #stop() stopped} already + */ + public synchronized void start() { +- if (shutdown.get()) { +- throw new IllegalStateException(""cannot be started once stopped""); +- } ++ lock.readLock().lock();","no, the only thing which must be protected against concurrent enqueue() (which is a read) is shutdown(). +",9d7597c7282711889d74bd9b4d7bdae5ea254104 +436872,0,"I understand that, but what about DynamicAccess or DynamicClassMaster. Looking at the signatures it is all about fqcn strings. 
PropertyMaster can be a manager that handles just about anything.""",2012-02-09 21:14:51,patriknw,akka,akka,,https://github.com/akka/akka-core/pull/316#discussion_r436872,2012-02-10T08:14:51Z,2012-02-10T13:37:18Z,patriknw,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorSystem.scala,,,,,1,"@@ -324,13 +324,13 @@ abstract class ExtendedActorSystem extends ActorSystem { + def deathWatch: DeathWatch + + /** +- * ClassLoader which is used for reflective accesses internally. This is set +- * to the context class loader, if one is set, or the class loader which ++ * ClassLoader wrapper which is used for reflective accesses internally. This is set ++ * to use the context class loader, if one is set, or the class loader which + * loaded the ActorSystem implementation. The context class loader is also + * set on all threads created by the ActorSystem, if one was set during + * creation. + */ +- def internalClassLoader: ClassLoader ++ def propertyMaster: PropertyMaster","I understand that, but what about DynamicAccess or DynamicClassMaster. Looking at the signatures it is all about fqcn strings. + +PropertyMaster can be a manager that handles just about anything. +",4b71872aef418639e1033bd7f98edb0e5ea138f4 +443663,0,"Calling `unstashAll()` now throws a `MessageQueueAppendFailedException` in case of a capacity violation (this is also properly documented now). Moreover, the stash size limit is now configurable using `stash-capacity` (an int in the dispatcher section of """,2012-02-13 02:25:43,phaller,akka,akka,,https://github.com/akka/akka-core/pull/307#discussion_r443663,2012-02-13T13:25:43Z,2012-02-28T09:22:30Z,phaller,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/Stash.scala,,,,,1,"@@ -0,0 +1,97 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. 
++ */ ++package akka.actor ++ ++import akka.dispatch.{ Envelope, DequeBasedMessageQueue } ++ ++/** ++ * The `Stash` trait enables an actor to temporarily stash away messages that can not or ++ * should not be handled using the actor's current behavior. ++ *

++ * Example: ++ *

++ *    class ActorWithProtocol extends Actor with Stash {
++ *      def receive = {
++ *        case ""open"" ⇒
++ *          unstashAll {
++ *            case ""write"" ⇒ // do writing...
++ *            case ""close"" ⇒
++ *              unstashAll()
++ *              context.unbecome()
++ *            case msg ⇒ stash()
++ *          }
++ *        case ""done"" ⇒ // done
++ *        case msg    ⇒ stash()
++ *      }
++ *    }
++ *  
++ * ++ * Note that the `Stash` trait can only be used together with actors that have a deque-based ++ * mailbox. Actors can be configured to use a deque-based mailbox using a configuration like ++ * the following: ++ *
++ *  akka {
++ *    actor {
++ *      default-dispatcher {
++ *        mailboxType = ""akka.dispatch.UnboundedDequeBasedMailbox""
++ *      }
++ *    }
++ *  }
++ *  
++ */ ++trait Stash { ++ thisActor: Actor ⇒ ++ ++ /* The private stash of the actor. It is only accessible using `stash()` and ++ * `unstashAll()`. ++ */ ++ private[this] var theStash = Vector.empty[Envelope] ++ ++ /* The actor's deque-based message queue. ++ * `mailbox.queue` is the underlying `Deque`. ++ */ ++ private[this] val mailbox: DequeBasedMessageQueue = { ++ context.asInstanceOf[ActorCell].mailbox match { ++ case queue: DequeBasedMessageQueue ⇒ queue ++ case other ⇒ throw new ActorInitializationException(self, ""UnboundedDequeBasedMailbox required, got: "" + other.getClass()) ++ } ++ } ++ ++ /** ++ * Adds the current message (the message that the actor received last) to the ++ * actor's stash. ++ */ ++ def stash(): Unit = theStash :+= context.asInstanceOf[ActorCell].currentMessage ++ ++ /** ++ * Prepends all messages in the stash to the mailbox, and then clears the stash. ++ */ ++ def unstashAll(): Unit = { ++ theStash.reverseIterator foreach mailbox.queue.addFirst","Calling `unstashAll()` now throws a `MessageQueueAppendFailedException` in case of a capacity violation (this is also properly documented now). Moreover, the stash size limit is now configurable using `stash-capacity` (an int in the dispatcher section of the config). +",8ea949857b39388ac25bdc08df7feee3757f3ee9 +417230,2,"Damn. Missing the else. Thanks Patrik. Sloppy of me to let this one slip through. I'll fix ASAP. 
""",2012-02-04 21:03:22,jboner,akka,akka,,https://github.com/akka/akka-core/pull/297#discussion_r417230,2012-02-05T08:03:22Z,2012-02-05T08:20:17Z,jboner,CONTRIBUTOR,akka-actor/src/main/scala/akka/AkkaException.scala,,,,,1,"@@ -7,6 +7,26 @@ package akka + import akka.actor.newUuid + import java.net.{ InetAddress, UnknownHostException } + ++object AkkaException { ++ val hostname = try InetAddress.getLocalHost.getHostAddress catch { case e: UnknownHostException ⇒ ""unknown host"" } ++ ++ def toStringWithStackTrace(throwable: Throwable): String = { ++ if (throwable eq null) ""Unknown Throwable: was 'null'"" ++ throwable match { ++ case ae: AkkaException ⇒ ae.toLongString ++ case e ⇒ ""%s:%s\n%s"" format (e.getClass.getName, e.getMessage, stackTraceToString(e))","Damn. Missing the else. Thanks Patrik. Sloppy of me to let this one slip through. I'll fix ASAP. +",42f5af7fb011059d8098532493eb13ec3116c7e5 +342851,1,"Nice! Please comment that this prevents props and context to be closed over below.""",2012-01-10 23:35:04,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/208#discussion_r342851,2012-01-11T10:35:04Z,2012-01-11T10:42:36Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/routing/Routing.scala,,176.0,,,1,"@@ -171,15 +171,15 @@ trait RouterConfig { + * @see akka.routing.RouterConfig + */ + abstract class CustomRouterConfig extends RouterConfig { +- override def createRoute(props: Props, context: ActorContext, ref: RoutedActorRef): Route = { +- val customRoute = createCustomRoute(props, context, ref) ++ override def createRoute(props: Props, context: ActorContext): Route = { ++ val customRoute = createCustomRoute(props, context)","Nice! Please comment that this prevents props and context to be closed over below. 
+",bc7b5c92a074da6c4d3f66ce71774e144e531fe8 +342686,0,"So you cannot share the same RouterConfig between multiple Actors?""",2012-01-10 22:11:48,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/206#discussion_r342686,2012-01-11T09:11:48Z,2012-01-11T10:16:56Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/routing/Routing.scala,,,,,1,"@@ -94,18 +116,48 @@ trait RouterConfig { + + protected def toAll(sender: ActorRef, routees: Iterable[ActorRef]): Iterable[Destination] = routees.map(Destination(sender, _)) + +- protected def createRoutees(props: Props, context: ActorContext, nrOfInstances: Int, routees: Iterable[String]): IndexedSeq[ActorRef] = (nrOfInstances, routees) match { ++ def createRoutees(props: Props, context: ActorContext, nrOfInstances: Int, routees: Iterable[String]): IndexedSeq[ActorRef] = (nrOfInstances, routees) match { + case (0, Nil) ⇒ throw new IllegalArgumentException(""Insufficient information - missing configuration."") + case (x, Nil) ⇒ (1 to x).map(_ ⇒ context.actorOf(props))(scala.collection.breakOut) + case (_, xs) ⇒ xs.map(context.actorFor(_))(scala.collection.breakOut) + } + + protected def createAndRegisterRoutees(props: Props, context: ActorContext, nrOfInstances: Int, routees: Iterable[String]): Unit = { +- registerRoutees(context, createRoutees(props, context, nrOfInstances, routees)) ++ resizer match { ++ case None ⇒ registerRoutees(context, createRoutees(props, context, nrOfInstances, routees)) ++ case Some(p) ⇒ resize(props, context, context.self.asInstanceOf[RoutedActorRef].routees) ++ } + } + +- protected def registerRoutees(context: ActorContext, routees: IndexedSeq[ActorRef]): Unit = { +- context.self.asInstanceOf[RoutedActorRef]._routees = routees ++ /** ++ * Adds new routees to the router. ++ */ ++ def registerRoutees(context: ActorContext, routees: IndexedSeq[ActorRef]): Unit = { ++ context.self.asInstanceOf[RoutedActorRef].addRoutees(routees) ++ } ++ ++ /** ++ * Removes routees from the router. 
This method doesn't stop the routees. ++ */ ++ def unregisterRoutees(context: ActorContext, routees: IndexedSeq[ActorRef]): Unit = { ++ context.self.asInstanceOf[RoutedActorRef].removeRoutees(routees) ++ } ++ ++ def resizer: Option[Resizer] = None ++ ++ private val resizeProgress = new AtomicBoolean ++ private val resizeCounter = new AtomicLong ++ ++ def resize(props: Props, context: ActorContext, currentRoutees: IndexedSeq[ActorRef]) { ++ for (r ← resizer) { ++ if (r.isTimeForResize(resizeCounter.getAndIncrement()) && resizeProgress.compareAndSet(false, true)) {","So you cannot share the same RouterConfig between multiple Actors? +",85b673b63ffd61a02ae1393b8115160e4341a108 +319475,0,"Why does that look so cludgy? can't / just skip nulls and """"""""s?""",2011-12-29 10:54:53,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/196#discussion_r319475,2011-12-29T21:54:53Z,2011-12-29T23:12:59Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorRefProvider.scala,,,,,1,"@@ -453,22 +453,33 @@ class LocalActorRefProvider( + + def actorFor(ref: InternalActorRef, path: String): InternalActorRef = path match { + case RelativeActorPath(elems) ⇒ +- if (elems.isEmpty) deadLetters +- else if (elems.head.isEmpty) actorFor(rootGuardian, elems.tail) ++ if (elems.isEmpty) { ++ log.debug(""look-up of empty path string '{}' fails (per definition)"", path) ++ deadLetters ++ } else if (elems.head.isEmpty) actorFor(rootGuardian, elems.tail) + else actorFor(ref, elems) + case LocalActorPath(address, elems) if address == rootPath.address ⇒ actorFor(rootGuardian, elems) +- case _ ⇒ deadLetters ++ case _ ⇒ ++ log.debug(""look-up of unknown path '{}' failed"", path) ++ deadLetters + } + + def actorFor(path: ActorPath): InternalActorRef = + if (path.root == rootPath) actorFor(rootGuardian, path.elements) +- else deadLetters ++ else { ++ log.debug(""look-up of foreign ActorPath '{}' failed"", path) ++ deadLetters ++ } + + def actorFor(ref: InternalActorRef, path: 
Iterable[String]): InternalActorRef = +- if (path.isEmpty) deadLetters +- else ref.getChild(path.iterator) match { +- case Nobody ⇒ deadLetters +- case x ⇒ x ++ if (path.isEmpty) { ++ log.debug(""look-up of empty path sequence fails (per definition)"") ++ deadLetters ++ } else ref.getChild(path.iterator) match { ++ case Nobody ⇒ ++ log.debug(""look-up of path sequence '{}' failed"", path) ++ new EmptyLocalActorRef(eventStream, dispatcher, ref.path / path.filterNot(_.isEmpty))","Why does that look so cludgy? can't / just skip nulls and """"s? +",023f4eb0ecbbcdc6dacb0b73884fc11c3b95f00e +295283,0,"Just tried the actual generated docs. This example comes out a little strange because of the different indenting. I think as two blocks is better for this: .. includecode:: code/TypedActorDocSpec.scala#typed-actor-supercharge .. includecode:: co""",2011-12-14 12:23:14,pvlugter,akka,akka,,https://github.com/akka/akka-core/pull/160#discussion_r295283,2011-12-14T23:23:14Z,2011-12-15T16:19:19Z,pvlugter,MEMBER,akka-docs/scala/typed-actors.rst,,,,,1,"@@ -4,186 +4,160 @@ Typed Actors (Scala) + .. sidebar:: Contents + + .. contents:: :local: +- +-The Typed Actors are implemented through `Typed Actors `_. It uses AOP through `AspectWerkz `_ to turn regular POJOs into asynchronous non-blocking Actors with semantics of the Actor Model. Each method dispatch is turned into a message that is put on a queue to be processed by the Typed Actor sequentially one by one. + +-If you are using the `Spring Framework `_ then take a look at Akka's `Spring integration `_. ++Akka Typed Actors is an implementation of the `Active Objects `_ pattern. ++Essentially turning method invocations into asynchronous dispatch instead of synchronous that has been the default way since Smalltalk came out. + +-**WARNING:** Do not configure to use a ``BalancingDispatcher`` with your ``TypedActors``, it just isn't safe with how ``TypedActors`` currently are implemented. 
This limitation will most likely be removed in the future. ++Typed Actors consist of 2 ""parts"", a public interface and an implementation, and if you've done any work in ""enterprise"" Java, this will be very familiar to you. As with normal Actors you have an external API (the public interface instance) that will delegate methodcalls asynchronously to ++a private instance of the implementation. + +-Creating Typed Actors +---------------------- +- +-**IMPORTANT:** The Typed Actors class must have access modifier 'public' (which is default) and can't be an inner class (unless it is an inner class in an 'object'). ++The advantage of Typed Actors vs. Actors is that with TypedActors you have a static contract, and don't need to define your own messages, the downside is that it places some limitations on what you can do and what you can't, i.e. you can't use become/unbecome. + +-Akka turns POJOs with interface and implementation into asynchronous (Typed) Actors. Akka is using `AspectWerkz’s Proxy `_ implementation, which is the `most performant `_ proxy implementation there exists. ++Typed Actors are implemented using `JDK Proxies `_ which provide a pretty easy-worked API to intercept method calls. + +-In order to create a Typed Actor you have to subclass the ``TypedActor`` base class. + +-Here is an example. ++The tools of the trade ++---------------------- + +-If you have a POJO with an interface implementation separation like this: ++Before we create our first Typed Actor we should first go through the tools that we have at our disposal, ++it's located in ``akka.actor.TypedActor``. + +-.. code-block:: scala ++.. includecode:: code/TypedActorDocSpec.scala ++ :include: typed-actor-extension-tools + +- import akka.actor.TypedActor ++.. 
warning:: ++ ++ Same as not exposing ``this`` of an Akka Actor, it's important not to expose ``this`` of a Typed Actor, ++ instead you should pass the external proxy reference, which is obtained from within your Typed Actor as ++ ``TypedActor.self``, this is your external identity, as the ``ActorRef`` is the external identity of ++ and Akka Actor. + +- trait RegistrationService { +- def register(user: User, cred: Credentials): Unit +- def getUserFor(username: String): User +- } +- +-.. code-block:: scala ++Creating Typed Actors ++--------------------- + +- public class RegistrationServiceImpl extends TypedActor with RegistrationService { +- def register(user: User, cred: Credentials) { +- ... // register user +- } ++To create a Typed Actor you need to have one or more interfaces, and one implementation. + +- def getUserFor(username: String): User = { +- ... // fetch user by username +- user +- } +- } ++Our example interface: + +-Then you can create an Typed Actor out of it by creating it through the ``TypedActor`` factory like this: ++.. includecode:: code/TypedActorDocSpec.scala ++ :include: imports,typed-actor-iface ++ :exclude: typed-actor-iface-methods + +-.. code-block:: scala ++Our example implementation of that interface: + +- val service = TypedActor.newInstance(classOf[RegistrationService], classOf[RegistrationServiceImpl], 1000) +- // The last parameter defines the timeout for Future calls ++.. includecode:: code/TypedActorDocSpec.scala ++ :include: imports,typed-actor-impl ++ :exclude: typed-actor-impl-methods + +-Creating Typed Actors with non-default constructor +-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++The most trivial way of creating a Typed Actor instance ++of our Squarer: + +-To create a typed actor that takes constructor arguments use a variant of ``newInstance`` or ``newRemoteInstance`` that takes a call-by-name block in which you can create the Typed Actor in any way you like. ++.. 
includecode:: code/TypedActorDocSpec.scala ++ :include: typed-actor-create1 + +-Here is an example: ++First type is the type of the proxy, the second type is the type of the implementation. ++If you need to call a specific constructor you do it like this: + +-.. code-block:: scala ++.. includecode:: code/TypedActorDocSpec.scala ++ :include: typed-actor-create2 + +- val service = TypedActor.newInstance(classOf[Service], new ServiceWithConstructorArgs(""someString"", 500L)) ++Since you supply a Props, you can specify which dispatcher to use, what the default timeout should be used and more. ++Now, our Squarer doesn't have any methods, so we'd better add those. + +-Configuration factory class +-^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++.. includecode:: code/TypedActorDocSpec.scala ++ :include: imports,typed-actor-iface + +-Using a configuration object: ++Alright, now we've got some methods we can call, but we need to implement those in SquarerImpl. + +-.. code-block:: scala ++.. includecode:: code/TypedActorDocSpec.scala ++ :include: imports,typed-actor-impl + +- import akka.actor.TypedActorConfiguration +- import akka.util.Duration +- import akka.util.duration._ ++Alright, now we have an interface and an implementation of that interface, ++and we know how to create a Typed Actor from that, so let's look at calling these methods. + +- val config = TypedActorConfiguration() +- .timeout(3000 millis) ++Method dispatch semantics ++------------------------- + +- val service = TypedActor.newInstance(classOf[RegistrationService], classOf[RegistrationServiceImpl], config) ++Methods returning: + +-However, often you will not use these factory methods but declaratively define the Typed Actors as part of a supervisor hierarchy. More on that in the :ref:`fault-tolerance-scala` section. 
++ * ``Unit`` will be dispatched with ``fire-and-forget`` semantics, exactly like ``Actor.tell`` ++ * ``akka.dispatch.Future[_]`` will use ``send-request-reply`` semantics, exactly like ``Actor.ask`` ++ * ``scala.Option[_]`` or akka.japi.Option[_] will use ``send-request-reply`` semantics, but _will_ block to wait for an answer, ++ and return None if no answer was produced within the timout, or scala.Some/akka.japi.Some containing the result otherwise. ++ Any exception that was thrown during this call will be rethrown. ++ * Any other type of value will use ``send-request-reply`` semantics, but _will_ block to wait for an answer, ++ throwing ``java.util.concurrent.TimeoutException`` if there was a timeout or rethrow any exception that was thrown during this call. + +-Sending messages +----------------- ++Messages and immutability ++------------------------- + +-Messages are sent simply by invoking methods on the POJO, which is proxy to the ""real"" POJO now. The arguments to the method are bundled up atomically into an message and sent to the receiver (the actual POJO instance). ++While Akka cannot enforce that the parameters to the methods of your Typed Actors are immutable, ++we *strongly* recommend that parameters passed are immutable. + + One-way message send + ^^^^^^^^^^^^^^^^^^^^ + +-Methods that return void are turned into ‘fire-and-forget’ semantics by asynchronously firing off the message and return immediately. In the example above it would be the 'register' method, so if this method is invoked then it returns immediately: ++.. includecode:: code/TypedActorDocSpec.scala ++ :include: typed-actor-call-oneway + +-.. code-block:: java +- +- // method invocation returns immediately and method is invoke asynchronously using the Actor Model semantics +- service.register(user, creds) ++As simple as that! The method will be executed on another thread; asynchronously. + + Request-reply message send + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + +-Methods that return something (e.g. 
non-void methods) are turned into ‘send-and-receive-eventually’ semantics by asynchronously firing off the message and wait on the reply using a Future. +- +-.. code-block:: scala ++.. includecode:: code/TypedActorDocSpec.scala ++ :include: typed-actor-call-option + +- // method invocation is asynchronously dispatched using the Actor Model semantics, +- // but it blocks waiting on a Future to be resolved in the background +- val user = service.getUser(username) ++This will block for as long as the timeout that was set in the Props of the Typed Actor, ++if needed. It will return ``None`` if a timeout occurs. + +-Generally it is preferred to use fire-forget messages as much as possible since they will never block, e.g. consume a resource by waiting. But sometimes they are neat to use since they: ++.. includecode:: code/TypedActorDocSpec.scala ++ :include: typed-actor-call-strict + +-* Simulates standard Java method dispatch, which is more intuitive for most Java developers +-* Are a neat to model request-reply +-* Are useful when you need to do things in a defined order ++This will block for as long as the timeout that was set in the Props of the Typed Actor, ++if needed. It will throw a ``java.util.concurrent.TimeoutException`` if a timeout occurs. + + Request-reply-with-future message send + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +-Methods that return a ``akka.dispatch.Future`` are turned into ‘send-and-receive-with-future’ semantics by asynchronously firing off the message and returns immediately with a Future. You need to use the ``future(...)`` method in the ``TypedActor`` base class to resolve the Future that the client code is waiting on. +- +-Here is an example: ++.. includecode:: code/TypedActorDocSpec.scala ++ :include: typed-actor-call-future + +-.. 
code-block:: scala +- +- class MathTypedActorImpl extends TypedActor with MathTypedActor { +- def square(x: Int): Future[Integer] = future(x * x) +- } +- +- // create the ping actor +- val math = TypedActor.newInstance(classOf[MathTyped], classOf[MathTypedImpl]) +- +- // This method will return immediately when called, caller should wait on the Future for the result +- val future = math.square(10) +- future.await +- val result: Int = future.get ++This call is asynchronous, and the Future returned can be used for asynchronous composition. + + Stopping Typed Actors + --------------------- + +-Once Typed Actors have been created with one of the ``TypedActor.newInstance`` methods they need to be stopped with ``TypedActor.stop`` to free resources allocated by the created Typed Actor (this is not needed when the Typed Actor is supervised). +- +-.. code-block:: scala +- +- // Create Typed Actor +- val service = TypedActor.newInstance(classOf[RegistrationService], classOf[RegistrationServiceImpl], 1000) +- +- // ... ++Since Akkas Typed Actors are backed by Akka Actors they must be stopped when they aren't needed anymore. + +- // Free Typed Actor resources +- TypedActor.stop(service) ++.. includecode:: code/TypedActorDocSpec.scala ++ :include: typed-actor-stop + +-When the Typed Actor defines a shutdown callback method (:ref:`fault-tolerance-scala`) it will be invoked on ``TypedActor.stop``. ++This asynchronously stops the Typed Actor associated with the specified proxy ASAP. + +-How to use the TypedActorContext for runtime information access +---------------------------------------------------------------- ++.. includecode:: code/TypedActorDocSpec.scala ++ :include: typed-actor-poisonpill + +-The ``akka.actor.TypedActorContext`` class Holds 'runtime type information' (RTTI) for the Typed Actor. This context is a member field in the ``TypedActor`` base class and holds for example the current sender reference, the current sender future etc. 
++This asynchronously stops the Typed Actor associated with the specified proxy ++after it's done with all calls that were made prior to this call. + +-Here is an example how you can use it to in a 'void' (e.g. fire-forget) method to implement request-reply by using the sender reference: ++Typed Actor Hierarchies ++----------------------- + +-.. code-block:: scala ++Since you can obtain a contextual Typed Actor Extension by passing in an ``ActorContext`` ++you can create child Typed Actors by invoking ``typedActorOf(..)`` on that. + +- class PingImpl extends TypedActor with Ping { +- +- def hit(count: Int) { +- val pong = context.getSender.asInstanceOf[Pong] +- pong.hit(count++) +- } +- } +- +-If the sender, sender future etc. is not available, then these methods will return ``null`` so you should have a way of dealing with that scenario. +- +-Messages and immutability +-------------------------- ++This also works for creating child Typed Actors in regular Akka Actors. + +-**IMPORTANT**: Messages can be any kind of object but have to be immutable (there is a workaround, see next section). Java or Scala can’t enforce immutability (yet) so this has to be by convention. Primitives like String, int, Long are always immutable. Apart from these you have to create your own immutable objects to send as messages. If you pass on a reference to an instance that is mutable then this instance can be modified concurrently by two different Typed Actors and the Actor model is broken leaving you with NO guarantees and most likely corrupt data. ++Lifecycle callbacks ++------------------- + +-Akka can help you in this regard. It allows you to turn on an option for serializing all messages, e.g. all parameters to the Typed Actor effectively making a deep clone/copy of the parameters. This will make sending mutable messages completely safe. 
This option is turned on in the :ref:`configuration` file like this: ++By having your Typed Actor implementation class implement ``TypedActor.PreStart``, ``TypedActor.PostStop``, ``TypedActor.PreRestart`` and/or ``TypedActor.PostRestart`` you can hook into the lifecyle of your Typed Actor. + +-.. code-block:: ruby ++Supercharging ++------------- + +- akka { +- actor { +- serialize-messages = on # does a deep clone of messages to ensure immutability +- } +- } ++Here's an example on how you can use traits to mix in behavior in your Typed Actors. + +-This will make a deep clone (using Java serialization) of all parameters. ++.. includecode:: code/TypedActorDocSpec.scala ++ :include: typed-actor-supercharge,typed-actor-supercharge-usage","Just tried the actual generated docs. This example comes out a little strange because of the different indenting. I think as two blocks is better for this: + +``` +.. includecode:: code/TypedActorDocSpec.scala#typed-actor-supercharge + +.. includecode:: code/TypedActorDocSpec.scala#typed-actor-supercharge-usage +``` +",9d2ab2e7145ed1e200ed29aee42f9a5734f108ab +258115,2,"I find it a bit weird to have a method called """"toValue"""" that returns a key-value pair...""",2011-11-29 05:56:01,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/129#discussion_r258115,2011-11-29T16:56:01Z,2011-12-02T07:52:49Z,viktorklang,CONTRIBUTOR,akka-actor-tests/src/test/scala/akka/performance/workbench/Report.scala,,,,,1,"@@ -222,7 +222,7 @@ class Report( + sb.append(""Akka version: "").append(system.settings.ConfigVersion) + sb.append(""\n"") + sb.append(""Akka config:"") +- for ((key, value) ← system.settings.config.toObject) { ++ for ((key, value) ← system.settings.config.toValue) {","I find it a bit weird to have a method called ""toValue"" that returns a key-value pair... +",66bf11681c88b7ba58e9a3134374d21065130937 +269614,1,"You guessed it. 
Also, use pbhash :-)""",2011-12-05 01:21:44,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/119#discussion_r269614,2011-12-05T12:21:44Z,2011-12-05T21:48:28Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorPath.scala,,145.0,,,202,"@@ -85,48 +59,119 @@ trait ActorPath { + def /(child: Iterable[String]): ActorPath = (this /: child)(_ / _) + + /** +- * String representation of this path. Different from toString for root path. ++ * Sequence of names for this path. Performance implication: has to allocate a list. + */ +- def string: String ++ def pathElements: Iterable[String] + + /** +- * Sequence of names for this path. ++ * Walk up the tree to obtain and return the RootActorPath. + */ +- def path: Iterable[String] ++ def root: RootActorPath + +- /** +- * Is this the root path? +- */ +- def isRoot: Boolean + } + +-class RootActorPath(val remoteAddress: RemoteAddress) extends ActorPath { +- +- def name: String = ""/"" ++/** ++ * Root of the hierarchy of ActorPaths. There is exactly root per ActorSystem ++ * and node (for remote-enabled or clustered systems). 
++ */ ++final case class RootActorPath(address: Address, name: String = ActorPath.separator) extends ActorPath { + + def parent: ActorPath = this + +- def /(child: String): ActorPath = new ChildActorPath(remoteAddress, this, child) ++ def root: RootActorPath = this + +- def string: String = """" ++ def /(child: String): ActorPath = new ChildActorPath(this, child) + +- def path: Iterable[String] = Iterable.empty ++ val pathElements: Iterable[String] = List("""") + +- def isRoot: Boolean = true ++ override val toString = address + name + +- override def toString = ActorPath.separator ++ def compareTo(other: ActorPath) = other match { ++ case r: RootActorPath ⇒ toString compareTo r.toString ++ case c: ChildActorPath ⇒ 1 ++ } + } + +-class ChildActorPath(val remoteAddress: RemoteAddress, val parent: ActorPath, val name: String) extends ActorPath { ++final class ChildActorPath(val parent: ActorPath, val name: String) extends ActorPath { + +- def /(child: String): ActorPath = new ChildActorPath(remoteAddress, this, child) ++ def address: Address = root.address + +- def string: String = parent.string + ActorPath.separator + name ++ def /(child: String): ActorPath = new ChildActorPath(this, child) + +- def path: Iterable[String] = parent.path ++ Iterable(name) ++ def pathElements: Iterable[String] = { ++ @tailrec ++ def rec(p: ActorPath, acc: List[String]): Iterable[String] = p match { ++ case r: RootActorPath ⇒ acc ++ case _ ⇒ rec(p.parent, p.name :: acc) ++ } ++ rec(this, Nil) ++ } + +- def isRoot: Boolean = false ++ def root = { ++ @tailrec ++ def rec(p: ActorPath): RootActorPath = p match { ++ case r: RootActorPath ⇒ r ++ case _ ⇒ rec(p.parent) ++ } ++ rec(this) ++ } + +- override def toString = string ++ // TODO research whether this should be cached somehow (might be fast enough, but creates GC pressure) ++ /* ++ * idea: add one field which holds the total length (because that is known) ++ * so that only one String needs to be allocated before traversal; this is ++ * 
cheaper than any cache ++ */ ++ override def toString = { ++ @tailrec ++ def rec(p: ActorPath, s: String): String = p match { ++ case r: RootActorPath ⇒ r + s ++ case _ if s.isEmpty ⇒ rec(p.parent, name) ++ case _ ⇒ rec(p.parent, p.name + ActorPath.separator + s) ++ } ++ rec(this, """") ++ } ++ ++ override def equals(other: Any): Boolean = { ++ @tailrec ++ def rec(left: ActorPath, right: ActorPath): Boolean = ++ if (left eq right) true ++ else if (left.isInstanceOf[RootActorPath]) left equals right ++ else if (right.isInstanceOf[RootActorPath]) right equals left ++ else left.name == right.name && rec(left.parent, right.parent) ++ ++ other match { ++ case p: ActorPath ⇒ rec(this, p) ++ case _ ⇒ false ++ } ++ } ++ ++ // TODO RK investigate Phil’s hash from scala.collection.mutable.HashTable.improve ++ override def hashCode: Int = {","You guessed it. + +Also, use pbhash :-) +",9d7597c7282711889d74bd9b4d7bdae5ea254104 +221048,0,"Drop all constructors except for this one""",2011-11-10 01:47:25,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/106#discussion_r221048,2011-11-10T12:47:25Z,2011-11-10T15:33:03Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/java/org/jboss/netty/akka/util/HashedWheelTimer.java,,103.0,,,103,"@@ -0,0 +1,555 @@ ++/* ++ * Copyright 2009 Red Hat, Inc. ++ * ++ * Red Hat licenses this file to you under the Apache License, version 2.0 ++ * (the ""License""); you may not use this file except in compliance with the ++ * License. You may obtain a copy of the License at: ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an ""AS IS"" BASIS, WITHOUT ++ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the ++ * License for the specific language governing permissions and limitations ++ * under the License. 
++ */ ++package org.jboss.netty.akka.util; ++ ++import java.util.ArrayList; ++import java.util.Collections; ++import java.util.HashSet; ++import java.util.List; ++import java.util.Set; ++import java.util.concurrent.Executors; ++import java.util.concurrent.ThreadFactory; ++import java.util.concurrent.TimeUnit; ++import java.util.concurrent.atomic.AtomicBoolean; ++import java.util.concurrent.atomic.AtomicInteger; ++import java.util.concurrent.locks.ReadWriteLock; ++import java.util.concurrent.locks.ReentrantReadWriteLock; ++ ++import org.jboss.netty.akka.logging.InternalLogger; ++import org.jboss.netty.akka.logging.InternalLoggerFactory; ++import org.jboss.netty.akka.util.internal.ConcurrentIdentityHashMap; ++import org.jboss.netty.akka.util.internal.ReusableIterator; ++import org.jboss.netty.akka.util.internal.SharedResourceMisuseDetector; ++ ++/** ++ * A {@link Timer} optimized for approximated I/O timeout scheduling. ++ * ++ *

Tick Duration

++ * ++ * As described with 'approximated', this timer does not execute the scheduled ++ * {@link TimerTask} on time. {@link org.jboss.netty.akka.util.HashedWheelTimer}, on every tick, will ++ * check if there are any {@link TimerTask}s behind the schedule and execute ++ * them. ++ *

++ * You can increase or decrease the accuracy of the execution timing by ++ * specifying smaller or larger tick duration in the constructor. In most ++ * network applications, I/O timeout does not need to be accurate. Therefore, ++ * the default tick duration is 100 milliseconds and you will not need to try ++ * different configurations in most cases. ++ * ++ *

Ticks per Wheel (Wheel Size)

++ * ++ * {@link org.jboss.netty.akka.util.HashedWheelTimer} maintains a data structure called 'wheel'. ++ * To put simply, a wheel is a hash table of {@link TimerTask}s whose hash ++ * function is 'dead line of the task'. The default number of ticks per wheel ++ * (i.e. the size of the wheel) is 512. You could specify a larger value ++ * if you are going to schedule a lot of timeouts. ++ * ++ *

Do not create many instances.

++ * ++ * {@link org.jboss.netty.akka.util.HashedWheelTimer} creates a new thread whenever it is instantiated and ++ * started. Therefore, you should make sure to create only one instance and ++ * share it across your application. One of the common mistakes, that makes ++ * your application unresponsive, is to create a new instance in ++ * {@link ChannelPipelineFactory}, which results in the creation of a new thread ++ * for every connection. ++ * ++ *

Implementation Details

++ * ++ * {@link org.jboss.netty.akka.util.HashedWheelTimer} is based on ++ * George Varghese and ++ * Tony Lauck's paper, ++ * 'Hashed ++ * and Hierarchical Timing Wheels: data structures to efficiently implement a ++ * timer facility'. More comprehensive slides are located ++ * here. ++ * ++ * @author The Netty Project ++ * @author Trustin Lee ++ * @version $Rev: 2297 $, $Date: 2010-06-07 10:50:02 +0900 (Mon, 07 Jun 2010) $ ++ */ ++public class HashedWheelTimer implements Timer { ++ ++ static final InternalLogger logger = ++ InternalLoggerFactory.getInstance(HashedWheelTimer.class); ++ private static final AtomicInteger id = new AtomicInteger(); ++ ++ private static final SharedResourceMisuseDetector misuseDetector = ++ new SharedResourceMisuseDetector(HashedWheelTimer.class); ++ ++ private final Worker worker = new Worker(); ++ final Thread workerThread; ++ final AtomicBoolean shutdown = new AtomicBoolean(); ++ ++ private final long roundDuration; ++ final long tickDuration; ++ final Set[] wheel; ++ final ReusableIterator[] iterators; ++ final int mask; ++ final ReadWriteLock lock = new ReentrantReadWriteLock(); ++ volatile int wheelCursor; ++ ++ /** ++ * Creates a new timer with the default thread factory ++ * ({@link java.util.concurrent.Executors#defaultThreadFactory()}), default tick duration, and ++ * default number of ticks per wheel. ++ */ ++ public HashedWheelTimer() { ++ this(Executors.defaultThreadFactory()); ++ } ++ ++ /** ++ * Creates a new timer with the default thread factory ++ * ({@link java.util.concurrent.Executors#defaultThreadFactory()}) and default number of ticks ++ * per wheel. 
++ * ++ * @param tickDuration the duration between tick ++ * @param unit the time unit of the {@code tickDuration} ++ */ ++ public HashedWheelTimer(long tickDuration, TimeUnit unit) { ++ this(Executors.defaultThreadFactory(), tickDuration, unit); ++ } ++ ++ /** ++ * Creates a new timer with the default thread factory ++ * ({@link java.util.concurrent.Executors#defaultThreadFactory()}). ++ * ++ * @param tickDuration the duration between tick ++ * @param unit the time unit of the {@code tickDuration} ++ * @param ticksPerWheel the size of the wheel ++ */ ++ public HashedWheelTimer(long tickDuration, TimeUnit unit, int ticksPerWheel) { ++ this(Executors.defaultThreadFactory(), tickDuration, unit, ticksPerWheel); ++ } ++ ++ /** ++ * Creates a new timer with the default tick duration and default number of ++ * ticks per wheel. ++ * ++ * @param threadFactory a {@link java.util.concurrent.ThreadFactory} that creates a ++ * background {@link Thread} which is dedicated to ++ * {@link TimerTask} execution. ++ */ ++ public HashedWheelTimer(ThreadFactory threadFactory) { ++ this(threadFactory, 100, TimeUnit.MILLISECONDS); ++ } ++ ++ /** ++ * Creates a new timer with the default number of ticks per wheel. ++ * ++ * @param threadFactory a {@link java.util.concurrent.ThreadFactory} that creates a ++ * background {@link Thread} which is dedicated to ++ * {@link TimerTask} execution. ++ * @param tickDuration the duration between tick ++ * @param unit the time unit of the {@code tickDuration} ++ */ ++ public HashedWheelTimer( ++ ThreadFactory threadFactory, long tickDuration, TimeUnit unit) { ++ this(threadFactory, tickDuration, unit, 512); ++ } ++ ++ /** ++ * Creates a new timer. ++ * ++ * @param threadFactory a {@link java.util.concurrent.ThreadFactory} that creates a ++ * background {@link Thread} which is dedicated to ++ * {@link TimerTask} execution. 
++ * @param tickDuration the duration between tick ++ * @param unit the time unit of the {@code tickDuration} ++ * @param ticksPerWheel the size of the wheel ++ */ ++ public HashedWheelTimer(","Drop all constructors except for this one +",1577f8bcb32097e2e317302089d92fd417b69913 +20216,0,"Why does it require a HashSet?""",2011-04-17 23:39:45,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/73#discussion_r20216,2011-04-18T11:39:45Z,2011-04-18T11:39:45Z,viktorklang,CONTRIBUTOR,akka-stm/src/main/scala/stm/TransactionalSet.scala,,32.0,,,32,"@@ -0,0 +1,88 @@ ++package akka.stm ++ ++/** ++ * TransactionalSet : completely based on TransactionalMap ++ * @author - Dhananjay Nene ++ */ ++ ++/* ++ * TODO: Change package names in imports. This has been compiled against akka_2.8.0-1.0-M1.zip ++ */ ++ ++import scala.collection.mutable.HashSet ++import se.scalablesolutions.akka.stm.{Transactional, Ref} ++import se.scalablesolutions.akka.actor.{newUuid} ++ ++/** ++ * Transactional set that implements the mutable Set interface with an underlying Ref and HashSet. ++ */ ++ ++object TransactionalSet { ++ def apply[K]() = new TransactionalSet[K]() ++ ++ def apply[K](elems: K*) = new TransactionalSet(HashSet(elems: _*)) ++} ++ ++/** ++ * Transactional Set that implements the mutable Set interface with an underlying Ref and HashSet. ++ * ++ * From Scala you can use TSet as a shorter alias for TransactionalSet. ++ */ ++ ++class TransactionalSet[T](initialValue: HashSet[T]) extends Transactional with scala.collection.mutable.Set[T] {","Why does it require a HashSet? +",95aff5ef78f4b8aca6f624f1402f3198acab056b +507277,0,"done""",2012-03-01 05:13:09,RayRoestenburg,akka,akka,,https://github.com/akka/akka-core/pull/344#discussion_r507277,2012-03-01T16:13:09Z,2012-03-20T12:17:27Z,RayRoestenburg,CONTRIBUTOR,akka-camel/src/main/scala/akka/camel/internal/ActivationTracker.scala,,,,,1,"@@ -0,0 +1,88 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. 
++ */ ++ ++package akka.camel.internal ++ ++import akka.actor._ ++import collection.mutable.WeakHashMap ++ ++class ActivationTracker extends Actor with ActorLogging { ++ val activations = new WeakHashMap[ActorRef, ActivationStateMachine] ++ ++ class ActivationStateMachine { ++ type State = PartialFunction[ActivationMessage, Unit] ++ ++ var receive: State = notActivated() ++ ++ def notActivated(): State = { ++ var awaitingActivation = List[ActorRef]() ++ var awaitingDeActivation = List[ActorRef]() ++ ++ { ++ case AwaitActivation(ref) ⇒ awaitingActivation ::= sender ++ case AwaitDeActivation(ref) ⇒ awaitingDeActivation ::= sender ++ ++ case msg @ EndpointActivated(ref) ⇒ { ++ awaitingActivation.foreach(_ ! msg) ++ receive = activated(awaitingDeActivation) ++ } ++ ++ case EndpointFailedToActivate(ref, cause) ⇒ { ++ awaitingActivation.foreach(_ ! EndpointFailedToActivate(ref, cause)) ++ receive = failedToActivate(cause) ++ } ++ } ++ } ++ ++ def activated(currentAwaitingDeActivation: List[ActorRef]): State = { ++ var awaitingDeActivation = currentAwaitingDeActivation ++ ++ { ++ case AwaitActivation(ref) ⇒ sender ! EndpointActivated(ref) ++ case AwaitDeActivation(ref) ⇒ awaitingDeActivation ::= sender ++ case msg @ EndpointDeActivated(ref) ⇒ { ++ awaitingDeActivation foreach (_ ! msg) ++ receive = deactivated ++ } ++ case msg @ EndpointFailedToDeActivate(ref, cause) ⇒ {","done +",f74616f828d3e31724d768dd86ce05af85d97ade +506518,2,"Holy shit, github has completely fucked my review :(""",2012-03-01 01:47:50,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/329#discussion_r506518,2012-03-01T12:47:50Z,2012-03-12T18:22:15Z,viktorklang,CONTRIBUTOR,akka-cluster/src/main/scala/akka/cluster/Node.scala,,239.0,,,239,"@@ -0,0 +1,803 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. 
++ */ ++ ++package akka.cluster ++ ++import akka.actor._ ++import akka.actor.Status._ ++import akka.remote._ ++import akka.routing._ ++import akka.event.Logging ++import akka.dispatch.Await ++import akka.pattern.ask ++import akka.util._ ++import akka.config.ConfigurationException ++ ++import java.util.concurrent.atomic.{ AtomicReference, AtomicBoolean } ++import java.util.concurrent.TimeUnit._ ++import java.util.concurrent.TimeoutException ++import java.security.SecureRandom ++ ++import scala.collection.immutable.{ Map, SortedSet } ++import scala.annotation.tailrec ++ ++import com.google.protobuf.ByteString ++ ++/** ++ * Interface for membership change listener. ++ */ ++trait MembershipChangeListener { ++ def notify(members: SortedSet[Member]): Unit ++} ++ ++/** ++ * Interface for meta data change listener. ++ */ ++trait MetaDataChangeListener { // FIXME add management and notification for MetaDataChangeListener ++ def notify(meta: Map[String, Array[Byte]]): Unit ++} ++ ++// FIXME create Protobuf messages out of all the Gossip stuff - but wait until the prototol is fully stablized. ++ ++/** ++ * Base trait for all cluster messages. All ClusterMessage's are serializable. ++ */ ++sealed trait ClusterMessage extends Serializable ++ ++/** ++ * Cluster commands sent by the USER. ++ */ ++object ClusterAction { ++ ++ /** ++ * Command to join the cluster. Sent when a node (reprsesented by 'address') ++ * wants to join another node (the receiver). ++ */ ++ case class Join(address: Address) extends ClusterMessage ++ ++ /** ++ * Command to set a node to Up (from Joining). ++ */ ++ case object Up extends ClusterMessage ++ ++ /** ++ * Command to leave the cluster. ++ */ ++ case object Leave extends ClusterMessage ++ ++ /** ++ * Command to mark node as temporary down. ++ */ ++ case object Down extends ClusterMessage ++ ++ /** ++ * Command to mark a node to be removed from the cluster immediately. 
++ */ ++ case object Exit extends ClusterMessage ++ ++ /** ++ * Command to remove a node from the cluster immediately. ++ */ ++ case object Remove extends ClusterMessage ++} ++ ++/** ++ * Represents the address and the current status of a cluster member node. ++ */ ++case class Member(address: Address, status: MemberStatus) extends ClusterMessage ++ ++/** ++ * Envelope adding a sender address to the gossip. ++ */ ++case class GossipEnvelope(sender: Member, gossip: Gossip) extends ClusterMessage ++ ++/** ++ * Defines the current status of a cluster member node ++ * ++ * Can be one of: Joining, Up, Leaving, Exiting and Down. ++ */ ++sealed trait MemberStatus extends ClusterMessage ++object MemberStatus { ++ case object Joining extends MemberStatus ++ case object Up extends MemberStatus ++ case object Leaving extends MemberStatus ++ case object Exiting extends MemberStatus ++ case object Down extends MemberStatus ++ case object Removed extends MemberStatus ++} ++ ++// sealed trait PartitioningStatus ++// object PartitioningStatus { ++// case object Complete extends PartitioningStatus ++// case object Awaiting extends PartitioningStatus ++// } ++ ++// case class PartitioningChange( ++// from: Address, ++// to: Address, ++// path: PartitionPath, ++// status: PartitioningStatus) ++ ++/** ++ * Represents the overview of the cluster, holds the cluster convergence table and set with unreachable nodes. ++ */ ++case class GossipOverview( ++ seen: Map[Address, VectorClock] = Map.empty[Address, VectorClock], ++ unreachable: Set[Address] = Set.empty[Address]) { ++ ++ override def toString = ++ ""GossipOverview(seen = ["" + seen.mkString("", "") + ++ ""], unreachable = ["" + unreachable.mkString("", "") + ++ ""])"" ++} ++ ++/** ++ * Represents the state of the cluster; cluster ring membership, ring convergence, meta data - all versioned by a vector clock. 
++ */ ++case class Gossip( ++ overview: GossipOverview = GossipOverview(), ++ members: SortedSet[Member], // sorted set of members with their status, sorted by name ++ //partitions: Tree[PartitionPath, Node] = Tree.empty[PartitionPath, Node], // name/partition service ++ //pending: Set[PartitioningChange] = Set.empty[PartitioningChange], ++ meta: Map[String, Array[Byte]] = Map.empty[String, Array[Byte]], ++ version: VectorClock = VectorClock()) // vector clock version ++ extends ClusterMessage // is a serializable cluster message ++ with Versioned[Gossip] { ++ ++ /** ++ * Increments the version for this 'Node'. ++ */ ++ def +(node: VectorClock.Node): Gossip = copy(version = version + node) ++ ++ def +(member: Member): Gossip = { ++ if (members contains member) this ++ else this copy (members = members + member) ++ } ++ ++ /** ++ * Marks the gossip as seen by this node (remoteAddress) by updating the address entry in the 'gossip.overview.seen' ++ * Map with the VectorClock for the new gossip. ++ */ ++ def seen(address: Address): Gossip = ++ this copy (overview = overview copy (seen = overview.seen + (address -> version))) ++ ++ override def toString = ++ ""Gossip("" + ++ ""overview = "" + overview + ++ "", members = ["" + members.mkString("", "") + ++ ""], meta = ["" + meta.mkString("", "") + ++ ""], version = "" + version +","Holy shit, github has completely fucked my review :( +",cf3fa9fa3ce9e9312db0922370c93ce1af9db7c8 +434846,0,"?""",2012-02-09 09:41:46,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/318#discussion_r434846,2012-02-09T20:41:46Z,2012-02-10T09:40:42Z,viktorklang,CONTRIBUTOR,akka-docs/scala/code/akka/docs/zeromq/ZeromqDocSpec.scala,,,,,1,"@@ -0,0 +1,187 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. 
++ */ ++package akka.docs.zeromq ++ ++import akka.actor.Actor ++import akka.actor.Props ++import akka.util.duration._ ++import akka.testkit._ ++import akka.zeromq.ZeroMQVersion ++import akka.zeromq.ZeroMQExtension ++import java.text.SimpleDateFormat ++import java.util.Date ++import akka.zeromq.SocketType ++import akka.zeromq.Bind ++ ++object ZeromqDocSpec { ++ ++ //#health ++ import akka.zeromq._ ++ import akka.actor.Actor ++ import akka.actor.Props ++ import akka.actor.ActorLogging ++ import akka.serialization.SerializationExtension ++ import java.lang.management.ManagementFactory ++ ++ case object Tick ++ case class Heap(timestamp: Long, used: Long, max: Long) ++ case class Load(timestamp: Long, loadAverage: Double) ++ ++ class HealthProbe extends Actor { ++ ++ val pubSocket = context.system.newSocket(SocketType.Pub, Bind(""tcp://127.0.0.1:1235"")) ++ val memory = ManagementFactory.getMemoryMXBean ++ val os = ManagementFactory.getOperatingSystemMXBean ++ val ser = SerializationExtension(context.system) ++ ++ context.system.scheduler.schedule(1 second, 1 second, self, Tick) ++ ++ def receive: Receive = { ++ case Tick ⇒ ++ val currentHeap = memory.getHeapMemoryUsage ++ val timestamp = System.currentTimeMillis ++ ++ // use akka SerializationExtension to convert to bytes ++ val heapPayload = ser.serialize(Heap(timestamp, currentHeap.getUsed, currentHeap.getMax)).fold(throw _, identity) ++ // the first frame is the topic, second is the message ++ pubSocket ! ZMQMessage(Seq(Frame(""health.heap""), Frame(heapPayload))) ++ ++ // use akka SerializationExtension to convert to bytes ++ val loadPayload = ser.serialize(Load(timestamp, os.getSystemLoadAverage)).fold(throw _, identity) ++ // the first frame is the topic, second is the message ++ pubSocket ! 
ZMQMessage(Seq(Frame(""health.load""), Frame(loadPayload))) ++ } ++ } ++ //#health ++ ++ //#logger ++ class Logger extends Actor with ActorLogging { ++ ++ context.system.newSocket(SocketType.Sub, Listener(self), Connect(""tcp://127.0.0.1:1235""), Subscribe(""health"")) ++ val ser = SerializationExtension(context.system) ++ val timestampFormat = new SimpleDateFormat(""HH:mm:ss.SSS"") ++ ++ def receive = { ++ // the first frame is the topic, second is the message ++ case m: ZMQMessage if m.firstFrameAsString == ""health.heap"" ⇒ ++ ser.deserialize(m.payload(1), classOf[Heap], None) match { ++ case Right(Heap(timestamp, used, max)) ⇒ ++ log.info(""Used heap {} bytes, at {}"", used, timestampFormat.format(new Date(timestamp))) ++ case Left(e) ⇒ throw e ++ } ++ ++ case m: ZMQMessage if m.firstFrameAsString == ""health.load"" ⇒ ++ ser.deserialize(m.payload(1), classOf[Load], None) match { ++ case Right(Load(timestamp, loadAverage)) ⇒ ++ log.info(""Load average {}, at {}"", loadAverage, timestampFormat.format(new Date(timestamp))) ++ case Left(e) ⇒ throw e ++ } ++ } ++ } ++ //#logger ++ ++ //#alerter ++ class HeapAlerter extends Actor with ActorLogging { ++ ++ context.system.newSocket(SocketType.Sub, Listener(self), Connect(""tcp://127.0.0.1:1235""), Subscribe(""health.heap"")) ++ val ser = SerializationExtension(context.system) ++ var count = 0 ++ ++ def receive = { ++ // the first frame is the topic, second is the message ++ case m: ZMQMessage if m.firstFrameAsString == ""health.heap"" ⇒ ++ ser.deserialize(m.payload(1), classOf[Heap], None) match { ++ case Right(Heap(timestamp, used, max)) ⇒ ++ if ((used.toDouble / max) > 0.9) count += 1 ++ else count = 0 ++ if (count > 10) log.warning(""Need more memory, using {} %"", (100.0 * used / max)) ++ case Left(e) ⇒ throw e ++ } ++ } ++ } ++ //#alerter ++ ++} ++ ++class ZeromqDocSpec extends AkkaSpec(""akka.loglevel=INFO"") { ++ import ZeromqDocSpec._ ++ ++ ""demonstrate how to create socket"" in { ++ checkZeroMQInstallation() 
++ ++ //#pub-socket ++ import akka.zeromq.ZeroMQExtension ++ val pubSocket = ZeroMQExtension(system).newSocket(SocketType.Pub, Bind(""tcp://127.0.0.1:1234"")) ++ //#pub-socket ++ ++ //#pub-socket2 ++ import akka.zeromq._ ++ val pubSocket2 = system.newSocket(SocketType.Pub, Bind(""tcp://127.0.0.1:1234"")) ++ //#pub-socket2 ++ ++ //#sub-socket ++ import akka.zeromq._ ++ val listener = system.actorOf(Props(new Actor { ++ def receive: Receive = { ++ case Connecting ⇒ //... ++ case m: ZMQMessage ⇒ //... ++ case _ ⇒ //... ++ } ++ })) ++ val subSocket = system.newSocket(SocketType.Sub, Listener(listener), Connect(""tcp://127.0.0.1:1234""), SubscribeAll) ++ //#sub-socket ++ ++ //#sub-topic-socket ++ val subTopicSocket = system.newSocket(SocketType.Sub, Listener(listener), Connect(""tcp://127.0.0.1:1234""), Subscribe(""foo.bar"")) ++ //#sub-topic-socket ++ ++ //#unsub-topic-socket ++ subTopicSocket ! Unsubscribe(""foo.bar"") ++ //#unsub-topic-socket ++ ++ val payload = Array.empty[Byte] ++ //#pub-topic ++ pubSocket ! ZMQMessage(Seq(Frame(""foo.bar""), Frame(payload))) ++ //#pub-topic ++ ++ //#high-watermark ++ val highWatermarkSocket = system.newSocket( ++ SocketType.Router, ++ Listener(listener), ++ Bind(""tcp://127.0.0.1:1234""), ++ HighWatermark(50000)) ++ //#high-watermark ++ } ++ ++ ""demonstrate pub-sub"" in { ++ checkZeroMQInstallation() ++ ++ //#health ++ ++ system.actorOf(Props[HealthProbe], name = ""health"") ++ //#health ++ ++ //#logger ++ ++ system.actorOf(Props[Logger], name = ""logger"") ++ //#logger ++ ++ //#alerter ++ ++ system.actorOf(Props[HeapAlerter], name = ""alerter"") ++ //#alerter ++ ++ Thread.sleep(3000)","? +",5b50ca96f2b3e4fc32f077e730e050f99cddfe79 +297217,2,"Trust me, I have spent quite some time pondering these issues. The ask/? 
we have for 2.0 is the least of all evils.""",2011-12-15 07:00:08,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/147#discussion_r297217,2011-12-15T18:00:08Z,2011-12-15T18:00:08Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorRef.scala,,450.0,,,58,"@@ -455,11 +448,13 @@ class AskActorRef( + } + + override def ?(message: Any)(implicit timeout: Timeout): Future[Any] =","Trust me, I have spent quite some time pondering these issues. +The ask/? we have for 2.0 is the least of all evils. +",0af92f24400f1b05d1919be54dfd822037f0076f +506515,2,"WDYM? Are you ironic? What is wrong? Please provide understandable feedback. """,2012-03-01 01:46:47,jboner,akka,akka,,https://github.com/akka/akka-core/pull/329#discussion_r506515,2012-03-01T12:46:47Z,2012-03-12T18:22:15Z,jboner,CONTRIBUTOR,akka-cluster/src/main/scala/akka/cluster/Node.scala,,,,,1,"@@ -0,0 +1,803 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. ++ */ ++ ++package akka.cluster ++ ++import akka.actor._ ++import akka.actor.Status._ ++import akka.remote._ ++import akka.routing._ ++import akka.event.Logging ++import akka.dispatch.Await ++import akka.pattern.ask ++import akka.util._ ++import akka.config.ConfigurationException ++ ++import java.util.concurrent.atomic.{ AtomicReference, AtomicBoolean } ++import java.util.concurrent.TimeUnit._ ++import java.util.concurrent.TimeoutException ++import java.security.SecureRandom ++ ++import scala.collection.immutable.{ Map, SortedSet } ++import scala.annotation.tailrec ++ ++import com.google.protobuf.ByteString ++ ++/** ++ * Interface for membership change listener. ++ */ ++trait MembershipChangeListener { ++ def notify(members: SortedSet[Member]): Unit ++} ++ ++/** ++ * Interface for meta data change listener. 
++ */ ++trait MetaDataChangeListener { // FIXME add management and notification for MetaDataChangeListener ++ def notify(meta: Map[String, Array[Byte]]): Unit ++} ++ ++// FIXME create Protobuf messages out of all the Gossip stuff - but wait until the prototol is fully stablized. ++ ++/** ++ * Base trait for all cluster messages. All ClusterMessage's are serializable. ++ */ ++sealed trait ClusterMessage extends Serializable ++ ++/** ++ * Cluster commands sent by the USER. ++ */ ++object ClusterAction { ++ ++ /** ++ * Command to join the cluster. Sent when a node (reprsesented by 'address') ++ * wants to join another node (the receiver). ++ */ ++ case class Join(address: Address) extends ClusterMessage ++ ++ /** ++ * Command to set a node to Up (from Joining). ++ */ ++ case object Up extends ClusterMessage ++ ++ /** ++ * Command to leave the cluster. ++ */ ++ case object Leave extends ClusterMessage ++ ++ /** ++ * Command to mark node as temporary down. ++ */ ++ case object Down extends ClusterMessage ++ ++ /** ++ * Command to mark a node to be removed from the cluster immediately. ++ */ ++ case object Exit extends ClusterMessage ++ ++ /** ++ * Command to remove a node from the cluster immediately. ++ */ ++ case object Remove extends ClusterMessage ++} ++ ++/** ++ * Represents the address and the current status of a cluster member node. ++ */ ++case class Member(address: Address, status: MemberStatus) extends ClusterMessage ++ ++/** ++ * Envelope adding a sender address to the gossip. ++ */ ++case class GossipEnvelope(sender: Member, gossip: Gossip) extends ClusterMessage ++ ++/** ++ * Defines the current status of a cluster member node ++ * ++ * Can be one of: Joining, Up, Leaving, Exiting and Down. 
++ */ ++sealed trait MemberStatus extends ClusterMessage ++object MemberStatus { ++ case object Joining extends MemberStatus ++ case object Up extends MemberStatus ++ case object Leaving extends MemberStatus ++ case object Exiting extends MemberStatus ++ case object Down extends MemberStatus ++ case object Removed extends MemberStatus ++} ++ ++// sealed trait PartitioningStatus ++// object PartitioningStatus { ++// case object Complete extends PartitioningStatus ++// case object Awaiting extends PartitioningStatus ++// } ++ ++// case class PartitioningChange( ++// from: Address, ++// to: Address, ++// path: PartitionPath, ++// status: PartitioningStatus) ++ ++/** ++ * Represents the overview of the cluster, holds the cluster convergence table and set with unreachable nodes. ++ */ ++case class GossipOverview( ++ seen: Map[Address, VectorClock] = Map.empty[Address, VectorClock], ++ unreachable: Set[Address] = Set.empty[Address]) { ++ ++ override def toString = ++ ""GossipOverview(seen = ["" + seen.mkString("", "") + ++ ""], unreachable = ["" + unreachable.mkString("", "") + ++ ""])"" ++} ++ ++/** ++ * Represents the state of the cluster; cluster ring membership, ring convergence, meta data - all versioned by a vector clock. ++ */ ++case class Gossip( ++ overview: GossipOverview = GossipOverview(), ++ members: SortedSet[Member], // sorted set of members with their status, sorted by name ++ //partitions: Tree[PartitionPath, Node] = Tree.empty[PartitionPath, Node], // name/partition service ++ //pending: Set[PartitioningChange] = Set.empty[PartitioningChange], ++ meta: Map[String, Array[Byte]] = Map.empty[String, Array[Byte]], ++ version: VectorClock = VectorClock()) // vector clock version ++ extends ClusterMessage // is a serializable cluster message ++ with Versioned[Gossip] { ++ ++ /** ++ * Increments the version for this 'Node'. 
++ */ ++ def +(node: VectorClock.Node): Gossip = copy(version = version + node) ++ ++ def +(member: Member): Gossip = { ++ if (members contains member) this ++ else this copy (members = members + member) ++ } ++ ++ /** ++ * Marks the gossip as seen by this node (remoteAddress) by updating the address entry in the 'gossip.overview.seen' ++ * Map with the VectorClock for the new gossip. ++ */ ++ def seen(address: Address): Gossip = ++ this copy (overview = overview copy (seen = overview.seen + (address -> version))) ++ ++ override def toString = ++ ""Gossip("" + ++ ""overview = "" + overview + ++ "", members = ["" + members.mkString("", "") + ++ ""], meta = ["" + meta.mkString("", "") + ++ ""], version = "" + version + ++ "")"" ++} ++ ++/** ++ * FSM actor managing the different cluster nodes states. ++ * Single instance - e.g. serialized access to Node - message after message. ++ */ ++final class ClusterCommandDaemon(system: ActorSystem, node: Node) extends Actor with FSM[MemberStatus, Unit] { ++ ++ // start in JOINING ++ startWith(MemberStatus.Joining, Unit) ++ ++ // ======================== ++ // === IN JOINING === ++ when(MemberStatus.Joining) { ++ case Event(ClusterAction.Up, _) ⇒ ++ node.up() ++ goto(MemberStatus.Up) ++ } ++ ++ // ======================== ++ // === IN UP === ++ when(MemberStatus.Up) { ++ case Event(ClusterAction.Down, _) ⇒ ++ node.downing() ++ goto(MemberStatus.Down) ++ ++ case Event(ClusterAction.Leave, _) ⇒ ++ node.leaving() ++ goto(MemberStatus.Leaving) ++ ++ case Event(ClusterAction.Exit, _) ⇒ ++ node.exiting() ++ goto(MemberStatus.Exiting) ++ ++ case Event(ClusterAction.Remove, _) ⇒ ++ node.removing() ++ goto(MemberStatus.Removed) ++ } ++ ++ // ======================== ++ // === IN LEAVING === ++ when(MemberStatus.Leaving) { ++ case Event(ClusterAction.Down, _) ⇒ ++ node.downing() ++ goto(MemberStatus.Down) ++ ++ case Event(ClusterAction.Remove, _) ⇒ ++ node.removing() ++ goto(MemberStatus.Removed) ++ } ++ ++ // ======================== ++ 
// === IN EXITING === ++ when(MemberStatus.Exiting) { ++ case Event(ClusterAction.Remove, _) ⇒ ++ node.removing() ++ goto(MemberStatus.Removed) ++ } ++ ++ // ======================== ++ // === IN DOWN === ++ when(MemberStatus.Down) { ++ // FIXME How to transition from DOWN => JOINING when node comes back online. Can't just listen to Gossip message since it is received be another actor. How to fix this? ++ case Event(ClusterAction.Remove, _) ⇒ ++ node.removing() ++ goto(MemberStatus.Removed) ++ } ++ ++ // ======================== ++ // === IN REMOVED === ++ when(MemberStatus.Removed) { ++ case command ⇒ ++ log.warning(""Removed node [{}] received cluster command [{}]"", system.name, command) ++ stay ++ } ++ ++ // ======================== ++ // === GENERIC AND UNHANDLED COMMANDS === ++ whenUnhandled { ++ // should be able to handle Join in any state ++ case Event(ClusterAction.Join(address), _) ⇒ ++ node.joining(address) ++ stay ++ ++ case Event(command, _) ⇒ { ++ log.warning(""Unhandled command [{}] in state [{}]"", command, stateName) ++ stay ++ } ++ } ++} ++ ++/** ++ * Pooled and routed wit N number of configurable instances. ++ * Concurrent access to Node. ++ */ ++final class ClusterGossipDaemon(system: ActorSystem, node: Node) extends Actor { ++ val log = Logging(system, ""ClusterGossipDaemon"")","WDYM? Are you ironic? What is wrong? Please provide understandable feedback. 
+",cf3fa9fa3ce9e9312db0922370c93ce1af9db7c8 +434677,2,"Yeah, I’m feeling so BAD today.""",2012-02-09 09:25:00,rkuhn,akka,akka,,https://github.com/akka/akka-core/pull/317#discussion_r434677,2012-02-09T20:25:00Z,2012-02-10T13:17:34Z,rkuhn,CONTRIBUTOR,akka-actor/src/main/scala/akka/routing/Routing.scala,,,,,1,"@@ -30,14 +31,77 @@ private[akka] class RoutedActorRef(_system: ActorSystemImpl, _props: Props, _sup + _supervisor, + _path) { + +- private val routeeProps = _props.copy(routerConfig = NoRouter) +- private val resizeProgress = new AtomicBoolean ++ /* ++ * CAUTION: RoutedActorRef is PROBLEMATIC ++ * ====================================== ++ * ++ * We are constructing/assembling the children outside of the scope of the ++ * Router actor, inserting them in its childrenRef list, which is not at all ++ * synchronized. This is done exactly once at start-up, all other accesses ++ * are done from the Router actor. This means that the only thing which is ++ * really hairy is making sure that the Router does not touch its childrenRefs ++ * before we are done with them: create a locked latch really early (hence the ++ * override of newActorCell) and use that to block the Router constructor for ++ * as long as it takes to setup the RoutedActorRef itself. ++ */ ++ private[akka] var routeReady: ReentrantLock = _ ++ override def newActorCell( ++ system: ActorSystemImpl, ++ ref: InternalActorRef, ++ props: Props, ++ supervisor: InternalActorRef, ++ receiveTimeout: Option[Duration]): ActorCell = { ++ /* ++ * TODO RK: check that this really sticks, since this is executed before ++ * the constructor of RoutedActorRef is executed (invoked from ++ * LocalActorRef); works on HotSpot and JRockit. 
++ */ ++ routeReady = new ReentrantLock ++ routeReady.lock() ++ super.newActorCell(system, ref, props, supervisor, receiveTimeout) ++ } ++ ++ private[akka] val routerConfig = _props.routerConfig ++ private[akka] val routeeProps = _props.copy(routerConfig = NoRouter) ++ private[akka] val resizeProgress = new AtomicBoolean + private val resizeCounter = new AtomicLong + + @volatile + private var _routees: IndexedSeq[ActorRef] = IndexedSeq.empty[ActorRef] // this MUST be initialized during createRoute + def routees = _routees + ++ private[akka] var routeeProvider: RouteeProvider = _ ++ val route = ++ try { ++ routeeProvider = routerConfig.createRouteeProvider(actorContext) ++ val r = routerConfig.createRoute(routeeProps, routeeProvider) ++ // initial resize, before message send ++ resize() ++ r ++ } finally routeReady.unlock() // unblock Router’s constructor","Yeah, I’m feeling so BAD today. +",36247b10fe79c8aa6ec878eaf4946c2cd1b65084 +436775,0,"What exactly (and I mean really the details) is the test meant to ensure? The current implementation of the test fails if any resize() call does not lead to an actual Resize event, which is definitely flawed. And hacking it so that the test works (by doin""",2012-02-09 20:17:40,rkuhn,akka,akka,,https://github.com/akka/akka-core/pull/317#discussion_r436775,2012-02-10T07:17:40Z,2012-02-10T13:17:34Z,rkuhn,CONTRIBUTOR,akka-actor-tests/src/test/scala/akka/routing/ResizerSpec.scala,,,,,1,"@@ -123,7 +123,15 @@ class ResizerSpec extends AkkaSpec(ResizerSpec.config) with DefaultTimeout with + current.routees.size must be(2) + } + +- ""resize when busy"" in { ++ /* ++ * TODO RK This test seems invalid to me, because it relies on that no resize() event is lost; ++ * this currently fails because I made resize() asynchronous (by sending a message to the ++ * Router), but it could also fail for concurrent send operations, i.e. when one of thread ++ * fails the resizeInProgress.compareAndSet(false, true) check. 
++ * ++ * Either the test must be fixed/removed or resize() must be changed to be blocking. ++ */ ++ ""resize when busy"" ignore {","What exactly (and I mean really the details) is the test meant to ensure? The current implementation of the test fails if any resize() call does not lead to an actual Resize event, which is definitely flawed. And hacking it so that the test works (by doing some blocking) misses the point, IMHO, because that is not what user code would do. +",36247b10fe79c8aa6ec878eaf4946c2cd1b65084 +457023,0,"This way you encode a default value directly. We have made an effort to put all defaults into `reference.conf` to keep them central (and testable). val dispatcher = context.system.settings.config.getConfig(context.props.dispatcher) val config = d""",2012-02-16 02:18:22,rkuhn,akka,akka,,https://github.com/akka/akka-core/pull/307#discussion_r457023,2012-02-16T13:18:22Z,2012-02-28T09:22:30Z,rkuhn,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/Stash.scala,,,,,1,"@@ -0,0 +1,135 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. ++ */ ++package akka.actor ++ ++import akka.dispatch.{ Envelope, DequeBasedMessageQueue } ++import akka.AkkaException ++ ++/** ++ * The `Stash` trait enables an actor to temporarily stash away messages that can not or ++ * should not be handled using the actor's current behavior. ++ *

++ * Example: ++ *

++ *    class ActorWithProtocol extends Actor with Stash {
++ *      def receive = {
++ *        case ""open"" ⇒
++ *          unstashAll {
++ *            case ""write"" ⇒ // do writing...
++ *            case ""close"" ⇒
++ *              unstashAll()
++ *              context.unbecome()
++ *            case msg ⇒ stash()
++ *          }
++ *        case ""done"" ⇒ // done
++ *        case msg    ⇒ stash()
++ *      }
++ *    }
++ *  
++ * ++ * Note that the `Stash` trait can only be used together with actors that have a deque-based ++ * mailbox. Actors can be configured to use a deque-based mailbox using a configuration like ++ * the following (see the documentation on dispatchers on how to configure a custom ++ * dispatcher): ++ *
++ *  akka {
++ *    actor {
++ *      my-custom-dispatcher {
++ *        mailbox-type = ""akka.dispatch.UnboundedDequeBasedMailbox""
++ *      }
++ *    }
++ *  }
++ *  
++ */ ++trait Stash extends Actor { ++ this: Actor ⇒ ++ ++ /* The private stash of the actor. It is only accessible using `stash()` and ++ * `unstashAll()`. ++ */ ++ private var theStash = Vector.empty[Envelope] ++ ++ /* The capacity of the stash. Either configured in the actor's dispatcher config, or ++ * `Int.MaxValue`. ++ */ ++ private val capacity = ++ try context.system.settings.config.getConfig(context.props.dispatcher).getInt(""stash-capacity"") ++ catch { ++ case _ ⇒ Int.MaxValue ++ }","This way you encode a default value directly. We have made an effort to put all defaults into `reference.conf` to keep them central (and testable). + +``` +val dispatcher = context.system.settings.config.getConfig(context.props.dispatcher) +val config = dispatcher.withFallback(context.system.settings.config.getConfig(""akka.actor.default-dispatcher"")) +config.getInt(""stash-capacity"") +``` + +which will never fail because there is a default in the default-dispatcher section. +",8ea949857b39388ac25bdc08df7feee3757f3ee9 +416655,1,"@VladimirMangos when you supply """"/dev/rsa"""" as commit author in git you instead put full path to git bins plus /dev/rsa :D lol can be seen in twitter or here: https://github.com/mangos/mangos/commit/6d0667fb72827b7ad23096dc04def141c12ecaae.patch""",2012-02-03 23:34:19,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/297#discussion_r416655,2012-02-04T10:34:19Z,2012-02-05T08:20:16Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/AkkaException.scala,,,,,1,"@@ -7,6 +7,26 @@ package akka + import akka.actor.newUuid + import java.net.{ InetAddress, UnknownHostException } + ++object AkkaException { ++ val hostname = try InetAddress.getLocalHost.getHostAddress catch { case e: UnknownHostException ⇒ ""unknown"" } ++ ++ def toStringWithStackTrace(throwable: Throwable): String = { ++ if (throwable eq null) ""Unknown Exception""","Technically ""unknown throwable"" +",42f5af7fb011059d8098532493eb13ec3116c7e5 +392257,2,"I don't like this 
at all. It's the provider who should provide these, don't like the leakage.""",2012-01-27 04:45:37,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/275#discussion_r392257,2012-01-27T15:45:37Z,2012-01-30T10:58:22Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorRef.scala,,355.0,,,32,"@@ -349,6 +349,15 @@ case class SerializedActorRef(path: String) { + } + } + ++object SerializedActorRef { ++ def apply(path: ActorPath): SerializedActorRef = { ++ Serialization.currentTransportAddress.value match { ++ case null ⇒ new SerializedActorRef(path.toString)","I don't like this at all. It's the provider who should provide these, don't like the leakage. +",4fb0858e557232e5f4a7e5b364d1697c0e21bdd1 +388691,0,"Has Derek signed the CLA?""",2012-01-26 06:45:26,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/273#discussion_r388691,2012-01-26T17:45:26Z,2012-01-26T17:45:26Z,viktorklang,CONTRIBUTOR,akka-docs/java/fault-tolerance-sample.rst,,8.0,,,8,"@@ -1,5 +1,51 @@ + .. _fault-tolerance-sample-java: + ++Diagrams of the Fault Tolerance Sample (Java) ++---------------------------------------------- ++ ++.. image:: ../images/faulttolerancesample-normal-flow.png ++ ++*The above diagram illustrates the normal message flow.*","Has Derek signed the CLA? 
+",f8741c326e881d05b6b9d89b1345c62dbfbaa226 +373889,1,"good point, will do, including specific section in migration docs""",2012-01-21 02:39:47,rkuhn,akka,akka,,https://github.com/akka/akka-core/pull/241#discussion_r373889,2012-01-21T13:39:47Z,2012-01-23T17:25:54Z,rkuhn,CONTRIBUTOR,akka-actor-migration/src/main/scala/akka/migration/package.scala,,36.0,,,6,"@@ -31,4 +31,7 @@ package object migration { + def stop(): Unit = GlobalActorSystem.stop(actorRef) + } + ++ implicit def ask(actorRef: ActorRef) = new akka.migration.AskableActorRef(actorRef) ++ def ask(actorRef: ActorRef, message: Any)(implicit timeout: Timeout = null): Future[Any] = akka.pattern.ask(actorRef, message)(timeout) ++","good point, will do, including specific section in migration docs +",9d7ed5eba16152d15269bf4f626260432359c0d4 +289597,2,"I think it's really bad, especially with PinnedDispatchers, since they create a new Dispatcher for every actor. We must fix this""",2011-12-13 02:16:42,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/149#discussion_r289597,2011-12-13T13:16:42Z,2011-12-13T14:05:16Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/dispatch/Dispatchers.scala,,78.0,,,1,"@@ -62,9 +69,30 @@ class Dispatchers(val settings: ActorSystem.Settings, val prerequisites: Dispatc + + val defaultDispatcherConfig = settings.config.getConfig(""akka.actor.default-dispatcher"") + +- // TODO PN Shouldn't we fail hard if default-dispatcher is wrong? +- lazy val defaultGlobalDispatcher = +- from(defaultDispatcherConfig) getOrElse newDispatcher(""AkkaDefaultGlobalDispatcher"", 1, MailboxType).build ++ lazy val defaultGlobalDispatcher: MessageDispatcher = ++ from(defaultDispatcherConfig) getOrElse { ++ throw new ConfigurationException(""Wrong configuration [akka.actor.default-dispatcher]"") ++ } ++ ++ private val dispatchers = new ConcurrentHashMap[String, MessageDispatcher]","I think it's really bad, especially with PinnedDispatchers, since they create a new Dispatcher for every actor. 
+We must fix this +",7a17eb00bf7043b8b89839aecebf0e66b91dd9e9 +171073,0,"But you're calling out to the methods of the Actor, which is a violation of the ActorModel, since the Actor could be shut down or it's fields might be invisible from a JMM perspective, why not put the connect, bind etc as methods inside the select-functio""",2011-10-13 19:35:23,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/92#discussion_r171073,2011-10-14T07:35:23Z,2011-10-19T18:11:55Z,viktorklang,CONTRIBUTOR,akka-zeromq/src/main/scala/akka/zeromq/ConcurrentSocketActor.scala,,,,,1,"@@ -0,0 +1,113 @@ ++/** ++ * Copyright (C) 2009-2011 Typesafe Inc. ++ */ ++package akka.zeromq ++ ++import akka.actor.{Actor, ActorRef} ++import akka.dispatch.MessageDispatcher ++import akka.zeromq.SocketType._ ++import java.util.concurrent.atomic.AtomicReference ++import org.zeromq.ZMQ.{Socket, Poller} ++import org.zeromq.{ZMQ => JZMQ} ++import scala.annotation.tailrec ++import scala.collection.mutable.MutableList ++ ++private[zeromq] class ConcurrentSocketActor( ++ context: Context, ++ socketType: SocketType, ++ listener: Option[ActorRef], ++ deserializer: Deserializer, ++ dispatcher: MessageDispatcher) extends Actor { ++ private val pollTimeoutMsec = 10 ++ private val requests = new AtomicReference(List.empty[Request]) ++ private val socket: Socket = context.socket(socketType) ++ private val poller: Poller = context.poller ++ private var socketClosed: Boolean = false ++ self.dispatcher = dispatcher ++ poller.register(socket, Poller.POLLIN) ++ private val select = { () => ++ if (!socketClosed) { ++ if (poller.poll(pollTimeoutMsec) > 0) { ++ if (poller.pollin(0)) { ++ receiveFrames match { ++ case frames if (frames.length > 0) => listener.foreach { listener => ++ if (listener.isRunning) ++ listener ! 
deserializer(frames) ++ } ++ } ++ } ++ } ++ requests.getAndSet(Nil).foreach {","But you're calling out to the methods of the Actor, which is a violation of the ActorModel, since the Actor could be shut down or it's fields might be invisible from a JMM perspective, why not put the connect, bind etc as methods inside the select-function? (or are they used internally in the actor as well?= +",826495e912557a63835cd0f79ae1a2acd3525811 +506788,0,"it is only used in test, moved it there""",2012-03-01 03:10:58,RayRoestenburg,akka,akka,,https://github.com/akka/akka-core/pull/344#discussion_r506788,2012-03-01T14:10:58Z,2012-03-20T12:17:27Z,RayRoestenburg,CONTRIBUTOR,akka-camel/src/main/scala/akka/camel/Consumer.scala,,,,,1,"@@ -1,144 +1,67 @@ + /** +- * Copyright (C) 2009-2010 Typesafe Inc. ++ * Copyright (C) 2009-2012 Typesafe Inc. + */ + + package akka.camel + ++import internal.component.DurationTypeConverter + import org.apache.camel.model.{ RouteDefinition, ProcessorDefinition } + + import akka.actor._ ++import akka.util.Duration ++import akka.util.duration._ + + /** + * Mixed in by Actor implementations that consume message from Camel endpoints. + * + * @author Martin Krasser + */ +-trait Consumer { this: Actor ⇒ +- import RouteDefinitionHandler._ ++trait Consumer extends Actor with ConsumerConfig { + +- /** +- * The default route definition handler is the identity function +- */ +- private[camel] var routeDefinitionHandler: RouteDefinitionHandler = identity +- +- /** +- * Returns the Camel endpoint URI to consume messages from. +- */ + def endpointUri: String ++ protected[this] implicit lazy val camel = CamelExtension(context.system) + +- /** +- * Determines whether two-way communications between an endpoint and this consumer actor +- * should be done in blocking or non-blocking mode (default is non-blocking). This method +- * doesn't have any effect on one-way communications (they'll never block). 
+- */ +- def blocking = false +- +- /** +- * Determines whether one-way communications between an endpoint and this consumer actor +- * should be auto-acknowledged or system-acknowledged. +- */ +- def autoack = true +- +- /** +- * Sets the route definition handler for creating a custom route to this consumer instance. +- */ +- def onRouteDefinition(h: RouteDefinition ⇒ ProcessorDefinition[_]): Unit = onRouteDefinition(from(h)) +- +- /** +- * Sets the route definition handler for creating a custom route to this consumer instance. +- *

+- * Java API. +- */ +- def onRouteDefinition(h: RouteDefinitionHandler): Unit = routeDefinitionHandler = h ++ camel.registerConsumer(endpointUri, this, activationTimeout) + } + +-/** +- * Java-friendly Consumer. +- * +- * Subclass this abstract class to create an MDB-style untyped consumer actor. This +- * class is meant to be used from Java. +- * +- * @author Martin Krasser +- */ +-abstract class UntypedConsumerActor extends UntypedActor with Consumer { +- final override def endpointUri = getEndpointUri +- final override def blocking = isBlocking +- final override def autoack = isAutoack ++trait ConsumerConfig { ++ //TODO: Explain the parameters better with some examples! + + /** +- * Returns the Camel endpoint URI to consume messages from. ++ * How long should the actor wait for activation before it fails. + */ +- def getEndpointUri(): String ++ def activationTimeout: Duration = 10 seconds + + /** +- * Determines whether two-way communications between an endpoint and this consumer actor +- * should be done in blocking or non-blocking mode (default is non-blocking). This method +- * doesn't have any effect on one-way communications (they'll never block). ++ * When endpoint is out-capable (can produce responses) replyTimeout is the maximum time ++ * the endpoint can take to send the response before the message exchange fails. It defaults to 1 minute. ++ * This setting is used for out-capable, in-only, manually acknowledged communication. ++ * When the blocking is set to Blocking replyTimeout is ignored. + */ +- def isBlocking() = super.blocking ++ def replyTimeout: Duration = 1 minute + + /** + * Determines whether one-way communications between an endpoint and this consumer actor +- * should be auto-acknowledged or system-acknowledged. ++ * should be auto-acknowledged or application-acknowledged. ++ * This flag has only effect when exchange is in-only. 
+ */ +- def isAutoack() = super.autoack +-} +- +-/** +- * A callback handler for route definitions to consumer actors. +- * +- * @author Martin Krasser +- */ +-trait RouteDefinitionHandler { +- def onRouteDefinition(rd: RouteDefinition): ProcessorDefinition[_] +-} ++ def autoack: Boolean = true + +-/** +- * The identity route definition handler. +- * +- * @author Martin Krasser +- * +- */ +-class RouteDefinitionIdentity extends RouteDefinitionHandler { +- def onRouteDefinition(rd: RouteDefinition) = rd +-} +- +-/** +- * @author Martin Krasser +- */ +-object RouteDefinitionHandler { + /** +- * Returns the identity route definition handler ++ * The route definition handler for creating a custom route to this consumer instance. + */ +- val identity = new RouteDefinitionIdentity ++ //TODO: write a test confirming onRouteDefinition gets called ++ def onRouteDefinition(rd: RouteDefinition): ProcessorDefinition[_] = rd + +- /** +- * Created a route definition handler from the given function. +- */ +- def from(f: RouteDefinition ⇒ ProcessorDefinition[_]) = new RouteDefinitionHandler { +- def onRouteDefinition(rd: RouteDefinition) = f(rd) +- } ++ private[camel] def toCamelParameters: String = ""autoack=%s&replyTimeout=%s"" format (autoack, DurationTypeConverter.toString(replyTimeout)) + } + +-/** +- * @author Martin Krasser +- */ +-private[camel] object Consumer { +- /** +- * Applies a function f to actorRef if actorRef +- * references a consumer actor. A valid reference to a consumer actor is a local actor +- * reference with a target actor that implements the Consumer trait. The +- * target Consumer instance is passed as argument to f. This +- * method returns None if actorRef is not a valid reference +- * to a consumer actor, Some contained the return value of f +- * otherwise. 
+- */ +- def withConsumer[T](actorRef: ActorRef)(f: Consumer ⇒ T): Option[T] = actorRef match { +- case l: LocalActorRef ⇒ +- l.underlyingActorInstance match { +- case c: Consumer ⇒ Some(f(c)) +- case _ ⇒ None +- } +- case _ ⇒ None +- } ++trait ManualAckConsumer extends Consumer {","it is only used in test, moved it there +",f74616f828d3e31724d768dd86ce05af85d97ade +465397,1,"Yup, agreed, and indeed I was mislead by the other issue (which was the UrlMatcher not decoding). I tested and it seems to work all good now, with full encoding. I just decode the / back and turn %20 into + because it looks better and seems to work just fi""",2012-02-19 03:58:35,rkuhn,akka,akka,,https://github.com/akka/akka-core/pull/330#discussion_r465397,2012-02-19T14:58:35Z,2012-02-19T14:58:35Z,rkuhn,CONTRIBUTOR,akka-actor/src/main/scala/akka/dispatch/Future.scala,,357.0,,,22,"@@ -353,33 +354,33 @@ object Future { + } + + private val _taskStack = new ThreadLocal[Stack[() ⇒ Unit]]() +- private val _executionContext = new ThreadLocal[ExecutionContext]() ++ private val _executionContext = new DynamicVariable[ExecutionContext](null)","I knew it! There was something which successfully hid amidst the shadows. 
+",258d710dab3b9532275ca5412bd33fcde124965d +392351,0,"it's documented on ActorPath, and it's used for cheaply injecting the transport address into its string rep (for local Addresses)""",2012-01-27 05:10:53,rkuhn,akka,akka,,https://github.com/akka/akka-core/pull/275#discussion_r392351,2012-01-27T16:10:53Z,2012-01-30T10:58:22Z,rkuhn,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorPath.scala,,120.0,,,37,"@@ -105,6 +117,10 @@ final case class RootActorPath(address: Address, name: String = ""/"") extends Act + + override val toString = address + name + ++ def toStringWithAddress(addr: Address): String =","it's documented on ActorPath, and it's used for cheaply injecting the transport address into its string rep (for local Addresses) +",4fb0858e557232e5f4a7e5b364d1697c0e21bdd1 +362454,2,"This is most likely a bad default""",2012-01-18 01:00:15,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/217#discussion_r362454,2012-01-18T12:00:15Z,2012-01-18T16:57:18Z,viktorklang,CONTRIBUTOR,akka-camel/src/main/scala/akka/camel/Consumer.scala,,40.0,,,1,"@@ -1,144 +1,71 @@ + /** +- * Copyright (C) 2009-2010 Typesafe Inc. ++ * Copyright (C) 2009-2010 Scalable Solutions AB + */ + + package akka.camel + +-import org.apache.camel.model.{ RouteDefinition, ProcessorDefinition } ++import org.apache.camel.model.{RouteDefinition, ProcessorDefinition} + + import akka.actor._ ++import akka.util.Duration ++import akka.util.duration._ + + /** + * Mixed in by Actor implementations that consume message from Camel endpoints. + * + * @author Martin Krasser + */ +-trait Consumer { this: Actor ⇒ +- import RouteDefinitionHandler._ ++trait Consumer extends Actor with ConsumerConfig{ + +- /** +- * The default route definition handler is the identity function +- */ +- private[camel] var routeDefinitionHandler: RouteDefinitionHandler = identity ++ def endpointUri : String + +- /** +- * Returns the Camel endpoint URI to consume messages from. 
+- */ +- def endpointUri: String +- +- /** +- * Determines whether two-way communications between an endpoint and this consumer actor +- * should be done in blocking or non-blocking mode (default is non-blocking). This method +- * doesn't have any effect on one-way communications (they'll never block). +- */ +- def blocking = false ++ CamelExtension(context.system).registerConsumer(endpointUri, this, activationTimeout) ++} + +- /** +- * Determines whether one-way communications between an endpoint and this consumer actor +- * should be auto-acknowledged or system-acknowledged. +- */ +- def autoack = true + +- /** +- * Sets the route definition handler for creating a custom route to this consumer instance. +- */ +- def onRouteDefinition(h: RouteDefinition ⇒ ProcessorDefinition[_]): Unit = onRouteDefinition(from(h)) ++trait ConsumerConfig{ ++ //TODO: Explain the parameters better with some examples! + + /** +- * Sets the route definition handler for creating a custom route to this consumer instance. +- *

+- * Java API. ++ * How long should the actor wait for activation before it fails. + */ +- def onRouteDefinition(h: RouteDefinitionHandler): Unit = routeDefinitionHandler = h +-} +- +-/** +- * Java-friendly Consumer. +- * +- * Subclass this abstract class to create an MDB-style untyped consumer actor. This +- * class is meant to be used from Java. +- * +- * @author Martin Krasser +- */ +-abstract class UntypedConsumerActor extends UntypedActor with Consumer { +- final override def endpointUri = getEndpointUri +- final override def blocking = isBlocking +- final override def autoack = isAutoack ++ def activationTimeout: Duration = 10 seconds + + /** +- * Returns the Camel endpoint URI to consume messages from. ++ * When endpoint is outCapable (can produce responses) outTimeout is the maximum time ++ * the endpoint can take to send the response before the message exchange fails. It defaults to Int.MaxValue seconds. ++ * It can be also overwritten by setting @see blocking property + */ +- def getEndpointUri(): String ++ def outTimeout : Duration = Int.MaxValue seconds","This is most likely a bad default +",457354db8ba6b42dbacc246c11f0ff15e38ebab4 +362749,0,"This version will silently update and break the checksum, which is why we pull a specific version out of Google Code instead. Maybe make a new patch that uses an updated specific Google Code revision for `url`.""",2012-01-18 02:21:27,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/217#discussion_r362749,2012-01-18T13:21:27Z,2012-01-18T16:57:20Z,viktorklang,CONTRIBUTOR,akka-camel/src/on-hold/test/scala/akka/camel/component/ActorComponentFeatureTest.scala,,3.0,,,4,"@@ -1,22 +1,22 @@ + package akka.camel.component + +-import java.util.concurrent.{ TimeUnit, CountDownLatch } ++import java.util.concurrent.{TimeUnit, CountDownLatch}","Does any of the code after this even compile? I'd say, no? +",457354db8ba6b42dbacc246c11f0ff15e38ebab4 +365589,0,"Ok - I take it. 
Forgive my ignorance - what's wrong with starting children that way? Is this violating the rule of not sending behaviour to an actor?""",2012-01-18 14:17:07,piotrga,akka,akka,,https://github.com/akka/akka-core/pull/217#discussion_r365589,2012-01-19T01:17:07Z,2012-01-19T01:17:07Z,piotrga,CONTRIBUTOR,akka-camel/src/main/scala/akka/camelexamples/_2_SupervisedConsumers.scala,,26.0,,,26,"@@ -0,0 +1,37 @@ ++package akka.camelexamples ++ ++import akka.actor.{PoisonPill, Terminated, Props, ActorSystem, Actor} ++import ExamplesSupport._ ++import RichString._ ++ ++ ++object SupervisedConsumersExample extends App{ ++ ++ val system = ActorSystem(""test1"") ++ ++ system.actorOf(Props(new Actor{ ++ context.watch(context.actorOf(Props(faultHandler = retry3xWithin1s, creator = () => new EndpointManager))) ++ protected def receive = { ++ case Terminated(ref) => system.shutdown() ++ } ++ })) ++ ++ ++ ""data/input/CamelConsumer/file1.txt"" << ""test data ""+math.random ++} ++ ++class EndpointManager extends Actor { ++ ++ override def preStart() { ++ self ! Props[SysOutConsumer]","Ok - I take it. +Forgive my ignorance - what's wrong with starting children that way? +Is this violating the rule of not sending behaviour to an actor? +",457354db8ba6b42dbacc246c11f0ff15e38ebab4 +344791,0,"In the end it does actorFor so it should be fine with ActorPath. Will try to change that in separate refactoring. On Wed, Jan 11, 2012 at 7:36 PM, viktorklang < reply@reply.github.com > wrote: > > + * Please note that providing both 'nrOfInstances' and """,2012-01-11 09:41:37,patriknw,akka,akka,,https://github.com/akka/akka-core/pull/209#discussion_r344791,2012-01-11T20:41:37Z,2012-01-12T08:54:25Z,patriknw,CONTRIBUTOR,akka-remote/src/main/scala/akka/routing/RemoteRouters.scala,,103.0,,,21,"@@ -83,6 +83,33 @@ case class RemoteRandomRouter(nrOfInstances: Int, routees: Iterable[String], ove + } + + /** ++ * A Router that tries to send to routee with fewest messages in mailbox. ++ *
++ * Please note that providing both 'nrOfInstances' and 'routees' does not make logical sense as this means ++ * that the random router should both create new actors and use the 'routees' actor(s). ++ * In this case the 'nrOfInstances' will be ignored and the 'routees' will be used. ++ *
++ * The configuration parameter trumps the constructor arguments. This means that ++ * if you provide either 'nrOfInstances' or 'routees' to during instantiation they will ++ * be ignored if the 'nrOfInstances' is defined in the configuration file for the actor being used. ++ */ ++case class RemoteSmallestMailboxRouter(nrOfInstances: Int, routees: Iterable[String], override val resizer: Option[Resizer] = None) ++ extends RemoteRouterConfig with SmallestMailboxLike { ++ ++ /** ++ * Constructor that sets the routees to be used. ++ * Java API ++ */ ++ def this(n: Int, t: java.lang.Iterable[String]) = this(n, t.asScala)","In the end it does actorFor so it should be fine with ActorPath. Will try +to change that in separate refactoring. + +On Wed, Jan 11, 2012 at 7:36 PM, viktorklang < +reply@reply.github.com + +> wrote: +> +> > - \* Please note that providing both 'nrOfInstances' and 'routees' does +> > not make logical sense as this means +> > - \* that the random router should both create new actors and use the +> > 'routees' actor(s). +> > - \* In this case the 'nrOfInstances' will be ignored and the 'routees' +> > will be used. +> > - \*
+> > - \* The configuration parameter trumps the constructor arguments. +> > This means that +> > - \* if you provide either 'nrOfInstances' or 'routees' to during +> > instantiation they will +> > - \* be ignored if the 'nrOfInstances' is defined in the configuration +> > file for the actor being used. +> > - */ +> > +case class RemoteSmallestMailboxRouter(nrOfInstances: Int, routees: +> > Iterable[String], override val resizer: Option[Resizer] = None) +> > - extends RemoteRouterConfig with SmallestMailboxLike { +> > + +> > - /** +> > - \* Constructor that sets the routees to be used. +> > - \* Java API +> > - */ +> > - def this(n: Int, t: java.lang.Iterable[String]) = this(n, t.asScala) +> +> Why is this an Iterable of Strings and not an Iterable of ActorPath? +> +> --- +> +> Reply to this email directly or view it on GitHub: +> https://github.com/jboner/akka/pull/209/files#r344268 + +## + +Patrik Nordwall +Typesafe http://typesafe.com/ - The modern software stack for +applications that scale +Twitter: @patriknw +",2399f02531ce1d8fda33f5f65bbf951f2671976a +289647,0,"WeakHashMap is bad for gc, but ticket is created #1494 I leave it with a FIXME refererence to that ticket. On Tue, Dec 13, 2011 at 2:32 PM, viktorklang < reply@reply.github.com > wrote: > > @@ -62,9 +69,30 @@ class Dispatchers(val settings: ActorSystem.""",2011-12-13 02:56:14,patriknw,akka,akka,,https://github.com/akka/akka-core/pull/149#discussion_r289647,2011-12-13T13:56:14Z,2011-12-13T14:05:16Z,patriknw,CONTRIBUTOR,akka-actor/src/main/scala/akka/dispatch/Dispatchers.scala,,78.0,,,1,"@@ -62,9 +69,30 @@ class Dispatchers(val settings: ActorSystem.Settings, val prerequisites: Dispatc + + val defaultDispatcherConfig = settings.config.getConfig(""akka.actor.default-dispatcher"") + +- // TODO PN Shouldn't we fail hard if default-dispatcher is wrong? 
+- lazy val defaultGlobalDispatcher = +- from(defaultDispatcherConfig) getOrElse newDispatcher(""AkkaDefaultGlobalDispatcher"", 1, MailboxType).build ++ lazy val defaultGlobalDispatcher: MessageDispatcher = ++ from(defaultDispatcherConfig) getOrElse { ++ throw new ConfigurationException(""Wrong configuration [akka.actor.default-dispatcher]"") ++ } ++ ++ private val dispatchers = new ConcurrentHashMap[String, MessageDispatcher]","WeakHashMap is bad for gc, but ticket is created #1494 +I leave it with a FIXME refererence to that ticket. + +On Tue, Dec 13, 2011 at 2:32 PM, viktorklang < +reply@reply.github.com + +> wrote: +> +> > @@ -62,9 +69,30 @@ class Dispatchers(val settings: ActorSystem.Settings, +> > val prerequisites: Dispatc +> > +> > val defaultDispatcherConfig = +> > settings.config.getConfig(""akka.actor.default-dispatcher"") +> > - // TODO PN Shouldn't we fail hard if default-dispatcher is wrong? +> > - lazy val defaultGlobalDispatcher = +> > - from(defaultDispatcherConfig) getOrElse +> > newDispatcher(""AkkaDefaultGlobalDispatcher"", 1, MailboxType).build +> > - lazy val defaultGlobalDispatcher: MessageDispatcher = +> > - from(defaultDispatcherConfig) getOrElse { +> > - throw new ConfigurationException(""Wrong configuration +> > [akka.actor.default-dispatcher]"") +> > - } +> > + +> > - private val dispatchers = new ConcurrentHashMap[String, +> > MessageDispatcher] +> +> No, it's probably for the best that pinned dispatchers aren't registered +> there. +> +> Open a ticket about making sure that it gets cleaned up and assign it for +> 2.0, we might want to use a WeakHashMap or equivalent. 
+> +> --- +> +> Reply to this email directly or view it on GitHub: +> https://github.com/jboner/akka/pull/149/files#r289614 + +## + +Patrik Nordwall +Typesafe http://typesafe.com/ - Enterprise-Grade Scala from the Experts +Twitter: @patriknw +",7a17eb00bf7043b8b89839aecebf0e66b91dd9e9 +254337,0,"MAke it so that the dispatcher to be used is specified through the constructor, in that way it'll be easier to configure it later.""",2011-11-28 04:27:04,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/127#discussion_r254337,2011-11-28T15:27:04Z,2011-11-30T14:24:56Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorRefProvider.scala,,,,,1,"@@ -429,8 +429,7 @@ class DefaultScheduler(hashedWheelTimer: HashedWheelTimer, system: ActorSystem) + private def createSingleTask(runnable: Runnable): TimerTask = + new TimerTask() { + def run(timeout: org.jboss.netty.akka.util.Timeout) { +- // FIXME: consider executing runnable inside main dispatcher to prevent blocking of scheduler +- runnable.run() ++ system.dispatcher.dispatchTask(() ⇒ runnable.run())","MAke it so that the dispatcher to be used is specified through the constructor, in that way it'll be easier to configure it later. +",b3107aed733958523abad1ea386d1de95e6a421a +248167,0,"Netty""",2011-11-23 03:55:13,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/120#discussion_r248167,2011-11-23T14:55:13Z,2011-11-24T07:35:31Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/resources/akka-actor-reference.conf,,,,,1,"@@ -214,6 +214,21 @@ akka { + # } + } + ++ # Used to set the behavior of the scheduler. ++ # Changing the default values may change the system behavior drastically so make sure you know what you're doing! 
++ # ++ scheduler { ++ # The HashedWheelTimer (HWT) implementation from Jetty is used as the default scheduler in the system.","Netty +",4a2a5123bfff655e8839d28846fe445d950f5934 +269717,0,"hashedWheelTimer.stop().asScala foreach execDirectly""",2011-12-05 02:19:14,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/119#discussion_r269717,2011-12-05T13:19:14Z,2011-12-05T21:48:29Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorRefProvider.scala,,,,,1,"@@ -471,12 +619,24 @@ class DefaultScheduler(hashedWheelTimer: HashedWheelTimer, log: LoggingAdapter, + new TimerTask { + def run(timeout: org.jboss.netty.akka.util.Timeout) { + dispatcher.dispatchTask(f) +- timeout.getTimer.newTimeout(this, delay) ++ try timeout.getTimer.newTimeout(this, delay) catch { ++ case _: IllegalStateException ⇒ // stop recurring if timer is stopped ++ } + } + } + } + +- def close() = hashedWheelTimer.stop() ++ private def execDirectly(t: HWTimeout): Unit = { ++ try t.getTask.run(t) catch { ++ case e: InterruptedException ⇒ throw e ++ case e: Exception ⇒ log.error(e, ""exception while executing timer task"") ++ } ++ } ++ ++ def close() = { ++ import scala.collection.JavaConverters._ ++ hashedWheelTimer.stop().asScala foreach (t ⇒ execDirectly(t))","hashedWheelTimer.stop().asScala foreach execDirectly +",9d7597c7282711889d74bd9b4d7bdae5ea254104 +272492,0,"On Mon, Dec 5, 2011 at 3:32 PM, viktorklang < reply@reply.github.com > wrote: > > @@ -459,7 +605,9 @@ class DefaultScheduler(hashedWheelTimer: > HashedWheelTimer, log: LoggingAdapter, > > // Check if the receiver is still alive and kicking befor""",2011-12-05 20:55:48,patriknw,akka,akka,,https://github.com/akka/akka-core/pull/119#discussion_r272492,2011-12-06T07:55:48Z,2011-12-06T07:55:48Z,patriknw,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorRefProvider.scala,,612.0,,,633,"@@ -459,7 +605,9 @@ class DefaultScheduler(hashedWheelTimer: HashedWheelTimer, log: LoggingAdapter, + // Check if the receiver is still 
alive and kicking before sending it a message and reschedule the task + if (!receiver.isTerminated) { + receiver ! message +- timeout.getTimer.newTimeout(this, delay) ++ try timeout.getTimer.newTimeout(this, delay) catch { ++ case _: IllegalStateException ⇒ // stop recurring if timer is stopped","On Mon, Dec 5, 2011 at 3:32 PM, viktorklang < +reply@reply.github.com + +> wrote: +> +> > @@ -459,7 +605,9 @@ class DefaultScheduler(hashedWheelTimer: +> > HashedWheelTimer, log: LoggingAdapter, +> > // Check if the receiver is still alive and kicking before +> > sending it a message and reschedule the task +> > if (!receiver.isTerminated) { +> > receiver ! message +> > - timeout.getTimer.newTimeout(this, delay) +> > - try timeout.getTimer.newTimeout(this, delay) catch { +> > - case _: IllegalStateException => // stop recurring if timer +> > is stopped +> +> Shouldn't this verify that the IllegalState was from it being stopped? + +If not already done, the IllegalStateException should be well documented in +the api of the Scheduler, because now we rely on that behavior for proper +dispatcher shutdown, and the Scheduler is (in theory) replaceable. 
+ +> --- +> +> Reply to this email directly or view it on GitHub: +> https://github.com/jboner/akka/pull/119/files#r269820 + +## + +Patrik Nordwall +Typesafe http://typesafe.com/ - Enterprise-Grade Scala from the Experts +Twitter: @patriknw +",9d7597c7282711889d74bd9b4d7bdae5ea254104 +242131,1,"sexy""",2011-11-21 04:26:06,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/118#discussion_r242131,2011-11-21T15:26:06Z,2011-11-21T15:26:06Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/resources/akka-actor-reference.conf,,26.0,,,14,"@@ -25,7 +23,7 @@ akka { + # Dispatcher, (BalancingDispatcher, only valid when all actors using it are of the same type), + # A FQCN to a class inheriting MessageDispatcherConfigurator with a no-arg visible constructor + name = ""EventHandlerDispatcher"" # Optional, will be a generated UUID if omitted +- keep-alive-time = 60 # Keep alive time for threads ++ keep-alive-time = 60s # Keep alive time for threads","sexy +",e5f8a41cb85fe950df8de3f2f8abf3746c9d65b1 +228563,0,"Do you really want to force the size to know if empty?""",2011-11-14 11:20:37,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/110#discussion_r228563,2011-11-14T22:20:37Z,2011-11-15T13:40:37Z,viktorklang,CONTRIBUTOR,akka-durable-mailboxes/akka-file-mailbox/src/main/scala/akka/actor/mailbox/FiledBasedMailbox.scala,,62.0,,,81,"@@ -4,55 +4,63 @@ + + package akka.actor.mailbox + +-import akka.actor.LocalActorRef +-import akka.dispatch._ +-import akka.config.Config._ +-import akka.event.EventHandler +- + import org.apache.commons.io.FileUtils ++import akka.actor.ActorCell ++import akka.config.Configuration ++import akka.dispatch.Envelope ++import akka.event.Logging ++import akka.actor.ActorRef + +-/** +- * @author Jonas Bonér +- */ +-private[akka] object FileBasedMailboxUtil { +- val queuePath = config.getString(""akka.actor.mailbox.file-based.directory-path"", ""./_mb"") // /var/spool/akka ++object FileBasedMailbox { ++ def queuePath(config: Configuration): 
String = { ++ config.getString(""akka.actor.mailbox.file-based.directory-path"", ""./_mb"") // /var/spool/akka ++ } + } + +-class FileBasedMailbox(val owner: LocalActorRef) extends DurableExecutableMailbox(owner) { +- import FileBasedMailboxUtil._ ++class FileBasedMailbox(val owner: ActorCell) extends DurableMailbox(owner) with DurableMessageSerialization { ++ ++ val log = Logging(app, this) ++ ++ val queuePath = FileBasedMailbox.queuePath(owner.app.config) + + private val queue = try { + try { FileUtils.forceMkdir(new java.io.File(queuePath)) } catch { case e ⇒ {} } +- val queue = new filequeue.PersistentQueue(queuePath, name, config) ++ val queue = new filequeue.PersistentQueue(queuePath, name, owner.app.config, log) + queue.setup // replays journal + queue.discardExpired + queue + } catch { + case e: Exception ⇒ +- EventHandler.error(e, this, ""Could not create a file-based mailbox"") ++ log.error(e, ""Could not create a file-based mailbox"") + throw e + } + +- def enqueue(message: MessageInvocation) = { +- EventHandler.debug(this, ""\nENQUEUING message in file-based mailbox [%s]"".format(message)) +- queue.add(serialize(message)) ++ def enqueue(receiver: ActorRef, envelope: Envelope) { ++ log.debug(""ENQUEUING message in file-based mailbox [{}]"", envelope) ++ queue.add(serialize(envelope)) + } + +- def dequeue: MessageInvocation = try { ++ def dequeue(): Envelope = try { + val item = queue.remove + if (item.isDefined) { + queue.confirmRemove(item.get.xid) +- val messageInvocation = deserialize(item.get.data) +- EventHandler.debug(this, ""\nDEQUEUING message in file-based mailbox [%s]"".format(messageInvocation)) +- messageInvocation ++ val envelope = deserialize(item.get.data) ++ log.debug(""DEQUEUING message in file-based mailbox [{}]"", envelope) ++ envelope + } else null + } catch { + case e: java.util.NoSuchElementException ⇒ null + case e: Exception ⇒ +- EventHandler.error(e, this, ""Couldn't dequeue from file-based mailbox"") ++ log.error(e, ""Couldn't 
dequeue from file-based mailbox"") + throw e + } + ++ def numberOfMessages: Int = { ++ queue.length.toInt ++ } ++ ++ def hasMessages: Boolean = numberOfMessages > 0","Do you really want to force the size to know if empty? +",a6e75fb702df10eb00070b8631aca6f009f727d5 +221062,0,"If this is only used in one place for one usage I'd rather just use ConcurrentHashMap and make sure that whatever gets put into it only has identity equality defined""",2011-11-10 01:56:47,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/106#discussion_r221062,2011-11-10T12:56:47Z,2011-11-10T15:33:03Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/java/org/jboss/netty/akka/util/internal/ConcurrentIdentityHashMap.java,,51.0,,,51,"@@ -0,0 +1,1418 @@ ++/* ++ * Copyright 2009 Red Hat, Inc. ++ * ++ * Red Hat licenses this file to you under the Apache License, version 2.0 ++ * (the ""License""); you may not use this file except in compliance with the ++ * License. You may obtain a copy of the License at: ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an ""AS IS"" BASIS, WITHOUT ++ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the ++ * License for the specific language governing permissions and limitations ++ * under the License. 
++ */ ++/* ++ * Written by Doug Lea with assistance from members of JCP JSR-166 ++ * Expert Group and released to the public domain, as explained at ++ * http://creativecommons.org/licenses/publicdomain ++ */ ++package org.jboss.netty.akka.util.internal; ++ ++import java.util.AbstractCollection; ++import java.util.AbstractMap; ++import java.util.AbstractSet; ++import java.util.Collection; ++import java.util.ConcurrentModificationException; ++import java.util.Enumeration; ++import java.util.Hashtable; ++import java.util.Iterator; ++import java.util.Map; ++import java.util.NoSuchElementException; ++import java.util.Set; ++import java.util.concurrent.ConcurrentMap; ++import java.util.concurrent.locks.ReentrantLock; ++ ++ ++/** ++ * An alternative identity-comparing {@link java.util.concurrent.ConcurrentMap} which is similar to ++ * {@link java.util.concurrent.ConcurrentHashMap}. ++ * ++ * @author The Netty Project ++ * @author Doug Lea ++ * @author Jason T. Greene ++ * @author Trustin Lee ++ * @version $Rev: 2371 $, $Date: 2010-10-19 15:00:42 +0900 (Tue, 19 Oct 2010) $ ++ * ++ * @param the type of keys maintained by this map ++ * @param the type of mapped values ++ */ ++public final class ConcurrentIdentityHashMap extends AbstractMap","If this is only used in one place for one usage I'd rather just use ConcurrentHashMap and make sure that whatever gets put into it only has identity equality defined +",1577f8bcb32097e2e317302089d92fd417b69913 +489604,2,"doesn’t work: there is no system available while constructing MailboxType :-(""",2012-02-26 08:17:40,rkuhn,akka,akka,,https://github.com/akka/akka-core/pull/353#discussion_r489604,2012-02-26T19:17:40Z,2012-02-26T20:39:34Z,rkuhn,CONTRIBUTOR,akka-durable-mailboxes/akka-beanstalk-mailbox/src/main/scala/akka/actor/mailbox/BeanstalkBasedMailbox.scala,,,,,1,"@@ -27,9 +27,9 @@ class BeanstalkBasedMailboxType(config: Config) extends MailboxType { + /** + * @author Jonas Bonér + */ +-class BeanstalkBasedMessageQueue(_owner: 
ActorContext) extends DurableMessageQueue(_owner) with DurableMessageSerialization { ++class BeanstalkBasedMessageQueue(_owner: ActorContext, _config: Config) extends DurableMessageQueue(_owner) with DurableMessageSerialization { + +- private val settings = BeanstalkBasedMailboxExtension(owner.system) ++ private val settings = new BeanstalkMailboxSettings(owner.system, _config)","doesn’t work: there is no system available while constructing MailboxType :-( +",b4fcc3b2f2d7e534908057750d0086b9c6e20764 +488501,0,"removed def system from Camel trait, it is not needed in the end for the user. (you can always get to the system using the normal routes) Added a comment that Camel is shutdown when the associated ActorSystem that uses a Camel extension is shut down.""",2012-02-24 21:26:34,RayRoestenburg,akka,akka,,https://github.com/akka/akka-core/pull/344#discussion_r488501,2012-02-25T08:26:34Z,2012-03-20T12:17:25Z,RayRoestenburg,CONTRIBUTOR,akka-camel/src/main/scala/akka/camel/Camel.scala,,,,,1,"@@ -0,0 +1,84 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. ++ */ ++ ++package akka.camel ++ ++import internal._ ++import akka.actor._ ++import org.apache.camel.{ ProducerTemplate, CamelContext } ++ ++//TODO complete this doc ++/** ++ * Camel trait encapsulates the underlying camel machinery. ++ * ++ */ ++trait Camel extends ConsumerRegistry with ProducerRegistry with Extension with Activation { ++ /** ++ * Underlying camel context. ++ * ++ * It can be used to configure camel manually, i.e. when the user wants to add new routes or endpoints, ++ * i.e.

camel.context.addRoutes(...)
++ * ++ * @see [[org.apache.camel.CamelContext]] ++ */ ++ def context: CamelContext ++ ++ /** ++ * Producer template. ++ * @see [[org.apache.camel.ProducerTemplate]] ++ */ ++ def template: ProducerTemplate ++ ++ /** ++ * Associated `ActorSystem`. ++ * ++ *

It can be used to start producers, consumers or any other actors which need to interact with camel, ++ * for example: ++ * {{{ ++ * val system = ActorSystem(""test"") ++ * system.actorOf(Props[SysOutConsumer])","removed def system from Camel trait, it is not needed in the end for the user. (you can always get to the system using the normal routes) Added a comment that Camel is shutdown when the associated ActorSystem that uses a Camel extension is shut down. +",f74616f828d3e31724d768dd86ce05af85d97ade +506662,2,"Because I'm stupid. Fixed. """,2012-03-01 02:27:24,jboner,akka,akka,,https://github.com/akka/akka-core/pull/329#discussion_r506662,2012-03-01T13:27:24Z,2012-03-12T18:22:16Z,jboner,CONTRIBUTOR,akka-cluster/src/main/scala/akka/cluster/Node.scala,,,,,1,"@@ -0,0 +1,803 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. ++ */ ++ ++package akka.cluster ++ ++import akka.actor._ ++import akka.actor.Status._ ++import akka.remote._ ++import akka.routing._ ++import akka.event.Logging ++import akka.dispatch.Await ++import akka.pattern.ask ++import akka.util._ ++import akka.config.ConfigurationException ++ ++import java.util.concurrent.atomic.{ AtomicReference, AtomicBoolean } ++import java.util.concurrent.TimeUnit._ ++import java.util.concurrent.TimeoutException ++import java.security.SecureRandom ++ ++import scala.collection.immutable.{ Map, SortedSet } ++import scala.annotation.tailrec ++ ++import com.google.protobuf.ByteString ++ ++/** ++ * Interface for membership change listener. ++ */ ++trait MembershipChangeListener { ++ def notify(members: SortedSet[Member]): Unit ++} ++ ++/** ++ * Interface for meta data change listener. ++ */ ++trait MetaDataChangeListener { // FIXME add management and notification for MetaDataChangeListener ++ def notify(meta: Map[String, Array[Byte]]): Unit ++} ++ ++// FIXME create Protobuf messages out of all the Gossip stuff - but wait until the prototol is fully stablized. ++ ++/** ++ * Base trait for all cluster messages. 
All ClusterMessage's are serializable. ++ */ ++sealed trait ClusterMessage extends Serializable ++ ++/** ++ * Cluster commands sent by the USER. ++ */ ++object ClusterAction { ++ ++ /** ++ * Command to join the cluster. Sent when a node (reprsesented by 'address') ++ * wants to join another node (the receiver). ++ */ ++ case class Join(address: Address) extends ClusterMessage ++ ++ /** ++ * Command to set a node to Up (from Joining). ++ */ ++ case object Up extends ClusterMessage ++ ++ /** ++ * Command to leave the cluster. ++ */ ++ case object Leave extends ClusterMessage ++ ++ /** ++ * Command to mark node as temporary down. ++ */ ++ case object Down extends ClusterMessage ++ ++ /** ++ * Command to mark a node to be removed from the cluster immediately. ++ */ ++ case object Exit extends ClusterMessage ++ ++ /** ++ * Command to remove a node from the cluster immediately. ++ */ ++ case object Remove extends ClusterMessage ++} ++ ++/** ++ * Represents the address and the current status of a cluster member node. ++ */ ++case class Member(address: Address, status: MemberStatus) extends ClusterMessage ++ ++/** ++ * Envelope adding a sender address to the gossip. ++ */ ++case class GossipEnvelope(sender: Member, gossip: Gossip) extends ClusterMessage ++ ++/** ++ * Defines the current status of a cluster member node ++ * ++ * Can be one of: Joining, Up, Leaving, Exiting and Down. 
++ */ ++sealed trait MemberStatus extends ClusterMessage ++object MemberStatus { ++ case object Joining extends MemberStatus ++ case object Up extends MemberStatus ++ case object Leaving extends MemberStatus ++ case object Exiting extends MemberStatus ++ case object Down extends MemberStatus ++ case object Removed extends MemberStatus ++} ++ ++// sealed trait PartitioningStatus ++// object PartitioningStatus { ++// case object Complete extends PartitioningStatus ++// case object Awaiting extends PartitioningStatus ++// } ++ ++// case class PartitioningChange( ++// from: Address, ++// to: Address, ++// path: PartitionPath, ++// status: PartitioningStatus) ++ ++/** ++ * Represents the overview of the cluster, holds the cluster convergence table and set with unreachable nodes. ++ */ ++case class GossipOverview( ++ seen: Map[Address, VectorClock] = Map.empty[Address, VectorClock], ++ unreachable: Set[Address] = Set.empty[Address]) { ++ ++ override def toString = ++ ""GossipOverview(seen = ["" + seen.mkString("", "") + ++ ""], unreachable = ["" + unreachable.mkString("", "") + ++ ""])"" ++} ++ ++/** ++ * Represents the state of the cluster; cluster ring membership, ring convergence, meta data - all versioned by a vector clock. ++ */ ++case class Gossip( ++ overview: GossipOverview = GossipOverview(), ++ members: SortedSet[Member], // sorted set of members with their status, sorted by name ++ //partitions: Tree[PartitionPath, Node] = Tree.empty[PartitionPath, Node], // name/partition service ++ //pending: Set[PartitioningChange] = Set.empty[PartitioningChange], ++ meta: Map[String, Array[Byte]] = Map.empty[String, Array[Byte]], ++ version: VectorClock = VectorClock()) // vector clock version ++ extends ClusterMessage // is a serializable cluster message ++ with Versioned[Gossip] { ++ ++ /** ++ * Increments the version for this 'Node'. 
++ */ ++ def +(node: VectorClock.Node): Gossip = copy(version = version + node) ++ ++ def +(member: Member): Gossip = { ++ if (members contains member) this ++ else this copy (members = members + member) ++ } ++ ++ /** ++ * Marks the gossip as seen by this node (remoteAddress) by updating the address entry in the 'gossip.overview.seen' ++ * Map with the VectorClock for the new gossip. ++ */ ++ def seen(address: Address): Gossip = ++ this copy (overview = overview copy (seen = overview.seen + (address -> version))) ++ ++ override def toString = ++ ""Gossip("" + ++ ""overview = "" + overview + ++ "", members = ["" + members.mkString("", "") + ++ ""], meta = ["" + meta.mkString("", "") + ++ ""], version = "" + version + ++ "")"" ++} ++ ++/** ++ * FSM actor managing the different cluster nodes states. ++ * Single instance - e.g. serialized access to Node - message after message. ++ */ ++final class ClusterCommandDaemon(system: ActorSystem, node: Node) extends Actor with FSM[MemberStatus, Unit] { ++ ++ // start in JOINING ++ startWith(MemberStatus.Joining, Unit) ++ ++ // ======================== ++ // === IN JOINING === ++ when(MemberStatus.Joining) { ++ case Event(ClusterAction.Up, _) ⇒ ++ node.up() ++ goto(MemberStatus.Up) ++ } ++ ++ // ======================== ++ // === IN UP === ++ when(MemberStatus.Up) { ++ case Event(ClusterAction.Down, _) ⇒ ++ node.downing() ++ goto(MemberStatus.Down) ++ ++ case Event(ClusterAction.Leave, _) ⇒ ++ node.leaving() ++ goto(MemberStatus.Leaving) ++ ++ case Event(ClusterAction.Exit, _) ⇒ ++ node.exiting() ++ goto(MemberStatus.Exiting) ++ ++ case Event(ClusterAction.Remove, _) ⇒ ++ node.removing() ++ goto(MemberStatus.Removed) ++ } ++ ++ // ======================== ++ // === IN LEAVING === ++ when(MemberStatus.Leaving) { ++ case Event(ClusterAction.Down, _) ⇒ ++ node.downing() ++ goto(MemberStatus.Down) ++ ++ case Event(ClusterAction.Remove, _) ⇒ ++ node.removing() ++ goto(MemberStatus.Removed) ++ } ++ ++ // ======================== ++ 
// === IN EXITING === ++ when(MemberStatus.Exiting) { ++ case Event(ClusterAction.Remove, _) ⇒ ++ node.removing() ++ goto(MemberStatus.Removed) ++ } ++ ++ // ======================== ++ // === IN DOWN === ++ when(MemberStatus.Down) { ++ // FIXME How to transition from DOWN => JOINING when node comes back online. Can't just listen to Gossip message since it is received be another actor. How to fix this? ++ case Event(ClusterAction.Remove, _) ⇒ ++ node.removing() ++ goto(MemberStatus.Removed) ++ } ++ ++ // ======================== ++ // === IN REMOVED === ++ when(MemberStatus.Removed) { ++ case command ⇒ ++ log.warning(""Removed node [{}] received cluster command [{}]"", system.name, command) ++ stay ++ } ++ ++ // ======================== ++ // === GENERIC AND UNHANDLED COMMANDS === ++ whenUnhandled { ++ // should be able to handle Join in any state ++ case Event(ClusterAction.Join(address), _) ⇒ ++ node.joining(address) ++ stay ++ ++ case Event(command, _) ⇒ { ++ log.warning(""Unhandled command [{}] in state [{}]"", command, stateName) ++ stay ++ } ++ } ++} ++ ++/** ++ * Pooled and routed wit N number of configurable instances. ++ * Concurrent access to Node. ++ */ ++final class ClusterGossipDaemon(system: ActorSystem, node: Node) extends Actor { ++ val log = Logging(system, ""ClusterGossipDaemon"") ++ ++ def receive = { ++ case GossipEnvelope(sender, gossip) ⇒ node.receive(sender, gossip) ++ case unknown ⇒ log.error(""Unknown message sent to cluster daemon ["" + unknown + ""]"") ++ } ++} ++ ++/** ++ * Node Extension Id and factory for creating Node extension. ++ * Example: ++ * {{{ ++ * val node = NodeExtension(system) ++ * ++ * if (node.isLeader) { ... } ++ * }}} ++ * ++ * Example: ++ * {{{ ++ * import akka.cluster._ ++ * ++ * val node = system.node // implicit conversion adds 'node' method ++ * ++ * if (node.isLeader) { ... 
} ++ * }}} ++ */ ++object NodeExtension extends ExtensionId[Node] with ExtensionIdProvider { ++ override def get(system: ActorSystem): Node = super.get(system) ++ ++ override def lookup = NodeExtension ++ ++ override def createExtension(system: ExtendedActorSystem): Node = new Node(system.asInstanceOf[ActorSystemImpl]) // not nice but need API in ActorSystemImpl inside Node ++} ++ ++/** ++ * This module is responsible for Gossiping cluster information. The abstraction maintains the list of live ++ * and dead members. Periodically i.e. every 1 second this module chooses a random member and initiates a round ++ * of Gossip with it. Whenever it gets gossip updates it updates the Failure Detector with the liveness ++ * information. ++ *

++ * During each of these runs the member initiates gossip exchange according to following rules (as defined in the ++ * Cassandra documentation [http://wiki.apache.org/cassandra/ArchitectureGossip]: ++ *

++ *   1) Gossip to random live member (if any)
++ *   2) Gossip to random unreachable member with certain probability depending on number of unreachable and live members
++ *   3) If the member gossiped to at (1) was not deputy, or the number of live members is less than number of deputy list,
++ *       gossip to random deputy with certain probability depending on number of unreachable, deputy and live members.
++ * 
++ * ++ * Example: ++ * {{{ ++ * val node = NodeExtension(system) ++ * ++ * if (node.isLeader) { ... } ++ * }}} ++ * ++ * Example: ++ * {{{ ++ * import akka.cluster._ ++ * ++ * val node = system.node // implicit conversion adds 'node' method ++ * ++ * if (node.isLeader) { ... } ++ * }}} ++ */ ++class Node(system: ActorSystemImpl) extends Extension { ++ ++ /** ++ * Represents the state for this Node. Implemented using optimistic lockless concurrency, ++ * all state is represented by this immutable case class and managed by an AtomicReference. ++ */ ++ private case class State( ++ self: Member, ++ latestGossip: Gossip, ++ memberMembershipChangeListeners: Set[MembershipChangeListener] = Set.empty[MembershipChangeListener]) ++ ++ if (!system.provider.isInstanceOf[RemoteActorRefProvider]) ++ throw new ConfigurationException(""ActorSystem["" + system + ""] needs to have a 'RemoteActorRefProvider' enabled in the configuration"") ++ ++ private val remote: RemoteActorRefProvider = system.provider.asInstanceOf[RemoteActorRefProvider] ++ ++ private val remoteSettings = new RemoteSettings(system.settings.config, system.name) ++ private val clusterSettings = new ClusterSettings(system.settings.config, system.name) ++ ++ private val remoteAddress = remote.transport.address ++ private val vclockNode = VectorClock.Node(remoteAddress.toString) ++ ++ private val gossipInitialDelay = clusterSettings.GossipInitialDelay ++ private val gossipFrequency = clusterSettings.GossipFrequency ++ ++ implicit private val memberOrdering = Ordering.fromLessThan[Member](_.address.toString < _.address.toString) ++ ++ implicit private val defaultTimeout = Timeout(remoteSettings.RemoteSystemDaemonAckTimeout) ++ ++ val failureDetector = new AccrualFailureDetector( ++ system, remoteAddress, clusterSettings.FailureDetectorThreshold, clusterSettings.FailureDetectorMaxSampleSize) ++ ++ private val nrOfDeputyNodes = clusterSettings.NrOfDeputyNodes ++ private val nrOfGossipDaemons = 
clusterSettings.NrOfGossipDaemons ++ private val nodeToJoin: Option[Address] = clusterSettings.NodeToJoin filter (_ != remoteAddress) ++ ++ private val serialization = remote.serialization ++ ++ private val isRunning = new AtomicBoolean(true) ++ private val log = Logging(system, ""Node"") ++ private val random = SecureRandom.getInstance(""SHA1PRNG"") ++ ++ private val clusterCommandDaemon = system.systemActorOf( ++ Props(new ClusterCommandDaemon(system, this)), ""clusterCommand"") ++ ++ private val clusterGossipDaemon = system.systemActorOf( ++ Props(new ClusterGossipDaemon(system, this)).withRouter(RoundRobinRouter(nrOfGossipDaemons)), ""clusterGossip"") ++ ++ private val state = { ++ val member = Member(remoteAddress, MemberStatus.Joining) ++ val gossip = Gossip(members = SortedSet.empty[Member] + member) + vclockNode // add me as member and update my vector clock ++ new AtomicReference[State](State(member, gossip)) ++ } ++ ++ import Versioned.latestVersionOf ++ ++ log.info(""Node [{}] - Starting cluster Node..."", remoteAddress) ++ ++ // try to join the node defined in the 'akka.cluster.node-to-join' option ++ autoJoin() ++ ++ // start periodic gossip to random nodes in cluster ++ private val gossipCanceller = system.scheduler.schedule(gossipInitialDelay, gossipFrequency) { ++ gossip() ++ } ++ ++ // start periodic cluster scrutinization (moving nodes condemned by the failure detector to unreachable list) ++ private val scrutinizeCanceller = system.scheduler.schedule(gossipInitialDelay, gossipFrequency) { ++ scrutinize() ++ } ++ ++ // ====================================================== ++ // ===================== PUBLIC API ===================== ++ // ====================================================== ++ ++ /** ++ * Latest gossip. ++ */ ++ def latestGossip: Gossip = state.get.latestGossip ++ ++ /** ++ * Member status for this node. ++ */ ++ def self: Member = state.get.self ++ ++ /** ++ * Is this node the leader? 
++ */ ++ def isLeader: Boolean = { ++ val currentState = state.get ++ remoteAddress == currentState.latestGossip.members.head.address ++ } ++ ++ /** ++ * Is this node a singleton cluster? ++ */ ++ def isSingletonCluster: Boolean = isSingletonCluster(state.get) ++ ++ /** ++ * Checks if we have a cluster convergence. ++ * ++ * @returns Some(convergedGossip) if convergence have been reached and None if not ++ */ ++ def convergence: Option[Gossip] = convergence(latestGossip) ++ ++ /** ++ * Shuts down all connections to other members, the cluster daemon and the periodic gossip and cleanup tasks. ++ */ ++ def shutdown() { ++ ++ // FIXME Cheating for now. Can't just shut down. Node must first gossip an Leave command, wait for Leader to do proper Handoff and then await an Exit command before switching to Removed ++ ++ if (isRunning.compareAndSet(true, false)) { ++ log.info(""Node [{}] - Shutting down Node and ClusterDaemon..."", remoteAddress) ++ ++ try system.stop(clusterCommandDaemon) finally { ++ try system.stop(clusterGossipDaemon) finally { ++ try gossipCanceller.cancel() finally { ++ try scrutinizeCanceller.cancel() finally { ++ log.info(""Node [{}] - Node and ClusterDaemon shut down successfully"", remoteAddress) ++ } ++ } ++ } ++ } ++ } ++ } ++ ++ /** ++ * Registers a listener to subscribe to cluster membership changes. ++ */ ++ @tailrec ++ final def registerListener(listener: MembershipChangeListener) { ++ val localState = state.get ++ val newListeners = localState.memberMembershipChangeListeners + listener ++ val newState = localState copy (memberMembershipChangeListeners = newListeners) ++ if (!state.compareAndSet(localState, newState)) registerListener(listener) // recur ++ } ++ ++ /** ++ * Unsubscribes to cluster membership changes. 
++ */ ++ @tailrec ++ final def unregisterListener(listener: MembershipChangeListener) { ++ val localState = state.get ++ val newListeners = localState.memberMembershipChangeListeners - listener ++ val newState = localState copy (memberMembershipChangeListeners = newListeners) ++ if (!state.compareAndSet(localState, newState)) unregisterListener(listener) // recur ++ } ++ ++ /** ++ * Send command to JOIN one node to another. ++ */ ++ def sendJoin(address: Address) { ++ clusterCommandDaemon ! ClusterAction.Join(address) ++ } ++ ++ /** ++ * Send command to issue state transition to LEAVING. ++ */ ++ def sendLeave() { ++ clusterCommandDaemon ! ClusterAction.Leave ++ } ++ ++ /** ++ * Send command to issue state transition to EXITING. ++ */ ++ def sendDown() { ++ clusterCommandDaemon ! ClusterAction.Down ++ } ++ ++ /** ++ * Send command to issue state transition to REMOVED. ++ */ ++ def sendRemove() { ++ clusterCommandDaemon ! ClusterAction.Remove ++ } ++ ++ // ======================================================== ++ // ===================== INTERNAL API ===================== ++ // ======================================================== ++ ++ /** ++ * State transition to JOINING. ++ * New node joining. 
++ */ ++ @tailrec ++ private[cluster] final def joining(node: Address) { ++ log.info(""Node [{}] - Node [{}] is joining"", remoteAddress, node) ++ ++ failureDetector heartbeat node // update heartbeat in failure detector ++ ++ val localState = state.get ++ val localGossip = localState.latestGossip ++ val localMembers = localGossip.members ++ ++ val newMembers = localMembers + Member(node, MemberStatus.Joining) // add joining node as Joining ++ val newGossip = localGossip copy (members = newMembers) ++ ++ val versionedGossip = newGossip + vclockNode ++ val seenVersionedGossip = versionedGossip seen remoteAddress ++ ++ val newState = localState copy (latestGossip = seenVersionedGossip) ++ ++ if (!state.compareAndSet(localState, newState)) joining(node) // recur if we failed update ++ else { ++ if (convergence(newState.latestGossip).isDefined) { ++ newState.memberMembershipChangeListeners map { _ notify newMembers }","Because I'm stupid. Fixed. +",cf3fa9fa3ce9e9312db0922370c93ce1af9db7c8 +405333,0,"Is this test run automatically? In that case why do we have so many spec wrappers for other junit tests?""",2012-01-31 19:51:32,patriknw,akka,akka,,https://github.com/akka/akka-core/pull/286#discussion_r405333,2012-02-01T06:51:32Z,2012-02-01T06:51:32Z,patriknw,CONTRIBUTOR,akka-testkit/src/test/java/akka/testkit/TestActorRefJavaSpec.java,,12.0,,,12,"@@ -0,0 +1,19 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. ++ */ ++ ++package akka.testkit; ++ ++import org.junit.Test; ++import akka.actor.Props; ++ ++import static org.junit.Assert.*; ++ ++public class TestActorRefJavaSpec {","Is this test run automatically? In that case why do we have so many spec wrappers for other junit tests? 
+",dcebd1deb209d953912976f2183893ab88a8f593 +392986,0,"I don’t know, I just copied your code into a new file …""",2012-01-27 07:46:20,rkuhn,akka,akka,,https://github.com/akka/akka-core/pull/275#discussion_r392986,2012-01-27T18:46:20Z,2012-01-30T10:58:22Z,rkuhn,CONTRIBUTOR,akka-remote/src/main/scala/akka/remote/netty/Client.scala,,196.0,,,196,"@@ -0,0 +1,346 @@ ++/** ++ * Copyright (C) 2009-2011 Typesafe Inc. ++ */ ++package akka.remote.netty ++ ++import java.net.InetSocketAddress ++import org.jboss.netty.util.HashedWheelTimer ++import org.jboss.netty.bootstrap.ClientBootstrap ++import org.jboss.netty.channel.group.DefaultChannelGroup ++import org.jboss.netty.channel.{ ChannelHandler, StaticChannelPipeline, SimpleChannelUpstreamHandler, MessageEvent, ExceptionEvent, ChannelStateEvent, ChannelPipelineFactory, ChannelPipeline, ChannelHandlerContext, ChannelFuture, Channel } ++import org.jboss.netty.handler.codec.frame.{ LengthFieldPrepender, LengthFieldBasedFrameDecoder } ++import org.jboss.netty.handler.execution.ExecutionHandler ++import org.jboss.netty.handler.timeout.{ ReadTimeoutHandler, ReadTimeoutException } ++import akka.remote.RemoteProtocol.{ RemoteControlProtocol, CommandType, AkkaRemoteProtocol } ++import akka.remote.{ RemoteProtocol, RemoteMessage, RemoteLifeCycleEvent, RemoteClientStarted, RemoteClientShutdown, RemoteClientException, RemoteClientError, RemoteClientDisconnected, RemoteClientConnected } ++import akka.actor.{ simpleName, Address } ++import akka.AkkaException ++import akka.event.Logging ++import akka.util.Switch ++import akka.actor.ActorRef ++import org.jboss.netty.channel.ChannelFutureListener ++import akka.remote.RemoteClientWriteFailed ++import java.net.InetAddress ++import org.jboss.netty.util.TimerTask ++import org.jboss.netty.util.Timeout ++import java.util.concurrent.TimeUnit ++ ++class RemoteClientMessageBufferException(message: String, cause: Throwable) extends AkkaException(message, cause) { ++ def this(msg: String) = this(msg, 
null) ++} ++ ++/** ++ * This is the abstract baseclass for netty remote clients, currently there's only an ++ * ActiveRemoteClient, but others could be feasible, like a PassiveRemoteClient that ++ * reuses an already established connection. ++ */ ++abstract class RemoteClient private[akka] ( ++ val netty: NettyRemoteTransport, ++ val remoteAddress: Address) { ++ ++ val log = Logging(netty.system, ""RemoteClient"") ++ ++ val name = simpleName(this) + ""@"" + remoteAddress ++ ++ private[remote] val runSwitch = new Switch() ++ ++ private[remote] def isRunning = runSwitch.isOn ++ ++ protected def currentChannel: Channel ++ ++ def connect(reconnectIfAlreadyConnected: Boolean = false): Boolean ++ ++ def shutdown(): Boolean ++ ++ def isBoundTo(address: Address): Boolean = remoteAddress == address ++ ++ /** ++ * Converts the message to the wireprotocol and sends the message across the wire ++ */ ++ def send(message: Any, senderOption: Option[ActorRef], recipient: ActorRef): Unit = if (isRunning) { ++ if (netty.remoteSettings.LogSend) log.debug(""Sending message {} from {} to {}"", message, senderOption, recipient) ++ send((message, senderOption, recipient)) ++ } else { ++ val exception = new RemoteClientException(""RemoteModule client is not running, make sure you have invoked 'RemoteClient.connect()' before using it."", netty, remoteAddress) ++ netty.notifyListeners(RemoteClientError(exception, netty, remoteAddress)) ++ throw exception ++ } ++ ++ /** ++ * Sends the message across the wire ++ */ ++ private def send(request: (Any, Option[ActorRef], ActorRef)): Unit = { ++ try { ++ val channel = currentChannel ++ val f = channel.write(request) ++ f.addListener( ++ new ChannelFutureListener { ++ def operationComplete(future: ChannelFuture) { ++ if (future.isCancelled || !future.isSuccess) { ++ netty.notifyListeners(RemoteClientWriteFailed(request, future.getCause, netty, remoteAddress)) ++ } ++ } ++ }) ++ // Check if we should back off ++ if (!channel.isWritable) { ++ val 
backoff = netty.settings.BackoffTimeout ++ if (backoff.length > 0 && !f.await(backoff.length, backoff.unit)) f.cancel() //Waited as long as we could, now back off ++ } ++ } catch { ++ case e: Exception ⇒ netty.notifyListeners(RemoteClientError(e, netty, remoteAddress)) ++ } ++ } ++ ++ override def toString = name ++} ++ ++/** ++ * RemoteClient represents a connection to an Akka node. Is used to send messages to remote actors on the node. ++ */ ++class ActiveRemoteClient private[akka] ( ++ netty: NettyRemoteTransport, ++ remoteAddress: Address, ++ localAddress: Address) ++ extends RemoteClient(netty, remoteAddress) { ++ ++ import netty.settings ++ ++ //TODO rewrite to a wrapper object (minimize volatile access and maximize encapsulation) ++ @volatile ++ private var bootstrap: ClientBootstrap = _ ++ @volatile ++ private var connection: ChannelFuture = _ ++ @volatile ++ private[remote] var openChannels: DefaultChannelGroup = _ ++ @volatile ++ private var executionHandler: ExecutionHandler = _ ++ ++ @volatile ++ private var reconnectionTimeWindowStart = 0L ++ ++ def notifyListeners(msg: RemoteLifeCycleEvent): Unit = netty.notifyListeners(msg) ++ ++ def currentChannel = connection.getChannel ++ ++ /** ++ * Connect to remote server. 
++ */ ++ def connect(reconnectIfAlreadyConnected: Boolean = false): Boolean = { ++ ++ def sendSecureCookie(connection: ChannelFuture) { ++ val handshake = RemoteControlProtocol.newBuilder.setCommandType(CommandType.CONNECT) ++ if (settings.SecureCookie.nonEmpty) handshake.setCookie(settings.SecureCookie.get) ++ handshake.setOrigin(RemoteProtocol.AddressProtocol.newBuilder ++ .setSystem(localAddress.system) ++ .setHostname(localAddress.host.get) ++ .setPort(localAddress.port.get) ++ .build) ++ connection.getChannel.write(netty.createControlEnvelope(handshake.build)) ++ } ++ ++ def attemptReconnect(): Boolean = { ++ val remoteIP = InetAddress.getByName(remoteAddress.host.get) ++ log.debug(""Remote client reconnecting to [{}|{}]"", remoteAddress, remoteIP) ++ connection = bootstrap.connect(new InetSocketAddress(remoteIP, remoteAddress.port.get)) ++ openChannels.add(connection.awaitUninterruptibly.getChannel) // Wait until the connection attempt succeeds or fails. ++ ++ if (!connection.isSuccess) { ++ notifyListeners(RemoteClientError(connection.getCause, netty, remoteAddress)) ++ false ++ } else { ++ sendSecureCookie(connection) ++ true ++ } ++ } ++ ++ runSwitch switchOn { ++ openChannels = new DefaultDisposableChannelGroup(classOf[RemoteClient].getName) ++ ++ executionHandler = new ExecutionHandler(netty.executor) ++ ++ bootstrap = new ClientBootstrap(netty.clientChannelFactory) ++ bootstrap.setPipelineFactory(new ActiveRemoteClientPipelineFactory(name, bootstrap, executionHandler, remoteAddress, this)) ++ bootstrap.setOption(""tcpNoDelay"", true) ++ bootstrap.setOption(""keepAlive"", true) ++ bootstrap.setOption(""connectTimeoutMillis"", settings.ConnectionTimeout.toMillis) ++ ++ val remoteIP = InetAddress.getByName(remoteAddress.host.get) ++ log.debug(""Starting remote client connection to [{}|{}]"", remoteAddress, remoteIP) ++ ++ connection = bootstrap.connect(new InetSocketAddress(remoteIP, remoteAddress.port.get)) ++ ++ 
openChannels.add(connection.awaitUninterruptibly.getChannel) // Wait until the connection attempt succeeds or fails. ++ ++ if (!connection.isSuccess) { ++ notifyListeners(RemoteClientError(connection.getCause, netty, remoteAddress)) ++ false ++ } else { ++ sendSecureCookie(connection) ++ notifyListeners(RemoteClientStarted(netty, remoteAddress)) ++ true ++ } ++ } match { ++ case true ⇒ true ++ case false if reconnectIfAlreadyConnected ⇒ ++ connection.getChannel.close() ++ openChannels.remove(connection.getChannel) ++ ++ log.debug(""Remote client reconnecting to [{}]"", remoteAddress) ++ attemptReconnect() ++ ++ case false ⇒ false ++ } ++ } ++ ++ // Please note that this method does _not_ remove the ARC from the NettyRemoteClientModule's map of clients","I don’t know, I just copied your code into a new file … +",4fb0858e557232e5f4a7e5b364d1697c0e21bdd1 +316230,0,"use: new Props(JAdvancedCalculatorActor.class)""",2011-12-27 11:21:33,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/194#discussion_r316230,2011-12-27T22:21:33Z,2011-12-28T00:41:32Z,viktorklang,CONTRIBUTOR,akka-samples/akka-sample-remote/src/main/java/sample/remote/calculator/java/JCreationApplication.java,,,,,1,"@@ -0,0 +1,35 @@ ++/** ++ * Copyright (C) 2009-2011 Typesafe Inc. 
++ */ ++package sample.remote.calculator.java; ++ ++import akka.actor.ActorRef; ++import akka.actor.ActorSystem; ++import akka.actor.Props; ++import akka.kernel.Bootable; ++import com.typesafe.config.ConfigFactory; ++ ++public class JCreationApplication implements Bootable { ++ private ActorSystem system; ++ private ActorRef actor; ++ private ActorRef remoteActor; ++ ++ public JCreationApplication() { ++ system = ActorSystem.create(""CreationApplication"", ConfigFactory.load().getConfig(""remotecreation"")); ++ actor = system.actorOf(new Props().withCreator(JCreationActor.class)); ++ remoteActor = system.actorOf(new Props().withCreator(JAdvancedCalculatorActor.class), ""advancedCalculator"");","use: + +``` +new Props(JAdvancedCalculatorActor.class) +``` +",f21651d7479c5572b0339a02c0b1ce24cdc167da +309221,0,"On Wed, Dec 21, 2011 at 5:08 PM, viktorklang < reply@reply.github.com > wrote: > > > > // FIXME: Dispatchers registered here are are not removed, see ticket > #1494 > > - private val dispatchers = new ConcurrentHashMap[String, > MessageDispatcher] > """,2011-12-21 05:23:23,patriknw,akka,akka,,https://github.com/akka/akka-core/pull/182#discussion_r309221,2011-12-21T16:23:23Z,2011-12-21T22:47:22Z,patriknw,CONTRIBUTOR,akka-actor/src/main/scala/akka/dispatch/Dispatchers.scala,,65.0,,,1,"@@ -67,15 +68,18 @@ class Dispatchers(val settings: ActorSystem.Settings, val prerequisites: Dispatc + if (settings.MailboxCapacity < 1) UnboundedMailbox() + else BoundedMailbox(settings.MailboxCapacity, settings.MailboxPushTimeout) + +- val defaultDispatcherConfig = settings.config.getConfig(""akka.actor.default-dispatcher"") ++ val defaultDispatcherConfig = { ++ val key = ""akka.actor.default-dispatcher"" ++ keyConfig(key).withFallback(settings.config.getConfig(key)) ++ } + +- lazy val defaultGlobalDispatcher: MessageDispatcher = +- from(defaultDispatcherConfig) getOrElse { +- throw new ConfigurationException(""Wrong configuration [akka.actor.default-dispatcher]"") +- } ++ private 
lazy val defaultDispatcherConfigurator: MessageDispatcherConfigurator = ++ configuratorFrom(defaultDispatcherConfig) ++ ++ lazy val defaultGlobalDispatcher: MessageDispatcher = defaultDispatcherConfigurator.dispatcher() + + // FIXME: Dispatchers registered here are are not removed, see ticket #1494 +- private val dispatchers = new ConcurrentHashMap[String, MessageDispatcher] ++ private val dispatcherConfigurators = new ConcurrentHashMap[String, MessageDispatcherConfigurator] + + /** + * Returns a dispatcher as specified in configuration, or if not defined it uses","On Wed, Dec 21, 2011 at 5:08 PM, viktorklang < +reply@reply.github.com + +> wrote: +> +> > // FIXME: Dispatchers registered here are are not removed, see ticket +> > #1494 +> > - private val dispatchers = new ConcurrentHashMap[String, +> > MessageDispatcher] +> > - private val dispatcherConfigurators = new ConcurrentHashMap[String, +> > MessageDispatcherConfigurator] +> > +> > /** +> > - Returns a dispatcher as specified in configuration, or if not +> > defined it uses +> +> Could be a source of hard to diagnose problems if it just silently uses +> the default. Perhaps log a warning +> +> That's why I log a Debug, but I can change it to Info. Don't think it's a +> Warning, because I think as a start default-dispatcher is a good default, +> but still with possibility to tune a specific dispatcher, if needed. +> +> --- +> +> Reply to this email directly or view it on GitHub: +> https://github.com/jboner/akka/pull/182/files#r309168 + +## + +Patrik Nordwall +Typesafe http://typesafe.com/ - Enterprise-Grade Scala from the Experts +Twitter: @patriknw +",ed2fb14dcf0120a6c126fe9c99038d897dbcdf0d +296660,0,"I'm curious as to why the signature of ? still includes a timeout. 
Since it returns a Future, which will no longer time out, isn't the timeout superfluous?""",2011-12-15 04:12:55,nuttycom,akka,akka,,https://github.com/akka/akka-core/pull/147#discussion_r296660,2011-12-15T15:12:55Z,2011-12-15T15:54:48Z,nuttycom,NONE,akka-actor/src/main/scala/akka/actor/ActorRef.scala,,450.0,,,58,"@@ -455,11 +448,13 @@ class AskActorRef( + } + + override def ?(message: Any)(implicit timeout: Timeout): Future[Any] =","I'm curious as to why the signature of ? still includes a timeout. Since it returns a Future, which will no longer time out, isn't the timeout superfluous? +",0af92f24400f1b05d1919be54dfd822037f0076f +284725,0,"final class?""",2011-12-10 01:01:32,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/144#discussion_r284725,2011-12-10T12:01:32Z,2011-12-13T15:06:05Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorRef.scala,,412.0,,,107,"@@ -403,6 +404,41 @@ class DeadLetterActorRef(val eventStream: EventStream) extends MinimalActorRef { + private def writeReplace(): AnyRef = DeadLetterActorRef.serialized + } + ++class VirtualPathContainer(val path: ActorPath, override val getParent: InternalActorRef, val log: LoggingAdapter) extends MinimalActorRef {","final class? 
+",134fac4bfe8d6f8b4e9b96dfbfdf532d48ae3c86 +269682,2,"Damn clean, Mr Kuhn, damn clean!""",2011-12-05 01:54:55,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/119#discussion_r269682,2011-12-05T12:54:55Z,2011-12-05T21:48:28Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorRef.scala,,159.0,,,98,"@@ -118,14 +111,76 @@ abstract class ActorRef extends java.lang.Comparable[ActorRef] with Serializable + */ + def isTerminated: Boolean + +- override def hashCode: Int = HashCode.hash(HashCode.SEED, address) ++ // FIXME RK check if we should scramble the bits or whether they can stay the same ++ override def hashCode: Int = path.hashCode + +- override def equals(that: Any): Boolean = { +- that.isInstanceOf[ActorRef] && +- that.asInstanceOf[ActorRef].address == address ++ override def equals(that: Any): Boolean = that match { ++ case other: ActorRef ⇒ path == other.path ++ case _ ⇒ false + } + +- override def toString = ""Actor[%s]"".format(address) ++ override def toString = ""Actor[%s]"".format(path) ++} ++ ++/** ++ * This trait represents the Scala Actor API ++ * There are implicit conversions in ../actor/Implicits.scala ++ * from ActorRef -> ScalaActorRef and back ++ */ ++trait ScalaActorRef { ref: ActorRef ⇒ ++ ++ /** ++ * Sends a one-way asynchronous message. E.g. fire-and-forget semantics. ++ *

++ * ++ * If invoked from within an actor then the actor reference is implicitly passed on as the implicit 'sender' argument. ++ *

++ * ++ * This actor 'sender' reference is then available in the receiving actor in the 'sender' member variable, ++ * if invoked from within an Actor. If not then no sender is available. ++ *

++   *   actor ! message
++   * 
++ *

++ */ ++ def !(message: Any)(implicit sender: ActorRef = null): Unit ++ ++ /** ++ * Sends a message asynchronously, returning a future which may eventually hold the reply. ++ */ ++ def ?(message: Any)(implicit timeout: Timeout): Future[Any] ++ ++ /** ++ * Sends a message asynchronously, returning a future which may eventually hold the reply. ++ * The implicit parameter with the default value is just there to disambiguate it from the version that takes the ++ * implicit timeout ++ */ ++ def ?(message: Any, timeout: Timeout)(implicit ignore: Int = 0): Future[Any] = ?(message)(timeout) ++}","Damn clean, Mr Kuhn, damn clean! +",9d7597c7282711889d74bd9b4d7bdae5ea254104 +269792,2,"These two are :(""",2011-12-05 03:22:21,viktorklang,akka,akka,,https://github.com/akka/akka-core/pull/119#discussion_r269792,2011-12-05T14:22:21Z,2011-12-05T21:48:29Z,viktorklang,CONTRIBUTOR,akka-actor/src/main/scala/akka/actor/ActorRefProvider.scala,,313.0,,,318,"@@ -106,57 +133,183 @@ trait ActorRefFactory { + /** + * Father of all children created by this interface. + */ +- protected def guardian: ActorRef ++ protected def guardian: InternalActorRef ++ ++ protected def lookupRoot: InternalActorRef + + protected def randomName(): String + ++ /** ++ * Create new actor as child of this context and give it an automatically ++ * generated name (currently similar to base64-encoded integer count, ++ * reversed and with “$” prepended, may change in the future). ++ * ++ * See [[akka.actor.Props]] for details on how to obtain a `Props` object. ++ */ + def actorOf(props: Props): ActorRef = provider.actorOf(systemImpl, props, guardian, randomName(), false) + +- /* +- * TODO this will have to go at some point, because creating two actors with +- * the same address can race on the cluster, and then you never know which +- * implementation wins ++ /** ++ * Create new actor as child of this context with the given name, which must ++ * not be null, empty or start with “$”. 
If the given name is already in use, ++ * and `InvalidActorNameException` is thrown. ++ * ++ * See [[akka.actor.Props]] for details on how to obtain a `Props` object. + */ +- def actorOf(props: Props, name: String): ActorRef = { +- if (name == null || name == """" || name.startsWith(""$"")) +- throw new ActorInitializationException(""actor name must not be null, empty or start with $"") +- provider.actorOf(systemImpl, props, guardian, name, false) +- } ++ def actorOf(props: Props, name: String): ActorRef + ++ /** ++ * Create new actor of the given type as child of this context and give it an automatically ++ * generated name (currently similar to base64-encoded integer count, ++ * reversed and with “$” prepended, may change in the future). The type must have ++ * a no-arg constructor which will be invoked using reflection. ++ */ + def actorOf[T <: Actor](implicit m: Manifest[T]): ActorRef = actorOf(Props(m.erasure.asInstanceOf[Class[_ <: Actor]])) + ++ /** ++ * Create new actor of the given type as child of this context with the given name, which must ++ * not be null, empty or start with “$”. If the given name is already in use, ++ * and `InvalidActorNameException` is thrown. The type must have ++ * a no-arg constructor which will be invoked using reflection. ++ */ + def actorOf[T <: Actor](name: String)(implicit m: Manifest[T]): ActorRef = + actorOf(Props(m.erasure.asInstanceOf[Class[_ <: Actor]]), name) + ++ /** ++ * Create new actor of the given class as child of this context and give it an automatically ++ * generated name (currently similar to base64-encoded integer count, ++ * reversed and with “$” prepended, may change in the future). The class must have ++ * a no-arg constructor which will be invoked using reflection. 
++ */ + def actorOf[T <: Actor](clazz: Class[T]): ActorRef = actorOf(Props(clazz)) + ++ /** ++ * Create new actor as child of this context and give it an automatically ++ * generated name (currently similar to base64-encoded integer count, ++ * reversed and with “$” prepended, may change in the future). Use this ++ * method to pass constructor arguments to the [[akka.actor.Actor]] while using ++ * only default [[akka.actor.Props]]; otherwise refer to `actorOf(Props)`. ++ */ + def actorOf(factory: ⇒ Actor): ActorRef = actorOf(Props(() ⇒ factory)) + ++ /** ++ * ''Java API'': Create new actor as child of this context and give it an ++ * automatically generated name (currently similar to base64-encoded integer ++ * count, reversed and with “$” prepended, may change in the future). ++ * ++ * Identical to `actorOf(Props(() => creator.create()))`. ++ */ + def actorOf(creator: UntypedActorFactory): ActorRef = actorOf(Props(() ⇒ creator.create())) + +- def actorFor(path: ActorPath): Option[ActorRef] = actorFor(path.path) ++ /** ++ * ''Java API'': Create new actor as child of this context with the given name, which must ++ * not be null, empty or start with “$”. If the given name is already in use, ++ * and `InvalidActorNameException` is thrown. ++ * ++ * Identical to `actorOf(Props(() => creator.create()), name)`. ++ */ ++ def actorOf(creator: UntypedActorFactory, name: String): ActorRef = actorOf(Props(() ⇒ creator.create()), name) + +- def actorFor(path: String): Option[ActorRef] = actorFor(ActorPath.split(path)) ++ /** ++ * Look-up an actor by path; if it does not exist, returns a reference to ++ * the dead-letter mailbox of the [[akka.actor.ActorSystem]]. If the path ++ * point to an actor which is not local, no attempt is made during this ++ * call to verify that the actor it represents does exist or is alive; use ++ * `watch(ref)` to be notified of the target’s termination, which is also ++ * signaled if the queried path cannot be resolved. 
++ */ ++ def actorFor(path: ActorPath): ActorRef = provider.actorFor(path) + +- def actorFor(path: Iterable[String]): Option[ActorRef] = provider.actorFor(path) ++ /** ++ * Look-up an actor by path represented as string. ++ * ++ * Absolute URIs like `akka://appname/user/actorA` are looked up as described ++ * for look-ups by `actorOf(ActorPath)`. ++ * ++ * Relative URIs like `/service/actorA/childB` are looked up relative to the ++ * root path of the [[akka.actor.ActorSystem]] containing this factory and as ++ * described for look-ups by `actorOf(Iterable[String])`. ++ * ++ * Relative URIs like `myChild/grandChild` or `../myBrother` are looked up ++ * relative to the current context as described for look-ups by ++ * `actorOf(Iterable[String])` ++ */ ++ def actorFor(path: String): ActorRef = provider.actorFor(lookupRoot, path) ++ ++ /** ++ * Look-up an actor by applying the given path elements, starting from the ++ * current context, where `""..""` signifies the parent of an actor. ++ * ++ * Example: ++ * {{{ ++ * class MyActor extends Actor { ++ * def receive = { ++ * case msg => ++ * ... ++ * val target = context.actorFor(Seq("".."", ""myBrother"", ""myNephew"")) ++ * ... ++ * } ++ * } ++ * }}} ++ * ++ * For maximum performance use a collection with efficient head & tail operations. ++ */ ++ def actorFor(path: Iterable[String]): ActorRef = provider.actorFor(lookupRoot, path) ++ ++ /** ++ * Look-up an actor by applying the given path elements, starting from the ++ * current context, where `""..""` signifies the parent of an actor. ++ * ++ * Example: ++ * {{{ ++ * public class MyActor extends UntypedActor { ++ * public void onReceive(Object msg) throws Exception { ++ * ... ++ * final List path = new ArrayList(); ++ * path.add(""..""); ++ * path.add(""myBrother""); ++ * path.add(""myNephew""); ++ * final ActorRef target = context().actorFor(path); ++ * ... ++ * } ++ * } ++ * }}} ++ * ++ * For maximum performance use a collection with efficient head & tail operations. 
++ */ ++ def actorFor(path: java.util.List[String]): ActorRef = { ++ import scala.collection.JavaConverters._ ++ provider.actorFor(lookupRoot, path.asScala) ++ } ++ ++ /** ++ * Construct an [[akka.actor.ActorSelection]] from the given path, which is ++ * parsed for wildcards (these are replaced by regular expressions ++ * internally). No attempt is made to verify the existence of any part of ++ * the supplied path, it is recommended to send a message and gather the ++ * replies in order to resolve the matching set of actors. ++ */ ++ def actorSelection(path: String): ActorSelection = ActorSelection(lookupRoot, path) + } + + class ActorRefProviderException(message: String) extends AkkaException(message) + ++private[akka] case class CreateChild(props: Props, name: String) ++ + /** + * Local ActorRef provider. + */ + class LocalActorRefProvider( ++ _systemName: String, + val settings: ActorSystem.Settings, + val eventStream: EventStream, + val scheduler: Scheduler, +- val rootPath: ActorPath, +- val nodename: String, +- val clustername: String) extends ActorRefProvider { ++ val deadLetters: InternalActorRef) extends ActorRefProvider { + +- def this(settings: ActorSystem.Settings, eventStream: EventStream, scheduler: Scheduler) { +- this(settings, eventStream, scheduler, new RootActorPath(LocalOnly), ""local"", ""local"") +- } ++ val rootPath: ActorPath = new RootActorPath(LocalAddress(_systemName)) ++ ++ // FIXME remove both","These two are :( +",9d7597c7282711889d74bd9b4d7bdae5ea254104 +160227,0,"Alright, I'll revise the code accordingly.""",2011-10-07 06:43:07,kro,akka,akka,,https://github.com/akka/akka-core/pull/91#discussion_r160227,2011-10-07T18:43:07Z,2011-10-07T18:43:07Z,kro,NONE,akka-zeromq/src/main/scala/akka/zeromq/AbstractSocketActor.scala,,13.0,,,13,"@@ -0,0 +1,46 @@ ++/** ++ * Copyright (C) 2009-2011 Typesafe Inc. 
++ */ ++package akka.zeromq ++ ++import akka.actor.Actor ++import org.zeromq.ZMQ.Socket ++import org.zeromq.{ZMQ => ZeroMQ} ++ ++private[zeromq] abstract class AbstractSocketActor(socketType: Int, params: SocketParameters) extends Actor { ++ protected var remoteSocket: Socket = _ ++ protected def bindOrConnectRemoteSocket = self.supervisor.foreach { sup => ++ remoteSocket = (sup ? SocketRequest(socketType)).get.asInstanceOf[Socket]","Alright, I'll revise the code accordingly. +",ff32eb7654db0f2418aeb4a04131e0922c5abbd4 +383588,1,"just extract method, man :)""",2012-01-25 01:00:19,piotrga,akka,akka,,https://github.com/akka/akka-core/pull/245#discussion_r383588,2012-01-25T12:00:19Z,2012-01-25T12:00:19Z,piotrga,CONTRIBUTOR,akka-camel/src/test/scala/akka/camel/ProducerRegistryTest.scala,,45.0,,,45,"@@ -0,0 +1,59 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. ++ */ ++ ++package akka.camel ++ ++import org.scalatest.matchers.MustMatchers ++import org.scalatest.WordSpec ++import akka.camel.TestSupport.SharedCamelSystem ++import akka.actor.Props ++import akka.util.duration._ ++ ++class ProducerRegistryTest extends WordSpec with MustMatchers with SharedCamelSystem { ++ ""A ProducerRegistry"" must { ++ ""register a started SendProcessor for the producer, which is stopped when the actor is stopped"" in { ++ val actorRef = system.actorOf(Props(behavior = ctx ⇒ { ++ case _ ⇒ {} ++ })) ++ val (endpoint, processor) = camel.registerProducer(actorRef, ""mock:mock"") ++ camel.awaitActivation(actorRef, 1 second) ++ processor.isStarted must be(true) ++ endpoint.getCamelContext must equal(camel.context) ++ system.stop(actorRef) ++ camel.awaitDeactivation(actorRef, 1 second) ++ if (!processor.isStopping) { ++ processor.isStopped must be(true) ++ } ++ } ++ ""remove and stop the SendProcessor if the actorRef is registered"" in { ++ val actorRef = system.actorOf(Props(behavior = ctx ⇒ { ++ case _ ⇒ {} ++ })) ++ val (_, processor) = camel.registerProducer(actorRef, ""mock:mock"") ++ 
camel.remove(actorRef) ++ if (!processor.isStopping) { ++ processor.isStopped must be(true) ++ } ++ } ++ ""remove and stop only the SendProcessor for the actorRef that is registered"" in { ++ val actorRef1 = system.actorOf(Props(behavior = ctx ⇒ { ++ case _ ⇒ {} ++ })) ++ val actorRef2 = system.actorOf(Props(behavior = ctx ⇒ { ++ case _ ⇒ {} ++ }))","just extract method, man :) +",f6646051f79f6f3c7fb3214652bce46034efed9d +386009,1,"re: """"Use Props.empty instead guys."""" Genious. And obviously it's in the docs... I just read it... :)""",2012-01-25 11:57:37,piotrga,akka,akka,,https://github.com/akka/akka-core/pull/245#discussion_r386009,2012-01-25T22:57:37Z,2012-01-25T22:57:37Z,piotrga,CONTRIBUTOR,akka-camel/src/test/scala/akka/camel/ProducerRegistryTest.scala,,45.0,,,45,"@@ -0,0 +1,59 @@ ++/** ++ * Copyright (C) 2009-2012 Typesafe Inc. ++ */ ++ ++package akka.camel ++ ++import org.scalatest.matchers.MustMatchers ++import org.scalatest.WordSpec ++import akka.camel.TestSupport.SharedCamelSystem ++import akka.actor.Props ++import akka.util.duration._ ++ ++class ProducerRegistryTest extends WordSpec with MustMatchers with SharedCamelSystem { ++ ""A ProducerRegistry"" must { ++ ""register a started SendProcessor for the producer, which is stopped when the actor is stopped"" in { ++ val actorRef = system.actorOf(Props(behavior = ctx ⇒ { ++ case _ ⇒ {} ++ })) ++ val (endpoint, processor) = camel.registerProducer(actorRef, ""mock:mock"") ++ camel.awaitActivation(actorRef, 1 second) ++ processor.isStarted must be(true) ++ endpoint.getCamelContext must equal(camel.context) ++ system.stop(actorRef) ++ camel.awaitDeactivation(actorRef, 1 second) ++ if (!processor.isStopping) { ++ processor.isStopped must be(true) ++ } ++ } ++ ""remove and stop the SendProcessor if the actorRef is registered"" in { ++ val actorRef = system.actorOf(Props(behavior = ctx ⇒ { ++ case _ ⇒ {} ++ })) ++ val (_, processor) = camel.registerProducer(actorRef, ""mock:mock"") ++ camel.remove(actorRef) ++ 
if (!processor.isStopping) { ++ processor.isStopped must be(true) ++ } ++ } ++ ""remove and stop only the SendProcessor for the actorRef that is registered"" in { ++ val actorRef1 = system.actorOf(Props(behavior = ctx ⇒ { ++ case _ ⇒ {} ++ })) ++ val actorRef2 = system.actorOf(Props(behavior = ctx ⇒ { ++ case _ ⇒ {} ++ }))","re: ""Use Props.empty instead guys."" +Genious. And obviously it's in the docs... I just read it... :) +",f6646051f79f6f3c7fb3214652bce46034efed9d diff --git a/data/PR inline comments/android_sentiment_pr_inline_comments_joined.csv b/data/PR inline comments/android_sentiment_pr_inline_comments_joined.csv new file mode 100644 index 0000000..9b184fd --- /dev/null +++ b/data/PR inline comments/android_sentiment_pr_inline_comments_joined.csv @@ -0,0 +1,81 @@ +comment_id,polarity,text,created_at_gold,author_login,owner,repo,review_id,html_url,created_at_kaiaulu,updated_at,comment_user_login,author_association,file_path,start_line,line,original_start_line,original_line,position,diff_hunk,body,commit_id +1664046,0,"`GitHubAccountAuthenticator` sounds reasonable.""",2012-09-21 06:10:53,kevinsawicki,github,android,,https://github.com/pockethub/PocketHub/pull/224#discussion_r1664046,2012-09-21T18:10:53Z,2012-09-21T18:50:50Z,kevinsawicki,CONTRIBUTOR,app/src/main/java/com/github/mobile/accounts/AccountAuthenticator.java,,,,,1,"@@ -75,7 +85,58 @@ public Bundle editProperties(AccountAuthenticatorResponse response, + public Bundle getAuthToken(AccountAuthenticatorResponse response, + Account account, String authTokenType, Bundle options) + throws NetworkErrorException { +- return null; ++ ++ final Bundle bundle = new Bundle(); ++ ++ if(!authTokenType.equals(ACCOUNT_TYPE)) return bundle; ++ ++ AccountManager am = AccountManager.get(context); ++ String username = account.name; ++ String password = am.getPassword(account); ++ ++ String authToken = null; ++ DefaultClient client = new DefaultClient(); ++ client.setCredentials(username, password); ++ ++ 
OAuthService oAuthService = new OAuthService(client); ++ ++ // Get authorizations for app if they exist ++ try { ++ try { ++ List auths = oAuthService.getAuthorizations(); ++ for(Authorization auth : auths) ++ if(auth.getApp().getName().equals(ACCOUNT_NAME)) ++ authToken = auth.getToken(); ++ } ++ catch ( NullPointerException npe ) { } ++ ++ // Setup authorization for app if others didn't exist. ++ if(TextUtils.isEmpty(authToken)) { ++ Authorization auth = oAuthService.createAuthorization( ++ new Authorization().setNote(ACCOUNT_NAME).setUrl(APP_URL) ++ ); ++ if(auth != null) authToken = auth.getToken(); ++ } ++ ++ // If couldn't get authToken ++ if(TextUtils.isEmpty(authToken)) { ++ final Intent intent = new Intent(context, LoginActivity.class); ++ intent.putExtra(PARAM_AUTHTOKEN_TYPE, ACCOUNT_TYPE); ++ intent.putExtra(KEY_ACCOUNT_AUTHENTICATOR_RESPONSE, response); ++ bundle.putParcelable(KEY_INTENT, intent); ++ return bundle; ++ } ++ ++ // Assemble and return bundle ++ bundle.putString(AccountManager.KEY_ACCOUNT_NAME, account.name); ++ bundle.putString(AccountManager.KEY_ACCOUNT_TYPE, ACCOUNT_TYPE); ++ bundle.putString(AccountManager.KEY_AUTHTOKEN, authToken); ++ ++ // Clear password from account ++ am.clearPassword(account); ++ return bundle; ++ } catch ( Exception e ) { e.printStackTrace(); }","`GitHubAccountAuthenticator` sounds reasonable. +",e70b701b4b7bd5e2dc5cc00ba82605689c3723fb +1663939,1,"Yes! I'll do that. On Fri, Sep 21, 2012 at 12:57 PM, Kevin Sawicki wrote: > In app/src/main/java/com/github/mobile/AuthorizationClient.java: > > > @@ -0,0 +1,68 @@ > > +/* > > This class can be removed now right? > >""",2012-09-21 06:00:32,TrevorBasinger,github,android,,https://github.com/pockethub/PocketHub/pull/224#discussion_r1663939,2012-09-21T18:00:32Z,2012-09-21T18:50:50Z,TrevorBasinger,CONTRIBUTOR,app/src/main/java/com/github/mobile/AuthorizationClient.java,,,,,1,"@@ -0,0 +1,68 @@ ++/*","Yes! I'll do that. 
+ +On Fri, Sep 21, 2012 at 12:57 PM, Kevin Sawicki notifications@github.comwrote: + +> In app/src/main/java/com/github/mobile/AuthorizationClient.java: +> +> > @@ -0,0 +1,68 @@ +> > +/* +> +> This class can be removed now right? +> +> — +> Reply to this email directly or view it on GitHubhttps://github.com/github/android/pull/224/files#r1663907. +",e70b701b4b7bd5e2dc5cc00ba82605689c3723fb +1222306,2,"Shame on me. You are absolutely right. I reworked the commit according to your comments.""",2012-07-23 17:37:37,Bananeweizen,github,android,,https://github.com/pockethub/PocketHub/pull/145#discussion_r1222306,2012-07-24T05:37:37Z,2012-07-24T05:37:37Z,Bananeweizen,CONTRIBUTOR,app/src/main/java/com/github/mobile/ui/commit/DiffStyler.java,,,,,1,"@@ -72,7 +72,10 @@ public DiffStyler setFiles(final Collection files) { + StyledText styled = new StyledText(); + while (end != -1) { + String line = patch.substring(start, end + 1); +- switch (patch.charAt(start)) { ++ if (line.length() == 0) { ++ continue;","Shame on me. You are absolutely right. I reworked the commit according to your comments. 
+",d1e578a144299d4a7e1cfcc1705f6d4502152f91 diff --git a/data/PR inline comments/automapper_sentiment_pr_inline_comments_joined.csv b/data/PR inline comments/automapper_sentiment_pr_inline_comments_joined.csv new file mode 100644 index 0000000..fb16bbf --- /dev/null +++ b/data/PR inline comments/automapper_sentiment_pr_inline_comments_joined.csv @@ -0,0 +1,18 @@ +comment_id,polarity,text,created_at_gold,author_login,owner,repo,review_id,html_url,created_at_kaiaulu,updated_at,comment_user_login,author_association,file_path,start_line,line,original_start_line,original_line,position,diff_hunk,body,commit_id +15859,2,"ok, sorry""",2011-04-05 21:59:44,andreialecu,AutoMapper,AutoMapper,,https://github.com/LuckyPennySoftware/AutoMapper/pull/29#discussion_r15859,2011-04-06T09:59:44Z,2011-04-06T10:01:28Z,andreialecu,CONTRIBUTOR,src/AutoMapper/Mappers/CollectionMapper.cs,,55.0,,,11,"@@ -48,11 +48,25 @@ namespace AutoMapper.Mappers + + protected override TCollection CreateDestinationObjectBase(Type destElementType, int sourceLength) + { +- var list = typeof(TCollection).IsInterface +- ? 
new List() +- : ObjectCreator.CreateDefaultValue(typeof (TCollection)); ++ Object collection; ++ ++ if (typeof(TCollection).IsInterface) ++ { ++ if (typeof(TCollection).Name == ""ISet`1"") ","A strong typed alternative to this would be: + +``` +if (typeof (TCollection).GetGenericTypeDefinition() == typeof (ISet<>)) +``` +",5492f5c2ed075c609e90c788ff4d43e97539cc4d diff --git a/data/PR inline comments/bitcoin_sentiment_pr_inline_comments_joined.csv b/data/PR inline comments/bitcoin_sentiment_pr_inline_comments_joined.csv new file mode 100644 index 0000000..524ea23 --- /dev/null +++ b/data/PR inline comments/bitcoin_sentiment_pr_inline_comments_joined.csv @@ -0,0 +1,800 @@ +comment_id,polarity,text,created_at_gold,author_login,owner,repo,review_id,html_url,created_at_kaiaulu,updated_at,comment_user_login,author_association,file_path,start_line,line,original_start_line,original_line,position,diff_hunk,body,commit_id +1119406,1,"ok! :)""",2012-07-08 04:54:02,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1469#discussion_r1119406,2012-07-08T16:54:02Z,2012-07-08T16:54:02Z,Diapolo,NONE,src/qt/addressbookpage.cpp,,204.0,,,45,"@@ -182,7 +186,25 @@ void AddressBookPage::on_signMessage_clicked() + QObject *qoGUI = parent()->parent(); + BitcoinGUI *gui = qobject_cast(qoGUI); + if (gui) +- gui->gotoMessagePage(addr); ++ gui->gotoSignMessageTab(addr); ++} ++ ++void AddressBookPage::on_verifyMessage_clicked() ++{ ++ QTableView *table = ui->tableView; ++ QModelIndexList indexes = table->selectionModel()->selectedRows(AddressTableModel::Address); ++ QString addr; ++ ++ foreach (QModelIndex index, indexes) ++ { ++ QVariant address = index.data(); ++ addr = address.toString(); ++ } ++ ++ QObject *qoGUI = parent()->parent();","@laanwj See #1569 for a fix for this. 
+",47894585aeaa4f5475c50bc4415ed6ced868fbf7 +3644466,1,"I'll add a ToDo in my local build :).""",2013-04-03 08:24:12,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/2452#discussion_r3644466,2013-04-03T20:24:12Z,2013-04-03T20:24:12Z,Diapolo,NONE,src/qt/bitcoingui.h,,63.0,,,25,"@@ -56,9 +57,17 @@ class BitcoinGUI : public QMainWindow + + bool addWallet(const QString& name, WalletModel *walletModel); + bool setCurrentWallet(const QString& name); +- ++ + void removeAllWallets(); + ++ /** Used by WalletView to allow access to needed QActions */","I'll add a ToDo in my local build :). +",8726de26ee0010eaf64d44d69cc9b8e09e580a37 +2832316,1,"@luke-jr See #2217 ^^ someone just needs to do this.""",2013-01-30 08:58:06,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/2247#discussion_r2832316,2013-01-30T19:58:06Z,2013-01-30T19:58:06Z,Diapolo,NONE,COPYING,,1.0,,,2,"@@ -1,4 +1,4 @@ +-Copyright (c) 2009-2012 Bitcoin Developers ++Copyright (c) 2009-2013 Bitcoin Developers","@luke-jr See #2217 ^^ someone just needs to do this. +",d38c6488d067c2e88726e2ca99bc76fd67dab49b +1928306,0,"Agree, in this case, it's an optimization not needing to calculate the hash.""",2012-10-24 01:36:53,sipa,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1953#discussion_r1928306,2012-10-24T13:36:53Z,2012-10-24T13:36:53Z,sipa,MEMBER,src/main.cpp,,1561.0,,,4,"@@ -1558,7 +1558,8 @@ bool CBlock::ConnectBlock(CBlockIndex* pindex, CCoinsViewCache &view, bool fJust + // Now that the whole chain is irreversibly beyond that time it is applied to all blocks except the + // two in the chain that violate it. This prevents exploiting the issue against nodes in their + // initial block download. +- bool fEnforceBIP30 = !((pindex->nHeight==91842 && pindex->GetBlockHash() == uint256(""0x00000000000a4d0a398161ffc163c503763b1f4360639393e0e4c8e300e0caec"")) ||","Agree, in this case, it's an optimization not needing to calculate the hash. 
+",faff50d129b6d4b9e6397ac989218e83a26ae692 +1449572,0,"@laanwj What do you say? Is the general idea for that reset button a good one and what about the detach thing?""",2012-08-23 09:50:50,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1685#discussion_r1449572,2012-08-23T21:50:50Z,2013-01-05T12:52:15Z,Diapolo,NONE,src/qt/optionsmodel.cpp,,,,,1,"@@ -167,7 +185,7 @@ QVariant OptionsModel::data(const QModelIndex & index, int role) const + case DisplayAddresses: + return QVariant(bDisplayAddresses); + case DetachDatabases: +- return QVariant(bitdb.GetDetach()); ++ return settings.value(""detachDB"", false);","@laanwj What do you say? Is the general idea for that reset button a good one and what about the detach thing? +",5fb445b49e80812f004f00d5adf8fdd39bec557f +1366333,1,"So everything fine here :)?""",2012-08-13 08:39:34,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1649#discussion_r1366333,2012-08-13T20:39:34Z,2012-08-13T20:39:34Z,Diapolo,NONE,src/qt/optionsdialog.cpp,,154.0,,,33,"@@ -147,6 +150,16 @@ void OptionsDialog::setMapper() + mapper->addMapping(ui->displayAddresses, OptionsModel::DisplayAddresses); + } + ++void OptionsDialog::enableApplyButton() ++{","So everything fine here :)? +",4aaa4313e7edf5d23143e393efd2d5892d5dde48 +974400,0,"If you add `TODO:` in the comment, many code editors will show it in an overview. 
Might be useful, and more clear to readers that it isn't just commented out code (otherwise, someone that doesn't know might remove it).""",2012-06-12 20:36:14,laanwj,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1405#discussion_r974400,2012-06-13T08:36:14Z,2012-06-13T08:36:14Z,laanwj,MEMBER,src/db.cpp,,658.0,,,138,"@@ -535,6 +551,118 @@ CBlockIndex static * InsertBlockIndex(uint256 hash) + return pindexNew; + } + ++bool CTxDB::PruneBlockIndex(uint256 hashPruneFrom, uint256 hashPruneTo) ++{ ++// TODO: assert here, but cant #include main.h ++// if (hashPruneFrom != 0) ++// assert(hashPruneTo == hashBestBlock); ++ ++ CBlockIndex* pindexScan = pindexGenesisBlock; ++ uint256 hashOldBestCheckpoint; ++ if (ReadHashBestCheckpoint(hashOldBestCheckpoint) && hashOldBestCheckpoint == hashPruneTo) ++ return true; ++ ++ if (!mapBlockIndex.count(hashPruneTo)) ++ return true; ++ ++ if (hashPruneFrom != 0) ++ pindexScan = mapBlockIndex[hashPruneFrom]; ++ assert(pindexScan); ++ ++ printf(""Pruning Block Index from %s to %s.\n"", hashPruneFrom.ToString().substr(0,20).c_str(), hashPruneTo.ToString().substr(0,20).c_str()); ++ ++ // Cache of Txes by hash -> txouts spent before hashPruneTo flags + cant be deleted flag ++ map, bool> > mapTxIndexCache; ++ ++ while (pindexScan != NULL && *(pindexScan->phashBlock) != hashPruneTo) ++ { ++ if(fRequestShutdown) ++ return true; ++ ++ CBlock block; ++ block.ReadFromDisk(pindexScan); ++ ++ BOOST_FOREACH(CTransaction& tx, block.vtx) ++ { ++ if (tx.IsCoinBase()) ++ continue; ++ ++ BOOST_FOREACH(CTxIn& txin, tx.vin) ++ { ++ COutPoint& txout = txin.prevout; ++ uint256& hash = txout.hash; ++ pair, bool>& pairTx = mapTxIndexCache[hash]; ++ ++ if (pairTx.first.size() == 0) ++ { ++ CTxIndex txindex; ++ if (!ReadTxIndex(hash, txindex)) ++ { ++ // This should only ever happen if we get interrupted pruning and dont WriteHashBestCheckpoint ++ pairTx.second = false; ++ break; ++ } ++ ++ vector& vSpent = txindex.vSpent; ++ unsigned int vouts = 
vSpent.size(); ++ ++ pairTx.first.resize(vouts); ++ ++ pairTx.second = true; ++ for (unsigned int i = 0; i < vouts; i++) ++ { ++ if (vSpent[i].IsNull()) ++ { ++ pairTx.second = false; ++ break; ++ } ++ pairTx.first[i] = false; ++ } ++ } ++ ++ if (pairTx.second == false) ++ continue; ++ ++ pairTx.first[txout.n] = true; ++ } ++ } ++ ++ pindexScan = pindexScan->pnext; ++ } ++ ++ // TODO: It may be prudent to use DB Transactions here, but if we do we overrun our maximum lock objects ++ //if (!TxnBegin()) ++ // return false; ++ ++ unsigned int nTxsPruned = 0; ++ typedef pair, bool> > TxIndexCachePairType; ++ BOOST_FOREACH(TxIndexCachePairType& pair, mapTxIndexCache) ++ { ++ bool fPrunable = true; ++ BOOST_FOREACH(bool fSpent, pair.second.first) ++ if (!fSpent) ++ { ++ fPrunable = false; ++ break; ++ } ++ ++ if (!fPrunable) ++ continue; ++ ++ EraseTxIndex(pair.first); ++ nTxsPruned++; ++ } ++ ++ WriteHashBestCheckpoint(hashPruneTo); ++ ++ //if (!TxnCommit())","If you add `TODO:` in the comment, many code editors will show it in an overview. Might be useful, and more clear to readers that it isn't just commented out code (otherwise, someone that doesn't know might remove it). +",24f4c50f2997bfc9e374c26b25e234dea549774f +775694,1,"Didn't know that, but I think the names are more speaking than a simple -1 :), look how we guessed what that -1 means here. Would not have happend if we used the error codes.""",2012-05-04 05:48:34,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1180#discussion_r775694,2012-05-04T17:48:34Z,2012-05-04T17:48:34Z,Diapolo,NONE,src/net.cpp,,624.0,,,5,"@@ -621,7 +621,7 @@ void ThreadSocketHandler2(void* parg) + if (nSelect == SOCKET_ERROR) + { + int nErr = WSAGetLastError(); +- if (hSocketMax > -1) ++ if (hSocketMax > (SOCKET) -1)","Didn't know that, but I think the names are more speaking than a simple -1 :), look how we guessed what that -1 means here. Would not have happend if we used the error codes. 
+",024fa1cb44b8ec577fef07e7b37a4e5b0501dbea +1637626,2,"If all who contribute translatable strings to the source would be a little more straight there would be no need for this. It really annoys me, that we have strings that have incorrect grammar or punctuation. At least for the string """"Error: Transaction cr""",2012-09-18 20:50:10,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1830#discussion_r1637626,2012-09-19T08:50:10Z,2012-10-25T20:34:18Z,Diapolo,NONE,src/qt/sendcoinsdialog.cpp,,151.0,,,5,"@@ -148,7 +148,7 @@ void SendCoinsDialog::on_sendButton_clicked() + break; + case WalletModel::TransactionCreationFailed: + QMessageBox::warning(this, tr(""Send Coins""), +- tr(""Error: Transaction creation failed.""), ++ tr(""Error: Transaction creation failed!""),","If all who contribute translatable strings to the source would be a little more straight there would be no need for this. +It really annoys me, that we have strings that have incorrect grammar or punctuation. + +At least for the string ""Error: Transaction creation failed."" we currently already have 2 strings in the translations, this pull makes one out of them. + +You are free to close such pulls you consider not valuable, even if I don't agree here, as a good overall string handling was needed badly and this covers the last things I could find! +",6b3783a9c9cc47afcf72aa0a86ea26122392efdb +1681635,0,"No problem here, it is entirely correct: calling itostr casts the unsigned short to a signed integer (which is larger, so there is never undefined behavior), before feeding it into strprintf as a signed integer (%d). 
""",2012-09-24 20:46:54,laanwj,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1862#discussion_r1681635,2012-09-25T08:46:54Z,2012-09-25T08:46:54Z,laanwj,MEMBER,src/bitcoinrpc.cpp,,1063.0,,,26,"@@ -1055,7 +1060,7 @@ Object CallRPC(const string& strMethod, const Array& params) + asio::ssl::stream sslStream(io_service, context); + SSLIOStreamDevice d(sslStream, fUseSSL); + iostreams::stream< SSLIOStreamDevice > stream(d); +- if (!d.connect(GetArg(""-rpcconnect"", ""127.0.0.1""), GetArg(""-rpcport"", ""8332""))) ++ if (!d.connect(GetArg(""-rpcconnect"", ""127.0.0.1""), GetArg(""-rpcport"", itostr(GetDefaultRPCPort()))))","No problem here, it is entirely correct: calling itostr casts the unsigned short to a signed integer (which is larger, so there is never undefined behavior), before feeding it into strprintf as a signed integer (%d). +",b202d430762c1b5c9925e948f357c66040f95f10 +1118206,2,"I don't really like this code as it makes assumptions about the parent object and does explicit casts. Why not use a signal 'verifyMessage(QString addr)'?""",2012-07-06 23:48:13,laanwj,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1469#discussion_r1118206,2012-07-07T11:48:13Z,2012-07-07T11:48:13Z,laanwj,MEMBER,src/qt/addressbookpage.cpp,,204.0,,,45,"@@ -182,7 +186,25 @@ void AddressBookPage::on_signMessage_clicked() + QObject *qoGUI = parent()->parent(); + BitcoinGUI *gui = qobject_cast(qoGUI); + if (gui) +- gui->gotoMessagePage(addr); ++ gui->gotoSignMessageTab(addr); ++} ++ ++void AddressBookPage::on_verifyMessage_clicked() ++{ ++ QTableView *table = ui->tableView; ++ QModelIndexList indexes = table->selectionModel()->selectedRows(AddressTableModel::Address); ++ QString addr; ++ ++ foreach (QModelIndex index, indexes) ++ { ++ QVariant address = index.data(); ++ addr = address.toString(); ++ } ++ ++ QObject *qoGUI = parent()->parent();","I don't really like this code as it makes assumptions about the parent object and does explicit casts. 
Why not use a signal 'verifyMessage(QString addr)'? +",47894585aeaa4f5475c50bc4415ed6ced868fbf7 +1716918,1,"Fine, same is true for me thinking about Linux ;). End of OT then ^^.""",2012-09-28 00:47:47,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1851#discussion_r1716918,2012-09-28T12:47:47Z,2012-09-28T12:47:47Z,Diapolo,NONE,doc/build-unix.txt,,50.0,,,5,"@@ -47,7 +47,7 @@ Licenses of statically linked libraries: + + Versions used in this release: + GCC 4.3.3 +- OpenSSL 0.9.8g ++ OpenSSL 1.0.1c","Fine, same is true for me thinking about Linux ;). End of OT then ^^. +",0eaaa83ba521af8453c11ad688bdbb6bd4e33870 +1527978,1,"When I now read the first comment all that makes sense ^^ you are such a patient person :-P. With your new code we would pass hSocketMax == 0 to select(), when -proxy is invalid (no BOOST_FOREACH pass), I'm not sure if this is valid. But you are right, w""",2012-09-04 09:23:01,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1772#discussion_r1527978,2012-09-04T21:23:01Z,2012-09-04T21:45:41Z,Diapolo,NONE,src/net.cpp,,787.0,,,10,"@@ -781,10 +781,9 @@ void ThreadSocketHandler2(void* parg) + return; + if (nSelect == SOCKET_ERROR) + { +- int nErr = WSAGetLastError(); +- if (hSocketMax != INVALID_SOCKET) ++ if ((hSocketMax != INVALID_SOCKET) && (hSocketMax != (SOCKET)0)) + { +- printf(""socket select error %d\n"", nErr); ++ printf(""socket select error %d\n"", WSAGetLastError()); + for (unsigned int i = 0; i <= hSocketMax; i++)","When I now read the first comment all that makes sense ^^ you are such a patient person :-P. + +With your new code we would pass hSocketMax == 0 to select(), when -proxy is invalid (no BOOST_FOREACH pass), I'm not sure if this is valid. But you are right, when hSocketMax stays 0, we have no fds set, but I'm not sure if it can ever become 1. It would be 1, if vhListenSocket or vNodes is not empty and the contained sockets are == 0. 
+ +vhListenSocket is empty, when we are not listening and vNodes if we are not connected to any peers IMO. +",8207857f401bc1a48f863be646c5a508a7cdfe9c +2106752,2,"Satoshi is a very bad person to learn C++ style from :poodle: If you want an example of well-structured, readable C++ I can recommend reading source from LLVM. And I'm not sure either, it doesn't warrant changing all the functions I guess... maybe just l""",2012-11-12 20:31:06,laanwj,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1479#discussion_r2106752,2012-11-13T07:31:06Z,2012-11-13T07:32:59Z,laanwj,MEMBER,src/wallet.cpp,,929.0,,,7,"@@ -926,9 +926,8 @@ int64 CWallet::GetImmatureBalance() const + LOCK(cs_wallet); + for (map::const_iterator it = mapWallet.begin(); it != mapWallet.end(); ++it) + { +- const CWalletTx& pcoin = (*it).second; +- if (pcoin.IsCoinBase() && pcoin.GetBlocksToMaturity() > 0 && pcoin.IsInMainChain()) +- nTotal += GetCredit(pcoin); ++ const CWalletTx* pcoin = &(*it).second;","Satoshi is a very bad person to learn C++ style from :poodle: If you want an example of well-structured, readable C++ I can recommend reading source from LLVM. +And I'm not sure either, it doesn't warrant changing all the functions I guess... maybe just leave it like this then. +",966a0e8cc94f2590521e0a2513e0cea32b5bb005 +766482,0,"Is this code path ever followed? I mean, will an unsigned integer ever be larger than (unsigned) -1? """,2012-05-02 17:59:39,laanwj,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1180#discussion_r766482,2012-05-03T05:59:39Z,2012-05-03T05:59:39Z,laanwj,MEMBER,src/net.cpp,,624.0,,,5,"@@ -621,7 +621,7 @@ void ThreadSocketHandler2(void* parg) + if (nSelect == SOCKET_ERROR) + { + int nErr = WSAGetLastError(); +- if (hSocketMax > -1) ++ if (hSocketMax > (SOCKET) -1)","Is this code path ever followed? I mean, will an unsigned integer ever be larger than (unsigned) -1? 
+",024fa1cb44b8ec577fef07e7b37a4e5b0501dbea +1677922,0,"itostr() does ``strprintf(""""%d"""", n)``, which is for signed integers, so should we better use ``strprintf(""""%u"""", GetDefaultRPCPort())`` instead, as we have an unsigned short as port number?""",2012-09-24 09:10:48,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1862#discussion_r1677922,2012-09-24T21:10:48Z,2012-09-24T21:10:48Z,Diapolo,NONE,src/bitcoinrpc.cpp,,1063.0,,,26,"@@ -1055,7 +1060,7 @@ Object CallRPC(const string& strMethod, const Array& params) + asio::ssl::stream sslStream(io_service, context); + SSLIOStreamDevice d(sslStream, fUseSSL); + iostreams::stream< SSLIOStreamDevice > stream(d); +- if (!d.connect(GetArg(""-rpcconnect"", ""127.0.0.1""), GetArg(""-rpcport"", ""8332""))) ++ if (!d.connect(GetArg(""-rpcconnect"", ""127.0.0.1""), GetArg(""-rpcport"", itostr(GetDefaultRPCPort()))))","itostr() does `strprintf(""%d"", n)`, which is for signed integers, so should we better use `strprintf(""%u"", GetDefaultRPCPort())` instead, as we have an unsigned short as port number? +",b202d430762c1b5c9925e948f357c66040f95f10 +1679137,0,"My understanding is that this is well defined. The implicit type conversion is from unsigned short to signed int, which is always safe. (Since the short is unsigned, there is no sign to extend.) 
The compiler doesn't even blink at this with -Wall, but i""",2012-09-24 11:49:15,kjj2,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1862#discussion_r1679137,2012-09-24T23:49:15Z,2012-09-24T23:49:15Z,kjj2,NONE,src/bitcoinrpc.cpp,,1063.0,,,26,"@@ -1055,7 +1060,7 @@ Object CallRPC(const string& strMethod, const Array& params) + asio::ssl::stream sslStream(io_service, context); + SSLIOStreamDevice d(sslStream, fUseSSL); + iostreams::stream< SSLIOStreamDevice > stream(d); +- if (!d.connect(GetArg(""-rpcconnect"", ""127.0.0.1""), GetArg(""-rpcport"", ""8332""))) ++ if (!d.connect(GetArg(""-rpcconnect"", ""127.0.0.1""), GetArg(""-rpcport"", itostr(GetDefaultRPCPort()))))","My understanding is that this is well defined. The implicit type conversion is from unsigned short to signed int, which is always safe. (Since the short is unsigned, there is no sign to extend.) The compiler doesn't even blink at this with -Wall, but it does gripe often about the 3 lines in net.h that assign a literal -1 to an unsigned int. 
+",b202d430762c1b5c9925e948f357c66040f95f10 +1680583,1,"Alright I'm fine with this patch then, but now I'm interested in which lines give you that signed/unsigned warning, as we are always interested in finding and fixing these :).""",2012-09-24 17:03:10,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1862#discussion_r1680583,2012-09-25T05:03:10Z,2012-09-25T05:03:10Z,Diapolo,NONE,src/bitcoinrpc.cpp,,1063.0,,,26,"@@ -1055,7 +1060,7 @@ Object CallRPC(const string& strMethod, const Array& params) + asio::ssl::stream sslStream(io_service, context); + SSLIOStreamDevice d(sslStream, fUseSSL); + iostreams::stream< SSLIOStreamDevice > stream(d); +- if (!d.connect(GetArg(""-rpcconnect"", ""127.0.0.1""), GetArg(""-rpcport"", ""8332""))) ++ if (!d.connect(GetArg(""-rpcconnect"", ""127.0.0.1""), GetArg(""-rpcport"", itostr(GetDefaultRPCPort()))))","Alright I'm fine with this patch then, but now I'm interested in which lines give you that signed/unsigned warning, as we are always interested in finding and fixing these :). +",b202d430762c1b5c9925e948f357c66040f95f10 +3689433,1,"Because the old text """"src/qt/res/src/*.svg"""" had covered the newly generated src/qt/res/src/bitcoin.svg i had to """"rewrite it arithmetically"""". :) I think it's okay like this.""",2013-04-07 19:10:57,jonasschnelli,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/2477#discussion_r3689433,2013-04-08T07:10:57Z,2013-04-08T08:37:30Z,jonasschnelli,CONTRIBUTOR,doc/assets-attribution.txt,,3.0,,,4,"@@ -1,5 +1,8 @@ + Icon: src/qt/res/icons/clock*.png, src/qt/res/icons/tx*.png, +- src/qt/res/src/*.svg ++ src/qt/res/src/clock_green.svg, src/qt/res/src/clock1.svg ++ src/qt/res/src/clock2.svg, src/qt/res/src/clock3.svg","Because the old text ""src/qt/res/src/*.svg"" had covered the newly generated src/qt/res/src/bitcoin.svg i had to ""rewrite it arithmetically"". :) +I think it's okay like this. 
+",a653191f748f56addf0cfa219319cfc5358b962d +3686931,0,"Why not a single mapSigHashCache for the whole EvalScript evaluation?""",2013-04-07 06:29:02,sipa,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/2275#discussion_r3686931,2013-04-07T18:29:02Z,2013-04-08T23:50:28Z,sipa,MEMBER,src/script.cpp,,,,,1,"@@ -1061,16 +1064,29 @@ bool EvalScript(vector >& stack, const CScript& script, co + scriptCode.FindAndDelete(CScript(vchSig)); + } + ++ // Avoid repeatedly recomputing signature hashes ++ map mapSigHashCache;","Why not a single mapSigHashCache for the whole EvalScript evaluation? +",61a29a7c0676eb0d422ff828f0ba006ba0fc8e2e +847483,0,"This should be moved into the for loop, IMO: ```C++ for(unsigned int idx = 0; idx < input.size(); removeChar ? input.remove(idx, 1) : ++idx) ``` (also changed idx type to unsigned since it is compared with .size()) """,2012-05-18 07:36:41,luke-jr,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1329#discussion_r847483,2012-05-18T19:36:41Z,2012-05-21T16:53:31Z,luke-jr,MEMBER,src/qt/bitcoinaddressvalidator.cpp,,44.0,,,33,"@@ -21,21 +21,28 @@ + QValidator::State BitcoinAddressValidator::validate(QString &input, int &pos) const + { + // Correction +- for(int idx=0; idxstream << HTTPReply(200, strReply, fRun) << std::flush; + } + catch (std::exception& e) + { +- ErrorReply(stream, JSONRPCError(-1, e.what()), id); ++ ErrorReply(conn->stream, JSONRPCError(-1, e.what()), id); ++ fRun = false; ++ } ++ catch (Object& e) ++ { ++ ErrorReply(conn->stream, e, id); ++ fRun = false; + } + } + catch (Object& objError) + { +- ErrorReply(stream, objError, id); ++ ErrorReply(conn->stream, objError, id); ++ break; + } + catch (std::exception& e) + { +- ErrorReply(stream, JSONRPCError(-32700, e.what()), id); ++ ErrorReply(conn->stream, JSONRPCError(-32700, e.what()), id);","Same fRun thing here too... someone should probably note whether -32700 vs -1 is the correct response? 
+",96c5269511b0cecbea67c0981aaea1a8a3345ba3 +268795,1,"Good catch :)""",2011-12-04 03:24:08,luke-jr,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/677#discussion_r268795,2011-12-04T14:24:08Z,2011-12-14T06:08:26Z,luke-jr,MEMBER,src/main.h,,536.0,,,23,"@@ -523,10 +530,10 @@ class CTransaction + return dPriority > COIN * 144 / 250; + } + +- int64 GetMinFee(unsigned int nBlockSize=1, bool fAllowFree=true, bool fForRelay=false) const ++ int64 GetMinFee(unsigned int nBlockSize=1, bool fAllowFree=true, enum GetMinFee_mode mode=GMF_BLOCK) const + { + // Base fee is either MIN_TX_FEE or MIN_RELAY_TX_FEE +- int64 nBaseFee = fForRelay ? MIN_RELAY_TX_FEE : MIN_TX_FEE; ++ int64 nBaseFee = (mode == GMF_RELAY) ? MIN_RELAY_TX_FEE : MIN_TX_FEE;","Good catch :) +",dbbf1d4a48c8761a67a4477bef48f17c0badef7b +1660813,2,"I really don't like CBlocks storing their own reject string, seems like a layer violation. Maybe not directly related to this change, as nDoS does the same. I'd rather see a CValidationResult which stores such information, which is returned or pass-by-re""",2012-09-21 00:55:09,sipa,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1816#discussion_r1660813,2012-09-21T12:55:09Z,2014-11-20T15:33:15Z,sipa,MEMBER,src/main.h,,,,,1,"@@ -837,6 +837,8 @@ class CBlock + // Denial-of-service detection: + mutable int nDoS; + bool DoS(int nDoSIn, bool fIn) const { nDoS += nDoSIn; return fIn; } ++ mutable std::string strRejectReason; ++ bool reject(const std::string& strRejectReasonIn, bool fIn) const { strRejectReason = strRejectReasonIn; return fIn; }","I really don't like CBlocks storing their own reject string, seems like a layer violation. Maybe not directly related to this change, as nDoS does the same. + +I'd rather see a CValidationResult which stores such information, which is returned or pass-by-ref inside the block- and transaction validation functions. That's maybe out of scope for this change, though. 
+",b867e409e5dd34b84eb9d6d0d8f257dbb19b986d +2604303,1,"Can we keep a space after starting comments :)?""",2013-01-10 02:08:31,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/2129#discussion_r2604303,2013-01-10T13:08:31Z,2013-01-10T13:08:31Z,Diapolo,NONE,src/wallet.cpp,,1157.0,,,5,"@@ -1154,7 +1154,10 @@ bool CWallet::CreateTransaction(const vector >& vecSend, CW + BOOST_FOREACH(PAIRTYPE(const CWalletTx*, unsigned int) pcoin, setCoins) + { + int64 nCredit = pcoin.first->vout[pcoin.second].nValue; +- dPriority += (double)nCredit * pcoin.first->GetDepthInMainChain(); ++ //The priority after the next block (depth+1) is used instead of the current,","Can we keep a space after starting comments :)? +",d7836552e64b1f949385f7b11836ece99f7c3d67 +2487321,2,"And I still don't get the meaning in german from that weird english description...""",2012-12-20 23:22:06,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/2048#discussion_r2487321,2012-12-21T10:22:06Z,2012-12-21T10:22:06Z,Diapolo,NONE,src/init.cpp,,261.0,,,4,"@@ -258,6 +258,7 @@ bool static Bind(const CService &addr, unsigned int flags) { + "" -onlynet= "" + _(""Only connect to nodes in network (IPv4, IPv6 or Tor)"") + ""\n"" + + "" -discover "" + _(""Discover own IP address (default: 1 when listening and no -externalip)"") + ""\n"" + + "" -irc "" + _(""Find peers using internet relay chat (default: 0)"") + ""\n"" + ++ "" -checkpoints "" + _(""Lock in block chain with compiled-in checkpoints (default: 1)"") + ""\n"" +","And I still don't get the meaning in german from that weird english description... 
+",e6955d04111e842cce55dc230d9dcc971560a299 +948155,1,"That Commented out code should be removed, if not used :).""",2012-06-07 11:21:24,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1405#discussion_r948155,2012-06-07T23:21:24Z,2012-06-07T23:21:34Z,Diapolo,NONE,src/db.cpp,,658.0,,,138,"@@ -535,6 +551,118 @@ CBlockIndex static * InsertBlockIndex(uint256 hash) + return pindexNew; + } + ++bool CTxDB::PruneBlockIndex(uint256 hashPruneFrom, uint256 hashPruneTo) ++{ ++// TODO: assert here, but cant #include main.h ++// if (hashPruneFrom != 0) ++// assert(hashPruneTo == hashBestBlock); ++ ++ CBlockIndex* pindexScan = pindexGenesisBlock; ++ uint256 hashOldBestCheckpoint; ++ if (ReadHashBestCheckpoint(hashOldBestCheckpoint) && hashOldBestCheckpoint == hashPruneTo) ++ return true; ++ ++ if (!mapBlockIndex.count(hashPruneTo)) ++ return true; ++ ++ if (hashPruneFrom != 0) ++ pindexScan = mapBlockIndex[hashPruneFrom]; ++ assert(pindexScan); ++ ++ printf(""Pruning Block Index from %s to %s.\n"", hashPruneFrom.ToString().substr(0,20).c_str(), hashPruneTo.ToString().substr(0,20).c_str()); ++ ++ // Cache of Txes by hash -> txouts spent before hashPruneTo flags + cant be deleted flag ++ map, bool> > mapTxIndexCache; ++ ++ while (pindexScan != NULL && *(pindexScan->phashBlock) != hashPruneTo) ++ { ++ if(fRequestShutdown) ++ return true; ++ ++ CBlock block; ++ block.ReadFromDisk(pindexScan); ++ ++ BOOST_FOREACH(CTransaction& tx, block.vtx) ++ { ++ if (tx.IsCoinBase()) ++ continue; ++ ++ BOOST_FOREACH(CTxIn& txin, tx.vin) ++ { ++ COutPoint& txout = txin.prevout; ++ uint256& hash = txout.hash; ++ pair, bool>& pairTx = mapTxIndexCache[hash]; ++ ++ if (pairTx.first.size() == 0) ++ { ++ CTxIndex txindex; ++ if (!ReadTxIndex(hash, txindex)) ++ { ++ // This should only ever happen if we get interrupted pruning and dont WriteHashBestCheckpoint ++ pairTx.second = false; ++ break; ++ } ++ ++ vector& vSpent = txindex.vSpent; ++ unsigned int vouts = vSpent.size(); ++ ++ 
pairTx.first.resize(vouts); ++ ++ pairTx.second = true; ++ for (unsigned int i = 0; i < vouts; i++) ++ { ++ if (vSpent[i].IsNull()) ++ { ++ pairTx.second = false; ++ break; ++ } ++ pairTx.first[i] = false; ++ } ++ } ++ ++ if (pairTx.second == false) ++ continue; ++ ++ pairTx.first[txout.n] = true; ++ } ++ } ++ ++ pindexScan = pindexScan->pnext; ++ } ++ ++ // TODO: It may be prudent to use DB Transactions here, but if we do we overrun our maximum lock objects ++ //if (!TxnBegin()) ++ // return false; ++ ++ unsigned int nTxsPruned = 0; ++ typedef pair, bool> > TxIndexCachePairType; ++ BOOST_FOREACH(TxIndexCachePairType& pair, mapTxIndexCache) ++ { ++ bool fPrunable = true; ++ BOOST_FOREACH(bool fSpent, pair.second.first) ++ if (!fSpent) ++ { ++ fPrunable = false; ++ break; ++ } ++ ++ if (!fPrunable) ++ continue; ++ ++ EraseTxIndex(pair.first); ++ nTxsPruned++; ++ } ++ ++ WriteHashBestCheckpoint(hashPruneTo); ++ ++ //if (!TxnCommit())","That Commented out code should be removed, if not used :). +",24f4c50f2997bfc9e374c26b25e234dea549774f +553739,2,"Do we really want to increment a full major version...not that it matters, but it seems excessive...""",2012-03-13 08:44:26,TheBlueMatt,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/932#discussion_r553739,2012-03-13T19:44:26Z,2012-03-13T19:44:26Z,TheBlueMatt,CONTRIBUTOR,src/serialize.h,,56.0,,,5,"@@ -53,7 +53,7 @@ + class CAutoFile; + static const unsigned int MAX_SIZE = 0x02000000; + +-static const int PROTOCOL_VERSION = 60000; ++static const int PROTOCOL_VERSION = 70000;","Do we really want to increment a full major version...not that it matters, but it seems excessive... 
+",2fef9dac628524a528263e0ac1efcb719cd8a98d +263952,0,"coding style: brace on next line""",2011-12-01 03:13:49,sipa,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/654#discussion_r263952,2011-12-01T14:13:49Z,2011-12-01T14:13:49Z,sipa,MEMBER,src/net.cpp,,1226.0,,,22,"@@ -1213,8 +1214,28 @@ static const char *strDNSSeed[] = { + ""dnsseed.bluematt.me"", + }; + +-void DNSAddressSeed() ++void ThreadDNSAddressSeed(void* parg) + { ++ IMPLEMENT_RANDOMIZE_STACK(ThreadDNSAddressSeed(parg)); ++ try ++ { ++ vnThreadsRunning[6]++; ++ ThreadDNSAddressSeed2(parg); ++ vnThreadsRunning[6]--; ++ } ++ catch (std::exception& e) {","coding style: brace on next line +",2bc6cecebba52e32db43a0b2d9b519ac4a48c479 +314387,0,"re select specific coins: If you read the original motivation for this patch it was to prevent linkages of addresses/private-keys. Selecting individual coins doesn't matter for that. re the RPC command arguments: I'm not sold that it's worth doing what """,2011-12-24 14:17:07,coderrr,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/415#discussion_r314387,2011-12-25T01:17:07Z,2012-03-23T09:45:25Z,,NONE,src/bitcoinrpc.cpp,,578.0,,,18,"@@ -498,38 +498,74 @@ Value settxfee(const Array& params, bool fHelp) + + Value sendtoaddress(const Array& params, bool fHelp) + { +- if (pwalletMain->IsCrypted() && (fHelp || params.size() < 2 || params.size() > 4)) ++ string crypt_usage = pwalletMain->IsCrypted() ? 
""\nrequires wallet passphrase to be set with walletpassphrase first"" : """"; ++ ++ if (fHelp || params.size() < 2 || params.size() > 4) + throw runtime_error( +- ""sendtoaddress [comment] [comment-to]\n"" +- "" is a real and is rounded to the nearest 0.00000001\n"" +- ""requires wallet passphrase to be set with walletpassphrase first""); +- if (!pwalletMain->IsCrypted() && (fHelp || params.size() < 2 || params.size() > 4)) ++ ""sendtoaddress [:[,[,...]]] [comment] [comment-to]\n"" ++ "" is a real and is rounded to the nearest 0.00000001"" + crypt_usage); ++ ++ string strAddress = params[0].get_str(); ++ vector splitAddresses; ++ boost::split(splitAddresses, strAddress, boost::is_any_of("":""));","re select specific coins: If you read the original motivation for this patch it was to prevent linkages of addresses/private-keys. Selecting individual coins doesn't matter for that. + +re the RPC command arguments: I'm not sold that it's worth doing what you suggested purely because of your aversion to bitcoind parsing strings. The way it's done now is pretty minimally invasive and the other ways sound like a lot more changes. But I'd like to hear other people's opinions on the matter. +",a4d98b650bc4d644c6188ea50891a80061bb0c8b +2563520,0,"Oh, yes - you'll need compatibility with boost filesystem v2 though... 
for now""",2013-01-07 01:12:09,sipa,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/2124#discussion_r2563520,2013-01-07T12:12:09Z,2013-09-16T21:20:57Z,sipa,MEMBER,src/util.cpp,,977.0,,,1,"@@ -944,12 +944,31 @@ bool WildcardMatch(const string& str, const string& mask) + return WildcardMatch(str.c_str(), mask.c_str()); + } + +- +- +- +- +- +- ++vector GetFilesAtPath(const boost::filesystem::path& _path, unsigned int flags) ++{ ++ vector vstrFiles; ++ if (!boost::filesystem::exists(_path)) ++ throw runtime_error(""Path does not exist.""); ++ ++ if ((flags & file_option_flags::REGULAR_FILES) && boost::filesystem::is_regular_file(_path)) ++ { ++ vstrFiles.push_back(_path.filename().string());","Oh, yes - you'll need compatibility with boost filesystem v2 though... for now +",6c7f86ae371f955371fba05c7f747e4659cd39c8 +1926227,2,"I don't really like the fact that the hash in CBlockIndex::phashBlock is optional. We have a GetBlockHash()... why doesn't it just calculate the hash if it's not available? """,2012-10-23 21:14:24,sipa,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1953#discussion_r1926227,2012-10-24T09:14:24Z,2012-10-24T09:14:44Z,sipa,MEMBER,src/main.cpp,,1561.0,,,4,"@@ -1558,7 +1558,8 @@ bool CBlock::ConnectBlock(CBlockIndex* pindex, CCoinsViewCache &view, bool fJust + // Now that the whole chain is irreversibly beyond that time it is applied to all blocks except the + // two in the chain that violate it. This prevents exploiting the issue against nodes in their + // initial block download. +- bool fEnforceBIP30 = !((pindex->nHeight==91842 && pindex->GetBlockHash() == uint256(""0x00000000000a4d0a398161ffc163c503763b1f4360639393e0e4c8e300e0caec"")) ||","I don't really like the fact that the hash in CBlockIndex::phashBlock is optional. We have a GetBlockHash()... why doesn't it just calculate the hash if it's not available? 
+",faff50d129b6d4b9e6397ac989218e83a26ae692 +1848920,0,"0.7.0 is a first-time stable release: it's built off master, not a stable branch. I wouldn't suggest touching the user's PGP setup, but verifying without touching it. If GPG really needs to keep keys somewhere, ~/.bitcoin/.gnupg or similar makes sense.""",2012-10-15 11:21:27,luke-jr,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1935#discussion_r1848920,2012-10-15T23:21:27Z,2012-10-15T23:21:39Z,luke-jr,MEMBER,contrib/verifysfbinaries/verify.sh,,85.0,,,85,"@@ -0,0 +1,119 @@ ++#!/bin/bash ++ ++### This script attempts to download the signature file SHA256SUMS.asc from SourceForge ++### It first checks if the signature passes, and then downloads the files specified in ++### the file, and checks if the hashes of these files match those that are specified ++### in the signature file. ++### The script returns 0 if everything passes the checks. It returns 1 if either the ++### signature check or the hash check doesn't pass. If an error occurs the return value is 2 ++ ++function clean_up { ++ for file in $* ++ do ++ rm ""$file"" 2> /dev/null ++ done ++} ++ ++WORKINGDIR=""/tmp/bitcoin"" ++TMPFILE=""hashes.tmp"" ++ ++#this URL is used if a version number is not specified as an argument to the script ++SIGNATUREFILE=""http://downloads.sourceforge.net/project/bitcoin/Bitcoin/bitcoin-0.7.1/test/SHA256SUMS.asc"" ++ ++SIGNATUREFILENAME=""SHA256SUMS.asc"" ++RCSUBDIR=""test/"" ++BASEDIR=""http://downloads.sourceforge.net/project/bitcoin/Bitcoin/"" ++VERSIONPREFIX=""bitcoin-"" ++RCVERSIONSTRING=""rc"" ++ ++if [ ! 
-d ""$WORKINGDIR"" ]; then ++ mkdir ""$WORKINGDIR"" ++fi ++ ++cd ""$WORKINGDIR"" ++ ++#test if a version number has been passed as an argument ++if [ -n ""$1"" ]; then ++ #let's also check if the version number includes the prefix 'bitcoin-', ++ # and add this prefix if it doesn't ++ if [[ $1 == ""$VERSIONPREFIX""* ]]; then ++ VERSION=""$1"" ++ else ++ VERSION=""$VERSIONPREFIX$1"" ++ fi ++ ++ #now let's see if the version string contains ""rc"", and strip it off if it does ++ # and simultaneously add RCSUBDIR to BASEDIR, where we will look for SIGNATUREFILENAME ++ if [[ $VERSION == *""$RCVERSIONSTRING""* ]]; then ++ BASEDIR=""$BASEDIR${VERSION/%-$RCVERSIONSTRING*}/"" ++ BASEDIR=""$BASEDIR$RCSUBDIR"" ++ else ++ BASEDIR=""$BASEDIR$VERSION/"" ++ fi ++ ++ SIGNATUREFILE=""$BASEDIR$SIGNATUREFILENAME"" ++else ++ BASEDIR=""${SIGNATUREFILE%/*}/"" ++fi ++ ++#first we fetch the file containing the signature ++WGETOUT=$(wget -N ""$BASEDIR$SIGNATUREFILENAME"" 2>&1) ++ ++#and then see if wget completed successfully ++if [ $? -ne 0 ]; then ++ echo ""Error: couldn't fetch signature file. Have you specified the version number in the following format?"" ++ echo ""[bitcoin-]-[rc[0-9]] (example: bitcoin-0.7.1-rc1)"" ++ echo ""wget output:"" ++ echo ""$WGETOUT""|sed 's/^/\t/g' ++ exit 2 ++fi ++ ++#then we check it ++GPGOUT=$(gpg --yes --decrypt --output ""$TMPFILE"" ""$SIGNATUREFILENAME"" 2>&1) ++ ++#return value 0: good signature ++#return value 1: bad signature ++#return value 2: gpg error ++ ++RET=""$?"" ++if [ $RET -ne 0 ]; then ++ if [ $RET -eq 1 ]; then ++ #and notify the user if it's bad ++ echo ""Bad signature."" ++ elif [ $RET -eq 2 ]; then ++ #or if a gpg error has occured ++ echo ""gpg error. Do you have Gavin's code signing key installed?""","0.7.0 is a first-time stable release: it's built off master, not a stable branch. + +I wouldn't suggest touching the user's PGP setup, but verifying without touching it. 
If GPG really needs to keep keys somewhere, ~/.bitcoin/.gnupg or similar makes sense. + +contrib/gitian-downloader contains PGP keys. There's also a git repository here on GitHub with signatures of multiple developers for most releases which would be better to use than the SHA256SUMS file (which can only have one signature). +",de91ea0c0c2fead60bfe9a531558cbe1c562346e +2106671,1,"Indeed, I just used what was there. Sometimes I have not that wide view/knowlegde you have in the area of coding :).""",2012-11-12 20:21:51,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1479#discussion_r2106671,2012-11-13T07:21:51Z,2012-11-13T07:21:51Z,Diapolo,NONE,src/wallet.h,,571.0,,,31,"@@ -563,6 +567,20 @@ class CWalletTx : public CMerkleTx + return nCreditCached; + } + ++ int64 GetImmatureCredit(bool fUseCache=true) const ++ {","Indeed, I just used what was there. Sometimes I have not that wide view/knowlegde you have in the area of coding :). +",966a0e8cc94f2590521e0a2513e0cea32b5bb005 +775576,0,"Both evaluate to binary all-ones and are equivalent on all architectures with two's complement notation for signed numbers (ie, at least all that windows supports.. and certainly that bitcoin supports).""",2012-05-04 05:29:53,laanwj,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1180#discussion_r775576,2012-05-04T17:29:53Z,2012-05-04T17:29:53Z,laanwj,MEMBER,src/net.cpp,,624.0,,,5,"@@ -621,7 +621,7 @@ void ThreadSocketHandler2(void* parg) + if (nSelect == SOCKET_ERROR) + { + int nErr = WSAGetLastError(); +- if (hSocketMax > -1) ++ if (hSocketMax > (SOCKET) -1)","Both evaluate to binary all-ones and are equivalent on all architectures with two's complement notation for signed numbers (ie, at least all that windows supports.. and certainly that bitcoin supports). 
+",024fa1cb44b8ec577fef07e7b37a4e5b0501dbea +4119756,0,"zlib 1.2.6 is still available on sourceforge: http://sourceforge.net/projects/libpng/files/zlib/1.2.6/zlib-1.2.6.tar.gz/download same with libpng 1.5.9: http://sourceforge.net/projects/libpng/files/libpng15/older-releases/1.5.9/libpng-1.5.9.tar.gz/downlo""",2013-05-07 06:33:31,laanwj,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/2622#discussion_r4119756,2013-05-07T18:33:31Z,2013-05-07T18:33:31Z,laanwj,MEMBER,doc/release-process.txt,,32.0,,,7,"@@ -28,8 +28,8 @@ + wget 'http://miniupnp.free.fr/files/download.php?file=miniupnpc-1.6.tar.gz' -O miniupnpc-1.6.tar.gz + wget 'http://www.openssl.org/source/openssl-1.0.1c.tar.gz' + wget 'http://download.oracle.com/berkeley-db/db-4.8.30.NC.tar.gz' +- wget 'http://zlib.net/zlib-1.2.6.tar.gz' +- wget 'ftp://ftp.simplesystems.org/pub/libpng/png/src/libpng-1.5.9.tar.gz' ++ wget 'http://zlib.net/zlib-1.2.8.tar.gz' ++ wget 'http://prdownloads.sourceforge.net/libpng/libpng-1.6.2.tar.gz?download'","zlib 1.2.6 is still available on sourceforge: http://sourceforge.net/projects/libpng/files/zlib/1.2.6/zlib-1.2.6.tar.gz/download +same with libpng 1.5.9: http://sourceforge.net/projects/libpng/files/libpng15/older-releases/1.5.9/libpng-1.5.9.tar.gz/download +",5f479bec525c8dbf12eb431f22577b62b9a9d63d +3794197,0,"a new script must be called before packaging (fancy dmg) and before code sign. """,2013-04-15 02:18:25,jonasschnelli,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/2532#discussion_r3794197,2013-04-15T14:18:25Z,2013-04-15T14:18:25Z,jonasschnelli,CONTRIBUTOR,doc/release-process.txt,,83.0,,,4,"@@ -80,6 +80,7 @@ + make + export QTDIR=/opt/local/share/qt4 # needed to find translations/qt_*.qm files + T=$(contrib/qt_translations.py $QTDIR/translations src/qt/locale) ++ python2.7 share/qt/clean_mac_info_plist.py","a new script must be called before packaging (fancy dmg) and before code sign. 
+",f95279ba79be1c46fe14468269ae53cdb3ac9c24 +3695711,0,"Luke had a pullreq some time ago which introduced this abstraction, yes. I think it's overkill. Also, I want to extend it to transaction hashes too.""",2013-04-08 02:52:46,sipa,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/2478#discussion_r3695711,2013-04-08T14:52:46Z,2013-04-08T14:52:46Z,sipa,MEMBER,src/main.h,,198.0,,,4,"@@ -195,11 +195,6 @@ + + + +-static inline std::string BlockHashStr(const uint256& hash)","Luke had a pullreq some time ago which introduced this abstraction, yes. I think it's overkill. Also, I want to extend it to transaction hashes too. +",1c06aa98c63fff02679d446588fad06ae8cd706f +3732049,0,"If we want to give people a choice about how long the hashes are (using a setting in bitcoin.conf), then surely this function would need to be re-introduced again, wouldn't it?""",2013-04-09 23:53:29,rebroad,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/2478#discussion_r3732049,2013-04-10T11:53:29Z,2013-04-10T11:53:29Z,rebroad,CONTRIBUTOR,src/main.h,,198.0,,,4,"@@ -195,11 +195,6 @@ + + + +-static inline std::string BlockHashStr(const uint256& hash)","If we want to give people a choice about how long the hashes are (using a setting in bitcoin.conf), then surely this function would need to be re-introduced again, wouldn't it? +",1c06aa98c63fff02679d446588fad06ae8cd706f +3685878,1,"Can you indent these and the 2 below with the above line :)?""",2013-04-06 22:51:43,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/2477#discussion_r3685878,2013-04-07T10:51:43Z,2013-04-08T08:37:30Z,Diapolo,NONE,doc/assets-attribution.txt,,3.0,,,4,"@@ -1,5 +1,8 @@ + Icon: src/qt/res/icons/clock*.png, src/qt/res/icons/tx*.png, +- src/qt/res/src/*.svg ++ src/qt/res/src/clock_green.svg, src/qt/res/src/clock1.svg ++ src/qt/res/src/clock2.svg, src/qt/res/src/clock3.svg","Can you indent these and the 2 below with the above line :)? 
+",a653191f748f56addf0cfa219319cfc5358b962d +1527031,1,"Now I got it yes, my brain told me hey, when SUCKS4 and -proxy, SetLimited(Tor and IPv6), which currently is not the case ^^. But I like the idea, as really no one expects a non-proxy connection, when -proxy was set. If you want me to integrate tha""",2012-09-04 08:05:36,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1781#discussion_r1527031,2012-09-04T20:05:36Z,2012-11-04T16:26:50Z,Diapolo,NONE,src/init.cpp,,,,,1,"@@ -226,9 +226,10 @@ bool static Bind(const CService &addr, bool fError = true) { + "" -dbcache= "" + _(""Set database cache size in megabytes (default: 25)"") + ""\n"" + + "" -dblogsize= "" + _(""Set database disk log size in megabytes (default: 100)"") + ""\n"" + + "" -timeout= "" + _(""Specify connection timeout in milliseconds (default: 5000))"") + ""\n"" + +- "" -proxy= "" + _(""Connect through socks proxy"") + ""\n"" + +- "" -socks= "" + _(""Select the version of socks proxy to use (4-5, default: 5)"") + ""\n"" + +- "" -tor= "" + _(""Use proxy to reach tor hidden services (default: same as -proxy)"") + ""\n"" ++ "" -proxy= "" + _(""Connect through SOCKS proxy"") + ""\n"" + ++ "" -socks= "" + _(""Select SOCKS version for -proxy (4 or 5, default: 5)"") + ""\n"" + ++ "" -proxy6= "" + _(""Use separate SOCKS5 proxy to reach IPv6 peers (default: -proxy if no -socks=4)"") + ""\n"" +","Now I got it yes, my brain told me hey, when SUCKS4 and -proxy, SetLimited(Tor and IPv6), which currently is not the case ^^. But I like the idea, as really no one expects a non-proxy connection, when -proxy was set. If you want me to integrate that change that's good, as it makes sense (even as another special-case). +",d513e73239b03774cf81c32a329f2464824057f7 +1527632,0,"I found out that setting an invalid -proxy leads to not even entering the BOOST_FOREACH loops, as vhListenSocket and vNodes are empty. That means hSocketMax simply keeps the init value of 0, which leads to 10022 spam. 
I also did some research for the fir""",2012-09-04 08:50:37,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1772#discussion_r1527632,2012-09-04T20:50:37Z,2012-09-04T20:51:59Z,Diapolo,NONE,src/net.cpp,,787.0,,,10,"@@ -781,10 +781,9 @@ void ThreadSocketHandler2(void* parg) + return; + if (nSelect == SOCKET_ERROR) + { +- int nErr = WSAGetLastError(); +- if (hSocketMax != INVALID_SOCKET) ++ if ((hSocketMax != INVALID_SOCKET) && (hSocketMax != (SOCKET)0)) + { +- printf(""socket select error %d\n"", nErr); ++ printf(""socket select error %d\n"", WSAGetLastError()); + for (unsigned int i = 0; i <= hSocketMax; i++)","I found out that setting an invalid -proxy leads to not even entering the BOOST_FOREACH loops, as vhListenSocket and vNodes are empty. That means hSocketMax simply keeps the init value of 0, which leads to 10022 spam. + +I also did some research for the first select()-parameter and found this: + +

The first parameter to select() is the maximum file descriptor that is set in the structs PLUS ONE. That is, if you have 20 file descriptors in the sets, and the maximum value a file descriptor has is 123, then the value passed as the first parameter must be 124.
+ +This makes me think the + 1 before hSocketMax in select() is indeed correct and valid. +",8207857f401bc1a48f863be646c5a508a7cdfe9c +766471,0,"We could define a constant for (unsigned int) -1, as it's a magic marker value we use in many places. """,2012-05-02 17:56:15,laanwj,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1180#discussion_r766471,2012-05-03T05:56:15Z,2012-05-03T05:56:15Z,laanwj,MEMBER,src/main.cpp,,1839.0,,,5,"@@ -1836,7 +1836,7 @@ bool CheckDiskSpace(uint64 nAdditionalBytes) + + FILE* OpenBlockFile(unsigned int nFile, unsigned int nBlockPos, const char* pszMode) + { +- if (nFile == -1) ++ if ((nFile < 1) || (nFile == (unsigned int) -1))","We could define a constant for (unsigned int) -1, as it's a magic marker value we use in many places. +",024fa1cb44b8ec577fef07e7b37a4e5b0501dbea +364314,0,"Note: sourceforge shows MD5 and SHA1 sums for files (poke the i-in-a-circle next to a file), and SHA-1 is the default for the shasum utility. Seems to me it would be better to sign a SHASUMS that contained BOTH SHA1 and SHA256 checksums. """,2012-01-18 08:32:06,gavinandresen,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/764#discussion_r364314,2012-01-18T19:32:06Z,2012-01-18T19:32:06Z,gavinandresen,CONTRIBUTOR,doc/release-process.txt,,84.0,,,14,"@@ -77,9 +79,9 @@ + Build output expected: + Bitcoin-Qt.dmg + +-* upload source and builds to SourceForge ++* upload builds to SourceForge + +-* create SHA1SUMS for builds, and PGP-sign it ++* create SHA256SUMS for builds, and PGP-sign it","Note: sourceforge shows MD5 and SHA1 sums for files (poke the i-in-a-circle next to a file), and SHA-1 is the default for the shasum utility. + +Seems to me it would be better to sign a SHASUMS that contained BOTH SHA1 and SHA256 checksums. +",9965e1d044a11cbfdb098d57a6a3c7ba477f36f4 +139375,0,"Passphrase"""" was just fine IMHO. We really want users to use a longer sentence instead of just a password. 
Also, """"Verschlüsselungs-Kennwort zur Dekodierung"""" has too much redundancy.""",2011-09-24 07:18:15,tcatm,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/530#discussion_r139375,2011-09-24T19:18:15Z,2011-09-24T19:18:15Z,lost-tty,NONE,locale/de/LC_MESSAGES/bitcoin.po,,268.0,,,82,"@@ -265,13 +265,13 @@ msgstr ""Passphrase"" + + #: ../../../src/ui.cpp:267 + msgid ""Please supply the current wallet decryption passphrase."" +-msgstr ""Bitte geben Sie die derzeitige Passphrase zur Entschlüsselung der Brieftasche an."" ++msgstr ""Bitte geben Sie das derzeitige Verschlüsselungs-Kennwort zur Dekodierung der Brieftasche an.""","""Passphrase"" was just fine IMHO. We really want users to use a longer sentence instead of just a password. Also, ""Verschlüsselungs-Kennwort zur Dekodierung"" has too much redundancy. +",3449db1bef59fb0deadffd32d9d494815189b6dd +1537620,0,"It is nicer this way, especially if you are stepping through with a debugger.""",2012-09-05 08:11:42,jgarzik,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1786#discussion_r1537620,2012-09-05T20:11:42Z,2012-09-05T20:11:42Z,jgarzik,CONTRIBUTOR,src/net.cpp,,790.0,,,37,"@@ -775,15 +778,16 @@ void ThreadSocketHandler2(void* parg) + } + + vnThreadsRunning[THREAD_SOCKETHANDLER]--; +- int nSelect = select(hSocketMax + 1, &fdsetRecv, &fdsetSend, &fdsetError, &timeout); ++ int nSelect = select(have_fds ? hSocketMax + 1 : 0, ++ &fdsetRecv, &fdsetSend, &fdsetError, &timeout); + vnThreadsRunning[THREAD_SOCKETHANDLER]++; + if (fShutdown) + return; + if (nSelect == SOCKET_ERROR) + { +- int nErr = WSAGetLastError(); +- if (hSocketMax != INVALID_SOCKET) ++ if (have_fds) + { ++ int nErr = WSAGetLastError();","It is nicer this way, especially if you are stepping through with a debugger. +",2387944782fa61a3137afda91e9e8105d8cc5ddf +3853387,1,"If you take a look at some other Qt files, you'll see this is already the case in several files, which I also couldn't compile. 
If you are able to come up with a solution that works, that is fine :).""",2013-04-18 01:40:02,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/2538#discussion_r3853387,2013-04-18T13:40:02Z,2013-04-18T13:40:02Z,Diapolo,NONE,src/qt/splashscreen.cpp,,1.0,,,1,"@@ -1,9 +1,10 @@ ++#include ","If you take a look at some other Qt files, you'll see this is already the case in several files, which I also couldn't compile. If you are able to come up with a solution that works, that is fine :). +",4193a647354634326957f5e00d53fc474a36e125 +1120245,0,"I like the first idea, saves a few lines of code and we won't forget to add it (perhaps as another pull or commit for this one?). I can't comment on the Boost.Thread stuff, but as we include or somewhere else use it (the lib is there), why not ... perhap""",2012-07-08 17:06:14,Diapolo,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/1515#discussion_r1120245,2012-07-09T05:06:14Z,2012-07-16T23:53:42Z,Diapolo,NONE,src/net.cpp,,408.0,,,2,"@@ -407,6 +407,9 @@ bool GetMyExternalIP(CNetAddr& ipRet) + + void ThreadGetMyExternalIP(void* parg)","I like the first idea, saves a few lines of code and we won't forget to add it (perhaps as another pull or commit for this one?). + +I can't comment on the Boost.Thread stuff, but as we include or somewhere else use it (the lib is there), why not ... perhaps main devs can comment here. 
+",36fe96581f343aaae91e34b59c223c8e156f14e3 +2302910,1,"Ok, cool :)""",2012-12-03 20:07:01,laanwj,bitcoin,bitcoin,,https://github.com/bitcoin/bitcoin/pull/2045#discussion_r2302910,2012-12-04T07:07:01Z,2012-12-04T07:07:01Z,laanwj,MEMBER,src/qt/bitcoingui.cpp,,897.0,,,86,"@@ -898,8 +894,12 @@ void BitcoinGUI::backupWallet() + QString filename = QFileDialog::getSaveFileName(this, tr(""Backup Wallet""), saveDir, tr(""Wallet Data (*.dat)"")); + if(!filename.isEmpty()) { + if(!walletModel->backupWallet(filename)) { +- QMessageBox::warning(this, tr(""Backup Failed""), tr(""There was an error trying to save the wallet data to the new location."")); ++ message(tr(""Backup Failed""), tr(""There was an error trying to save the wallet data to the new location.""),","Ok, cool :) +",50ecd7b68970062bfb540798370dbfab6d376086 diff --git a/data/PR inline comments/boto_sentiment_pr_inline_comments_joined.csv b/data/PR inline comments/boto_sentiment_pr_inline_comments_joined.csv new file mode 100644 index 0000000..7de2804 --- /dev/null +++ b/data/PR inline comments/boto_sentiment_pr_inline_comments_joined.csv @@ -0,0 +1,189 @@ +comment_id,polarity,text,created_at_gold,author_login,owner,repo,review_id,html_url,created_at_kaiaulu,updated_at,comment_user_login,author_association,file_path,start_line,line,original_start_line,original_line,position,diff_hunk,body,commit_id +742471,0,"PVS Studio. Also it's free (month license) for opensource projects (http://www.viva64.com/en/b/0092/).""",2012-04-26 12:00:35,evanworley,boto,boto,,https://github.com/boto/boto/pull/727#discussion_r742471,2012-04-27T00:00:35Z,2012-04-27T00:00:35Z,evanworley,CONTRIBUTOR,boto/gs/resumable_upload_handler.py,,406.0,,,34,"@@ -399,11 +397,18 @@ def _attempt_resumable_upload(self, key, fp, file_length, headers, cb, + self._query_server_pos(conn, file_length)) + self.server_has_bytes = server_start + +- # Cannot use incremental md5 calculation if the server already has some of the data. 
+ if server_end: +- self.incremental_md5 = False ++ # If the server already has some of the content, we need to update the md5 with ++ # the bytes that have already been uploaded to ensure we get a complete hash in ++ # the end. ++ print 'Catching up md5 for resumed upload' ++ fp.seek(0) ++ bytes_to_go = server_end + 1 ++ while bytes_to_go: ++ chunk = fp.read(min(key.BufferSize, bytes_to_go)) ++ md5sum.update(chunk) ++ bytes_to_go -= len(chunk) + +- key=key","Cleaned up +",d03726240e0fba8b7366242b360feb5676dfccfe +329404,0,"Hmmm. I don't have an environment at home to test with but I was thinking... why do I have to decode() here??? Shouldn't the object already be in unicode? I remembered thinking about the DeleteMarker and Key objects where they parse xml and pondering the """,2012-01-04 23:47:13,tpodowd,boto,boto,,https://github.com/boto/boto/pull/461#discussion_r329404,2012-01-05T10:47:13Z,2012-01-06T02:46:36Z,tpodowd,CONTRIBUTOR,boto/s3/bucket.py,,523.0,,,66,"@@ -503,15 +509,26 @@ def delete_keys(self, keys, quiet=False, mfa_token=None, headers=None): + key_name = key.name + version_id = key.version_id + else: +- skipped.append(key) ++ if isinstance(key, Prefix): ++ key_name = key.name ++ code = 'PrefixSkipped' # Don't delete Prefix ++ else: ++ key_name = repr(key) # try get a string ++ code = 'InvalidArgument' # other unknown type ++ message = 'Invalid. No delete action taken for this object.' ++ error = Error(key_name, code=code, message=message) ++ result.errors.append(error) + continue +- data += ""%s"" % xml.sax.saxutils.escape(key_name) ++ count += 1 ++ key_name = key_name.decode('utf-8')","Hmmm. I don't have an environment at home to test with but I was thinking... why do I have to decode() here??? Shouldn't the object already be in unicode? I remembered thinking about the DeleteMarker and Key objects where they parse xml and pondering the logic behind the following. 
+ +``` +if name == 'Key': + self.name = value.encode('utf-8') +``` + +I think internally, we should be using unicode keys names. So I think this should be. I'll do some tests with that. + +``` +if name == 'Key': + self.name = value +``` + +That way, I guess we wouldn't need the decode here. +",c7be0020db068420ef4974c1a502c317fbc33947 +1882593,0,"This is just another of the things I noticed and fixed while this pull request was waiting. I don't believe I can create multiple pull requests with the different bits so it got included here. The reason for this change is that the cookie_expiration_pe""",2012-10-18 06:56:04,reversefold,boto,boto,,https://github.com/boto/boto/pull/833#discussion_r1882593,2012-10-18T18:56:04Z,2012-11-27T06:35:23Z,reversefold,CONTRIBUTOR,boto/ec2/elb/__init__.py,,438.0,,,7,"@@ -432,8 +432,8 @@ def create_app_cookie_stickiness_policy(self, name, lb_name, policy_name): + 'PolicyName': policy_name} + return self.get_status('CreateAppCookieStickinessPolicy', params) + +- def create_lb_cookie_stickiness_policy(self, cookie_expiration_period, +- lb_name, policy_name): ++ def create_lb_cookie_stickiness_policy(self, lb_name, policy_name, ++ cookie_expiration_period=None):","This is just another of the things I noticed and fixed while this pull request was waiting. I don't believe I can create multiple pull requests with the different bits so it got included here. + +The reason for this change is that the cookie_expiration_period isn't required as part of these calls. Leaving the value as None (and not sending to the API) allows you to create a stickiness policy with a non-expiring cookie. +",ccfa42f3402c08c79fb8fd5ae0269cb96a58e775 +357720,2,"That's what I get for hand-copying the patch after spending all night on a plane. Thanks for catching that. 
:-\\""",2012-01-16 17:03:04,gholms,boto,boto,,https://github.com/boto/boto/pull/477#discussion_r357720,2012-01-17T04:03:04Z,2012-01-17T04:03:04Z,gholms,CONTRIBUTOR,boto/jsonresponse.py,,145.0,,,13,"@@ -134,12 +134,14 @@ def get_name(self, name): + def startElement(self, name, attrs, connection): + for lm in self.list_marker: + if name.endswith(lm): +- l = ListElement(self.connection, name, self.item_marker, ++ l = ListElement(self.connection, name, self.list_marker, ++ self.item_marker, + pythonize_name=self.pythonize_name) + setattr(self, self.get_name(name), l) + return l + if name in self.item_marker: + e = Element(self.connection, name, parent=self, ++ self.list_marker, self.item_marker, + pythonize_name=self.pythonize_name)","That's what I get for hand-copying the patch after spending all night on a plane. Thanks for catching that. :-\ +",84fd00b5762c95245cdfca117a7495ba3b06a39a +13980,0,"some debug print statements, I think.""",2011-03-30 12:11:44,garnaat,boto,boto,,https://github.com/boto/boto/pull/122#discussion_r13980,2011-03-31T00:11:44Z,2011-03-31T00:11:44Z,garnaat,MEMBER,boto/ec2/autoscale/group.py,,126.0,,,44,"@@ -106,12 +109,21 @@ class AutoScalingGroup(object): + zones = availability_zones or [] + self.availability_zone = availability_zone + self.availability_zones = ListElement(zones) ++ self.group_arn = group_arn ++ self.health_check_type = health_check_type ++ self.health_check_period = health_check_period ++ self.suspended = suspended ++ self.placement_group = placement_group ++ self.vpc_zone = vpc_zone ++ self.metrics = None + self.instances = None + + def __repr__(self): + return 'AutoScalingGroup:%s' % self.name + + def startElement(self, name, attrs, connection): ++ print '-' *50 ++ print 'Start %s' % name","some debug print statements, I think. +",26660231433d920f53d82e985d7b05310f6868d0 +1878831,0,"Not quite sure why these LB cookie methods are included in this PR. Are they related? 
The changes will break existing code but if there is a good reason, it may still be the right thing to do. Just need a bit more background.""",2012-10-18 01:57:22,garnaat,boto,boto,,https://github.com/boto/boto/pull/833#discussion_r1878831,2012-10-18T13:57:22Z,2012-11-27T06:35:23Z,garnaat,MEMBER,boto/ec2/elb/__init__.py,,438.0,,,7,"@@ -432,8 +432,8 @@ def create_app_cookie_stickiness_policy(self, name, lb_name, policy_name): + 'PolicyName': policy_name} + return self.get_status('CreateAppCookieStickinessPolicy', params) + +- def create_lb_cookie_stickiness_policy(self, cookie_expiration_period, +- lb_name, policy_name): ++ def create_lb_cookie_stickiness_policy(self, lb_name, policy_name, ++ cookie_expiration_period=None):","Not quite sure why these LB cookie methods are included in this PR. Are they related? The changes will break existing code but if there is a good reason, it may still be the right thing to do. Just need a bit more background. +",ccfa42f3402c08c79fb8fd5ae0269cb96a58e775 +2491018,0,"I think that's reasonable. One of the changes I've added to the provider module was to log where the credentials are coming from, so I went ahead and added log messages for keyring credentials as well. I also added a log message when the keyring module """,2012-12-21 06:52:51,jamesls,boto,boto,,https://github.com/boto/boto/pull/1157#discussion_r2491018,2012-12-21T17:52:51Z,2012-12-21T17:52:51Z,jamesls,MEMBER,boto/provider.py,,251.0,,,6,"@@ -246,6 +246,11 @@ def get_credentials(self, access_key=None, secret_key=None): + self.secret_key = os.environ[secret_key_name.upper()] + elif config.has_option('Credentials', secret_key_name): + self.secret_key = config.get('Credentials', secret_key_name) ++ elif config.has_option('Credentials', 'keyring'): ++ keyring_name = config.get('Credentials', 'keyring') ++ import keyring","I think that's reasonable. 
One of the changes I've added to the provider module was to log where the credentials are coming from, so I went ahead and added log messages for keyring credentials as well. I also added a log message when the keyring module can't be imported, but still let the ImportError propogate. + +Thanks for the pull request. +",86c0e28ef19e2f3c0f7ffcd649985ec2eed14fe2 +1247543,0,"@leprechaun This should be: ``` BotoConfigLocations.append(os.path.join(os.getcwd(), """".boto"""")) ``` This avoids an invalid path on Windows. Also spaces after or before parenthesis aren't preferred according to PEP8 which I believe @garnaat is trying to""",2012-07-26 07:41:53,jtriley,boto,boto,,https://github.com/boto/boto/pull/885#discussion_r1247543,2012-07-26T19:41:53Z,2012-07-26T19:41:53Z,jtriley,CONTRIBUTOR,boto/pyami/config.py,,46.0,,,13,"@@ -35,12 +35,15 @@ + # This is probably running on App Engine. + expanduser = (lambda x: x) + +-# By default we use two locations for the boto configurations, ++# By default we use three locations for the boto configurations, + # /etc/boto.cfg and ~/.boto (which works on Windows and Unix). ++# os.getcwd() was added so users could have multiple config files ++# without using environment variables + BotoConfigPath = '/etc/boto.cfg' + BotoConfigLocations = [BotoConfigPath] + UserConfigPath = os.path.join(expanduser('~'), '.boto') + BotoConfigLocations.append(UserConfigPath) ++BotoConfigLocations.append( os.getcwd() + ""/.boto"" )","@leprechaun This should be: + +``` +BotoConfigLocations.append(os.path.join(os.getcwd(), "".boto"")) +``` + +This avoids an invalid path on Windows. + +Also spaces after or before parenthesis aren't preferred according to PEP8 which I believe @garnaat is trying to stick with... +",82f46b33874b145abf797a60396e4029c0ca8c09 +1878850,0,"Unfortunately, this will cause SSL certificate verification to fail on all Python versions < 2.7.3. 
The endpoint we use MUST match the commonName in the SSL certificate since the Python ssl module is unable to find the subjectAlt fields in the certificat""",2012-10-18 01:59:42,garnaat,boto,boto,,https://github.com/boto/boto/pull/833#discussion_r1878850,2012-10-18T13:59:42Z,2012-11-27T06:35:23Z,garnaat,MEMBER,boto/rds/__init__.py,,,,,1,"@@ -79,7 +79,7 @@ def connect_to_region(region_name, **kw_params): + class RDSConnection(AWSQueryConnection): + + DefaultRegionName = 'us-east-1' +- DefaultRegionEndpoint = 'rds.amazonaws.com' ++ DefaultRegionEndpoint = 'rds.us-east-1.amazonaws.com'","Unfortunately, this will cause SSL certificate verification to fail on all Python versions < 2.7.3. The endpoint we use MUST match the commonName in the SSL certificate since the Python ssl module is unable to find the subjectAlt fields in the certificate. +",ccfa42f3402c08c79fb8fd5ae0269cb96a58e775 +5668963,0,"Corrected in SHA: 58a13d7""",2013-08-08 08:26:41,toastdriven,boto,boto,,https://github.com/boto/boto/pull/1660#discussion_r5668963,2013-08-08T20:26:41Z,2013-08-08T20:26:41Z,toastdriven,CONTRIBUTOR,boto/dynamodb2/items.py,,,,,1,"@@ -383,6 +383,17 @@ def partial_save(self): + if not final_data: + return False + ++ # Remove the key(s) if present.","Corrected in SHA: 58a13d7 +",58a13d7104e27bd123bbfd85c4d7294497defb94 +2572998,0,"In the unit tests I wrote to handle _sign_string's three use cases (private key file object, private key file name, private key string), the private key file object test was failing because no data was being read from the file object. 
The seek(0) fixed th""",2013-01-07 15:35:34,seandst,boto,boto,,https://github.com/boto/boto/pull/1214#discussion_r2572998,2013-01-08T02:35:34Z,2013-01-08T14:13:11Z,tehsmyers,CONTRIBUTOR,boto/cloudfront/distribution.py,,,,,1,"@@ -654,18 +654,17 @@ def _sign_string(message, private_key_file=None, private_key_string=None): + raise ValueError(""Only specify the private_key_file or the private_key_string not both"") + if not private_key_file and not private_key_string: + raise ValueError(""You must specify one of private_key_file or private_key_string"") +- # if private_key_file is a file object read the key string from there ++ # If private_key_file is a file, read its contents. Otherwise, open it and then read it + if isinstance(private_key_file, file): ++ private_key_file.seek(0)","In the unit tests I wrote to handle _sign_string's three use cases (private key file object, private key file name, private key string), the private key file object test was failing because no data was being read from the file object. The seek(0) fixed the problem. I think the test may have been flawed, so I'll take a closer look before submitting the unit tests for this. + +In hindsight, the seek will probably break on file-like objects that don't support seek, so it's probably best to remove it. Any preparation of the passed-in private_key_file can/should be done before the call to create_signed_url. + +Unless there are objections, I'll go ahead and pull out the seek(0) here. +",5ee626a7b47f44a7d7414b2d178858a501708d54 +1969789,0,"Wouldn't it make more sense for this check to happen up a level? It just seems odd that `compute_hashes_from_fileobj` raises an `EmptyArchiveError`. 
It seems more appropriate for `Vault.upload_archive` to perform the empty file check and raise an except""",2012-10-29 07:20:01,jamesls,boto,boto,,https://github.com/boto/boto/pull/1083#discussion_r1969789,2012-10-29T18:20:01Z,2012-10-29T18:20:01Z,jamesls,MEMBER,boto/glacier/writer.py,,95.0,,,14,"@@ -89,6 +90,10 @@ def compute_hashes_from_fileobj(fileobj, chunk_size=1024 * 1024): + linear_hash.update(chunk) + chunks.append(hashlib.sha256(chunk).digest()) + chunk = fileobj.read(chunk_size) ++ ++ if not chunks: ++ raise EmptyArchiveError()","Wouldn't it make more sense for this check to happen up a level? It just seems odd that `compute_hashes_from_fileobj` raises an `EmptyArchiveError`. It seems more appropriate for `Vault.upload_archive` to perform the empty file check and raise an exception if appropriate +",05915453e3887515338ef9c8a062f78c44430058 +42421,0,"We try not to change the function footprints unless absolutely necessary. Why is it necessary to have both secuirty_group_ids and security_groups?""",2011-06-09 07:38:02,kopertop,boto,boto,,https://github.com/boto/boto/pull/221#discussion_r42421,2011-06-09T19:38:02Z,2011-06-10T06:12:04Z,kopertop,CONTRIBUTOR,boto/ec2/connection.py,,,,,1,"@@ -442,10 +442,10 @@ class EC2Connection(AWSQueryConnection): + [('item', Reservation)], verb='POST') + + def run_instances(self, image_id, min_count=1, max_count=1, +- key_name=None, security_groups=None, +- user_data=None, addressing_type=None, +- instance_type='m1.small', placement=None, +- kernel_id=None, ramdisk_id=None, ++ key_name=None, security_group_ids=None, ++ security_groups=None, user_data=None, ++ addressing_type=None, instance_type='m1.small', ++ placement=None, kernel_id=None, ramdisk_id=None,","We try not to change the function footprints unless absolutely necessary. Why is it necessary to have both secuirty_group_ids and security_groups? 
+",84f4e05c83ef49f0ebfe4a221839d6a29a5675e6 diff --git a/data/PR inline comments/cakephp_sentiment_pr_inline_comments_joined.csv b/data/PR inline comments/cakephp_sentiment_pr_inline_comments_joined.csv new file mode 100644 index 0000000..93386dd --- /dev/null +++ b/data/PR inline comments/cakephp_sentiment_pr_inline_comments_joined.csv @@ -0,0 +1,4243 @@ +comment_id,polarity_gold,text_gold,created_at_gold,author_login,owner,repo,review_id,html_url,created_at_kaiaulu,updated_at,comment_user_login,author_association,file_path,start_line,line,original_start_line,original_line,position,diff_hunk,text_kaiaulu,polarity_kaiaulu,commit_id +6044242,0,"I had it implemented that way originally. The `rijndael` function uses substrings so I decided to make this the same. Being more strict is an option though.""",2013-08-28 07:51:55,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1568#discussion_r6044242,2013-08-28T19:51:55Z,2013-09-02T01:44:56Z,markstory,MEMBER,lib/Cake/Utility/Security.php,,,,,1,"@@ -289,4 +289,69 @@ protected static function _crypt($password, $salt = false) { + return crypt($password, $salt); + } + ++/** ++ * Encrypt a value using AES-256. ++ * ++ * *Caveat* You cannot properly encrypt/decrypt data with trailing null bytes. ++ * Any trailing null bytes will be removed on decryption due to how PHP pads messages ++ * with nulls prior to encryption. ++ * ++ * @param string $plain The value to encrypt. ++ * @param string $key The 256 bit/32 byte key to use as a cipher key. ++ * @return string Encrypted data. ++ * @throws CakeException On invalid data or key. ++ */ ++ public static function encrypt($plain, $key) { ++ self::_checkKey($key, 'encrypt()'); ++ if (empty($plain)) { ++ throw new CakeException(__d('cake_dev', 'The data to encrypt cannot be empty.')); ++ } ++ $key = substr($key, 0, 32);","I had it implemented that way originally. The `rijndael` function uses substrings so I decided to make this the same. 
Being more strict is an option though. +",0,13b870d7e183375822eea4ffd66aaacaeec760ff +5949747,0,"This block of code is repeated 3 times in Hash now. Perhaps it would be best extracted into a helper method that does something like: ```php list($token, $conditions) = self::_splitConditions($token); ``` That would address the duplication in the """,2013-08-23 01:02:48,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1549#discussion_r5949747,2013-08-23T13:02:48Z,2013-08-23T23:04:34Z,markstory,MEMBER,lib/Cake/Utility/Hash.php,,149.0,,,30,"@@ -222,16 +222,36 @@ protected static function _matches(array $data, $selector) { + * @return array The data with $values inserted. + */ + public static function insert(array $data, $path, $values = null) { +- $tokens = explode('.', $path); +- if (strpos($path, '{') === false) { ++ if (strpos($path, '[') === false) { ++ $tokens = explode('.', $path); ++ } else { ++ $tokens = String::tokenize($path, '.', '[', ']'); ++ } ++ ++ if (strpos($path, '{') === false && strpos($path, '[') === false) { + return self::_simpleOp('insert', $data, $tokens, $values); + } + + $token = array_shift($tokens); + $nextPath = implode('.', $tokens); ++ ++ $conditions = false; ++ $position = strpos($token, '['); ++ if ($position !== false) { ++ $conditions = substr($token, $position); ++ $token = substr($token, 0, $position); ++ }","This block of code is repeated 3 times in Hash now. Perhaps it would be best extracted into a helper method that does something like: + +``` php +list($token, $conditions) = self::_splitConditions($token); +``` + +That would address the duplication in the code. 
+",0,a0014e7a303067bb9c36d438de5a70fe819d22a7 +4288367,0,"This looks good, but makes me think we should have originally added methods like `Security::blowfish($input)` and `Security::matchBlowfish($input, $hashedPassword)` or made a separate utility class for Bcrypt that didn't have as difficult to remember meth""",2013-05-18 04:31:19,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1275#discussion_r4288367,2013-05-18T16:31:19Z,2013-05-26T05:59:44Z,markstory,MEMBER,lib/Cake/Controller/Component/Auth/BlowfishPasswordHasher.php,,44.0,,,44,"@@ -0,0 +1,58 @@ ++reset();","Resetting validation after it finishes will just break it :( +",0,1224ed595adb3286a6d94ddee2dab18b3a83d7ae +2427846,0,"as @ADmad and I pointed out, you need to respect the coding conventions. too many whitespaces here inside () for example.""",2012-12-14 15:18:31,dereuromark,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1026#discussion_r2427846,2012-12-15T02:18:31Z,2012-12-17T21:25:05Z,dereuromark,MEMBER,lib/Cake/View/Helper/TextHelper.php,,,,,1,"@@ -228,6 +228,29 @@ public function highlight($text, $phrase, $options = array()) { + } + + /** ++ * Formats paragraphs around given text for all line breaks ++ *
added for single line return ++ *

added for double line return ++ * ++ * @param string $text Text ++ * @return string The text with proper

tags ++ * @link http://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::autoParagraph ++ */ ++ public static function autoParagraph($text) { ++ if ( trim($text) !== '' ) {","as @ADmad and I pointed out, you need to respect the coding conventions. too many whitespaces here inside () for example. +",0,03fdd4a14bfed87d35452b5c24c67fdb9f41b6fb +2385747,0,"`return $readyFds > 0` would suffice here. also, never forget the {} for if statements""",2012-12-11 11:12:22,dereuromark,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1017#discussion_r2385747,2012-12-11T22:12:22Z,2012-12-12T17:24:00Z,dereuromark,MEMBER,lib/Cake/Console/ConsoleInput.php,,,,,1,"@@ -48,4 +48,20 @@ public function read() { + return fgets($this->_input); + } + ++/** ++ * Check if data is available on stdin ++ * ++ * @param integer $timeout An optional time to wait for data ++ * @return bool True for data available, false otherwise ++ */ ++ public function data_avail($timeout = 0) { ++ $read_fds = array($this->_input); ++ $ready_fds = stream_select($read_fds, $w = NULL, $e = NULL, $timeout); ++ ++ if($ready_fds > 0) ++ return true; ++ else ++ return false;","`return $readyFds > 0` would suffice here. also, never forget the {} for if statements +",0,d6579a983819a5a9e9209928a241bec48f3e2733 +2217916,1,"yup. i thought of that too but wanted to avoid introducing a new option. but i agree it's cleaner and less hacky. I'll adjust the PR. 
Thanks.""",2012-11-25 15:17:09,rchavik,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/974#discussion_r2217916,2012-11-26T02:17:09Z,2012-11-28T03:11:43Z,rchavik,MEMBER,lib/Cake/View/Helper/FormHelper.php,,,,,1,"@@ -1094,6 +1071,48 @@ public function input($fieldName, $options = array()) { + unset($options['dateFormat'], $options['timeFormat']); + } + ++ return compact('options', 'label', 'modelKey', 'fieldKey', 'fieldDef', ++ 'divOptions', 'radioOptions', 'error', 'selected', 'dateFormat', ++ 'timeFormat'); ++ } ++ ++/** ++ * Generates a form input element complete with label and wrapper div ++ * ++ * ### Options ++ * ++ * See each field type method for more information. Any options that are part of ++ * $attributes or $options for the different **type** methods can be included in `$options` for input().i ++ * Additionally, any unknown keys that are not in the list below, or part of the selected type's options ++ * will be treated as a regular html attribute for the generated input. ++ * ++ * - `type` - Force the type of widget you want. e.g. `type => 'select'` ++ * - `label` - Either a string label, or an array of options for the label. See FormHelper::label() ++ * - `div` - Either `false` to disable the div, or an array of options for the div. ++ * See HtmlHelper::div() for more options. ++ * - `options` - for widgets that take options e.g. radio, select ++ * - `error` - control the error message that is produced ++ * - `empty` - String or boolean to enable empty select box options. ++ * - `before` - Content to place before the label + input. ++ * - `after` - Content to place after the label + input. ++ * - `between` - Content to place between the label + input. ++ * - `format` - format template for element order. Any element that is not in the array, will not be in the output. 
++ * - Default input format order: array('before', 'label', 'between', 'input', 'after', 'error') ++ * - Default checkbox format order: array('before', 'input', 'between', 'label', 'after', 'error') ++ * - Hidden input will not be formatted ++ * - Radio buttons cannot have the order of input and label elements controlled with these settings. ++ * ++ * @param string $fieldName This should be ""Modelname.fieldname"" ++ * @param array $options Each type of input takes different options. ++ * @return string Completed form widget. ++ * @link http://book.cakephp.org/2.0/en/core-libraries/helpers/form.html#creating-form-elements ++ */ ++ public function input($fieldName, $options = array()) { ++ if (!isset($options['modelKey'])) { ++ $inputOptions = $this->_inputOptions($fieldName, $options); ++ extract($inputOptions);","yup. i thought of that too but wanted to avoid introducing a new option. +but i agree it's cleaner and less hacky. + +I'll adjust the PR. + +Thanks. +",0,6670997283431c81fc687b79bf2c57e62d9992f4 +1942196,0,"fixed (i will install code sniffer quite soon!)""",2012-10-25 03:37:54,thepeg,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/913#discussion_r1942196,2012-10-25T15:37:54Z,2013-01-20T08:13:46Z,marcopeg,NONE,lib/Cake/Model/Validator/CakeValidationRule.php,,,,,1,"@@ -271,6 +274,17 @@ public function process($field, &$data, &$methods) { + $this->_valid = call_user_func_array($methods[$rule], $this->_ruleParams); + } elseif (class_exists('Validation') && method_exists('Validation', $this->_rule)) { + $this->_valid = call_user_func_array(array('Validation', $this->_rule), $this->_ruleParams); ++ } elseif (strpos($this->_rule, '::')) { ++ list($plugin, $class) = pluginSplit($this->_rule); ++ list($className,$method) = explode('::', $class); ++ $location = 'Model/Validation'; ++ if ( $plugin ) $location = $plugin . '.' . $location;","fixed (i will install code sniffer quite soon!) 
+",0,b2cbeeeb501285431bdc7fb2a5b37f260b792261 +1616452,0,"it is for adding fields to a query or to the whitelist?""",2012-09-16 19:45:13,lorenzo,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/851#discussion_r1616452,2012-09-17T07:45:13Z,2012-11-02T09:54:30Z,lorenzo,MEMBER,lib/Cake/Model/Model.php,,,,,1,"@@ -2299,6 +2302,29 @@ public function saveAssociated($data = null, $options = array()) { + } + + /** ++ * Helper method for saveAll() and friends, to add foreign key to fieldlist ++ * ++ * @param string $key fieldname to be added to list ++ * @param array $options ++ * @return array $options ++ */ ++ public function addToFieldList($key, $options) {","it is for adding fields to a query or to the whitelist? +",0,7007dba0eb836f852aaca95fada103bc4ba993a9 +1518476,2,"Ah, true. I forgot about that. Browsers... :(""",2012-09-03 09:02:57,jrbasso,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/811#discussion_r1518476,2012-09-03T21:02:57Z,2012-09-03T21:02:57Z,jrbasso,MEMBER,lib/Cake/View/Helper/FormHelper.php,,1614.0,,,24,"@@ -1607,7 +1612,7 @@ public function postLink($title, $url = null, $options = array(), $confirmMessag + $formName = uniqid('post_'); + $formUrl = $this->url($url); + $out = $this->Html->useTag('form', $formUrl, array('name' => $formName, 'id' => $formName, 'style' => 'display:none;', 'method' => 'post'));","Ah, true. I forgot about that. Browsers... :( +",0,fca98e39f9041c1aee915968e0523dfc83e0c2c8 +1221562,0,"Exceptions should include a useful description based on what the failure is. 
Perhaps 'Invalid encryption scheme chosen'.""",2012-07-23 13:29:22,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/734#discussion_r1221562,2012-07-24T01:29:22Z,2012-07-25T18:21:05Z,markstory,MEMBER,lib/Cake/Network/CakeSocket.php,,,,,1,"@@ -277,4 +298,34 @@ public function reset($state = null) { + return true; + } + +-} ++/** ++ * Encrypts current stream socket, using one of the defined encryption methods ++ * ++ * @param string $type can be one of 'ssl2', 'ssl3', 'ssl23' or 'tls' ++ * @param string $clientOrServer can be one of 'client', 'server'. Default is 'client' ++ * @param boolean $enable enable or disable encryption. Default is true (enable) ++ * @return boolean True on success ++ * @throws SocketException ++ * @see stream_socket_enable_crypto ++ */ ++ public function enableCrypto($type, $clientOrServer = 'client', $enable = true) { ++ if (!array_key_exists($type . '_' . $clientOrServer, $this->_encryptMethods)) { ++ throw new InvalidArgumentException();","Exceptions should include a useful description based on what the failure is. Perhaps 'Invalid encryption scheme chosen'. 
+",0,7418be04d3fd24bd7cef09527bd40e55783c28c6 +157669,1,"lol, I left that variable in there and forgot about it :P""",2011-10-06 02:08:38,lorenzo,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/229#discussion_r157669,2011-10-06T14:08:38Z,2011-10-06T14:08:38Z,lorenzo,MEMBER,lib/Cake/Model/Datasource/Database/Postgres.php,,784.0,,,16,"@@ -781,7 +778,12 @@ class Postgres extends DboSource { + * @return string The database encoding + */ + public function getEncoding() { +- $cosa = $this->_execute('SHOW client_encoding')->fetch();","lol, I left that variable in there and forgot about it :P +",0,ce8ece85aad66f2ad2c38614cd6deba173d43c21 +6432269,0,"sprintf is not necessary here.""",2013-09-18 00:59:49,dereuromark,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1654#discussion_r6432269,2013-09-18T12:59:49Z,2013-10-04T04:05:07Z,dereuromark,MEMBER,lib/Cake/Cache/Engine/MemcachedEngine.php,,,,,1,"@@ -113,14 +127,41 @@ public function init($settings = array()) { + /** + * Settings the memcached instance + * ++ * @throws CacheException when the Memcached extension is not built with the desired serializer engine + */ + protected function _setOptions() { + $this->_Memcached->setOption(Memcached::OPT_LIBKETAMA_COMPATIBLE, true); + +- if (Memcached::HAVE_IGBINARY) { +- $this->_Memcached->setOption(Memcached::OPT_SERIALIZER, Memcached::SERIALIZER_IGBINARY); ++ if (!array_key_exists($this->settings['serializer'], self::$serializer)) { ++ throw new CacheException( ++ __d('cake_dev', sprintf('%s is not a valid serializer engine for Memcached', $this->settings['serializer']))","sprintf is not necessary here. +",0,5d30cb15591af89b33fcbb61c10e4b96452f4ff6 +851797,0,"This is not very readable plus has unnecessary merge with empty array. 
Better to replace with this: ```php if ($merge) { $this->_inputDefaults = array_merge($this->_inputDefaults, $defaults); } else { $this->_inputDefaults = $defaults; } ```""",2012-05-20 19:52:51,ADmad,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/659#discussion_r851797,2012-05-21T07:52:51Z,2012-05-21T07:52:51Z,ADmad,MEMBER,lib/Cake/View/Helper/FormHelper.php,,2595.0,,,22,"@@ -2583,4 +2583,18 @@ protected function _initInputField($field, $options = array()) { + return $result; + } + ++/** ++ * Set/Get inputDefaults for form elements ++ * ++ * @param array $defaults New default values ++ * @param boolean Merge with current defaults ++ * @return array inputDefaults ++ */ ++ public function inputDefaults($defaults = null, $merge = false) { ++ if (!is_null($defaults)) { ++ $this->_inputDefaults = array_merge($merge ? $this->_inputDefaults : array(), (array)$defaults);","This is not very readable plus has unnecessary merge with empty array. Better to replace with this: + +``` php +if ($merge) { + $this->_inputDefaults = array_merge($this->_inputDefaults, $defaults); +} else { + $this->_inputDefaults = $defaults; +} +``` +",0,57ad5e2573198781deb1586242f62a5b62fe26f9 +4976178,2,"This is going to end poorly. I don't like overloading conditions like this, it will break eventually for someone somewhere.""",2013-07-01 13:23:40,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1392#discussion_r4976178,2013-07-02T01:23:40Z,2013-07-11T12:35:45Z,markstory,MEMBER,lib/Cake/Model/Datasource/Database/Mysql.php,,353.0,,,5,"@@ -349,6 +349,12 @@ public function describe($model) { + * @return array + */ + public function update(Model $model, $fields = array(), $values = null, $conditions = null) { ++ $query = array('joins' => array()); ++ if (isset($conditions['joins'])) {","This is going to end poorly. I don't like overloading conditions like this, it will break eventually for someone somewhere. 
+",0,096f5f13eedd763c81e186d024110e32e3e01afd +6052285,0,"Do you mean run the key through pbkdf2 before using it as the key on the AES-256 stage?""",2013-08-28 15:22:08,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1568#discussion_r6052285,2013-08-29T03:22:08Z,2013-09-02T01:44:56Z,markstory,MEMBER,lib/Cake/Utility/Security.php,,,,,1,"@@ -289,4 +289,69 @@ protected static function _crypt($password, $salt = false) { + return crypt($password, $salt); + } + ++/** ++ * Encrypt a value using AES-256. ++ * ++ * *Caveat* You cannot properly encrypt/decrypt data with trailing null bytes. ++ * Any trailing null bytes will be removed on decryption due to how PHP pads messages ++ * with nulls prior to encryption. ++ * ++ * @param string $plain The value to encrypt. ++ * @param string $key The 256 bit/32 byte key to use as a cipher key. ++ * @return string Encrypted data. ++ * @throws CakeException On invalid data or key. ++ */ ++ public static function encrypt($plain, $key) { ++ self::_checkKey($key, 'encrypt()'); ++ if (empty($plain)) { ++ throw new CakeException(__d('cake_dev', 'The data to encrypt cannot be empty.')); ++ } ++ $key = substr($key, 0, 32);","Do you mean run the key through pbkdf2 before using it as the key on the AES-256 stage? +",0,13b870d7e183375822eea4ffd66aaacaeec760ff +5181780,1,"Models is fine, they can be isolated, and are easy to talk to directly though the class registry anyway If a normal request has the same event manager for everything but models, then I'm a happy man for a crud plugin perspective :)""",2013-07-14 01:22:44,jippi,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1418#discussion_r5181780,2013-07-14T13:22:44Z,2013-07-27T21:17:13Z,jippi,CONTRIBUTOR,lib/Cake/Controller/ComponentCollection.php,,55.0,,,71,"@@ -39,20 +41,23 @@ class ComponentCollection extends ObjectCollection implements EventListener { + protected $_Controller = null; + + /** +- * Initializes all the Components for a controller. 
+- * Attaches a reference of each component to the Controller. ++ * The event manager to bind components to. + * +- * @param Controller $Controller Controller to initialize components for. +- * @return void ++ * @var Cake\Event\EventManager + */ +- public function init(Controller $Controller) { +- if (empty($Controller->components)) { +- return; +- } +- $this->_Controller = $Controller; +- $components = static::normalizeObjectArray($Controller->components); +- foreach ($components as $name => $properties) { +- $Controller->{$name} = $this->load($properties['class'], $properties['settings']); ++ protected $_eventManager = null; ++ ++/** ++ * Constructor. ++ * ++ * @param Cake\Controller\Controller $Controller ++ */ ++ public function __construct(Controller $Controller = null) {","Models is fine, they can be isolated, and are easy to talk to directly though the class registry anyway + +If a normal request has the same event manager for everything but models, then I'm a happy man for a crud plugin perspective :) +",0,c8f6c84285080bfc9020401522b01bb7d50777ab +4604911,2,"nooo, why you hate on Wooohoo ! :(""",2013-06-09 08:21:09,jippi,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1342#discussion_r4604911,2013-06-09T20:21:09Z,2013-06-09T20:21:09Z,jippi,CONTRIBUTOR,lib/Cake/Model/Datasource/DboSource.php,,1330.0,,,4,"@@ -1327,7 +1327,7 @@ public function queryAssociation(Model $model, &$linkModel, $type, $association, + } + + /** +- * A more efficient way to fetch associations. Woohoo!","nooo, why you hate on Wooohoo ! :( +",0,42777b7809061116a9162cb0f51398999f5bc826 +4394619,0,"I was thinking the upgrade shell might be better as well. Munging the fixtures might be simpler in terms of long term maintainability and correctness. 
Would updating a the upgrade shell to include a munger, and issuing warnings/exceptions when old styl""",2013-05-26 03:52:40,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1303#discussion_r4394619,2013-05-26T15:52:40Z,2013-05-29T02:13:15Z,markstory,MEMBER,lib/Cake/Test/Fixture/PostFixture.php,,,,,1,"@@ -41,13 +41,16 @@ class PostFixture extends TestFixture { + * @var array + */ + public $fields = array( +- 'id' => array('type' => 'integer', 'key' => 'primary'), ++ 'id' => array('type' => 'integer'), + 'author_id' => array('type' => 'integer', 'null' => false), + 'title' => array('type' => 'string', 'null' => false), + 'body' => 'text', + 'published' => array('type' => 'string', 'length' => 1, 'default' => 'N'), + 'created' => 'datetime', +- 'updated' => 'datetime' ++ 'updated' => 'datetime', ++ 'constraints' => [","I was thinking the upgrade shell might be better as well. Munging the fixtures might be simpler in terms of long term maintainability and correctness. + +Would updating a the upgrade shell to include a munger, and issuing warnings/exceptions when old style fixtures are found sound reasonable? 
+",0,2b90e7dbc75cccae5f7b1fc76eba57bf6fcbe049 +2011137,0,"Extra space after `array(`""",2012-11-01 11:15:03,ADmad,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/913#discussion_r2011137,2012-11-01T22:15:03Z,2013-01-20T08:13:46Z,ADmad,MEMBER,lib/Cake/Test/test_app/Controller/CustomValidationObjectController.php,,,,,1,"@@ -0,0 +1,28 @@ ++ array(","Extra space after `array(` +",0,b2cbeeeb501285431bdc7fb2a5b37f260b792261 +1723748,2,"Oh that is a bit silly.""",2012-09-29 06:36:55,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/875#discussion_r1723748,2012-09-29T18:36:55Z,2012-12-10T10:10:06Z,markstory,MEMBER,lib/Cake/Test/Case/View/Helper/FormHelperTest.php,,6120.0,,,5,"@@ -5966,7 +5966,7 @@ public function testTextAreaWithStupidCharacters() { + 'label' => 'Current Text', 'value' => ""GREAT®"", 'rows' => '15', 'cols' => '75' + )); + $expected = array( +- 'div' => array('class' => 'input text'), ++ 'div' => array('class' => 'input textarea'),","Oh that is a bit silly. +",0,2d908885c86de68f0e0c4cbfd88bce811d23aa54 +1624729,0,"But isn't `fieldList` a copy of the whitelist? I think that is what @lorenzo and I are trying to get at. Perhaps this doesn't need to be a public method? It is just a helper method after all.""",2012-09-17 12:24:15,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/851#discussion_r1624729,2012-09-18T00:24:15Z,2012-11-02T09:54:30Z,markstory,MEMBER,lib/Cake/Model/Model.php,,,,,1,"@@ -2299,6 +2302,29 @@ public function saveAssociated($data = null, $options = array()) { + } + + /** ++ * Helper method for saveAll() and friends, to add foreign key to fieldlist ++ * ++ * @param string $key fieldname to be added to list ++ * @param array $options ++ * @return array $options ++ */ ++ public function addToFieldList($key, $options) {","But isn't `fieldList` a copy of the whitelist? I think that is what @lorenzo and I are trying to get at. Perhaps this doesn't need to be a public method? 
It is just a helper method after all. +",0,7007dba0eb836f852aaca95fada103bc4ba993a9 +1231261,0,"Same about the translation.""",2012-07-24 12:25:55,jrbasso,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/734#discussion_r1231261,2012-07-25T00:25:55Z,2012-07-25T18:21:05Z,jrbasso,MEMBER,lib/Cake/Network/CakeSocket.php,,,,,1,"@@ -277,4 +298,34 @@ public function reset($state = null) { + return true; + } + +-} ++/** ++ * Encrypts current stream socket, using one of the defined encryption methods ++ * ++ * @param string $type can be one of 'ssl2', 'ssl3', 'ssl23' or 'tls' ++ * @param string $clientOrServer can be one of 'client', 'server'. Default is 'client' ++ * @param boolean $enable enable or disable encryption. Default is true (enable) ++ * @return boolean True on success ++ * @throws SocketException ++ * @see stream_socket_enable_crypto ++ */ ++ public function enableCrypto($type, $clientOrServer = 'client', $enable = true) { ++ if (!array_key_exists($type . '_' . $clientOrServer, $this->_encryptMethods)) { ++ throw new InvalidArgumentException(__('Invalid encryption scheme chosen')); ++ } ++ $enableCryptoResult = false; ++ try { ++ $enableCryptoResult = stream_socket_enable_crypto($this->connection, $enable, $this->_encryptMethods[$type . '_' . $clientOrServer]); ++ } catch (Exception $e) { ++ $this->setLastError(null, $e->getMessage()); ++ throw new SocketException($e->getMessage()); ++ } ++ if ($enableCryptoResult === true) { ++ $this->encrypted = $enable; ++ return true; ++ } else { ++ $errorMessage = __('Unable to perform enableCrypto operation on CakeSocket');","Same about the translation. +",0,7418be04d3fd24bd7cef09527bd40e55783c28c6 +2561154,0,"I guess it should. I was more concerned about the hostnames as sharing cookies between domains is going to cause problems. I forgot about path based cookies though. 
Both Client::_storeCookies() & Client::_addCookies() will need to be updated.""",2013-01-06 14:03:47,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1057#discussion_r2561154,2013-01-07T01:03:47Z,2013-01-24T02:45:59Z,markstory,MEMBER,lib/Cake/Network/Http/Client.php,,,,,1,"@@ -0,0 +1,515 @@ ++get('/users', [], ['type' => 'json']);` ++ * ++ * The `type` option sets both the `Content-Type` and `Accept` header, to ++ * the same mime type. When using `type` you can use either a full mime ++ * type or an alias. If you need different types in the Accept and Content-Type ++ * headers you should set them manually and not use `type` ++ * ++ * ### Using authentication ++ * ++ * By using the `auth` key you can use authentication. The type sub option ++ * can be used to specify which authentication strategy you want to use. ++ * CakePHP comes with a few built-in strategies: ++ * ++ * - Basic ++ * - Digest ++ * - Oauth ++ * ++ * ### Using proxies ++ * ++ * By using the `proxy` key you can set authentication credentials for ++ * a proxy if you need to use one.. The type sub option can be used to ++ * specify which authentication strategy you want to use. ++ * CakePHP comes with built-in support for basic authentication. ++ * ++ */ ++class Client { ++ ++/** ++ * Stored configuration for the client. ++ * ++ * @var array ++ */ ++ protected $_config = [ ++ 'host' => null, ++ 'port' => null, ++ 'scheme' => 'http', ++ 'timeout' => 30, ++ 'ssl_verify_peer' => true, ++ 'ssl_verify_depth' => 5, ++ 'ssl_verify_host' => true, ++ 'redirect' => false, ++ ]; ++ ++/** ++ * List of cookies from responses made with this client. ++ * ++ * Cookies are indexed by the cookie's domain or ++ * request host name. ++ * ++ * @var array ++ */ ++ protected $_cookies = []; ++ ++/** ++ * Adapter for sending requests. Defaults to ++ * Cake\Network\Http\Stream ++ * ++ * @var Cake\Network\Http\Stream ++ */ ++ protected $_adapter; ++ ++/** ++ * Create a new HTTP Client. 
++ * ++ * ### Config options ++ * ++ * You can set the following options when creating a client: ++ * ++ * - host - The hostname to do requests on. ++ * - port - The port to use. ++ * - scheme - The default scheme/protocol to use. Defaults to http. ++ * - timeout - The timeout in seconds. Defaults to 30 ++ * - ssl_verify_peer - Whether or not SSL certificates should be validated. ++ * Defaults to true. ++ * - ssl_verify_depth - The maximum certificate chain depth to travers. ++ * Defaults to 5. ++ * - ssl_verify_host - Verify that the certificate and hostname match. ++ * Defaults to true. ++ * - redirect - Number of redirects to follow. Defaults to false. ++ * ++ * @param array $config Config options for scoped clients. ++ */ ++ public function __construct($config = []) { ++ $adapter = 'Cake\Network\Http\Adapter\Stream'; ++ if (isset($config['adapter'])) { ++ $adapter = $config['adapter']; ++ unset($config['adapter']); ++ } ++ $this->config($config); ++ ++ if (is_string($adapter)) { ++ $adapter = new $adapter(); ++ } ++ $this->_adapter = $adapter; ++ } ++ ++/** ++ * Get or set additional config options. ++ * ++ * Setting config will use Hash::merge() for appending into ++ * the existing configuration. ++ * ++ * @param array|null $config Configuration options. null to get. ++ * @return this|array ++ */ ++ public function config($config = null) { ++ if ($config === null) { ++ return $this->_config; ++ } ++ $this->_config = Hash::merge($this->_config, $config); ++ return $this; ++ } ++ ++/** ++ * Get the cookies stored in the Client. ++ * ++ * @return array ++ */ ++ public function cookies() { ++ return $this->_cookies; ++ } ++ ++/** ++ * Do a GET request. ++ * ++ * The $data argument supports a special `_content` key ++ * for providing a request body in a GET request. This is ++ * generally not used but services like ElasticSearch use ++ * this feature. ++ * ++ * @param string $url The url or path you want to request. 
++ * @param array $data The query data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function get($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $body = []; ++ if (isset($data['_content'])) { ++ $body = $data['_content']; ++ unset($data['_content']); ++ } ++ $url = $this->buildUrl($url, $data, $options); ++ return $this->_doRequest( ++ Request::METHOD_GET, ++ $url, ++ $body, ++ $options ++ ); ++ } ++ ++/** ++ * Do a POST request. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The post data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function post($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_POST, $url, $data, $options); ++ } ++ ++/** ++ * Do a PUT request. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The request data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function put($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_PUT, $url, $data, $options); ++ } ++ ++/** ++ * Do a PATCH request. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The request data you want to send. ++ * @param array $options Additional options for the request. 
++ * @return Cake\Network\Http\Response ++ */ ++ public function patch($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_PATCH, $url, $data, $options); ++ } ++ ++/** ++ * Do a DELETE request. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The request data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function delete($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_DELETE, $url, $data, $options); ++ } ++ ++/** ++ * Helper method for doing non-GET requests. ++ * ++ * @param string $method HTTP method. ++ * @param string $url URL to request. ++ */ ++ protected function _doRequest($method, $url, $data, $options) { ++ $request = $this->_createRequest( ++ $method, ++ $url, ++ $data, ++ $options ++ ); ++ return $this->send($request, $options); ++ } ++ ++/** ++ * Does a recursive merge of the parameter with the scope config. ++ * ++ * @param array $options Options to merge. ++ * @return array Options merged with set config. ++ */ ++ protected function _mergeOptions($options) { ++ return Hash::merge($this->_config, $options); ++ } ++ ++/** ++ * Send a request. ++ * ++ * Used internally by other methods, but can also be used to send ++ * handcrafted Request objects. ++ * ++ * @param Cake\Network\Http\Request $request The request to send. ++ * @param array $options Additional options to use. 
++ * @return Cake\Network\Http\Response ++ */ ++ public function send(Request $request, $options = []) { ++ $responses = $this->_adapter->send($request, $options); ++ $host = parse_url($request->url(), PHP_URL_HOST); ++ foreach ($responses as $response) { ++ $this->_storeCookies($response, $host); ++ } ++ return array_pop($responses); ++ } ++ ++/** ++ * Store cookies in a response to be used in future requests. ++ * ++ * Non-expired cookies will be stored for use in future requests ++ * made with the same Client instance. Cookies are not saved ++ * between instances. ++ * ++ * @param Response $response The response to read cookies from ++ * @param string $host The request host, used for getting host names ++ * in case the cookies didn't set a domain. ++ * @return void ++ */ ++ protected function _storeCookies(Response $response, $host) {","I guess it should. I was more concerned about the hostnames as sharing cookies between domains is going to cause problems. I forgot about path based cookies though. Both Client::_storeCookies() & Client::_addCookies() will need to be updated. +",0,c0865001dc9b44d763aba9eaa00720fa4e485ddc +2628259,0,"@markstory To follow the [RFC 2109](http://www.ietf.org/rfc/rfc2109.txt) (section 4.3.1) the path should be the base path of the requested URL when not specified. > Defaults to the path of the request URL that generated the > Set-Cookie response, up t""",2013-01-12 13:23:22,jrbasso,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1057#discussion_r2628259,2013-01-13T00:23:22Z,2013-01-24T02:45:59Z,jrbasso,MEMBER,lib/Cake/Network/Http/Client.php,,,,,1,"@@ -0,0 +1,527 @@ ++get('/users', [], ['type' => 'json']);` ++ * ++ * The `type` option sets both the `Content-Type` and `Accept` header, to ++ * the same mime type. When using `type` you can use either a full mime ++ * type or an alias. 
If you need different types in the Accept and Content-Type ++ * headers you should set them manually and not use `type` ++ * ++ * ### Using authentication ++ * ++ * By using the `auth` key you can use authentication. The type sub option ++ * can be used to specify which authentication strategy you want to use. ++ * CakePHP comes with a few built-in strategies: ++ * ++ * - Basic ++ * - Digest ++ * - Oauth ++ * ++ * ### Using proxies ++ * ++ * By using the `proxy` key you can set authentication credentials for ++ * a proxy if you need to use one.. The type sub option can be used to ++ * specify which authentication strategy you want to use. ++ * CakePHP comes with built-in support for basic authentication. ++ * ++ */ ++class Client { ++ ++/** ++ * Stored configuration for the client. ++ * ++ * @var array ++ */ ++ protected $_config = [ ++ 'host' => null, ++ 'port' => null, ++ 'scheme' => 'http', ++ 'timeout' => 30, ++ 'ssl_verify_peer' => true, ++ 'ssl_verify_depth' => 5, ++ 'ssl_verify_host' => true, ++ 'redirect' => false, ++ ]; ++ ++/** ++ * List of cookies from responses made with this client. ++ * ++ * Cookies are indexed by the cookie's domain or ++ * request host name. ++ * ++ * @var array ++ */ ++ protected $_cookies = []; ++ ++/** ++ * Adapter for sending requests. Defaults to ++ * Cake\Network\Http\Stream ++ * ++ * @var Cake\Network\Http\Stream ++ */ ++ protected $_adapter; ++ ++/** ++ * Create a new HTTP Client. ++ * ++ * ### Config options ++ * ++ * You can set the following options when creating a client: ++ * ++ * - host - The hostname to do requests on. ++ * - port - The port to use. ++ * - scheme - The default scheme/protocol to use. Defaults to http. ++ * - timeout - The timeout in seconds. Defaults to 30 ++ * - ssl_verify_peer - Whether or not SSL certificates should be validated. ++ * Defaults to true. ++ * - ssl_verify_depth - The maximum certificate chain depth to travers. ++ * Defaults to 5. 
++ * - ssl_verify_host - Verify that the certificate and hostname match. ++ * Defaults to true. ++ * - redirect - Number of redirects to follow. Defaults to false. ++ * ++ * @param array $config Config options for scoped clients. ++ */ ++ public function __construct($config = []) { ++ $adapter = 'Cake\Network\Http\Adapter\Stream'; ++ if (isset($config['adapter'])) { ++ $adapter = $config['adapter']; ++ unset($config['adapter']); ++ } ++ $this->config($config); ++ ++ if (is_string($adapter)) { ++ $adapter = new $adapter(); ++ } ++ $this->_adapter = $adapter; ++ } ++ ++/** ++ * Get or set additional config options. ++ * ++ * Setting config will use Hash::merge() for appending into ++ * the existing configuration. ++ * ++ * @param array|null $config Configuration options. null to get. ++ * @return this|array ++ */ ++ public function config($config = null) { ++ if ($config === null) { ++ return $this->_config; ++ } ++ $this->_config = Hash::merge($this->_config, $config); ++ return $this; ++ } ++ ++/** ++ * Get the cookies stored in the Client. ++ * ++ * Returns an array of cookie data arrays. ++ * ++ * @return array ++ */ ++ public function cookies() { ++ return $this->_cookies; ++ } ++ ++/** ++ * Do a GET request. ++ * ++ * The $data argument supports a special `_content` key ++ * for providing a request body in a GET request. This is ++ * generally not used but services like ElasticSearch use ++ * this feature. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The query data you want to send. ++ * @param array $options Additional options for the request. 
++ * @return Cake\Network\Http\Response ++ */ ++ public function get($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $body = []; ++ if (isset($data['_content'])) { ++ $body = $data['_content']; ++ unset($data['_content']); ++ } ++ $url = $this->buildUrl($url, $data, $options); ++ return $this->_doRequest( ++ Request::METHOD_GET, ++ $url, ++ $body, ++ $options ++ ); ++ } ++ ++/** ++ * Do a POST request. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The post data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function post($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_POST, $url, $data, $options); ++ } ++ ++/** ++ * Do a PUT request. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The request data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function put($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_PUT, $url, $data, $options); ++ } ++ ++/** ++ * Do a PATCH request. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The request data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function patch($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_PATCH, $url, $data, $options); ++ } ++ ++/** ++ * Do a DELETE request. 
++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The request data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function delete($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_DELETE, $url, $data, $options); ++ } ++ ++/** ++ * Helper method for doing non-GET requests. ++ * ++ * @param string $method HTTP method. ++ * @param string $url URL to request. ++ */ ++ protected function _doRequest($method, $url, $data, $options) { ++ $request = $this->_createRequest( ++ $method, ++ $url, ++ $data, ++ $options ++ ); ++ return $this->send($request, $options); ++ } ++ ++/** ++ * Does a recursive merge of the parameter with the scope config. ++ * ++ * @param array $options Options to merge. ++ * @return array Options merged with set config. ++ */ ++ protected function _mergeOptions($options) { ++ return Hash::merge($this->_config, $options); ++ } ++ ++/** ++ * Send a request. ++ * ++ * Used internally by other methods, but can also be used to send ++ * handcrafted Request objects. ++ * ++ * @param Cake\Network\Http\Request $request The request to send. ++ * @param array $options Additional options to use. ++ * @return Cake\Network\Http\Response ++ */ ++ public function send(Request $request, $options = []) { ++ $responses = $this->_adapter->send($request, $options); ++ $host = parse_url($request->url(), PHP_URL_HOST); ++ foreach ($responses as $response) { ++ $this->_storeCookies($response, $host); ++ } ++ return array_pop($responses); ++ } ++ ++/** ++ * Store cookies in a response to be used in future requests. ++ * ++ * Non-expired cookies will be stored for use in future requests ++ * made with the same Client instance. Cookies are not saved ++ * between instances. 
++ * ++ * @param Response $response The response to read cookies from ++ * @param string $host The request host, used for getting host names ++ * in case the cookies didn't set a domain. ++ * @return void ++ */ ++ protected function _storeCookies(Response $response, $host) { ++ $cookies = $response->cookies(); ++ foreach ($cookies as $name => $cookie) { ++ $expires = isset($cookie['expires']) ? $cookie['expires'] : false; ++ if ($expires) { ++ $expires = \DateTime::createFromFormat('D, j-M-Y H:i:s e', $expires); ++ } ++ if ($expires && $expires->getTimestamp() <= time()) { ++ continue; ++ } ++ if (empty($cookie['domain'])) { ++ $cookie['domain'] = $host; ++ } ++ $cookie['domain'] = trim($cookie['domain'], '.'); ++ if (empty($cookie['path'])) { ++ $cookie['path'] = '/';","@markstory To follow the [RFC 2109](http://www.ietf.org/rfc/rfc2109.txt) (section 4.3.1) the path should be the base path of the requested URL when not specified. + +> Defaults to the path of the request URL that generated the +> Set-Cookie response, up to, but not including, the +> right-most /. +",0,c0865001dc9b44d763aba9eaa00720fa4e485ddc +6224808,0,"@markstory Agree about the usage and expectations of support of those options...""",2013-09-07 02:25:24,ravage84,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1606#discussion_r6224808,2013-09-07T14:25:24Z,2013-09-09T14:49:11Z,ravage84,MEMBER,App/App/Config/bootstrap.php,,,,,1,"@@ -14,9 +14,11 @@ + */ + namespace App\Config; + +-if (file_exists(dirname(__DIR__) . 'vendor/autoload.php')) { +- require dirname(__DIR__) . 'vendor/autoload.php'; ++if (!file_exists(dirname(dirname(__DIR__)) . '/vendor/autoload.php')) { ++ die('Could not find vendor/autoload.php. You need to install dependencies with `php composer.phar install` first.');","@markstory Agree about the usage and expectations of support of those options... +",0,5bdb78603590331da86f3aea10ab718e0a29a0b9 +6060638,0,"Another possible good idea is to add an hmac to detect tampering. 
I know zf2 does this as do a few other implementations I've seen.""",2013-08-29 01:52:47,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1568#discussion_r6060638,2013-08-29T13:52:47Z,2013-09-02T01:44:56Z,markstory,MEMBER,lib/Cake/Utility/Security.php,,,,,1,"@@ -289,4 +289,69 @@ protected static function _crypt($password, $salt = false) { + return crypt($password, $salt); + } + ++/** ++ * Encrypt a value using AES-256. ++ * ++ * *Caveat* You cannot properly encrypt/decrypt data with trailing null bytes. ++ * Any trailing null bytes will be removed on decryption due to how PHP pads messages ++ * with nulls prior to encryption. ++ * ++ * @param string $plain The value to encrypt. ++ * @param string $key The 256 bit/32 byte key to use as a cipher key. ++ * @return string Encrypted data. ++ * @throws CakeException On invalid data or key. ++ */ ++ public static function encrypt($plain, $key) { ++ self::_checkKey($key, 'encrypt()'); ++ if (empty($plain)) { ++ throw new CakeException(__d('cake_dev', 'The data to encrypt cannot be empty.')); ++ } ++ $key = substr($key, 0, 32);","Another possible good idea is to add an hmac to detect tampering. I know zf2 does this as do a few other implementations I've seen. 
+",0,13b870d7e183375822eea4ffd66aaacaeec760ff +5912764,2,"That was the reason I started to extract stuff into a Trait, found out the hard way it was not possible :(""",2013-08-21 11:27:35,lorenzo,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1544#discussion_r5912764,2013-08-21T23:27:35Z,2013-08-24T16:35:52Z,lorenzo,MEMBER,lib/Cake/ORM/ResultCollectionTrait.php,,27.0,,,27,"@@ -0,0 +1,72 @@ ++_eventManager)) { + $this->_eventManager = new EventManager();","yes, I wanted to listen to a view render in a crud listener, but couldn't access the view to attach an event, since the view isn't created until render() is called - had to hack around it to make it work :) +I expected a View.beforeRender event in any class attaching to the controller event manager would have worked, but alas it did not :) +",0,c8f6c84285080bfc9020401522b01bb7d50777ab +4464918,0,"This permits SQL injection. User data should never be interpolated into the keys of a find condition.""",2013-05-30 07:37:21,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1319#discussion_r4464918,2013-05-30T19:37:21Z,2013-05-30T19:37:21Z,markstory,MEMBER,lib/Cake/Console/Templates/default/actions/controller_actions.ctp,,182.0,,,26,"@@ -157,3 +157,62 @@ + + $this->redirect(array('action' => 'index')); + } ++ ++/** ++ * search method ++ * ++ * @return void ++ */ ++ public function search() { ++ if ($this->request->is('post') || $this->request->is('put')) { ++ $conditions = array(); ++ foreach ($this->data[''] as $key => $value) { ++ $empty = true; ++ if ((substr($key, -3) == ""_id"")) { // associated input have _id suffix ++ if ($value != 0) ++ $empty = false; ++ } else { ++ if ($value != """") ++ $empty = false; ++ } ++ ++ if (!$empty) { ++ if (strstr($value, ""*"")) { ++ $conditions[] = array( ++ '.'.$key."" LIKE"" => str_replace('*', '%', $value)","This permits SQL injection. User data should never be interpolated into the keys of a find condition. 
+",0,221f95f0af250cf48c90c350282af33478e5c15f +4394900,2,"Odd, it might be the test being wrong... I remember spending some time fixing the multi insert in sqlite so I was quite surprised it did not work.""",2013-05-26 06:58:24,lorenzo,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1303#discussion_r4394900,2013-05-26T18:58:24Z,2013-05-29T02:13:15Z,lorenzo,MEMBER,lib/Cake/Database/Dialect/SqliteDialectTrait.php,,,,,1,"@@ -102,28 +102,33 @@ protected function _insertQueryTranslator($query) { + return $query; + } + +- $cols = $v->columns(); + $newQuery = $query->connection()->newQuery(); ++ $cols = $v->columns(); + $values = []; + foreach ($v->values() as $k => $val) { + $values[] = $val; +- $val = array_merge($val, array_fill(0, count($cols) - count($val), null)); ++ $fillLength = count($cols) - count($val); ++ if ($fillLength > 0) { ++ $val = array_merge($val, array_fill(0, $fillLength, null)); ++ } ++ // TODO this doesn't work all columns are inserted as null.","Odd, it might be the test being wrong... I remember spending some time fixing the multi insert in sqlite so I was quite surprised it did not work. +",0,2b90e7dbc75cccae5f7b1fc76eba57bf6fcbe049 +3613646,0,"10, integers in postgres are signed so one of the bits is used for the sign. It is actually the same in Mysql, unless it is declared as unsigned""",2013-04-01 19:44:51,lorenzo,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1212#discussion_r3613646,2013-04-02T07:44:51Z,2013-04-03T21:35:55Z,lorenzo,MEMBER,lib/Cake/Model/Datasource/Database/Dialect/PostgresDialectTrait.php,,205.0,,,58,"@@ -148,4 +148,128 @@ protected function _transformFunctionExpression(FunctionExpression $expression) + } + } + ++/** ++ * Get the SQL to list the tables ++ * ++ * @param array $config The connection configuration to use for ++ * getting tables from. ++ * @return array An array of (sql, params) to execute. 
++ */ ++ public function listTablesSql($config) { ++ $sql = ""SELECT table_name as name FROM INFORMATION_SCHEMA.tables WHERE table_schema = ? ORDER BY name""; ++ $schema = empty($config['schema']) ? 'public' : $config['schema']; ++ return [$sql, [$schema]]; ++ } ++ ++/** ++ * Get the SQL to describe a table in Postgres. ++ * ++ * @param string $table The table name to describe ++ * @param array $config The connection configuration to use ++ * @return array An array of (sql, params) to execute. ++ */ ++ public function describeTableSql($table, $config) { ++ $sql = ++ ""SELECT DISTINCT table_schema AS schema, column_name AS name, data_type AS type, ++ is_nullable AS null, column_default AS default, ordinal_position AS position, ++ character_maximum_length AS char_length, character_octet_length AS oct_length, ++ d.description as comment, i.indisprimary = 't' as pk ++ FROM information_schema.columns c ++ INNER JOIN pg_catalog.pg_namespace ns ON (ns.nspname = table_schema) ++ INNER JOIN pg_catalog.pg_class cl ON (cl.relnamespace = ns.oid AND cl.relname = table_name) ++ LEFT JOIN pg_catalog.pg_index i ON (i.indrelid = cl.oid AND i.indkey[0] = c.ordinal_position) ++ LEFT JOIN pg_catalog.pg_description d on (cl.oid = d.objoid AND d.objsubid = c.ordinal_position) ++ WHERE table_name = ? AND table_schema = ? ORDER BY position""; ++ $schema = empty($config['schema']) ? 'public' : $config['schema']; ++ return [$sql, [$table, $schema]]; ++ } ++ ++/** ++ * Convert a column definition to the abstract types. ++ * ++ * The returned type will be a type that ++ * Cake\Model\Datasource\Database\Type can handle. 
++ * ++ * @param string $column The column type + length ++ * @return array List of (type, length) ++ */ ++ public function convertColumn($column) { ++ $col = strtolower($column); ++ if (in_array($col, array('date', 'time', 'boolean'))) { ++ return [$col, null]; ++ } ++ if (strpos($col, 'timestamp') !== false) { ++ return ['datetime', null]; ++ } ++ if ($col === 'serial' || $col === 'integer') { ++ return ['integer', 10];","10, integers in postgres are signed so one of the bits is used for the sign. It is actually the same in Mysql, unless it is declared as unsigned +",0,caec717bce20e06429dd54bafe39b4b0ee6682bb +3622375,2,"Derp, I missed that sorry.""",2013-04-02 05:23:11,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1212#discussion_r3622375,2013-04-02T17:23:11Z,2013-04-03T21:35:55Z,markstory,MEMBER,lib/Cake/Test/TestCase/Model/Datasource/Database/Driver/PostgresTest.php,,343.0,,,225,"@@ -126,4 +129,218 @@ public function testConnectionConfigCustom() { + $driver->connect($config); + } + ++/** ++ * Helper method for skipping tests that need a real connection. ++ * ++ * @return void ++ */ ++ protected function _needsConnection() { ++ $config = Configure::read('Datasource.test'); ++ $this->skipIf(strpos($config['datasource'], 'Postgres') === false, 'Not using Postgres for test config'); ++ } ++ ++/** ++ * Helper method for testing methods. 
++ * ++ * @return void ++ */ ++ protected function _createTables($connection) { ++ $this->_needsConnection(); ++ $connection->execute('DROP TABLE IF EXISTS articles'); ++ $connection->execute('DROP TABLE IF EXISTS authors'); ++ ++ $table = <<execute($table); ++ ++ $table = <<execute($table); ++ $connection->execute('COMMENT ON COLUMN ""articles"".""title"" IS \'a title\''); ++ } ++ ++/** ++ * Dataprovider for column testing ++ * ++ * @return array ++ */ ++ public static function columnProvider() { ++ return [ ++ [ ++ 'TIMESTAMP', ++ ['datetime', null] ++ ], ++ [ ++ 'TIMESTAMP WITHOUT TIME ZONE', ++ ['datetime', null] ++ ], ++ [ ++ 'DATE', ++ ['date', null] ++ ], ++ [ ++ 'TIME', ++ ['time', null] ++ ], ++ [ ++ 'SMALLINT', ++ ['integer', 5] ++ ], ++ [ ++ 'INTEGER', ++ ['integer', 10] ++ ], ++ [ ++ 'SERIAL', ++ ['integer', 10] ++ ], ++ [ ++ 'BIGINT', ++ ['biginteger', 20] ++ ], ++ [ ++ 'NUMERIC', ++ ['decimal', null] ++ ], ++ [ ++ 'VARCHAR', ++ ['string', null] ++ ], ++ [ ++ 'CHARACTER VARYING', ++ ['string', null] ++ ], ++ [ ++ 'CHAR', ++ ['string', null] ++ ], ++ [ ++ 'CHARACTER', ++ ['string', null] ++ ], ++ [ ++ 'TEXT', ++ ['text', null] ++ ], ++ [ ++ 'BYTEA', ++ ['binary', null] ++ ], ++ [ ++ 'REAL', ++ ['float', null] ++ ], ++ [ ++ 'DOUBLE PRECISION', ++ ['float', null] ++ ], ++ [ ++ 'BIGSERIAL', ++ ['biginteger', 20] ++ ], ++ ]; ++ } ++ ++/** ++ * Test parsing Postgres column types. 
++ * ++ * @dataProvider columnProvider ++ * @return void ++ */ ++ public function testConvertColumnType($input, $expected) { ++ $driver = new Postgres(); ++ $this->assertEquals($expected, $driver->convertColumn($input)); ++ } ++ ++ ++/** ++ * Test listing tables with Postgres ++ * ++ * @return void ++ */ ++ public function testListTables() { ++ $connection = new Connection(Configure::read('Datasource.test')); ++ $this->_createTables($connection); ++ ++ $result = $connection->listTables(); ++ $this->assertInternalType('array', $result); ++ $this->assertCount(2, $result); ++ $this->assertEquals('articles', $result[0]); ++ $this->assertEquals('authors', $result[1]); ++ } ++ ++/** ++ * Test describing a table with Postgres ++ * ++ * @return void ++ */ ++ public function testDescribeTable() { ++ $connection = new Connection(Configure::read('Datasource.test')); ++ $this->_createTables($connection); ++ ++ $result = $connection->describe('articles'); ++ $expected = [ ++ 'id' => [ ++ 'type' => 'biginteger', ++ 'null' => false, ++ 'default' => null, ++ 'length' => 20, ++ 'key' => 'primary', ++ ], ++ 'title' => [ ++ 'type' => 'string', ++ 'null' => true, ++ 'default' => null, ++ 'length' => 20, ++ 'comment' => 'a title', ++ ], ++ 'body' => [ ++ 'type' => 'text', ++ 'null' => true, ++ 'default' => null, ++ 'length' => null, ++ ], ++ 'author_id' => [ ++ 'type' => 'integer', ++ 'null' => false, ++ 'default' => null, ++ 'length' => 10, ++ ], ++ 'published' => [ ++ 'type' => 'boolean', ++ 'null' => true, ++ 'default' => 0, ++ 'length' => null, ++ ], ++ 'views' => [ ++ 'type' => 'integer', ++ 'null' => true, ++ 'default' => 0, ++ 'length' => 5, ++ ], ++ 'created' => [ ++ 'type' => 'datetime', ++ 'null' => true, ++ 'default' => null, ++ 'length' => null, ++ ], ++ ]; ++ $this->assertEquals($expected, $result);","Derp, I missed that sorry. +",0,caec717bce20e06429dd54bafe39b4b0ee6682bb +1941298,0,"Extra space after `(` and before `)`. 
If block should always have braces `{}` even for single lines.""",2012-10-25 02:34:46,ADmad,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/913#discussion_r1941298,2012-10-25T14:34:46Z,2013-01-20T08:13:45Z,ADmad,MEMBER,lib/Cake/Model/Validator/CakeValidationRule.php,,,,,1,"@@ -271,6 +274,13 @@ public function process($field, &$data, &$methods) { + $this->_valid = call_user_func_array($methods[$rule], $this->_ruleParams); + } elseif (class_exists('Validation') && method_exists('Validation', $this->_rule)) { + $this->_valid = call_user_func_array(array('Validation', $this->_rule), $this->_ruleParams); ++ } elseif (strpos($this->_rule ,'::')) { ++ list($plugin, $class) = pluginSplit($this->_rule); ++ list($className,$method) = explode('::',$class); ++ $location = 'Model/Validation'; ++ if ( $plugin ) $location = $plugin . '.' . $location;","Extra space after `(` and before `)`. If block should always have braces `{}` even for single lines. +",0,b2cbeeeb501285431bdc7fb2a5b37f260b792261 +1714815,0,"Indentation is still incorrect, better use the code sniffer.""",2012-09-27 17:55:21,ADmad,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/872#discussion_r1714815,2012-09-28T05:55:21Z,2012-09-28T06:07:43Z,ADmad,MEMBER,lib/Cake/Cache/Engine/WincacheEngine.php,,,,,1,"@@ -183,6 +183,7 @@ public function groups() { + * @return boolean success + **/ + public function clearGroup($group) { ++ $success = null;","Indentation is still incorrect, better use the code sniffer. +",0,f4565d51fad4ba81906f085cd5abf043d4852365 +1460045,2,"Having a destructive get() seems like a bad idea. Why not just call set() afterwards to clear the block. 
I'd like to avoid duplicated functionality.""",2012-08-25 09:11:14,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/795#discussion_r1460045,2012-08-25T21:11:14Z,2012-08-25T21:19:27Z,markstory,MEMBER,lib/Cake/View/ViewBlock.php,,125.0,,,8,"@@ -119,13 +119,18 @@ public function set($name, $value) { + * Get the content for a block. + * + * @param string $name Name of the block ++ * @param boolean $clear Whether or not to clear the block after retrieval (default false) + * @return The block content or '' if the block does not exist. + */ +- public function get($name) { ++ public function get($name, $clear = false) {","Having a destructive get() seems like a bad idea. Why not just call set() afterwards to clear the block. I'd like to avoid duplicated functionality. +",0,123a59baa20c336a089956e00a4f88b6f8d95860 +408211,1,"Yeah, true, But I dunno if it would break userland code if init() would not register the Behavior in the Collection, but instead do nothing. But I trust your judgement in these things :-)""",2012-02-01 11:02:29,tPl0ch,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/455#discussion_r408211,2012-02-01T22:02:29Z,2012-02-03T18:33:39Z,tPl0ch,NONE,lib/Cake/Model/BehaviorCollection.php,,82.0,,,1,"@@ -70,6 +69,18 @@ public function init($modelName, $behaviors = array()) { + } + + /** ++ * Backwards compatible alias for __construct() ++ * ++ * @param string $modelName ++ * @param array $behaviors ++ * @return void ++ * @deprecated Initialize with constructor instead ++ */ ++ public function init($modelName, $behaviors = array()) { ++ $this->__construct($modelName, $behaviors); ++ } ++ ++/**","Yeah, true, But I dunno if it would break userland code if init() would not register the Behavior in the Collection, but instead do nothing. But I trust your judgement in these things :-) +",0,ea4f39c13bb80524ad470196be75d5a6a6964048 +54071,2,"Yeah, that's right. My bad. 
Commit 788a7e4 corrects this issue.""",2011-06-28 11:01:01,luisarmando,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/136#discussion_r54071,2011-06-28T23:01:01Z,2011-07-01T10:19:38Z,labianchin,CONTRIBUTOR,lib/Cake/Console/Command/UpgradeShell.php,,,,,1,"@@ -74,16 +74,22 @@ class UpgradeShell extends Shell { + public function locations() { + $cwd = getcwd(); + +- if (is_dir('plugins')) { +- +- $Folder = new Folder('plugins'); +- list($plugins) = $Folder->read(); +- foreach($plugins as $plugin) { +- chdir($cwd . DS . 'plugins' . DS . $plugin); +- $this->locations(); ++ $plugins = App::path('plugins'); ++ if (!$pluginsFolders) ++ $pluginsFolders = array('plugins'); ++ ++ foreach ($pluginsFolders as $pluginsFolder){ ++ if (is_dir($pluginsFolder)) { ++ ++ $Folder = new Folder($pluginsFolder); ++ list($plugins) = $Folder->read(); ++ foreach($plugins as $plugin) { ++ chdir($cwd . DS . 'plugins' . DS . $plugin);","Yeah, that's right. My bad. +Commit 788a7e4 corrects this issue. +",0,b22e30c5a31fb8a7ff95f6670193bc7697de0e9a +6630638,0,"You can also use the official [code sniffer](https://github.com/cakephp/cakephp-codesniffer).""",2013-09-27 03:27:07,ADmad,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1688#discussion_r6630638,2013-09-27T15:27:07Z,2013-09-27T15:27:07Z,ADmad,MEMBER,lib/Cake/TestSuite/ControllerTestCase.php,,,,,1,"@@ -219,7 +219,8 @@ protected function _testAction($url = '', $options = array()) { + $options = array_merge(array( + 'data' => array(), + 'method' => 'POST', +- 'return' => 'result' ++ 'return' => 'result', ++ 'named' => array()","You can also use the official [code sniffer](https://github.com/cakephp/cakephp-codesniffer). +",0,9faf3ca5d46ad908974d590249083b5dd75971b9 +2671628,0,"Just as curiosity, I was looking the [`DateTime` documentation](http://us2.php.net/manual/en/class.datetime.php) today and they have the constant for the RFC formats. 
Form the [cookie RFC](http://tools.ietf.org/html/rfc2616#section-3.3.1), it could be """,2013-01-16 09:19:39,jrbasso,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1057#discussion_r2671628,2013-01-16T20:19:39Z,2013-01-24T02:45:59Z,jrbasso,MEMBER,lib/Cake/Network/Http/Cookies.php,,,,,1,"@@ -0,0 +1,117 @@ ++cookies(); ++ foreach ($cookies as $name => $cookie) { ++ if (empty($cookie['domain'])) { ++ $cookie['domain'] = $host; ++ } ++ if (empty($cookie['path'])) { ++ $cookie['path'] = $path; ++ } ++ $key = implode(';', [$cookie['name'], $cookie['domain'], $cookie['path']]); ++ ++ $expires = isset($cookie['expires']) ? $cookie['expires'] : false; ++ if ($expires) { ++ $expires = \DateTime::createFromFormat('D, j-M-Y H:i:s e', $expires);","Just as curiosity, I was looking the [`DateTime` documentation](http://us2.php.net/manual/en/class.datetime.php) today and they have the constant for the RFC formats. + +Form the [cookie RFC](http://tools.ietf.org/html/rfc2616#section-3.3.1), it could be ISO 1123 (most used and the same you used), RFC 1036 or asctime(). +",0,c0865001dc9b44d763aba9eaa00720fa4e485ddc +2676081,0,"I ended up using `strtotime()` as the various formats that I saw in the wild and in the specs resulted in icky code. However, strtotime() seems to be able to sort everything out.""",2013-01-16 15:06:50,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1057#discussion_r2676081,2013-01-17T02:06:50Z,2013-01-24T02:45:59Z,markstory,MEMBER,lib/Cake/Network/Http/Cookies.php,,,,,1,"@@ -0,0 +1,117 @@ ++cookies(); ++ foreach ($cookies as $name => $cookie) { ++ if (empty($cookie['domain'])) { ++ $cookie['domain'] = $host; ++ } ++ if (empty($cookie['path'])) { ++ $cookie['path'] = $path; ++ } ++ $key = implode(';', [$cookie['name'], $cookie['domain'], $cookie['path']]); ++ ++ $expires = isset($cookie['expires']) ? 
$cookie['expires'] : false; ++ if ($expires) { ++ $expires = \DateTime::createFromFormat('D, j-M-Y H:i:s e', $expires);","I ended up using `strtotime()` as the various formats that I saw in the wild and in the specs resulted in icky code. However, strtotime() seems to be able to sort everything out. +",0,c0865001dc9b44d763aba9eaa00720fa4e485ddc +4778220,1,"Yes you right""",2013-06-19 05:36:15,bhiv,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1364#discussion_r4778220,2013-06-19T17:36:15Z,2013-06-19T17:36:15Z,np42,NONE,lib/Cake/Model/Behavior/ContainableBehavior.php,,163.0,,,19,"@@ -152,6 +157,12 @@ public function beforeFind(Model $Model, $query) { + if (!$reset && empty($instance->__backOriginalAssociation)) { + $instance->__backOriginalAssociation = $backupBindings; + } ++ foreach ($unbind as $className) { ++ $this->rebinds[] = array ++ ( 'model' => $instance, 'type' => $type ++ , 'className' => $className, 'link' => &$instance->{$type}[$className]","Yes you right +",0,8e500c33349a5dbd05d1846c17538102669a5c51 +6103651,0,"One could default this to false and only enable it dynamically inside the code.""",2013-09-01 10:55:17,dereuromark,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1594#discussion_r6103651,2013-09-01T22:55:17Z,2013-09-01T22:55:17Z,dereuromark,MEMBER,lib/Cake/Console/ConsoleInput.php,,44.0,,,12,"@@ -33,11 +33,23 @@ class ConsoleInput { + protected $_input; + + /** ++ * Can this instance use readline? ++ * Two conditions must be met: ++ * 1. Readline support must be enabled. ++ * 2. Handle we are attached to must be stdin. ++ * Allows rich editing with arrow keys and history when inputting a string. ++ * ++ * @var bool ++ */ ++ protected $_canReadline;","One could default this to false and only enable it dynamically inside the code. +",0,bb98ac761fad08204cb7ad9fac9e6b22e283f5e0 +6060336,0,"One problem with pbkdf is the general lack of support. Prior to 5.5 I'm not aware of any commonly installed extension that provides pbkdf2. 
An alternative to length checks and pbkdf2 is to use sha256. That would mask the original key and ensure the correc""",2013-08-29 01:40:49,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1568#discussion_r6060336,2013-08-29T13:40:49Z,2013-09-02T01:44:56Z,markstory,MEMBER,lib/Cake/Utility/Security.php,,,,,1,"@@ -289,4 +289,69 @@ protected static function _crypt($password, $salt = false) { + return crypt($password, $salt); + } + ++/** ++ * Encrypt a value using AES-256. ++ * ++ * *Caveat* You cannot properly encrypt/decrypt data with trailing null bytes. ++ * Any trailing null bytes will be removed on decryption due to how PHP pads messages ++ * with nulls prior to encryption. ++ * ++ * @param string $plain The value to encrypt. ++ * @param string $key The 256 bit/32 byte key to use as a cipher key. ++ * @return string Encrypted data. ++ * @throws CakeException On invalid data or key. ++ */ ++ public static function encrypt($plain, $key) { ++ self::_checkKey($key, 'encrypt()'); ++ if (empty($plain)) { ++ throw new CakeException(__d('cake_dev', 'The data to encrypt cannot be empty.')); ++ } ++ $key = substr($key, 0, 32);","One problem with pbkdf is the general lack of support. Prior to 5.5 I'm not aware of any commonly installed extension that provides pbkdf2. An alternative to length checks and pbkdf2 is to use sha256. That would mask the original key and ensure the correct key length. 
+",0,13b870d7e183375822eea4ffd66aaacaeec760ff +5034403,2,"Sounds like hack, but with that difference I wonder if the `call_user_func_array` could be replaced until certain level by the same strategy used in [`Configure::write()` of 1.x version](https://github.com/cakephp/cakephp/blob/1.3/cake/libs/configure.php#""",2013-07-04 05:25:21,jrbasso,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1397#discussion_r5034403,2013-07-04T17:25:21Z,2013-07-05T03:45:44Z,jrbasso,MEMBER,lib/Cake/Event/EventManager.php,,,,,1,"@@ -243,8 +235,10 @@ public function dispatch($event) { + if ($event->isStopped()) { + break; + } +- if ($listener['passParams'] === true) { +- $result = call_user_func_array($listener['callable'], $event->data); ++ $data = $event->data(); ++ if ($data !== null) { ++ array_unshift($data, $event); ++ $result = call_user_func_array($listener['callable'], $data); + } else { + $result = call_user_func($listener['callable'], $event);","Sounds like hack, but with that difference I wonder if the `call_user_func_array` could be replaced until certain level by the same strategy used in [`Configure::write()` of 1.x version](https://github.com/cakephp/cakephp/blob/1.3/cake/libs/configure.php#L93-L107) +",0,3040dbe0c36960df4b3d20fd0cad8a570a98cdff +4288375,0,"It is needed and is set in [constructor](https://github.com/ADmad/cakephp/blob/8ae3934378e75b2bdc7ac850f7593467fa7e7fc3/lib/Cake/Controller/Component/Auth/BasicAuthenticate.php#L55). I removed the overriding of `$settings` property so that if its keys are""",2013-05-18 04:35:27,ADmad,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1275#discussion_r4288375,2013-05-18T16:35:27Z,2013-05-26T05:59:44Z,ADmad,MEMBER,lib/Cake/Controller/Component/Auth/BasicAuthenticate.php,,68.0,,,25,"@@ -44,31 +44,6 @@ + class BasicAuthenticate extends BaseAuthenticate { + + /** +- * Settings for this object. +- * +- * - `fields` The fields to use to identify a user by. 
+- * - `userModel` The model name of the User, defaults to User. +- * - `scope` Additional conditions to use when looking up and authenticating users, +- * i.e. `array('User.is_active' => 1).` +- * - `recursive` The value of the recursive key passed to find(). Defaults to 0. +- * - `contain` Extra models to contain and store in session. +- * - `realm` The realm authentication is for. Defaults the server name. +- * +- * @var array +- */ +- public $settings = array( +- 'fields' => array( +- 'username' => 'username', +- 'password' => 'password' +- ), +- 'userModel' => 'User', +- 'scope' => array(), +- 'recursive' => 0, +- 'contain' => null, +- 'realm' => '',","It is needed and is set in [constructor](https://github.com/ADmad/cakephp/blob/8ae3934378e75b2bdc7ac850f7593467fa7e7fc3/lib/Cake/Controller/Component/Auth/BasicAuthenticate.php#L55). I removed the overriding of `$settings` property so that if its keys are updated in `BaseAuthenticate` they are easily inherited and you don't have to update this class too. +",0,dd2892ad8d0e3a0b09990b0a9ef26c320f1901fa +2930599,0,"should the regex then consider the following? ```html
,
,
,
``` Mark, I'm not sure if I follow the tag formatting that you've created and how it would account for the 4 versions of 'br' above?""",2013-02-07 07:07:30,TeckniX,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1107#discussion_r2930599,2013-02-07T18:07:30Z,2013-03-25T15:01:22Z,TeckniX,CONTRIBUTOR,lib/Cake/View/Helper/TextHelper.php,,,,,1,"@@ -228,6 +228,29 @@ public function highlight($text, $phrase, $options = array()) { + } + + /** ++ * Formats paragraphs around given text for all line breaks ++ *
added for single line return ++ *

added for double line return ++ * ++ * @param string $text Text ++ * @return string The text with proper

tags ++ * @link http://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::autoParagraph ++ */ ++ public static function autoParagraph($text) { ++ if (trim($text) !== '') { ++ $text = preg_replace('|
\s*
|', ""\n\n"", $text . ""\n"");","should the regex then consider the following? + +``` html +
,
,
,
+``` + +Mark, I'm not sure if I follow the tag formatting that you've created and how it would account for the 4 versions of 'br' above? +",0,d260f4a5b3c4e057355397da65ab7f8a3088d5a7 +1616412,0,"Can you please rename it to addToWhitelist ? First time I read this I thought it was ment to add fields to a query""",2012-09-16 19:33:50,lorenzo,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/851#discussion_r1616412,2012-09-17T07:33:50Z,2012-11-02T09:54:29Z,lorenzo,MEMBER,lib/Cake/Model/Model.php,,,,,1,"@@ -2299,6 +2302,29 @@ public function saveAssociated($data = null, $options = array()) { + } + + /** ++ * Helper method for saveAll() and friends, to add foreign key to fieldlist ++ * ++ * @param string $key fieldname to be added to list ++ * @param array $options ++ * @return array $options ++ */ ++ public function addToFieldList($key, $options) {","Can you please rename it to addToWhitelist ? First time I read this I thought it was ment to add fields to a query +",0,7007dba0eb836f852aaca95fada103bc4ba993a9 +1620325,0,"fieldList == whiteList though.""",2012-09-17 04:39:36,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/851#discussion_r1620325,2012-09-17T16:39:36Z,2012-11-02T09:54:30Z,markstory,MEMBER,lib/Cake/Model/Model.php,,,,,1,"@@ -2299,6 +2302,29 @@ public function saveAssociated($data = null, $options = array()) { + } + + /** ++ * Helper method for saveAll() and friends, to add foreign key to fieldlist ++ * ++ * @param string $key fieldname to be added to list ++ * @param array $options ++ * @return array $options ++ */ ++ public function addToFieldList($key, $options) {","fieldList == whiteList though. +",0,7007dba0eb836f852aaca95fada103bc4ba993a9 +1616459,2,"Can we add a default return? 
throwing an exception sounds a bit too much for view related code""",2012-09-16 19:47:21,lorenzo,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/850#discussion_r1616459,2012-09-17T07:47:21Z,2012-09-17T11:22:15Z,lorenzo,MEMBER,lib/Cake/Utility/CakeNumber.php,,127.0,,,1,"@@ -102,6 +102,28 @@ public static function toReadableSize($size) { + } + + /** ++ * Converts filesize from human readable string to bytes ++ * ++ * @param string $size Size in human readable string like '5MB' ++ * @return integer Bytes ++ */ ++ public static function fromReadableSize($size) { ++ if (ctype_digit($size)) { ++ return $size * 1; ++ } ++ $units = array('KB', 'MB', 'GB', 'TB', 'PB'); ++ foreach ($units as $i => $unit) { ++ if ($unit == substr($size, -2)) { ++ return $size * pow(1024, $i + 1); ++ } ++ } ++ if (substr($size, -1) == 'B' && ctype_digit(substr($size, 0, strlen($size) - 1))) { ++ return $size * 1; ++ } ++ throw new CakeException(__d('cake_dev', 'No unit type.'));","Can we add a default return? throwing an exception sounds a bit too much for view related code +",0,9530e68ae62dc9cb49b00e61814165c46009f56e +1560837,0,"Why not have the _setupFilesystem() method return a list of variables that you can unpack with list(). That solves the duplication and variable scoping issues.""",2012-09-08 05:35:24,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/626#discussion_r1560837,2012-09-08T17:35:24Z,2012-09-08T17:35:24Z,markstory,MEMBER,lib/Cake/Test/Case/Utility/FolderTest.php,,1023.0,,,62,"@@ -848,14 +973,76 @@ public function testCopy() { + file_put_contents($folderThree . DS . 'folder2' . DS . 'file2.php', 'untouched'); + + $Folder = new Folder($folderOne); +- $result = $Folder->copy($folderThree); ++ $result = $Folder->copy(array('to'=>$folderThree, 'scheme'=>Folder::SKIP)); + $this->assertTrue($result); + $this->assertTrue(file_exists($folderThree . DS . 'file1.php')); + $this->assertEquals('untouched', file_get_contents($folderThree . DS . 'folder2' . DS . 
'file2.php')); + + $Folder = new Folder($path); + $Folder->delete(); ++ } ++ ++ /** ++ * testCopyWithOverwrite ++ * ++ * Verify that subdirectories existing in both destination and source directory ++ * are overwritten/replaced recursivly. ++ * ++ * $path: folder_test/ ++ * $folderOne: folder_test/folder1/ ++ * - file1.php ++ * $folderTwo: folder_test/folder2/ ++ * - file2.php ++ * $folderThree: folder_test/folder1/folder3/ ++ * - file3.php ++ * $folderFour: folder_test/folder2/folder4/ ++ * - file4.php ++ * $folderFive: folder_test/folder4/ ++ */ ++ function testCopyWithOverwrite() { ++ $path = TMP . 'folder_test'; ++ $folderOne = $path . DS . 'folder1'; ++ $folderTwo = $path . DS . 'folder2'; ++ $folderThree = $folderOne . DS . 'folder'; ++ $folderFour = $folderTwo . DS . 'folder'; ++ $folderFive = $path . DS . 'folder5'; ++ $fileOne = $folderOne . DS . 'file1.php'; ++ $fileTwo = $folderTwo . DS . 'file2.php'; ++ $file3 = $folderThree . DS . 'file3.php'; ++ $file4 = $folderFour . DS . 'file4.php'; ++ ++ new Folder($path, true); ++ new Folder($folderOne, true); ++ new Folder($folderTwo, true); ++ new Folder($folderThree, true); ++ new Folder($folderFour, true); ++ new Folder($folderFive, true); ++ touch($fileOne); ++ touch($fileTwo); ++ touch($file3); ++ touch($file4);","Why not have the _setupFilesystem() method return a list of variables that you can unpack with list(). That solves the duplication and variable scoping issues. +",0,0a35f514cb07b016ac7bd1b904a273b6a0026d05 +165826,1,"I think the mask config option is useful. 
I just think that chmod() has no chance of side effects in a multi-threaded webserver unlike umask()""",2011-10-11 13:29:24,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/240#discussion_r165826,2011-10-12T01:29:24Z,2011-10-12T08:20:12Z,markstory,MEMBER,lib/Cake/Cache/Engine/FileEngine.php,,,,,1,"@@ -288,9 +283,10 @@ class FileEngine extends CacheEngine { + if (!$createKey && !$path->isFile()) { + return false; + } +- $old = umask(0); ++ ++ $old = umask(0666 & ~$this->settings['mask']);","I think the mask config option is useful. I just think that chmod() has no chance of side effects in a multi-threaded webserver unlike umask() +",0,40aeabe03afb523261c36d3da690d1527207412b +6632562,1,"Returning early for `if (Configure::read('Cache.check') === true) {` would be great too!""",2013-09-27 04:41:53,renan,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1687#discussion_r6632562,2013-09-27T16:41:53Z,2013-09-28T08:19:33Z,renan,CONTRIBUTOR,lib/Cake/Model/Model.php,,,,,1,"@@ -3501,17 +3501,20 @@ public function onError() { + protected function _clearCache($type = null) { + if ($type === null) {","Returning early for `if (Configure::read('Cache.check') === true) {` would be great too! +",0,be61a5023f6052b7765bddfb3b29e52407d0df85 +4779589,0,"beeing => being and singup => signup ?""",2013-06-19 06:22:19,dereuromark,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1352#discussion_r4779589,2013-06-19T18:22:19Z,2013-06-19T18:22:19Z,dereuromark,MEMBER,lib/Cake/Test/Case/View/Helper/FormHelperTest.php,,6636.0,,,8,"@@ -6629,6 +6629,31 @@ public function testYearAutoExpandRange() { + } + + /** ++ * testInputDateMaxYear method ++ * ++ * Let's say we want to only allow users born from ++ * 2006 to 2008 to register ++ * This beeing the first singup page, we still don't have any data","beeing => being and singup => signup ? +",0,2c2f357f30afb956187d89f84313ea268ff22ea9 +6374521,0,"If you want you can, if not I can go through and fix the old code. 
There isn't a phpcs test for this which is why it is inconsistent.""",2013-09-15 23:55:27,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1638#discussion_r6374521,2013-09-16T11:55:27Z,2013-09-16T12:06:12Z,markstory,MEMBER,lib/Cake/Test/Case/Event/CakeEventManagerTest.php,,,,,1,"@@ -414,4 +422,63 @@ public function testStopPropagation() { + $expected = array('secondListenerFunction'); + $this->assertEquals($expected, $listener->callStack); + } ++ ++/** ++ * Tests event dispatching using priorities ++ * ++ * @return void ++ */ ++ public function testDispatchPrioritizedWithGlobal() { ++ $generalManager = $this->getMock('CakeEventManager'); ++ $manager = new CakeEventManager; ++ $listener = new CustomTestEventListerner;","If you want you can, if not I can go through and fix the old code. There isn't a phpcs test for this which is why it is inconsistent. +",0,8a87f7e1b56af69b3ac9b2df52ace9caad45e9f5 +6101204,2,":(""",2013-08-31 11:25:29,jrbasso,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1584#discussion_r6101204,2013-08-31T23:25:29Z,2013-09-01T02:33:40Z,jrbasso,MEMBER,lib/Cake/ORM/Query.php,,,,,1,"@@ -558,6 +558,29 @@ public function mapReduce(callable $mapper = null, callable $reducer = null, $ov + } + + /** ++ * Returns the first result out of executed this query, if the query has not been ++ * executed before, it will set the limit clause to 1 for performance reasons. ++ * ++ * ###Example: ++ * ++ * ``$singleUser = $query->select(['id', 'username'])->first()`` ++ * ++ * @return mixed the first result from the ResultSet ++ */ ++ public function first() { ++ if ($this->_dirty) { ++ $this->limit(1); ++ } ++ $this->bufferResults(); ++ $this->_results = $this->execute(); ++ // Calls foreach so we cursor is rewinded automatically ++ foreach ($this->_results as $row) {",":( +",0,88c0ab3a2f5be7840d559e8873b0b6562e915416 +5829535,2,"I'm glad I'm not the only person who hates Cache::set(). That method is all sorts of silly. 
I'm on board for removing it if others are. It adds a pile of complexity to Cache for almost no benefit other than making the calling code more complicated as well""",2013-08-17 06:44:30,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1534#discussion_r5829535,2013-08-17T18:44:30Z,2013-08-17T21:09:51Z,markstory,MEMBER,lib/Cake/Cache/Cache.php,,181.0,,,71,"@@ -125,25 +132,62 @@ class Cache { + + /** + * This method can be used to define cache adapters for an application +- * during the bootstrapping process. You can use this method to add new cache adapters +- * at runtime as well. New cache configurations will be constructed upon the next write. ++ * or read existing configuration. + * + * To change an adapter's configuration at runtime, first drop the adapter and then + * reconfigure it. + * + * Adapters will not be constructed until the first operation is done. + * ++ * ### Usage ++ * ++ * Reading config data back: ++ * ++ * `Cache::config('default');` ++ * ++ * Setting a cache engine up. ++ * ++ * `Cache::config('default', $settings);` ++ * ++ * Injecting a constructed adapter in: ++ * ++ * `Cache::config('default', $instance);` ++ * ++ * Using a factory function to get an adapter: ++ * ++ * `Cache::config('default', function () { return new FileEngine(); });` ++ * ++ * Configure multiple adapters at once: ++ * ++ * `Cache::config($arrayOfConfig);` ++ * + * @param string|array $key The name of the cache config, or an array of multiple configs. + * @param array $config An array of name => config data for adapter. +- * @return void ++ * @return mixed null when adding configuration and an array of configuration data when reading. ++ * @throws Cake\Error\Exception When trying to modify an existing config. + */ + public static function config($key, $config = null) { +- if ($config !== null && is_string($key)) { +- static::$_config[$key] = $config; ++ // Read config. 
++ if ($config === null && is_string($key)) { ++ return isset(static::$_config[$key]) ? static::$_config[$key] : null; ++ } ++ if ($config === null && is_array($key)) { ++ foreach ($key as $name => $settings) { ++ static::config($name, $settings); ++ } + return; + } +- +- static::$_config = array_merge(static::$_config, $key); ++ if (isset(static::$_config[$key])) { ++ throw new Error\Exception(__d('cake_dev', 'Cannot reconfigure existing adapter ""%s""', $key));","I'm glad I'm not the only person who hates Cache::set(). That method is all sorts of silly. I'm on board for removing it if others are. It adds a pile of complexity to Cache for almost no benefit other than making the calling code more complicated as well. +",0,0ecdd6f5b831281d144abd821e93f9157897709d +5591330,0,"There are a number of classname checks throughout the new code. Would it make sense to have a simple class that implemented the ExpressionInterface that we could use to wrap other values with? That might help remove the various `instanceof` checks through""",2013-08-05 09:50:16,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1479#discussion_r5591330,2013-08-05T21:50:16Z,2013-08-10T09:37:05Z,markstory,MEMBER,lib/Cake/Database/Query.php,,1209.0,,,196,"@@ -1157,8 +1182,27 @@ protected function _buildInsertPart($parts) { + * @param array $parts + * @return string SQL fragment. 
+ */ +- protected function _buildValuesPart($parts) { +- return implode('', $parts); ++ protected function _buildValuesPart($parts, $generator) { ++ return implode('', $this->_stringifyExpressions($parts, $generator)); ++ } ++ ++/** ++ * Helper function used to covert ExpressionInterface objects inside an array ++ * into their string representation ++ * ++ * @param array $expression list of strings and ExpressionInterface objects ++ * @param ValueBinder $generator the placeholder generator to be used in expressions ++ * @return array ++ */ ++ protected function _stringifyExpressions(array $expressions, ValueBinder $generator) { ++ $result = []; ++ foreach ($expressions as $k => $expression) { ++ if ($expression instanceof ExpressionInterface) { ++ $expression = '(' . $expression->sql($generator) . ')'; ++ }","There are a number of classname checks throughout the new code. Would it make sense to have a simple class that implemented the ExpressionInterface that we could use to wrap other values with? That might help remove the various `instanceof` checks throughout the code as all values would implement the ExpressionInterface. +",0,8962e041fb4d9c46bb7cd809418d8f6f92cb2daf +5609729,2,"Blerg. Do we need/want to support that syntax?""",2013-08-06 05:06:40,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1479#discussion_r5609729,2013-08-06T17:06:40Z,2013-08-10T09:37:05Z,markstory,MEMBER,lib/Cake/Database/Query.php,,,,,1,"@@ -1453,6 +1497,48 @@ public function defaultTypes(array $types = null) { + } + + /** ++ * Associates a query placeholder to a value and a type. ++ * ++ * If type is expressed as ""atype[]"" (note braces) then it will cause the ++ * placeholder to be re-written dynamically so if the value is an array, it ++ * will create as many placeholders as values are in it. For example ""string[]"" ++ * will create several placeholders of type string. 
++ * ++ * @param string|integer $token placeholder to be replaced with quoted version ++ * of $value ++ * @param mixed $value the value to be bound ++ * @param string|integer $type the mapped type name, used for casting when sending ++ * to database ++ * @return Query ++ */ ++ public function bind($param, $value, $type) {","Blerg. Do we need/want to support that syntax? +",0,8962e041fb4d9c46bb7cd809418d8f6f92cb2daf +4604322,0,"Yes, please.""",2013-06-09 03:19:14,Phally,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1340#discussion_r4604322,2013-06-09T15:19:14Z,2013-06-16T04:10:38Z,Phally,CONTRIBUTOR,lib/Cake/Database/Expression/QueryExpression.php,,,,,1,"@@ -551,9 +551,12 @@ protected function _parseCondition($field, $value, $types) { + $type = isset($types[$expression]) ? $types[$expression] : null; + $multi = false; + +- if (in_array(strtolower(trim($operator)), ['in', 'not in'])) { ++ $typeMultiple = strpos($type, '[]') !== false; ++ if (in_array(strtolower(trim($operator)), ['in', 'not in']) || $typeMultiple) { + $type = $type ?: 'string'; +- $type .= strpos($type, '[]') === false ? '[]' : null; ++ $type .= $typeMultiple ? null : '[]'; ++ $operator = $operator == '=' ? 'in' : $operator; ++ $operator = $operator == '!=' ? 'not in' : $operator;","Yes, please. +",0,adb78e6c3992279bb0dcede982f7ba86db765a25 +4412256,1,"I like. 
It's not obvious in the diff, but does this allow setting the value 'once' like so: ```php $this->Html->pathPrefix = '//cakephp.org/css'; $this->Html->css('foo'); // http://cakephp.org/css/foo.css $this->Html->css(array('bar', 'spam'); //""",2013-05-28 01:15:40,rchavik,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1305#discussion_r4412256,2013-05-28T13:15:40Z,2013-08-07T02:45:05Z,rchavik,MEMBER,lib/Cake/Test/Case/View/Helper/HtmlHelperTest.php,,421.0,,,21,"@@ -386,6 +386,15 @@ public function testImageTag() { + + $result = $this->Html->image('test.gif?one=two&three=four'); + $this->assertTags($result, array('img' => array('src' => 'img/test.gif?one=two&three=four', 'alt' => ''))); ++ ++ $result = $this->Html->image('test.gif', array('pathPrefix' => '/my/custom/path/')); ++ $this->assertTags($result, array('img' => array('src' => '/my/custom/path/test.gif', 'alt' => ''))); ++ ++ $result = $this->Html->image('test.gif', array('pathPrefix' => 'http://cakephp.org/assets/img/')); ++ $this->assertTags($result, array('img' => array('src' => 'http://cakephp.org/assets/img/test.gif', 'alt' => ''))); ++ ++ $result = $this->Html->image('test.gif', array('pathPrefix' => '//cakephp.org/assets/img/'));","I like. + +It's not obvious in the diff, but does this allow setting the value 'once' like so: + +``` php +$this->Html->pathPrefix = '//cakephp.org/css'; +$this->Html->css('foo'); // http://cakephp.org/css/foo.css +$this->Html->css(array('bar', 'spam'); // http://cakephp.org/css/bar.css http://cakephp.org/css/spam.css +``` +",0,cda1a1e6c996ac91d7f0794b4134889f7f711024 +4288362,0,"Could you not use array_merge() here? 
None of the built-in hashers have nested settings.""",2013-05-18 04:27:10,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1275#discussion_r4288362,2013-05-18T16:27:10Z,2013-05-26T05:59:44Z,markstory,MEMBER,lib/Cake/Controller/Component/Auth/BasePasswordHasher.php,,,,,1,"@@ -0,0 +1,66 @@ ++settings = Hash::merge($this->settings, $settings);","Could you not use array_merge() here? None of the built-in hashers have nested settings. +",0,dd2892ad8d0e3a0b09990b0a9ef26c320f1901fa +2741043,0,"Better to use the `cake.power.gif` image from `Test/test_app/`. Also as per cake's coding standard there should be a space before and after each `.`. You can use https://github.com/cakephp/cakephp-codesniffer """,2013-01-23 01:44:24,ADmad,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1080#discussion_r2741043,2013-01-23T12:44:24Z,2013-01-23T12:53:25Z,ADmad,MEMBER,lib/Cake/Test/Case/BasicsTest.php,,,,,1,"@@ -242,6 +242,10 @@ public function testH() { + $obj = new CakeResponse(array('body' => 'Body content')); + $result = h($obj); + $this->assertEquals('Body content', $result); ++ ++ $invalidString = file_get_contents(CAKE.'..'.DS.'..'.DS.'app'.DS.'webroot'.DS.'favicon.ico');","Better to use the `cake.power.gif` image from `Test/test_app/`. Also as per cake's coding standard there should be a space before and after each `.`. You can use https://github.com/cakephp/cakephp-codesniffer +",0,29326d77308437c1557843c090ef7ab77ac94c88 +2521324,0,"That's quite the selection in behaviour. 
This is all to handle the case where the referring URL & loginRedirect are inaccessible to the user who is logged in?""",2012-12-30 04:17:59,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1053#discussion_r2521324,2012-12-30T15:17:59Z,2013-01-12T05:57:49Z,markstory,MEMBER,lib/Cake/Controller/Component/AuthComponent.php,,222.0,,,1,"@@ -215,11 +215,13 @@ class AuthComponent extends Component { + public $authError = null; + + /** +- * Controls handling of unauthorized access. By default unauthorized user is +- * redirected to the referrer url or AuthComponent::$loginRedirect or '/'. +- * If set to false a ForbiddenException exception is thrown instead of redirecting. ++ * Controls handling of unauthorized access. ++ * - For default value `true` unauthorized user is redirected to the referrer url ++ * or AuthComponent::$loginRedirect or '/'. ++ * - If set to a string or array the value is used as an url to redirect to. ++ * - If set to false a ForbiddenException exception is thrown instead of redirecting.","That's quite the selection in behaviour. This is all to handle the case where the referring URL & loginRedirect are inaccessible to the user who is logged in? +",0,676872d623753db5f748a696582d7298a9a28914 +1940999,0,"Missing spaces after `,`. Please use the code sniffer for these types of errors.""",2012-10-25 02:11:27,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/912#discussion_r1940999,2012-10-25T14:11:27Z,2012-10-25T15:15:54Z,markstory,MEMBER,lib/Cake/Model/Validator/CakeValidationRule.php,,263.0,,,7,"@@ -257,6 +257,9 @@ public function isUpdate($exists = null) { + /** + * Dispatches the validation rule to the given validator method + * ++ * Use ""PluginName.ClassName::method"" as validation rule name to refer to a custom validator object. ++ * /App/Plugin/Model/Validation/ClassName.php ++ * + * @return boolean True if the rule could be dispatched, false otherwise","Missing spaces after `,`. 
Please use the code sniffer for these types of errors. +",0,b843eda89dc380108a48e5c6fc84036170edc3a2 +1735942,0,"I would name it Logger instead. Or at least I would try to avoid exposing the name trait on it as it is different from interfaces in that they act very much like a class""",2012-10-01 18:36:06,lorenzo,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/885#discussion_r1735942,2012-10-02T06:36:06Z,2012-10-03T03:00:37Z,lorenzo,MEMBER,lib/Cake/Log/LogTrait.php,,22.0,,,22,"@@ -0,0 +1,41 @@ ++getMock('Cake\Database\Driver\Mysql'); + $dialect = new MysqlSchema($driver); +- $this->assertEquals($expected, $dialect->convertColumn($input)); ++ $method = $this->getPrivateMethod($dialect, '_convertColumn'); ++ $this->assertEquals($expected, $method->invoke($dialect, $input)); + }","Perhaps this method should not directly tested if you need to use reflection hacks to test it. +",0,e88556e91540fd24aba8a434b351897153e56a13 +6698205,0,"Could simple unset them in CommandTask::getShellList from listing. TBH that is hacky at best. Using reflection to check for a attribute marking it as hidden would slow it down considerably but could be a clean way to do it.""",2013-10-01 08:22:36,WyriHaximus,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1644#discussion_r6698205,2013-10-01T20:22:36Z,2013-10-02T20:32:13Z,WyriHaximus,CONTRIBUTOR,lib/Cake/Test/Case/Console/Command/CommandListShellTest.php,,108.0,,,26,"@@ -98,7 +105,7 @@ public function testMain() { + $expected = ""/\[.*TestPluginTwo.*\] example, welcome/""; + $this->assertRegExp($expected, $output); + +- $expected = ""/\[.*CORE.*\] acl, api, bake, command_list, console, i18n, schema, server, test, testsuite, upgrade/""; ++ $expected = ""/\[.*CORE.*\] acl, api, bake, command_list, completion, console, i18n, schema, server, test, testsuite, upgrade/"";","Could simple unset them in CommandTask::getShellList from listing. TBH that is hacky at best. 
Using reflection to check for a attribute marking it as hidden would slow it down considerably but could be a clean way to do it. +",0,ac9b7f3882ba5ba8ab240508e46fc8d0ff30b056 +6101145,2,"@jrbasso I tried that, does not work :(""",2013-08-31 11:06:08,lorenzo,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1584#discussion_r6101145,2013-08-31T23:06:08Z,2013-09-01T02:33:40Z,lorenzo,MEMBER,lib/Cake/ORM/Query.php,,,,,1,"@@ -558,6 +558,29 @@ public function mapReduce(callable $mapper = null, callable $reducer = null, $ov + } + + /** ++ * Returns the first result out of executed this query, if the query has not been ++ * executed before, it will set the limit clause to 1 for performance reasons. ++ * ++ * ###Example: ++ * ++ * ``$singleUser = $query->select(['id', 'username'])->first()`` ++ * ++ * @return mixed the first result from the ResultSet ++ */ ++ public function first() { ++ if ($this->_dirty) { ++ $this->limit(1); ++ } ++ $this->bufferResults(); ++ $this->_results = $this->execute(); ++ // Calls foreach so we cursor is rewinded automatically ++ foreach ($this->_results as $row) {","@jrbasso I tried that, does not work :( +",0,88c0ab3a2f5be7840d559e8873b0b6562e915416 +5895963,1,"Well the current code sniffs complain about it. 
We could always change the rules :smile:""",2013-08-21 02:07:47,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1544#discussion_r5895963,2013-08-21T14:07:47Z,2013-08-24T16:35:52Z,markstory,MEMBER,lib/Cake/Test/TestCase/ORM/QueryTest.php,,,,,1,"@@ -888,4 +903,105 @@ public function testApplyOptions() { + $this->assertEquals($expected, $query->contain()); + } + ++/** ++ * Tests registering mappers with mapReduce() ++ * ++ * @return void ++ */ ++ public function testMapReduceOnlyMapper() { ++ $mapper1 = function() {}; ++ $mapper2 = function() {}; ++ $query = new Query($this->connection, $this->table); ++ $this->assertSame($query, $query->mapReduce($mapper1)); ++ $this->assertSame([['mapper' => $mapper1]], $query->mapReduce()); ++ ++ $this->assertSame($query, $query->mapReduce($mapper1)); ++ $result = $query->mapReduce(); ++ $this->assertEquals([['mapper' => $mapper1], ['mapper' => $mapper2]], $result); ++ } ++ ++/** ++ * Tests registering mappers and reducers with mapReduce() ++ * ++ * @return void ++ */ ++ public function testMapReduceBothMethods() { ++ $mapper1 = function() {}; ++ $mapper2 = function() {}; ++ $reducer1 = function() {}; ++ $reducer2 = function() {}; ++ $query = new Query($this->connection, $this->table); ++ $this->assertSame($query, $query->mapReduce($mapper1, $reducer1)); ++ $this->assertSame( ++ [['mapper' => $mapper1, 'reducer' => $reducer1]], ++ $query->mapReduce() ++ ); ++ ++ $this->assertSame($query, $query->mapReduce($mapper2, $reducer2)); ++ $this->assertSame( ++ [ ++ ['mapper' => $mapper1, 'reducer' => $reducer1], ++ ['mapper' => $mapper2, 'reducer' => $reducer2] ++ ], ++ $query->mapReduce() ++ ); ++ } ++ ++/** ++ * Tests that it is possible to overwrite previous map reducers ++ * ++ * @return void ++ */ ++ public function testOverwriteMapReduce() { ++ $mapper1 = function() {}; ++ $mapper2 = function() {}; ++ $reducer1 = function() {}; ++ $reducer2 = function() {}; ++ $query = new Query($this->connection, $this->table); ++ 
$this->assertSame($query, $query->mapReduce($mapper1, $reducer1)); ++ $this->assertSame( ++ [['mapper' => $mapper1, 'reducer' => $reducer1]], ++ $query->mapReduce() ++ ); ++ ++ $this->assertSame($query, $query->mapReduce($mapper2, $reducer2, true)); ++ $this->assertSame( ++ [['mapper' => $mapper2, 'reducer' => $reducer2]], ++ $query->mapReduce() ++ ); ++ } ++ ++/** ++ * Tests that multiple map reducers can be stacked ++ * ++ * @return void ++ */ ++ public function testResultsAreWrappedInMapReduce() { ++ $params = [$this->connection, $this->table]; ++ $query = $this->getMock('\Cake\ORM\Query', ['executeStatement'], $params); ++ ++ $statement = $this->getMock('\Database\StatementInterface', ['fetch']); ++ $statement->expects($this->at(0)) ++ ->method('fetch') ++ ->will($this->returnValue(['a' => 1])); ++ $statement->expects($this->at(1)) ++ ->method('fetch') ++ ->will($this->returnValue(['a' => 2])); ++ $statement->expects($this->at(2)) ++ ->method('fetch') ++ ->will($this->returnValue(false)); ++ ++ $query->expects($this->once()) ++ ->method('executeStatement') ++ ->will($this->returnValue($statement)); ++ ++ $query->mapReduce(function($k, $v, $mr) { $mr->emit($v['a']); });","Well the current code sniffs complain about it. We could always change the rules :smile: +",0,65cc26318d4c4fe3e0e590442a746385e46c152a +5575758,0,"A cleaner way could be to json_encode the string casted $message variable.""",2013-08-04 22:08:45,lorenzo,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1477#discussion_r5575758,2013-08-05T10:08:45Z,2013-08-05T11:11:30Z,lorenzo,MEMBER,lib/Cake/View/Helper/FormHelper.php,,,,,1,"@@ -1796,6 +1795,18 @@ public function postLink($title, $url = null, $options = array(), $confirmMessag + } + + /** ++ * Returns a string to be used as onclick handler for confirm messages. 
++ * ++ * @param string $message ++ * @param string $action ++ */ ++ protected function getConfirmHandler($message, $action) { ++ $message = str_replace(array(""'"", '""'), array(""\'"", '\""'), $message);","A cleaner way could be to json_encode the string casted $message variable. +",0,560e4dc93b177f2ca5b522c2ca01dcb8d7b53018 +5436927,2,"This is bad, returning early is a good thing.""",2013-07-26 17:31:55,ADmad,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1455#discussion_r5436927,2013-07-27T05:31:55Z,2013-07-27T05:31:55Z,ADmad,MEMBER,lib/Cake/Controller/Controller.php,,426.0,,,26,"@@ -422,15 +422,20 @@ public function __set($name, $value) { + case 'here': + case 'webroot': + case 'data': +- return $this->request->{$name} = $value; ++ $this->request->{$name} = $value; ++ break;","This is bad, returning early is a good thing. +",0,340847c19786d301a71ad5a34b9401258d7cc067 +3597681,0,"You could use an interface, I don't have a good grasp of whether an interface is better, or whether the current approach is better. I thought that the if statements that were added wouldn't be required if it were a 'pure' decorator. Something like: ```""",2013-03-31 13:30:56,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1210#discussion_r3597681,2013-04-01T01:30:56Z,2013-04-03T22:16:21Z,markstory,MEMBER,lib/Cake/Model/Datasource/Database/Log/LoggingStatement.php,,,,,1,"@@ -0,0 +1,95 @@ ++isEnumeration($real)) { ++ return count($this->enumerationValues($real));","It is trivial, it can be changed if you consider it appropriate... in fact, I just added this here for consistency I suppose. + +Cause the easiest way to 'hack' into generating enum values was to use PHPMyAdmin's approach: to use the (already available) length logic, which achieves the same cause they have the same shape: `string(255)` as opposed to `enum('a','b')`, but the enum value would end up being a string. 
+ +Because there was a need to understand the enum values and later validate (and handle) them, it was better if they were an array instead, so `enumerationValues()` and `isEnumeration()` were born, and the [test](https://github.com/cakephp/cakephp/pull/1011/files#L1R325) inside `describe()` was done. +",0,8ce174647e132787676a237dd4c84e31895da335 +2098343,1,"Sorry, you are right about that, i think we should always unset it in BaseAuthenticate if present in the userdata, not only if its in the conditions.""",2012-11-12 04:25:54,ceeram,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/949#discussion_r2098343,2012-11-12T15:25:54Z,2012-11-12T15:25:54Z,ceeram,CONTRIBUTOR,lib/Cake/Controller/Component/Auth/DirectAuthenticate.php,,79.0,,,79,"@@ -0,0 +1,85 @@ ++request->data = array('User' => array('id' => $userId)); ++ * $this->Auth->authenticate = array('Direct' => array('contain' => array('Role.id'), 'fields'=>array('username' => 'id'))); ++ * $result = $this->Auth->login(); ++ * ++ * This has several advantages over using Auth->login($data) directly: ++ * - You keep it dry, especially when using contain ($data would have to have the exact same data). ++ * - No overhead - retrieving the data prior to the login is not necessary. It's short and easy. ++ * - You keep it centralized, only one single mechanism to login (using your Authentication adapters ++ * and its common _findUser() method). It also respects the scope and contain settings specified ++ * in your AppController just as any other adapter. ++ * ++ */ ++class DirectAuthenticate extends BaseAuthenticate { ++ ++/** ++ * Authenticates the identity contained in a request. Will use the `settings.userModel`, and `settings.fields` ++ * to find POST data that is used to find a matching record in the `settings.userModel`. Will return false if ++ * there is no post data, username is missing, of if the scope conditions have not been met. 
++ * ++ * @param CakeRequest $request The request that contains login information. ++ * @param CakeResponse $response Unused response object. ++ * @return mixed. False on login failure. An array of User data on success. ++ */ ++ public function authenticate(CakeRequest $request, CakeResponse $response) { ++ $userModel = $this->settings['userModel']; ++ list($plugin, $model) = pluginSplit($userModel); ++ ++ $fields = $this->settings['fields']; ++ if (!$this->_checkFields($request, $model, $fields)) { ++ return false; ++ } ++ $conditions = array( ++ $model . '.' . $fields['username'] => $request->data[$model][$fields['username']] ++ ); ++ return $this->_findUser($conditions); ++ } ++ ++/** ++ * Checks the fields to ensure they are supplied. ++ * ++ * @param CakeRequest $request The request that contains login information. ++ * @param string $model The model used for login verification. ++ * @param array $fields The fields to be checked. ++ * @return boolean False if the fields have not been supplied. True if they exist. ++ */ ++ protected function _checkFields(CakeRequest $request, $model, $fields) { ++ if (empty($request->data[$model])) { ++ return false; ++ } ++ if (empty($request->data[$model][$fields['username']])) { ++ return false; ++ } ++ return true; ++ } ++ ++/** ++ * Find a user record using the standard options. ++ * ++ * The $conditions parameter can be a (string)username or an array containing conditions for Model::find('first'). ++ * ++ * @param array $conditions An array of find conditions. ++ * @return Mixed Either false on failure, or an array of user data. 
++ */ ++ protected function _findUser($conditions, $password = null) { ++ $userModel = $this->settings['userModel']; ++ list($plugin, $model) = pluginSplit($userModel); ++ $fields = $this->settings['fields']; ++ ++ $user = parent::_findUser($conditions); ++ if (isset($user[$fields['password']])) {","Sorry, you are right about that, i think we should always unset it in BaseAuthenticate if present in the userdata, not only if its in the conditions. +",0,0be9ccd832417a542f3903e0371986a2d6c748de +2015276,2,"I disagree. Passing """"::somestring"""" would be invalid either way and the developers fault (you cant possibly cover all mistakes possible here). But using the type safe check we would at least result in a meaningful exception: """"trigger_error(__d('cake_dev'",2012-11-02 00:52:48,dereuromark,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/913#discussion_r2015276,2012-11-02T11:52:48Z,2013-01-20T08:13:47Z,dereuromark,MEMBER,lib/Cake/Model/Validator/CakeValidationRule.php,,,,,1,"@@ -271,6 +274,17 @@ public function process($field, &$data, &$methods) { + $this->_valid = call_user_func_array($methods[$rule], $this->_ruleParams); + } elseif (class_exists('Validation') && method_exists('Validation', $this->_rule)) { + $this->_valid = call_user_func_array(array('Validation', $this->_rule), $this->_ruleParams); ++ } elseif (strpos($this->_rule, '::')) {","I disagree. Passing ""::somestring"" would be invalid either way and the developers fault (you cant possibly cover all mistakes possible here). +But using the type safe check we would at least result in a meaningful exception: ""trigger_error(__d('cake_dev', 'Could not find custom validation rule %s'"" instead of trying to execute it as a preg match string. 
+",0,b2cbeeeb501285431bdc7fb2a5b37f260b792261 +6377811,0,"Makes sense, I haven't tested getters/setters yet""",2013-09-16 02:30:34,lorenzo,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1639#discussion_r6377811,2013-09-16T14:30:34Z,2013-10-19T14:50:16Z,lorenzo,MEMBER,Cake/ORM/Entity.php,,,,,1,"@@ -0,0 +1,139 @@ ++get($property); ++ } ++ ++/** ++ * Magic setter to add or edit a property in this entity ++ * ++ * @param string $property the name of the property to set ++ * @param mixed $value the value to set to the property ++ * @return void ++ */ ++ public function __set($property, $value) { ++ $this->set([$property => $value]); ++ } ++ ++/** ++ * Set a hashed array as properties in this entity by converting each ++ * key => value pair into properties in this object. ++ * ++ * ## Example: ++ * ++ * {{ ++ * $entity->set(['name' => 'andrew', 'id' => 1]); ++ * echo $entity->name // prints andrew ++ * echo $entity->id // prints 1 ++ * }} ++ * ++ * @param array $properties list of properties to set ++ * @param boolean $useSetters whether to use setter functions in this object ++ * or bypass them ++ * @return \Cake\ORM\Entity ++ */ ++ public function set(array $properties = [], $useSetters = true) { ++ if (!$useSetters) { ++ $this->_properties = $properties + $this->_properties; ++ return $this; ++ } ++ ++ foreach($properties as $property => $value) { ++ if (method_exists($this, 'set' . ucFirst($property))) { ++ $value = $this->{'set' . ucFirst($property)}($value); ++ } ++ $this->_properties[$property] = $value; ++ } ++ return $this; ++ } ++ ++/** ++ * Returns the value of a property by name ++ * ++ * @param string $property the name of the property to retrieve ++ * @return mixed ++ */ ++ public function &get($property) { ++ $method = 'get' . 
ucFirst($property); ++ if (method_exists($this, $method)) { ++ $value =& $this->{$method}();","Makes sense, I haven't tested getters/setters yet +",0,299e7b81cf0f0f975497454d75a8e191ce1e3c88 +2827387,0,"This is not something you can just do. It will break every encoded cookie out in the wild.""",2013-01-30 04:13:44,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1100#discussion_r2827387,2013-01-30T15:13:44Z,2013-01-30T18:06:50Z,markstory,MEMBER,lib/Cake/Controller/Component/CookieComponent.php,,,,,1,"@@ -131,11 +131,11 @@ class CookieComponent extends Component { + * Type of encryption to use. + * + * Currently two methods are available: cipher and rijndael +- * Defaults to Security::cipher(); ++ * Defaults to Security::rijndael(); + * + * @var string + */ +- protected $_type = 'cipher'; ++ protected $_type = 'rijndael';","This is not something you can just do. It will break every encoded cookie out in the wild. +",0,c9b14115bf619f0585524e431fae2ac9577a1b98 +5912524,2,"It is too bad a trait cannot implement an interface :(""",2013-08-21 11:17:48,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1544#discussion_r5912524,2013-08-21T23:17:48Z,2013-08-24T16:35:52Z,markstory,MEMBER,lib/Cake/ORM/ResultCollectionTrait.php,,27.0,,,27,"@@ -0,0 +1,72 @@ ++expects($this->any())->method('message')->will($this->returnValue(array('First Line', 'Second Line', '.Third Line', ''))); + + $data = ""From: CakePHP Test \r\n""; +- $data .= ""Return-Path: CakePHP Return \r\n"";","It seems you are right. I've just checked and indeed it has overwritten my own Return-Path. 
+",0,796e4b45dd092c7eb1a0ab128280578523191ef3 +3507806,0,"Moving the `!empty($config['log'])` check also inside the `_skipLogging()` function sounds good to me.""",2013-03-24 21:23:39,ADmad,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1186#discussion_r3507806,2013-03-25T08:23:39Z,2013-03-26T14:10:02Z,ADmad,MEMBER,lib/Cake/Error/ErrorHandler.php,,,,,1,"@@ -110,7 +110,7 @@ class ErrorHandler { + */ + public static function handleException(Exception $exception) { + $config = Configure::read('Exception'); +- if (!empty($config['log'])) { ++ if (!empty($config['log']) && !self::_skipLogging($exception, $config)) {","Moving the `!empty($config['log'])` check also inside the `_skipLogging()` function sounds good to me. +",0,6bf9363217137b6deb1dccb44ecc7d93875d49a3 +1714707,0,"Indentation is not correct here. Also there should be space before and after `=`. You can use the [codesniffer](https://github.com/cakephp/cakephp-codesniffer) to avoid such errors.""",2012-09-27 17:27:22,ADmad,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/872#discussion_r1714707,2012-09-28T05:27:22Z,2012-09-28T06:07:43Z,ADmad,MEMBER,lib/Cake/Cache/Engine/WincacheEngine.php,,,,,1,"@@ -183,6 +183,7 @@ public function groups() { + * @return boolean success + **/ + public function clearGroup($group) { ++ $success=0;","Indentation is not correct here. Also there should be space before and after `=`. You can use the [codesniffer](https://github.com/cakephp/cakephp-codesniffer) to avoid such errors. +",0,f4565d51fad4ba81906f085cd5abf043d4852365 +1625821,0,"Indeed the fieldList is a whitelist. 
I too think it should be a protected method.""",2012-09-17 17:05:16,ADmad,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/851#discussion_r1625821,2012-09-18T05:05:16Z,2012-11-02T09:54:30Z,ADmad,MEMBER,lib/Cake/Model/Model.php,,,,,1,"@@ -2299,6 +2302,29 @@ public function saveAssociated($data = null, $options = array()) { + } + + /** ++ * Helper method for saveAll() and friends, to add foreign key to fieldlist ++ * ++ * @param string $key fieldname to be added to list ++ * @param array $options ++ * @return array $options ++ */ ++ public function addToFieldList($key, $options) {","Indeed the fieldList is a whitelist. I too think it should be a protected method. +",0,7007dba0eb836f852aaca95fada103bc4ba993a9 +1611246,0,"Isn't this a behavior change as well, the count could be > 1 now and pass.""",2012-09-14 07:58:35,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/846#discussion_r1611246,2012-09-14T19:58:35Z,2012-10-01T01:08:06Z,markstory,MEMBER,lib/Cake/View/Helper.php,,530.0,,,14,"@@ -526,12 +526,11 @@ public function setEntity($entity, $setScope = false) { + + $isHabtm = ( + isset($this->fieldset[$this->_modelScope]['fields'][$parts[0]]['type']) && +- $this->fieldset[$this->_modelScope]['fields'][$parts[0]]['type'] === 'multiple' && +- $count == 1","Isn't this a behavior change as well, the count could be > 1 now and pass. 
+",0,408e619c9fa6f114ca0531713e1df996165637eb +808124,0,"Missing `{` and `}` as per the [coding standards](http://book.cakephp.org/2.0/en/contributing/cakephp-coding-conventions.html)""",2012-05-11 00:00:17,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/647#discussion_r808124,2012-05-11T12:00:17Z,2012-05-11T12:00:17Z,markstory,MEMBER,lib/Cake/Network/CakeResponse.php,,378.0,,,4,"@@ -375,6 +375,8 @@ public function send() { + $this->_setContentLength(); + $this->_setContentType(); + foreach ($this->_headers as $header => $value) { ++ if (is_array($value))","Missing `{` and `}` as per the [coding standards](http://book.cakephp.org/2.0/en/contributing/cakephp-coding-conventions.html) +",0,5324365d94c7f598bc133adb0cc64217108efd9a +374831,2,"Sure that makes sense. I also broke the build - AllTests fails because of classnames - so I'm fixing that as well.""",2012-01-22 10:01:47,josegonzalez,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/437#discussion_r374831,2012-01-22T21:01:47Z,2012-01-22T21:01:47Z,josegonzalez,MEMBER,lib/Cake/View/View.php,,308.0,,,17,"@@ -299,6 +305,9 @@ public function __construct($controller) { + $this->{$var} = $controller->{$var}; + } + $this->_eventManager = $controller->getEventManager(); ++ if (!empty($controller->theme)) {","Sure that makes sense. I also broke the build - AllTests fails because of classnames - so I'm fixing that as well. +",0,c2519e702d26845654e25125412a8ad1485ed73f +284853,1,"Thanks! Fixed. You think after all those code standards commits I would have caught this. :)""",2011-12-10 06:07:31,shama,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/371#discussion_r284853,2011-12-10T17:07:31Z,2011-12-10T17:07:31Z,shama,CONTRIBUTOR,lib/Cake/TestSuite/CakeTestSuite.php,,,,,1,"@@ -52,6 +52,9 @@ public function addTestDirectoryRecursive($directory = '.') { + $files = $Folder->tree(null, false, 'files'); + + foreach ($files as $file) { ++ if (strpos($file, DS.'.') !== false) {","Thanks! Fixed. 
You think after all those code standards commits I would have caught this. :) +",0,17fcd0534fac8f672ef03b126faebb73f1edcfe1 +112620,2,"I really don't care much if PHP4 people can't run tests. I'd rather be able to ensure code isn't buggy than ensure compatibility with PHP4.""",2011-09-04 02:39:31,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/182#discussion_r112620,2011-09-04T14:39:31Z,2011-09-04T14:39:31Z,markstory,MEMBER,cake/tests/cases/libs/view/helpers/time.test.php,,414.0,,,6,"@@ -409,6 +409,16 @@ class TimeHelperTest extends CakeTestCase { + */ + function testToRss() { + $this->assertEqual(date('r'), $this->Time->toRss(time())); ++ ++ if (!$this->skipIf(!class_exists('DateTimeZone'), '%s DateTimeZone class not available.')) { ++ $timezones = array('Europe/London', 'Europe/Brussels', 'UTC', 'America/Denver', 'America/Caracas', 'Asia/Kathmandu');","I really don't care much if PHP4 people can't run tests. I'd rather be able to ensure code isn't buggy than ensure compatibility with PHP4. +",0,da4b75c99383957b3974b019a28dd1777765fe6d +6485024,1,"Perhaps reversing the previous decision is best. Having isset() and the ArrayAccess interface work as expected is more important than handling null values. 
Perhaps a separate method could be used to see if a property is defined?""",2013-09-20 00:14:52,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1639#discussion_r6485024,2013-09-20T12:14:52Z,2013-10-19T14:50:16Z,markstory,MEMBER,Cake/ORM/Entity.php,,,,,1,"@@ -0,0 +1,250 @@ ++ 1, 'name' => 'Andrew'])`` ++ * ++ * @param array $properties hash of properties to set in this entity ++ * @param boolean $useSetters whether use internal setters for properties or not ++ * @return void ++ */ ++ public function __construct(array $properties = [], $useSetters = true) { ++ $this->set($properties, $useSetters); ++ } ++ ++/** ++ * Magic getter to access properties that has be set in this entity ++ * ++ * @param string $property name of the property to access ++ * @return mixed ++ */ ++ public function &__get($property) { ++ return $this->get($property); ++ } ++ ++/** ++ * Magic setter to add or edit a property in this entity ++ * ++ * @param string $property the name of the property to set ++ * @param mixed $value the value to set to the property ++ * @return void ++ */ ++ public function __set($property, $value) { ++ $this->set([$property => $value]); ++ } ++ ++/** ++ * Returns whether this entity contains a property named $property ++ * regardless of if it is empty. ++ * ++ * @see \Cake\ORM\Entity::has() ++ * @param string $property ++ * @return boolean ++ */ ++ public function __isset($property) { ++ return $this->has($property); ++ } ++ ++/** ++ * Removes a property from this entity ++ * ++ * @param string $property ++ * @return void ++ */ ++ public function __unset($property) { ++ $this->unsetProperty($property); ++ } ++ ++/** ++ * Sets a single property inside this entity. 
++ * ++ * ### Example: ++ * ++ * ``$entity->set('name', 'Andrew');`` ++ * ++ * It is also possible to mass-assign multiple properties to this entity ++ * with one call by passing a hashed array as properties in the form of ++ * property => value pairs ++ * ++ * ## Example: ++ * ++ * {{ ++ * $entity->set(['name' => 'andrew', 'id' => 1]); ++ * echo $entity->name // prints andrew ++ * echo $entity->id // prints 1 ++ * }} ++ * ++ * Some times it is handy to bypass setter functions in this entity when assigning ++ * properties. You can achieve this by setting the third argument to false when ++ * assigning a single property or the second param when using an array of ++ * properties. ++ * ++ * ### Example: ++ * ++ * ``$entity->set('name', 'Andrew', false);`` ++ * ++ * ``$entity->set(['name' => 'Andrew', 'id' => 1], false);`` ++ * ++ * @param string|array $property the name of property to set or a list of ++ * properties with their respective values ++ * @param mixed|boolean $value the value to set to the property or a boolean ++ * signifying whether to use internal setter functions or not ++ * @param boolean $useSetters whether to use setter functions in this object ++ * or bypass them ++ * @return \Cake\ORM\Entity ++ */ ++ public function set($property, $value = true, $useSetters = true) { ++ if (is_string($property)) { ++ $property = [$property => $value]; ++ } else { ++ $useSetters = $value; ++ } ++ ++ if (!$useSetters) { ++ $this->_properties = $property + $this->_properties; ++ return $this; ++ } ++ ++ foreach ($property as $p => $value) { ++ if (method_exists($this, 'set' . ucFirst($p))) { ++ $value = $this->{'set' . ucFirst($p)}($value); ++ } ++ $this->_properties[$p] = $value; ++ } ++ return $this; ++ } ++ ++/** ++ * Returns the value of a property by name ++ * ++ * @param string $property the name of the property to retrieve ++ * @return mixed ++ */ ++ public function &get($property) { ++ $method = 'get' . 
ucFirst($property); ++ $value = null; ++ ++ if (isset($this->_properties[$property])) { ++ $value =& $this->_properties[$property]; ++ } ++ ++ if (method_exists($this, $method)) { ++ $value = $this->{$method}($value); ++ } ++ return $value; ++ } ++ ++/** ++ * Returns whether this entity contains a property named $property ++ * regardless of if it is empty. ++ * ++ * ### Example: ++ * ++ * {{{ ++ * $entity = new Entity(['id' => 1, 'name' => null]); ++ * $entity->has('id'); // true ++ * $entity->has('name'); // true ++ * $entity->has('last_name'); // false ++ * }}} ++ * ++ * @param string $property ++ * @return boolean ++ */ ++ public function has($property) { ++ $set = array_key_exists($property, $this->_properties); ++ return $set || method_exists($this, 'get' . ucFirst($property));","Perhaps reversing the previous decision is best. Having isset() and the ArrayAccess interface work as expected is more important than handling null values. Perhaps a separate method could be used to see if a property is defined? 
+",0,299e7b81cf0f0f975497454d75a8e191ce1e3c88 +6687028,0,"What if you had 2 fields: * 1 string with 1$-dollar signs * 1 string with 1€-euro signs would these 2 not get the same id?""",2013-10-01 02:07:17,kimegede,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1635#discussion_r6687028,2013-10-01T14:07:17Z,2013-12-24T14:20:46Z,kimegede,CONTRIBUTOR,lib/Cake/Test/Case/View/HelperTest.php,,,,,1,"@@ -854,6 +854,32 @@ public function testClean() { + } + + /** ++ * testDomId method ++ * ++ * @return void ++ */ ++ public function testDomId() { ++ $result = $this->Helper->domId('Foo.bar'); ++ $this->assertEquals('FooBar', $result); ++ } ++ ++/** ++ * testDomIdSuffix method ++ * ++ * @return void ++ */ ++ public function testDomIdSuffix() { ++ $result = $this->Helper->domIdSuffix('1 string with 1$-dollar signs', 'xhtml'); ++ $this->assertEquals('1StringWith1-dollarSigns', $result);","What if you had 2 fields: +- 1 string with 1$-dollar signs +- 1 string with 1€-euro signs + +would these 2 not get the same id? +",0,b392254c9211aa521f44f4a4b61bb5b11e4b2b81 +6307484,2,"What IDE do you guys use? Mine has spelling correction so I never run into these mistakes!""",2013-09-11 09:21:57,milesj,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1621#discussion_r6307484,2013-09-11T21:21:57Z,2013-09-12T01:23:58Z,milesj,NONE,lib/Cake/Core/Plugin.php,,,,,1,"@@ -36,37 +34,73 @@ class Plugin { + * + * @var array + */ +- protected static $_plugins = array(); ++ protected static $_plugins = []; + + /** +- * Loads a plugin and optionally loads bootstrapping, routing files or loads a initialization function ++ * Loads a plugin and optionally loads bootstrapping, ++ * routing files or runs a initialization function. ++ * ++ * Plugins only need to be loaded if you want bootstrapping/routes/cli commands to ++ * be exposed. If your plugin doese not expose any of these features you do not need","What IDE do you guys use? Mine has spelling correction so I never run into these mistakes! 
+",0,0480325e8404f7792144aef7bd1e2b681b24d983 +6189897,0,"@ADmad Very true! testing with ```array_key_exists()``` now. @markstory @ravage84 It does, but I targeted master because I considered a bug: . throwing undefined index when a key is not in the ```viewVars``` array . returning a JSON encoded array whe""",2013-09-05 06:36:15,bar,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1613#discussion_r6189897,2013-09-05T18:36:15Z,2013-09-07T05:39:06Z,bar,CONTRIBUTOR,lib/Cake/View/JsonView.php,,,,,1,"@@ -130,13 +130,15 @@ public function render($view = null, $layout = null) { + */ + protected function _serialize($serialize) { + if (is_array($serialize)) { +- $data = array(); + foreach ($serialize as $alias => $key) { + if (is_numeric($alias)) { + $alias = $key; + } +- $data[$alias] = $this->viewVars[$key]; ++ if (isset($this->viewVars[$key])) {","@ADmad Very true! testing with `array_key_exists()` now. + +@markstory @ravage84 It does, but I targeted master because I considered a bug: +. throwing undefined index when a key is not in the `viewVars` array +. returning a JSON encoded array when no key is found and array is given to `_serialize` +",0,c3ee9a20586eed65edb3fa9b7e6107178bba1b8e +6186448,0,"Won't these changes cause strict errors in applications? Their code might have previously conformed to the old method signatures but will not anymore.""",2013-09-05 04:33:49,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1610#discussion_r6186448,2013-09-05T16:33:49Z,2013-09-05T16:33:49Z,markstory,MEMBER,lib/Cake/Model/ModelBehavior.php,,172.0,,,22,"@@ -163,9 +165,11 @@ public function afterValidate(Model $model) { + * will abort the save operation. + * + * @param Model $model Model using this behavior ++ * @param array $options Options passed from Model::save(). + * @return mixed False if the operation should abort. Any other result will continue. 
++ * @see Model::save() + */ +- public function beforeSave(Model $model) { ++ public function beforeSave(Model $model, $options = array()) {","Won't these changes cause strict errors in applications? Their code might have previously conformed to the old method signatures but will not anymore. +",0,c524645738e666bf4a298d15e6fd6f39d7c6992e +5891580,0,"Should it still be a early-return or something in the lines of: ```php if ($this->_useBufferedResults) { $resultSet = new BufferedResultSet($this, $this->executeStatement()); } else { $resultSet = new ResultSet($this, $this->executeStatemen""",2013-08-20 22:08:44,renansaddam,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1544#discussion_r5891580,2013-08-21T10:08:44Z,2013-08-24T16:35:52Z,renan,CONTRIBUTOR,lib/Cake/ORM/Query.php,,393.0,,,28,"@@ -375,16 +383,18 @@ public function setResult($results) { + * Resulting object is traversable, so it can be used in any loop as you would + * with an array. + * +- * @return Cake\ORM\ResultSet ++ * @return Cake\ORM\ResultCollectionTrait + */ + public function execute() { + if (isset($this->_results)) { + return $this->_results; + } + if ($this->_useBufferedResults) { +- return new BufferedResultSet($this, parent::execute()); ++ return $this->_applyFormatters(","Should it still be a early-return or something in the lines of: + +``` php +if ($this->_useBufferedResults) { + $resultSet = new BufferedResultSet($this, $this->executeStatement()); +} else { + $resultSet = new ResultSet($this, $this->executeStatement()); +} +return $this->_applyFormatters($resultSet); +``` + +or even: + +``` php +$resultSetClass = ($this->_useBufferedResults) ? 
'BufferedResultSet' : 'ResultSet'; +return $this->_applyFormatters(new $resultSetClass($this, $this->executeStatement())); +``` +",0,65cc26318d4c4fe3e0e590442a746385e46c152a +5621790,2,"Because I hadn't seen that, this is very out of place :( Will fix in a few.""",2013-08-06 13:23:24,tigrang,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1483#discussion_r5621790,2013-08-07T01:23:24Z,2013-08-07T01:31:13Z,tigrang,CONTRIBUTOR,lib/Cake/Utility/Debugger.php,,,,,1,"@@ -576,6 +576,11 @@ protected static function _object($var, $depth, $indent) { + $out = ''; + $props = array(); + ++ $type = gettype($var); ++ if ($type === 'unknown type') { ++ return $type; ++ }","Because I hadn't seen that, this is very out of place :( Will fix in a few. +",0,2150e8dce51c485808dafe67b9a415d4e9131ef6 +4604177,0,"Do you mean that some association types don't implement more than one strategy to fetch records?""",2013-06-09 02:21:56,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1340#discussion_r4604177,2013-06-09T14:21:56Z,2013-06-16T04:10:38Z,markstory,MEMBER,lib/Cake/ORM/Association.php,,124.0,,,124,"@@ -0,0 +1,406 @@ ++{'_' . $property} = $options[$property]; ++ } ++ } ++ ++ $this->_name = $name; ++ $this->_options($options); ++ ++ if (empty($this->_property)) { ++ $this->property($name); ++ } ++ ++ if (!empty($options['strategy'])) { ++ $this->strategy($options['strategy']); ++ } ++ } ++ ++/** ++ * Sets the name for this association. If no argument is passed then the current ++ * configured name will be returned ++ * ++ * @param string $name Name to be assigned ++ * @return string ++ */ ++ public function name($name = null) { ++ if ($name !== null) { ++ $this->_name = $name; ++ } ++ return $this->_name; ++ } ++ ++/** ++ * Sets the table instance for the source side of the association. 
If no arguments ++ * are passed, the current configured table instance is returned ++ * ++ * @param Cake\ORM\Table $table the instance to be assigned as source side ++ * @return Cake\ORM\Table ++ */ ++ public function source(Table $table = null) { ++ if ($table === null) { ++ return $this->_sourceTable; ++ } ++ return $this->_sourceTable = $table; ++ } ++ ++/** ++ * Sets the table instance for the target side of the association. If no arguments ++ * are passed, the current configured table instance is returned ++ * ++ * @param Cake\ORM\Table $table the instance to be assigned as target side ++ * @return Cake\ORM\Table ++ */ ++ public function target(Table $table = null) { ++ if ($table === null && $this->_targetTable) { ++ return $this->_targetTable; ++ } ++ ++ if ($table !== null) { ++ return $this->_targetTable = $table; ++ } ++ ++ if ($table === null) { ++ $className = $this->_className; ++ $this->_targetTable = Table::build($this->_name, compact('className')); ++ } ++ return $this->_targetTable; ++ } ++ ++/** ++ * Sets a list of conditions to be always included when fetching records from ++ * the target association. If no parameters are passed current list is returned ++ * ++ * @param array $conditions list of conditions to be used ++ * @see Cake\Database\Query::where() for examples on the format of the array ++ * @return array ++ */ ++ public function conditions($conditions = null) { ++ if ($conditions !== null) { ++ $this->_conditions = $conditions; ++ } ++ return $this->_conditions; ++ } ++ ++/** ++ * Sets the name of the field representing the foreign key to the target table. 
++ * If no parameters are passed current field is returned ++ * ++ * @param string $key the key to be used to link both tables together ++ * @return string ++ */ ++ public function foreignKey($key = null) { ++ if ($key !== null) { ++ $this->_foreignKey = $key; ++ } ++ return $this->_foreignKey; ++ } ++ ++/** ++ * Sets Whether the records on the target table are dependent on the source table, ++ * often used to indicate that records should be removed is the owning record in ++ * the source table is deleted. ++ * If no parameters are passed current setting is returned. ++ * ++ * @param boolean $dependent ++ * @return boolean ++ */ ++ public function dependent($dependent = null) { ++ if ($dependent !== null) { ++ $this->_dependent = $dependent; ++ } ++ return $this->_dependent; ++ } ++ ++/** ++ * Whether this association can be expressed directly in a query join ++ * ++ * @param array $options custom options key that could alter the return value ++ * @return boolean ++ */ ++ public function canBeJoined($options = []) {","Yes, they are +",0,adb78e6c3992279bb0dcede982f7ba86db765a25 +4520550,0,"You can use the official [codesniffer](https://github.com/cakephp/cakephp-codesniffer).""",2013-06-03 23:58:28,ADmad,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1323#discussion_r4520550,2013-06-04T11:58:28Z,2013-06-04T14:39:15Z,ADmad,MEMBER,lib/Cake/Model/Model.php,,,,,1,"@@ -1497,6 +1497,14 @@ public function create($data = array(), $filterKey = false) { + } + return $this->data; + } ++ ++/** ++ * This function is a convenient wrapper class to create(false) and, as the name suggests, clears the id, data, and validation errors. ++ * ++ * @return array The current Model::data; after clearing via create(false) ++ * @see Model::create() ++ */ ++ public function clear() { return $this->create(false); }","You can use the official [codesniffer](https://github.com/cakephp/cakephp-codesniffer). 
+",0,1a164c21422136bbd432574c3a7fc8e7e4ead41a +3788818,2,"I too dislike having huge lists in classes and the memory overhead. Can't we come up with a better way to avoid the extra memory cost for those who don't need this feature by having the list load conditionally from a config file and/or through `Configure:""",2013-04-14 18:27:20,ADmad,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1229#discussion_r3788818,2013-04-15T06:27:20Z,2013-04-15T06:27:20Z,ADmad,MEMBER,lib/Cake/Network/CakeRequest.php,,992.0,,,355,"@@ -646,7 +986,13 @@ public function host() { + public function domain($tldLength = 1) { + $segments = explode('.', $this->host()); + $domain = array_slice($segments, -1 * ($tldLength + 1)); +- return implode('.', $domain); ++ $domain = implode('.', $domain); ++ ++ if (in_array('.' . $domain, $this->_slds)) { ++ return $this->domain($tldLength + 1);","I too dislike having huge lists in classes and the memory overhead. Can't we come up with a better way to avoid the extra memory cost for those who don't need this feature by having the list load conditionally from a config file and/or through `Configure::load()/write()`? +",0,db927fd419f56f733eb8106ea0a0f423e2993006 +3503370,0,"All right, I'll fix it up.""",2013-03-23 23:07:34,dereuromark,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1191#discussion_r3503370,2013-03-24T10:07:34Z,2013-03-24T10:09:06Z,dereuromark,MEMBER,lib/Cake/Utility/Validation.php,,624.0,,,1,"@@ -617,8 +617,8 @@ public static function phone($check, $regex = null, $country = 'all') { + if (is_null($regex)) { + switch ($country) { + case 'us': ++ case 'ca': + case 'all': +- case 'can': + // includes all NANPA members.","All right, I'll fix it up. +",0,f633e59091c41e42741971df7aeb6aa4c21c8d58 +2869489,0,"Should probably also add the solution. i.e. 
you should either copy or symlink the plugin assets into webroot""",2013-02-02 08:24:28,ADmad,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1113#discussion_r2869489,2013-02-02T19:24:28Z,2013-02-03T15:37:15Z,ADmad,MEMBER,app/Config/core.php,,97.0,,,7,"@@ -92,7 +92,9 @@ + * /app/.htaccess + * /app/webroot/.htaccess + * +- * And uncomment the App.baseUrl below: ++ * And uncomment the App.baseUrl below. But keep in mind ++ * that plugin assets such as images, CSS and Javascript files ++ * will not work without url rewriting!","Should probably also add the solution. i.e. you should either copy or symlink the plugin assets into webroot +",0,804753ff6c5c5c5acc73952ae8511f5af0ba2217 +2020462,1,"Better not give the code sniffer reasons to complain :smile: """,2012-11-02 08:46:04,ADmad,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/934#discussion_r2020462,2012-11-02T19:46:04Z,2012-11-03T21:14:08Z,ADmad,MEMBER,lib/Cake/Test/Case/View/XmlViewTest.php,,,,,5,"@@ -64,6 +64,14 @@ public function testRenderWithoutView() { + + $expected = Xml::build(array('response' => array('users' => $data)))->asXML(); + $this->assertSame($expected, $output); ++ ++","Better not give the code sniffer reasons to complain :smile: +",0,c3af467476a9a1b2e46f69e5cef76c3dfa2e205b +2628267,0,"@markstory Why do you trim the dot from the domain? It can cause security issue. The `cakephp.org` cookie domain is accessible only by the `http://cakephp.org`, but the `.cakephp.org` cookie can be accessed by `http://cakephp.org` and `http://bakery.cakep""",2013-01-12 13:26:23,jrbasso,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1057#discussion_r2628267,2013-01-13T00:26:23Z,2013-01-24T02:45:59Z,jrbasso,MEMBER,lib/Cake/Network/Http/Client.php,,,,,1,"@@ -0,0 +1,527 @@ ++get('/users', [], ['type' => 'json']);` ++ * ++ * The `type` option sets both the `Content-Type` and `Accept` header, to ++ * the same mime type. When using `type` you can use either a full mime ++ * type or an alias. 
If you need different types in the Accept and Content-Type ++ * headers you should set them manually and not use `type` ++ * ++ * ### Using authentication ++ * ++ * By using the `auth` key you can use authentication. The type sub option ++ * can be used to specify which authentication strategy you want to use. ++ * CakePHP comes with a few built-in strategies: ++ * ++ * - Basic ++ * - Digest ++ * - Oauth ++ * ++ * ### Using proxies ++ * ++ * By using the `proxy` key you can set authentication credentials for ++ * a proxy if you need to use one.. The type sub option can be used to ++ * specify which authentication strategy you want to use. ++ * CakePHP comes with built-in support for basic authentication. ++ * ++ */ ++class Client { ++ ++/** ++ * Stored configuration for the client. ++ * ++ * @var array ++ */ ++ protected $_config = [ ++ 'host' => null, ++ 'port' => null, ++ 'scheme' => 'http', ++ 'timeout' => 30, ++ 'ssl_verify_peer' => true, ++ 'ssl_verify_depth' => 5, ++ 'ssl_verify_host' => true, ++ 'redirect' => false, ++ ]; ++ ++/** ++ * List of cookies from responses made with this client. ++ * ++ * Cookies are indexed by the cookie's domain or ++ * request host name. ++ * ++ * @var array ++ */ ++ protected $_cookies = []; ++ ++/** ++ * Adapter for sending requests. Defaults to ++ * Cake\Network\Http\Stream ++ * ++ * @var Cake\Network\Http\Stream ++ */ ++ protected $_adapter; ++ ++/** ++ * Create a new HTTP Client. ++ * ++ * ### Config options ++ * ++ * You can set the following options when creating a client: ++ * ++ * - host - The hostname to do requests on. ++ * - port - The port to use. ++ * - scheme - The default scheme/protocol to use. Defaults to http. ++ * - timeout - The timeout in seconds. Defaults to 30 ++ * - ssl_verify_peer - Whether or not SSL certificates should be validated. ++ * Defaults to true. ++ * - ssl_verify_depth - The maximum certificate chain depth to travers. ++ * Defaults to 5. 
++ * - ssl_verify_host - Verify that the certificate and hostname match. ++ * Defaults to true. ++ * - redirect - Number of redirects to follow. Defaults to false. ++ * ++ * @param array $config Config options for scoped clients. ++ */ ++ public function __construct($config = []) { ++ $adapter = 'Cake\Network\Http\Adapter\Stream'; ++ if (isset($config['adapter'])) { ++ $adapter = $config['adapter']; ++ unset($config['adapter']); ++ } ++ $this->config($config); ++ ++ if (is_string($adapter)) { ++ $adapter = new $adapter(); ++ } ++ $this->_adapter = $adapter; ++ } ++ ++/** ++ * Get or set additional config options. ++ * ++ * Setting config will use Hash::merge() for appending into ++ * the existing configuration. ++ * ++ * @param array|null $config Configuration options. null to get. ++ * @return this|array ++ */ ++ public function config($config = null) { ++ if ($config === null) { ++ return $this->_config; ++ } ++ $this->_config = Hash::merge($this->_config, $config); ++ return $this; ++ } ++ ++/** ++ * Get the cookies stored in the Client. ++ * ++ * Returns an array of cookie data arrays. ++ * ++ * @return array ++ */ ++ public function cookies() { ++ return $this->_cookies; ++ } ++ ++/** ++ * Do a GET request. ++ * ++ * The $data argument supports a special `_content` key ++ * for providing a request body in a GET request. This is ++ * generally not used but services like ElasticSearch use ++ * this feature. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The query data you want to send. ++ * @param array $options Additional options for the request. 
++ * @return Cake\Network\Http\Response ++ */ ++ public function get($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $body = []; ++ if (isset($data['_content'])) { ++ $body = $data['_content']; ++ unset($data['_content']); ++ } ++ $url = $this->buildUrl($url, $data, $options); ++ return $this->_doRequest( ++ Request::METHOD_GET, ++ $url, ++ $body, ++ $options ++ ); ++ } ++ ++/** ++ * Do a POST request. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The post data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function post($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_POST, $url, $data, $options); ++ } ++ ++/** ++ * Do a PUT request. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The request data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function put($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_PUT, $url, $data, $options); ++ } ++ ++/** ++ * Do a PATCH request. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The request data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function patch($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_PATCH, $url, $data, $options); ++ } ++ ++/** ++ * Do a DELETE request. 
++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The request data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function delete($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_DELETE, $url, $data, $options); ++ } ++ ++/** ++ * Helper method for doing non-GET requests. ++ * ++ * @param string $method HTTP method. ++ * @param string $url URL to request. ++ */ ++ protected function _doRequest($method, $url, $data, $options) { ++ $request = $this->_createRequest( ++ $method, ++ $url, ++ $data, ++ $options ++ ); ++ return $this->send($request, $options); ++ } ++ ++/** ++ * Does a recursive merge of the parameter with the scope config. ++ * ++ * @param array $options Options to merge. ++ * @return array Options merged with set config. ++ */ ++ protected function _mergeOptions($options) { ++ return Hash::merge($this->_config, $options); ++ } ++ ++/** ++ * Send a request. ++ * ++ * Used internally by other methods, but can also be used to send ++ * handcrafted Request objects. ++ * ++ * @param Cake\Network\Http\Request $request The request to send. ++ * @param array $options Additional options to use. ++ * @return Cake\Network\Http\Response ++ */ ++ public function send(Request $request, $options = []) { ++ $responses = $this->_adapter->send($request, $options); ++ $host = parse_url($request->url(), PHP_URL_HOST); ++ foreach ($responses as $response) { ++ $this->_storeCookies($response, $host); ++ } ++ return array_pop($responses); ++ } ++ ++/** ++ * Store cookies in a response to be used in future requests. ++ * ++ * Non-expired cookies will be stored for use in future requests ++ * made with the same Client instance. Cookies are not saved ++ * between instances. 
++ * ++ * @param Response $response The response to read cookies from ++ * @param string $host The request host, used for getting host names ++ * in case the cookies didn't set a domain. ++ * @return void ++ */ ++ protected function _storeCookies(Response $response, $host) { ++ $cookies = $response->cookies(); ++ foreach ($cookies as $name => $cookie) { ++ $expires = isset($cookie['expires']) ? $cookie['expires'] : false; ++ if ($expires) { ++ $expires = \DateTime::createFromFormat('D, j-M-Y H:i:s e', $expires); ++ } ++ if ($expires && $expires->getTimestamp() <= time()) { ++ continue; ++ } ++ if (empty($cookie['domain'])) { ++ $cookie['domain'] = $host; ++ } ++ $cookie['domain'] = trim($cookie['domain'], '.');","@markstory Why do you trim the dot from the domain? It can cause security issue. The `cakephp.org` cookie domain is accessible only by the `http://cakephp.org`, but the `.cakephp.org` cookie can be accessed by `http://cakephp.org` and `http://bakery.cakephp.org` and all others subdomains. +",0,c0865001dc9b44d763aba9eaa00720fa4e485ddc +2629304,0,"In RFC 2109 an explicitly set domain must always start with a `.` (section 4.2.2). Only if the domain is omitted will the leading `.` be missing (section 4.3.1). Based on the domain matching examples, the rules seem like a cookie's inclusion in a request""",2013-01-13 05:26:02,markstory,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1057#discussion_r2629304,2013-01-13T16:26:02Z,2013-01-24T02:45:59Z,markstory,MEMBER,lib/Cake/Network/Http/Client.php,,,,,1,"@@ -0,0 +1,527 @@ ++get('/users', [], ['type' => 'json']);` ++ * ++ * The `type` option sets both the `Content-Type` and `Accept` header, to ++ * the same mime type. When using `type` you can use either a full mime ++ * type or an alias. If you need different types in the Accept and Content-Type ++ * headers you should set them manually and not use `type` ++ * ++ * ### Using authentication ++ * ++ * By using the `auth` key you can use authentication. 
The type sub option ++ * can be used to specify which authentication strategy you want to use. ++ * CakePHP comes with a few built-in strategies: ++ * ++ * - Basic ++ * - Digest ++ * - Oauth ++ * ++ * ### Using proxies ++ * ++ * By using the `proxy` key you can set authentication credentials for ++ * a proxy if you need to use one.. The type sub option can be used to ++ * specify which authentication strategy you want to use. ++ * CakePHP comes with built-in support for basic authentication. ++ * ++ */ ++class Client { ++ ++/** ++ * Stored configuration for the client. ++ * ++ * @var array ++ */ ++ protected $_config = [ ++ 'host' => null, ++ 'port' => null, ++ 'scheme' => 'http', ++ 'timeout' => 30, ++ 'ssl_verify_peer' => true, ++ 'ssl_verify_depth' => 5, ++ 'ssl_verify_host' => true, ++ 'redirect' => false, ++ ]; ++ ++/** ++ * List of cookies from responses made with this client. ++ * ++ * Cookies are indexed by the cookie's domain or ++ * request host name. ++ * ++ * @var array ++ */ ++ protected $_cookies = []; ++ ++/** ++ * Adapter for sending requests. Defaults to ++ * Cake\Network\Http\Stream ++ * ++ * @var Cake\Network\Http\Stream ++ */ ++ protected $_adapter; ++ ++/** ++ * Create a new HTTP Client. ++ * ++ * ### Config options ++ * ++ * You can set the following options when creating a client: ++ * ++ * - host - The hostname to do requests on. ++ * - port - The port to use. ++ * - scheme - The default scheme/protocol to use. Defaults to http. ++ * - timeout - The timeout in seconds. Defaults to 30 ++ * - ssl_verify_peer - Whether or not SSL certificates should be validated. ++ * Defaults to true. ++ * - ssl_verify_depth - The maximum certificate chain depth to travers. ++ * Defaults to 5. ++ * - ssl_verify_host - Verify that the certificate and hostname match. ++ * Defaults to true. ++ * - redirect - Number of redirects to follow. Defaults to false. ++ * ++ * @param array $config Config options for scoped clients. 
++ */ ++ public function __construct($config = []) { ++ $adapter = 'Cake\Network\Http\Adapter\Stream'; ++ if (isset($config['adapter'])) { ++ $adapter = $config['adapter']; ++ unset($config['adapter']); ++ } ++ $this->config($config); ++ ++ if (is_string($adapter)) { ++ $adapter = new $adapter(); ++ } ++ $this->_adapter = $adapter; ++ } ++ ++/** ++ * Get or set additional config options. ++ * ++ * Setting config will use Hash::merge() for appending into ++ * the existing configuration. ++ * ++ * @param array|null $config Configuration options. null to get. ++ * @return this|array ++ */ ++ public function config($config = null) { ++ if ($config === null) { ++ return $this->_config; ++ } ++ $this->_config = Hash::merge($this->_config, $config); ++ return $this; ++ } ++ ++/** ++ * Get the cookies stored in the Client. ++ * ++ * Returns an array of cookie data arrays. ++ * ++ * @return array ++ */ ++ public function cookies() { ++ return $this->_cookies; ++ } ++ ++/** ++ * Do a GET request. ++ * ++ * The $data argument supports a special `_content` key ++ * for providing a request body in a GET request. This is ++ * generally not used but services like ElasticSearch use ++ * this feature. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The query data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function get($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $body = []; ++ if (isset($data['_content'])) { ++ $body = $data['_content']; ++ unset($data['_content']); ++ } ++ $url = $this->buildUrl($url, $data, $options); ++ return $this->_doRequest( ++ Request::METHOD_GET, ++ $url, ++ $body, ++ $options ++ ); ++ } ++ ++/** ++ * Do a POST request. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The post data you want to send. 
++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function post($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_POST, $url, $data, $options); ++ } ++ ++/** ++ * Do a PUT request. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The request data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function put($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_PUT, $url, $data, $options); ++ } ++ ++/** ++ * Do a PATCH request. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The request data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function patch($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_PATCH, $url, $data, $options); ++ } ++ ++/** ++ * Do a DELETE request. ++ * ++ * @param string $url The url or path you want to request. ++ * @param array $data The request data you want to send. ++ * @param array $options Additional options for the request. ++ * @return Cake\Network\Http\Response ++ */ ++ public function delete($url, $data = [], $options = []) { ++ $options = $this->_mergeOptions($options); ++ $url = $this->buildUrl($url, [], $options); ++ return $this->_doRequest(Request::METHOD_DELETE, $url, $data, $options); ++ } ++ ++/** ++ * Helper method for doing non-GET requests. ++ * ++ * @param string $method HTTP method. ++ * @param string $url URL to request. 
++ */ ++ protected function _doRequest($method, $url, $data, $options) { ++ $request = $this->_createRequest( ++ $method, ++ $url, ++ $data, ++ $options ++ ); ++ return $this->send($request, $options); ++ } ++ ++/** ++ * Does a recursive merge of the parameter with the scope config. ++ * ++ * @param array $options Options to merge. ++ * @return array Options merged with set config. ++ */ ++ protected function _mergeOptions($options) { ++ return Hash::merge($this->_config, $options); ++ } ++ ++/** ++ * Send a request. ++ * ++ * Used internally by other methods, but can also be used to send ++ * handcrafted Request objects. ++ * ++ * @param Cake\Network\Http\Request $request The request to send. ++ * @param array $options Additional options to use. ++ * @return Cake\Network\Http\Response ++ */ ++ public function send(Request $request, $options = []) { ++ $responses = $this->_adapter->send($request, $options); ++ $host = parse_url($request->url(), PHP_URL_HOST); ++ foreach ($responses as $response) { ++ $this->_storeCookies($response, $host); ++ } ++ return array_pop($responses); ++ } ++ ++/** ++ * Store cookies in a response to be used in future requests. ++ * ++ * Non-expired cookies will be stored for use in future requests ++ * made with the same Client instance. Cookies are not saved ++ * between instances. ++ * ++ * @param Response $response The response to read cookies from ++ * @param string $host The request host, used for getting host names ++ * in case the cookies didn't set a domain. ++ * @return void ++ */ ++ protected function _storeCookies(Response $response, $host) { ++ $cookies = $response->cookies(); ++ foreach ($cookies as $name => $cookie) { ++ $expires = isset($cookie['expires']) ? 
$cookie['expires'] : false; ++ if ($expires) { ++ $expires = \DateTime::createFromFormat('D, j-M-Y H:i:s e', $expires); ++ } ++ if ($expires && $expires->getTimestamp() <= time()) { ++ continue; ++ } ++ if (empty($cookie['domain'])) { ++ $cookie['domain'] = $host; ++ } ++ $cookie['domain'] = trim($cookie['domain'], '.');","In RFC 2109 an explicitly set domain must always start with a `.` (section 4.2.2). Only if the domain is omitted will the leading `.` be missing (section 4.3.1). Based on the domain matching examples, the rules seem like a cookie's inclusion in a request should rely on: +- Secure flag matching. +- Domain matching request host exactly (for default values) +- Domain matching request host at the end of the domain, +- Path matching the request path at the beginning of the path. +- Cookie not being expired. +",0,c0865001dc9b44d763aba9eaa00720fa4e485ddc +5700222,0,"Is this good to go then?""",2013-08-11 02:44:08,dereuromark,cakephp,cakephp,,https://github.com/cakephp/cakephp/pull/1493#discussion_r5700222,2013-08-11T14:44:08Z,2013-08-11T14:44:08Z,dereuromark,MEMBER,lib/Cake/View/Helper/HtmlHelper.php,,921.0,,,5,"@@ -918,13 +918,10 @@ public function tag($name, $text = null, $options = array()) { + if (empty($name)) { + return $text; + } +- if (is_array($options) && isset($options['escape']) && $options['escape']) { ++ if (isset($options['escape']) && $options['escape']) {","Is this good to go then? 
+",0,fc2d28974b8080e7670ea1a302e662cbb2fa6489 diff --git a/data/PR inline comments/chosen_sentiment_pr_inline_comments_joined.csv b/data/PR inline comments/chosen_sentiment_pr_inline_comments_joined.csv new file mode 100644 index 0000000..0062117 --- /dev/null +++ b/data/PR inline comments/chosen_sentiment_pr_inline_comments_joined.csv @@ -0,0 +1,272 @@ +comment_id,polarity,text,created_at_gold,author_login,owner,repo,review_id,html_url,created_at_kaiaulu,updated_at,comment_user_login,author_association,file_path,start_line,line,original_start_line,original_line,position,diff_hunk,body,commit_id +6142365,2,"oh, you're absolutely right -- sorry for my confusion! That makes sense, it should add `word.length` to get the next match.""",2013-09-03 12:19:18,eliasdorneles,harvesthq,chosen,,https://github.com/harvesthq/chosen/pull/1037#discussion_r6142365,2013-09-04T00:19:18Z,2013-09-04T00:58:03Z,eliasdorneles,NONE,coffee/lib/abstract-chosen.coffee,,,,,1,"@@ -177,16 +172,22 @@ class AbstractChosen + this.update_results_content this.results_option_build() + this.winnow_results_set_highlight() + +- search_string_match: (search_string, regex) -> +- if regex.test search_string +- return true +- else if @enable_split_word_search and (search_string.indexOf("" "") >= 0 or search_string.indexOf(""["") == 0) +- #TODO: replace this substitution of /\[\]/ with a list of characters to skip. 
+- parts = search_string.replace(/\[|\]/g, """").split("" "") +- if parts.length +- for part in parts +- if regex.test part +- return true ++ search_string_match: (search_string, words) -> ++ for word in words ++ if search_string.toLowerCase().indexOf(word) < 0 ++ return false ++ return true ++ ++ highlight_search_text: (text, words) -> ++ # sort the query words to highlight the longest first ++ words.sort (a, b) -> b.length - a.length ++ highlight_offset = 10 # 1 + ''.length ++ for word in words ++ startpos = text.toLowerCase().indexOf word ++ while startpos >= 0 ++ text = text.substr(0, startpos) + '' + text.substr(startpos, word.length) + '' + text.substr(startpos + word.length) ++ startpos = text.toLowerCase().indexOf(word, startpos + highlight_offset)","oh, you're absolutely right -- sorry for my confusion! That makes sense, it should add `word.length` to get the next match. +",58146cea40855156adeae1620eeefe306e93933c +5383881,1,"Save here :)""",2013-07-24 08:56:58,koenpunt,harvesthq,chosen,,https://github.com/harvesthq/chosen/pull/1397#discussion_r5383881,2013-07-24T20:56:58Z,2013-07-24T21:52:23Z,koenpunt,CONTRIBUTOR,public/index.proto.html,,,,,1,"@@ -1448,12 +1448,9 @@ + + +