From 1f6e556945a66bbceeb72097263d917fc8b385ee Mon Sep 17 00:00:00 2001
From: Li Xu
Date: Tue, 4 Nov 2025 18:17:24 -0800
Subject: [PATCH 1/8] feat: crosswalk script, tutorial and queries
---
README.md | 32 +-
import_scripts/mysql/create_tables.sql | 6 +-
import_scripts/postgresql/create_tables.sql | 6 +-
...nd_best_ccssm_match_for_state_standard.sql | 14 +
.../crosswalk_queries/get_all_crosswalks.sql | 10 +
.../get_crosswalks_by_jaccard_threshold.sql | 16 +
.../get_crosswalks_for_state.sql | 16 +
...get_crosswalks_with_standards_metadata.sql | 21 +
...ared_learning_components_for_crosswalk.sql | 51 ++
...nd_best_ccssm_match_for_state_standard.sql | 14 +
.../crosswalk_queries/get_all_crosswalks.sql | 10 +
.../get_crosswalks_by_jaccard_threshold.sql | 16 +
.../get_crosswalks_for_state.sql | 16 +
...get_crosswalks_with_standards_metadata.sql | 21 +
...ared_learning_components_for_crosswalk.sql | 51 ++
.../compare_standards/js/compare-standards.js | 628 +++++++++-------
tutorials/compare_standards/python/README.md | 4 +-
.../python/compare_standards.py | 686 +++++++++---------
18 files changed, 1005 insertions(+), 613 deletions(-)
create mode 100644 sample_queries/mysql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql
create mode 100644 sample_queries/mysql/crosswalk_queries/get_all_crosswalks.sql
create mode 100644 sample_queries/mysql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
create mode 100644 sample_queries/mysql/crosswalk_queries/get_crosswalks_for_state.sql
create mode 100644 sample_queries/mysql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
create mode 100644 sample_queries/mysql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql
create mode 100644 sample_queries/postgresql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql
create mode 100644 sample_queries/postgresql/crosswalk_queries/get_all_crosswalks.sql
create mode 100644 sample_queries/postgresql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
create mode 100644 sample_queries/postgresql/crosswalk_queries/get_crosswalks_for_state.sql
create mode 100644 sample_queries/postgresql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
create mode 100644 sample_queries/postgresql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql
diff --git a/README.md b/README.md
index 8235370..31a5a22 100644
--- a/README.md
+++ b/README.md
@@ -54,10 +54,10 @@ There are two options to download the files: direct s3 links, or using curl comm
Click links to download files directly. Files will download to your browser's default location (typically `~/Downloads`).
**CSV files:**
-- [StandardsFramework.csv](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/csv/StandardsFramework.csv?ref=github)
-- [StandardsFrameworkItem.csv](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/csv/StandardsFrameworkItem.csv?ref=github)
-- [LearningComponent.csv](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/csv/LearningComponent.csv?ref=github)
-- [Relationships.csv](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/csv/Relationships.csv?ref=github)
+- [StandardsFramework.csv](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/csv/StandardsFramework.csv?ref=github)
+- [StandardsFrameworkItem.csv](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/csv/StandardsFrameworkItem.csv?ref=github)
+- [LearningComponent.csv](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/csv/LearningComponent.csv?ref=github)
+- [Relationships.csv](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/csv/Relationships.csv?ref=github)
**For SQL database imports:** Move the downloaded CSV files to `/tmp/kg-data/` to use the import scripts without modification:
@@ -67,10 +67,10 @@ mv ~/Downloads/StandardsFramework.csv ~/Downloads/StandardsFrameworkItem.csv ~/D
```
**JSON files:**
-- [StandardsFramework.json](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/json/StandardsFramework.json?ref=github)
-- [StandardsFrameworkItem.json](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/json/StandardsFrameworkItem.json?ref=github)
-- [LearningComponent.json](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/json/LearningComponent.json?ref=github)
-- [Relationships.json](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/json/Relationships.json?ref=github)
+- [StandardsFramework.json](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/json/StandardsFramework.json?ref=github)
+- [StandardsFrameworkItem.json](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/json/StandardsFrameworkItem.json?ref=github)
+- [LearningComponent.json](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/json/LearningComponent.json?ref=github)
+- [Relationships.json](https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/json/Relationships.json?ref=github)
### Using curl commands
@@ -83,17 +83,17 @@ If you don't have `curl` installed, see [installation instructions](https://gith
mkdir -p /tmp/kg-data
cd /tmp/kg-data
-curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/csv/StandardsFramework.csv?ref=gh_curl" -o StandardsFramework.csv
-curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/csv/StandardsFrameworkItem.csv?ref=gh_curl" -o StandardsFrameworkItem.csv
-curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/csv/LearningComponent.csv?ref=gh_curl" -o LearningComponent.csv
-curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/csv/Relationships.csv?ref=gh_curl" -o Relationships.csv
+curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/csv/StandardsFramework.csv?ref=gh_curl" -o StandardsFramework.csv
+curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/csv/StandardsFrameworkItem.csv?ref=gh_curl" -o StandardsFrameworkItem.csv
+curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/csv/LearningComponent.csv?ref=gh_curl" -o LearningComponent.csv
+curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/csv/Relationships.csv?ref=gh_curl" -o Relationships.csv
```
```bash
# Download JSON files
-curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/json/StandardsFramework.json?ref=gh_curl" -o StandardsFramework.json
-curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/json/StandardsFrameworkItem.json?ref=gh_curl" -o StandardsFrameworkItem.json
-curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/json/LearningComponent.json?ref=gh_curl" -o LearningComponent.json
-curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.1.0/json/Relationships.json?ref=gh_curl" -o Relationships.json
+curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/json/StandardsFramework.json?ref=gh_curl" -o StandardsFramework.json
+curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/json/StandardsFrameworkItem.json?ref=gh_curl" -o StandardsFrameworkItem.json
+curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/json/LearningComponent.json?ref=gh_curl" -o LearningComponent.json
+curl -L "https://aidt-knowledge-graph-datasets-public-prod.s3.us-west-2.amazonaws.com/knowledge-graph/v1.2.0/json/Relationships.json?ref=gh_curl" -o Relationships.json
```
### **Next steps**
diff --git a/import_scripts/mysql/create_tables.sql b/import_scripts/mysql/create_tables.sql
index 3519ee3..a9d64e0 100644
--- a/import_scripts/mysql/create_tables.sql
+++ b/import_scripts/mysql/create_tables.sql
@@ -67,5 +67,9 @@ CREATE TABLE IF NOT EXISTS relationships (
`author` TEXT,
`provider` TEXT,
`license` TEXT,
- `attributionStatement` TEXT
+ `attributionStatement` TEXT,
+ `jaccard` DOUBLE,
+ `ccssLCCount` INT,
+ `sharedLCCount` INT,
+ `stateLCCount` INT
);
diff --git a/import_scripts/postgresql/create_tables.sql b/import_scripts/postgresql/create_tables.sql
index 065d5ee..42927c8 100644
--- a/import_scripts/postgresql/create_tables.sql
+++ b/import_scripts/postgresql/create_tables.sql
@@ -66,5 +66,9 @@ CREATE TABLE IF NOT EXISTS relationships (
"author" TEXT,
"provider" TEXT,
"license" TEXT,
- "attributionStatement" TEXT
+ "attributionStatement" TEXT,
+ "jaccard" DOUBLE PRECISION,
+ "ccssLCCount" INT,
+ "sharedLCCount" INT,
+ "stateLCCount" INT
);
diff --git a/sample_queries/mysql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql b/sample_queries/mysql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql
new file mode 100644
index 0000000..ded0a43
--- /dev/null
+++ b/sample_queries/mysql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql
@@ -0,0 +1,14 @@
+SELECT  -- every CCSSM candidate for one Texas standard, highest Jaccard score first
+    xwalk.`sourceEntityValue`,
+    xwalk.`targetEntityValue`,
+    xwalk.`jaccard`,
+    xwalk.`stateLCCount`,
+    xwalk.`ccssLCCount`,
+    xwalk.`sharedLCCount`
+FROM relationships AS xwalk
+INNER JOIN standards_framework_item AS state_item
+    ON xwalk.`sourceEntityValue` = state_item.`identifier`
+WHERE state_item.`jurisdiction` = 'Texas'
+    AND state_item.`statementCode` = '111.26.b.4.D'
+    AND xwalk.`relationshipType` = 'hasStandardAlignment'
+ORDER BY xwalk.`jaccard` DESC;
diff --git a/sample_queries/mysql/crosswalk_queries/get_all_crosswalks.sql b/sample_queries/mysql/crosswalk_queries/get_all_crosswalks.sql
new file mode 100644
index 0000000..c9149d9
--- /dev/null
+++ b/sample_queries/mysql/crosswalk_queries/get_all_crosswalks.sql
@@ -0,0 +1,10 @@
+SELECT  -- all state-to-CCSSM crosswalk rows, strongest overlap first
+    xwalk.`sourceEntityValue`,
+    xwalk.`targetEntityValue`,
+    xwalk.`jaccard`,
+    xwalk.`stateLCCount`,
+    xwalk.`ccssLCCount`,
+    xwalk.`sharedLCCount`
+FROM relationships AS xwalk
+WHERE xwalk.`relationshipType` = 'hasStandardAlignment'
+ORDER BY xwalk.`jaccard` DESC;
diff --git a/sample_queries/mysql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql b/sample_queries/mysql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
new file mode 100644
index 0000000..d405492
--- /dev/null
+++ b/sample_queries/mysql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
@@ -0,0 +1,16 @@
+SELECT  -- crosswalk pairs whose Jaccard score is at least 0.7, with both statement codes
+    state_item.`statementCode` AS state_standard_code,
+    state_item.`jurisdiction` AS state_jurisdiction,
+    ccss_item.`statementCode` AS ccss_standard_code,
+    xwalk.`jaccard`,
+    xwalk.`stateLCCount`,
+    xwalk.`ccssLCCount`,
+    xwalk.`sharedLCCount`
+FROM relationships AS xwalk
+INNER JOIN standards_framework_item AS state_item
+    ON xwalk.`sourceEntityValue` = state_item.`identifier`
+INNER JOIN standards_framework_item AS ccss_item
+    ON xwalk.`targetEntityValue` = ccss_item.`identifier`
+WHERE xwalk.`jaccard` >= 0.7
+    AND xwalk.`relationshipType` = 'hasStandardAlignment'
+ORDER BY xwalk.`jaccard` DESC;
diff --git a/sample_queries/mysql/crosswalk_queries/get_crosswalks_for_state.sql b/sample_queries/mysql/crosswalk_queries/get_crosswalks_for_state.sql
new file mode 100644
index 0000000..94acc2f
--- /dev/null
+++ b/sample_queries/mysql/crosswalk_queries/get_crosswalks_for_state.sql
@@ -0,0 +1,16 @@
+SELECT  -- every crosswalk for Texas mathematics standards, strongest overlap first
+    state_item.`statementCode` AS state_standard_code,
+    state_item.`gradeLevel` AS state_grade_level,
+    state_item.`description` AS state_description,
+    ccss_item.`statementCode` AS ccss_standard_code,
+    xwalk.`jaccard`,
+    xwalk.`sharedLCCount`
+FROM relationships AS xwalk
+INNER JOIN standards_framework_item AS state_item
+    ON xwalk.`sourceEntityValue` = state_item.`identifier`
+INNER JOIN standards_framework_item AS ccss_item
+    ON xwalk.`targetEntityValue` = ccss_item.`identifier`
+WHERE state_item.`academicSubject` = 'Mathematics'
+    AND state_item.`jurisdiction` = 'Texas'
+    AND xwalk.`relationshipType` = 'hasStandardAlignment'
+ORDER BY xwalk.`jaccard` DESC;
diff --git a/sample_queries/mysql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql b/sample_queries/mysql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
new file mode 100644
index 0000000..72e3bef
--- /dev/null
+++ b/sample_queries/mysql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
@@ -0,0 +1,21 @@
+SELECT  -- crosswalks for one Texas standard, with code, grade and description of both sides
+    state_item.`statementCode` AS state_standard_code,
+    state_item.`jurisdiction` AS state_jurisdiction,
+    state_item.`gradeLevel` AS state_grade_level,
+    state_item.`description` AS state_description,
+    ccss_item.`statementCode` AS ccss_standard_code,
+    ccss_item.`gradeLevel` AS ccss_grade_level,
+    ccss_item.`description` AS ccss_description,
+    xwalk.`jaccard`,
+    xwalk.`stateLCCount`,
+    xwalk.`ccssLCCount`,
+    xwalk.`sharedLCCount`
+FROM relationships AS xwalk
+INNER JOIN standards_framework_item AS state_item
+    ON xwalk.`sourceEntityValue` = state_item.`identifier`
+INNER JOIN standards_framework_item AS ccss_item
+    ON xwalk.`targetEntityValue` = ccss_item.`identifier`
+WHERE state_item.`jurisdiction` = 'Texas'
+    AND state_item.`statementCode` = '111.26.b.4.D'
+    AND xwalk.`relationshipType` = 'hasStandardAlignment'
+ORDER BY xwalk.`jaccard` DESC;
diff --git a/sample_queries/mysql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql b/sample_queries/mysql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql
new file mode 100644
index 0000000..cd932f6
--- /dev/null
+++ b/sample_queries/mysql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql
@@ -0,0 +1,51 @@
+WITH state_lcs AS (  -- learning components that support the Texas standard
+    SELECT comp.`identifier`, comp.`description`
+    FROM relationships AS rel
+    INNER JOIN standards_framework_item AS std
+        ON rel.`targetEntityValue` = std.`caseIdentifierUUID`
+    INNER JOIN learning_component AS comp
+        ON rel.`sourceEntityValue` = comp.`identifier`
+    WHERE std.`jurisdiction` = 'Texas'
+        AND std.`statementCode` = '111.26.b.4.D'
+        AND rel.`relationshipType` = 'supports'
+),
+ccss_lcs AS (  -- learning components that support the CCSSM standard
+    SELECT comp.`identifier`, comp.`description`
+    FROM relationships AS rel
+    INNER JOIN standards_framework_item AS std
+        ON rel.`targetEntityValue` = std.`caseIdentifierUUID`
+    INNER JOIN learning_component AS comp
+        ON rel.`sourceEntityValue` = comp.`identifier`
+    WHERE std.`jurisdiction` = 'Multi-State'
+        AND std.`statementCode` = '6.RP.A.2'
+        AND rel.`relationshipType` = 'supports'
+)
+SELECT
+    'shared' AS lc_type,
+    s.`identifier`,
+    s.`description`
+FROM state_lcs AS s
+INNER JOIN ccss_lcs AS c
+    ON c.`identifier` = s.`identifier`
+
+UNION ALL
+
+SELECT
+    'state_only' AS lc_type,
+    s.`identifier`,
+    s.`description`
+FROM state_lcs AS s
+WHERE NOT EXISTS (  -- anti-join: no CCSSM component carries this identifier
+    SELECT 1 FROM ccss_lcs AS c WHERE c.`identifier` = s.`identifier`
+)
+
+UNION ALL
+
+SELECT
+    'ccss_only' AS lc_type,
+    c.`identifier`,
+    c.`description`
+FROM ccss_lcs AS c
+WHERE NOT EXISTS (  -- anti-join: no state component carries this identifier
+    SELECT 1 FROM state_lcs AS s WHERE s.`identifier` = c.`identifier`
+);
diff --git a/sample_queries/postgresql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql b/sample_queries/postgresql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql
new file mode 100644
index 0000000..61a4810
--- /dev/null
+++ b/sample_queries/postgresql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql
@@ -0,0 +1,14 @@
+SELECT  -- every CCSSM candidate for one Texas standard, best Jaccard match first
+    r."sourceEntityValue",
+    r."targetEntityValue",
+    r."jaccard",
+    r."stateLCCount",
+    r."ccssLCCount",
+    r."sharedLCCount"
+FROM relationships AS r
+INNER JOIN standards_framework_item AS sfi
+    ON sfi."identifier" = r."sourceEntityValue"
+WHERE r."relationshipType" = 'hasStandardAlignment'
+    AND sfi."statementCode" = '111.26.b.4.D'
+    AND sfi."jurisdiction" = 'Texas'
+ORDER BY r."jaccard" DESC NULLS LAST;  -- PostgreSQL sorts NULLs first under DESC; keep unscored rows off the top
diff --git a/sample_queries/postgresql/crosswalk_queries/get_all_crosswalks.sql b/sample_queries/postgresql/crosswalk_queries/get_all_crosswalks.sql
new file mode 100644
index 0000000..ec8e4b1
--- /dev/null
+++ b/sample_queries/postgresql/crosswalk_queries/get_all_crosswalks.sql
@@ -0,0 +1,10 @@
+SELECT  -- all state-to-CCSSM crosswalk rows, strongest overlap first
+    "sourceEntityValue",
+    "targetEntityValue",
+    "jaccard",
+    "stateLCCount",
+    "ccssLCCount",
+    "sharedLCCount"
+FROM relationships
+WHERE "relationshipType" = 'hasStandardAlignment'
+ORDER BY "jaccard" DESC NULLS LAST;  -- DESC alone would list rows with a NULL score first in PostgreSQL
diff --git a/sample_queries/postgresql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
new file mode 100644
index 0000000..07fd855
--- /dev/null
+++ b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
@@ -0,0 +1,16 @@
+SELECT  -- crosswalk pairs whose Jaccard score is at least 0.7, with both statement codes
+    state_item."statementCode" AS state_standard_code,
+    state_item."jurisdiction" AS state_jurisdiction,
+    ccss_item."statementCode" AS ccss_standard_code,
+    xwalk."jaccard",
+    xwalk."stateLCCount",
+    xwalk."ccssLCCount",
+    xwalk."sharedLCCount"
+FROM relationships AS xwalk
+INNER JOIN standards_framework_item AS state_item
+    ON xwalk."sourceEntityValue" = state_item."identifier"
+INNER JOIN standards_framework_item AS ccss_item
+    ON xwalk."targetEntityValue" = ccss_item."identifier"
+WHERE xwalk."jaccard" >= 0.7
+    AND xwalk."relationshipType" = 'hasStandardAlignment'
+ORDER BY xwalk."jaccard" DESC;
diff --git a/sample_queries/postgresql/crosswalk_queries/get_crosswalks_for_state.sql b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_for_state.sql
new file mode 100644
index 0000000..f54ef10
--- /dev/null
+++ b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_for_state.sql
@@ -0,0 +1,16 @@
+SELECT  -- every crosswalk for Texas mathematics standards, strongest overlap first
+    state_std."statementCode" AS state_standard_code,
+    state_std."gradeLevel" AS state_grade_level,
+    state_std."description" AS state_description,
+    ccss_std."statementCode" AS ccss_standard_code,
+    r."jaccard",
+    r."sharedLCCount"
+FROM relationships AS r
+INNER JOIN standards_framework_item AS state_std
+    ON state_std."identifier" = r."sourceEntityValue"
+INNER JOIN standards_framework_item AS ccss_std
+    ON ccss_std."identifier" = r."targetEntityValue"
+WHERE r."relationshipType" = 'hasStandardAlignment'
+    AND state_std."jurisdiction" = 'Texas'
+    AND state_std."academicSubject" = 'Mathematics'
+ORDER BY r."jaccard" DESC NULLS LAST;  -- PostgreSQL sorts NULLs first under DESC; keep unscored rows off the top
diff --git a/sample_queries/postgresql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
new file mode 100644
index 0000000..872011c
--- /dev/null
+++ b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
@@ -0,0 +1,21 @@
+SELECT  -- crosswalks for one Texas standard, with code, grade and description of both sides
+    state_std."statementCode" AS state_standard_code,
+    state_std."jurisdiction" AS state_jurisdiction,
+    state_std."gradeLevel" AS state_grade_level,
+    state_std."description" AS state_description,
+    ccss_std."statementCode" AS ccss_standard_code,
+    ccss_std."gradeLevel" AS ccss_grade_level,
+    ccss_std."description" AS ccss_description,
+    r."jaccard",
+    r."stateLCCount",
+    r."ccssLCCount",
+    r."sharedLCCount"
+FROM relationships AS r
+INNER JOIN standards_framework_item AS state_std
+    ON state_std."identifier" = r."sourceEntityValue"
+INNER JOIN standards_framework_item AS ccss_std
+    ON ccss_std."identifier" = r."targetEntityValue"
+WHERE r."relationshipType" = 'hasStandardAlignment'
+    AND state_std."statementCode" = '111.26.b.4.D'
+    AND state_std."jurisdiction" = 'Texas'
+ORDER BY r."jaccard" DESC NULLS LAST;  -- PostgreSQL sorts NULLs first under DESC; keep unscored rows off the top
diff --git a/sample_queries/postgresql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql b/sample_queries/postgresql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql
new file mode 100644
index 0000000..932392c
--- /dev/null
+++ b/sample_queries/postgresql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql
@@ -0,0 +1,51 @@
+WITH state_lcs AS (  -- learning components that support the Texas standard
+    SELECT comp."identifier", comp."description"
+    FROM relationships AS rel
+    INNER JOIN standards_framework_item AS std
+        ON rel."targetEntityValue" = std."caseIdentifierUUID"
+    INNER JOIN learning_component AS comp
+        ON rel."sourceEntityValue" = comp."identifier"
+    WHERE std."jurisdiction" = 'Texas'
+        AND std."statementCode" = '111.26.b.4.D'
+        AND rel."relationshipType" = 'supports'
+),
+ccss_lcs AS (  -- learning components that support the CCSSM standard
+    SELECT comp."identifier", comp."description"
+    FROM relationships AS rel
+    INNER JOIN standards_framework_item AS std
+        ON rel."targetEntityValue" = std."caseIdentifierUUID"
+    INNER JOIN learning_component AS comp
+        ON rel."sourceEntityValue" = comp."identifier"
+    WHERE std."jurisdiction" = 'Multi-State'
+        AND std."statementCode" = '6.RP.A.2'
+        AND rel."relationshipType" = 'supports'
+)
+SELECT
+    'shared' AS lc_type,
+    s."identifier",
+    s."description"
+FROM state_lcs AS s
+INNER JOIN ccss_lcs AS c
+    ON c."identifier" = s."identifier"
+
+UNION ALL
+
+SELECT
+    'state_only' AS lc_type,
+    s."identifier",
+    s."description"
+FROM state_lcs AS s
+WHERE NOT EXISTS (  -- anti-join: no CCSSM component carries this identifier
+    SELECT 1 FROM ccss_lcs AS c WHERE c."identifier" = s."identifier"
+)
+
+UNION ALL
+
+SELECT
+    'ccss_only' AS lc_type,
+    c."identifier",
+    c."description"
+FROM ccss_lcs AS c
+WHERE NOT EXISTS (  -- anti-join: no state component carries this identifier
+    SELECT 1 FROM state_lcs AS s WHERE s."identifier" = c."identifier"
+);
diff --git a/tutorials/compare_standards/js/compare-standards.js b/tutorials/compare_standards/js/compare-standards.js
index b34285c..e4d87c0 100644
--- a/tutorials/compare_standards/js/compare-standards.js
+++ b/tutorials/compare_standards/js/compare-standards.js
@@ -8,8 +8,11 @@ const path = require('path');
const { parse } = require('csv-parse/sync');
require('dotenv').config();
-// Constants
-const TARGET_STANDARD_CODE = '6.RP.A.2';
+// Domain Constants
+// Pick a state standard to find its best CCSSM match
+// Note: We need to specify both the code AND jurisdiction since multiple states may use the same code
+const TARGET_STATE_STANDARD_CODE = '111.26.b.4.D'; // Texas 6th grade math standard on rates
+const TARGET_STATE_JURISDICTION = 'Texas';
// Environment setup
const dataDir = process.env.KG_DATA_PATH;
@@ -33,307 +36,406 @@ function loadCSV(filename) {
}
}
-function loadData(aq) {
- /*
- * Find Common Core and Texas standards
- *
- * SQL:
- * SELECT *
- * FROM standards_framework_item
- * WHERE "jurisdiction" = 'Multi-State'
- * AND "academicSubject" = 'Mathematics';
- *
- * SELECT *
- * FROM standards_framework_item
- * WHERE "jurisdiction" = 'Texas'
- * AND "academicSubject" = 'Mathematics';
- *
- * Cypher:
- * MATCH (sfi:StandardsFrameworkItem)
- * WHERE sfi.jurisdiction = 'Multi-State' AND sfi.academicSubject = 'Mathematics'
- * RETURN sfi;
- *
- * MATCH (sfi:StandardsFrameworkItem)
- * WHERE sfi.jurisdiction = 'Texas' AND sfi.academicSubject = 'Mathematics'
- * RETURN sfi;
- */
- /* Load and filter CSV data files for standards comparison
- * and filter for Multi-State (Common Core) and Texas math standards
+/* ================================
+ STEP 1: LOAD THE CROSSWALK DATA
+ ================================ */
+
+function loadCrosswalkData(aq) {
+ /**
+ * Load crosswalk data from Relationships.csv
+ * aq: presumably the Arquero module; aq.from() wraps each loadCSV() result in a table.
+ * Purpose: Crosswalk data lives in the Relationships.csv file. Standards that have
+ * crosswalk data include four crosswalk-specific columns: jaccard, stateLCCount,
+ * ccssLCCount, and sharedLCCount.
+ * Each 'hasStandardAlignment' row shows one state → CCSSM crosswalk relationship.
+ * Returns { crosswalkData, standardsFrameworkItemsData, learningComponentsData, relationshipsData }.
*/
- const allStandardsFrameworkItems = aq.from(loadCSV('StandardsFrameworkItem.csv'));
- const learningComponentsData = aq.from(loadCSV('LearningComponent.csv'));
+
+ // Load CSV files
const relationshipsData = aq.from(loadCSV('Relationships.csv'));
+ const standardsFrameworkItemsData = aq.from(loadCSV('StandardsFrameworkItem.csv'));
+ const learningComponentsData = aq.from(loadCSV('LearningComponent.csv'));
- console.log('✅ Raw data loaded from KG CSV files');
- console.log({
- allStandardsFrameworkItems: allStandardsFrameworkItems.numRows(),
- learningComponentsData: learningComponentsData.numRows(),
- relationshipsData: relationshipsData.numRows()
- });
+ console.log('✅ Data loaded from KG CSV files');
+ console.log(` Total Relationships: ${relationshipsData.numRows()}`);
+ console.log(` Standards Framework Items: ${standardsFrameworkItemsData.numRows()}`);
+ console.log(` Learning Components: ${learningComponentsData.numRows()}`);
+
+ // Keep only the crosswalk relationships (relationshipType 'hasStandardAlignment')
+ const crosswalkData = relationshipsData
+ .filter(d => d.relationshipType === 'hasStandardAlignment');
+
+ console.log(`\n✅ Crosswalk data filtered:`);
+ console.log(` Total crosswalk relationships (state → CCSSM): ${crosswalkData.numRows()}`);
+
+ // Show a preview of the crosswalk columns (first 3 rows); skipped when there are no crosswalk rows
+ if (crosswalkData.numRows() > 0) {
+ console.log(`\n📊 Preview of crosswalk data (first 3 rows):`);
+ const preview = crosswalkData
+ .select('sourceEntityValue', 'targetEntityValue', 'jaccard',
+ 'stateLCCount', 'ccssLCCount', 'sharedLCCount')
+ .slice(0, 3)
+ .objects();
+
+ preview.forEach((row, idx) => {
+ console.log(` ${idx + 1}. Source: ${row.sourceEntityValue} → Target: ${row.targetEntityValue}`);
+ console.log(` Jaccard: ${row.jaccard}, State LCs: ${row.stateLCCount}, CCSS LCs: ${row.ccssLCCount}, Shared: ${row.sharedLCCount}`);
+ });
+ }
- // Filter standards for Multi-State (Common Core) and Texas Mathematics
- const standardsFrameworkItemsData = allStandardsFrameworkItems
- .filter(d =>
- d.academicSubject === 'Mathematics' &&
- (d.jurisdiction === 'Multi-State' || d.jurisdiction === 'Texas')
- );
+ return {
+ crosswalkData,
+ standardsFrameworkItemsData,
+ learningComponentsData,
+ relationshipsData
+ };
+}
- const ccCount = standardsFrameworkItemsData
- .filter(d => d.jurisdiction === 'Multi-State')
- .numRows();
-
- const txCount = standardsFrameworkItemsData
- .filter(d => d.jurisdiction === 'Texas')
- .numRows();
-
- console.log('✅ Data filtered for Multi-State and Texas Mathematics standards');
- console.log({
- filteredStandardsFrameworkItems: standardsFrameworkItemsData.numRows(),
- "Common Core (Multi-State)": ccCount,
- "Texas": txCount,
- learningComponentsData: learningComponentsData.numRows(),
- relationshipsData: relationshipsData.numRows()
- });
- if (ccCount === 0) {
- console.error('❌ No Common Core mathematics standards found');
- return null;
- }
+/* ================================
+ STEP 2: FIND THE BEST-MATCHING CCSSM STANDARD
+ ================================ */
- if (txCount === 0) {
- console.error('❌ No Texas mathematics standards found');
+function findBestCcssmMatch(stateStandardCode, jurisdiction, data, aq) {
+ /**
+ * Find the best CCSSM match for a state standard
+ * Looks up the standard by statementCode + jurisdiction, keeps the crosswalk rows whose
+ * sourceEntityValue equals its `identifier`, and orders them by Jaccard score (descending).
+ * A high score means largely shared skills and concepts, not necessarily similar wording.
+ * Returns the ordered matches table, or null when the standard or its matches are not found.
+ */
+
+ const { crosswalkData, standardsFrameworkItemsData } = data;
+
+ // First, find the state standard by its statement code and jurisdiction
+ const stateStandard = standardsFrameworkItemsData
+ .params({ code: stateStandardCode, juris: jurisdiction })
+ .filter(d => d.statementCode === code && d.jurisdiction === juris)
+ .object();
+
+ if (!stateStandard || !stateStandard.statementCode) {
+ console.log(`❌ State standard not found: ${stateStandardCode}`);
return null;
}
- return { standardsFrameworkItemsData, learningComponentsData, relationshipsData };
-}
+ const stateStandardId = stateStandard.identifier; // Use 'identifier' column for crosswalk matching
-function findTargetStandard(standardsFrameworkItemsData) {
- /* Find the specific Common Core standard to analyze
- *
- * SQL: SELECT *
- * FROM standards_framework_item
- * WHERE "statementCode" = '6.RP.A.2'
- * AND "academicSubject" = 'Mathematics'
- * AND "jurisdiction" = 'Multi-State';
- *
- * Cypher: MATCH (sfi:StandardsFrameworkItem)
- * WHERE sfi.statementCode = '6.RP.A.2' AND sfi.academicSubject = 'Mathematics'
- * AND sfi.jurisdiction = 'Multi-State'
- * RETURN sfi;
- */
- const targetStandardTable = standardsFrameworkItemsData
- .params({ targetCode: TARGET_STANDARD_CODE })
- .filter(d => d.statementCode === targetCode &&
- d.jurisdiction === 'Multi-State');
+ console.log(`✅ Found state standard: ${stateStandardCode}`);
+ console.log(` Identifier: ${stateStandardId}`);
+ console.log(` Description: ${stateStandard.description}`);
+ console.log(` Jurisdiction: ${stateStandard.jurisdiction}`);
- const targetStandard = targetStandardTable.object();
+ // Filter crosswalk data for this state standard (rows whose source is its identifier)
+ const matches = crosswalkData
+ .params({ stateId: stateStandardId })
+ .filter(d => d.sourceEntityValue === stateId);
- if (!targetStandard || !targetStandard.statementCode) {
- console.log(`❌ Standard not found: ${TARGET_STANDARD_CODE}`);
+ if (matches.numRows() === 0) {
+ console.log(`\n❌ No CCSSM matches found for ${stateStandardCode}`);
return null;
}
- console.log(`✅ Found target standard: ${targetStandard.statementCode}`);
- console.log(targetStandardTable.select('caseIdentifierUUID', 'statementCode', 'description').objects());
+ // Sort by Jaccard score (highest first)
+ const sortedMatches = matches.orderby(aq.desc('jaccard'));
+
+ console.log(`\n✅ Found ${sortedMatches.numRows()} CCSSM matches for ${stateStandardCode}`);
+ console.log(`\n📊 Top match (highest Jaccard score):`);
+
+ const topMatch = sortedMatches.object();
+ console.log(` CCSSM Standard UUID: ${topMatch.targetEntityValue}`);
+ console.log(` Jaccard Score: ${parseFloat(topMatch.jaccard).toFixed(4)}`);
+ console.log(` Shared LC Count: ${topMatch.sharedLCCount}`);
+ console.log(` State LC Count: ${topMatch.stateLCCount}`);
+ console.log(` CCSS LC Count: ${topMatch.ccssLCCount}`);
- return targetStandard;
+ return sortedMatches;
}
-function findSupportingLearningComponents(targetStandard, relationshipsData, learningComponentsData) {
- /* Find learning components that support the target standard
- *
- * SQL: SELECT lc.*
- * FROM learning_component lc
- * JOIN relationships r
- * ON lc."identifier" = r."sourceEntityValue"
- * WHERE r."targetEntityValue" = '0c0bb5f6-4b99-11ec-a82f-0242ac1a0003'
- * AND r."relationshipType" = 'supports';
- *
- * Cypher: MATCH (lc:LearningComponent)-[:supports]->(standard)
- * WHERE standard.caseIdentifierUUID = '0c0bb5f6-4b99-11ec-a82f-0242ac1a0003'
- * RETURN lc
+
+/* ================================
+ STEP 3: INTERPRET THE RELATIONSHIP METRICS
+ ================================ */
+
+function interpretRelationshipMetrics(matches) {
+ /**
+ * Interpret the relationship metrics for crosswalk matches
+ * Prints the first five rows of `matches`; returns early when it is missing or has no rows.
+ * Purpose: Each crosswalk relationship carries additional context about the degree
+ * of overlap:
+ * - sharedLCCount shows how many deconstructed skills are shared
+ * - stateLCCount and ccssLCCount show how many total skills support each standard
+ * - Together with the Jaccard score, these counts help interpret the strength and
+ * balance of the overlap
*/
- const supportingLCs = relationshipsData
- .params({ targetId: targetStandard.caseIdentifierUUID })
- .filter(d => d.relationshipType === 'supports' && d.targetEntityValue === targetId)
- .join(learningComponentsData, ['sourceEntityValue', 'identifier'])
- .select('sourceEntityValue', 'description_2')
- .rename({ sourceEntityValue: 'identifier', description_2: 'description' });
- console.log(`✅ Found ${supportingLCs.numRows()} supporting learning components:`);
- console.log(supportingLCs.objects());
+ if (!matches || matches.numRows() === 0) {
+ return;
+ }
- return supportingLCs;
+ console.log(`\n📊 INTERPRETATION OF TOP MATCHES:\n`);
+
+ // Show top 5 matches with interpretation (the first five rows, in the order received)
+ const topMatches = matches.slice(0, 5).objects();
+
+ topMatches.forEach((match, idx) => {
+ const jaccard = parseFloat(match.jaccard);
+ const stateLc = parseFloat(match.stateLCCount);
+ const ccssLc = parseFloat(match.ccssLCCount);
+ const sharedLc = parseFloat(match.sharedLCCount);
+
+ console.log(`Match #${idx + 1}:`);
+ console.log(` Jaccard Score: ${jaccard.toFixed(4)}`);
+ console.log(` State LC Count: ${stateLc}`);
+ console.log(` CCSS LC Count: ${ccssLc}`);
+ console.log(` Shared LC Count: ${sharedLc}`);
+
+ // Interpret the metrics: label the overlap using Jaccard cut-offs 0.9 / 0.7 / 0.5 / 0.3
+ let interpretation;
+ if (jaccard >= 0.9) {
+ interpretation = "Very strong overlap; standards share nearly all skills";
+ } else if (jaccard >= 0.7) {
+ interpretation = "Strong overlap; substantial shared skills";
+ } else if (jaccard >= 0.5) {
+ interpretation = "Moderate overlap; many shared skills";
+ } else if (jaccard >= 0.3) {
+ interpretation = "Partial overlap; some shared skills";
+ } else {
+ interpretation = "Weak overlap; few shared skills";
+ }
+
+ // Check scope balance: LC counts within 2 of each other are treated as similar scope
+ let scopeNote;
+ if (Math.abs(stateLc - ccssLc) <= 2) {
+ scopeNote = "Both standards have similar scope";
+ } else if (stateLc > ccssLc) {
+ scopeNote = "State standard covers more content";
+ } else {
+ scopeNote = "CCSS standard covers more content";
+ }
+
+ console.log(` Interpretation: ${interpretation}`);
+ console.log(` Scope: ${scopeNote}`);
+ console.log();
+ });
}
-function findMatchedTexasStandards(aq, supportingLCs, relationshipsData, standardsFrameworkItemsData, learningComponentsData) {
- /* Find Texas standards with their learning components - two-step process:
- * 1) Find standards that have overlapping learning components
- * 2) Get ALL learning components for those matched standards
- *
- * SQL: WITH matched_standards AS (
- * SELECT DISTINCT ts."caseIdentifierUUID"
- * FROM standards_framework_item ts
- * JOIN relationships r
- * ON ts."caseIdentifierUUID" = r."targetEntityValue"
- * JOIN learning_component lc
- * ON r."sourceEntityValue" = lc."identifier"
- * WHERE r."relationshipType" = 'supports'
- * AND ts."jurisdiction" = 'Texas'
- * AND lc."identifier" IN ('db4c25ad-9892-5abb-bcba-2fc9781d10f8',
- * 'b9b94f31-b58b-5e26-9efe-680b167046ba',
- * '523d04e7-47d8-55c7-bc44-792f3e01bfda')
- * )
- * SELECT
- * ts."caseIdentifierUUID",
- * ts."statementCode",
- * ts."description",
- * ARRAY_AGG(lc."description") AS lc_descriptions,
- * ARRAY_AGG(lc."identifier") AS lc_identifiers
- * FROM standards_framework_item ts
- * JOIN relationships r
- * ON ts."caseIdentifierUUID" = r."targetEntityValue"
- * JOIN learning_component lc
- * ON r."sourceEntityValue" = lc."identifier"
- * WHERE r."relationshipType" = 'supports'
- * AND ts."jurisdiction" = 'Texas'
- * AND ts."caseIdentifierUUID" IN (SELECT "caseIdentifierUUID" FROM matched_standards)
- * GROUP BY ts."caseIdentifierUUID", ts."statementCode", ts."description";
- *
- * Cypher: MATCH (ts:StandardsFrameworkItem)-[r:supports]-(lc:LearningComponent)
- * WHERE ts.jurisdiction = 'Texas'
- * AND lc.identifier IN ['db4c25ad-9892-5abb-bcba-2fc9781d10f8', 'b9b94f31-b58b-5e26-9efe-680b167046ba', '523d04e7-47d8-55c7-bc44-792f3e01bfda']
- * WITH DISTINCT ts.caseIdentifierUUID AS matched_id
- * MATCH (matched_ts:StandardsFrameworkItem)-[r2:supports]-(all_lc:LearningComponent)
- * WHERE matched_ts.jurisdiction = 'Texas'
- * AND matched_ts.caseIdentifierUUID = matched_id
- * RETURN matched_ts.caseIdentifierUUID,
- * matched_ts.statementCode,
- * matched_ts.description,
- * COLLECT(all_lc.description) AS lc_descriptions,
- * COLLECT(all_lc.identifier) AS lc_identifiers
+
+/* ================================
+ STEP 4: JOIN CROSSWALKS WITH STANDARDS METADATA
+ ================================ */
+
+function enrichCrosswalksWithMetadata(matches, data, aq) {
+ /**
+ * Join crosswalk data with standards metadata
+ *
+ * Purpose: Enrich the crosswalk data by joining it with StandardsFrameworkItem.csv,
+ * which contains metadata such as grade level and description. This provides a clear
+ * view of which state standards most closely align to their CCSSM counterparts, along
+ * with the strength of each connection.
*/
- const lcIds = supportingLCs.array('identifier');
-
- // First, find which Texas standards have overlapping learning components
- const matchedStandardIds = relationshipsData
- .filter(d => d.relationshipType === 'supports')
- .filter(aq.escape(d => lcIds.includes(d.sourceEntityValue)))
- .join(standardsFrameworkItemsData, ['targetEntityValue', 'caseIdentifierUUID'])
- .filter(d => d.jurisdiction === 'Texas')
- .select('targetEntityValue')
- .dedupe('targetEntityValue')
- .array('targetEntityValue');
-
- // Then get ALL learning components for those matched standards
- const results = relationshipsData
- .filter(d => d.relationshipType === 'supports')
- .filter(aq.escape(d => matchedStandardIds.includes(d.targetEntityValue)))
- .join(standardsFrameworkItemsData, ['targetEntityValue', 'caseIdentifierUUID'])
- .filter(d => d.jurisdiction === 'Texas')
- .join(learningComponentsData, ['sourceEntityValue', 'identifier']);
-
- // Organize learning component descriptions and identifiers for each matched standard
- const finalResults = results
- .select('targetEntityKey', 'targetEntityValue', 'statementCode', 'description_2', 'identifier', 'description')
- .rename({
- targetEntityValue: 'caseIdentifierUUID',
- description_2: 'standardDescription',
- identifier: 'lc_identifier',
- description: 'lc_description'
- })
- .groupby('caseIdentifierUUID', 'statementCode', 'standardDescription')
- .rollup({
- lcDescription: d => aq.op.array_agg(d.lc_description),
- lcIdentifier: d => aq.op.array_agg(d.lc_identifier)
- })
- .objects();
-
- console.log(`✅ Found ${finalResults.length} Texas standards with shared learning components (lc):`);
- console.log(finalResults);
-
- return finalResults;
-}
+ if (!matches || matches.numRows() === 0) {
+ return null;
+ }
-function displayComparisonResults(targetStandard, supportingLCs, matchedTexasStandards) {
- // Display the full comparison results between the target Common Core standard
- // and the matched Texas standards, including supporting learning components.
-
- // Calculate overlap with the target standard
- const supportingLCDescriptions = supportingLCs.array('description');
- const resultsWithOverlap = matchedTexasStandards.map(std => {
- const overlapCount = std.lcDescription.filter(lc =>
- supportingLCDescriptions.includes(lc)
- ).length;
- const totalTargetLCs = supportingLCDescriptions.length;
-
- return {
- ...std,
- overlapCount,
- totalTargetLCs,
- overlapRatio: `${overlapCount}/${totalTargetLCs}`
- };
- });
+ const { standardsFrameworkItemsData } = data;
+
+ // Rename columns to avoid conflicts when joining state and CCSS metadata
+ // We'll join the same standards dataset twice (once for state, once for CCSS)
+
+ // Join with state standard metadata (source)
+ const enriched = matches
+ .join(
+ standardsFrameworkItemsData.select('identifier', 'statementCode', 'description',
+ 'gradeLevel', 'academicSubject', 'jurisdiction')
+ .rename({
+ identifier: 'state_identifier',
+ statementCode: 'statementCode_state',
+ description: 'description_state',
+ gradeLevel: 'gradeLevel_state',
+ academicSubject: 'academicSubject_state'
+ }),
+ ['sourceEntityValue', 'state_identifier']
+ )
+ // Join with CCSS standard metadata (target)
+ .join(
+ standardsFrameworkItemsData.select('identifier', 'statementCode', 'description',
+ 'gradeLevel', 'academicSubject')
+ .rename({
+ identifier: 'ccss_identifier',
+ statementCode: 'statementCode_ccss',
+ description: 'description_ccss',
+ gradeLevel: 'gradeLevel_ccss',
+ academicSubject: 'academicSubject_ccss'
+ }),
+ ['targetEntityValue', 'ccss_identifier']
+ );
- console.log(`✅ Full comparison between Common Core standard ${targetStandard.statementCode} and matched Texas standards:`);
- console.log(`📋 TARGET STANDARD:`);
- console.log(` Code: ${targetStandard.statementCode}`);
- console.log(` Description: ${targetStandard.description}`);
- console.log(` Supporting Learning Components (${supportingLCs.numRows()}):`);
- supportingLCs.objects().forEach((lc, i) => {
- console.log(` • ${lc.description}`);
- });
- console.log('');
-
- resultsWithOverlap.forEach((match, i) => {
- console.log(`📋 MATCHED STANDARD #${i + 1}:`);
- console.log(` Code: ${match.statementCode}`);
- console.log(` Description: ${match.standardDescription}`);
- console.log(` Supporting Learning Components (${match.lcDescription.length}) - Overlap: ${match.overlapRatio}:`);
-
- const supportingLCDescriptions = supportingLCs.array('description');
- match.lcDescription.forEach((lc, j) => {
- const isShared = supportingLCDescriptions.includes(lc);
- const emoji = isShared ? '➕' : '➖';
- console.log(` ${emoji} ${lc || '(no description)'}`);
- });
- console.log('');
+ console.log(`\n✅ Enriched crosswalk data with standards metadata\n`);
+ console.log(`📊 DETAILED COMPARISON (Top 3 matches):\n`);
+
+ const top3 = enriched.slice(0, 3).objects();
+
+ top3.forEach((row, idx) => {
+ console.log(`Match #${idx + 1} (Jaccard: ${parseFloat(row.jaccard).toFixed(4)}):`);
+ console.log(` STATE STANDARD:`);
+ console.log(` Code: ${row.statementCode_state}`);
+ console.log(` Jurisdiction: ${row.jurisdiction}`);
+ console.log(` Grade Level: ${row.gradeLevel_state}`);
+ console.log(` Description: ${row.description_state}`);
+ console.log(` `);
+ console.log(` CCSS STANDARD:`);
+ console.log(` Code: ${row.statementCode_ccss}`);
+ console.log(` Grade Level: ${row.gradeLevel_ccss}`);
+ console.log(` Description: ${row.description_ccss}`);
+ console.log(` `);
+ console.log(` ALIGNMENT METRICS:`);
+ console.log(` Shared LCs: ${row.sharedLCCount} / State LCs: ${row.stateLCCount} / CCSS LCs: ${row.ccssLCCount}`);
+ console.log();
});
+
+ return enriched;
}
-async function main() {
- const aq = await import('arquero');
- console.log('\n=== COMPARE STANDARDS TUTORIAL ===\n');
+/* ================================
+ STEP 5: JOIN CROSSWALKS TO LEARNING COMPONENTS
+ ================================ */
- console.log('🔄 Step 1: Loading data...');
- const { standardsFrameworkItemsData, learningComponentsData, relationshipsData } = loadData(aq);
+function showSharedLearningComponents(stateStandardCode, ccssStandardCode, stateJurisdiction, data, aq) {
+ /**
+ * Join crosswalks to Learning Components to show shared skills
+ *
+ * Purpose: Now that you have crosswalk pairs (state → CCSSM), you can see the
+ * actual skills behind each match by joining to the Learning Components dataset.
+ * We'll use the 'supports' relationships to fetch the LCs that support each standard
+ * and then intersect them to list the shared LCs (the evidence behind the crosswalk).
+ */
- if (!standardsFrameworkItemsData) {
- console.log('❌ Failed to load and validate data.');
- return;
- }
+ const { standardsFrameworkItemsData, relationshipsData, learningComponentsData } = data;
+
+ // Find the standard identifiers
+ // Note: For LC relationships, we need to use caseIdentifierUUID, not identifier
+ const stateStandard = standardsFrameworkItemsData
+ .params({ code: stateStandardCode, juris: stateJurisdiction })
+ .filter(d => d.statementCode === code && d.jurisdiction === juris)
+ .object();
- console.log('');
- console.log('');
- console.log('🔄 Step 2: "Unpack" a Common Core standard...');
- const targetStandard = findTargetStandard(standardsFrameworkItemsData);
- if (!targetStandard) {
- console.log('❌ Failed to find target standard.');
+ const ccssStandard = standardsFrameworkItemsData
+ .params({ code: ccssStandardCode })
+ .filter(d => d.statementCode === code && d.jurisdiction === 'Multi-State')
+ .object();
+
+ if (!stateStandard || !ccssStandard) {
+ console.log('❌ Could not find one or both standards');
return;
}
- const supportingLCs = findSupportingLearningComponents(targetStandard, relationshipsData, learningComponentsData);
+ const stateUuid = stateStandard.caseIdentifierUUID;
+ const ccssUuid = ccssStandard.caseIdentifierUUID;
+
+ // Get LCs that support the state standard
+ // LC relationships use caseIdentifierUUID for targetEntityValue
+ const stateLcIds = relationshipsData
+ .params({ uuid: stateUuid })
+ .filter(d => d.relationshipType === 'supports' && d.targetEntityValue === uuid)
+ .array('sourceEntityValue');
+
+ const stateLcs = learningComponentsData
+ .filter(aq.escape(d => stateLcIds.includes(d.identifier)))
+ .select('identifier', 'description')
+ .dedupe('identifier');
+
+ // Get LCs that support the CCSS standard
+ const ccssLcIds = relationshipsData
+ .params({ uuid: ccssUuid })
+ .filter(d => d.relationshipType === 'supports' && d.targetEntityValue === uuid)
+ .array('sourceEntityValue');
+
+ const ccssLcs = learningComponentsData
+ .filter(aq.escape(d => ccssLcIds.includes(d.identifier)))
+ .select('identifier', 'description')
+ .dedupe('identifier');
+
+ // Find shared LCs (intersection) with a semijoin on identifier
+ const sharedLcs = stateLcs
+ .semijoin(ccssLcs, 'identifier');
+
+ // Find state-only LCs (in state but not in CCSS)
+ const stateOnlyLcs = stateLcs
+ .antijoin(ccssLcs, 'identifier');
+
+ // Find CCSS-only LCs (in CCSS but not in state)
+ const ccssOnlyLcs = ccssLcs
+ .antijoin(stateLcs, 'identifier');
+
+ console.log(`\n✅ LEARNING COMPONENTS ANALYSIS:\n`);
+ console.log(`State Standard: ${stateStandardCode}`);
+ console.log(`CCSS Standard: ${ccssStandardCode}`);
+ console.log();
+
+ console.log(`📊 SHARED LEARNING COMPONENTS (${sharedLcs.numRows()}):`);
+ console.log('These are the concrete pedagogical overlaps between the two standards:\n');
+ sharedLcs.objects().forEach((lc, idx) => {
+ console.log(` ✅ ${idx + 1}. ${lc.description}`);
+ });
+ console.log();
+
+ console.log(`📊 STATE-ONLY LEARNING COMPONENTS (${stateOnlyLcs.numRows()}):`);
+ stateOnlyLcs.objects().forEach((lc, idx) => {
+ console.log(` ➖ ${idx + 1}. ${lc.description}`);
+ });
+ console.log();
+
+ console.log(`📊 CCSS-ONLY LEARNING COMPONENTS (${ccssOnlyLcs.numRows()}):`);
+ ccssOnlyLcs.objects().forEach((lc, idx) => {
+ console.log(` ➕ ${idx + 1}. ${lc.description}`);
+ });
+ console.log();
+}
+
+
+/* ================================
+ MAIN EXECUTION
+ ================================ */
- console.log('');
- console.log('');
- console.log('🔄 Step 3: Compare to Texas standards...');
- const matchedTexasStandards = findMatchedTexasStandards(aq, supportingLCs, relationshipsData, standardsFrameworkItemsData, learningComponentsData);
+async function main() {
+ const aq = await import('arquero');
- displayComparisonResults(targetStandard, supportingLCs, matchedTexasStandards);
+ console.log('\n=== USING CROSSWALKS TO COMPARE STATE STANDARDS TO COMMON CORE ===\n');
+
+ console.log('🔄 Step 1: Load the crosswalk data...');
+ const data = loadCrosswalkData(aq);
+
+ console.log('\n' + '='.repeat(70));
+ console.log('🔄 Step 2: Find the best-matching CCSSM standard for a state standard...');
+ const matches = findBestCcssmMatch(TARGET_STATE_STANDARD_CODE, TARGET_STATE_JURISDICTION, data, aq);
+
+ if (matches && matches.numRows() > 0) {
+ console.log('\n' + '='.repeat(70));
+ console.log('🔄 Step 3: Interpret the relationship metrics...');
+ interpretRelationshipMetrics(matches);
+
+ console.log('='.repeat(70));
+ console.log('🔄 Step 4: Join crosswalks with standards metadata...');
+ const enriched = enrichCrosswalksWithMetadata(matches, data, aq);
+
+ if (enriched && enriched.numRows() > 0) {
+ console.log('='.repeat(70));
+ console.log('🔄 Step 5: Join crosswalks to Learning Components...');
+ // Use the top match for detailed LC analysis
+ const topMatch = enriched.object();
+ showSharedLearningComponents(
+ topMatch.statementCode_state,
+ topMatch.statementCode_ccss,
+ topMatch.jurisdiction,
+ data,
+ aq
+ );
+ }
+ }
}
-main().catch(console.error);
\ No newline at end of file
+main().catch(console.error);
diff --git a/tutorials/compare_standards/python/README.md b/tutorials/compare_standards/python/README.md
index 28f53b6..99e5dba 100644
--- a/tutorials/compare_standards/python/README.md
+++ b/tutorials/compare_standards/python/README.md
@@ -1,8 +1,8 @@
# Compare Standards
-Demonstrates how to compare educational standards across different frameworks (Common Core vs Texas)
+Demonstrates how to use crosswalk data to compare state standards to the Common Core State Standards for Mathematics (CCSSM) using the Knowledge Graph dataset.
-Follow the step-by-step tutorial [here](https://docs.learningcommons.org/knowledge-graph/v1-1-0/getting-started/tutorials/comparing-standards-across-states)
+Follow the step-by-step tutorial [here](https://docs.learningcommons.org/knowledge-graph/v1-2-0/getting-started/tutorials/comparing-standards-across-states)
## Prerequisites
diff --git a/tutorials/compare_standards/python/compare_standards.py b/tutorials/compare_standards/python/compare_standards.py
index 22b9dfc..e24ebd5 100644
--- a/tutorials/compare_standards/python/compare_standards.py
+++ b/tutorials/compare_standards/python/compare_standards.py
@@ -1,4 +1,18 @@
#!/usr/bin/env python3
+"""
+Using crosswalks to compare state standards to Common Core
+
+This tutorial demonstrates how to use the crosswalk data in the Knowledge Graph to
+compare standards between a state framework and the Common Core State Standards for
+Mathematics (CCSSM). These crosswalks help determine which CCSSM standard is most similar
+to a given state standard and understand the similarities and differences between them.
+
+Crosswalks are evidence-based relationships between state standards and CCSSM standards,
+derived from overlapping sets of Learning Components (LCs). Each crosswalk includes
+similarity metrics, such as the Jaccard score and relative LC counts, to help interpret
+how closely two standards align.
+"""
+
"""
================================
CONFIGURATION & SETUP
@@ -16,7 +30,10 @@
load_dotenv()
# Domain Constants
-TARGET_STANDARD_CODE = '6.RP.A.2'
+# Pick a state standard to find its best CCSSM match
+# Note: We need to specify both the code AND jurisdiction since multiple states may use the same code
+TARGET_STATE_STANDARD_CODE = '111.26.b.4.D' # Texas 6th grade math standard on rates
+TARGET_STATE_JURISDICTION = 'Texas'
# Environment Setup
data_dir = os.getenv('KG_DATA_PATH')
@@ -36,384 +53,389 @@
def load_csv(filename):
"""
Load and parse CSV file from data directory
-
+
Args:
filename (str): Name of the CSV file to load
-
+
Returns:
pd.DataFrame: Loaded CSV data as DataFrame
"""
try:
file_path = data_path / filename
- return pd.read_csv(file_path)
+ return pd.read_csv(file_path, low_memory=False)
except Exception as error:
print(f'❌ Error loading CSV file {filename}: {str(error)}')
raise error
-
-
"""
================================
-STEP 1: LOAD DATA
+STEP 1: LOAD THE CROSSWALK DATA
================================
"""
-def load_data():
+def load_crosswalk_data():
"""
- Load CSV data files and apply initial filtering for mathematics standards
-
- Purpose: Load raw CSV data and filter to only Multi-State (Common Core) and Texas
- mathematics standards to reduce dataset size and eliminate irrelevant data early
-
-
+ Load crosswalk data from Relationships.csv
+
+ Purpose: Crosswalk data lives in the Relationships.csv file. Rows whose
+ relationshipType is 'hasStandardAlignment' carry four crosswalk-specific columns:
+ jaccard, stateLCCount, ccssLCCount, and sharedLCCount.
+
+ Each row shows one state → CCSSM crosswalk relationship.
+
Returns:
- dict: Dictionary containing filtered datasets and metadata
+ dict: Dictionary containing crosswalk data and related datasets
"""
- # Load raw CSV files
- all_standards_framework_items = load_csv('StandardsFrameworkItem.csv')
- learning_components_data = load_csv('LearningComponent.csv')
+ # Load CSV files
relationships_data = load_csv('Relationships.csv')
+ standards_framework_items_data = load_csv('StandardsFrameworkItem.csv')
+ learning_components_data = load_csv('LearningComponent.csv')
print('✅ Data loaded from KG CSV files')
- print(f' Standards Framework Items: {len(all_standards_framework_items)}')
- print(f' Learning Components Data: {len(learning_components_data)}')
- print(f' Relationships Data: {len(relationships_data)}')
-
- # Apply domain-specific filtering
- standards_framework_items_data = all_standards_framework_items[
- (all_standards_framework_items['academicSubject'] == 'Mathematics') &
- (all_standards_framework_items['jurisdiction'].isin(['Multi-State', 'Texas']))
- ].copy()
-
- # Count by jurisdiction for validation
- common_core_count = len(standards_framework_items_data[
- standards_framework_items_data['jurisdiction'] == 'Multi-State'
- ])
+ print(f' Total Relationships: {len(relationships_data)}')
+ print(f' Standards Framework Items: {len(standards_framework_items_data)}')
+ print(f' Learning Components: {len(learning_components_data)}')
- texas_count = len(standards_framework_items_data[
- standards_framework_items_data['jurisdiction'] == 'Texas'
- ])
+ # Filter for crosswalk relationships (hasStandardAlignment)
+ crosswalk_data = relationships_data[
+ relationships_data['relationshipType'] == 'hasStandardAlignment'
+ ].copy()
- if common_core_count == 0:
- print('❌ No Common Core mathematics standards found')
- return None
+ print(f'\n✅ Crosswalk data filtered:')
+ print(f' Total crosswalk relationships (state → CCSSM): {len(crosswalk_data)}')
- if texas_count == 0:
- print('❌ No Texas mathematics standards found')
- return None
-
- print('✅ Data loaded and filtered successfully')
- print(f' Filtered Standards Framework Items: {len(standards_framework_items_data)}')
- print(f' Common Core Standards: {common_core_count}')
- print(f' Texas Standards: {texas_count}')
- print(f' Learning Components: {len(learning_components_data)}')
- print(f' Relationships: {len(relationships_data)}')
+ # Show preview of crosswalk data
+ if len(crosswalk_data) > 0:
+ print(f'\n📊 Preview of crosswalk data (first 3 rows):')
+ preview_cols = ['sourceEntityValue', 'targetEntityValue', 'jaccard',
+ 'stateLCCount', 'ccssLCCount', 'sharedLCCount']
+ available_cols = [col for col in preview_cols if col in crosswalk_data.columns]
+ print(crosswalk_data[available_cols].head(3).to_string(index=False))
return {
+ 'crosswalk_data': crosswalk_data,
'standards_framework_items_data': standards_framework_items_data,
'learning_components_data': learning_components_data,
- 'relationships_data': relationships_data,
- 'metadata': {
- 'common_core_count': common_core_count,
- 'texas_count': texas_count
- }
+ 'relationships_data': relationships_data
}
-
-
"""
================================
-STEP 2: "UNPACK" A COMMON CORE STANDARD
+STEP 2: FIND THE BEST-MATCHING CCSSM STANDARD
================================
"""
-def find_target_standard(standards_framework_items_data):
+def find_best_ccssm_match(state_standard_code, jurisdiction, data):
"""
- Locate the specific Common Core standard to analyze
-
- Purpose: Find the target standard that will serve as the basis for comparison.
- This standard's learning components will be used to find similar Texas standards.
-
- SQL: SELECT *
- FROM standards_framework_item
- WHERE "statementCode" = '6.RP.A.2'
- AND "academicSubject" = 'Mathematics'
- AND "jurisdiction" = 'Multi-State';
-
- Cypher: MATCH (sfi:StandardsFrameworkItem)
- WHERE sfi.statementCode = '6.RP.A.2' AND sfi.academicSubject = 'Mathematics'
- AND sfi.jurisdiction = 'Multi-State'
- RETURN sfi;
-
+ Find the best CCSSM match for a state standard
+
+ Purpose: To find the best CCSS match for a state standard, filter rows by the
+ state standard ID and sort by the Jaccard score. This identifies the CCSSM
+ standard that contains the most similar skills and concept targets for student
+ mastery (not necessarily the most similar semantically).
+
Args:
- standards_framework_items_data (pd.DataFrame): Filtered standards data
-
+ state_standard_code (str): The statement code of the state standard
+ jurisdiction (str): The jurisdiction of the state standard
+ data (dict): Dictionary containing the loaded datasets
+
Returns:
- pd.Series or None: Target standard data or None if not found
+ pd.DataFrame or None: Crosswalk matches sorted by Jaccard score (highest first); None if the standard or any match is not found
"""
- target_standard_mask = (
- (standards_framework_items_data['statementCode'] == TARGET_STANDARD_CODE) &
- (standards_framework_items_data['jurisdiction'] == 'Multi-State')
- )
-
- target_standards = standards_framework_items_data[target_standard_mask]
-
- if len(target_standards) == 0:
- print(f'❌ Target standard not found: {TARGET_STANDARD_CODE}')
+ crosswalk_data = data['crosswalk_data']
+ standards_data = data['standards_framework_items_data']
+
+ # First, find the state standard by its statement code and jurisdiction
+ state_standard = standards_data[
+ (standards_data['statementCode'] == state_standard_code) &
+ (standards_data['jurisdiction'] == jurisdiction)
+ ]
+
+ if len(state_standard) == 0:
+ print(f'❌ State standard not found: {state_standard_code}')
return None
- target_standard = target_standards.iloc[0]
+ state_standard = state_standard.iloc[0]
+ state_standard_id = state_standard['identifier'] # Use 'identifier' column for crosswalk matching
- print(f'✅ Found target standard: {target_standard["statementCode"]}')
- print(f' UUID: {target_standard["caseIdentifierUUID"]}')
- print(f' Code: {target_standard["statementCode"]}')
- print(f' Description: {target_standard["description"]}')
+ print(f'✅ Found state standard: {state_standard_code}')
+ print(f' Identifier: {state_standard_id}')
+ print(f' Description: {state_standard["description"]}')
+ print(f' Jurisdiction: {state_standard["jurisdiction"]}')
- return target_standard
+ # Filter crosswalk data for this state standard
+ matches = crosswalk_data[
+ crosswalk_data['sourceEntityValue'] == state_standard_id
+ ].copy()
+ if len(matches) == 0:
+ print(f'\n❌ No CCSSM matches found for {state_standard_code}')
+ return None
+ # Sort by Jaccard score (highest first)
+ matches = matches.sort_values('jaccard', ascending=False)
-def extract_supporting_learning_components(target_standard, relationships_data, learning_components_data):
+ print(f'\n✅ Found {len(matches)} CCSSM matches for {state_standard_code}')
+ print(f'\n📊 Top match (highest Jaccard score):')
+
+ top_match = matches.iloc[0]
+ print(f' CCSSM Standard UUID: {top_match["targetEntityValue"]}')
+ print(f' Jaccard Score: {top_match["jaccard"]:.4f}')
+ print(f' Shared LC Count: {top_match["sharedLCCount"]}')
+ print(f' State LC Count: {top_match["stateLCCount"]}')
+ print(f' CCSS LC Count: {top_match["ccssLCCount"]}')
+
+ return matches
+
+
+"""
+================================
+STEP 3: INTERPRET THE RELATIONSHIP METRICS
+================================
+"""
+
+def interpret_relationship_metrics(matches):
"""
- Find learning components that support the target standard
-
- Purpose: Extract the learning components (skills/concepts) that support the target
- standard. These components represent the underlying knowledge needed to master
- the standard and will be used to find similar Texas standards.
-
- SQL: SELECT lc.*
- FROM learning_component lc
- JOIN relationships r
- ON lc."identifier" = r."sourceEntityValue"
- WHERE r."targetEntityValue" = '0c0bb5f6-4b99-11ec-a82f-0242ac1a0003'
- AND r."relationshipType" = 'supports';
-
- Cypher: MATCH (lc:LearningComponent)-[:supports]->(standard)
- WHERE standard.caseIdentifierUUID = '0c0bb5f6-4b99-11ec-a82f-0242ac1a0003'
- RETURN lc
-
+ Interpret the relationship metrics for crosswalk matches
+
+ Purpose: Each crosswalk relationship carries additional context about the degree
+ of overlap:
+ - sharedLCCount shows how many deconstructed skills are shared
+ - stateLCCount and ccssLCCount show how many total skills support each standard
+ - Together with the Jaccard score, these counts help interpret the strength and
+ balance of the overlap
+
Args:
- target_standard (pd.Series): The target standard data
- relationships_data (pd.DataFrame): Relationships dataset
- learning_components_data (pd.DataFrame): Learning components dataset
-
+ matches (pd.DataFrame): Crosswalk matches from Step 2
+ """
+ if matches is None or len(matches) == 0:
+ return
+
+ print(f'\n📊 INTERPRETATION OF TOP MATCHES:\n')
+
+ # Show top 5 matches with interpretation
+ for idx, (_, match) in enumerate(matches.head(5).iterrows(), 1):
+ jaccard = match['jaccard']
+ state_lc = match['stateLCCount']
+ ccss_lc = match['ccssLCCount']
+ shared_lc = match['sharedLCCount']
+
+ print(f'Match #{idx}:')
+ print(f' Jaccard Score: {jaccard:.4f}')
+ print(f' State LC Count: {state_lc}')
+ print(f' CCSS LC Count: {ccss_lc}')
+ print(f' Shared LC Count: {shared_lc}')
+
+ # Interpret the metrics
+ if jaccard >= 0.9:
+ interpretation = "Very strong overlap; standards share nearly all skills"
+ elif jaccard >= 0.7:
+ interpretation = "Strong overlap; substantial shared skills"
+ elif jaccard >= 0.5:
+ interpretation = "Moderate overlap; many shared skills"
+ elif jaccard >= 0.3:
+ interpretation = "Partial overlap; some shared skills"
+ else:
+ interpretation = "Weak overlap; few shared skills"
+
+ # Check scope balance
+ if abs(state_lc - ccss_lc) <= 2:
+ scope_note = "Both standards have similar scope"
+ elif state_lc > ccss_lc:
+ scope_note = "State standard covers more content"
+ else:
+ scope_note = "CCSS standard covers more content"
+
+ print(f' Interpretation: {interpretation}')
+ print(f' Scope: {scope_note}')
+ print()
+
+
+"""
+================================
+STEP 4: JOIN CROSSWALKS WITH STANDARDS METADATA
+================================
+"""
+
+def enrich_crosswalks_with_metadata(matches, data):
+ """
+ Join crosswalk data with standards metadata
+
+ Purpose: Enrich the crosswalk data by joining it with StandardsFrameworkItem.csv,
+ which contains metadata such as grade level and description. This provides a clear
+ view of which state standards most closely align to their CCSSM counterparts, along
+ with the strength of each connection.
+
+ Args:
+ matches (pd.DataFrame): Crosswalk matches from Step 2
+ data (dict): Dictionary containing the loaded datasets
+
Returns:
- pd.DataFrame: Supporting learning components data
+ pd.DataFrame or None: Enriched crosswalk data with metadata; None if matches is None or empty
"""
- # Find relationships where learning components support the target standard
- supporting_relationships = relationships_data[
- (relationships_data['relationshipType'] == 'supports') &
- (relationships_data['targetEntityValue'] == target_standard['caseIdentifierUUID'])
- ]
-
- # Join with learning components data (add suffixes to handle column conflicts)
- supporting_lcs = supporting_relationships.merge(
- learning_components_data,
+ if matches is None or len(matches) == 0:
+ return None
+
+ standards_data = data['standards_framework_items_data']
+
+ # Rename columns to avoid conflicts when merging state and CCSS metadata
+ # We'll merge the same standards dataset twice (once for state, once for CCSS)
+
+ # Join with state standard metadata (source)
+ state_standards = standards_data[['identifier', 'statementCode', 'description',
+ 'gradeLevel', 'academicSubject', 'jurisdiction']].copy()
+ state_standards.columns = ['state_identifier', 'statementCode_state', 'description_state',
+ 'gradeLevel_state', 'academicSubject_state', 'jurisdiction']
+
+ enriched = matches.merge(
+ state_standards,
left_on='sourceEntityValue',
- right_on='identifier',
- how='inner',
- suffixes=('_rel', '_lc')
+ right_on='state_identifier',
+ how='left'
)
-
- # Select and rename columns to match expected output
- supporting_lcs = supporting_lcs[['sourceEntityValue', 'description_lc']].copy()
- supporting_lcs = supporting_lcs.rename(columns={
- 'sourceEntityValue': 'identifier',
- 'description_lc': 'description'
- })
- print(f'✅ Found {len(supporting_lcs)} supporting learning components:')
- for i, (_, lc) in enumerate(supporting_lcs.iterrows(), 1):
- print(f' {i}. {lc["identifier"]}: {lc["description"]}')
+ # Join with CCSS standard metadata (target)
+ ccss_standards = standards_data[['identifier', 'statementCode', 'description',
+ 'gradeLevel', 'academicSubject']].copy()
+ ccss_standards.columns = ['ccss_identifier', 'statementCode_ccss', 'description_ccss',
+ 'gradeLevel_ccss', 'academicSubject_ccss']
+
+ enriched = enriched.merge(
+ ccss_standards,
+ left_on='targetEntityValue',
+ right_on='ccss_identifier',
+ how='left'
+ )
- return supporting_lcs
+ print(f'\n✅ Enriched crosswalk data with standards metadata\n')
+ print(f'📊 DETAILED COMPARISON (Top 3 matches):\n')
+
+ for idx, (_, row) in enumerate(enriched.head(3).iterrows(), 1):
+ print(f'Match #{idx} (Jaccard: {row["jaccard"]:.4f}):')
+ print(f' STATE STANDARD:')
+ print(f' Code: {row["statementCode_state"]}')
+ print(f' Jurisdiction: {row["jurisdiction"]}')
+ print(f' Grade Level: {row["gradeLevel_state"]}')
+ print(f' Description: {row["description_state"]}')
+ print(f' ')
+ print(f' CCSS STANDARD:')
+ print(f' Code: {row["statementCode_ccss"]}')
+ print(f' Grade Level: {row["gradeLevel_ccss"]}')
+ print(f' Description: {row["description_ccss"]}')
+ print(f' ')
+ print(f' ALIGNMENT METRICS:')
+ print(f' Shared LCs: {row["sharedLCCount"]} / State LCs: {row["stateLCCount"]} / CCSS LCs: {row["ccssLCCount"]}')
+ print()
+
+ return enriched
"""
================================
-STEP 3: COMPARE TO TEXAS STANDARDS
+STEP 5: JOIN CROSSWALKS TO LEARNING COMPONENTS
================================
"""
-def find_matched_texas_standards(supporting_lcs, relationships_data, standards_framework_items_data, learning_components_data):
+def show_shared_learning_components(state_standard_code, ccss_standard_code, state_jurisdiction, data):
"""
- Find Texas standards that share learning components with the target standard
-
- Purpose: Identify Texas standards that have overlapping learning components with
- the target Common Core standard. This two-step process first finds standards with
- any overlap, then retrieves ALL learning components for those matched standards.
-
- SQL: WITH matched_standards AS (
- SELECT DISTINCT ts."caseIdentifierUUID"
- FROM standards_framework_item ts
- JOIN relationships r
- ON ts."caseIdentifierUUID" = r."targetEntityValue"
- JOIN learning_component lc
- ON r."sourceEntityValue" = lc."identifier"
- WHERE r."relationshipType" = 'supports'
- AND ts."jurisdiction" = 'Texas'
- AND lc."identifier" IN ('db4c25ad-9892-5abb-bcba-2fc9781d10f8',
- 'b9b94f31-b58b-5e26-9efe-680b167046ba',
- '523d04e7-47d8-55c7-bc44-792f3e01bfda')
- )
- SELECT
- ts."caseIdentifierUUID",
- ts."statementCode",
- ts."description",
- ARRAY_AGG(lc."description") AS lc_descriptions,
- ARRAY_AGG(lc."identifier") AS lc_identifiers
- FROM standards_framework_item ts
- JOIN relationships r
- ON ts."caseIdentifierUUID" = r."targetEntityValue"
- JOIN learning_component lc
- ON r."sourceEntityValue" = lc."identifier"
- WHERE r."relationshipType" = 'supports'
- AND ts."jurisdiction" = 'Texas'
- AND ts."caseIdentifierUUID" IN (SELECT "caseIdentifierUUID" FROM matched_standards)
- GROUP BY ts."caseIdentifierUUID", ts."statementCode", ts."description";
-
- Cypher: MATCH (ts:StandardsFrameworkItem)-[r:supports]-(lc:LearningComponent)
- WHERE ts.jurisdiction = 'Texas'
- AND lc.identifier IN ['db4c25ad-9892-5abb-bcba-2fc9781d10f8', 'b9b94f31-b58b-5e26-9efe-680b167046ba', '523d04e7-47d8-55c7-bc44-792f3e01bfda']
- WITH DISTINCT ts.caseIdentifierUUID AS matched_id
- MATCH (matched_ts:StandardsFrameworkItem)-[r2:supports]-(all_lc:LearningComponent)
- WHERE matched_ts.jurisdiction = 'Texas'
- AND matched_ts.caseIdentifierUUID = matched_id
- RETURN matched_ts.caseIdentifierUUID,
- matched_ts.statementCode,
- matched_ts.description,
- COLLECT(all_lc.description) AS lc_descriptions,
- COLLECT(all_lc.identifier) AS lc_identifiers
-
+ Join crosswalks to Learning Components to show shared skills
+
+ Purpose: Now that you have crosswalk pairs (state → CCSSM), you can see the
+ actual skills behind each match by joining to the Learning Components dataset.
+ We'll use the 'supports' relationships to fetch the LCs that support each standard
+ and then intersect them to list the shared LCs (the evidence behind the crosswalk).
+
Args:
- supporting_lcs (pd.DataFrame): Learning components from target standard
- relationships_data (pd.DataFrame): Relationships dataset
- standards_framework_items_data (pd.DataFrame): Standards dataset
- learning_components_data (pd.DataFrame): Learning components dataset
-
- Returns:
- list: List of dictionaries with matched Texas standards and their components
+ state_standard_code (str): State standard code
+ ccss_standard_code (str): CCSS standard code
+ state_jurisdiction (str): State jurisdiction (to ensure correct standard match)
+ data (dict): Dictionary containing the loaded datasets
"""
- lc_ids = supporting_lcs['identifier'].tolist()
+ standards_data = data['standards_framework_items_data']
+ relationships_data = data['relationships_data']
+ learning_components_data = data['learning_components_data']
+
+ # Find the standard identifiers
+ # Note: For LC relationships, we need to use caseIdentifierUUID, not identifier
+ state_standard = standards_data[
+ (standards_data['statementCode'] == state_standard_code) &
+ (standards_data['jurisdiction'] == state_jurisdiction)
+ ]
+ ccss_standard = standards_data[
+ (standards_data['statementCode'] == ccss_standard_code) &
+ (standards_data['jurisdiction'] == 'Multi-State')
+ ]
+
+ if len(state_standard) == 0 or len(ccss_standard) == 0:
+ print('❌ Could not find one or both standards')
+ return
+
+ state_uuid = state_standard.iloc[0]['caseIdentifierUUID']
+ ccss_uuid = ccss_standard.iloc[0]['caseIdentifierUUID']
- # Step 1: Find Texas standards with overlapping learning components
- overlapping_relationships = relationships_data[
+ # Get LCs that support the state standard
+ # LC relationships use caseIdentifierUUID for targetEntityValue
+ state_lc_relationships = relationships_data[
(relationships_data['relationshipType'] == 'supports') &
- (relationships_data['sourceEntityValue'].isin(lc_ids))
+ (relationships_data['targetEntityValue'] == state_uuid)
]
-
- texas_standards_with_overlap = overlapping_relationships.merge(
- standards_framework_items_data[standards_framework_items_data['jurisdiction'] == 'Texas'],
- left_on='targetEntityValue',
- right_on='caseIdentifierUUID',
- how='inner'
- )
-
- matched_standard_ids = texas_standards_with_overlap['caseIdentifierUUID'].unique()
- # Step 2: Get ALL learning components for those matched standards
- all_support_relationships = relationships_data[
+ state_lc_ids = state_lc_relationships['sourceEntityValue'].unique()
+ state_lcs = learning_components_data[
+ learning_components_data['identifier'].isin(state_lc_ids)
+ ][['identifier', 'description']].drop_duplicates()
+
+ # Get LCs that support the CCSS standard
+ ccss_lc_relationships = relationships_data[
(relationships_data['relationshipType'] == 'supports') &
- (relationships_data['targetEntityValue'].isin(matched_standard_ids))
+ (relationships_data['targetEntityValue'] == ccss_uuid)
]
-
- # Join with Texas standards (add suffixes to handle column conflicts)
- results_with_standards = all_support_relationships.merge(
- standards_framework_items_data[standards_framework_items_data['jurisdiction'] == 'Texas'],
- left_on='targetEntityValue',
- right_on='caseIdentifierUUID',
- how='inner',
- suffixes=('_rel', '_std')
- )
-
- # Join with learning components (add suffixes to handle column conflicts)
- results_with_components = results_with_standards.merge(
- learning_components_data,
- left_on='sourceEntityValue',
- right_on='identifier',
- how='inner',
- suffixes=('', '_lc')
+
+ ccss_lc_ids = ccss_lc_relationships['sourceEntityValue'].unique()
+ ccss_lcs = learning_components_data[
+ learning_components_data['identifier'].isin(ccss_lc_ids)
+ ][['identifier', 'description']].drop_duplicates()
+
+ # Find shared LCs (intersection) using merge
+ shared_lcs = state_lcs.merge(
+ ccss_lcs[['identifier']],
+ on='identifier',
+ how='inner'
)
- # Group by standard to aggregate learning components
- final_results = []
- for standard_uuid in matched_standard_ids:
- standard_data = results_with_components[
- results_with_components['caseIdentifierUUID'] == standard_uuid
- ]
-
- if len(standard_data) > 0:
- first_row = standard_data.iloc[0]
- lc_descriptions = standard_data['description'].tolist() # description is from learning_components (no _lc suffix)
- lc_identifiers = standard_data['identifier'].tolist() # identifier is from learning_components (no _lc suffix)
-
- final_results.append({
- 'caseIdentifierUUID': standard_uuid,
- 'statementCode': first_row['statementCode'],
- 'standardDescription': first_row['description_std'], # description_std is from standards
- 'lcDescription': lc_descriptions,
- 'lcIdentifier': lc_identifiers
- })
-
- print(f'✅ Found {len(final_results)} Texas standards with shared learning components (lc):')
- for i, result in enumerate(final_results, 1):
- print(f' {i}. {result["statementCode"]}: {result["standardDescription"]}')
- print(f' Learning Components: {len(result["lcDescription"])} components')
-
- return final_results
-
-
-
-def display_comparison_results(target_standard, supporting_lcs, matched_texas_standards):
- """
- Display comprehensive comparison results between standards
-
- Purpose: Present the analysis results in a structured, readable format showing
- the target standard, matched Texas standards, and overlap analysis
-
- Args:
- target_standard (pd.Series): The target Common Core standard
- supporting_lcs (pd.DataFrame): Learning components for target standard
- matched_texas_standards (list): Matched Texas standards with components
- """
- # Calculate overlap metrics for each matched standard
- supporting_lc_descriptions = supporting_lcs['description'].tolist()
- results_with_overlap = []
-
- for std in matched_texas_standards:
- overlap_count = len([lc for lc in std['lcDescription']
- if lc in supporting_lc_descriptions])
- total_target_lcs = len(supporting_lc_descriptions)
-
- results_with_overlap.append({
- **std,
- 'overlapCount': overlap_count,
- 'totalTargetLCs': total_target_lcs,
- 'overlapRatio': f'{overlap_count}/{total_target_lcs}'
- })
-
- print(f'✅ Full comparison between Common Core standard {target_standard["statementCode"]} and matched Texas standards:')
- print('📋 TARGET STANDARD:')
- print(f' Code: {target_standard["statementCode"]}')
- print(f' Description: {target_standard["description"]}')
- print(f' Supporting Learning Components ({len(supporting_lcs)}):')
- for _, lc in supporting_lcs.iterrows():
- print(f' • {lc["description"]}')
- print('')
-
- for i, match in enumerate(results_with_overlap):
- print(f'📋 MATCHED STANDARD #{i + 1}:')
- print(f' Code: {match["statementCode"]}')
- print(f' Description: {match["standardDescription"]}')
- print(f' Supporting Learning Components ({len(match["lcDescription"])}) - Overlap: {match["overlapRatio"]}:')
-
- for lc in match['lcDescription']:
- is_shared = lc in supporting_lc_descriptions
- emoji = '➕' if is_shared else '➖'
- print(f' {emoji} {lc or "(no description)"}')
- print('')
+ # Find state-only LCs (in state but not in CCSS)
+ state_only_lcs = state_lcs[
+ ~state_lcs['identifier'].isin(ccss_lcs['identifier'])
+ ]
+
+ # Find CCSS-only LCs (in CCSS but not in state)
+ ccss_only_lcs = ccss_lcs[
+ ~ccss_lcs['identifier'].isin(state_lcs['identifier'])
+ ]
+
+ print(f'\n✅ LEARNING COMPONENTS ANALYSIS:\n')
+ print(f'State Standard: {state_standard_code}')
+ print(f'CCSS Standard: {ccss_standard_code}')
+ print()
+
+ print(f'📊 SHARED LEARNING COMPONENTS ({len(shared_lcs)}):')
+ print('These are the concrete pedagogical overlaps between the two standards:\n')
+ for idx, (_, lc) in enumerate(shared_lcs.iterrows(), 1):
+ print(f' ✅ {idx}. {lc["description"]}')
+ print()
+
+ print(f'📊 STATE-ONLY LEARNING COMPONENTS ({len(state_only_lcs)}):')
+ for idx, (_, lc) in enumerate(state_only_lcs.iterrows(), 1):
+ print(f' ➖ {idx}. {lc["description"]}')
+ print()
+
+ print(f'📊 CCSS-ONLY LEARNING COMPONENTS ({len(ccss_only_lcs)}):')
+ for idx, (_, lc) in enumerate(ccss_only_lcs.iterrows(), 1):
+ print(f' ➕ {idx}. {lc["description"]}')
+ print()
+
"""
================================
@@ -425,32 +447,36 @@ def main():
"""
Main execution function - orchestrates all tutorial steps
"""
- print('\n=== COMPARE STANDARDS TUTORIAL ===\n')
-
- print('🔄 Step 1: Loading data...')
- prepared_data = load_data()
-
- standards_framework_items_data = prepared_data['standards_framework_items_data']
- learning_components_data = prepared_data['learning_components_data']
- relationships_data = prepared_data['relationships_data']
-
- print('')
- print('')
- print('🔄 Step 2: "Unpack" a Common Core standard...')
- target_standard = find_target_standard(standards_framework_items_data)
- if target_standard is None:
- print('❌ Failed to find target standard.')
- return
-
- supporting_lcs = extract_supporting_learning_components(target_standard, relationships_data, learning_components_data)
-
- print('')
- print('')
- print('🔄 Step 3: Compare to Texas standards...')
- matched_texas_standards = find_matched_texas_standards(supporting_lcs, relationships_data, standards_framework_items_data, learning_components_data)
-
- display_comparison_results(target_standard, supporting_lcs, matched_texas_standards)
+ print('\n=== USING CROSSWALKS TO COMPARE STATE STANDARDS TO COMMON CORE ===\n')
+
+ print('🔄 Step 1: Load the crosswalk data...')
+ data = load_crosswalk_data()
+
+ print('\n' + '='*70)
+ print('🔄 Step 2: Find the best-matching CCSSM standard for a state standard...')
+ matches = find_best_ccssm_match(TARGET_STATE_STANDARD_CODE, TARGET_STATE_JURISDICTION, data)
+
+ if matches is not None and len(matches) > 0:
+ print('\n' + '='*70)
+ print('🔄 Step 3: Interpret the relationship metrics...')
+ interpret_relationship_metrics(matches)
+
+ print('='*70)
+ print('🔄 Step 4: Join crosswalks with standards metadata...')
+ enriched = enrich_crosswalks_with_metadata(matches, data)
+
+ if enriched is not None and len(enriched) > 0:
+ print('='*70)
+ print('🔄 Step 5: Join crosswalks to Learning Components...')
+ # Use the top match for detailed LC analysis
+ top_match = enriched.iloc[0]
+ show_shared_learning_components(
+ top_match['statementCode_state'],
+ top_match['statementCode_ccss'],
+ top_match['jurisdiction'],
+ data
+ )
if __name__ == '__main__':
- main()
\ No newline at end of file
+ main()
From 5a7655e4b8dfd2df7fc724a61d95d308eea9b83e Mon Sep 17 00:00:00 2001
From: Li Xu
Date: Thu, 20 Nov 2025 19:27:53 -0800
Subject: [PATCH 2/8] update join keys and other fixes for crosswalks
---
...nd_best_ccssm_match_for_state_standard.sql | 14 --
...nd_best_state_match_for_ccssm_standard.sql | 36 +++++
.../get_crosswalks_by_jaccard_threshold.sql | 18 ++-
.../get_crosswalks_for_state.sql | 27 +++-
...get_crosswalks_with_standards_metadata.sql | 32 ++--
...ared_learning_components_for_crosswalk.sql | 4 +
...nd_best_ccssm_match_for_state_standard.sql | 14 --
...nd_best_state_match_for_ccssm_standard.sql | 36 +++++
.../get_crosswalks_by_jaccard_threshold.sql | 18 ++-
.../get_crosswalks_for_state.sql | 27 +++-
...get_crosswalks_with_standards_metadata.sql | 32 ++--
...ared_learning_components_for_crosswalk.sql | 4 +
.../compare_standards/js/compare-standards.js | 132 ++++++++++-------
.../python/compare_standards.py | 138 ++++++++++--------
14 files changed, 350 insertions(+), 182 deletions(-)
delete mode 100644 sample_queries/mysql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql
create mode 100644 sample_queries/mysql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql
delete mode 100644 sample_queries/postgresql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql
create mode 100644 sample_queries/postgresql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql
diff --git a/sample_queries/mysql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql b/sample_queries/mysql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql
deleted file mode 100644
index ded0a43..0000000
--- a/sample_queries/mysql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql
+++ /dev/null
@@ -1,14 +0,0 @@
-SELECT
- r.`sourceEntityValue`,
- r.`targetEntityValue`,
- r.`jaccard`,
- r.`stateLCCount`,
- r.`ccssLCCount`,
- r.`sharedLCCount`
-FROM relationships r
-JOIN standards_framework_item sfi
- ON sfi.`identifier` = r.`sourceEntityValue`
-WHERE r.`relationshipType` = 'hasStandardAlignment'
- AND sfi.`statementCode` = '111.26.b.4.D'
- AND sfi.`jurisdiction` = 'Texas'
-ORDER BY r.`jaccard` DESC;
diff --git a/sample_queries/mysql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql b/sample_queries/mysql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql
new file mode 100644
index 0000000..4896d75
--- /dev/null
+++ b/sample_queries/mysql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql
@@ -0,0 +1,36 @@
+-- Find the best state standard matches for a given CCSSM standard
+-- Returns the 10 highest-Jaccard crosswalks (ties broken by state jurisdiction, code, UUID)
+-- with metadata about both the CCSSM and matching state standards
+
+SELECT
+ -- CCSSM Standard Information
+ ccss.`statementCode` AS ccss_standard_code,
+ ccss.`description` AS ccss_description,
+ ccss.`gradeLevel` AS ccss_grade_level,
+ ccss.`jurisdiction` AS ccss_jurisdiction,
+
+ -- State Standard Information
+ state.`statementCode` AS state_standard_code,
+ state.`description` AS state_description,
+ state.`gradeLevel` AS state_grade_level,
+ state.`jurisdiction` AS state_jurisdiction,
+
+ -- Crosswalk Metrics
+ r.`jaccard`,
+ r.`sharedLCCount`,
+ r.`stateLCCount`,
+ r.`ccssLCCount`,
+
+ -- Entity Values for further joins if needed
+ r.`sourceEntityValue` AS state_uuid,
+ r.`targetEntityValue` AS ccss_uuid
+FROM relationships r
+JOIN standards_framework_item state
+ ON state.`caseIdentifierUUID` = r.`sourceEntityValue`
+JOIN standards_framework_item ccss
+ ON ccss.`caseIdentifierUUID` = r.`targetEntityValue`
+WHERE r.`relationshipType` = 'hasStandardAlignment'
+ AND ccss.`statementCode` = '6.RP.A.2'
+ AND ccss.`jurisdiction` = 'Multi-State'
+ORDER BY r.`jaccard` DESC, state.`jurisdiction`, state.`statementCode`, r.`sourceEntityValue`
+LIMIT 10;
diff --git a/sample_queries/mysql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql b/sample_queries/mysql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
index d405492..0010765 100644
--- a/sample_queries/mysql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
+++ b/sample_queries/mysql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
@@ -1,16 +1,22 @@
+-- Get all crosswalks whose Jaccard similarity meets or exceeds a threshold (0.7 in the WHERE clause)
+-- Ordered by state jurisdiction, CCSSM standard code, then Jaccard score
+
SELECT
- state_std.`statementCode` AS state_standard_code,
state_std.`jurisdiction` AS state_jurisdiction,
+ state_std.`statementCode` AS state_standard_code,
ccss_std.`statementCode` AS ccss_standard_code,
r.`jaccard`,
+ r.`sharedLCCount`,
r.`stateLCCount`,
- r.`ccssLCCount`,
- r.`sharedLCCount`
+ r.`ccssLCCount`
FROM relationships r
JOIN standards_framework_item state_std
- ON state_std.`identifier` = r.`sourceEntityValue`
+ ON state_std.`caseIdentifierUUID` = r.`sourceEntityValue`
JOIN standards_framework_item ccss_std
- ON ccss_std.`identifier` = r.`targetEntityValue`
+ ON ccss_std.`caseIdentifierUUID` = r.`targetEntityValue`
WHERE r.`relationshipType` = 'hasStandardAlignment'
AND r.`jaccard` >= 0.7
-ORDER BY r.`jaccard` DESC;
+ORDER BY
+ state_std.`jurisdiction`,
+ ccss_std.`statementCode`,
+ r.`jaccard` DESC;
diff --git a/sample_queries/mysql/crosswalk_queries/get_crosswalks_for_state.sql b/sample_queries/mysql/crosswalk_queries/get_crosswalks_for_state.sql
index 94acc2f..073ccc7 100644
--- a/sample_queries/mysql/crosswalk_queries/get_crosswalks_for_state.sql
+++ b/sample_queries/mysql/crosswalk_queries/get_crosswalks_for_state.sql
@@ -1,16 +1,35 @@
+-- Get all crosswalks for a specific state jurisdiction
+-- Returns comprehensive metadata for both state and CCSSM standards
+-- Ordered by state standard code, then Jaccard score (best match first), then CCSSM code
+
SELECT
+ -- State Standard Information
+ state_std.`jurisdiction` AS state_jurisdiction,
state_std.`statementCode` AS state_standard_code,
state_std.`gradeLevel` AS state_grade_level,
state_std.`description` AS state_description,
+ state_std.`academicSubject` AS state_academic_subject,
+
+ -- CCSSM Standard Information
ccss_std.`statementCode` AS ccss_standard_code,
+ ccss_std.`gradeLevel` AS ccss_grade_level,
+ ccss_std.`description` AS ccss_description,
+ ccss_std.`academicSubject` AS ccss_academic_subject,
+
+ -- Crosswalk Metrics
r.`jaccard`,
- r.`sharedLCCount`
+ r.`sharedLCCount`,
+ r.`stateLCCount`,
+ r.`ccssLCCount`
FROM relationships r
JOIN standards_framework_item state_std
- ON state_std.`identifier` = r.`sourceEntityValue`
+ ON state_std.`caseIdentifierUUID` = r.`sourceEntityValue`
JOIN standards_framework_item ccss_std
- ON ccss_std.`identifier` = r.`targetEntityValue`
+ ON ccss_std.`caseIdentifierUUID` = r.`targetEntityValue`
WHERE r.`relationshipType` = 'hasStandardAlignment'
AND state_std.`jurisdiction` = 'Texas'
AND state_std.`academicSubject` = 'Mathematics'
-ORDER BY r.`jaccard` DESC;
+ORDER BY
+ state_std.`statementCode`,
+ r.`jaccard` DESC,
+ ccss_std.`statementCode`;
diff --git a/sample_queries/mysql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql b/sample_queries/mysql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
index 72e3bef..d715b62 100644
--- a/sample_queries/mysql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
+++ b/sample_queries/mysql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
@@ -1,21 +1,33 @@
+-- Find state standard matches for a given CCSSM standard with full metadata
+-- Returns up to 10 matching state standards, most similar first (ties broken by jurisdiction, code, UUID)
+
SELECT
+ -- CCSSM Standard Information
+ ccss_std.`statementCode` AS ccss_standard_code,
+ ccss_std.`jurisdiction` AS ccss_jurisdiction,
+ ccss_std.`gradeLevel` AS ccss_grade_level,
+ ccss_std.`description` AS ccss_description,
+ ccss_std.`academicSubject` AS ccss_academic_subject,
+
+ -- State Standard Information
state_std.`statementCode` AS state_standard_code,
state_std.`jurisdiction` AS state_jurisdiction,
state_std.`gradeLevel` AS state_grade_level,
state_std.`description` AS state_description,
- ccss_std.`statementCode` AS ccss_standard_code,
- ccss_std.`gradeLevel` AS ccss_grade_level,
- ccss_std.`description` AS ccss_description,
+ state_std.`academicSubject` AS state_academic_subject,
+
+ -- Crosswalk Metrics
r.`jaccard`,
+ r.`sharedLCCount`,
r.`stateLCCount`,
- r.`ccssLCCount`,
- r.`sharedLCCount`
+ r.`ccssLCCount`
FROM relationships r
JOIN standards_framework_item state_std
- ON state_std.`identifier` = r.`sourceEntityValue`
+ ON state_std.`caseIdentifierUUID` = r.`sourceEntityValue`
JOIN standards_framework_item ccss_std
- ON ccss_std.`identifier` = r.`targetEntityValue`
+ ON ccss_std.`caseIdentifierUUID` = r.`targetEntityValue`
WHERE r.`relationshipType` = 'hasStandardAlignment'
- AND state_std.`statementCode` = '111.26.b.4.D'
- AND state_std.`jurisdiction` = 'Texas'
-ORDER BY r.`jaccard` DESC;
+ AND ccss_std.`statementCode` = '6.RP.A.2'
+ AND ccss_std.`jurisdiction` = 'Multi-State'
+ORDER BY r.`jaccard` DESC, state_std.`jurisdiction`, state_std.`statementCode`, r.`sourceEntityValue`
+LIMIT 10;
diff --git a/sample_queries/mysql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql b/sample_queries/mysql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql
index cd932f6..85728c2 100644
--- a/sample_queries/mysql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql
+++ b/sample_queries/mysql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql
@@ -1,3 +1,7 @@
+-- Get the Learning Components that support both a state standard and a CCSSM standard
+-- Returns three categories: shared LCs (in both), state-only LCs, and CCSSM-only LCs
+-- This shows the pedagogical overlap and differences between crosswalked standards
+
WITH state_lcs AS (
SELECT lc.`identifier`, lc.`description`
FROM relationships r
diff --git a/sample_queries/postgresql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql b/sample_queries/postgresql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql
deleted file mode 100644
index 61a4810..0000000
--- a/sample_queries/postgresql/crosswalk_queries/find_best_ccssm_match_for_state_standard.sql
+++ /dev/null
@@ -1,14 +0,0 @@
-SELECT
- r."sourceEntityValue",
- r."targetEntityValue",
- r."jaccard",
- r."stateLCCount",
- r."ccssLCCount",
- r."sharedLCCount"
-FROM relationships r
-JOIN standards_framework_item sfi
- ON sfi."identifier" = r."sourceEntityValue"
-WHERE r."relationshipType" = 'hasStandardAlignment'
- AND sfi."statementCode" = '111.26.b.4.D'
- AND sfi."jurisdiction" = 'Texas'
-ORDER BY r."jaccard" DESC;
diff --git a/sample_queries/postgresql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql b/sample_queries/postgresql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql
new file mode 100644
index 0000000..03a63c6
--- /dev/null
+++ b/sample_queries/postgresql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql
@@ -0,0 +1,36 @@
+-- Find the best state standard matches for a given CCSSM standard
+-- Returns the 10 highest-Jaccard crosswalks (ties broken by state jurisdiction, code, UUID)
+-- with metadata about both the CCSSM and matching state standards
+
+SELECT
+ -- CCSSM Standard Information
+ ccss."statementCode" AS ccss_standard_code,
+ ccss."description" AS ccss_description,
+ ccss."gradeLevel" AS ccss_grade_level,
+ ccss."jurisdiction" AS ccss_jurisdiction,
+
+ -- State Standard Information
+ state."statementCode" AS state_standard_code,
+ state."description" AS state_description,
+ state."gradeLevel" AS state_grade_level,
+ state."jurisdiction" AS state_jurisdiction,
+
+ -- Crosswalk Metrics
+ r."jaccard",
+ r."sharedLCCount",
+ r."stateLCCount",
+ r."ccssLCCount",
+
+ -- Entity Values for further joins if needed
+ r."sourceEntityValue" AS state_uuid,
+ r."targetEntityValue" AS ccss_uuid
+FROM relationships r
+JOIN standards_framework_item state
+ ON state."caseIdentifierUUID" = r."sourceEntityValue"
+JOIN standards_framework_item ccss
+ ON ccss."caseIdentifierUUID" = r."targetEntityValue"
+WHERE r."relationshipType" = 'hasStandardAlignment'
+ AND ccss."statementCode" = '6.RP.A.2'
+ AND ccss."jurisdiction" = 'Multi-State'
+ORDER BY r."jaccard" DESC, state."jurisdiction", state."statementCode", r."sourceEntityValue"
+LIMIT 10;
diff --git a/sample_queries/postgresql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
index 07fd855..6a18402 100644
--- a/sample_queries/postgresql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
+++ b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
@@ -1,16 +1,22 @@
+-- Get all crosswalks whose Jaccard similarity meets or exceeds a threshold (0.7 in the WHERE clause)
+-- Ordered by state jurisdiction, CCSSM standard code, then Jaccard score
+
SELECT
- state_std."statementCode" AS state_standard_code,
state_std."jurisdiction" AS state_jurisdiction,
+ state_std."statementCode" AS state_standard_code,
ccss_std."statementCode" AS ccss_standard_code,
r."jaccard",
+ r."sharedLCCount",
r."stateLCCount",
- r."ccssLCCount",
- r."sharedLCCount"
+ r."ccssLCCount"
FROM relationships r
JOIN standards_framework_item state_std
- ON state_std."identifier" = r."sourceEntityValue"
+ ON state_std."caseIdentifierUUID" = r."sourceEntityValue"
JOIN standards_framework_item ccss_std
- ON ccss_std."identifier" = r."targetEntityValue"
+ ON ccss_std."caseIdentifierUUID" = r."targetEntityValue"
WHERE r."relationshipType" = 'hasStandardAlignment'
AND r."jaccard" >= 0.7
-ORDER BY r."jaccard" DESC;
+ORDER BY
+ state_std."jurisdiction",
+ ccss_std."statementCode",
+ r."jaccard" DESC;
diff --git a/sample_queries/postgresql/crosswalk_queries/get_crosswalks_for_state.sql b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_for_state.sql
index f54ef10..849d6f6 100644
--- a/sample_queries/postgresql/crosswalk_queries/get_crosswalks_for_state.sql
+++ b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_for_state.sql
@@ -1,16 +1,35 @@
+-- Get all crosswalks for a specific state jurisdiction
+-- Returns comprehensive metadata for both state and CCSSM standards
+-- Ordered by state standard code, then Jaccard score (best match first), then CCSSM code
+
SELECT
+ -- State Standard Information
+ state_std."jurisdiction" AS state_jurisdiction,
state_std."statementCode" AS state_standard_code,
state_std."gradeLevel" AS state_grade_level,
state_std."description" AS state_description,
+ state_std."academicSubject" AS state_academic_subject,
+
+ -- CCSSM Standard Information
ccss_std."statementCode" AS ccss_standard_code,
+ ccss_std."gradeLevel" AS ccss_grade_level,
+ ccss_std."description" AS ccss_description,
+ ccss_std."academicSubject" AS ccss_academic_subject,
+
+ -- Crosswalk Metrics
r."jaccard",
- r."sharedLCCount"
+ r."sharedLCCount",
+ r."stateLCCount",
+ r."ccssLCCount"
FROM relationships r
JOIN standards_framework_item state_std
- ON state_std."identifier" = r."sourceEntityValue"
+ ON state_std."caseIdentifierUUID" = r."sourceEntityValue"
JOIN standards_framework_item ccss_std
- ON ccss_std."identifier" = r."targetEntityValue"
+ ON ccss_std."caseIdentifierUUID" = r."targetEntityValue"
WHERE r."relationshipType" = 'hasStandardAlignment'
AND state_std."jurisdiction" = 'Texas'
AND state_std."academicSubject" = 'Mathematics'
-ORDER BY r."jaccard" DESC;
+ORDER BY
+ state_std."statementCode",
+ r."jaccard" DESC,
+ ccss_std."statementCode";
diff --git a/sample_queries/postgresql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
index 872011c..e3eebb9 100644
--- a/sample_queries/postgresql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
+++ b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
@@ -1,21 +1,33 @@
+-- Find state standard matches for a given CCSSM standard with full metadata
+-- Returns up to 10 matching state standards, most similar first (ties broken by jurisdiction, code, UUID)
+
SELECT
+ -- CCSSM Standard Information
+ ccss_std."statementCode" AS ccss_standard_code,
+ ccss_std."jurisdiction" AS ccss_jurisdiction,
+ ccss_std."gradeLevel" AS ccss_grade_level,
+ ccss_std."description" AS ccss_description,
+ ccss_std."academicSubject" AS ccss_academic_subject,
+
+ -- State Standard Information
state_std."statementCode" AS state_standard_code,
state_std."jurisdiction" AS state_jurisdiction,
state_std."gradeLevel" AS state_grade_level,
state_std."description" AS state_description,
- ccss_std."statementCode" AS ccss_standard_code,
- ccss_std."gradeLevel" AS ccss_grade_level,
- ccss_std."description" AS ccss_description,
+ state_std."academicSubject" AS state_academic_subject,
+
+ -- Crosswalk Metrics
r."jaccard",
+ r."sharedLCCount",
r."stateLCCount",
- r."ccssLCCount",
- r."sharedLCCount"
+ r."ccssLCCount"
FROM relationships r
JOIN standards_framework_item state_std
- ON state_std."identifier" = r."sourceEntityValue"
+ ON state_std."caseIdentifierUUID" = r."sourceEntityValue"
JOIN standards_framework_item ccss_std
- ON ccss_std."identifier" = r."targetEntityValue"
+ ON ccss_std."caseIdentifierUUID" = r."targetEntityValue"
WHERE r."relationshipType" = 'hasStandardAlignment'
- AND state_std."statementCode" = '111.26.b.4.D'
- AND state_std."jurisdiction" = 'Texas'
-ORDER BY r."jaccard" DESC;
+ AND ccss_std."statementCode" = '6.RP.A.2'
+ AND ccss_std."jurisdiction" = 'Multi-State'
+ORDER BY r."jaccard" DESC, state_std."jurisdiction", state_std."statementCode", r."sourceEntityValue"
+LIMIT 10;
diff --git a/sample_queries/postgresql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql b/sample_queries/postgresql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql
index 932392c..cd51ead 100644
--- a/sample_queries/postgresql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql
+++ b/sample_queries/postgresql/crosswalk_queries/get_shared_learning_components_for_crosswalk.sql
@@ -1,3 +1,7 @@
+-- Get the Learning Components that support both a state standard and a CCSSM standard
+-- Returns three categories: shared LCs (in both), state-only LCs, and CCSSM-only LCs
+-- This shows the pedagogical overlap and differences between crosswalked standards
+
WITH state_lcs AS (
SELECT lc."identifier", lc."description"
FROM relationships r
diff --git a/tutorials/compare_standards/js/compare-standards.js b/tutorials/compare_standards/js/compare-standards.js
index e4d87c0..e225f28 100644
--- a/tutorials/compare_standards/js/compare-standards.js
+++ b/tutorials/compare_standards/js/compare-standards.js
@@ -9,10 +9,9 @@ const { parse } = require('csv-parse/sync');
require('dotenv').config();
// Domain Constants
-// Pick a state standard to find its best CCSSM match
-// Note: We need to specify both the code AND jurisdiction since multiple states may use the same code
-const TARGET_STATE_STANDARD_CODE = '111.26.b.4.D'; // Texas 6th grade math standard on rates
-const TARGET_STATE_JURISDICTION = 'Texas';
+// Pick a CCSSM standard to find its best state standard matches
+const TARGET_CCSSM_STANDARD_CODE = '6.NS.C.6.c'; // Common Core 6th grade math standard on positioning integers on number lines
+const TARGET_CCSSM_JURISDICTION = 'Multi-State';
// Environment setup
const dataDir = process.env.KG_DATA_PATH;
@@ -94,57 +93,78 @@ function loadCrosswalkData(aq) {
/* ================================
- STEP 2: FIND THE BEST-MATCHING CCSSM STANDARD
+ STEP 2: FIND THE BEST-MATCHING STATE STANDARDS
================================ */
-function findBestCcssmMatch(stateStandardCode, jurisdiction, data, aq) {
+function findBestStateMatches(ccssmStandardCode, jurisdiction, data, aq) {
/**
- * Find the best CCSSM match for a state standard
+ * Find the best Texas state standard matches for a CCSSM standard
*
- * Purpose: To find the best CCSS match for a state standard, filter rows by the
- * state standard ID and sort by the Jaccard score. This identifies the CCSSM
- * standard that contains the most similar skills and concept targets for student
+ * Purpose: To find the best state standard matches for a CCSSM standard, filter rows by the
+ * CCSSM standard ID and sort by the Jaccard score. This identifies the state
+ * standards that contain the most similar skills and concept targets for student
* mastery (not necessarily the most similar semantically).
*/
const { crosswalkData, standardsFrameworkItemsData } = data;
- // First, find the state standard by its statement code and jurisdiction
- const stateStandard = standardsFrameworkItemsData
- .params({ code: stateStandardCode, juris: jurisdiction })
+ // First, find the CCSSM standard by its statement code and jurisdiction
+ const ccssmStandard = standardsFrameworkItemsData
+ .params({ code: ccssmStandardCode, juris: jurisdiction })
.filter(d => d.statementCode === code && d.jurisdiction === juris)
.object();
- if (!stateStandard || !stateStandard.statementCode) {
- console.log(`❌ State standard not found: ${stateStandardCode}`);
+ if (!ccssmStandard || !ccssmStandard.statementCode) {
+ console.log(`❌ CCSSM standard not found: ${ccssmStandardCode}`);
return null;
}
- const stateStandardId = stateStandard.identifier; // Use 'identifier' column for crosswalk matching
+ const ccssmStandardUuid = ccssmStandard.caseIdentifierUUID; // Use 'caseIdentifierUUID' for crosswalk matching
- console.log(`✅ Found state standard: ${stateStandardCode}`);
- console.log(` Identifier: ${stateStandardId}`);
- console.log(` Description: ${stateStandard.description}`);
- console.log(` Jurisdiction: ${stateStandard.jurisdiction}`);
+ console.log(`✅ Found CCSSM standard: ${ccssmStandardCode}`);
+ console.log(` Case UUID: ${ccssmStandardUuid}`);
+ console.log(` Description: ${ccssmStandard.description}`);
+ console.log(` Jurisdiction: ${ccssmStandard.jurisdiction}`);
- // Filter crosswalk data for this state standard
+ // Filter crosswalk data for this CCSSM standard (it's the target in relationships);
+ // the Texas-only filter is applied in a separate step after the jurisdiction join below
const matches = crosswalkData
- .params({ stateId: stateStandardId })
- .filter(d => d.sourceEntityValue === stateId);
+ .params({ ccssmId: ccssmStandardUuid })
+ .filter(d => d.targetEntityValue === ccssmId);
if (matches.numRows() === 0) {
- console.log(`\n❌ No CCSSM matches found for ${stateStandardCode}`);
+ console.log(`\n❌ No state standard matches found for ${ccssmStandardCode}`);
return null;
}
+ // Join with standards data to get jurisdiction and filter for Texas
+ const matchesWithJurisdiction = matches
+ .join(
+ standardsFrameworkItemsData.select('caseIdentifierUUID', 'jurisdiction'),
+ ['sourceEntityValue', 'caseIdentifierUUID']
+ );
+
+ // Filter for Texas only
+ const texasMatches = matchesWithJurisdiction
+ .params({ state: 'Texas' })
+ .filter(d => d.jurisdiction === state);
+
+ if (texasMatches.numRows() === 0) {
+ console.log(`\n❌ No Texas standard matches found for ${ccssmStandardCode}`);
+ return null;
+ }
+
+ // Drop the temporary columns added for filtering to avoid conflicts in later joins
+ const texasMatchesClean = texasMatches.select(aq.not('caseIdentifierUUID', 'jurisdiction'));
+
// Sort by Jaccard score (highest first)
- const sortedMatches = matches.orderby(aq.desc('jaccard'));
+ const sortedMatches = texasMatchesClean.orderby(aq.desc('jaccard'));
- console.log(`\n✅ Found ${sortedMatches.numRows()} CCSSM matches for ${stateStandardCode}`);
- console.log(`\n📊 Top match (highest Jaccard score):`);
+ console.log(`\n✅ Found ${sortedMatches.numRows()} Texas standard matches for ${ccssmStandardCode}`);
+ console.log(`\n📊 Top Texas match (highest Jaccard score):`);
const topMatch = sortedMatches.object();
- console.log(` CCSSM Standard UUID: ${topMatch.targetEntityValue}`);
+ console.log(` State Standard UUID: ${topMatch.sourceEntityValue}`);
console.log(` Jaccard Score: ${parseFloat(topMatch.jaccard).toFixed(4)}`);
console.log(` Shared LC Count: ${topMatch.sharedLCCount}`);
console.log(` State LC Count: ${topMatch.stateLCCount}`);
@@ -242,35 +262,36 @@ function enrichCrosswalksWithMetadata(matches, data, aq) {
const { standardsFrameworkItemsData } = data;
- // Rename columns to avoid conflicts when merging state and CCSS metadata
- // We'll merge the same standards dataset twice (once for state, once for CCSS)
+ // Rename columns to avoid conflicts when merging CCSS and state metadata
+ // We'll merge the same standards dataset twice (once for CCSS, once for state)
- // Join with state standard metadata (source)
+ // Join with CCSS standard metadata (target)
const enriched = matches
.join(
- standardsFrameworkItemsData.select('identifier', 'statementCode', 'description',
+ standardsFrameworkItemsData.select('caseIdentifierUUID', 'statementCode', 'description',
'gradeLevel', 'academicSubject', 'jurisdiction')
.rename({
- identifier: 'state_identifier',
- statementCode: 'statementCode_state',
- description: 'description_state',
- gradeLevel: 'gradeLevel_state',
- academicSubject: 'academicSubject_state'
+ caseIdentifierUUID: 'ccss_uuid',
+ statementCode: 'statementCode_ccss',
+ description: 'description_ccss',
+ gradeLevel: 'gradeLevel_ccss',
+ academicSubject: 'academicSubject_ccss',
+ jurisdiction: 'jurisdiction_ccss'
}),
- ['sourceEntityValue', 'state_identifier']
+ ['targetEntityValue', 'ccss_uuid']
)
- // Join with CCSS standard metadata (target)
+ // Join with state standard metadata (source)
.join(
- standardsFrameworkItemsData.select('identifier', 'statementCode', 'description',
- 'gradeLevel', 'academicSubject')
+ standardsFrameworkItemsData.select('caseIdentifierUUID', 'statementCode', 'description',
+ 'gradeLevel', 'academicSubject', 'jurisdiction')
.rename({
- identifier: 'ccss_identifier',
- statementCode: 'statementCode_ccss',
- description: 'description_ccss',
- gradeLevel: 'gradeLevel_ccss',
- academicSubject: 'academicSubject_ccss'
+ caseIdentifierUUID: 'state_uuid',
+ statementCode: 'statementCode_state',
+ description: 'description_state',
+ gradeLevel: 'gradeLevel_state',
+ academicSubject: 'academicSubject_state'
}),
- ['targetEntityValue', 'ccss_identifier']
+ ['sourceEntityValue', 'state_uuid']
);
console.log(`\n✅ Enriched crosswalk data with standards metadata\n`);
@@ -280,17 +301,18 @@ function enrichCrosswalksWithMetadata(matches, data, aq) {
top3.forEach((row, idx) => {
console.log(`Match #${idx + 1} (Jaccard: ${parseFloat(row.jaccard).toFixed(4)}):`);
+ console.log(` CCSS STANDARD:`);
+ console.log(` Code: ${row.statementCode_ccss}`);
+ console.log(` Jurisdiction: ${row.jurisdiction_ccss}`);
+ console.log(` Grade Level: ${row.gradeLevel_ccss}`);
+ console.log(` Description: ${row.description_ccss}`);
+ console.log(` `);
console.log(` STATE STANDARD:`);
console.log(` Code: ${row.statementCode_state}`);
console.log(` Jurisdiction: ${row.jurisdiction}`);
console.log(` Grade Level: ${row.gradeLevel_state}`);
console.log(` Description: ${row.description_state}`);
console.log(` `);
- console.log(` CCSS STANDARD:`);
- console.log(` Code: ${row.statementCode_ccss}`);
- console.log(` Grade Level: ${row.gradeLevel_ccss}`);
- console.log(` Description: ${row.description_ccss}`);
- console.log(` `);
console.log(` ALIGNMENT METRICS:`);
console.log(` Shared LCs: ${row.sharedLCCount} / State LCs: ${row.stateLCCount} / CCSS LCs: ${row.ccssLCCount}`);
console.log();
@@ -410,8 +432,8 @@ async function main() {
const data = loadCrosswalkData(aq);
console.log('\n' + '='.repeat(70));
- console.log('🔄 Step 2: Find the best-matching CCSSM standard for a state standard...');
- const matches = findBestCcssmMatch(TARGET_STATE_STANDARD_CODE, TARGET_STATE_JURISDICTION, data, aq);
+ console.log('🔄 Step 2: Find the best-matching state standards for a CCSSM standard...');
+ const matches = findBestStateMatches(TARGET_CCSSM_STANDARD_CODE, TARGET_CCSSM_JURISDICTION, data, aq);
if (matches && matches.numRows() > 0) {
console.log('\n' + '='.repeat(70));
@@ -425,7 +447,7 @@ async function main() {
if (enriched && enriched.numRows() > 0) {
console.log('='.repeat(70));
console.log('🔄 Step 5: Join crosswalks to Learning Components...');
- // Use the top match for detailed LC analysis
+ // Use the top match for detailed LC analysis (already filtered for Texas)
const topMatch = enriched.object();
showSharedLearningComponents(
topMatch.statementCode_state,
diff --git a/tutorials/compare_standards/python/compare_standards.py b/tutorials/compare_standards/python/compare_standards.py
index e24ebd5..b80f02e 100644
--- a/tutorials/compare_standards/python/compare_standards.py
+++ b/tutorials/compare_standards/python/compare_standards.py
@@ -30,10 +30,9 @@
load_dotenv()
# Domain Constants
-# Pick a state standard to find its best CCSSM match
-# Note: We need to specify both the code AND jurisdiction since multiple states may use the same code
-TARGET_STATE_STANDARD_CODE = '111.26.b.4.D' # Texas 6th grade math standard on rates
-TARGET_STATE_JURISDICTION = 'Texas'
+# Pick a CCSSM standard to find its best state standard matches
+TARGET_CCSSM_STANDARD_CODE = '6.NS.C.6.c' # Common Core 6th grade math standard on positioning integers on number lines
+TARGET_CCSSM_JURISDICTION = 'Multi-State'
# Environment Setup
data_dir = os.getenv('KG_DATA_PATH')
@@ -123,22 +122,22 @@ def load_crosswalk_data():
"""
================================
-STEP 2: FIND THE BEST-MATCHING CCSSM STANDARD
+STEP 2: FIND THE BEST-MATCHING STATE STANDARDS
================================
"""
-def find_best_ccssm_match(state_standard_code, jurisdiction, data):
+def find_best_state_matches(ccssm_standard_code, jurisdiction, data):
"""
- Find the best CCSSM match for a state standard
+ Find the best Texas state standard matches for a CCSSM standard
- Purpose: To find the best CCSS match for a state standard, filter rows by the
- state standard ID and sort by the Jaccard score. This identifies the CCSSM
- standard that contains the most similar skills and concept targets for student
+ Purpose: To find the best state standard matches for a CCSSM standard, filter rows by the
+ CCSSM standard ID and sort by the Jaccard score. This identifies the state
+ standards that contain the most similar skills and concept targets for student
mastery (not necessarily the most similar semantically).
Args:
- state_standard_code (str): The statement code of the state standard
- jurisdiction (str): The jurisdiction of the state standard
+ ccssm_standard_code (str): The statement code of the CCSSM standard
+ jurisdiction (str): The jurisdiction of the CCSSM standard (typically 'Multi-State')
data (dict): Dictionary containing the loaded datasets
Returns:
@@ -147,47 +146,67 @@ def find_best_ccssm_match(state_standard_code, jurisdiction, data):
crosswalk_data = data['crosswalk_data']
standards_data = data['standards_framework_items_data']
- # First, find the state standard by its statement code and jurisdiction
- state_standard = standards_data[
- (standards_data['statementCode'] == state_standard_code) &
+ # First, find the CCSSM standard by its statement code and jurisdiction
+ ccssm_standard = standards_data[
+ (standards_data['statementCode'] == ccssm_standard_code) &
(standards_data['jurisdiction'] == jurisdiction)
]
- if len(state_standard) == 0:
- print(f'❌ State standard not found: {state_standard_code}')
+ if len(ccssm_standard) == 0:
+ print(f'❌ CCSSM standard not found: {ccssm_standard_code}')
return None
- state_standard = state_standard.iloc[0]
- state_standard_id = state_standard['identifier'] # Use 'identifier' column for crosswalk matching
+ ccssm_standard = ccssm_standard.iloc[0]
+ ccssm_standard_uuid = ccssm_standard['caseIdentifierUUID'] # Use 'caseIdentifierUUID' for crosswalk matching
- print(f'✅ Found state standard: {state_standard_code}')
- print(f' Identifier: {state_standard_id}')
- print(f' Description: {state_standard["description"]}')
- print(f' Jurisdiction: {state_standard["jurisdiction"]}')
+ print(f'✅ Found CCSSM standard: {ccssm_standard_code}')
+ print(f' Case UUID: {ccssm_standard_uuid}')
+ print(f' Description: {ccssm_standard["description"]}')
+ print(f' Jurisdiction: {ccssm_standard["jurisdiction"]}')
- # Filter crosswalk data for this state standard
+ # Filter crosswalk data for this CCSSM standard (it's the target in relationships);
+ # the Texas-only filter is applied in a separate step after the jurisdiction merge below
matches = crosswalk_data[
- crosswalk_data['sourceEntityValue'] == state_standard_id
+ crosswalk_data['targetEntityValue'] == ccssm_standard_uuid
].copy()
if len(matches) == 0:
- print(f'\n❌ No CCSSM matches found for {state_standard_code}')
+ print(f'\n❌ No state standard matches found for {ccssm_standard_code}')
+ return None
+
+ # Join with standards data to get jurisdiction and filter for Texas
+ matches = matches.merge(
+ standards_data[['caseIdentifierUUID', 'jurisdiction']],
+ left_on='sourceEntityValue',
+ right_on='caseIdentifierUUID',
+ how='left',
+ suffixes=('', '_temp')
+ )
+
+ # Filter for Texas only
+ texas_matches = matches[matches['jurisdiction'] == 'Texas'].copy()
+
+ if len(texas_matches) == 0:
+ print(f'\n❌ No Texas standard matches found for {ccssm_standard_code}')
return None
+ # Drop the temporary columns added for filtering
+ texas_matches = texas_matches.drop(columns=['caseIdentifierUUID', 'jurisdiction'])
+
# Sort by Jaccard score (highest first)
- matches = matches.sort_values('jaccard', ascending=False)
+ texas_matches = texas_matches.sort_values('jaccard', ascending=False)
- print(f'\n✅ Found {len(matches)} CCSSM matches for {state_standard_code}')
- print(f'\n📊 Top match (highest Jaccard score):')
+ print(f'\n✅ Found {len(texas_matches)} Texas standard matches for {ccssm_standard_code}')
+ print(f'\n📊 Top Texas match (highest Jaccard score):')
- top_match = matches.iloc[0]
- print(f' CCSSM Standard UUID: {top_match["targetEntityValue"]}')
+ top_match = texas_matches.iloc[0]
+ print(f' State Standard UUID: {top_match["sourceEntityValue"]}')
print(f' Jaccard Score: {top_match["jaccard"]:.4f}')
print(f' Shared LC Count: {top_match["sharedLCCount"]}')
print(f' State LC Count: {top_match["stateLCCount"]}')
print(f' CCSS LC Count: {top_match["ccssLCCount"]}')
- return matches
+ return texas_matches
"""
@@ -280,32 +299,32 @@ def enrich_crosswalks_with_metadata(matches, data):
standards_data = data['standards_framework_items_data']
- # Rename columns to avoid conflicts when merging state and CCSS metadata
- # We'll merge the same standards dataset twice (once for state, once for CCSS)
+ # Rename columns to avoid conflicts when merging CCSS and state metadata
+ # We'll merge the same standards dataset twice (once for CCSS, once for state)
- # Join with state standard metadata (source)
- state_standards = standards_data[['identifier', 'statementCode', 'description',
- 'gradeLevel', 'academicSubject', 'jurisdiction']].copy()
- state_standards.columns = ['state_identifier', 'statementCode_state', 'description_state',
- 'gradeLevel_state', 'academicSubject_state', 'jurisdiction']
+ # Join with CCSS standard metadata (target)
+ ccss_standards = standards_data[['caseIdentifierUUID', 'statementCode', 'description',
+ 'gradeLevel', 'academicSubject', 'jurisdiction']].copy()
+ ccss_standards.columns = ['ccss_uuid', 'statementCode_ccss', 'description_ccss',
+ 'gradeLevel_ccss', 'academicSubject_ccss', 'jurisdiction_ccss']
enriched = matches.merge(
- state_standards,
- left_on='sourceEntityValue',
- right_on='state_identifier',
+ ccss_standards,
+ left_on='targetEntityValue',
+ right_on='ccss_uuid',
how='left'
)
- # Join with CCSS standard metadata (target)
- ccss_standards = standards_data[['identifier', 'statementCode', 'description',
- 'gradeLevel', 'academicSubject']].copy()
- ccss_standards.columns = ['ccss_identifier', 'statementCode_ccss', 'description_ccss',
- 'gradeLevel_ccss', 'academicSubject_ccss']
+ # Join with state standard metadata (source)
+ state_standards = standards_data[['caseIdentifierUUID', 'statementCode', 'description',
+ 'gradeLevel', 'academicSubject', 'jurisdiction']].copy()
+ state_standards.columns = ['state_uuid', 'statementCode_state', 'description_state',
+ 'gradeLevel_state', 'academicSubject_state', 'jurisdiction']
enriched = enriched.merge(
- ccss_standards,
- left_on='targetEntityValue',
- right_on='ccss_identifier',
+ state_standards,
+ left_on='sourceEntityValue',
+ right_on='state_uuid',
how='left'
)
@@ -314,17 +333,18 @@ def enrich_crosswalks_with_metadata(matches, data):
for idx, (_, row) in enumerate(enriched.head(3).iterrows(), 1):
print(f'Match #{idx} (Jaccard: {row["jaccard"]:.4f}):')
+ print(f' CCSS STANDARD:')
+ print(f' Code: {row["statementCode_ccss"]}')
+ print(f' Jurisdiction: {row["jurisdiction_ccss"]}')
+ print(f' Grade Level: {row["gradeLevel_ccss"]}')
+ print(f' Description: {row["description_ccss"]}')
+ print(f' ')
print(f' STATE STANDARD:')
print(f' Code: {row["statementCode_state"]}')
print(f' Jurisdiction: {row["jurisdiction"]}')
print(f' Grade Level: {row["gradeLevel_state"]}')
print(f' Description: {row["description_state"]}')
print(f' ')
- print(f' CCSS STANDARD:')
- print(f' Code: {row["statementCode_ccss"]}')
- print(f' Grade Level: {row["gradeLevel_ccss"]}')
- print(f' Description: {row["description_ccss"]}')
- print(f' ')
print(f' ALIGNMENT METRICS:')
print(f' Shared LCs: {row["sharedLCCount"]} / State LCs: {row["stateLCCount"]} / CCSS LCs: {row["ccssLCCount"]}')
print()
@@ -416,8 +436,8 @@ def show_shared_learning_components(state_standard_code, ccss_standard_code, sta
]
print(f'\n✅ LEARNING COMPONENTS ANALYSIS:\n')
- print(f'State Standard: {state_standard_code}')
print(f'CCSS Standard: {ccss_standard_code}')
+ print(f'State Standard: {state_standard_code}')
print()
print(f'📊 SHARED LEARNING COMPONENTS ({len(shared_lcs)}):')
@@ -453,8 +473,8 @@ def main():
data = load_crosswalk_data()
print('\n' + '='*70)
- print('🔄 Step 2: Find the best-matching CCSSM standard for a state standard...')
- matches = find_best_ccssm_match(TARGET_STATE_STANDARD_CODE, TARGET_STATE_JURISDICTION, data)
+ print('🔄 Step 2: Find the best-matching state standards for a CCSSM standard...')
+ matches = find_best_state_matches(TARGET_CCSSM_STANDARD_CODE, TARGET_CCSSM_JURISDICTION, data)
if matches is not None and len(matches) > 0:
print('\n' + '='*70)
@@ -468,7 +488,7 @@ def main():
if enriched is not None and len(enriched) > 0:
print('='*70)
print('🔄 Step 5: Join crosswalks to Learning Components...')
- # Use the top match for detailed LC analysis
+ # Use the top match for detailed LC analysis (already filtered for Texas)
top_match = enriched.iloc[0]
show_shared_learning_components(
top_match['statementCode_state'],
From 779f64b4ec473526ee863f36517abe24f489e6c5 Mon Sep 17 00:00:00 2001
From: Li Xu
Date: Thu, 20 Nov 2025 20:09:20 -0800
Subject: [PATCH 3/8] update join keys and other fixes for crosswalks
---
sample_queries/mysql/.DS_Store | Bin 8196 -> 8196 bytes
...nd_best_state_match_for_ccssm_standard.sql | 7 +++--
.../crosswalk_queries/get_all_crosswalks.sql | 26 ++++++++++++------
.../get_crosswalks_by_jaccard_threshold.sql | 6 ++--
...get_crosswalks_with_standards_metadata.sql | 7 +++--
...nd_best_state_match_for_ccssm_standard.sql | 7 +++--
.../crosswalk_queries/get_all_crosswalks.sql | 26 ++++++++++++------
.../get_crosswalks_by_jaccard_threshold.sql | 6 ++--
...get_crosswalks_with_standards_metadata.sql | 7 +++--
.../compare_standards/js/compare-standards.js | 2 +-
.../python/compare_standards.py | 2 +-
11 files changed, 58 insertions(+), 38 deletions(-)
diff --git a/sample_queries/mysql/.DS_Store b/sample_queries/mysql/.DS_Store
index bdd4b9866f52723c1ee36f6f3c0180c4fc8ccd5b..3a0f08e0f27c86ff0ec83ed90398fa97b0dd8562 100644
GIT binary patch
delta 198
zcmZp1XmOa}&nU4mU^hRb#AF8nmB|JI+l2)gk{OB^@)?SOu$&>0A%`J*vVwqI5WgD(
z0|ORGhIocThEj%9pr%Y9S)7~i;*yk;p9ECRaYHg(t2FkwBX*sW7YNG85m53*P>v~q
gVX}^(1<-MPo3#Z0vTkOV_{K8%qOkqu7?Bf90Ep@`4*&oF
delta 51
zcmZp1XmOa}&nU7nU^hRb$YcirmB|JI+b8o2nF49P&0Io%Stm9;*vu~Rjb-y)Q7xv4
G4R-;r?h*w6
diff --git a/sample_queries/mysql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql b/sample_queries/mysql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql
index 4896d75..42a572e 100644
--- a/sample_queries/mysql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql
+++ b/sample_queries/mysql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql
@@ -1,6 +1,6 @@
--- Find the best state standard matches for a given CCSSM standard
+-- Find the best Texas state standard matches for a given CCSSM standard
-- Returns crosswalks ordered by Jaccard score (highest similarity first)
--- with metadata about both the CCSSM and matching state standards
+-- with metadata about both the CCSSM and matching Texas state standards
SELECT
-- CCSSM Standard Information
@@ -30,7 +30,8 @@ JOIN standards_framework_item state
JOIN standards_framework_item ccss
ON ccss.`caseIdentifierUUID` = r.`targetEntityValue`
WHERE r.`relationshipType` = 'hasStandardAlignment'
- AND ccss.`statementCode` = '6.RP.A.2'
+ AND ccss.`statementCode` = '6.EE.B.5'
AND ccss.`jurisdiction` = 'Multi-State'
+ AND state.`jurisdiction` = 'Texas'
ORDER BY r.`jaccard` DESC
LIMIT 10;
diff --git a/sample_queries/mysql/crosswalk_queries/get_all_crosswalks.sql b/sample_queries/mysql/crosswalk_queries/get_all_crosswalks.sql
index c9149d9..ce9996f 100644
--- a/sample_queries/mysql/crosswalk_queries/get_all_crosswalks.sql
+++ b/sample_queries/mysql/crosswalk_queries/get_all_crosswalks.sql
@@ -1,10 +1,18 @@
+-- Get all Texas crosswalks with state standard information
+-- Returns Texas state → CCSSM standard alignments ordered by Jaccard score
+
SELECT
- `sourceEntityValue`,
- `targetEntityValue`,
- `jaccard`,
- `stateLCCount`,
- `ccssLCCount`,
- `sharedLCCount`
-FROM relationships
-WHERE `relationshipType` = 'hasStandardAlignment'
-ORDER BY `jaccard` DESC;
+ state.`statementCode` AS state_standard_code,
+ state.`jurisdiction` AS state_jurisdiction,
+ r.`sourceEntityValue`,
+ r.`targetEntityValue`,
+ r.`jaccard`,
+ r.`stateLCCount`,
+ r.`ccssLCCount`,
+ r.`sharedLCCount`
+FROM relationships r
+JOIN standards_framework_item state
+ ON state.`caseIdentifierUUID` = r.`sourceEntityValue`
+WHERE r.`relationshipType` = 'hasStandardAlignment'
+ AND state.`jurisdiction` = 'Texas'
+ORDER BY r.`jaccard` DESC;
diff --git a/sample_queries/mysql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql b/sample_queries/mysql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
index 0010765..14cf2c1 100644
--- a/sample_queries/mysql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
+++ b/sample_queries/mysql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
@@ -1,5 +1,5 @@
--- Get all crosswalks that meet or exceed a specified Jaccard similarity threshold
--- Ordered by state jurisdiction, CCSSM standard code, then Jaccard score
+-- Get all Texas crosswalks that meet or exceed a specified Jaccard similarity threshold
+-- Ordered by CCSSM standard code, then Jaccard score
SELECT
state_std.`jurisdiction` AS state_jurisdiction,
@@ -16,7 +16,7 @@ JOIN standards_framework_item ccss_std
ON ccss_std.`caseIdentifierUUID` = r.`targetEntityValue`
WHERE r.`relationshipType` = 'hasStandardAlignment'
AND r.`jaccard` >= 0.7
+ AND state_std.`jurisdiction` = 'Texas'
ORDER BY
- state_std.`jurisdiction`,
ccss_std.`statementCode`,
r.`jaccard` DESC;
diff --git a/sample_queries/mysql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql b/sample_queries/mysql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
index d715b62..ae987c2 100644
--- a/sample_queries/mysql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
+++ b/sample_queries/mysql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
@@ -1,5 +1,5 @@
--- Find state standard matches for a given CCSSM standard with full metadata
--- Returns multiple matching state standards ordered by similarity
+-- Find Texas state standard matches for a given CCSSM standard with full metadata
+-- Returns multiple matching Texas state standards ordered by similarity
SELECT
-- CCSSM Standard Information
@@ -27,7 +27,8 @@ JOIN standards_framework_item state_std
JOIN standards_framework_item ccss_std
ON ccss_std.`caseIdentifierUUID` = r.`targetEntityValue`
WHERE r.`relationshipType` = 'hasStandardAlignment'
- AND ccss_std.`statementCode` = '6.RP.A.2'
+ AND ccss_std.`statementCode` = '6.EE.B.5'
AND ccss_std.`jurisdiction` = 'Multi-State'
+ AND state_std.`jurisdiction` = 'Texas'
ORDER BY r.`jaccard` DESC
LIMIT 10;
diff --git a/sample_queries/postgresql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql b/sample_queries/postgresql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql
index 03a63c6..acaffbc 100644
--- a/sample_queries/postgresql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql
+++ b/sample_queries/postgresql/crosswalk_queries/find_best_state_match_for_ccssm_standard.sql
@@ -1,6 +1,6 @@
--- Find the best state standard matches for a given CCSSM standard
+-- Find the best Texas state standard matches for a given CCSSM standard
-- Returns crosswalks ordered by Jaccard score (highest similarity first)
--- with metadata about both the CCSSM and matching state standards
+-- with metadata about both the CCSSM and matching Texas state standards
SELECT
-- CCSSM Standard Information
@@ -30,7 +30,8 @@ JOIN standards_framework_item state
JOIN standards_framework_item ccss
ON ccss."caseIdentifierUUID" = r."targetEntityValue"
WHERE r."relationshipType" = 'hasStandardAlignment'
- AND ccss."statementCode" = '6.RP.A.2'
+ AND ccss."statementCode" = '6.EE.B.5'
AND ccss."jurisdiction" = 'Multi-State'
+ AND state."jurisdiction" = 'Texas'
ORDER BY r."jaccard" DESC
LIMIT 10;
diff --git a/sample_queries/postgresql/crosswalk_queries/get_all_crosswalks.sql b/sample_queries/postgresql/crosswalk_queries/get_all_crosswalks.sql
index ec8e4b1..4618cdd 100644
--- a/sample_queries/postgresql/crosswalk_queries/get_all_crosswalks.sql
+++ b/sample_queries/postgresql/crosswalk_queries/get_all_crosswalks.sql
@@ -1,10 +1,18 @@
+-- Get all Texas crosswalks with state standard information
+-- Returns Texas state → CCSSM standard alignments ordered by Jaccard score
+
SELECT
- "sourceEntityValue",
- "targetEntityValue",
- "jaccard",
- "stateLCCount",
- "ccssLCCount",
- "sharedLCCount"
-FROM relationships
-WHERE "relationshipType" = 'hasStandardAlignment'
-ORDER BY "jaccard" DESC;
+ state."statementCode" AS state_standard_code,
+ state."jurisdiction" AS state_jurisdiction,
+ r."sourceEntityValue",
+ r."targetEntityValue",
+ r."jaccard",
+ r."stateLCCount",
+ r."ccssLCCount",
+ r."sharedLCCount"
+FROM relationships r
+JOIN standards_framework_item state
+ ON state."caseIdentifierUUID" = r."sourceEntityValue"
+WHERE r."relationshipType" = 'hasStandardAlignment'
+ AND state."jurisdiction" = 'Texas'
+ORDER BY r."jaccard" DESC;
diff --git a/sample_queries/postgresql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
index 6a18402..dfd761f 100644
--- a/sample_queries/postgresql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
+++ b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_by_jaccard_threshold.sql
@@ -1,5 +1,5 @@
--- Get all crosswalks that meet or exceed a specified Jaccard similarity threshold
--- Ordered by state jurisdiction, CCSSM standard code, then Jaccard score
+-- Get all Texas crosswalks that meet or exceed a specified Jaccard similarity threshold
+-- Ordered by CCSSM standard code, then Jaccard score
SELECT
state_std."jurisdiction" AS state_jurisdiction,
@@ -16,7 +16,7 @@ JOIN standards_framework_item ccss_std
ON ccss_std."caseIdentifierUUID" = r."targetEntityValue"
WHERE r."relationshipType" = 'hasStandardAlignment'
AND r."jaccard" >= 0.7
+ AND state_std."jurisdiction" = 'Texas'
ORDER BY
- state_std."jurisdiction",
ccss_std."statementCode",
r."jaccard" DESC;
diff --git a/sample_queries/postgresql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
index e3eebb9..34b1781 100644
--- a/sample_queries/postgresql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
+++ b/sample_queries/postgresql/crosswalk_queries/get_crosswalks_with_standards_metadata.sql
@@ -1,5 +1,5 @@
--- Find state standard matches for a given CCSSM standard with full metadata
--- Returns multiple matching state standards ordered by similarity
+-- Find Texas state standard matches for a given CCSSM standard with full metadata
+-- Returns multiple matching Texas state standards ordered by similarity
SELECT
-- CCSSM Standard Information
@@ -27,7 +27,8 @@ JOIN standards_framework_item state_std
JOIN standards_framework_item ccss_std
ON ccss_std."caseIdentifierUUID" = r."targetEntityValue"
WHERE r."relationshipType" = 'hasStandardAlignment'
- AND ccss_std."statementCode" = '6.RP.A.2'
+ AND ccss_std."statementCode" = '6.EE.B.5'
AND ccss_std."jurisdiction" = 'Multi-State'
+ AND state_std."jurisdiction" = 'Texas'
ORDER BY r."jaccard" DESC
LIMIT 10;
diff --git a/tutorials/compare_standards/js/compare-standards.js b/tutorials/compare_standards/js/compare-standards.js
index e225f28..e18cd57 100644
--- a/tutorials/compare_standards/js/compare-standards.js
+++ b/tutorials/compare_standards/js/compare-standards.js
@@ -10,7 +10,7 @@ require('dotenv').config();
// Domain Constants
// Pick a CCSSM standard to find its best state standard matches
-const TARGET_CCSSM_STANDARD_CODE = '6.NS.C.6.c'; // Common Core 6th grade math standard on positioning integers on number lines
+const TARGET_CCSSM_STANDARD_CODE = '6.EE.B.5'; // Common Core 6th grade math standard on solving equations and inequalities
const TARGET_CCSSM_JURISDICTION = 'Multi-State';
// Environment setup
diff --git a/tutorials/compare_standards/python/compare_standards.py b/tutorials/compare_standards/python/compare_standards.py
index b80f02e..33a6020 100644
--- a/tutorials/compare_standards/python/compare_standards.py
+++ b/tutorials/compare_standards/python/compare_standards.py
@@ -31,7 +31,7 @@
# Domain Constants
# Pick a CCSSM standard to find its best state standard matches
-TARGET_CCSSM_STANDARD_CODE = '6.NS.C.6.c' # Common Core 6th grade math standard on positioning integers on number lines
+TARGET_CCSSM_STANDARD_CODE = '6.EE.B.5' # Common Core 6th grade math standard on solving equations and inequalities
TARGET_CCSSM_JURISDICTION = 'Multi-State'
# Environment Setup
From 763daa3e4f2c86a3e4fad8d4a09a002c18f4c96a Mon Sep 17 00:00:00 2001
From: Li Xu <45047873+lixuczi@users.noreply.github.com>
Date: Thu, 20 Nov 2025 20:13:54 -0800
Subject: [PATCH 4/8] Delete .DS_Store
---
sample_queries/mysql/.DS_Store | Bin 8196 -> 0 bytes
1 file changed, 0 insertions(+), 0 deletions(-)
delete mode 100644 sample_queries/mysql/.DS_Store
diff --git a/sample_queries/mysql/.DS_Store b/sample_queries/mysql/.DS_Store
deleted file mode 100644
index 3a0f08e0f27c86ff0ec83ed90398fa97b0dd8562..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 8196
zcmeHMyH3L}6ussl6){jYKn$6gV1WrKpu!I@Fd^E~QV?xZ3Kbj5AMg*%3`j`qO#A{1
zGarD&3g=#%Hc?YrQiOP1NA`&vAIIm!ae7@MGV86<644A1Wsn)OlPD4rU+3JFMqCY(
z4fsTdRHrhPXqS$=T5E+mpbn@5>VP_+4*UcM@Xh9w>G9pSSG80J)PaBL0Iv@oGNbP>
zv?xml3JC##W2lydbHo9{@f`XNLyJO1pEi0hj%r+pVJscjJ#G&49flSyos6ZEad$Q@
zLNRi8@H}Zw#ob5p4t9uNHLFeU2H|muG!*P*jGlCU#wT=cNL5_DMFNns;LAbo#MkI#1hA)Pu|C
z_VZ0Orl>2!_7(TZW1r>|d}_$dsG!9B#&IZ)@xE2h*)f_Qrmv`9H?M8KQjaa_*;n+J
z;M*L0ImP^{V@Ho?8Sm@u0}EIy%6Ov!ocDR7=mnmo>L=>g-Q>#q`jN7)&RaKZUvZyg
zoyjKn^uauuc!Vc?IZ^#9ep~+lypGn-@Z!37s%!e=4vboTGaUbKU`pyfz#o6$Do-6q
zaKMb^wsPyZ;9~2pCLC*f$a!Q=+%B{z6ciGU10@^>w*N51xre6A@g0U1QG@jD8v#T9
RmWY2R_4_XmQHx*az#ByKquu}j
From 2207c5d1c79a6b084eca7304667daf26a0d75aeb Mon Sep 17 00:00:00 2001
From: Li Xu
Date: Thu, 20 Nov 2025 20:25:01 -0800
Subject: [PATCH 5/8] update join keys and other fixes for crosswalks
---
.../mysql/crosswalk_queries/get_all_crosswalks.sql | 5 ++---
.../postgresql/crosswalk_queries/get_all_crosswalks.sql | 5 ++---
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/sample_queries/mysql/crosswalk_queries/get_all_crosswalks.sql b/sample_queries/mysql/crosswalk_queries/get_all_crosswalks.sql
index ce9996f..fe18fe8 100644
--- a/sample_queries/mysql/crosswalk_queries/get_all_crosswalks.sql
+++ b/sample_queries/mysql/crosswalk_queries/get_all_crosswalks.sql
@@ -1,5 +1,5 @@
--- Get all Texas crosswalks with state standard information
--- Returns Texas state → CCSSM standard alignments ordered by Jaccard score
+-- Get all crosswalks with state standard information
+-- Returns state → CCSSM standard alignments ordered by Jaccard score
SELECT
state.`statementCode` AS state_standard_code,
@@ -14,5 +14,4 @@ FROM relationships r
JOIN standards_framework_item state
ON state.`caseIdentifierUUID` = r.`sourceEntityValue`
WHERE r.`relationshipType` = 'hasStandardAlignment'
- AND state.`jurisdiction` = 'Texas'
ORDER BY r.`jaccard` DESC;
diff --git a/sample_queries/postgresql/crosswalk_queries/get_all_crosswalks.sql b/sample_queries/postgresql/crosswalk_queries/get_all_crosswalks.sql
index 4618cdd..2836e76 100644
--- a/sample_queries/postgresql/crosswalk_queries/get_all_crosswalks.sql
+++ b/sample_queries/postgresql/crosswalk_queries/get_all_crosswalks.sql
@@ -1,5 +1,5 @@
--- Get all Texas crosswalks with state standard information
--- Returns Texas state → CCSSM standard alignments ordered by Jaccard score
+-- Get all crosswalks with state standard information
+-- Returns state → CCSSM standard alignments ordered by Jaccard score
SELECT
state."statementCode" AS state_standard_code,
@@ -14,5 +14,4 @@ FROM relationships r
JOIN standards_framework_item state
ON state."caseIdentifierUUID" = r."sourceEntityValue"
WHERE r."relationshipType" = 'hasStandardAlignment'
- AND state."jurisdiction" = 'Texas'
ORDER BY r."jaccard" DESC;
From b5ac7bbd58c060b545f4cdcf9775ce75d848c9f6 Mon Sep 17 00:00:00 2001
From: Li Xu <45047873+lixuczi@users.noreply.github.com>
Date: Tue, 2 Dec 2025 16:32:02 -0800
Subject: [PATCH 6/8] chore: Update README with flat file deprecation notice
Added information about future access to Knowledge Graph.
---
README.md | 2 ++
1 file changed, 2 insertions(+)
diff --git a/README.md b/README.md
index 31a5a22..fb3245c 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,8 @@ For complete setup instructions and usage examples, see the [full docs](https://
## **Quick Start**
+**v1.2.0 will be the final version in which the Knowledge Graph can be downloaded as CSV flat files. Starting with v1.3.0, the Knowledge Graph will be available as graph-native JSON flat files, and we’ll begin granting access to our REST API in early 2026. Any CSV and JSON flat files that were previously downloaded will be unaffected.**
+
The knowledge graph data is available for download in both CSV and JSON formats. The graph data is exported with each file representing a specific entity type, and a relationships file capturing the connections between entities.
**CSV files:** UTF-8 encoded with comma delimiters and quoted fields. All CSV files include header rows with column names.
From 21fd8b3c684b4a562f2c5ef8356bc28432d0dd6a Mon Sep 17 00:00:00 2001
From: Li Xu <45047873+lixuczi@users.noreply.github.com>
Date: Tue, 2 Dec 2025 16:33:24 -0800
Subject: [PATCH 7/8] chore: Update README with flat file deprecation notice
Added note about changes in Knowledge Graph file formats and API access.
---
import_scripts/mysql/README.md | 2 ++
1 file changed, 2 insertions(+)
diff --git a/import_scripts/mysql/README.md b/import_scripts/mysql/README.md
index 4054bbf..f223b9a 100644
--- a/import_scripts/mysql/README.md
+++ b/import_scripts/mysql/README.md
@@ -1,3 +1,5 @@
+**v1.2.0 will be the final version in which the Knowledge Graph can be downloaded as CSV flat files. Starting with v1.3.0, the Knowledge Graph will be available as graph-native JSON flat files, and we’ll begin granting access to our REST API in early 2026. Any CSV and JSON flat files that were previously downloaded will be unaffected.**
+
# MySQL Import Guide
This guide provides instructions for loading the Learning Commons Knowledge Graph dataset into a MySQL database.
From 135ddfb0847ae1f0c19fd66e71c1a6f7ed5f390a Mon Sep 17 00:00:00 2001
From: Li Xu <45047873+lixuczi@users.noreply.github.com>
Date: Tue, 2 Dec 2025 16:34:05 -0800
Subject: [PATCH 8/8] chore: Update README with flat file deprecation notice
Added a note about the final version for CSV downloads and upcoming changes.
---
import_scripts/postgresql/README.md | 2 ++
1 file changed, 2 insertions(+)
diff --git a/import_scripts/postgresql/README.md b/import_scripts/postgresql/README.md
index 4a2dfef..21cfe5e 100644
--- a/import_scripts/postgresql/README.md
+++ b/import_scripts/postgresql/README.md
@@ -1,3 +1,5 @@
+**v1.2.0 will be the final version in which the Knowledge Graph can be downloaded as CSV flat files. Starting with v1.3.0, the Knowledge Graph will be available as graph-native JSON flat files, and we’ll begin granting access to our REST API in early 2026. Any CSV and JSON flat files that were previously downloaded will be unaffected.**
+
# PostgreSQL Import Guide
This guide provides instructions for loading the Learning Commons Knowledge Graph dataset into a PostgreSQL database.