Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 0 additions & 52 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -455,21 +455,6 @@ impl TableConfig {
.map(|field| field.name.clone())
.collect()
}

/// Return field names in PK-first order: primary key fields first (in
/// declaration order), then subsidiary fields (in declaration order).
/// This matches the ordering used by `Table::load()` when building the
/// in-memory table stored in the STATE file.
pub fn ordered_field_names(&self) -> Vec<String> {
let primary_key = self.primary_key();
let mut names = primary_key.clone();
for field in &self.fields {
if !primary_key.contains(&field.name) {
names.push(field.name.clone());
}
}
names
}
}

impl Validate for Config {
Expand Down Expand Up @@ -574,43 +559,6 @@ impl Config {
mod tests {
use super::*;

fn make_field(name: &str, primary_key: bool) -> FieldConfig {
FieldConfig {
name: name.to_string(),
primary_key,
..Default::default()
}
}

#[test]
fn test_ordered_field_names() {
let config = TableConfig {
source: "test.csv".to_string(),
header: false,
fields: vec![
make_field("name", false),
make_field("id", true),
make_field("email", false),
],
};
assert_eq!(config.ordered_field_names(), vec!["id", "name", "email"]);
}

#[test]
fn test_ordered_field_names_multiple_primary_keys() {
let config = TableConfig {
source: "test.csv".to_string(),
header: false,
fields: vec![
make_field("value", false),
make_field("pk_b", true),
make_field("pk_a", true),
],
};
// PKs in declaration order, then subsidiaries
assert_eq!(config.ordered_field_names(), vec!["pk_b", "pk_a", "value"]);
}

#[test]
fn test_validate_rejects_true_sentinel_on_non_boolean() {
let field = FieldConfig {
Expand Down
224 changes: 158 additions & 66 deletions src/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ use crate::value::{

type ProtoTable = crate::proto::table::Table;

/// Tuple positions (column index, field config) for one half of a row,
/// either the primary-key columns or the subsidiaries. Each entry pairs
/// a source-column index with the `FieldConfig` used to parse the value
/// at that column. Entries are kept sorted lexicographically by field
/// name (see `compute_canonical_columns`) so tuple identity stays stable
/// across config field reorderings.
type CanonicalColumns<'a> = Vec<(usize, &'a FieldConfig)>;

/// A table with records stored in a hash map for efficient lookup.
/// Fields are ordered with primary key columns first, followed by subsidiary columns.
#[derive(Debug, Clone, PartialEq)]
Expand Down Expand Up @@ -116,23 +122,35 @@ impl Table {
Ok(indices)
}

/// Split field indices into primary-key and subsidiary groups based on
/// which fields are marked as primary keys in the config.
fn compute_key_indices(
field_names: &[String],
primary_key: &[String],
/// Split columns into primary-key and subsidiary groups, each sorted
/// lexicographically by field name. Each entry pairs a column index
/// with the `FieldConfig` used to parse the value at that column. The
/// canonical ordering makes tuple identity independent of the order
/// fields are declared in the config, so reordering fields in
/// `tables.toml` does not change the on-disk or on-the-wire
/// representation.
fn compute_canonical_columns<'a>(
config: &'a TableConfig,
field_indices: &[usize],
) -> (Vec<usize>, Vec<usize>) {
let mut primary_indices = Vec::new();
let mut subsidiary_indices = Vec::new();
for (config_index, name) in field_names.iter().enumerate() {
if primary_key.contains(name) {
primary_indices.push(field_indices[config_index]);
) -> (CanonicalColumns<'a>, CanonicalColumns<'a>) {
let mut entries: Vec<(&str, usize, &FieldConfig)> = config
.fields
.iter()
.zip(field_indices.iter())
.map(|(field, &idx)| (field.name.as_str(), idx, field))
.collect();
entries.sort_by_key(|(name, _, _)| *name);

let mut primary_columns: CanonicalColumns = Vec::new();
let mut subsidiary_columns: CanonicalColumns = Vec::new();
for (_, idx, field) in entries {
if field.primary_key {
primary_columns.push((idx, field));
} else {
subsidiary_indices.push(field_indices[config_index]);
subsidiary_columns.push((idx, field));
}
}
(primary_indices, subsidiary_indices)
(primary_columns, subsidiary_columns)
}

#[cfg(test)]
Expand All @@ -153,21 +171,15 @@ impl Table {
mut reader: csv::Reader<File>,
) -> Result<Self> {
let field_names = config.field_names();
let primary_key = config.primary_key();
let field_indices = Self::resolve_field_indices(config, &mut reader)?;
let (primary_indices, subsidiary_indices) =
Self::compute_key_indices(&field_names, &primary_key, &field_indices);

// Field configs split by primary-key flag in declaration order. This
// matches the ordering of `primary_indices` / `subsidiary_indices` —
// both are derived from `config.fields` in declaration order — so
// zipping the two pairs lines up the right config with each value.
let primary_field_configs: Vec<&FieldConfig> =
config.fields.iter().filter(|f| f.primary_key).collect();
let subsidiary_field_configs: Vec<&FieldConfig> =
config.fields.iter().filter(|f| !f.primary_key).collect();
let (primary_columns, subsidiary_columns) =
Self::compute_canonical_columns(config, &field_indices);

let fields = config.ordered_field_names();
let fields: Vec<String> = primary_columns
.iter()
.chain(subsidiary_columns.iter())
.map(|(_, field)| field.name.clone())
.collect();

let mut records: HashMap<Vec<Value>, Vec<Value>> = HashMap::new();

Expand All @@ -190,9 +202,9 @@ impl Table {
continue;
}

let primary_key = parse_columns(&record, &primary_indices, &primary_field_configs)
let primary_key = parse_columns(&record, &primary_columns)
.with_context(|| format!("row {}", row_num + 1))?;
let subsidiary = parse_columns(&record, &subsidiary_indices, &subsidiary_field_configs)
let subsidiary = parse_columns(&record, &subsidiary_columns)
.with_context(|| format!("row {}", row_num + 1))?;

if records.insert(primary_key.clone(), subsidiary).is_some() {
Expand All @@ -204,17 +216,16 @@ impl Table {
}
}

/// Pull values at `csv_indices` out of `record` and parse each one into a
/// typed `Value` according to its corresponding `FieldConfig`. The two
/// slices must be the same length and aligned 1-to-1.
/// For each `(column_index, field_config)` entry, pull the value at
/// `column_index` out of `record` and parse it into a typed `Value`
/// according to `field_config`.
fn parse_columns(
record: &csv::StringRecord,
csv_indices: &[usize],
field_configs: &[&FieldConfig],
columns: &[(usize, &FieldConfig)],
) -> Result<Vec<Value>> {
let mut out = Vec::with_capacity(csv_indices.len());
for (&csv_index, &field) in csv_indices.iter().zip(field_configs.iter()) {
out.push(parse_field_value(&record[csv_index], field)?);
let mut out = Vec::with_capacity(columns.len());
for &(column_index, field) in columns {
out.push(parse_field_value(&record[column_index], field)?);
}
Ok(out)
}
Expand Down Expand Up @@ -267,59 +278,140 @@ mod tests {
}
}

// -- compute_key_indices tests --
// -- compute_canonical_columns tests --

/// Flatten `compute_canonical_columns` output into `(column_index, name)`
/// pairs so tests can assert on them directly.
fn extract<'a>(columns: &[(usize, &'a FieldConfig)]) -> Vec<(usize, &'a str)> {
    let mut pairs = Vec::with_capacity(columns.len());
    for (idx, field) in columns {
        pairs.push((*idx, field.name.as_str()));
    }
    pairs
}

#[test]
fn test_compute_key_indices_single_primary_key() {
let field_names = ["id", "name", "email"].map(String::from).to_vec();
let primary_key = ["id"].map(String::from).to_vec();
fn test_compute_canonical_columns_single_primary_key() {
let config = make_config(
vec![
make_field("id", true),
make_field("name", false),
make_field("email", false),
],
false,
);
let field_indices = vec![0, 1, 2];

let (primary_indices, subsidiary_indices) =
Table::compute_key_indices(&field_names, &primary_key, &field_indices);
let (primary, subsidiary) = Table::compute_canonical_columns(&config, &field_indices);

assert_eq!(primary_indices, vec![0]);
assert_eq!(subsidiary_indices, vec![1, 2]);
assert_eq!(extract(&primary), vec![(0, "id")]);
// Subsidiaries sorted lexicographically: email before name.
assert_eq!(extract(&subsidiary), vec![(2, "email"), (1, "name")]);
}

#[test]
fn test_compute_key_indices_composite_primary_key() {
let field_names = ["region", "id", "name"].map(String::from).to_vec();
let primary_key = ["region", "id"].map(String::from).to_vec();
fn test_compute_canonical_columns_composite_primary_key() {
let config = make_config(
vec![
make_field("region", true),
make_field("id", true),
make_field("name", false),
],
false,
);
let field_indices = vec![0, 1, 2];

let (primary_indices, subsidiary_indices) =
Table::compute_key_indices(&field_names, &primary_key, &field_indices);
let (primary, subsidiary) = Table::compute_canonical_columns(&config, &field_indices);

assert_eq!(primary_indices, vec![0, 1]);
assert_eq!(subsidiary_indices, vec![2]);
// Primary keys sorted lexicographically: id before region.
assert_eq!(extract(&primary), vec![(1, "id"), (0, "region")]);
assert_eq!(extract(&subsidiary), vec![(2, "name")]);
}

#[test]
fn test_compute_key_indices_reordered_columns() {
// CSV columns are in a different order than the config fields.
let field_names = ["id", "name", "email"].map(String::from).to_vec();
let primary_key = ["id"].map(String::from).to_vec();
fn test_compute_canonical_columns_reordered_columns() {
// Source columns are in a different order than the declared fields.
let config = make_config(
vec![
make_field("id", true),
make_field("name", false),
make_field("email", false),
],
true,
);
let field_indices = vec![2, 0, 1]; // id->col2, name->col0, email->col1

let (primary_indices, subsidiary_indices) =
Table::compute_key_indices(&field_names, &primary_key, &field_indices);
let (primary, subsidiary) = Table::compute_canonical_columns(&config, &field_indices);

assert_eq!(primary_indices, vec![2]);
assert_eq!(subsidiary_indices, vec![0, 1]);
assert_eq!(extract(&primary), vec![(2, "id")]);
assert_eq!(extract(&subsidiary), vec![(1, "email"), (0, "name")]);
}

#[test]
fn test_compute_key_indices_all_primary_keys() {
let field_names = ["a", "b"].map(String::from).to_vec();
let primary_key = ["a", "b"].map(String::from).to_vec();
fn test_compute_canonical_columns_all_primary_keys() {
let config = make_config(vec![make_field("b", true), make_field("a", true)], false);
let field_indices = vec![0, 1];

let (primary_indices, subsidiary_indices) =
Table::compute_key_indices(&field_names, &primary_key, &field_indices);
let (primary, subsidiary) = Table::compute_canonical_columns(&config, &field_indices);

assert_eq!(primary_indices, vec![0, 1]);
assert!(subsidiary_indices.is_empty());
// Both PKs, sorted lexicographically: a before b. The column index
// at each tuple position follows the field's source position.
assert_eq!(extract(&primary), vec![(1, "a"), (0, "b")]);
assert!(subsidiary.is_empty());
}

#[test]
fn test_parse_csv_tuple_identity_invariant_under_field_reorder() {
    // Declaration order differs between the two configs, but the canonical
    // (lex-sorted) tuple layout must make the resulting tables identical
    // when both parse the same source data.
    let fields_a = vec![
        make_field("id", true),
        make_field("name", false),
        make_field("email", false),
    ];
    let fields_b = vec![
        make_field("email", false),
        make_field("id", true),
        make_field("name", false),
    ];
    let config_a = make_config(fields_a, true);
    let config_b = make_config(fields_b, true);

    let csv = "id,name,email\n1,Alice,alice@example.com\n";
    let table_a = Table::parse_csv(
        "t",
        &config_a,
        &FilterConfig::default(),
        Table::test_reader(csv, true),
    )
    .unwrap();
    let table_b = Table::parse_csv(
        "t",
        &config_b,
        &FilterConfig::default(),
        Table::test_reader(csv, true),
    )
    .unwrap();

    assert_eq!(table_a.fields, vec!["id", "email", "name"]);
    assert_eq!(table_b.fields, table_a.fields);
    assert_eq!(table_b.records, table_a.records);
}

#[test]
fn test_parse_csv_no_header_uses_canonical_tuple_layout() {
    // With header=false the source columns are matched positionally against
    // declaration order, yet the stored tuple still uses the canonical
    // layout: primary key first, then subsidiaries sorted lexicographically.
    let fields = vec![
        make_field("name", false),
        make_field("id", true),
        make_field("email", false),
    ];
    let config = make_config(fields, false);

    let table = Table::parse_csv(
        "t",
        &config,
        &FilterConfig::default(),
        Table::test_reader("Alice,1,a@b.com\n", false),
    )
    .unwrap();

    assert_eq!(table.fields, vec!["id", "email", "name"]);
    let expected_subsidiary = vec!["a@b.com".into(), "Alice".into()];
    assert_eq!(
        table.records.get(&vec!["1".into()]),
        Some(&expected_subsidiary)
    );
}

// -- resolve_field_indices tests --
Expand Down
11 changes: 5 additions & 6 deletions tests/accept_composite_pk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,17 @@ fields = [
let patch = Patch::create(&config, &hash1).unwrap();
let sql = sql::patch_to_sql(&config, &patch).unwrap().unwrap();

// DELETE should use composite key with AND
// Composite-key columns appear in canonical (lex-sorted) order:
// course_id before student_id.
assert!(
sql.contains(r#"DELETE FROM "enrollments" WHERE "student_id" = 1 AND "course_id" = 102;"#)
sql.contains(r#"DELETE FROM "enrollments" WHERE "course_id" = 102 AND "student_id" = 1;"#)
);

// INSERT should include all columns
assert!(sql.contains(
r#"INSERT INTO "enrollments" ("student_id", "course_id", "grade") VALUES (2, 103, 'B');"#
r#"INSERT INTO "enrollments" ("course_id", "student_id", "grade") VALUES (103, 2, 'B');"#
));

// UPDATE should use composite key in WHERE
assert!(sql.contains(r#"WHERE "student_id" = 1 AND "course_id" = 101;"#));
assert!(sql.contains(r#"WHERE "course_id" = 101 AND "student_id" = 1;"#));
assert!(sql.contains(r#"SET "grade" = 'A+'"#));

common::assert_wire_roundtrip(&config, &patch);
Expand Down
Loading