Skip to content
This repository was archived by the owner on Sep 12, 2018. It is now read-only.

Commit 8ac2b26

Browse files
committed
Part 6: Vacuum the fulltext_values table after clearing excisions.
1 parent 2fc0440 commit 8ac2b26

2 files changed

Lines changed: 150 additions & 0 deletions

File tree

db/src/db.rs

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3282,4 +3282,119 @@ mod tests {
32823282
[?e :db.excise/beforeT {} ?tx4 true]
32833283
[?tx4 :db/txInstant ?ms4 ?tx4 true]]]"#, report.tx_id));
32843284
}
3285+
3286+
// Exercises excision of an entity that holds values for a `:db/fulltext true` attribute.
// The entity's datoms disappear as soon as the excision is transacted; its entries in the
// transaction log and its rows in the fulltext values table are only removed once the pending
// excision has been processed by `excision::ensure_no_pending_excisions`.
#[test]
3287+
fn test_excision_fulltext() {
3288+
let mut conn = TestConn::default();
3289+
3290+
// Define `:test/fulltext` (entid 200): an indexed, cardinality-one, fulltext string attribute.
assert_transact!(conn, r#"[
3291+
{:db/id 200
3292+
:db/ident :test/fulltext
3293+
:db/valueType :db.type/string
3294+
:db/cardinality :db.cardinality/one
3295+
:db/fulltext true
3296+
:db/index true}
3297+
]"#);
3298+
3299+
// Give entities 300 and 301 an initial fulltext value each.
assert_transact!(conn, r#"[
3300+
{:db/id 300
3301+
:test/fulltext "test1"}
3302+
{:db/id 301
3303+
:test/fulltext "test2"}
3304+
]"#);
3305+
3306+
// Assert new values for both entities. The transaction log checked further down shows the
// earlier values being retracted in this same transaction.
assert_transact!(conn, r#"[
3307+
{:db/id 300
3308+
:test/fulltext "test3"}
3309+
{:db/id 301
3310+
:test/fulltext "test4"}
3311+
]"#);
3312+
3313+
// Before.
// All four strings are present in the fulltext values, while the datoms only reference the
// two current ones (3 and 4).
3314+
assert_matches!(conn.fulltext_values(), r#"
3315+
[[1 "test1"]
3316+
[2 "test2"]
3317+
[3 "test3"]
3318+
[4 "test4"]]"#);
3319+
assert_matches!(conn.datoms(), r#"
3320+
[[200 :db/ident :test/fulltext]
3321+
[200 :db/valueType :db.type/string]
3322+
[200 :db/cardinality :db.cardinality/one]
3323+
[200 :db/index true]
3324+
[200 :db/fulltext true]
3325+
[300 :test/fulltext 3]
3326+
[301 :test/fulltext 4]]"#);
3327+
3328+
// Excise entity 300 as a whole: the pending excision asserted below carries no attribute
// restriction and no before-transaction bound.
let tempid_report = assert_transact!(conn, r#"[
3329+
[:db/add "e" :db/excise 300]
3330+
]"#);
3331+
// This is implementation specific, but it should be deterministic.
3332+
assert_matches!(tempids(&tempid_report),
3333+
"{\"e\" 65536}");
3334+
3335+
// After.
// The datom for entity 300 is already gone; only the excision entity itself refers to 300.
3336+
assert_matches!(conn.datoms(), r#"
3337+
[[200 :db/ident :test/fulltext]
3338+
[200 :db/valueType :db.type/string]
3339+
[200 :db/cardinality :db.cardinality/one]
3340+
[200 :db/index true]
3341+
[200 :db/fulltext true]
3342+
[301 :test/fulltext 4]
3343+
[?e :db/excise 300]]"#);
3344+
3345+
// We have enqueued a pending excision.
3346+
let pending = excision::pending_excisions(&conn.sqlite, &conn.partition_map, &conn.schema).expect("pending_excisions");
3347+
assert_eq!(pending, ::std::iter::once((65536, excision::Excision {
3348+
target: 300,
3349+
attrs: None,
3350+
before_tx: None,
3351+
})).collect());
3352+
3353+
// Before processing the pending excision, we have full transactions in the transaction log.
3354+
assert_matches!(conn.transactions(), r#"
3355+
[[[200 :db/ident :test/fulltext ?tx1 true]
3356+
[200 :db/valueType :db.type/string ?tx1 true]
3357+
[200 :db/cardinality :db.cardinality/one ?tx1 true]
3358+
[200 :db/index true ?tx1 true]
3359+
[200 :db/fulltext true ?tx1 true]
3360+
[?tx1 :db/txInstant ?ms ?tx1 true]]
3361+
[[300 :test/fulltext 1 ?tx2 true]
3362+
[301 :test/fulltext 2 ?tx2 true]
3363+
[?tx2 :db/txInstant ?ms2 ?tx2 true]]
3364+
[[300 :test/fulltext 1 ?tx3 false]
3365+
[300 :test/fulltext 3 ?tx3 true]
3366+
[301 :test/fulltext 2 ?tx3 false]
3367+
[301 :test/fulltext 4 ?tx3 true]
3368+
[?tx3 :db/txInstant ?ms3 ?tx3 true]]
3369+
[[?e :db/excise 300 ?tx4 true]
3370+
[?tx4 :db/txInstant ?ms4 ?tx4 true]]]"#);
3371+
3372+
// Process the pending excision; the remaining assertions check its effects.
excision::ensure_no_pending_excisions(&conn.sqlite, &conn.partition_map, &conn.schema).expect("ensure_no_pending_excisions");
3373+
3374+
// After processing the pending excision, we have nothing left pending.
3375+
let pending = excision::pending_excisions(&conn.sqlite, &conn.partition_map, &conn.schema).expect("pending_excisions");
3376+
assert_eq!(pending, Default::default());
3377+
3378+
// After processing the pending excision, we have rewritten transactions in the transaction
3379+
// log to not refer to the targeted attributes of the target entity.
3380+
assert_matches!(conn.transactions(), r#"
3381+
[[[200 :db/ident :test/fulltext ?tx1 true]
3382+
[200 :db/valueType :db.type/string ?tx1 true]
3383+
[200 :db/cardinality :db.cardinality/one ?tx1 true]
3384+
[200 :db/index true ?tx1 true]
3385+
[200 :db/fulltext true ?tx1 true]
3386+
[?tx1 :db/txInstant ?ms ?tx1 true]]
3387+
[[301 :test/fulltext 2 ?tx2 true]
3388+
[?tx2 :db/txInstant ?ms2 ?tx2 true]]
3389+
[[301 :test/fulltext 2 ?tx3 false]
3390+
[301 :test/fulltext 4 ?tx3 true]
3391+
[?tx3 :db/txInstant ?ms3 ?tx3 true]]
3392+
[[?e :db/excise 300 ?tx4 true]
3393+
[?tx4 :db/txInstant ?ms4 ?tx4 true]]]"#);
3394+
3395+
// After processing the pending excision, we have vacuumed dangling fulltext values.
// Rows 1 and 3 belonged only to the excised entity 300; rows 2 and 4 are still referenced by
// entity 301 in the transaction log.
3396+
assert_matches!(conn.fulltext_values(), r#"
3397+
[[2 "test2"]
3398+
[4 "test4"]]"#);
3399+
}
32853400
}

db/src/excision.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,12 @@ use mentat_core::{
2222
Entid,
2323
HasSchema,
2424
Schema,
25+
TypedValue,
2526
};
2627

2728
use db::{
2829
PartitionMapping,
30+
TypedSQLValue,
2931
read_materialized_transaction_aev_trie,
3032
};
3133

@@ -258,5 +260,38 @@ pub(crate) fn ensure_no_pending_excisions(conn: &rusqlite::Connection, partition
258260

259261
conn.execute("UPDATE excisions SET status = 0", &[])?;
260262

263+
// TODO: only vacuum fulltext if an excision (likely) impacted fulltext values, since this is
264+
// very expensive. As always, correctness first, performance second.
265+
vacuum_fulltext_table(conn)?;
266+
261267
Ok(list.into_iter().map(|(entity, excision, _status)| (entity, excision)).collect())
262268
}
269+
270+
271+
/// Delete fulltext values that are no longer refered to in the `datoms` or `transactions` table.
272+
pub(crate) fn vacuum_fulltext_table(conn: &rusqlite::Connection) -> Result<()> {
273+
let (true_value, true_value_type_tag) = TypedValue::Boolean(true).to_sql_value_pair();
274+
275+
// First, collect all `:db/fulltext true` attributes. This is easier than extracting them from
276+
// a `Schema` (no need to execute multiple insertions for large collections), but less flexible.
277+
conn.execute(r#"CREATE TABLE temp.fulltext_as (a SMALLINT NOT NULL)"# , &[])?;
278+
conn.execute(r#"INSERT INTO temp.fulltext_as (a)
279+
SELECT e FROM schema WHERE a = ? AND v = ? AND value_type_tag = ?"# ,
280+
&[&entids::DB_FULLTEXT, &true_value, &true_value_type_tag])?;
281+
282+
// Next, purge values that aren't referenced. We're using that `:db/fulltext true` attributes
283+
// always have `:db/index true`, so that we can use the `avet` index.
284+
conn.execute(r#"DELETE FROM fulltext_values
285+
WHERE rowid NOT IN
286+
(SELECT v
287+
FROM datoms
288+
WHERE index_avet IS NOT 0 AND a IN temp.fulltext_as
289+
UNION ALL
290+
SELECT v
291+
FROM transactions
292+
WHERE a IN temp.fulltext_as)"#, &[])?;
293+
294+
conn.execute(r#"DROP TABLE temp.fulltext_as"# , &[])?;
295+
296+
Ok(())
297+
}

0 commit comments

Comments
 (0)