From 0de57c3a9f26fddf86fe62867e34cff20d7c3c26 Mon Sep 17 00:00:00 2001 From: Ziyang Hu Date: Mon, 26 Jun 2023 12:35:35 +0800 Subject: [PATCH] fix LSH index not properly cleaned after tuple deletion --- cozo-core/src/query/stored.rs | 3 ++- cozo-core/src/runtime/minhash_lsh.rs | 2 +- cozo-core/src/runtime/tests.rs | 26 ++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/cozo-core/src/query/stored.rs b/cozo-core/src/query/stored.rs index c4c74209..78a4a2b0 100644 --- a/cozo-core/src/query/stored.rs +++ b/cozo-core/src/query/stored.rs @@ -952,6 +952,7 @@ impl<'a> SessionTx<'a> { let has_indices = !relation_store.indices.is_empty(); let has_hnsw_indices = !relation_store.hnsw_indices.is_empty(); let has_fts_indices = !relation_store.fts_indices.is_empty(); + let has_lsh_indices = !relation_store.lsh_indices.is_empty(); let fts_processors = self.make_fts_lsh_processors(relation_store)?; let mut new_tuples: Vec = vec![]; let mut old_tuples: Vec = vec![]; @@ -977,7 +978,7 @@ impl<'a> SessionTx<'a> { }); } } - if need_to_collect || has_indices || has_hnsw_indices || has_fts_indices { + if need_to_collect || has_indices || has_hnsw_indices || has_fts_indices || has_lsh_indices { if let Some(existing) = self.store_tx.get(&key, false)? { let mut tup = extracted.clone(); extend_tuple_from_v(&mut tup, &existing); diff --git a/cozo-core/src/runtime/minhash_lsh.rs b/cozo-core/src/runtime/minhash_lsh.rs index 15ec2708..cfb0039f 100644 --- a/cozo-core/src/runtime/minhash_lsh.rs +++ b/cozo-core/src/runtime/minhash_lsh.rs @@ -35,7 +35,7 @@ impl<'a> SessionTx<'a> { ) -> Result<()> { let bytes = match bytes { None => { - if let Some(mut found) = inv_idx_handle.get_val_only(self, tuple)? { + if let Some(mut found) = inv_idx_handle.get_val_only(self, &tuple[..inv_idx_handle.metadata.keys.len()])? { let inv_key = inv_idx_handle.encode_key_for_store(tuple, Default::default())?; self.store_tx.del(&inv_key)?; match found.pop() { diff --git a/cozo-core/src/runtime/tests.rs b/cozo-core/src/runtime/tests.rs index 6528b88b..b4898299 100644 --- a/cozo-core/src/runtime/tests.rs +++ b/cozo-core/src/runtime/tests.rs @@ -905,6 +905,32 @@ fn test_lsh_indexing3() { } } + +#[test] +fn test_lsh_indexing4() { + for i in 1..10 { + let f = i as f64 / 10.; + let db = DbInstance::new("mem", "", "").unwrap(); + db.run_default(r":create a {k: String => v: String}") + .unwrap(); + db.run_script( + r"::lsh create a:lsh {extractor: v, tokenizer: NGram, n_gram: 3, target_threshold: $t }", + BTreeMap::from([("t".into(), f.into())]), + ScriptMutability::Mutable + ) + .unwrap(); + db.run_default("?[k, v] <- [['a', 'ewiygfspeoighjsfcfxzdfncalsdf']] :put a {k => v}") + .unwrap(); + db.run_default("?[k] <- [['a']] :rm a {k}") + .unwrap(); + let res = db + .run_default("?[k] := ~a:lsh{k | query: 'ewiygfspeoighjsfcfxzdfncalsdf', k: 1}") + .unwrap(); + assert!(res.rows.len() == 0); + } +} + + #[test] fn test_lsh_indexing() { let db = DbInstance::new("mem", "", "").unwrap();