diff --git a/cozo-core/src/query/stored.rs b/cozo-core/src/query/stored.rs
index a0807322..9f2beb2d 100644
--- a/cozo-core/src/query/stored.rs
+++ b/cozo-core/src/query/stored.rs
@@ -465,7 +465,7 @@ impl<'a> SessionTx<'a> {
         let mut processors = BTreeMap::new();
         for (name, (_, manifest)) in relation_store.fts_indices.iter() {
             let tokenizer = self.tokenizers.get(
-                &relation_store.name,
+                &name,
                 &manifest.tokenizer,
                 &manifest.filters,
             )?;
@@ -482,7 +482,7 @@ impl<'a> SessionTx<'a> {
         }
         for (name, (_, _, manifest)) in relation_store.lsh_indices.iter() {
             let tokenizer = self.tokenizers.get(
-                &relation_store.name,
+                &name,
                 &manifest.tokenizer,
                 &manifest.filters,
             )?;
@@ -1107,6 +1107,7 @@ impl<'a> SessionTx<'a> {
 
 #[derive(Debug, Error, Diagnostic)]
 #[error("Assertion failure for {key:?} of {relation}: {notice}")]
+#[diagnostic(code(transact::assertion_failure))]
 struct TransactAssertionFailure {
     relation: String,
     key: Vec<DataValue>,
diff --git a/cozo-core/src/runtime/tests.rs b/cozo-core/src/runtime/tests.rs
index 049867c2..f8ce12f5 100644
--- a/cozo-core/src/runtime/tests.rs
+++ b/cozo-core/src/runtime/tests.rs
@@ -960,6 +960,69 @@ fn test_fts_indexing() {
     }
 }
 
+#[test]
+fn test_lsh_indexing2() {
+    for i in 1..10 {
+        let f = i as f64 / 10.;
+        let db = DbInstance::new("mem", "", "").unwrap();
+        db.run_script(r":create a {k: String => v: String}", Default::default())
+            .unwrap();
+        db.run_script(
+            r"::lsh create a:lsh {extractor: v, tokenizer: NGram, n_gram: 3, target_threshold: $t }",
+            BTreeMap::from([("t".into(), f.into())])
+        )
+        .unwrap();
+        db.run_script(
+            "?[k, v] <- [['a', 'ewiygfspeoighjsfcfxzdfncalsdf']] :put a {k => v}",
+            Default::default(),
+        )
+        .unwrap();
+        let res = db
+            .run_script(
+                "?[k] := ~a:lsh{k | query: 'ewiygfspeoighjsfcfxzdfncalsdf', k: 1}",
+                Default::default(),
+            )
+            .unwrap();
+        assert!(res.rows.len() > 0);
+    }
+}
+
+#[test]
+fn test_lsh_indexing3() {
+    for i in 1..10 {
+        let f = i as f64 / 10.;
+        let db = DbInstance::new("mem", "", "").unwrap();
+        db.run_script(r":create text {id: String, => text: String, url: String? default null, dt: Float default now(), dup_for: String? default null }", Default::default())
+            .unwrap();
+        db.run_script(
+            r"::lsh create text:lsh {
+                extractor: text,
+                # extract_filter: is_null(dup_for),
+                tokenizer: NGram,
+                n_perm: 200,
+                target_threshold: $t,
+                n_gram: 7,
+            }",
+            BTreeMap::from([("t".into(), f.into())]),
+        )
+        .unwrap();
+        db.run_script(
+            "?[id, text] <- [['a', 'This function first generates 32 random bytes using the os.urandom function. It then base64 encodes these bytes using base64.urlsafe_b64encode, removes the padding, and decodes the result to a string.']] :put text {id, text}",
+            Default::default(),
+        )
+        .unwrap();
+        let res = db
+            .run_script(
+                r#"?[id, dup_for] :=
+                ~text:lsh{id: id, dup_for: dup_for, | query: "This function first generates 32 random bytes using the os.urandom function. It then base64 encodes these bytes using base64.urlsafe_b64encode, removes the padding, and decodes the result to a string.", }"#,
+                Default::default(),
+            )
+            .unwrap();
+        assert!(res.rows.len() > 0);
+        println!("{}", res.into_json());
+    }
+}
+
 #[test]
 fn test_lsh_indexing() {
     let db = DbInstance::new("mem", "", "").unwrap();
@@ -1370,5 +1433,4 @@ fn as_store_in_imperative_script() {
     for row in res.into_json()["rows"].as_array().unwrap() {
         println!("{}", row);
     }
-
 }