From 4097c5865e2197b8f5131502ff7ad6292c74e0d8 Mon Sep 17 00:00:00 2001 From: Ziyang Hu Date: Sun, 30 Apr 2023 18:52:04 +0800 Subject: [PATCH] list indices and describe relations --- cozo-core/src/cozoscript.pest | 6 +- cozo-core/src/data/json.rs | 7 +- cozo-core/src/parse/sys.rs | 25 +++++-- cozo-core/src/runtime/db.rs | 104 ++++++++++++++++++++++++--- cozo-core/src/runtime/minhash_lsh.rs | 10 +-- cozo-core/src/runtime/relation.rs | 27 +++++-- cozo-core/src/runtime/tests.rs | 16 +++-- 7 files changed, 162 insertions(+), 33 deletions(-) diff --git a/cozo-core/src/cozoscript.pest b/cozo-core/src/cozoscript.pest index 968994ac..aa7b602d 100644 --- a/cozo-core/src/cozoscript.pest +++ b/cozo-core/src/cozoscript.pest @@ -11,7 +11,7 @@ query_script = {SOI ~ (option | rule | const_rule | fixed_rule)+ ~ EOI} query_script_inner = {"{" ~ (option | rule | const_rule | fixed_rule)+ ~ "}"} query_script_inner_no_bracket = { (option | rule | const_rule | fixed_rule)+ } imperative_script = {SOI ~ imperative_stmt+ ~ EOI} -sys_script = {SOI ~ "::" ~ (list_relations_op | list_relation_op | remove_relations_op | trigger_relation_op | +sys_script = {SOI ~ "::" ~ (list_relations_op | list_columns_op | list_indices_op | remove_relations_op | trigger_relation_op | trigger_relation_show_op | rename_relations_op | running_op | kill_op | explain_op | access_level_op | index_op | vec_idx_op | fts_idx_op | lsh_idx_op | compact_op | list_fixed_rules) ~ EOI} index_op = {"index" ~ (index_create | index_drop)} @@ -27,7 +27,9 @@ running_op = {"running"} kill_op = {"kill" ~ expr} explain_op = {"explain" ~ "{" ~ query_script_inner_no_bracket ~ "}"} list_relations_op = {"relations"} -list_relation_op = {"columns" ~ compound_or_index_ident} +list_columns_op = {"columns" ~ compound_or_index_ident} +list_indices_op = {"indices" ~ compound_or_index_ident} +describe_relation_op = {"describe" ~ compound_or_index_ident ~ string?} remove_relations_op = {"remove" ~ (compound_ident ~ ",")* ~ compound_ident } rename_relations_op = {"rename" ~ (rename_pair ~ ",")* ~ rename_pair } access_level_op = {"access_level" ~ access_level ~ (compound_ident ~ ",")* ~ compound_ident} diff --git a/cozo-core/src/data/json.rs b/cozo-core/src/data/json.rs index b8fb53b8..defaf18e 100644 --- a/cozo-core/src/data/json.rs +++ b/cozo-core/src/data/json.rs @@ -12,6 +12,7 @@ use serde_json::json; pub(crate) use serde_json::Value as JsonValue; use crate::data::value::{DataValue, Num, Vector}; +use crate::JsonData; impl From for DataValue { fn from(v: JsonValue) -> Self { @@ -27,11 +28,7 @@ impl From for DataValue { }, JsonValue::String(s) => DataValue::from(s), JsonValue::Array(arr) => DataValue::List(arr.iter().map(DataValue::from).collect()), - JsonValue::Object(d) => DataValue::List( - d.into_iter() - .map(|(k, v)| DataValue::List([DataValue::from(k), DataValue::from(v)].into())) - .collect(), - ), + JsonValue::Object(d) => DataValue::Json(JsonData(JsonValue::Object(d))), } } } diff --git a/cozo-core/src/parse/sys.rs b/cozo-core/src/parse/sys.rs index fda2e2d6..c621c001 100644 --- a/cozo-core/src/parse/sys.rs +++ b/cozo-core/src/parse/sys.rs @@ -20,7 +20,7 @@ use crate::data::relation::VecElementType; use crate::data::symb::Symbol; use crate::data::value::{DataValue, ValidityTs}; use crate::fts::TokenizerConfig; -use crate::parse::expr::build_expr; +use crate::parse::expr::{build_expr, parse_string}; use crate::parse::query::parse_query; use crate::parse::{ExtractSpan, Pairs, Rule, SourceSpan}; use crate::runtime::relation::AccessLevel; @@ -28,7 +28,8 @@ use crate::{Expr, FixedRule}; pub(crate) enum SysOp { Compact, - ListRelation(Symbol), + ListColumns(Symbol), + ListIndices(Symbol), ListRelations, ListRunning, ListFixedRules, @@ -44,6 +45,7 @@ pub(crate) enum SysOp { CreateFtsIndex(FtsIndexConfig), CreateMinHashLshIndex(MinHashLshConfig), RemoveIndex(Symbol, Symbol), + DescribeRelation(Symbol, SmartString) } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -126,6 +128,16 @@ pub(crate) fn parse_sys( )?; SysOp::Explain(Box::new(prog)) } + Rule::describe_relation_op => { + let mut inner = inner.into_inner(); + let rels_p = inner.next().unwrap(); + let rel = Symbol::new(rels_p.as_str(), rels_p.extract_span()); + let description = match inner.next() { + None => Default::default(), + Some(desc_p) => parse_string(desc_p)?, + }; + SysOp::DescribeRelation(rel, description) + } Rule::list_relations_op => SysOp::ListRelations, Rule::remove_relations_op => { let rel = inner @@ -135,10 +147,15 @@ pub(crate) fn parse_sys( SysOp::RemoveRelation(rel) } - Rule::list_relation_op => { + Rule::list_columns_op => { + let rels_p = inner.into_inner().next().unwrap(); + let rel = Symbol::new(rels_p.as_str(), rels_p.extract_span()); + SysOp::ListColumns(rel) + } + Rule::list_indices_op => { let rels_p = inner.into_inner().next().unwrap(); let rel = Symbol::new(rels_p.as_str(), rels_p.extract_span()); - SysOp::ListRelation(rel) + SysOp::ListIndices(rel) } Rule::rename_relations_op => { let rename_pairs = inner diff --git a/cozo-core/src/runtime/db.rs b/cozo-core/src/runtime/db.rs index 1ddf6a11..8e3dc9f1 100644 --- a/cozo-core/src/runtime/db.rs +++ b/cozo-core/src/runtime/db.rs @@ -43,7 +43,10 @@ use crate::fts::TokenizerCache; use crate::parse::sys::SysOp; use crate::parse::{parse_script, CozoScript, SourceSpan}; use crate::query::compile::{CompiledProgram, CompiledRule, CompiledRuleSet}; -use crate::query::ra::{FilteredRA, FtsSearchRA, HnswSearchRA, InnerJoin, LshSearchRA, NegJoin, RelAlgebra, ReorderRA, StoredRA, StoredWithValidityRA, TempStoreRA, UnificationRA}; +use crate::query::ra::{ + FilteredRA, FtsSearchRA, HnswSearchRA, InnerJoin, LshSearchRA, NegJoin, RelAlgebra, ReorderRA, + StoredRA, StoredWithValidityRA, TempStoreRA, UnificationRA, +}; #[allow(unused_imports)] use crate::runtime::callback::{ CallbackCollector, CallbackDeclaration, CallbackOp, EventCallbackRegistry, @@ -1070,9 +1073,7 @@ impl<'s, S: Storage<'s>> Db { .map(|f| f.to_string()) .collect_vec()), ), - RelAlgebra::FtsSearch(FtsSearchRA { - fts_search, .. - }) => ( + RelAlgebra::FtsSearch(FtsSearchRA { fts_search, .. }) => ( "fts_index", json!(format!(":{}", fts_search.query.name)), json!(fts_search.query.name), @@ -1082,9 +1083,7 @@ impl<'s, S: Storage<'s>> Db { .map(|f| f.to_string()) .collect_vec()), ), - RelAlgebra::LshSearch(LshSearchRA { - lsh_search, .. - }) => ( + RelAlgebra::LshSearch(LshSearchRA { lsh_search, .. }) => ( "lsh_index", json!(format!(":{}", lsh_search.query.name)), json!(lsh_search.query.name), @@ -1187,6 +1186,14 @@ impl<'s, S: Storage<'s>> Db { vec![vec![DataValue::from(OK_STR)]], )) } + SysOp::DescribeRelation(rel_name, description) => { + let mut tx = self.transact_write()?; + tx.describe_relation(&rel_name, description)?; + Ok(NamedRows::new( + vec![STATUS_STR.to_string()], + vec![vec![DataValue::from(OK_STR)]], + )) + } SysOp::CreateIndex(rel_name, idx_name, cols) => { let lock = self .obtain_relation_locks(iter::once(&rel_name.name)) @@ -1260,7 +1267,8 @@ impl<'s, S: Storage<'s>> Db { vec![vec![DataValue::from(OK_STR)]], )) } - SysOp::ListRelation(rs) => self.list_relation(&rs), + SysOp::ListColumns(rs) => self.list_columns(&rs), + SysOp::ListIndices(rs) => self.list_indices(&rs), SysOp::RenameRelation(rename_pairs) => { let rel_names = rename_pairs.iter().flat_map(|(f, t)| [&f.name, &t.name]); let locks = self.obtain_relation_locks(rel_names); @@ -1582,7 +1590,83 @@ impl<'s, S: Storage<'s>> Db { rows, )) } - fn list_relation(&'s self, name: &str) -> Result { + fn list_indices(&'s self, name: &str) -> Result { + let mut tx = self.transact()?; + let handle = tx.get_relation(name, false)?; + let mut rows = vec![]; + for (name, (rel, cols)) in &handle.indices { + rows.push(vec![ + json!(name), + json!("normal"), + json!([rel.name]), + json!({ "indices": cols }), + ]); + } + for (name, (rel, manifest)) in &handle.hnsw_indices { + rows.push(vec![ + json!(name), + json!("hnsw"), + json!([rel.name]), + json!({ + "vec_dim": manifest.vec_dim, + "dtype": manifest.dtype, + "vec_fields": manifest.vec_fields, + "distance": manifest.distance, + "ef_construction": manifest.ef_construction, + "m_neighbours": manifest.m_neighbours, + "m_max": manifest.m_max, + "m_max0": manifest.m_max0, + "level_multiplier": manifest.level_multiplier, + "extend_candidates": manifest.extend_candidates, + "keep_pruned_connections": manifest.keep_pruned_connections, + }), + ]); + } + for (name, (rel, manifest)) in &handle.fts_indices { + rows.push(vec![ + json!(name), + json!("fts"), + json!([rel.name]), + json!({ + "extractor": manifest.extractor, + "tokenizer": manifest.tokenizer, + "tokenizer_filters": manifest.filters, + }), + ]); + } + for (name, (rel, inv_rel, manifest)) in &handle.lsh_indices { + rows.push(vec![ + json!(name), + json!("lsh"), + json!([rel.name, inv_rel.name]), + json!({ + "extractor": manifest.extractor, + "tokenizer": manifest.tokenizer, + "tokenizer_filters": manifest.filters, + "n_gram": manifest.n_gram, + "num_perm": manifest.num_perm, + "n_bands": manifest.n_bands, + "n_rows_in_band": manifest.n_rows_in_band, + "threshold": manifest.threshold, + }), + ]); + } + tx.commit_tx()?; + let rows = rows + .into_iter() + .map(|row| row.into_iter().map(DataValue::from).collect_vec()) + .collect_vec(); + Ok(NamedRows::new( + vec![ + "name".to_string(), + "type".to_string(), + "relations".to_string(), + "config".to_string(), + ], + rows, + )) + } + fn list_columns(&'s self, name: &str) -> Result { let mut tx = self.transact()?; let handle = tx.get_relation(name, false)?; let mut rows = vec![]; @@ -1653,6 +1737,7 @@ impl<'s, S: Storage<'s>> Db { json!(meta.put_triggers.len()), json!(meta.rm_triggers.len()), json!(meta.replace_triggers.len()), + json!(meta.description), ]); } let rows = rows @@ -1669,6 +1754,7 @@ impl<'s, S: Storage<'s>> Db { "n_put_triggers".to_string(), "n_rm_triggers".to_string(), "n_replace_triggers".to_string(), + "description".to_string(), ], rows, )) diff --git a/cozo-core/src/runtime/minhash_lsh.rs b/cozo-core/src/runtime/minhash_lsh.rs index 5a27360a..af691d59 100644 --- a/cozo-core/src/runtime/minhash_lsh.rs +++ b/cozo-core/src/runtime/minhash_lsh.rs @@ -104,8 +104,8 @@ impl<'a> SessionTx<'a> { }; let bytes = min_hash.get_bytes(); - let chunk_size = manifest.r * std::mem::size_of::(); - let chunks = (0..manifest.b) + let chunk_size = manifest.n_rows_in_band * std::mem::size_of::(); + let chunks = (0..manifest.n_bands) .map(|i| { let mut byte_range = bytes[i * chunk_size..(i + 1) * chunk_size].to_vec(); byte_range.extend_from_slice(&(i as u16).to_le_bytes()); @@ -155,7 +155,7 @@ impl<'a> SessionTx<'a> { } _ => bail!("Cannot search for value {:?} in a LSH index", q), }; - let chunk_size = config.manifest.r * std::mem::size_of::(); + let chunk_size = config.manifest.n_rows_in_band * std::mem::size_of::(); let mut key_prefix = Vec::with_capacity(1); let mut found_tuples: FxHashSet<_> = FxHashSet::default(); for (i, chunk) in bytes.chunks_exact(chunk_size).enumerate() { @@ -222,8 +222,8 @@ pub(crate) struct MinHashLshIndexManifest { pub(crate) filters: Vec, pub(crate) num_perm: usize, - pub(crate) b: usize, - pub(crate) r: usize, + pub(crate) n_bands: usize, + pub(crate) n_rows_in_band: usize, pub(crate) threshold: f64, pub(crate) perms: Vec, } diff --git a/cozo-core/src/runtime/relation.rs b/cozo-core/src/runtime/relation.rs index 105ea7ba..58debecc 100644 --- a/cozo-core/src/runtime/relation.rs +++ b/cozo-core/src/runtime/relation.rs @@ -88,6 +88,7 @@ pub(crate) struct RelationHandle { SmartString, (RelationHandle, RelationHandle, MinHashLshIndexManifest), >, + pub(crate) description: SmartString, } impl RelationHandle { @@ -620,6 +621,7 @@ impl<'a> SessionTx<'a> { hnsw_indices: Default::default(), fts_indices: Default::default(), lsh_indices: Default::default(), + description: Default::default(), }; let name_key = vec![DataValue::Str(meta.name.clone())].encode_as_key(RelationId::SYSTEM); @@ -662,6 +664,22 @@ impl<'a> SessionTx<'a> { let metadata = RelationHandle::decode(&found)?; Ok(metadata) } + pub(crate) fn describe_relation(&mut self, name: &str, description: SmartString) -> Result<()> { + let mut meta = self.get_relation(name, true)?; + + meta.description = description; + let name_key = vec![DataValue::Str(meta.name.clone())].encode_as_key(RelationId::SYSTEM); + let mut meta_val = vec![]; + meta.serialize(&mut Serializer::new(&mut meta_val).with_struct_map()) + .unwrap(); + if meta.is_temp { + self.temp_store_tx.put(&name_key, &meta_val)?; + } else { + self.store_tx.put(&name_key, &meta_val)?; + } + + Ok(()) + } pub(crate) fn destroy_relation(&mut self, name: &str) -> Result, Vec)>> { let is_temp = name.starts_with('_'); let mut to_clean = vec![]; @@ -782,7 +800,8 @@ impl<'a> SessionTx<'a> { config.false_negative_weight.0, ), ); - let perms = HashPermutations::new(config.n_perm); + let num_perm = params.b * params.r; + let perms = HashPermutations::new(num_perm); let manifest = MinHashLshIndexManifest { base_relation: config.base_relation, index_name: config.index_name, @@ -790,9 +809,9 @@ impl<'a> SessionTx<'a> { n_gram: config.n_gram, tokenizer: config.tokenizer, filters: config.filters, - num_perm: config.n_perm, - b: params.b, - r: params.r, + num_perm, + n_bands: params.b, + n_rows_in_band: params.r, threshold: config.target_threshold.0, perms: perms.as_bytes().to_vec(), }; diff --git a/cozo-core/src/runtime/tests.rs b/cozo-core/src/runtime/tests.rs index 68f84e0e..52082807 100644 --- a/cozo-core/src/runtime/tests.rs +++ b/cozo-core/src/runtime/tests.rs @@ -960,7 +960,7 @@ fn test_lsh_indexing() { ) .unwrap(); db.run_script( - r"::lsh create a:lsh {extractor: v, tokenizer: NGram, n_gram: 3, target_threshold: 0.5 }", + r"::lsh create a:lsh {extractor: v, tokenizer: Simple, n_gram: 3, target_threshold: 0.3 }", Default::default(), ) .unwrap(); @@ -981,13 +981,13 @@ fn test_lsh_indexing() { let _res = db .run_script( r" - ?[hash, src_k] := + ?[src_k, hash] := *a:lsh{src_k, hash} ", Default::default(), ) .unwrap(); - // for row in res.into_json()["rows"].as_array().unwrap() { + // for row in _res.into_json()["rows"].as_array().unwrap() { // println!("{}", row); // } let _res = db @@ -1015,6 +1015,10 @@ fn test_lsh_indexing() { for row in res.into_json()["rows"].as_array().unwrap() { println!("{}", row); } + let res = db.run_script("::indices a", Default::default()).unwrap(); + for row in res.into_json()["rows"].as_array().unwrap() { + println!("{}", row); + } } #[test] @@ -1059,7 +1063,7 @@ fn test_insertions() { } #[test] -fn tentivy_tokenizers() { +fn tokenizers() { let tokenizers = TokenizerCache::default(); let tokenizer = tokenizers .get( @@ -1138,4 +1142,8 @@ fn multi_index_vec() { "#, Default::default(), ).unwrap(); + let res = db.run_script("::indices product", Default::default()).unwrap(); + for row in res.into_json()["rows"].as_array().unwrap() { + println!("{}", row); + } }