list indices and describe relations

main
Ziyang Hu 1 year ago
parent 34e3b52216
commit 4097c5865e

@ -11,7 +11,7 @@ query_script = {SOI ~ (option | rule | const_rule | fixed_rule)+ ~ EOI}
query_script_inner = {"{" ~ (option | rule | const_rule | fixed_rule)+ ~ "}"}
query_script_inner_no_bracket = { (option | rule | const_rule | fixed_rule)+ }
imperative_script = {SOI ~ imperative_stmt+ ~ EOI}
sys_script = {SOI ~ "::" ~ (list_relations_op | list_relation_op | remove_relations_op | trigger_relation_op |
// Entry rule for system scripts: `::` followed by exactly one system op.
// `describe_relation_op` has to be one of the alternatives here; otherwise
// the `Rule::describe_relation_op` branch of the sys-op parser is unreachable
// and `::describe <relation> <string>?` is rejected as a syntax error.
sys_script = {SOI ~ "::" ~ (list_relations_op | list_columns_op | list_indices_op | remove_relations_op | trigger_relation_op |
trigger_relation_show_op | rename_relations_op | running_op | kill_op | explain_op |
access_level_op | index_op | vec_idx_op | fts_idx_op | lsh_idx_op | compact_op | list_fixed_rules |
describe_relation_op) ~ EOI}
index_op = {"index" ~ (index_create | index_drop)}
@ -27,7 +27,9 @@ running_op = {"running"}
kill_op = {"kill" ~ expr}
explain_op = {"explain" ~ "{" ~ query_script_inner_no_bracket ~ "}"}
list_relations_op = {"relations"}
list_relation_op = {"columns" ~ compound_or_index_ident}
list_columns_op = {"columns" ~ compound_or_index_ident}
list_indices_op = {"indices" ~ compound_or_index_ident}
describe_relation_op = {"describe" ~ compound_or_index_ident ~ string?}
remove_relations_op = {"remove" ~ (compound_ident ~ ",")* ~ compound_ident }
rename_relations_op = {"rename" ~ (rename_pair ~ ",")* ~ rename_pair }
access_level_op = {"access_level" ~ access_level ~ (compound_ident ~ ",")* ~ compound_ident}

@ -12,6 +12,7 @@ use serde_json::json;
pub(crate) use serde_json::Value as JsonValue;
use crate::data::value::{DataValue, Num, Vector};
use crate::JsonData;
impl From<JsonValue> for DataValue {
fn from(v: JsonValue) -> Self {
@ -27,11 +28,7 @@ impl From<JsonValue> for DataValue {
},
JsonValue::String(s) => DataValue::from(s),
JsonValue::Array(arr) => DataValue::List(arr.iter().map(DataValue::from).collect()),
JsonValue::Object(d) => DataValue::List(
d.into_iter()
.map(|(k, v)| DataValue::List([DataValue::from(k), DataValue::from(v)].into()))
.collect(),
),
JsonValue::Object(d) => DataValue::Json(JsonData(JsonValue::Object(d))),
}
}
}

@ -20,7 +20,7 @@ use crate::data::relation::VecElementType;
use crate::data::symb::Symbol;
use crate::data::value::{DataValue, ValidityTs};
use crate::fts::TokenizerConfig;
use crate::parse::expr::build_expr;
use crate::parse::expr::{build_expr, parse_string};
use crate::parse::query::parse_query;
use crate::parse::{ExtractSpan, Pairs, Rule, SourceSpan};
use crate::runtime::relation::AccessLevel;
@ -28,7 +28,8 @@ use crate::{Expr, FixedRule};
pub(crate) enum SysOp {
Compact,
ListRelation(Symbol),
ListColumns(Symbol),
ListIndices(Symbol),
ListRelations,
ListRunning,
ListFixedRules,
@ -44,6 +45,7 @@ pub(crate) enum SysOp {
CreateFtsIndex(FtsIndexConfig),
CreateMinHashLshIndex(MinHashLshConfig),
RemoveIndex(Symbol, Symbol),
DescribeRelation(Symbol, SmartString<LazyCompact>)
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@ -126,6 +128,16 @@ pub(crate) fn parse_sys(
)?;
SysOp::Explain(Box::new(prog))
}
Rule::describe_relation_op => {
let mut inner = inner.into_inner();
let rels_p = inner.next().unwrap();
let rel = Symbol::new(rels_p.as_str(), rels_p.extract_span());
let description = match inner.next() {
None => Default::default(),
Some(desc_p) => parse_string(desc_p)?,
};
SysOp::DescribeRelation(rel, description)
}
Rule::list_relations_op => SysOp::ListRelations,
Rule::remove_relations_op => {
let rel = inner
@ -135,10 +147,15 @@ pub(crate) fn parse_sys(
SysOp::RemoveRelation(rel)
}
Rule::list_relation_op => {
Rule::list_columns_op => {
let rels_p = inner.into_inner().next().unwrap();
let rel = Symbol::new(rels_p.as_str(), rels_p.extract_span());
SysOp::ListColumns(rel)
}
Rule::list_indices_op => {
let rels_p = inner.into_inner().next().unwrap();
let rel = Symbol::new(rels_p.as_str(), rels_p.extract_span());
SysOp::ListRelation(rel)
SysOp::ListIndices(rel)
}
Rule::rename_relations_op => {
let rename_pairs = inner

@ -43,7 +43,10 @@ use crate::fts::TokenizerCache;
use crate::parse::sys::SysOp;
use crate::parse::{parse_script, CozoScript, SourceSpan};
use crate::query::compile::{CompiledProgram, CompiledRule, CompiledRuleSet};
use crate::query::ra::{FilteredRA, FtsSearchRA, HnswSearchRA, InnerJoin, LshSearchRA, NegJoin, RelAlgebra, ReorderRA, StoredRA, StoredWithValidityRA, TempStoreRA, UnificationRA};
use crate::query::ra::{
FilteredRA, FtsSearchRA, HnswSearchRA, InnerJoin, LshSearchRA, NegJoin, RelAlgebra, ReorderRA,
StoredRA, StoredWithValidityRA, TempStoreRA, UnificationRA,
};
#[allow(unused_imports)]
use crate::runtime::callback::{
CallbackCollector, CallbackDeclaration, CallbackOp, EventCallbackRegistry,
@ -1070,9 +1073,7 @@ impl<'s, S: Storage<'s>> Db<S> {
.map(|f| f.to_string())
.collect_vec()),
),
RelAlgebra::FtsSearch(FtsSearchRA {
fts_search, ..
}) => (
RelAlgebra::FtsSearch(FtsSearchRA { fts_search, .. }) => (
"fts_index",
json!(format!(":{}", fts_search.query.name)),
json!(fts_search.query.name),
@ -1082,9 +1083,7 @@ impl<'s, S: Storage<'s>> Db<S> {
.map(|f| f.to_string())
.collect_vec()),
),
RelAlgebra::LshSearch(LshSearchRA {
lsh_search, ..
}) => (
RelAlgebra::LshSearch(LshSearchRA { lsh_search, .. }) => (
"lsh_index",
json!(format!(":{}", lsh_search.query.name)),
json!(lsh_search.query.name),
@ -1187,6 +1186,14 @@ impl<'s, S: Storage<'s>> Db<S> {
vec![vec![DataValue::from(OK_STR)]],
))
}
// `::describe <relation> <text>`: store a free-text description in the
// relation's metadata and report `OK`.
SysOp::DescribeRelation(rel_name, description) => {
    let mut tx = self.transact_write()?;
    tx.describe_relation(&rel_name, description)?;
    // The metadata `put` above happens inside the write transaction; it has
    // to be committed explicitly, otherwise the new description is never
    // made durable even though we report success.
    tx.commit_tx()?;
    Ok(NamedRows::new(
        vec![STATUS_STR.to_string()],
        vec![vec![DataValue::from(OK_STR)]],
    ))
}
SysOp::CreateIndex(rel_name, idx_name, cols) => {
let lock = self
.obtain_relation_locks(iter::once(&rel_name.name))
@ -1260,7 +1267,8 @@ impl<'s, S: Storage<'s>> Db<S> {
vec![vec![DataValue::from(OK_STR)]],
))
}
SysOp::ListRelation(rs) => self.list_relation(&rs),
SysOp::ListColumns(rs) => self.list_columns(&rs),
SysOp::ListIndices(rs) => self.list_indices(&rs),
SysOp::RenameRelation(rename_pairs) => {
let rel_names = rename_pairs.iter().flat_map(|(f, t)| [&f.name, &t.name]);
let locks = self.obtain_relation_locks(rel_names);
@ -1582,7 +1590,83 @@ impl<'s, S: Storage<'s>> Db<S> {
rows,
))
}
fn list_relation(&'s self, name: &str) -> Result<NamedRows> {
fn list_indices(&'s self, name: &str) -> Result<NamedRows> {
let mut tx = self.transact()?;
let handle = tx.get_relation(name, false)?;
let mut rows = vec![];
for (name, (rel, cols)) in &handle.indices {
rows.push(vec![
json!(name),
json!("normal"),
json!([rel.name]),
json!({ "indices": cols }),
]);
}
for (name, (rel, manifest)) in &handle.hnsw_indices {
rows.push(vec![
json!(name),
json!("hnsw"),
json!([rel.name]),
json!({
"vec_dim": manifest.vec_dim,
"dtype": manifest.dtype,
"vec_fields": manifest.vec_fields,
"distance": manifest.distance,
"ef_construction": manifest.ef_construction,
"m_neighbours": manifest.m_neighbours,
"m_max": manifest.m_max,
"m_max0": manifest.m_max0,
"level_multiplier": manifest.level_multiplier,
"extend_candidates": manifest.extend_candidates,
"keep_pruned_connections": manifest.keep_pruned_connections,
}),
]);
}
for (name, (rel, manifest)) in &handle.fts_indices {
rows.push(vec![
json!(name),
json!("fts"),
json!([rel.name]),
json!({
"extractor": manifest.extractor,
"tokenizer": manifest.tokenizer,
"tokenizer_filters": manifest.filters,
}),
]);
}
for (name, (rel, inv_rel, manifest)) in &handle.lsh_indices {
rows.push(vec![
json!(name),
json!("lsh"),
json!([rel.name, inv_rel.name]),
json!({
"extractor": manifest.extractor,
"tokenizer": manifest.tokenizer,
"tokenizer_filters": manifest.filters,
"n_gram": manifest.n_gram,
"num_perm": manifest.num_perm,
"n_bands": manifest.n_bands,
"n_rows_in_band": manifest.n_rows_in_band,
"threshold": manifest.threshold,
}),
]);
}
tx.commit_tx()?;
let rows = rows
.into_iter()
.map(|row| row.into_iter().map(DataValue::from).collect_vec())
.collect_vec();
Ok(NamedRows::new(
vec![
"name".to_string(),
"type".to_string(),
"relations".to_string(),
"config".to_string(),
],
rows,
))
}
fn list_columns(&'s self, name: &str) -> Result<NamedRows> {
let mut tx = self.transact()?;
let handle = tx.get_relation(name, false)?;
let mut rows = vec![];
@ -1653,6 +1737,7 @@ impl<'s, S: Storage<'s>> Db<S> {
json!(meta.put_triggers.len()),
json!(meta.rm_triggers.len()),
json!(meta.replace_triggers.len()),
json!(meta.description),
]);
}
let rows = rows
@ -1669,6 +1754,7 @@ impl<'s, S: Storage<'s>> Db<S> {
"n_put_triggers".to_string(),
"n_rm_triggers".to_string(),
"n_replace_triggers".to_string(),
"description".to_string(),
],
rows,
))

@ -104,8 +104,8 @@ impl<'a> SessionTx<'a> {
};
let bytes = min_hash.get_bytes();
let chunk_size = manifest.r * std::mem::size_of::<u32>();
let chunks = (0..manifest.b)
let chunk_size = manifest.n_rows_in_band * std::mem::size_of::<u32>();
let chunks = (0..manifest.n_bands)
.map(|i| {
let mut byte_range = bytes[i * chunk_size..(i + 1) * chunk_size].to_vec();
byte_range.extend_from_slice(&(i as u16).to_le_bytes());
@ -155,7 +155,7 @@ impl<'a> SessionTx<'a> {
}
_ => bail!("Cannot search for value {:?} in a LSH index", q),
};
let chunk_size = config.manifest.r * std::mem::size_of::<u32>();
let chunk_size = config.manifest.n_rows_in_band * std::mem::size_of::<u32>();
let mut key_prefix = Vec::with_capacity(1);
let mut found_tuples: FxHashSet<_> = FxHashSet::default();
for (i, chunk) in bytes.chunks_exact(chunk_size).enumerate() {
@ -222,8 +222,8 @@ pub(crate) struct MinHashLshIndexManifest {
pub(crate) filters: Vec<TokenizerConfig>,
pub(crate) num_perm: usize,
pub(crate) b: usize,
pub(crate) r: usize,
pub(crate) n_bands: usize,
pub(crate) n_rows_in_band: usize,
pub(crate) threshold: f64,
pub(crate) perms: Vec<u8>,
}

@ -88,6 +88,7 @@ pub(crate) struct RelationHandle {
SmartString<LazyCompact>,
(RelationHandle, RelationHandle, MinHashLshIndexManifest),
>,
pub(crate) description: SmartString<LazyCompact>,
}
impl RelationHandle {
@ -620,6 +621,7 @@ impl<'a> SessionTx<'a> {
hnsw_indices: Default::default(),
fts_indices: Default::default(),
lsh_indices: Default::default(),
description: Default::default(),
};
let name_key = vec![DataValue::Str(meta.name.clone())].encode_as_key(RelationId::SYSTEM);
@ -662,6 +664,22 @@ impl<'a> SessionTx<'a> {
let metadata = RelationHandle::decode(&found)?;
Ok(metadata)
}
/// Replaces the description of the relation `name` and writes the updated
/// metadata back into the store the relation belongs to.
///
/// The write goes through this transaction's `temp_store_tx` when the
/// relation is flagged `is_temp`, and through `store_tx` otherwise.
///
/// # Errors
///
/// Fails if the relation handle cannot be fetched or if the store rejects
/// the write.
///
/// # Panics
///
/// Panics if the relation metadata cannot be serialized.
pub(crate) fn describe_relation(
    &mut self,
    name: &str,
    description: SmartString<LazyCompact>,
) -> Result<()> {
    let mut handle = self.get_relation(name, true)?;
    handle.description = description;

    // Re-encode the whole handle, now carrying the new description.
    let mut encoded = Vec::new();
    handle
        .serialize(&mut Serializer::new(&mut encoded).with_struct_map())
        .unwrap();

    // Metadata is keyed by the relation's name under the system relation id.
    let key = vec![DataValue::Str(handle.name.clone())].encode_as_key(RelationId::SYSTEM);

    if handle.is_temp {
        self.temp_store_tx.put(&key, &encoded)?;
    } else {
        self.store_tx.put(&key, &encoded)?;
    }
    Ok(())
}
pub(crate) fn destroy_relation(&mut self, name: &str) -> Result<Vec<(Vec<u8>, Vec<u8>)>> {
let is_temp = name.starts_with('_');
let mut to_clean = vec![];
@ -782,7 +800,8 @@ impl<'a> SessionTx<'a> {
config.false_negative_weight.0,
),
);
let perms = HashPermutations::new(config.n_perm);
let num_perm = params.b * params.r;
let perms = HashPermutations::new(num_perm);
let manifest = MinHashLshIndexManifest {
base_relation: config.base_relation,
index_name: config.index_name,
@ -790,9 +809,9 @@ impl<'a> SessionTx<'a> {
n_gram: config.n_gram,
tokenizer: config.tokenizer,
filters: config.filters,
num_perm: config.n_perm,
b: params.b,
r: params.r,
num_perm,
n_bands: params.b,
n_rows_in_band: params.r,
threshold: config.target_threshold.0,
perms: perms.as_bytes().to_vec(),
};

@ -960,7 +960,7 @@ fn test_lsh_indexing() {
)
.unwrap();
db.run_script(
r"::lsh create a:lsh {extractor: v, tokenizer: NGram, n_gram: 3, target_threshold: 0.5 }",
r"::lsh create a:lsh {extractor: v, tokenizer: Simple, n_gram: 3, target_threshold: 0.3 }",
Default::default(),
)
.unwrap();
@ -981,13 +981,13 @@ fn test_lsh_indexing() {
let _res = db
.run_script(
r"
?[hash, src_k] :=
?[src_k, hash] :=
*a:lsh{src_k, hash}
",
Default::default(),
)
.unwrap();
// for row in res.into_json()["rows"].as_array().unwrap() {
// for row in _res.into_json()["rows"].as_array().unwrap() {
// println!("{}", row);
// }
let _res = db
@ -1015,6 +1015,10 @@ fn test_lsh_indexing() {
for row in res.into_json()["rows"].as_array().unwrap() {
println!("{}", row);
}
let res = db.run_script("::indices a", Default::default()).unwrap();
for row in res.into_json()["rows"].as_array().unwrap() {
println!("{}", row);
}
}
#[test]
@ -1059,7 +1063,7 @@ fn test_insertions() {
}
#[test]
fn tentivy_tokenizers() {
fn tokenizers() {
let tokenizers = TokenizerCache::default();
let tokenizer = tokenizers
.get(
@ -1138,4 +1142,8 @@ fn multi_index_vec() {
"#,
Default::default(),
).unwrap();
let res = db.run_script("::indices product", Default::default()).unwrap();
for row in res.into_json()["rows"].as_array().unwrap() {
println!("{}", row);
}
}

Loading…
Cancel
Save