hnsw index auto insert/remove

main
Ziyang Hu 1 year ago
parent 2e84ce275d
commit e352c762f1

@ -463,7 +463,14 @@ impl Display for DataValue {
.field("retracted", &v.is_assert) .field("retracted", &v.is_assert)
.finish(), .finish(),
DataValue::Vec(a) => { DataValue::Vec(a) => {
write!(f, "array<{:?} elements>", a.len()) match a {
Vector::F32(a) => {
write!(f, "vec({:?})", a.to_vec())
}
Vector::F64(a) => {
write!(f, "vec({:?}, \"F64\")", a.to_vec())
}
}
} }
} }
} }

@ -38,6 +38,7 @@ pub(crate) enum RelAlgebra {
Reorder(ReorderRA), Reorder(ReorderRA),
Filter(FilteredRA), Filter(FilteredRA),
Unification(UnificationRA), Unification(UnificationRA),
HnswSearch(HnswSearchRA),
} }
impl RelAlgebra { impl RelAlgebra {
@ -52,10 +53,17 @@ impl RelAlgebra {
RelAlgebra::Filter(i) => i.span, RelAlgebra::Filter(i) => i.span,
RelAlgebra::Unification(i) => i.span, RelAlgebra::Unification(i) => i.span,
RelAlgebra::StoredWithValidity(i) => i.span, RelAlgebra::StoredWithValidity(i) => i.span,
RelAlgebra::HnswSearch(_) => todo!(),
} }
} }
} }
pub(crate) struct HnswSearchRA {
pub(crate) parent: Box<RelAlgebra>,
pub(crate) to_eliminate: BTreeSet<Symbol>,
pub(crate) span: SourceSpan,
}
pub(crate) struct UnificationRA { pub(crate) struct UnificationRA {
pub(crate) parent: Box<RelAlgebra>, pub(crate) parent: Box<RelAlgebra>,
pub(crate) binding: Symbol, pub(crate) binding: Symbol,
@ -320,6 +328,9 @@ impl Debug for RelAlgebra {
.field(&r.binding) .field(&r.binding)
.field(&r.expr) .field(&r.expr)
.finish(), .finish(),
RelAlgebra::HnswSearch(_) => {
todo!("HnswSearch")
}
} }
} }
} }
@ -363,6 +374,9 @@ impl RelAlgebra {
r.left.fill_binding_indices_and_compile()?; r.left.fill_binding_indices_and_compile()?;
r.right.fill_binding_indices_and_compile()?; r.right.fill_binding_indices_and_compile()?;
} }
RelAlgebra::HnswSearch(_) => {
todo!()
}
} }
Ok(()) Ok(())
} }
@ -559,6 +573,9 @@ impl RelAlgebra {
} }
joined joined
} }
RelAlgebra::HnswSearch(_) => {
todo!("filter on HnswSearch")
}
} }
} }
pub(crate) fn unify( pub(crate) fn unify(
@ -1556,6 +1573,7 @@ impl RelAlgebra {
RelAlgebra::Filter(r) => r.do_eliminate_temp_vars(used), RelAlgebra::Filter(r) => r.do_eliminate_temp_vars(used),
RelAlgebra::NegJoin(r) => r.do_eliminate_temp_vars(used), RelAlgebra::NegJoin(r) => r.do_eliminate_temp_vars(used),
RelAlgebra::Unification(r) => r.do_eliminate_temp_vars(used), RelAlgebra::Unification(r) => r.do_eliminate_temp_vars(used),
RelAlgebra::HnswSearch(_) => {todo!()}
} }
} }
@ -1570,6 +1588,7 @@ impl RelAlgebra {
RelAlgebra::Filter(r) => Some(&r.to_eliminate), RelAlgebra::Filter(r) => Some(&r.to_eliminate),
RelAlgebra::NegJoin(r) => Some(&r.to_eliminate), RelAlgebra::NegJoin(r) => Some(&r.to_eliminate),
RelAlgebra::Unification(u) => Some(&u.to_eliminate), RelAlgebra::Unification(u) => Some(&u.to_eliminate),
RelAlgebra::HnswSearch(_) => {todo!()}
} }
} }
@ -1599,6 +1618,7 @@ impl RelAlgebra {
bindings.push(u.binding.clone()); bindings.push(u.binding.clone());
bindings bindings
} }
RelAlgebra::HnswSearch(_) => {todo!()}
} }
} }
pub(crate) fn iter<'a>( pub(crate) fn iter<'a>(
@ -1617,6 +1637,7 @@ impl RelAlgebra {
RelAlgebra::Filter(r) => r.iter(tx, delta_rule, stores), RelAlgebra::Filter(r) => r.iter(tx, delta_rule, stores),
RelAlgebra::NegJoin(r) => r.iter(tx, delta_rule, stores), RelAlgebra::NegJoin(r) => r.iter(tx, delta_rule, stores),
RelAlgebra::Unification(r) => r.iter(tx, delta_rule, stores), RelAlgebra::Unification(r) => r.iter(tx, delta_rule, stores),
RelAlgebra::HnswSearch(_) => {todo!()}
} }
} }
} }
@ -1819,6 +1840,9 @@ impl InnerJoin {
RelAlgebra::NegJoin(_) => { RelAlgebra::NegJoin(_) => {
panic!("joining on NegJoin") panic!("joining on NegJoin")
} }
RelAlgebra::HnswSearch(_) => {
todo!("joining on HnswSearch")
}
} }
} }
pub(crate) fn iter<'a>( pub(crate) fn iter<'a>(
@ -1913,6 +1937,9 @@ impl InnerJoin {
RelAlgebra::NegJoin(_) => { RelAlgebra::NegJoin(_) => {
panic!("joining on NegJoin") panic!("joining on NegJoin")
} }
RelAlgebra::HnswSearch(_) => {
todo!("joining on HnswSearch")
}
} }
} }
fn materialized_join<'a>( fn materialized_join<'a>(

@ -10,7 +10,8 @@ use std::collections::{BTreeMap, BTreeSet};
use std::sync::Arc; use std::sync::Arc;
use itertools::Itertools; use itertools::Itertools;
use miette::{bail, Diagnostic, Result, WrapErr}; use miette::{bail, Diagnostic, IntoDiagnostic, Result, WrapErr};
use pest::Parser;
use smartstring::{LazyCompact, SmartString}; use smartstring::{LazyCompact, SmartString};
use thiserror::Error; use thiserror::Error;
@ -22,7 +23,8 @@ use crate::data::tuple::Tuple;
use crate::data::value::{DataValue, ValidityTs}; use crate::data::value::{DataValue, ValidityTs};
use crate::fixed_rule::utilities::constant::Constant; use crate::fixed_rule::utilities::constant::Constant;
use crate::fixed_rule::FixedRuleHandle; use crate::fixed_rule::FixedRuleHandle;
use crate::parse::parse_script; use crate::parse::expr::build_expr;
use crate::parse::{parse_script, CozoScriptParser, Rule};
use crate::runtime::callback::{CallbackCollector, CallbackOp}; use crate::runtime::callback::{CallbackCollector, CallbackOp};
use crate::runtime::relation::{ use crate::runtime::relation::{
extend_tuple_from_v, AccessLevel, InputRelationHandle, InsufficientAccessLevel, extend_tuple_from_v, AccessLevel, InputRelationHandle, InsufficientAccessLevel,
@ -149,6 +151,7 @@ impl<'a> SessionTx<'a> {
&& (is_callback_target && (is_callback_target
|| (propagate_triggers && !relation_store.rm_triggers.is_empty())); || (propagate_triggers && !relation_store.rm_triggers.is_empty()));
let has_indices = !relation_store.indices.is_empty(); let has_indices = !relation_store.indices.is_empty();
let has_hnsw_indices = !relation_store.hnsw_indices.is_empty();
let mut new_tuples: Vec<DataValue> = vec![]; let mut new_tuples: Vec<DataValue> = vec![];
let mut old_tuples: Vec<DataValue> = vec![]; let mut old_tuples: Vec<DataValue> = vec![];
@ -158,7 +161,7 @@ impl<'a> SessionTx<'a> {
.map(|ex| ex.extract_data(&tuple, cur_vld)) .map(|ex| ex.extract_data(&tuple, cur_vld))
.try_collect()?; .try_collect()?;
let key = relation_store.encode_key_for_store(&extracted, *span)?; let key = relation_store.encode_key_for_store(&extracted, *span)?;
if need_to_collect || has_indices { if need_to_collect || has_indices || has_hnsw_indices {
if let Some(existing) = self.store_tx.get(&key, false)? { if let Some(existing) = self.store_tx.get(&key, false)? {
let mut tup = extracted.clone(); let mut tup = extracted.clone();
extend_tuple_from_v(&mut tup, &existing); extend_tuple_from_v(&mut tup, &existing);
@ -171,6 +174,13 @@ impl<'a> SessionTx<'a> {
self.store_tx.del(&encoded)?; self.store_tx.del(&encoded)?;
} }
} }
if has_hnsw_indices {
for (idx_handle, _) in
relation_store.hnsw_indices.values()
{
self.hnsw_remove(&relation_store, idx_handle, &extracted)?;
}
}
if need_to_collect { if need_to_collect {
old_tuples.push(DataValue::List(tup)); old_tuples.push(DataValue::List(tup));
} }
@ -399,6 +409,7 @@ impl<'a> SessionTx<'a> {
&& (is_callback_target && (is_callback_target
|| (propagate_triggers && !relation_store.put_triggers.is_empty())); || (propagate_triggers && !relation_store.put_triggers.is_empty()));
let has_indices = !relation_store.indices.is_empty(); let has_indices = !relation_store.indices.is_empty();
let has_hnsw_indices = !relation_store.hnsw_indices.is_empty();
let mut new_tuples: Vec<DataValue> = vec![]; let mut new_tuples: Vec<DataValue> = vec![];
let mut old_tuples: Vec<DataValue> = vec![]; let mut old_tuples: Vec<DataValue> = vec![];
@ -409,6 +420,22 @@ impl<'a> SessionTx<'a> {
headers, headers,
)?; )?;
key_extractors.extend(val_extractors); key_extractors.extend(val_extractors);
let mut stack = vec![];
let mut hnsw_filters = BTreeMap::new();
if has_hnsw_indices {
for (name, (_, manifest)) in relation_store.hnsw_indices.iter() {
if let Some(f_code) = &manifest.index_filter {
let parsed = CozoScriptParser::parse(Rule::expr, f_code)
.into_diagnostic()?
.next()
.unwrap();
let mut code_expr = build_expr(parsed, &Default::default())?;
let binding_map = relation_store.raw_binding_map();
code_expr.fill_binding_indices(&binding_map)?;
hnsw_filters.insert(name.clone(), code_expr.compile());
}
}
}
for tuple in res_iter { for tuple in res_iter {
let extracted: Vec<DataValue> = key_extractors let extracted: Vec<DataValue> = key_extractors
@ -419,7 +446,7 @@ impl<'a> SessionTx<'a> {
let key = relation_store.encode_key_for_store(&extracted, *span)?; let key = relation_store.encode_key_for_store(&extracted, *span)?;
let val = relation_store.encode_val_for_store(&extracted, *span)?; let val = relation_store.encode_val_for_store(&extracted, *span)?;
if need_to_collect || has_indices { if need_to_collect || has_indices || has_hnsw_indices {
if let Some(existing) = self.store_tx.get(&key, false)? { if let Some(existing) = self.store_tx.get(&key, false)? {
let mut tup = extracted[0..relation_store.metadata.keys.len()].to_vec(); let mut tup = extracted[0..relation_store.metadata.keys.len()].to_vec();
extend_tuple_from_v(&mut tup, &existing); extend_tuple_from_v(&mut tup, &existing);
@ -456,6 +483,22 @@ impl<'a> SessionTx<'a> {
} }
} }
if has_hnsw_indices {
for (name, (idx_handle, idx_manifest)) in
relation_store.hnsw_indices.iter()
{
let filter = hnsw_filters.get(name);
self.hnsw_put(
idx_manifest,
&relation_store,
idx_handle,
filter,
&mut stack,
&extracted,
)?;
}
}
if need_to_collect { if need_to_collect {
new_tuples.push(DataValue::List(extracted)); new_tuples.push(DataValue::List(extracted));
} }

@ -1055,6 +1055,9 @@ impl<'s, S: Storage<'s>> Db<S> {
json!(expr.to_string()), json!(expr.to_string()),
) )
} }
RelAlgebra::HnswSearch(_) => {
todo!("HnswSearch")
}
}; };
ret_for_relation.push(json!({ ret_for_relation.push(json!({
STRATUM: stratum, STRATUM: stratum,

@ -738,7 +738,7 @@ impl<'a> SessionTx<'a> {
manifest: &HnswIndexManifest, manifest: &HnswIndexManifest,
orig_table: &RelationHandle, orig_table: &RelationHandle,
idx_table: &RelationHandle, idx_table: &RelationHandle,
filter: &Option<Vec<Bytecode>>, filter: Option<&Vec<Bytecode>>,
stack: &mut Vec<DataValue>, stack: &mut Vec<DataValue>,
tuple: &Tuple, tuple: &Tuple,
) -> Result<bool> { ) -> Result<bool> {

@ -12,7 +12,7 @@ use std::sync::atomic::Ordering;
use itertools::Itertools; use itertools::Itertools;
use log::error; use log::error;
use miette::{bail, ensure, Diagnostic, Result, IntoDiagnostic}; use miette::{bail, ensure, Diagnostic, IntoDiagnostic, Result};
use pest::Parser; use pest::Parser;
use rmp_serde::Serializer; use rmp_serde::Serializer;
use serde::Serialize; use serde::Serialize;
@ -24,13 +24,13 @@ use crate::data::relation::{ColType, ColumnDef, NullableColType, StoredRelationM
use crate::data::symb::Symbol; use crate::data::symb::Symbol;
use crate::data::tuple::{decode_tuple_from_key, Tuple, TupleT, ENCODED_KEY_MIN_LEN}; use crate::data::tuple::{decode_tuple_from_key, Tuple, TupleT, ENCODED_KEY_MIN_LEN};
use crate::data::value::{DataValue, ValidityTs}; use crate::data::value::{DataValue, ValidityTs};
use crate::parse::expr::build_expr;
use crate::parse::sys::HnswIndexConfig; use crate::parse::sys::HnswIndexConfig;
use crate::parse::{CozoScriptParser, Rule, SourceSpan}; use crate::parse::{CozoScriptParser, Rule, SourceSpan};
use crate::query::compile::IndexPositionUse; use crate::query::compile::IndexPositionUse;
use crate::runtime::hnsw::HnswIndexManifest;
use crate::runtime::transact::SessionTx; use crate::runtime::transact::SessionTx;
use crate::{NamedRows, StoreTx}; use crate::{NamedRows, StoreTx};
use crate::parse::expr::build_expr;
use crate::runtime::hnsw::HnswIndexManifest;
#[derive( #[derive(
Copy, Copy,
@ -132,7 +132,10 @@ impl RelationHandle {
ret.insert(Symbol::new(col.name.clone(), Default::default()), i); ret.insert(Symbol::new(col.name.clone(), Default::default()), i);
} }
for (i, col) in self.metadata.non_keys.iter().enumerate() { for (i, col) in self.metadata.non_keys.iter().enumerate() {
ret.insert(Symbol::new(col.name.clone(), Default::default()), i + self.metadata.keys.len()); ret.insert(
Symbol::new(col.name.clone(), Default::default()),
i + self.metadata.keys.len(),
);
} }
ret ret
} }
@ -782,7 +785,7 @@ impl<'a> SessionTx<'a> {
nullable: false, nullable: false,
}, },
default_gen: None, default_gen: None,
} },
]; ];
// create index relation // create index relation
let key_bindings = idx_keys let key_bindings = idx_keys
@ -829,17 +832,32 @@ impl<'a> SessionTx<'a> {
// populate index // populate index
let all_tuples = rel_handle.scan_all(self).collect::<Result<Vec<_>>>()?; let all_tuples = rel_handle.scan_all(self).collect::<Result<Vec<_>>>()?;
let filter = if let Some(f_code) = &manifest.index_filter { let filter = if let Some(f_code) = &manifest.index_filter {
let parsed = CozoScriptParser::parse(Rule::expr, f_code).into_diagnostic()?.next().unwrap(); let parsed = CozoScriptParser::parse(Rule::expr, f_code)
.into_diagnostic()?
.next()
.unwrap();
let mut code_expr = build_expr(parsed, &Default::default())?; let mut code_expr = build_expr(parsed, &Default::default())?;
let binding_map = rel_handle.raw_binding_map(); let binding_map = rel_handle.raw_binding_map();
code_expr.fill_binding_indices(&binding_map)?; code_expr.fill_binding_indices(&binding_map)?;
Some(code_expr.compile()) code_expr.compile()
} else { } else {
vec![]
};
let filter = if filter.is_empty() {
None None
} else {
Some(&filter)
}; };
let mut stack = vec![]; let mut stack = vec![];
for tuple in all_tuples { for tuple in all_tuples {
self.hnsw_put(&manifest, &rel_handle, &idx_handle, &filter, &mut stack, &tuple)?; self.hnsw_put(
&manifest,
&rel_handle,
&idx_handle,
filter,
&mut stack,
&tuple,
)?;
} }
rel_handle rel_handle

@ -804,4 +804,17 @@ fn test_vec_index() {
filter: k != 'k1' filter: k != 'k1'
}", Default::default()) }", Default::default())
.unwrap(); .unwrap();
db.run_script(r"
?[k, v] <- [
['a2', [1,2,3,4,5,6,7,8]],
['b2', [2,3,4,5,6,7,8,9]],
['bb2', [2,3,4,5,6,7,8,9]],
['c2', [2,3,4,5,6,7,8,19]],
['a2', [2,3,4,5,6,7,8,9]],
['b2', [1,1,1,1,1,1,1,1]]
]
:put a {k => v}
", Default::default())
.unwrap();
println!("{:#?}", db.export_relations(["a", "a:vec"].iter()));
} }

Loading…
Cancel
Save