diff --git a/cozo-core/src/data/value.rs b/cozo-core/src/data/value.rs index 7e516ada..129c6006 100644 --- a/cozo-core/src/data/value.rs +++ b/cozo-core/src/data/value.rs @@ -463,7 +463,14 @@ impl Display for DataValue { .field("retracted", &v.is_assert) .finish(), DataValue::Vec(a) => { - write!(f, "array<{:?} elements>", a.len()) + match a { + Vector::F32(a) => { + write!(f, "vec({:?})", a.to_vec()) + } + Vector::F64(a) => { + write!(f, "vec({:?}, \"F64\")", a.to_vec()) + } + } } } } diff --git a/cozo-core/src/query/ra.rs b/cozo-core/src/query/ra.rs index 151e2dc7..cc13aa69 100644 --- a/cozo-core/src/query/ra.rs +++ b/cozo-core/src/query/ra.rs @@ -38,6 +38,7 @@ pub(crate) enum RelAlgebra { Reorder(ReorderRA), Filter(FilteredRA), Unification(UnificationRA), + HnswSearch(HnswSearchRA), } impl RelAlgebra { @@ -52,10 +53,17 @@ impl RelAlgebra { RelAlgebra::Filter(i) => i.span, RelAlgebra::Unification(i) => i.span, RelAlgebra::StoredWithValidity(i) => i.span, + RelAlgebra::HnswSearch(_) => todo!(), } } } +pub(crate) struct HnswSearchRA { + pub(crate) parent: Box, + pub(crate) to_eliminate: BTreeSet, + pub(crate) span: SourceSpan, +} + pub(crate) struct UnificationRA { pub(crate) parent: Box, pub(crate) binding: Symbol, @@ -320,6 +328,9 @@ impl Debug for RelAlgebra { .field(&r.binding) .field(&r.expr) .finish(), + RelAlgebra::HnswSearch(_) => { + todo!("HnswSearch") + } } } } @@ -363,6 +374,9 @@ impl RelAlgebra { r.left.fill_binding_indices_and_compile()?; r.right.fill_binding_indices_and_compile()?; } + RelAlgebra::HnswSearch(_) => { + todo!() + } } Ok(()) } @@ -559,6 +573,9 @@ impl RelAlgebra { } joined } + RelAlgebra::HnswSearch(_) => { + todo!("filter on HnswSearch") + } } } pub(crate) fn unify( @@ -1556,6 +1573,7 @@ impl RelAlgebra { RelAlgebra::Filter(r) => r.do_eliminate_temp_vars(used), RelAlgebra::NegJoin(r) => r.do_eliminate_temp_vars(used), RelAlgebra::Unification(r) => r.do_eliminate_temp_vars(used), + RelAlgebra::HnswSearch(_) => {todo!()} } } @@ -1570,6 +1588,7 @@ impl RelAlgebra { RelAlgebra::Filter(r) => Some(&r.to_eliminate), RelAlgebra::NegJoin(r) => Some(&r.to_eliminate), RelAlgebra::Unification(u) => Some(&u.to_eliminate), + RelAlgebra::HnswSearch(_) => {todo!()} } } @@ -1599,6 +1618,7 @@ impl RelAlgebra { bindings.push(u.binding.clone()); bindings } + RelAlgebra::HnswSearch(_) => {todo!()} } } pub(crate) fn iter<'a>( @@ -1617,6 +1637,7 @@ impl RelAlgebra { RelAlgebra::Filter(r) => r.iter(tx, delta_rule, stores), RelAlgebra::NegJoin(r) => r.iter(tx, delta_rule, stores), RelAlgebra::Unification(r) => r.iter(tx, delta_rule, stores), + RelAlgebra::HnswSearch(_) => {todo!()} } } } @@ -1819,6 +1840,9 @@ impl InnerJoin { RelAlgebra::NegJoin(_) => { panic!("joining on NegJoin") } + RelAlgebra::HnswSearch(_) => { + todo!("joining on HnswSearch") + } } } pub(crate) fn iter<'a>( @@ -1913,6 +1937,9 @@ impl InnerJoin { RelAlgebra::NegJoin(_) => { panic!("joining on NegJoin") } + RelAlgebra::HnswSearch(_) => { + todo!("joining on HnswSearch") + } } } fn materialized_join<'a>( diff --git a/cozo-core/src/query/stored.rs b/cozo-core/src/query/stored.rs index f1ffc8e7..2f918581 100644 --- a/cozo-core/src/query/stored.rs +++ b/cozo-core/src/query/stored.rs @@ -10,7 +10,8 @@ use std::collections::{BTreeMap, BTreeSet}; use std::sync::Arc; use itertools::Itertools; -use miette::{bail, Diagnostic, Result, WrapErr}; +use miette::{bail, Diagnostic, IntoDiagnostic, Result, WrapErr}; +use pest::Parser; use smartstring::{LazyCompact, SmartString}; use thiserror::Error; @@ -22,7 +23,8 @@ use crate::data::tuple::Tuple; use crate::data::value::{DataValue, ValidityTs}; use crate::fixed_rule::utilities::constant::Constant; use crate::fixed_rule::FixedRuleHandle; -use crate::parse::parse_script; +use crate::parse::expr::build_expr; +use crate::parse::{parse_script, CozoScriptParser, Rule}; use crate::runtime::callback::{CallbackCollector, CallbackOp}; use crate::runtime::relation::{ extend_tuple_from_v, AccessLevel, InputRelationHandle, InsufficientAccessLevel, @@ -149,6 +151,7 @@ impl<'a> SessionTx<'a> { && (is_callback_target || (propagate_triggers && !relation_store.rm_triggers.is_empty())); let has_indices = !relation_store.indices.is_empty(); + let has_hnsw_indices = !relation_store.hnsw_indices.is_empty(); let mut new_tuples: Vec = vec![]; let mut old_tuples: Vec = vec![]; @@ -158,7 +161,7 @@ impl<'a> SessionTx<'a> { .map(|ex| ex.extract_data(&tuple, cur_vld)) .try_collect()?; let key = relation_store.encode_key_for_store(&extracted, *span)?; - if need_to_collect || has_indices { + if need_to_collect || has_indices || has_hnsw_indices { if let Some(existing) = self.store_tx.get(&key, false)? { let mut tup = extracted.clone(); extend_tuple_from_v(&mut tup, &existing); @@ -171,6 +174,13 @@ impl<'a> SessionTx<'a> { self.store_tx.del(&encoded)?; } } + if has_hnsw_indices { + for (idx_handle, _) in + relation_store.hnsw_indices.values() + { + self.hnsw_remove(&relation_store, idx_handle, &extracted)?; + } + } if need_to_collect { old_tuples.push(DataValue::List(tup)); } @@ -399,6 +409,7 @@ impl<'a> SessionTx<'a> { && (is_callback_target || (propagate_triggers && !relation_store.put_triggers.is_empty())); let has_indices = !relation_store.indices.is_empty(); + let has_hnsw_indices = !relation_store.hnsw_indices.is_empty(); let mut new_tuples: Vec = vec![]; let mut old_tuples: Vec = vec![]; @@ -409,6 +420,22 @@ impl<'a> SessionTx<'a> { headers, )?; key_extractors.extend(val_extractors); + let mut stack = vec![]; + let mut hnsw_filters = BTreeMap::new(); + if has_hnsw_indices { + for (name, (_, manifest)) in relation_store.hnsw_indices.iter() { + if let Some(f_code) = &manifest.index_filter { + let parsed = CozoScriptParser::parse(Rule::expr, f_code) + .into_diagnostic()? + .next() + .unwrap(); + let mut code_expr = build_expr(parsed, &Default::default())?; + let binding_map = relation_store.raw_binding_map(); + code_expr.fill_binding_indices(&binding_map)?; + hnsw_filters.insert(name.clone(), code_expr.compile()); + } + } + } for tuple in res_iter { let extracted: Vec = key_extractors @@ -419,7 +446,7 @@ impl<'a> SessionTx<'a> { let key = relation_store.encode_key_for_store(&extracted, *span)?; let val = relation_store.encode_val_for_store(&extracted, *span)?; - if need_to_collect || has_indices { + if need_to_collect || has_indices || has_hnsw_indices { if let Some(existing) = self.store_tx.get(&key, false)? { let mut tup = extracted[0..relation_store.metadata.keys.len()].to_vec(); extend_tuple_from_v(&mut tup, &existing); @@ -456,6 +483,22 @@ impl<'a> SessionTx<'a> { } } + if has_hnsw_indices { + for (name, (idx_handle, idx_manifest)) in + relation_store.hnsw_indices.iter() + { + let filter = hnsw_filters.get(name); + self.hnsw_put( + idx_manifest, + &relation_store, + idx_handle, + filter, + &mut stack, + &extracted, + )?; + } + } + if need_to_collect { new_tuples.push(DataValue::List(extracted)); } diff --git a/cozo-core/src/runtime/db.rs b/cozo-core/src/runtime/db.rs index 9c0c0c3c..69e25034 100644 --- a/cozo-core/src/runtime/db.rs +++ b/cozo-core/src/runtime/db.rs @@ -1055,6 +1055,9 @@ impl<'s, S: Storage<'s>> Db { json!(expr.to_string()), ) } + RelAlgebra::HnswSearch(_) => { + todo!("HnswSearch") + } }; ret_for_relation.push(json!({ STRATUM: stratum, diff --git a/cozo-core/src/runtime/hnsw.rs b/cozo-core/src/runtime/hnsw.rs index 27f402d4..e3704766 100644 --- a/cozo-core/src/runtime/hnsw.rs +++ b/cozo-core/src/runtime/hnsw.rs @@ -738,7 +738,7 @@ impl<'a> SessionTx<'a> { manifest: &HnswIndexManifest, orig_table: &RelationHandle, idx_table: &RelationHandle, - filter: &Option>, + filter: Option<&Vec>, stack: &mut Vec, tuple: &Tuple, ) -> Result { diff --git a/cozo-core/src/runtime/relation.rs b/cozo-core/src/runtime/relation.rs index f4eeb069..ffe19c83 100644 --- a/cozo-core/src/runtime/relation.rs +++ b/cozo-core/src/runtime/relation.rs @@ -12,7 +12,7 @@ use std::sync::atomic::Ordering; use itertools::Itertools; use log::error; -use miette::{bail, ensure, Diagnostic, Result, IntoDiagnostic}; +use miette::{bail, ensure, Diagnostic, IntoDiagnostic, Result}; use pest::Parser; use rmp_serde::Serializer; use serde::Serialize; @@ -24,13 +24,13 @@ use crate::data::relation::{ColType, ColumnDef, NullableColType, StoredRelationM use crate::data::symb::Symbol; use crate::data::tuple::{decode_tuple_from_key, Tuple, TupleT, ENCODED_KEY_MIN_LEN}; use crate::data::value::{DataValue, ValidityTs}; +use crate::parse::expr::build_expr; use crate::parse::sys::HnswIndexConfig; use crate::parse::{CozoScriptParser, Rule, SourceSpan}; use crate::query::compile::IndexPositionUse; +use crate::runtime::hnsw::HnswIndexManifest; use crate::runtime::transact::SessionTx; use crate::{NamedRows, StoreTx}; -use crate::parse::expr::build_expr; -use crate::runtime::hnsw::HnswIndexManifest; #[derive( Copy, @@ -132,7 +132,10 @@ impl RelationHandle { ret.insert(Symbol::new(col.name.clone(), Default::default()), i); } for (i, col) in self.metadata.non_keys.iter().enumerate() { - ret.insert(Symbol::new(col.name.clone(), Default::default()), i + self.metadata.keys.len()); + ret.insert( + Symbol::new(col.name.clone(), Default::default()), + i + self.metadata.keys.len(), + ); } ret } @@ -782,7 +785,7 @@ impl<'a> SessionTx<'a> { nullable: false, }, default_gen: None, - } + }, ]; // create index relation let key_bindings = idx_keys @@ -829,17 +832,32 @@ impl<'a> SessionTx<'a> { // populate index let all_tuples = rel_handle.scan_all(self).collect::>>()?; let filter = if let Some(f_code) = &manifest.index_filter { - let parsed = CozoScriptParser::parse(Rule::expr, f_code).into_diagnostic()?.next().unwrap(); + let parsed = CozoScriptParser::parse(Rule::expr, f_code) + .into_diagnostic()? + .next() + .unwrap(); let mut code_expr = build_expr(parsed, &Default::default())?; let binding_map = rel_handle.raw_binding_map(); code_expr.fill_binding_indices(&binding_map)?; - Some(code_expr.compile()) + code_expr.compile() } else { + vec![] + }; + let filter = if filter.is_empty() { None + } else { + Some(&filter) }; let mut stack = vec![]; for tuple in all_tuples { - self.hnsw_put(&manifest, &rel_handle, &idx_handle, &filter, &mut stack, &tuple)?; + self.hnsw_put( + &manifest, + &rel_handle, + &idx_handle, + filter, + &mut stack, + &tuple, + )?; } rel_handle diff --git a/cozo-core/src/runtime/tests.rs b/cozo-core/src/runtime/tests.rs index 5106060c..d3ba0a01 100644 --- a/cozo-core/src/runtime/tests.rs +++ b/cozo-core/src/runtime/tests.rs @@ -804,4 +804,17 @@ fn test_vec_index() { filter: k != 'k1' }", Default::default()) .unwrap(); + db.run_script(r" + ?[k, v] <- [ + ['a2', [1,2,3,4,5,6,7,8]], + ['b2', [2,3,4,5,6,7,8,9]], + ['bb2', [2,3,4,5,6,7,8,9]], + ['c2', [2,3,4,5,6,7,8,19]], + ['a2', [2,3,4,5,6,7,8,9]], + ['b2', [1,1,1,1,1,1,1,1]] + ] + :put a {k => v} + ", Default::default()) + .unwrap(); + println!("{:#?}", db.export_relations(["a", "a:vec"].iter())); }