From fdc3ab471eb241209c7b5f3204414c19cea814be Mon Sep 17 00:00:00 2001 From: Ziyang Hu Date: Wed, 27 Jul 2022 16:13:56 +0800 Subject: [PATCH] neg join for triples --- README.md | 4 +- python/cozo_helper.py | 15 ++++- src/parse/query.rs | 58 +++++++++++++++--- src/query/compile.rs | 8 +-- src/query/relation.rs | 137 ++++++++++++++++++++++++++++++++++++++++++ tests/creation.rs | 6 +- 6 files changed, 211 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 5ee017ae..7b533d26 100644 --- a/README.md +++ b/README.md @@ -11,4 +11,6 @@ * [ ] range scan * [ ] public API * [ ] sorting -* [ ] limit, offset \ No newline at end of file +* [ ] limit, offset + +comparators can have problems when sorting mixed integers and floats \ No newline at end of file diff --git a/python/cozo_helper.py b/python/cozo_helper.py index cbd62059..2050f3b6 100644 --- a/python/cozo_helper.py +++ b/python/cozo_helper.py @@ -80,7 +80,20 @@ def Const(item): return {'const': item} -__all__ = ['Gt', 'Lt', 'Ge', 'Le', 'Eq', 'Neq', 'Add', 'Sub', 'Mul', 'Div', 'Q', 'T', 'R', 'Const'] +def Conj(*items): + return {'conj': items} + + +def Disj(*items): + return {'disj': items} + + +def NotExists(item): + return {'not_exists': item} + + +__all__ = ['Gt', 'Lt', 'Ge', 'Le', 'Eq', 'Neq', 'Add', 'Sub', 'Mul', 'Div', 'Q', 'T', 'R', 'Const', 'Conj', 'Disj', + 'NotExists'] if __name__ == '__main__': import json diff --git a/src/parse/query.rs b/src/parse/query.rs index 63eaf8ae..080d6a1f 100644 --- a/src/parse/query.rs +++ b/src/parse/query.rs @@ -12,8 +12,8 @@ use crate::data::keyword::{Keyword, PROG_ENTRY}; use crate::data::value::DataValue; use crate::parse::triple::TxError; use crate::query::compile::{ - Atom, AttrTripleAtom, BindingHeadTerm, DatalogProgram, QueryCompilationError, Rule, - RuleApplyAtom, RuleSet, Term, + Atom, AttrTripleAtom, BindingHeadTerm, DatalogProgram, LogicalAtom, QueryCompilationError, + Rule, RuleApplyAtom, RuleSet, Term, }; use crate::runtime::transact::SessionTx; use crate::{EntityId, Validity}; @@ -359,22 +359,62 @@ impl SessionTx { _ => unimplemented!(), }, JsonValue::Object(map) => { - // rule application, or built-in predicates, - // or disjunction/negation (convert to disjunctive normal forms) if map.contains_key("rule") { self.parse_rule_atom(map, vld) } else if map.contains_key("pred") { Self::parse_predicate_atom(map) - } else if map.contains_key("logical") { - dbg!("x"); - todo!() + } else if map.contains_key("conj") + || map.contains_key("disj") + || map.contains_key("not_exists") + { + if map.len() != 1 { + return Err(QueryCompilationError::UnexpectedForm( + JsonValue::Object(map.clone()), + "too many keys".to_string(), + ) + .into()); + } + self.parse_logical_atom(map, vld) } else { - todo!() + Err(QueryCompilationError::UnexpectedForm( + JsonValue::Object(map.clone()), + "unknown format".to_string(), + ) + .into()) } } - _ => unimplemented!(), + v => Err(QueryCompilationError::UnexpectedForm( + v.clone(), + "unknown format".to_string(), + ) + .into()), } } + fn parse_logical_atom(&mut self, map: &Map, vld: Validity) -> Result { + let (k, v) = map.iter().next().unwrap(); + Ok(match k as &str { + "not_exists" => { + let arg = self.parse_atom(v, vld)?; + Atom::Logical(LogicalAtom::Negation(Box::new(arg))) + } + "conj" | "disj" => { + let args = v + .as_array() + .ok_or_else(|| { + QueryCompilationError::UnexpectedForm(v.clone(), "expect array".to_string()) + })? + .iter() + .map(|a| self.parse_atom(a, vld)) + .try_collect()?; + if k == "conj" { + Atom::Logical(LogicalAtom::Conjunction(args)) + } else { + Atom::Logical(LogicalAtom::Disjunction(args)) + } + } + _ => unreachable!(), + }) + } fn parse_triple_atom( &mut self, entity_rep: &JsonValue, diff --git a/src/query/compile.rs b/src/query/compile.rs index 595df51f..2e1ae5d5 100644 --- a/src/query/compile.rs +++ b/src/query/compile.rs @@ -111,11 +111,9 @@ pub struct RuleApplyAtom { #[derive(Clone, Debug)] pub enum LogicalAtom { - AttrTriple(AttrTripleAtom), - Rule(RuleApplyAtom), - Negation(Box), - Conjunction(Vec), - Disjunction(Vec), + Negation(Box), + Conjunction(Vec), + Disjunction(Vec), } #[derive(Clone, Debug)] diff --git a/src/query/relation.rs b/src/query/relation.rs index 6226fdac..5557095c 100644 --- a/src/query/relation.rs +++ b/src/query/relation.rs @@ -416,6 +416,39 @@ fn invert_option_err(v: Result>) -> Option> { } impl TripleRelation { + pub(crate) fn neg_join<'a>( + &'a self, + left_iter: TupleIter<'a>, + (left_join_indices, right_join_indices): (Vec, Vec), + tx: &'a SessionTx, + eliminate_indices: BTreeSet, + ) -> TupleIter<'a> { + match right_join_indices.len() { + 2 => { + let right_first = *right_join_indices.first().unwrap(); + let right_second = *right_join_indices.last().unwrap(); + let left_first = *left_join_indices.first().unwrap(); + let left_second = *left_join_indices.last().unwrap(); + match (right_first, right_second) { + (0, 1) => self.neg_ev_join(left_iter, left_first, left_second, tx), + (1, 0) => self.neg_ev_join(left_iter, left_second, left_first, tx), + _ => panic!("should not happen"), + } + } + 1 => { + if right_join_indices[0] == 0 { + self.neg_e_join(left_iter, left_join_indices[0], tx) + } else if self.attr.val_type.is_ref_type() { + self.neg_v_ref_join(left_iter, left_join_indices[0], tx) + } else if self.attr.indexing.should_index() { + self.neg_v_index_join(left_iter, left_join_indices[0], tx) + } else { + self.neg_v_no_index_join(left_iter, left_join_indices[0], tx) + } + } + _ => unreachable!(), + } + } pub(crate) fn join<'a>( &'a self, left_iter: TupleIter<'a>, @@ -471,6 +504,25 @@ impl TripleRelation { .map(flatten_err), ) } + fn neg_ev_join<'a>( + &'a self, + left_iter: TupleIter<'a>, + left_e_idx: usize, + left_v_idx: usize, + tx: &'a SessionTx, + ) -> TupleIter<'a> { + Box::new( + left_iter + .map_ok(move |tuple| -> Result> { + let eid = tuple.0.get(left_e_idx).unwrap().get_entity_id()?; + let v = tuple.0.get(left_v_idx).unwrap(); + let exists = tx.eav_exists(eid, self.attr.id, v, self.vld)?; + Ok(if exists { None } else { Some(tuple) }) + }) + .map(flatten_err) + .filter_map(invert_option_err), + ) + } fn ev_join<'a>( &'a self, left_iter: TupleIter<'a>, @@ -514,6 +566,26 @@ impl TripleRelation { .filter_map(invert_option_err), ) } + fn neg_e_join<'a>( + &'a self, + left_iter: TupleIter<'a>, + left_e_idx: usize, + tx: &'a SessionTx, + ) -> TupleIter<'a> { + Box::new( + left_iter + .map_ok(move |tuple| -> Result> { + let eid = tuple.0.get(left_e_idx).unwrap().get_entity_id()?; + match tx.triple_ea_before_scan(eid, self.attr.id, self.vld).next() { + None => Ok(Some(tuple)), + Some(Ok(_)) => Ok(None), + Some(Err(e)) => Err(e), + } + }) + .map(flatten_err) + .filter_map(invert_option_err), + ) + } fn e_join<'a>( &'a self, left_iter: TupleIter<'a>, @@ -559,6 +631,29 @@ impl TripleRelation { .map(flatten_err), ) } + fn neg_v_ref_join<'a>( + &'a self, + left_iter: TupleIter<'a>, + left_v_idx: usize, + tx: &'a SessionTx, + ) -> TupleIter<'a> { + Box::new( + left_iter + .map_ok(move |tuple| -> Result> { + let v_eid = tuple.0.get(left_v_idx).unwrap().get_entity_id()?; + match tx + .triple_vref_a_before_scan(v_eid, self.attr.id, self.vld) + .next() + { + None => Ok(Some(tuple)), + Some(Ok(_)) => Ok(None), + Some(Err(e)) => Err(e), + } + }) + .map(flatten_err) + .filter_map(invert_option_err), + ) + } fn v_ref_join<'a>( &'a self, left_iter: TupleIter<'a>, @@ -606,6 +701,26 @@ impl TripleRelation { .map(flatten_err), ) } + fn neg_v_index_join<'a>( + &'a self, + left_iter: TupleIter<'a>, + left_v_idx: usize, + tx: &'a SessionTx, + ) -> TupleIter<'a> { + Box::new( + left_iter + .map_ok(move |tuple| -> Result> { + let val = tuple.0.get(left_v_idx).unwrap(); + match tx.triple_av_before_scan(self.attr.id, val, self.vld).next() { + None => Ok(Some(tuple)), + Some(Ok(_)) => Ok(None), + Some(Err(e)) => Err(e), + } + }) + .map(flatten_err) + .filter_map(invert_option_err), + ) + } fn v_index_join<'a>( &'a self, left_iter: TupleIter<'a>, @@ -646,6 +761,28 @@ impl TripleRelation { .map(flatten_err), ) } + fn neg_v_no_index_join<'a>( + &'a self, + left_iter: TupleIter<'a>, + left_v_idx: usize, + tx: &'a SessionTx, + ) -> TupleIter<'a> { + Box::new( + left_iter + .map_ok(move |tuple| -> Result> { + let val = tuple.0.get(left_v_idx).unwrap(); + for item in tx.triple_a_before_scan(self.attr.id, self.vld) { + let (_, _, found_val) = item?; + if *val == found_val { + return Ok(None); + } + } + Ok(Some(tuple)) + }) + .map(flatten_err) + .filter_map(invert_option_err), + ) + } fn v_no_index_join<'a>( &'a self, left_iter: TupleIter<'a>, diff --git a/tests/creation.rs b/tests/creation.rs index 53041ed5..409360fc 100644 --- a/tests/creation.rs +++ b/tests/creation.rs @@ -141,7 +141,11 @@ fn creation() { }, { "rule": "?", - "args": [["?a"], {"pred": "Neq", "args": ["?n", {"pred": "StrCat", "args": ["A", "l", "i", "c", "e"]}]}, {"rule": "ff", "args": [{"person.id": "alice_amorist"}, "?a"]}, ["?a", "person.first_name", "?n"]] + "args": [["?a"], + {"pred": "Neq", "args": ["?n", {"pred": "StrCat", "args": ["A", "l", "i", "c", "e"]}]}, + {"rule": "ff", "args": [{"person.id": "alice_amorist"}, "?a"]}, + {"not_exists": ["?a", "person.last_name", "Goodman"]}, + ["?a", "person.first_name", "?n"]] } ], "out": {"friend": {"pull": "?a", "spec": ["person.first_name"]}}