From 493d0c95d841deab021df38641cce645f1369a0a Mon Sep 17 00:00:00 2001 From: Ziyang Hu Date: Tue, 19 Jul 2022 17:49:07 +0800 Subject: [PATCH] joining game --- src/data/keyword.rs | 6 ++ src/data/tuple.rs | 5 +- src/transact/query.rs | 203 ++++++++++++++++++++++++++++++++++++++---- 3 files changed, 197 insertions(+), 17 deletions(-) diff --git a/src/data/keyword.rs b/src/data/keyword.rs index e91a85bc..c23d9462 100644 --- a/src/data/keyword.rs +++ b/src/data/keyword.rs @@ -54,6 +54,12 @@ impl Keyword { pub(crate) fn is_reserved(&self) -> bool { self.0.is_empty() || self.0.starts_with(['_', ':', '<', '.', '*', '?', '!']) } + pub(crate) fn is_user_binding(&self) -> bool { + self.0.starts_with('?') + } + pub(crate) fn is_anon_binding(&self) -> bool { + self.0.starts_with('_') + } pub(crate) fn to_string_no_prefix(&self) -> String { format!("{}", self.0) } diff --git a/src/data/tuple.rs b/src/data/tuple.rs index 5b1196b2..353325ed 100644 --- a/src/data/tuple.rs +++ b/src/data/tuple.rs @@ -2,6 +2,7 @@ use std::cmp::{min, Ordering}; use rmp_serde::Serializer; use serde::Serialize; +use anyhow::Result; use crate::data::value::DataValue; @@ -11,7 +12,9 @@ pub enum TupleError { BadData(String, Vec), } -pub(crate) struct Tuple(Vec); +pub(crate) struct Tuple(pub(crate) Vec); + +pub(crate) type TupleIter<'a> = Box> + 'a>; impl Tuple { pub(crate) fn arity(&self) -> usize { diff --git a/src/transact/query.rs b/src/transact/query.rs index 953f5fa3..f151b63f 100644 --- a/src/transact/query.rs +++ b/src/transact/query.rs @@ -1,6 +1,13 @@ +use std::collections::BTreeMap; + +use anyhow::Result; +use itertools::Itertools; + use crate::data::attr::Attribute; use crate::data::keyword::Keyword; +use crate::data::tuple::{Tuple, TupleIter}; use crate::data::value::DataValue; +use crate::runtime::transact::SessionTx; use crate::transact::pull::PullSpec; use crate::Validity; @@ -13,33 +20,197 @@ pub(crate) struct QuerySpec { offset: Option, } +pub(crate) struct InlineFixedRelation { + bindings: Vec, + data: Vec>, +} + +impl InlineFixedRelation { + pub(crate) fn join<'a>( + &'a self, + left_iter: TupleIter<'a>, + (left_join_indices, right_join_indices): (Vec, Vec), + ) -> TupleIter<'a> { + if self.data.is_empty() { + Box::new([].into_iter()) + } else if self.data.len() == 1 { + let data = self.data[0].clone(); + let right_join_values = right_join_indices + .into_iter() + .map(|v| data[v].clone()) + .collect_vec(); + Box::new(left_iter.filter_map_ok(move |tuple| { + let left_join_values = left_join_indices.iter().map(|v| &tuple.0[*v]).collect_vec(); + if left_join_values.into_iter().eq(right_join_values.iter()) { + let mut left_data = tuple.0; + left_data.extend_from_slice(&data); + Some(Tuple(left_data)) + } else { + None + } + })) + } else { + let mut right_mapping = BTreeMap::new(); + for data in &self.data { + let right_join_values = right_join_indices.iter().map(|v| &data[*v]).collect_vec(); + match right_mapping.get_mut(&right_join_values) { + None => { + right_mapping.insert(right_join_values, vec![data]); + } + Some(coll) => { + coll.push(data); + } + } + } + Box::new( + left_iter + .filter_map_ok(move |tuple| { + let left_join_values = + left_join_indices.iter().map(|v| &tuple.0[*v]).collect_vec(); + right_mapping.get(&left_join_values).map(|v| { + v.iter() + .map(|right_values| { + let mut left_data = tuple.0.clone(); + left_data.extend_from_slice(right_values); + Tuple(left_data) + }) + .collect_vec() + }) + }) + .flatten_ok(), + ) + } + } +} + +pub(crate) struct TripleRelation { + attr: Attribute, + vld: Validity, + bindings: [Keyword; 2], +} + +pub(crate) struct ProjectedRelation { + relation: Relation, + eliminate: Vec, +} + pub(crate) enum Relation { - Attr(Attribute, Validity), - FullAttr(Attribute), - Derived(DerivedRelation), + Fixed(InlineFixedRelation), + Triple(TripleRelation), + Derived(StoredDerivedRelation), + Join(Box), + Project(Box), } -pub(crate) struct DerivedRelation { +pub(crate) struct StoredDerivedRelation { name: Keyword, arity: usize, + bindings: Vec, } -impl Relation { - pub(crate) fn arity(&self) -> usize { - match self { - Relation::Attr(_, _) => 3, - Relation::FullAttr(_) => 5, - Relation::Derived(r) => r.arity, +pub(crate) struct Joiner { + // invariant: these are of the same lengths + left_keys: Vec, + right_keys: Vec, +} + +impl Joiner { + pub(crate) fn len(&self) -> usize { + self.left_keys.len() + } + pub(crate) fn swap(self) -> Self { + Self { + left_keys: self.right_keys, + right_keys: self.left_keys, + } + } + pub(crate) fn join_indices( + &self, + left_bindings: &[Keyword], + right_bindings: &[Keyword], + ) -> (Vec, Vec) { + let left_binding_map = left_bindings + .iter() + .enumerate() + .map(|(k, v)| (v, k)) + .collect::>(); + let right_binding_map = right_bindings + .iter() + .enumerate() + .map(|(k, v)| (v, k)) + .collect::>(); + let mut ret_l = Vec::with_capacity(self.left_keys.len()); + let mut ret_r = Vec::with_capacity(self.left_keys.len()); + for (l, r) in self.left_keys.iter().zip(self.right_keys.iter()) { + let l_pos = left_binding_map + .get(l) + .expect("program logic error: join key is wrong"); + let r_pos = right_binding_map + .get(r) + .expect("program logic error: join key is wrong"); + ret_l.push(*l_pos); + ret_r.push(*r_pos) } + (ret_l, ret_r) } } -pub(crate) enum RelationSlot { - Var(Keyword), - Const(DataValue), +pub(crate) struct InnerJoin { + left: Relation, + right: Relation, + joiner: Joiner, } -pub(crate) struct BoundRelation { - relation: Relation, - slots: Vec, +impl Relation { + pub(crate) fn bindings(&self) -> &[Keyword] { + match self { + Relation::Fixed(f) => &f.bindings, + Relation::Triple(t) => &t.bindings, + Relation::Derived(d) => todo!(), + Relation::Join(j) => todo!(), + Relation::Project(p) => todo!(), + } + } + pub(crate) fn iter(&self, tx: &mut SessionTx) -> TupleIter { + match self { + Relation::Fixed(f) => Box::new(f.data.iter().map(|t| Ok(Tuple(t.clone())))), + Relation::Triple(r) => Box::new( + tx.triple_a_before_scan(r.attr.id, r.vld) + .map_ok(|(_, e_id, y)| Tuple(vec![DataValue::EnId(e_id), y])), + ), + Relation::Derived(r) => { + todo!() + } + Relation::Join(j) => j.iter(tx), + Relation::Project(_) => { + todo!() + } + } + } +} + +impl InnerJoin { + pub(crate) fn iter(&self, tx: &mut SessionTx) -> TupleIter { + let left_iter = self.left.iter(tx); + match &self.right { + Relation::Fixed(f) => { + let join_indices = self + .joiner + .join_indices(self.left.bindings(), self.right.bindings()); + f.join(left_iter, join_indices) + } + Relation::Triple(_) => { + todo!() + } + Relation::Derived(_) => { + todo!() + } + Relation::Join(_) => { + todo!() + } + Relation::Project(_) => { + todo!() + } + } + } }