From 9e0b7395891afde214d0a43543e70b0584fffea2 Mon Sep 17 00:00:00 2001 From: Ziyang Hu Date: Fri, 13 May 2022 19:15:35 +0800 Subject: [PATCH] sorting mat plan --- src/db/ddl.rs | 2 +- src/db/iterator.rs | 37 +++++++++++++++++++-- src/db/plan.rs | 83 ++++++++++++++++++++++++++++++++++++++++------ src/db/query.rs | 6 ++-- src/grammar.pest | 4 +-- 5 files changed, 114 insertions(+), 18 deletions(-) diff --git a/src/db/ddl.rs b/src/db/ddl.rs index 4c34d6eb..84a8c380 100644 --- a/src/db/ddl.rs +++ b/src/db/ddl.rs @@ -305,7 +305,7 @@ impl<'s> Session<'s> { self.define_data(&name, tuple, in_root) } - fn get_next_storage_id(&self, in_root: bool) -> Result { + pub fn get_next_storage_id(&self, in_root: bool) -> Result { let mut key_entry = Tuple::with_null_prefix(); key_entry.push_null(); let db_res = if in_root { diff --git a/src/db/iterator.rs b/src/db/iterator.rs index 79f47076..ae990d9b 100644 --- a/src/db/iterator.rs +++ b/src/db/iterator.rs @@ -5,16 +5,48 @@ use crate::error::Result; use crate::relation::data::{DataKind, EMPTY_DATA}; use crate::relation::table::MegaTuple; use crate::relation::tuple::{CowSlice, CowTuple, OwnTuple, Tuple}; -use crate::relation::value::Value; +use crate::relation::value::{Value}; use cozorocks::IteratorPtr; use std::cmp::Ordering; use std::{iter, mem}; +use crate::db::engine::Session; use crate::db::plan::{ExecPlan, TableRowGetter}; // Implementation notice // Never define `.next()` recursively for iterators below, otherwise stackoverflow is almost // guaranteed (but may not show for test data) +pub struct SortingMaterialization<'a> { + pub(crate) source: Box> + 'a>, + pub(crate) ordering: &'a [(bool, Value<'a>)], + pub(crate) sess: &'a Session<'a>, + pub(crate) sorted: bool +} + +impl <'a> SortingMaterialization<'a> { + fn sort(&mut self) { + // todo!() + self.sorted = true; + } +} + +impl<'a> Drop for SortingMaterialization<'a> { + fn drop(&mut self) { + // todo!() + } +} + +impl <'a> Iterator for SortingMaterialization<'a> { + type Item = Result; + + fn next(&mut self) -> Option { + if !self.sorted { + self.sort(); + } + todo!() + } +} + #[derive(Copy, Clone, Debug)] pub enum NodeEdgeChainKind { Fwd, @@ -1147,8 +1179,9 @@ mod tests { let start = Instant::now(); let s = r##"from (j:Job)<-[hj:HasJob]-(e:Employee) - where j.id == 16 + // where j.id == 16 select { eid: e.id, jid: j.id, fname: e.first_name, salary: hj.salary, job: j.title } + ordered [j.id: desc, e.id] limit 2 offset 1"##; let parsed = Parser::parse(Rule::relational_query, s)?.next().unwrap(); diff --git a/src/db/plan.rs b/src/db/plan.rs index ea7ffb50..666f3dac 100644 --- a/src/db/plan.rs +++ b/src/db/plan.rs @@ -13,10 +13,28 @@ use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::fmt::{Debug, Formatter}; use std::iter; -use crate::db::iterator::{BagsUnionIterator, CartesianProdIterator, EdgeIterator, EdgeKeyOnlyBwdIterator, EdgeToNodeChainJoinIterator, EvalIterator, FilterIterator, KeyedDifferenceIterator, KeyedUnionIterator, KeySortedWithAssocIterator, LimiterIterator, MergeJoinIterator, NodeEdgeChainKind, NodeIterator, NodeToEdgeChainJoinIterator, OuterMergeJoinIterator, OutputIterator}; +use crate::db::iterator::{BagsUnionIterator, CartesianProdIterator, EdgeIterator, EdgeKeyOnlyBwdIterator, EdgeToNodeChainJoinIterator, EvalIterator, FilterIterator, KeyedDifferenceIterator, KeyedUnionIterator, KeySortedWithAssocIterator, LimiterIterator, MergeJoinIterator, NodeEdgeChainKind, NodeIterator, NodeToEdgeChainJoinIterator, OuterMergeJoinIterator, OutputIterator, SortingMaterialization}; use crate::relation::table::MegaTuple; +pub enum SessionSlot<'a> { + Dummy, + Reified(&'a Session<'a>) +} + +impl <'a> Debug for SessionSlot<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + SessionSlot::Dummy => { + write!(f, "DummySession") + } + SessionSlot::Reified(_) => { + write!(f, "Session") + } + } + } +} + pub enum IteratorSlot<'a> { Dummy, Reified(IteratorPtr<'a>), @@ -192,14 +210,19 @@ pub enum ExecPlan<'a> { keys: Vec<(String, Value<'a>)>, vals: Vec<(String, Value<'a>)>, }, - BagsUnionIt { + BagsUnionItPlan { bags: Vec>, }, - LimiterIt { + LimiterItPlan { source: Box>, offset: usize, limit: usize, }, + SortingMatPlan { + source: Box>, + ordering: Vec<(bool, StaticValue)>, + sess: SessionSlot<'a> + }, } impl<'a> ExecPlan<'a> { @@ -231,7 +254,7 @@ impl<'a> ExecPlan<'a> { ExecPlan::KeyedDifferenceItPlan { left, .. } => left.tuple_widths(), ExecPlan::FilterItPlan { source, .. } => source.tuple_widths(), ExecPlan::EvalItPlan { source, .. } => source.tuple_widths(), - ExecPlan::BagsUnionIt { bags } => { + ExecPlan::BagsUnionItPlan { bags } => { if bags.is_empty() { (0, 0) } else { @@ -245,7 +268,10 @@ impl<'a> ExecPlan<'a> { let (l1, l2) = left.tuple_widths(); (l1 + 1, l2 + 1 + right_associates.len()) } - ExecPlan::LimiterIt { source, .. } => { + ExecPlan::LimiterItPlan { source, .. } => { + source.tuple_widths() + } + ExecPlan::SortingMatPlan { source, .. } => { source.tuple_widths() } } @@ -385,7 +411,7 @@ impl<'a> ExecPlan<'a> { started: false, })) } - ExecPlan::BagsUnionIt { bags } => { + ExecPlan::BagsUnionItPlan { bags } => { let bags = bags.iter().map(|i| i.iter()).collect::>>()?; Ok(Box::new(BagsUnionIterator { bags, current: 0 })) } @@ -458,7 +484,7 @@ impl<'a> ExecPlan<'a> { }), }), }, - ExecPlan::LimiterIt { source, limit, offset } => { + ExecPlan::LimiterItPlan { source, limit, offset } => { Ok(Box::new(LimiterIterator { source: source.iter()?, limit: *limit, @@ -466,6 +492,21 @@ impl<'a> ExecPlan<'a> { current: 0, })) } + ExecPlan::SortingMatPlan { source, ordering , sess } => { + match sess { + SessionSlot::Dummy => { + Err(LogicError("Uninitialized session data".to_string())) + } + SessionSlot::Reified(sess) => { + Ok(Box::new(SortingMaterialization { + source: source.iter()?, + ordering, + sess, + sorted: false + })) + } + } + } } } } @@ -656,7 +697,7 @@ impl<'a> Session<'a> { }; (plan, amap) } - ExecPlan::BagsUnionIt { .. } => todo!(), + ExecPlan::BagsUnionItPlan { .. } => todo!(), ExecPlan::ChainJoinItPlan { left, left_info, @@ -716,14 +757,27 @@ impl<'a> Session<'a> { }; (plan, l_map) } - ExecPlan::LimiterIt { source, limit, offset } => { + ExecPlan::LimiterItPlan { source, limit, offset } => { let (source, amap) = self.do_reify_intermediate_plan(*source)?; - (ExecPlan::LimiterIt { + (ExecPlan::LimiterItPlan { source: source.into(), limit, offset, }, amap) } + ExecPlan::SortingMatPlan { source, ordering, .. } => { + let (source, amap) = self.do_reify_intermediate_plan(*source)?; + let ordering = ordering.into_iter().map(|(is_asc, val)| -> Result<(bool, StaticValue)> { + let (_, val) = self.partial_eval(val, &Default::default(), &amap)?; + Ok((is_asc, val)) + }).collect::>>()?; + let temp_table_id = self.get_next_storage_id(false)?; + (ExecPlan::SortingMatPlan { + source: source.into(), + ordering, + sess: SessionSlot::Reified(self) + }, amap) + } }; Ok(res) } @@ -949,10 +1003,17 @@ impl<'a> Session<'a> { mut plan: ExecPlan<'b>, select_data: Selection, ) -> Result> { + if !select_data.ordering.is_empty() { + plan = ExecPlan::SortingMatPlan { + source: plan.into(), + ordering: select_data.ordering, + sess: SessionSlot::Dummy + }; + } if select_data.limit.is_some() || select_data.offset.is_some() { let limit = select_data.limit.unwrap_or(0) as usize; let offset = select_data.offset.unwrap_or(0) as usize; - plan = ExecPlan::LimiterIt { + plan = ExecPlan::LimiterItPlan { source: plan.into(), offset, limit, diff --git a/src/db/query.rs b/src/db/query.rs index 8fd52637..5bb55442 100644 --- a/src/db/query.rs +++ b/src/db/query.rs @@ -290,7 +290,7 @@ impl<'a> Session<'a> { for p in p.into_inner() { ordering.push(( p.as_rule() == Rule::order_asc, - parse_string(p.into_inner().next().unwrap())?, + Value::from_pair(p.into_inner().next().unwrap())?.to_static(), )) } } @@ -325,6 +325,8 @@ impl<'a> Session<'a> { } } + println!("ordering {:?}", ordering); + Ok(Selection { scoped, keys, @@ -341,7 +343,7 @@ pub struct Selection { pub scoped: Option, pub keys: Vec<(String, StaticValue)>, pub vals: Vec<(String, StaticValue)>, - pub ordering: Vec<(bool, String)>, + pub ordering: Vec<(bool, StaticValue)>, pub limit: Option, pub offset: Option, } diff --git a/src/grammar.pest b/src/grammar.pest index fe281337..946182e3 100644 --- a/src/grammar.pest +++ b/src/grammar.pest @@ -191,9 +191,9 @@ where_pattern = { "where" ~ (expr ~ ",")* ~ expr } select_pattern = { "select" ~ (select_dict | scoped_dict) ~ ( (order_pattern ~ offset_pattern?) | (offset_pattern ~ order_pattern?) )? } order_pattern = { "ordered" ~ "[" ~ order_el ~ ("," ~ order_el)* ~ "]" } -order_el = _{order_asc | order_dsc} -order_asc = {expr ~ (":" ~ "asc")?} +order_el = _{order_dsc | order_asc} order_dsc = {expr ~ (":" ~ "desc")} +order_asc = {expr ~ (":" ~ "asc")?} offset_pattern = { limit_clause? ~ offset_clause? } limit_clause = { "limit" ~ pos_int } offset_clause = { "offset" ~ pos_int }