whole set of iterators

main
Ziyang Hu 2 years ago
parent b192c72e51
commit 29fb1b9999

@ -1,5 +1,6 @@
use std::collections::BTreeMap;
use std::{iter, mem};
use std::cmp::Ordering;
use pest::iterators::Pair;
use cozorocks::{IteratorPtr};
use crate::db::engine::Session;
@ -8,9 +9,9 @@ use crate::db::table::{ColId, TableId, TableInfo};
use crate::relation::value::{StaticValue, Value};
use crate::parser::Rule;
use crate::error::Result;
use crate::relation::data::EMPTY_DATA;
use crate::relation::data::{DataKind, EMPTY_DATA};
use crate::relation::table::MegaTuple;
use crate::relation::tuple::{CowSlice, CowTuple, OwnTuple, SliceTuple, Tuple};
use crate::relation::tuple::{CowSlice, CowTuple, OwnTuple, Tuple};
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum QueryPlan {
@ -136,197 +137,256 @@ impl<'a> Session<'a> {
Ok(QueryPlan::Projection { arg: Box::new(plan), projection: select_data })
}
pub fn iter_table(&self, tid: TableId) -> TableRowIterable {
let it = if tid.in_root {
pub fn raw_iterator(&self, in_root: bool) -> IteratorPtr {
if in_root {
self.txn.iterator(true, &self.perm_cf)
} else {
self.txn.iterator(false, &self.temp_cf)
};
TableRowIterable::new(it, tid.id as u32)
}
}
pub struct TableRowIterable<'a> {
it: IteratorPtr<'a>,
prefix: u32,
}
impl<'a> TableRowIterable<'a> {
pub fn new(it: IteratorPtr<'a>, prefix: u32) -> Self {
Self {
it,
prefix,
}
}
}
impl<'a> IntoIterator for &'a TableRowIterable<'a> {
type Item = (SliceTuple, SliceTuple);
type IntoIter = TableRowIterator<'a>;
fn into_iter(self) -> Self::IntoIter {
let prefix_tuple = OwnTuple::with_prefix(self.prefix);
self.it.seek(prefix_tuple);
Self::IntoIter { it: &self.it, started: false }
/// Describes a scan over the node table identified by `tid`.
///
/// The raw storage iterator is taken from `raw_iterator`, selected by
/// `tid.in_root`, and wrapped together with the table id in a
/// `MegaTupleIt::NodeIt`. No seeking happens here; the cursor is only
/// positioned once the returned value is iterated.
pub fn iter_node(&self, tid: TableId) -> MegaTupleIt {
    MegaTupleIt::NodeIt {
        it: self.raw_iterator(tid.in_root),
        tid: tid.id as u32,
    }
}
}
pub struct TableRowIterator<'a> {
it: &'a IteratorPtr<'a>,
started: bool,
/// A re-iterable description of a stream of `MegaTuple`s.
///
/// Iterating a `&MegaTupleIt` (through its `IntoIterator` impl) seeks the underlying
/// storage cursors and yields a `MegaTupleIterator` that borrows from this value.
pub enum MegaTupleIt<'a> {
/// Scan that starts at the key prefix `tid`; each key/value pair becomes one tuple.
NodeIt { it: IteratorPtr<'a>, tid: u32 },
/// Scan that starts at the key prefix `tid` and skips rows whose value has `DataKind::Edge`.
EdgeIt { it: IteratorPtr<'a>, tid: u32 },
/// Scan that starts at the key prefix `tid` and keeps only rows whose value has
/// `DataKind::Edge`; that value is emitted as the tuple's single key, with no vals.
EdgeKeyOnlyBwdIt { it: IteratorPtr<'a>, tid: u32 },
// EdgeBwdIt { it: IteratorPtr<'a>, sess: &'a Session<'a>, tid: u32 },
// IndexIt {it: ..}
/// `main` extended with one value per associate; each associate is a `(prefix, cursor)`
/// pair that is scanned like a node table and matched against `main` by key.
KeySortedWithAssocIt { main: Box<MegaTupleIt<'a>>, associates: Vec<(u32, IteratorPtr<'a>)> },
/// Every tuple of `left` combined with every tuple of `right`.
CartesianProdIt { left: Box<MegaTupleIt<'a>>, right: Box<MegaTupleIt<'a>> },
/// Join of `left` and `right` on the listed key columns.
/// NOTE(review): iterating this variant currently hits `todo!()`.
MergeJoinIt { left: Box<MegaTupleIt<'a>>, right: Box<MegaTupleIt<'a>>, left_keys: Vec<(TableId, ColId)>, right_keys: Vec<(TableId, ColId)> },
}
impl<'a> Iterator for TableRowIterator<'a> {
type Item = (SliceTuple, SliceTuple);
fn next(&mut self) -> Option<Self::Item> {
if self.started {
self.it.next();
} else {
self.started = true;
}
self.it.pair().map(|(k, v)| (Tuple::new(k), Tuple::new(v)))
}
}
pub struct TableRowWithAssociatesIterable<'a> {
main: TableRowIterable<'a>,
associates: Vec<TableRowIterable<'a>>,
}
impl<'a> TableRowWithAssociatesIterable<'a> {
pub fn new(main: TableRowIterable<'a>, associates: Vec<TableRowIterable<'a>>) -> Self {
Self { main, associates }
}
}
pub struct TableRowWithAssociatesIterator<'a> {
main: TableRowIterator<'a>,
associates: Vec<TableRowIterator<'a>>,
buffer: Vec<Option<(SliceTuple, SliceTuple)>>,
}
impl<'a> IntoIterator for &'a TableRowWithAssociatesIterable<'a> {
impl<'a> IntoIterator for &'a MegaTupleIt<'a> {
type Item = MegaTuple;
type IntoIter = TableRowWithAssociatesIterator<'a>;
type IntoIter = MegaTupleIterator<'a>;
fn into_iter(self) -> Self::IntoIter {
Self::IntoIter {
main: (&self.main).into_iter(),
associates: self.associates.iter().map(|v| v.into_iter()).collect(),
buffer: iter::repeat_with(|| None).take(self.associates.len()).collect(),
}
}
}
pub enum MegaTupleIterable<'a> {
TableRowIt(IteratorPtr<'a>),
TableRowWithAssocIt(IteratorPtr<'a>, Vec<IteratorPtr<'a>>),
CartesianProdIt(Box<MegaTupleIterable<'a>>, Box<MegaTupleIterable<'a>>),
}
match self {
MegaTupleIt::NodeIt { it, tid } => {
let prefix_tuple = OwnTuple::with_prefix(*tid);
it.seek(prefix_tuple);
MegaTupleIterator::NodeIterator {
it,
started: false,
}
}
MegaTupleIt::EdgeIt { it, tid } => {
let prefix_tuple = OwnTuple::with_prefix(*tid);
it.seek(prefix_tuple);
impl<'a> Iterator for TableRowWithAssociatesIterator<'a> {
type Item = MegaTuple;
MegaTupleIterator::EdgeIterator {
it,
started: false,
}
}
MegaTupleIt::EdgeKeyOnlyBwdIt { it, tid } => {
let prefix_tuple = OwnTuple::with_prefix(*tid);
it.seek(prefix_tuple);
fn next(&mut self) -> Option<Self::Item> {
match self.main.next() {
None => None,
Some((k, v)) => {
let l = self.associates.len();
let mut assoc_vals: Vec<Option<CowTuple>> = iter::repeat_with(|| None).take(l).collect();
let l = assoc_vals.len();
for i in 0..l {
let cached = self.buffer.get(i).unwrap();
let cached = if matches!(cached, None) {
self.buffer[i] = self.associates.get_mut(i).unwrap().next();
self.buffer.get(i).unwrap()
} else {
cached
};
if let Some((ck, _)) = cached {
if k.key_part_eq(ck) {
let (_, v) = mem::replace(&mut self.buffer[i], None).unwrap();
assoc_vals[i] = Some(v.into())
}
MegaTupleIterator::EdgeKeyOnlyBwdIterator {
it,
started: false,
}
}
MegaTupleIt::KeySortedWithAssocIt { main, associates } => {
let buffer = iter::repeat_with(|| None).take(associates.len()).collect();
let associates = associates.into_iter().map(|(tid, it)| {
let prefix_tuple = OwnTuple::with_prefix(*tid);
it.seek(prefix_tuple);
MegaTupleIterator::NodeIterator {
it,
started: false,
}
}).collect();
MegaTupleIterator::KeySortedWithAssocIterator {
main: Box::new(main.as_ref().into_iter()),
associates,
buffer,
}
let mut vals: Vec<CowTuple> = Vec::with_capacity(assoc_vals.len());
vals.push(v.into());
vals.extend(assoc_vals.into_iter().map(|v|
match v {
None => {
CowTuple::new(CowSlice::Own(EMPTY_DATA.into()))
}
Some(v) => v
}));
Some(MegaTuple {
keys: vec![k.into()],
vals,
})
}
MegaTupleIt::CartesianProdIt { left, right } => {
MegaTupleIterator::CartesianProdIterator {
left: Box::new(left.as_ref().into_iter()),
left_cache: MegaTuple::empty_tuple(),
right_source: right.as_ref(),
right: Box::new(right.as_ref().into_iter()),
}
}
MegaTupleIt::MergeJoinIt { .. } => todo!(),
}
}
}
pub struct CartesianProductIterable<A, B> {
left: A,
right: B,
}
impl<'a, A, B, AI, BI> IntoIterator for &'a CartesianProductIterable<A, B>
where &'a A: IntoIterator<Item=MegaTuple, IntoIter=AI>,
&'a B: IntoIterator<Item=MegaTuple, IntoIter=BI>,
AI: Iterator<Item=MegaTuple>,
BI: Iterator<Item=MegaTuple> {
type Item = MegaTuple;
type IntoIter = CartesianProductIterator<'a, B, AI, BI>;
fn into_iter(self) -> Self::IntoIter {
let mut left = (&self.left).into_iter();
let left_cache = left.next();
Self::IntoIter {
right_source: &self.right,
left,
right: (&self.right).into_iter(),
left_cache,
}
}
}
pub struct CartesianProductIterator<'a, B, AI, BI>
where &'a B: IntoIterator<Item=MegaTuple, IntoIter=BI> {
right_source: &'a B,
left: AI,
right: BI,
left_cache: Option<MegaTuple>,
/// The running state produced by iterating a `&MegaTupleIt`; implements
/// `Iterator<Item = MegaTuple>`.
pub enum MegaTupleIterator<'a> {
/// Borrowed cursor over a node table; `started` is false until the first call to
/// `next`, so the row the cursor was seeked to is not skipped.
NodeIterator { it: &'a IteratorPtr<'a>, started: bool },
/// Borrowed cursor that yields only rows whose value is not `DataKind::Edge`.
EdgeIterator { it: &'a IteratorPtr<'a>, started: bool },
/// Borrowed cursor that yields only rows whose value is `DataKind::Edge`.
EdgeKeyOnlyBwdIterator { it: &'a IteratorPtr<'a>, started: bool },
/// `main` merged with `associates` by key; `buffer[i]` holds the `(key, value)` most
/// recently pulled from `associates[i]` that has not been consumed yet.
KeySortedWithAssocIterator { main: Box<MegaTupleIterator<'a>>, associates: Vec<MegaTupleIterator<'a>>, buffer: Vec<Option<(CowTuple, CowTuple)>> },
/// Nested-loop product of `left` and `right`.
CartesianProdIterator {
// outer loop
left: Box<MegaTupleIterator<'a>>,
// current outer tuple; an empty tuple means none has been fetched yet
left_cache: MegaTuple,
// used to restart `right` each time it runs out
right_source: &'a MegaTupleIt<'a>,
// inner loop
right: Box<MegaTupleIterator<'a>>,
},
}
impl<'a, B, AI, BI> Iterator for CartesianProductIterator<'a, B, AI, BI>
where &'a B: IntoIterator<Item=MegaTuple, IntoIter=BI>,
AI: Iterator<Item=MegaTuple>,
BI: Iterator<Item=MegaTuple> {
impl<'a> Iterator for MegaTupleIterator<'a> {
type Item = MegaTuple;
fn next(&mut self) -> Option<Self::Item> {
match &self.left_cache {
None => None,
Some(t) => {
match self.right.next() {
Some(t2) => {
let mut keys = t.keys.clone();
keys.extend(t2.keys);
let mut vals = t.vals.clone();
vals.extend(t2.vals);
Some(MegaTuple { keys, vals })
match self {
MegaTupleIterator::NodeIterator { it, started } => {
if *started {
it.next();
} else {
*started = true;
}
it.pair().map(|(k, v)| {
MegaTuple {
keys: vec![Tuple::new(k).into()],
vals: vec![Tuple::new(v).into()],
}
None => {
self.left_cache = self.left.next();
self.right = self.right_source.into_iter();
self.next()
})
}
// Edge scan: yields every row whose value is NOT of kind `DataKind::Edge`.
// NOTE(review): nothing in this arm stops at the end of the table's key prefix;
// the scan only ends when the cursor has no more pairs — confirm this is intended.
MegaTupleIterator::EdgeIterator { it, started } => {
// The cursor already sits on the first candidate after the seek, so it is only
// advanced from the second call onwards.
if *started {
it.next();
} else {
*started = true;
}
loop {
match it.pair() {
// cursor has no current pair: the scan is over
None => return None,
Some((k, v)) => {
let vt = Tuple::new(v);
// rows whose value is of kind Edge are skipped here
// (presumably the backward-edge entries — TODO confirm)
if matches!(vt.data_kind(), Ok(DataKind::Edge)) {
it.next()
} else {
let kt = Tuple::new(k);
return Some(MegaTuple {
keys: vec![kt.into()],
vals: vec![vt.into()],
});
}
}
}
}
}
// Backward key-only edge scan: yields only rows whose value IS of kind
// `DataKind::Edge`, emitting that value as the single key and no vals.
// NOTE(review): as with the other scans, there is no check that the cursor is
// still inside the table's key prefix — confirm this is intended.
MegaTupleIterator::EdgeKeyOnlyBwdIterator { it, started } => {
// Do not advance on the first call: the seek already placed the cursor.
if *started {
it.next();
} else {
*started = true;
}
loop {
match it.pair() {
// cursor has no current pair: the scan is over
None => return None,
// the stored key itself is ignored; only the stored value is used
Some((_k, rev_k)) => {
let rev_k_tuple = Tuple::new(rev_k);
// skip every row whose value is not of kind Edge
if !matches!(rev_k_tuple.data_kind(), Ok(DataKind::Edge)) {
it.next()
} else {
return Some(MegaTuple {
keys: vec![rev_k_tuple.into()],
vals: vec![],
});
}
}
}
}
}
// Merge of `main` with its associates: every tuple of `main` is emitted exactly once,
// extended with one value per associate. An associate without a row for the current
// key contributes a placeholder built from `EMPTY_DATA`.
// NOTE(review): the early `break` on `Ordering::Less` only lines up if `main` and every
// associate yield keys in the same ascending order — confirm the sources guarantee it.
MegaTupleIterator::KeySortedWithAssocIterator { main, associates, buffer } => {
// first get a tuple from main
match main.next() {
None => None, // main exhausted, we are finished
Some(MegaTuple { mut keys, mut vals }) => {
// extract the key from main: the last key is the one matched against the associates
// NOTE(review): any other keys of the main tuple are dropped from the output
// (only `k` is returned) — fine if main always carries a single key; confirm.
let k = keys.pop().unwrap();
let l = associates.len();
// initialize the vector of associate values, one slot per associate, all unset
let mut assoc_vals: Vec<Option<CowTuple>> = iter::repeat_with(|| None).take(l).collect();
let l = assoc_vals.len();
for i in 0..l {
// for each associate
let cached = buffer.get(i).unwrap();
// if there is no buffered row, try to pull one from the associate first
// NOTE(review): once an associate has returned None its slot stays None, so it is
// polled again for every later main tuple — confirm polling a finished cursor is safe.
if matches!(cached, None) {
let assoc_data = associates.get_mut(i).unwrap().next()
.map(|mut mt| (mt.keys.pop().unwrap(), mt.vals.pop().unwrap()));
buffer[i] = assoc_data;
}
// while a row is buffered, compare its key against the main key
while let Some((ck, _)) = buffer.get(i).unwrap() {
match k.key_part_cmp(ck) {
Ordering::Less => {
// target key less than cache key, no value for current iteration;
// the buffered row is kept for a later main tuple
break;
}
Ordering::Equal => {
// target key equals cache key, we put it into collected values
// and clear the buffer slot so the next row is pulled next time
let (_, v) = mem::replace(&mut buffer[i], None).unwrap();
assoc_vals[i] = Some(v.into());
break;
}
Ordering::Greater => {
// target key greater than cache key, meaning that the source has holes (maybe due to filtering)
// get a new one into buffer; the loop ends if the associate is exhausted
let assoc_data = associates.get_mut(i).unwrap().next()
.map(|mut mt| (mt.keys.pop().unwrap(), mt.vals.pop().unwrap()));
buffer[i] = assoc_data;
}
}
}
}
// append one value per associate, in associate order, after main's own vals
vals.extend(assoc_vals.into_iter().map(|v|
match v {
None => {
CowTuple::new(CowSlice::Own(EMPTY_DATA.into()))
}
Some(v) => v
}));
Some(MegaTuple {
keys: vec![k],
vals,
})
}
}
}
// Nested-loop product: `left_cache` holds the current left tuple while `right` is
// drained; when `right` runs out it is rebuilt from `right_source` and `left` advances.
// NOTE(review): when `left` runs out, `left_cache` still holds the last left tuple and
// `right` has already been restarted, so calling `next` again after `None` would start
// producing tuples again (the iterator is not fused) — confirm callers never do that.
MegaTupleIterator::CartesianProdIterator { left, left_cache, right, right_source } => {
// An empty cache (a tuple without keys) means no left tuple has been fetched yet.
if left_cache.is_empty() {
*left_cache = match left.next() {
None => return None,
Some(v) => v
}
}
let r_tpl = match right.next() {
None => {
// right side finished for the current left tuple: restart it and move left forward
*right = Box::new((*right_source).into_iter());
*left_cache = match left.next() {
None => return None,
Some(v) => v
};
match right.next() {
// early return in case right is empty
None => return None,
Some(r_tpl) => r_tpl
}
}
Some(r_tpl) => r_tpl
};
// output = left tuple's keys/vals followed by the right tuple's keys/vals
let mut ret = left_cache.clone();
ret.keys.extend(r_tpl.keys);
ret.vals.extend(r_tpl.vals);
Some(ret)
}
}
}
@ -340,7 +400,7 @@ mod tests {
use crate::db::engine::Engine;
use crate::parser::{Parser, Rule};
use pest::Parser as PestParser;
use crate::db::plan::{CartesianProductIterable, TableRowWithAssociatesIterable};
use crate::db::plan::{MegaTupleIt};
use crate::db::query::FromEl;
use crate::relation::value::Value;
use crate::error::Result;
@ -411,8 +471,7 @@ mod tests {
println!("{:?}", rel_tbls);
let tbl = rel_tbls.pop().unwrap();
let it = sess.iter_table(tbl);
let it = TableRowWithAssociatesIterable::new(it, vec![]);
let it = sess.iter_node(tbl);
for tuple in &it {
match sess.tuple_eval(&where_vals, &tuple).unwrap() {
Value::Bool(true) => {
@ -429,10 +488,12 @@ mod tests {
let duration = start.elapsed();
let duration2 = start2.elapsed();
println!("Time elapsed {:?} {:?}", duration, duration2);
let a = sess.iter_table(tbl);
let b = sess.iter_table(tbl);
let c = sess.iter_table(tbl);
let it = TableRowWithAssociatesIterable::new(a, vec![b, c]);
let it = MegaTupleIt::KeySortedWithAssocIt {
main: Box::new(sess.iter_node(tbl)),
associates: vec![(tbl.id as u32, sess.raw_iterator(true)),
(tbl.id as u32, sess.raw_iterator(true)),
(tbl.id as u32, sess.raw_iterator(true))],
};
{
for el in &it {
println!("{:?}", el);
@ -444,31 +505,31 @@ mod tests {
println!("{:?}", el);
}
}
let a = sess.iter_table(tbl);
let a = TableRowWithAssociatesIterable::new(a, vec![]);
let b = sess.iter_table(tbl);
let b = TableRowWithAssociatesIterable::new(b, vec![]);
let c_it = CartesianProductIterable { left: a, right: b };
let c = sess.iter_table(tbl);
let c = TableRowWithAssociatesIterable::new(c, vec![]);
let c_it = CartesianProductIterable { left: c, right: c_it };
let mut it = sess.iter_node(tbl);
for _ in 0..2 {
it = MegaTupleIt::CartesianProdIt {
left: Box::new(it),
right: Box::new(sess.iter_node(tbl)),
}
}
let start = Instant::now();
println!("Now cartesian product");
for el in &c_it {
// if i % 4096 == 0 {
// println!("{}: {:?}", i, el)
// }
let mut n = 0;
for el in &it {
if n % 4096 == 0 {
println!("{}: {:?}", n, el)
}
let _x = el.keys.into_iter().map(|v| v.iter().map(|_v| ()).collect::<Vec<_>>()).collect::<Vec<_>>();
let _y = el.vals.into_iter().map(|v| v.iter().map(|_v| ()).collect::<Vec<_>>()).collect::<Vec<_>>();
n += 1;
}
let duration = start.elapsed();
println!("Time elapsed {:?}", duration);
let a = sess.iter_table(tbl);
let ac = (&a).into_iter().count();
println!("{}", ac);
println!("{} items per second", 1e9 * (n as f64) / (duration.as_nanos() as f64));
// let a = sess.iter_table(tbl);
// let ac = (&a).into_iter().count();
// println!("{}", ac);
}
drop(engine);
let _ = fs::remove_dir_all(db_path);

@ -43,5 +43,14 @@ pub enum Table {
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MegaTuple {
pub keys: Vec<CowTuple>,
pub vals: Vec<CowTuple>
pub vals: Vec<CowTuple>,
}
impl MegaTuple {
pub fn empty_tuple() -> Self {
MegaTuple { keys: vec![], vals: vec![] }
}
pub fn is_empty(&self) -> bool {
self.keys.is_empty()
}
}

@ -1,5 +1,6 @@
use std::borrow::{Cow};
use std::cell::RefCell;
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
@ -97,6 +98,11 @@ impl<T: AsRef<[u8]>> Tuple<T> {
self.data.as_ref()[PREFIX_LEN..] == other.data.as_ref()[PREFIX_LEN..]
}
/// Orders `self` relative to `other` by comparing what the two tuples' `iter()`
/// produce, element by element.
///
/// NOTE(review): the name suggests the table prefix is ignored, as in
/// `key_part_eq`; that depends on what `iter()` yields — confirm.
#[inline]
pub fn key_part_cmp<T2: AsRef<[u8]>>(&self, other: &Tuple<T2>) -> Ordering {
    let lhs = self.iter();
    let rhs = other.iter();
    lhs.cmp(rhs)
}
#[inline]
pub fn new(data: T) -> Self {
Self {

Loading…
Cancel
Save