From 793030d9a1d022d7da9fa02bba9efc2fc314a701 Mon Sep 17 00:00:00 2001 From: Ziyang Hu Date: Thu, 19 May 2022 22:10:32 +0800 Subject: [PATCH] reification without execution --- src/data/eval.rs | 12 +- src/data/op.rs | 11 ++ src/data/op/boolean.rs | 4 +- src/data/op/control.rs | 6 +- src/ddl.rs | 1 + src/ddl/parser.rs | 87 +++++++---- src/ddl/reify.rs | 348 +++++++++++++++++++++++++++++++++++++++++ src/grammar.pest | 2 +- 8 files changed, 429 insertions(+), 42 deletions(-) create mode 100644 src/ddl/reify.rs diff --git a/src/data/eval.rs b/src/data/eval.rs index 50799197..8b0f4f64 100644 --- a/src/data/eval.rs +++ b/src/data/eval.rs @@ -53,15 +53,15 @@ impl RowEvalContext for () { } pub(crate) trait PartialEvalContext { - fn resolve<'a>(&'a self, key: &str) -> Option>; - fn resolve_table_col<'a>(&'a self, binding: &str, col: &str) -> Option<(TableId, ColId)>; + fn resolve(&self, key: &str) -> Option; + fn resolve_table_col(&self, binding: &str, col: &str) -> Option<(TableId, ColId)>; } impl PartialEvalContext for () { - fn resolve<'a>(&'a self, _key: &str) -> Option> { + fn resolve(&self, _key: &str) -> Option { None } - fn resolve_table_col<'a>(&'a self, _binding: &str, _col: &str) -> Option<(TableId, ColId)> { + fn resolve_table_col(&self, _binding: &str, _col: &str) -> Option<(TableId, ColId)> { None } } @@ -79,14 +79,14 @@ fn extract_optimized_u_args(args: Vec) -> Expr { } impl<'a> Expr<'a> { - pub(crate) fn interpret_eval(self, ctx: &'a C) -> Result { + pub(crate) fn interpret_eval(self, ctx: &'a C) -> Result { match self.partial_eval(ctx)? { Expr::Const(v) => Ok(v), v => Err(EvalError::IncompleteEvaluation(format!("{:?}", v))), } } - pub(crate) fn partial_eval(self, ctx: &'a C) -> Result { + pub(crate) fn partial_eval(self, ctx: &'a C) -> Result { let res = match self { v @ (Expr::Const(_) | Expr::TableCol(_, _) | Expr::TupleSetIdx(_)) => v, Expr::List(l) => Expr::List( diff --git a/src/data/op.rs b/src/data/op.rs index 60283b8b..61e0ffe9 100644 --- a/src/data/op.rs +++ b/src/data/op.rs @@ -17,6 +17,7 @@ pub(crate) use combine::*; pub(crate) use comparison::*; pub(crate) use control::*; pub(crate) use text::*; +use crate::data::expr::Expr; type Result = result::Result; @@ -32,6 +33,8 @@ pub(crate) trait AggOp: Send + Sync { fn arity(&self) -> Option; fn has_side_effect(&self) -> bool; fn name(&self) -> &str; + fn prep(&self, args: &[Expr]) -> Result<()>; + fn get(&self, args: &[Expr]) -> Result; } pub(crate) struct UnresolvedOp(pub String); @@ -66,6 +69,14 @@ impl AggOp for UnresolvedOp { fn name(&self) -> &str { &self.0 } + + fn prep(&self, args: &[Expr]) -> Result<()> { + todo!() + } + + fn get(&self, args: &[Expr]) -> Result { + todo!() + } } pub(crate) fn extract_two_args<'a>(args: Vec>) -> (Value<'a>, Value<'a>) { diff --git a/src/data/op/boolean.rs b/src/data/op/boolean.rs index 7fe30fd0..17b9aa70 100644 --- a/src/data/op/boolean.rs +++ b/src/data/op/boolean.rs @@ -106,7 +106,7 @@ impl Op for OpOr { } } -pub(crate) fn partial_eval_or<'a, T: PartialEvalContext + 'a>( +pub(crate) fn partial_eval_or<'a, T: PartialEvalContext>( ctx: &'a T, args: Vec>, ) -> Result> { @@ -210,7 +210,7 @@ impl Op for OpAnd { } } -pub(crate) fn partial_eval_and<'a, T: PartialEvalContext + 'a>( +pub(crate) fn partial_eval_and<'a, T: PartialEvalContext>( ctx: &'a T, args: Vec>, ) -> Result> { diff --git a/src/data/op/control.rs b/src/data/op/control.rs index 21d57d99..2d6c1289 100644 --- a/src/data/op/control.rs +++ b/src/data/op/control.rs @@ -52,7 +52,7 @@ pub(crate) fn row_eval_coalesce<'a, T: RowEvalContext + 'a>( pub(crate) const IF_NAME: &str = "if"; -pub(crate) fn partial_eval_coalesce<'a, T: PartialEvalContext + 'a>( +pub(crate) fn partial_eval_coalesce<'a, T: PartialEvalContext>( ctx: &'a T, args: Vec>, ) -> Result> { @@ -94,7 +94,7 @@ pub(crate) fn row_eval_if_expr<'a, T: RowEvalContext + 'a>( } } -pub(crate) fn partial_eval_if_expr<'a, T: PartialEvalContext + 'a>( +pub(crate) fn partial_eval_if_expr<'a, T: PartialEvalContext>( ctx: &'a T, cond: Expr<'a>, if_part: Expr<'a>, @@ -135,7 +135,7 @@ pub(crate) fn row_eval_switch_expr<'a, T: RowEvalContext + 'a>( default.row_eval(ctx) } -pub(crate) fn partial_eval_switch_expr<'a, T: PartialEvalContext + 'a>( +pub(crate) fn partial_eval_switch_expr<'a, T: PartialEvalContext>( ctx: &'a T, args: Vec<(Expr<'a>, Expr<'a>)>, ) -> Result> { diff --git a/src/ddl.rs b/src/ddl.rs index a5f93427..9db8354c 100644 --- a/src/ddl.rs +++ b/src/ddl.rs @@ -1 +1,2 @@ pub(crate) mod parser; +pub(crate) mod reify; diff --git a/src/ddl/parser.rs b/src/ddl/parser.rs index fbfe6096..ebb801b5 100644 --- a/src/ddl/parser.rs +++ b/src/ddl/parser.rs @@ -18,51 +18,52 @@ pub(crate) enum DdlParseError { ExprParse(#[from] ExprParseError), #[error("definition error: {0}")] - Definition(&'static str) + Definition(&'static str), } type Result = result::Result; #[derive(Debug, Clone)] pub(crate) struct ColSchema { - name: String, - typing: Typing, - default: StaticExpr, + pub(crate) name: String, + pub(crate) typing: Typing, + pub(crate) default: StaticExpr, } #[derive(Debug, Clone)] pub(crate) struct NodeSchema { - name: String, - keys: Vec, - vals: Vec, + pub(crate) name: String, + pub(crate) keys: Vec, + pub(crate) vals: Vec, } #[derive(Debug, Clone)] pub(crate) struct EdgeSchema { - name: String, - src_name: String, - dst_name: String, - keys: Vec, - vals: Vec, + pub(crate) name: String, + pub(crate) src_name: String, + pub(crate) dst_name: String, + pub(crate) keys: Vec, + pub(crate) vals: Vec, } #[derive(Debug, Clone)] pub(crate) struct AssocSchema { - name: String, - src_name: String, - vals: Vec, + pub(crate) name: String, + pub(crate) src_name: String, + pub(crate) vals: Vec, } #[derive(Debug, Clone)] pub(crate) struct IndexSchema { - name: String, - src_name: String, - index: Vec, + pub(crate) name: String, + pub(crate) src_name: String, + pub(crate) assoc_names: Vec, + pub(crate) index: Vec, } #[derive(Debug, Clone)] pub(crate) struct SequenceSchema { - name: String, + pub(crate) name: String, } #[derive(Debug, Clone)] @@ -71,7 +72,7 @@ pub(crate) enum DdlSchema { Edge(EdgeSchema), Assoc(AssocSchema), Index(IndexSchema), - Sequence(SequenceSchema) + Sequence(SequenceSchema), } impl<'a> TryFrom> for DdlSchema { @@ -83,6 +84,7 @@ impl<'a> TryFrom> for DdlSchema { Rule::edge_def => DdlSchema::Edge(pair.try_into()?), Rule::assoc_def => DdlSchema::Assoc(pair.try_into()?), Rule::seq_def => DdlSchema::Sequence(pair.try_into()?), + Rule::index_def => DdlSchema::Index(pair.try_into()?), _ => todo!() }) } @@ -120,7 +122,7 @@ impl<'a> TryFrom> for EdgeSchema { src_name, dst_name, keys, - vals + vals, }) } } @@ -132,29 +134,48 @@ impl<'a> TryFrom> for AssocSchema { let src_name = build_name_in_def(pairs.next().unwrap(), true)?; let name = build_name_in_def(pairs.next().unwrap(), true)?; - let (keys, vals) = parse_cols(pairs.next().unwrap())?; + let (keys, vals) = parse_cols(pairs.next().unwrap())?; if !keys.is_empty() { - return Err(DdlParseError::Definition("assoc cannot have keys")) + return Err(DdlParseError::Definition("assoc cannot have keys")); } if vals.is_empty() { - return Err(DdlParseError::Definition("assoc has no values")) + return Err(DdlParseError::Definition("assoc has no values")); } Ok(AssocSchema { name, src_name, - vals + vals, }) } } impl<'a> TryFrom> for IndexSchema { type Error = DdlParseError; - fn try_from(value: Pair) -> Result { - todo!() + fn try_from(pair: Pair) -> Result { + let mut pairs = pair.into_inner(); + let index_name = build_name_in_def(pairs.next().unwrap(), true)?; + let main_name = build_name_in_def(pairs.next().unwrap(), false)?; + let mut associate_names = vec![]; + let mut indices = vec![]; + for pair in pairs { + match pair.as_rule() { + Rule::name_in_def => associate_names.push(build_name_in_def(pair, false)?), + _ => indices.push(Expr::try_from(pair)?.to_static()) + } + } + if indices.is_empty() { + return Err(DdlParseError::Definition("Empty indexed columns")); + } + Ok(IndexSchema { + name: index_name, + src_name: main_name, + assoc_names: associate_names, + index: indices + }) } } -impl <'a> TryFrom> for SequenceSchema { +impl<'a> TryFrom> for SequenceSchema { type Error = DdlParseError; fn try_from(pair: Pair) -> Result { let name = build_name_in_def(pair.into_inner().next().unwrap(), true)?; @@ -187,7 +208,7 @@ fn parse_col_entry(pair: Pair) -> Result<(bool, ColSchema)> { Ok((is_key, ColSchema { name, typing, - default + default, })) } @@ -198,7 +219,7 @@ fn parse_col_name(pair: Pair) -> Result<(bool, String)> { Rule::key_marker => { nxt = pairs.next().unwrap(); true - }, + } _ => false }; let name = build_name_in_def(nxt, true)?; @@ -252,6 +273,12 @@ mod tests { let p = CozoParser::parse(Rule::definition_all, s).unwrap().next().unwrap(); dbg!(DdlSchema::try_from(p)?); + let s = r#" + index bankaccountidx: Person + BankAccount [id, x, y, z] + "#; + let p = CozoParser::parse(Rule::definition_all, s).unwrap().next().unwrap(); + dbg!(DdlSchema::try_from(p)?); + Ok(()) } } \ No newline at end of file diff --git a/src/ddl/reify.rs b/src/ddl/reify.rs new file mode 100644 index 00000000..fae4dad1 --- /dev/null +++ b/src/ddl/reify.rs @@ -0,0 +1,348 @@ +use std::collections::BTreeSet; +use std::result; +use chrono::format::Item; +use crate::data::eval::{EvalError, PartialEvalContext}; +use crate::data::expr::{Expr, StaticExpr}; +use crate::data::tuple_set::{ColId, TableId, TupleSetIdx}; +use crate::data::value::Value; +use crate::ddl::parser::{AssocSchema, ColSchema, DdlSchema, EdgeSchema, IndexSchema, NodeSchema, SequenceSchema}; + +#[derive(thiserror::Error, Debug)] +pub(crate) enum DdlReifyError { + #[error("Name clash: {0}")] + NameClash(String), + + #[error(transparent)] + Eval(#[from] EvalError), +} + +type Result = result::Result; + +#[derive(Debug, Copy, Clone)] +pub(crate) enum TableKind { + Node, + Edge, + Assoc, + Index, + Sequence, +} + +#[derive(Debug, Clone)] +pub(crate) enum TableInfo { + Node(NodeInfo), + Edge(EdgeInfo), +} + +impl TableInfo { + pub(crate) fn table_id(&self) -> TableId { + match self { + TableInfo::Node(n) => n.tid, + TableInfo::Edge(e) => e.tid + } + } +} + +#[derive(Debug, Clone)] +pub(crate) struct NodeInfo { + pub(crate) name: String, + pub(crate) tid: TableId, + pub(crate) keys: Vec, + pub(crate) vals: Vec, +} + + +#[derive(Debug, Clone)] +pub(crate) struct EdgeInfo { + pub(crate) name: String, + pub(crate) tid: TableId, + pub(crate) src_id: TableId, + pub(crate) dst_id: TableId, + pub(crate) keys: Vec, + pub(crate) vals: Vec, +} + +#[derive(Debug, Clone)] +pub(crate) struct AssocInfo { + pub(crate) name: String, + pub(crate) tid: TableId, + pub(crate) src_id: TableId, + pub(crate) vals: Vec, +} + +#[derive(Debug, Clone)] +pub(crate) struct IndexInfo { + pub(crate) name: String, + pub(crate) tid: TableId, + pub(crate) src_id: TableId, + pub(crate) assoc_ids: Vec, + pub(crate) index: Vec, +} + +pub(crate) struct SequenceInfo { + pub(crate) name: String, + pub(crate) tid: TableId, +} + +pub(crate) trait DdlContext { + fn gen_table_id(&mut self) -> TableId; + fn resolve_table_id_for_derivation>(&self, name: &str, kind: I) -> Result; + fn resolve_table>(&self, name: &str, kind: I, for_derivation: bool) -> Result; + fn resolve_table_by_id(&self, tid: TableId) -> Result; + fn resolve_associates_for(&self, id: TableId) -> Vec; + fn build_table(&mut self, schema: DdlSchema) -> Result<()> { + match schema { + DdlSchema::Node(n) => self.build_node(n)?, + DdlSchema::Edge(e) => self.build_edge(e)?, + DdlSchema::Assoc(a) => self.build_assoc(a)?, + DdlSchema::Index(i) => self.build_index(i)?, + DdlSchema::Sequence(s) => self.build_sequence(s)? + }; + Ok(()) + } + fn build_node(&mut self, schema: NodeSchema) -> Result<()> { + check_name_clash([&schema.keys, &schema.vals])?; + let info = NodeInfo { + name: schema.name, + tid: self.gen_table_id(), + keys: eval_defaults(schema.keys)?, + vals: eval_defaults(schema.vals)?, + }; + self.store_node(info) + } + fn store_node(&mut self, info: NodeInfo) -> Result<()>; + fn build_edge(&mut self, schema: EdgeSchema) -> Result<()> { + check_name_clash([&schema.keys, &schema.vals])?; + let info = EdgeInfo { + name: schema.name, + tid: self.gen_table_id(), + src_id: self.resolve_table_id_for_derivation(&schema.src_name, [TableKind::Node])?, + dst_id: self.resolve_table_id_for_derivation(&schema.dst_name, [TableKind::Node])?, + keys: eval_defaults(schema.keys)?, + vals: eval_defaults(schema.vals)?, + }; + self.store_edge(info) + } + fn store_edge(&mut self, info: EdgeInfo) -> Result<()>; + fn build_assoc(&mut self, schema: AssocSchema) -> Result<()> { + let src_info = self.resolve_table(&schema.src_name, [TableKind::Node, TableKind::Edge], true)?; + let src_id = src_info.table_id(); + let associates = self.resolve_associates_for(src_id); + let mut names_to_check: Vec<_> = associates.iter().map(|ai| &ai.vals).collect(); + names_to_check.push(&schema.vals); + check_name_clash(names_to_check)?; + let info = AssocInfo { + name: schema.name, + tid: self.gen_table_id(), + src_id, + vals: eval_defaults(schema.vals)?, + }; + self.store_assoc(info) + } + fn store_assoc(&mut self, info: AssocInfo) -> Result<()>; + fn build_index(&mut self, schema: IndexSchema) -> Result<()> { + let src_schema = self.resolve_table(&schema.src_name, [TableKind::Node, TableKind::Edge], true)?; + let associates = self.resolve_associates_for(src_schema.table_id()); + let assoc_vals = associates.iter().map(|v| v.vals.as_slice()).collect::>(); + let index_exprs = match &src_schema { + TableInfo::Node(node_info) => { + let ctx = NodeDefEvalCtx { + keys: &node_info.keys, + vals: &node_info.vals, + assoc_vals: &assoc_vals, + }; + schema.index.into_iter().map(|ex| + ex.partial_eval(&ctx).map(|ex| ex.to_static())) + .collect::, _>>()? + } + TableInfo::Edge(edge_info) => { + let src_info = self.resolve_table_by_id(edge_info.src_id)?; + let src_keys = match &src_info { + TableInfo::Node(n) => &n.keys, + _ => unreachable!() + }; + let dst_info = self.resolve_table_by_id(edge_info.dst_id)?; + let dst_keys = match &dst_info { + TableInfo::Node(n) => &n.keys, + _ => unreachable!() + }; + let ctx = EdgeDefEvalCtx { + keys: &edge_info.keys, + vals: &edge_info.vals, + src_keys, + dst_keys, + assoc_vals: &assoc_vals, + }; + schema.index.into_iter().map(|ex| + ex.partial_eval(&ctx).map(|ex| ex.to_static())) + .collect::, _>>()? + } + }; + + let info = IndexInfo { + name: schema.name, + tid: self.gen_table_id(), + src_id: src_schema.table_id(), + assoc_ids: schema.assoc_names.iter().map(|n| + self.resolve_table_id_for_derivation(n, [TableKind::Assoc])) + .collect::>>()?, + index: index_exprs, + }; + self.store_index(info) + } + fn store_index(&mut self, info: IndexInfo) -> Result<()>; + fn build_sequence(&mut self, schema: SequenceSchema) -> Result<()> { + let tid = self.gen_table_id(); + self.store_sequence(SequenceInfo { + name: schema.name, + tid, + }) + } + fn store_sequence(&mut self, info: SequenceInfo) -> Result<()>; +} + +fn check_name_clash<'a, I: IntoIterator, II: IntoIterator>(kvs: I) -> Result<()> { + let mut seen: BTreeSet<&str> = BTreeSet::new(); + for it in kvs.into_iter() { + for el in it.into_iter() { + if !seen.insert(&el.name as &str) { + return Err(DdlReifyError::NameClash(el.name.clone())); + } + } + } + Ok(()) +} + +fn eval_defaults(cols: Vec) -> Result> { + cols.into_iter().map(|ColSchema { name, typing, default }| + match default.partial_eval(&()) { + Ok(default) => Ok(ColSchema { + name, + typing, + default, + }), + Err(e) => Err(e.into()) + }).collect::>>() +} + +pub(crate) struct NodeDefEvalCtx<'a> { + keys: &'a [ColSchema], + vals: &'a [ColSchema], + assoc_vals: &'a [&'a [ColSchema]], +} + +impl<'a> NodeDefEvalCtx<'a> { + fn resolve_name(&self, name: &str) -> Option { + for (i, col) in self.keys.iter().enumerate() { + if name == col.name { + return Some(TupleSetIdx { + is_key: true, + t_set: 0, + col_idx: i, + }); + } + } + for (i, col) in self.vals.iter().enumerate() { + if name == col.name { + return Some(TupleSetIdx { + is_key: false, + t_set: 0, + col_idx: i, + }); + } + } + for (j, set) in self.assoc_vals.iter().enumerate() { + for (i, col) in set.iter().enumerate() { + if name == col.name { + return Some(TupleSetIdx { + is_key: false, + t_set: j + 1, + col_idx: i, + }); + } + } + } + None + } +} + +impl<'a> PartialEvalContext for NodeDefEvalCtx<'a> { + fn resolve(&self, key: &str) -> Option { + self.resolve_name(key).map(Expr::TupleSetIdx) + } + + fn resolve_table_col(&self, _binding: &str, _col: &str) -> Option<(TableId, ColId)> { + None + } +} + + +pub(crate) struct EdgeDefEvalCtx<'a> { + keys: &'a [ColSchema], + vals: &'a [ColSchema], + src_keys: &'a [ColSchema], + dst_keys: &'a [ColSchema], + assoc_vals: &'a [&'a [ColSchema]], +} + +impl<'a> EdgeDefEvalCtx<'a> { + fn resolve_name(&self, name: &str) -> Option { + for (i, col) in self.src_keys.iter().enumerate() { + if name == col.name { + return Some(TupleSetIdx { + is_key: true, + t_set: 0, + col_idx: i + 1, + }); + } + } + for (i, col) in self.keys.iter().enumerate() { + if name == col.name { + return Some(TupleSetIdx { + is_key: true, + t_set: 0, + col_idx: i + 1 + self.src_keys.len(), + }); + } + } + for (i, col) in self.dst_keys.iter().enumerate() { + if name == col.name { + return Some(TupleSetIdx { + is_key: true, + t_set: 0, + col_idx: i + 2 + self.src_keys.len() + self.dst_keys.len(), + }); + } + } + for (i, col) in self.vals.iter().enumerate() { + if name == col.name { + return Some(TupleSetIdx { + is_key: false, + t_set: 0, + col_idx: i, + }); + } + } + for (j, set) in self.assoc_vals.iter().enumerate() { + for (i, col) in set.iter().enumerate() { + if name == col.name { + return Some(TupleSetIdx { + is_key: false, + t_set: j + 1, + col_idx: i, + }); + } + } + } + None + } +} + +impl<'a> PartialEvalContext for EdgeDefEvalCtx<'a> { + fn resolve(&self, key: &str) -> Option { + self.resolve_name(key).map(Expr::TupleSetIdx) + } + + fn resolve_table_col(&self, _binding: &str, _col: &str) -> Option<(TableId, ColId)> { + None + } +} diff --git a/src/grammar.pest b/src/grammar.pest index 9bbed77c..83477904 100644 --- a/src/grammar.pest +++ b/src/grammar.pest @@ -175,7 +175,7 @@ assoc_def = { "assoc" ~ name_in_def ~ ":" ~ name_in_def ~ cols_def ~ ";"? } edge_def = { "edge" ~ "(" ~ name_in_def ~ ")" ~ "-" ~ "[" ~ name_in_def ~ "]" ~ "->" ~ "(" ~ name_in_def ~ ")" ~ cols_def? ~ ";"? } -index_def = { "index" ~ (name_in_def ~ ":")? ~ name_in_def ~ ("+" ~ name_in_def)* ~ "[" ~ (expr ~ ",")* ~ expr? ~ "]" ~ ";"? } +index_def = { "index" ~ name_in_def ~ ":" ~ name_in_def ~ ("+" ~ name_in_def)* ~ "[" ~ (expr ~ ",")* ~ expr? ~ "]" ~ ";"? } seq_def = { "sequence" ~ name_in_def ~ ";" } type_def = { "type" ~ name_in_def ~ "=" ~ typing ~ ";" }