From 7b00b81a2d0fe2d6e2387c940a67d9ed8b57a4e8 Mon Sep 17 00:00:00 2001 From: Ziyang Hu Date: Sun, 15 May 2022 19:29:09 +0800 Subject: [PATCH] parsing of expressions again --- src/data.rs | 1 + src/data/expr.rs | 19 +++ src/data/expr_parser.rs | 245 +++++++++++++++++++++++++++++++++++ src/data/op.rs | 277 ++++++++++++++++++++++++++++++++++++++++ src/data/tuple_set.rs | 8 ++ 5 files changed, 550 insertions(+) create mode 100644 src/data/expr_parser.rs diff --git a/src/data.rs b/src/data.rs index fbd37da7..3054cfa6 100644 --- a/src/data.rs +++ b/src/data.rs @@ -4,3 +4,4 @@ pub(crate) mod tuple; pub(crate) mod tuple_set; pub(crate) mod typing; pub(crate) mod value; +pub(crate) mod expr_parser; diff --git a/src/data/expr.rs b/src/data/expr.rs index a3fa6f32..9c74659a 100644 --- a/src/data/expr.rs +++ b/src/data/expr.rs @@ -2,6 +2,7 @@ use crate::data::op::{AggOp, Op, UnresolvedOp}; use crate::data::tuple_set::{ColId, TableId, TupleSetIdx}; use crate::data::value::{StaticValue, Value}; use std::collections::BTreeMap; +use std::fmt::{Debug, Formatter, write}; use std::result; use std::sync::Arc; @@ -19,6 +20,7 @@ pub(crate) enum ExprError { type Result = result::Result; +#[derive(Clone)] pub(crate) enum Expr<'a> { Const(Value<'a>), List(Vec>), @@ -32,6 +34,23 @@ pub(crate) enum Expr<'a> { IdxAcc(usize, Box>), } +impl<'a> Debug for Expr<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Expr::Const(c) => write!(f, "{}", c), + Expr::List(l) => write!(f, "{:?}", l), + Expr::Dict(d) => write!(f, "{:?}", d), + Expr::Variable(v) => write!(f, "`{}`", v), + Expr::TableCol(tid, cid) => write!(f, "{:?}{:?}", tid, cid), + Expr::TupleSetIdx(sid) => write!(f, "{:?}", sid), + Expr::Apply(op, args) => write!(f, "({} {:?})", op.name(), args), + Expr::ApplyAgg(op, a_args, args) => write!(f, "({} {:?} {:?})", op.name(), a_args, args), + Expr::FieldAcc(field, arg) => write!(f, "(.{} {:?})", field, arg), + Expr::IdxAcc(i, arg) => write!(f, "(.{} {:?})", i, arg) + } + } +} + pub(crate) type StaticExpr = Expr<'static>; fn extract_list_from_value(value: Value, n: usize) -> Result> { diff --git a/src/data/expr_parser.rs b/src/data/expr_parser.rs new file mode 100644 index 00000000..edaf5556 --- /dev/null +++ b/src/data/expr_parser.rs @@ -0,0 +1,245 @@ +use std::borrow::Cow; +use std::collections::BTreeMap; +use pest::prec_climber::{Assoc, Operator, PrecClimber}; +use std::result; +use std::sync::Arc; +use lazy_static::lazy_static; +use pest::iterators::Pair; +use crate::data::expr::{Expr, ExprError}; +use crate::data::op::{Op, OpAdd, OpAnd, OpCoalesce, OpConcat, OpDiv, OpEq, OpGe, OpGt, OpLe, OpLt, OpMerge, OpMinus, OpMod, OpMul, OpNe, OpNegate, OpOr, OpPow, OpStrCat, OpSub, UnresolvedOp}; +use crate::data::value::Value; +use crate::parser::number::parse_int; +use crate::parser::Rule; +use crate::parser::text_identifier::{parse_string}; + +#[derive(thiserror::Error, Debug)] +pub(crate) enum ExprParseError { + #[error(transparent)] + TextParser(#[from] crate::parser::text_identifier::TextParseError), + + #[error(transparent)] + ParseInt(#[from] std::num::ParseIntError), + + #[error(transparent)] + ParseFloat(#[from] std::num::ParseFloatError), + + #[error("Cannot spread {0}")] + SpreadingError(String), +} + +type Result = result::Result; + +impl<'a> TryFrom> for Expr<'a> { + type Error = ExprParseError; + + fn try_from(pair: Pair<'a, Rule>) -> Result { + PREC_CLIMBER.climb(pair.into_inner(), build_expr_primary, build_expr_infix) + } +} + +lazy_static! { + static ref PREC_CLIMBER: PrecClimber = { + use Assoc::*; + + PrecClimber::new(vec![ + Operator::new(Rule::op_or, Left), + Operator::new(Rule::op_and, Left), + Operator::new(Rule::op_gt, Left) + | Operator::new(Rule::op_lt, Left) + | Operator::new(Rule::op_ge, Left) + | Operator::new(Rule::op_le, Left), + Operator::new(Rule::op_mod, Left), + Operator::new(Rule::op_eq, Left) | Operator::new(Rule::op_ne, Left), + Operator::new(Rule::op_add, Left) + | Operator::new(Rule::op_sub, Left) + | Operator::new(Rule::op_str_cat, Left), + Operator::new(Rule::op_mul, Left) | Operator::new(Rule::op_div, Left), + Operator::new(Rule::op_pow, Assoc::Right), + Operator::new(Rule::op_coalesce, Assoc::Left), + ]) + }; +} + +fn build_expr_primary(pair: Pair) -> Result { + match pair.as_rule() { + Rule::expr => build_expr_primary(pair.into_inner().next().unwrap()), + Rule::term => { + let mut pairs = pair.into_inner(); + let mut head = build_expr_primary(pairs.next().unwrap())?; + for p in pairs { + match p.as_rule() { + Rule::accessor => { + let accessor_key = p.into_inner().next().unwrap().as_str(); + head = Expr::FieldAcc(accessor_key.into(), head.into()); + } + Rule::index_accessor => { + let accessor_key = p.into_inner().next().unwrap(); + let accessor_idx = parse_int(accessor_key.as_str(), 10); + head = Expr::IdxAcc(accessor_idx as usize, head.into()); + } + Rule::call => { + let mut pairs = p.into_inner(); + let method_name = pairs.next().unwrap().as_str(); + let op = Arc::new(UnresolvedOp(method_name.to_string())); + let mut args = vec![head]; + args.extend(pairs.map(Expr::try_from).collect::>>()?); + head = Expr::Apply(op, args); + } + _ => todo!(), + } + } + Ok(head) + } + Rule::grouping => Expr::try_from(pair.into_inner().next().unwrap()), + + Rule::unary => { + let mut inner = pair.into_inner(); + let p = inner.next().unwrap(); + let op = p.as_rule(); + let op: Arc = match op { + Rule::term => return build_expr_primary(p), + Rule::negate => Arc::new(OpNegate), + Rule::minus => Arc::new(OpMinus), + _ => unreachable!(), + }; + let term = build_expr_primary(inner.next().unwrap())?; + Ok(Expr::Apply(op, vec![term])) + } + + Rule::pos_int => Ok(Expr::Const(Value::Int(pair.as_str().replace('_', "").parse::()?))), + Rule::hex_pos_int => Ok(Expr::Const(Value::Int(parse_int(pair.as_str(), 16)))), + Rule::octo_pos_int => Ok(Expr::Const(Value::Int(parse_int(pair.as_str(), 8)))), + Rule::bin_pos_int => Ok(Expr::Const(Value::Int(parse_int(pair.as_str(), 2)))), + Rule::dot_float | Rule::sci_float => Ok(Expr::Const(Value::Float( + pair.as_str().replace('_', "").parse::()?.into(), + ))), + Rule::null => Ok(Expr::Const(Value::Null)), + Rule::boolean => Ok(Expr::Const(Value::Bool(pair.as_str() == "true"))), + Rule::quoted_string | Rule::s_quoted_string | Rule::raw_string => { + Ok(Expr::Const(Value::Text(Cow::Owned(parse_string(pair)?)))) + } + Rule::list => { + let mut spread_collected = vec![]; + let mut collected = vec![]; + for p in pair.into_inner() { + match p.as_rule() { + Rule::expr => collected.push(Expr::try_from(p)?), + Rule::spreading => { + let el = p.into_inner().next().unwrap(); + let to_concat = Expr::try_from(el)?; + if !matches!( + to_concat, + Expr::List(_) + | Expr::Variable(_) + | Expr::IdxAcc(_, _) + | Expr::FieldAcc(_, _) + | Expr::Apply(_, _) + ) { + return Err(ExprParseError::SpreadingError(format!("{:?}", to_concat))); + } + if !collected.is_empty() { + spread_collected.push(Expr::List(collected)); + collected = vec![]; + } + spread_collected.push(to_concat); + } + _ => unreachable!(), + } + } + if spread_collected.is_empty() { + return Ok(Expr::List(collected)); + } + if !collected.is_empty() { + spread_collected.push(Expr::List(collected)); + } + Ok(Expr::Apply(Arc::new(OpConcat), spread_collected)) + } + Rule::dict => { + let mut spread_collected = vec![]; + let mut collected = BTreeMap::new(); + for p in pair.into_inner() { + match p.as_rule() { + Rule::dict_pair => { + let mut inner = p.into_inner(); + let name = parse_string(inner.next().unwrap())?; + let val = Expr::try_from(inner.next().unwrap())?; + collected.insert(name.into(), val); + } + Rule::scoped_accessor => { + let name = parse_string(p.into_inner().next().unwrap())?; + let val = Expr::FieldAcc( + name.clone().into(), + Expr::Variable("_".into()).into(), + ); + collected.insert(name.into(), val); + } + Rule::spreading => { + let el = p.into_inner().next().unwrap(); + let to_concat = build_expr_primary(el)?; + if !matches!( + to_concat, + Expr::Dict(_) + | Expr::Variable(_) + | Expr::IdxAcc(_, _) + | Expr::FieldAcc(_, _) + | Expr::Apply(_, _) + ) { + return Err(ExprParseError::SpreadingError(format!("{:?}", to_concat))); + } + if !collected.is_empty() { + spread_collected.push(Expr::Dict(collected)); + collected = BTreeMap::new(); + } + spread_collected.push(to_concat); + } + _ => unreachable!(), + } + } + + if spread_collected.is_empty() { + return Ok(Expr::Dict(collected)); + } + + if !collected.is_empty() { + spread_collected.push(Expr::Dict(collected)); + } + Ok(Expr::Apply(Arc::new(OpMerge), spread_collected)) + } + Rule::param => Ok(Expr::Variable(pair.as_str().into())), + Rule::ident => Ok(Expr::Variable(pair.as_str().into())), + _ => { + println!("Unhandled rule {:?}", pair.as_rule()); + unimplemented!() + } + } +} + + +fn build_expr_infix<'a>( + lhs: Result>, + op: Pair, + rhs: Result>, +) -> Result> { + let lhs = lhs?; + let rhs = rhs?; + let op: Arc = match op.as_rule() { + Rule::op_add => Arc::new(OpAdd), + Rule::op_str_cat => Arc::new(OpStrCat), + Rule::op_sub => Arc::new(OpSub), + Rule::op_mul => Arc::new(OpMul), + Rule::op_div => Arc::new(OpDiv), + Rule::op_eq => Arc::new(OpEq), + Rule::op_ne => Arc::new(OpNe), + Rule::op_or => Arc::new(OpOr), + Rule::op_and => Arc::new(OpAnd), + Rule::op_mod => Arc::new(OpMod), + Rule::op_gt => Arc::new(OpGt), + Rule::op_ge => Arc::new(OpGe), + Rule::op_lt => Arc::new(OpLt), + Rule::op_le => Arc::new(OpLe), + Rule::op_pow => Arc::new(OpPow), + Rule::op_coalesce => Arc::new(OpCoalesce), + _ => unreachable!(), + }; + Ok(Expr::Apply(op, vec![lhs, rhs])) +} \ No newline at end of file diff --git a/src/data/op.rs b/src/data/op.rs index db5eff35..101e9e9a 100644 --- a/src/data/op.rs +++ b/src/data/op.rs @@ -1,3 +1,5 @@ +use std::fmt::{Debug, Formatter}; + pub(crate) trait Op { fn is_resolved(&self) -> bool; fn name(&self) -> &str; @@ -28,4 +30,279 @@ impl AggOp for UnresolvedOp { fn name(&self) -> &str { &self.0 } +} + +pub(crate) struct OpAdd; + +impl Op for OpAdd { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "+" + } +} + +pub(crate) struct OpSub; + +impl Op for OpSub { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "-" + } +} + + +pub(crate) struct OpMul; + +impl Op for OpMul { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "*" + } +} + + +pub(crate) struct OpDiv; + +impl Op for OpDiv { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "/" + } +} + + +pub(crate) struct OpStrCat; + +impl Op for OpStrCat { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "++" + } +} + + +pub(crate) struct OpEq; + +impl Op for OpEq { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "==" + } +} + + +pub(crate) struct OpNe; + +impl Op for OpNe { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "!=" + } +} + + +pub(crate) struct OpOr; + +impl Op for OpOr { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "||" + } +} + + + +pub(crate) struct OpAnd; + +impl Op for OpAnd { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "&&" + } +} + +pub(crate) struct OpMod; + +impl Op for OpMod { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "%" + } +} + +pub(crate) struct OpGt; + +impl Op for OpGt { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + ">" + } +} + + +pub(crate) struct OpGe; + +impl Op for OpGe { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + ">=" + } +} + +pub(crate) struct OpLt; + +impl Op for OpLt { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "<" + } +} + +pub(crate) struct OpLe; + +impl Op for OpLe { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "<=" + } +} + +pub(crate) struct OpPow; + +impl Op for OpPow { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "**" + } +} + +pub(crate) struct OpCoalesce; + +impl Op for OpCoalesce { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "~~" + } +} + +pub(crate) struct OpNegate; + +impl Op for OpNegate { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "!" + } +} + + +pub(crate) struct OpMinus; + +impl Op for OpMinus { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "--" + } +} + +pub(crate) struct OpIsNull; + +impl Op for OpIsNull { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "is_null" + } +} + +pub(crate) struct OpNotNull; + +impl Op for OpNotNull { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "not_null" + } +} + +pub(crate) struct OpConcat; + +impl Op for OpConcat { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "concat" + } +} + + +pub(crate) struct OpMerge; + +impl Op for OpMerge { + fn is_resolved(&self) -> bool { + true + } + + fn name(&self) -> &str { + "merge" + } } \ No newline at end of file diff --git a/src/data/tuple_set.rs b/src/data/tuple_set.rs index 078ecb1e..d768ab04 100644 --- a/src/data/tuple_set.rs +++ b/src/data/tuple_set.rs @@ -60,8 +60,16 @@ impl From<(bool, usize)> for ColId { } } +#[derive(Clone, Copy, PartialEq, Ord, PartialOrd, Eq)] pub(crate) struct TupleSetIdx { pub(crate) is_key: bool, pub(crate) t_set: usize, pub(crate) col_idx: usize, } + + +impl Debug for TupleSetIdx { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "@{}{}{}", self.t_set, if self.is_key { 'K' } else { 'D' }, self.col_idx) + } +} \ No newline at end of file