parsing of expressions again

2 years ago · 7b00b81a2d
parent 6b0c3d88c0
commit 7b00b81a2d
5 changed files with 550 additions and 0 deletions
--- a/src/data.rs
+++ b/src/data.rs
@ -4,3 +4,4 @@ pub(crate) mod tuple;
 pub(crate) mod tuple_set;
 pub(crate) mod typing;
 pub(crate) mod value;
+pub(crate) mod expr_parser;
--- a/src/data/expr.rs
+++ b/src/data/expr.rs
@ -2,6 +2,7 @@ use crate::data::op::{AggOp, Op, UnresolvedOp};
 use crate::data::tuple_set::{ColId, TableId, TupleSetIdx};
 use crate::data::value::{StaticValue, Value};
 use std::collections::BTreeMap;
+use std::fmt::{Debug, Formatter, write};
 use std::result;
 use std::sync::Arc;

@ -19,6 +20,7 @@ pub(crate) enum ExprError {

 type Result<T> = result::Result<T, ExprError>;

+#[derive(Clone)]
 pub(crate) enum Expr<'a> {
    Const(Value<'a>),
    List(Vec<Expr<'a>>),
@ -32,6 +34,23 @@ pub(crate) enum Expr<'a> {
    IdxAcc(usize, Box<Expr<'a>>),
 }

+/// Compact, s-expression-like debug rendering of an expression tree.
+///
+/// Constants use their `Display` form, variables are wrapped in backticks,
+/// applications print as `(name args)` using the operator's `name()`, and
+/// field / index accesses print as `(.key target)`.
+impl<'a> Debug for Expr<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Const(value) => write!(f, "{}", value),
+            Self::List(items) => write!(f, "{:?}", items),
+            Self::Dict(entries) => write!(f, "{:?}", entries),
+            Self::Variable(name) => write!(f, "`{}`", name),
+            Self::TableCol(table, column) => write!(f, "{:?}{:?}", table, column),
+            Self::TupleSetIdx(idx) => write!(f, "{:?}", idx),
+            Self::Apply(op, operands) => write!(f, "({} {:?})", op.name(), operands),
+            Self::ApplyAgg(op, agg_operands, operands) => {
+                write!(f, "({} {:?} {:?})", op.name(), agg_operands, operands)
+            }
+            Self::FieldAcc(key, target) => write!(f, "(.{} {:?})", key, target),
+            Self::IdxAcc(position, target) => write!(f, "(.{} {:?})", position, target),
+        }
+    }
+}
+
 pub(crate) type StaticExpr = Expr<'static>;

 fn extract_list_from_value(value: Value, n: usize) -> Result<Vec<Value>> {
--- a/src/data/expr_parser.rs
+++ b/src/data/expr_parser.rs
@ -0,0 +1,245 @@
+use std::borrow::Cow;
+use std::collections::BTreeMap;
+use pest::prec_climber::{Assoc, Operator, PrecClimber};
+use std::result;
+use std::sync::Arc;
+use lazy_static::lazy_static;
+use pest::iterators::Pair;
+use crate::data::expr::{Expr, ExprError};
+use crate::data::op::{Op, OpAdd, OpAnd, OpCoalesce, OpConcat, OpDiv, OpEq, OpGe, OpGt, OpLe, OpLt, OpMerge, OpMinus, OpMod, OpMul, OpNe, OpNegate, OpOr, OpPow, OpStrCat, OpSub, UnresolvedOp};
+use crate::data::value::Value;
+use crate::parser::number::parse_int;
+use crate::parser::Rule;
+use crate::parser::text_identifier::{parse_string};
+
+#[derive(thiserror::Error, Debug)]
+pub(crate) enum ExprParseError { // Failures reported while converting parsed pairs into `Expr` values.
+    #[error(transparent)]
+    TextParser(#[from] crate::parser::text_identifier::TextParseError), // `#[from]` lets `?` lift this; presumably what `parse_string` fails with — confirm
+
+    #[error(transparent)]
+    ParseInt(#[from] std::num::ParseIntError), // lifted by `?` when a decimal literal fails `parse::<i64>()`
+
+    #[error(transparent)]
+    ParseFloat(#[from] std::num::ParseFloatError), // lifted by `?` when a float literal fails `parse::<f64>()`
+
+    #[error("Cannot spread {0}")]
+    SpreadingError(String), // payload is the Debug rendering of the expression that was rejected as a spread operand
+}
+
+type Result<T> = result::Result<T, ExprParseError>; // module-local shorthand: fallible functions in this file fail with ExprParseError
+
+/// Builds an [`Expr`] from a parsed pair by precedence climbing.
+///
+/// The children of `pair` are handed to `PREC_CLIMBER`: operands are
+/// converted by `build_expr_primary` and each operator with its two operands
+/// is combined by `build_expr_infix`.
+impl<'a> TryFrom<Pair<'a, Rule>> for Expr<'a> {
+    type Error = ExprParseError;
+
+    fn try_from(pair: Pair<'a, Rule>) -> Result<Self> {
+        let children = pair.into_inner();
+        PREC_CLIMBER.climb(children, build_expr_primary, build_expr_infix)
+    }
+}
+
+lazy_static! {
+    /// Binary-operator table used by `Expr::try_from`.
+    ///
+    /// The tiers are passed to `PrecClimber::new` from loosest-binding to
+    /// tightest-binding; operators joined with `|` share one tier.
+    ///
+    /// NOTE(review): `%` sits in its own tier between the comparison and the
+    /// equality operators rather than next to `*` and `/` — confirm that this
+    /// ordering is intended.
+    static ref PREC_CLIMBER: PrecClimber<Rule> = {
+        use Assoc::{Left, Right};
+
+        let logical_or = Operator::new(Rule::op_or, Left);
+        let logical_and = Operator::new(Rule::op_and, Left);
+        let comparison = Operator::new(Rule::op_gt, Left)
+            | Operator::new(Rule::op_lt, Left)
+            | Operator::new(Rule::op_ge, Left)
+            | Operator::new(Rule::op_le, Left);
+        let modulo = Operator::new(Rule::op_mod, Left);
+        let equality = Operator::new(Rule::op_eq, Left) | Operator::new(Rule::op_ne, Left);
+        let additive = Operator::new(Rule::op_add, Left)
+            | Operator::new(Rule::op_sub, Left)
+            | Operator::new(Rule::op_str_cat, Left);
+        let multiplicative = Operator::new(Rule::op_mul, Left) | Operator::new(Rule::op_div, Left);
+        // The only right-associative operator in the table.
+        let power = Operator::new(Rule::op_pow, Right);
+        let coalesce = Operator::new(Rule::op_coalesce, Left);
+
+        PrecClimber::new(vec![
+            logical_or,
+            logical_and,
+            comparison,
+            modulo,
+            equality,
+            additive,
+            multiplicative,
+            power,
+            coalesce,
+        ])
+    };
+}
+
+/// Converts a single operand pair (anything that is not a binary operator)
+/// into an [`Expr`].
+///
+/// Handled rules: `expr`, `term` (a head followed by field accessors, index
+/// accessors and method calls), `grouping`, `unary`, the integer / float /
+/// null / boolean / string literals, `list`, `dict`, `param` and `ident`.
+///
+/// # Errors
+/// Returns an `ExprParseError` when a decimal or float literal fails to parse,
+/// when `parse_string` fails, or when a spreading element wraps an expression
+/// kind that is not accepted for spreading.
+///
+/// # Panics
+/// Panics on rules this function does not handle, and on pairs that lack the
+/// children the code expects (the `unwrap()` calls).
+fn build_expr_primary(pair: Pair<Rule>) -> Result<Expr> {
+    match pair.as_rule() {
+        // NOTE(review): only the first child is converted on this path; any
+        // operator / operand pairs after it are never visited. `Expr::try_from`
+        // is the entry point that climbs over all children.
+        Rule::expr => build_expr_primary(pair.into_inner().next().unwrap()),
+        Rule::term => {
+            let mut pairs = pair.into_inner();
+            let mut head = build_expr_primary(pairs.next().unwrap())?;
+            // Each suffix wraps everything built so far, so chains nest left to right.
+            for p in pairs {
+                match p.as_rule() {
+                    Rule::accessor => {
+                        let accessor_key = p.into_inner().next().unwrap().as_str();
+                        head = Expr::FieldAcc(accessor_key.into(), head.into());
+                    }
+                    Rule::index_accessor => {
+                        let accessor_key = p.into_inner().next().unwrap();
+                        let accessor_idx = parse_int(accessor_key.as_str(), 10);
+                        // NOTE(review): `as` wraps/truncates silently if parse_int can
+                        // yield a value outside usize's range — confirm it cannot.
+                        head = Expr::IdxAcc(accessor_idx as usize, head.into());
+                    }
+                    Rule::call => {
+                        let mut pairs = p.into_inner();
+                        let method_name = pairs.next().unwrap().as_str();
+                        // The method name is kept as an unresolved operator.
+                        let op = Arc::new(UnresolvedOp(method_name.to_string()));
+                        // The receiver becomes the first argument of the call.
+                        let mut args = vec![head];
+                        args.extend(pairs.map(Expr::try_from).collect::<Result<Vec<_>>>()?);
+                        head = Expr::Apply(op, args);
+                    }
+                    other => todo!("term suffix {:?} is not supported yet", other),
+                }
+            }
+            Ok(head)
+        }
+        Rule::grouping => Expr::try_from(pair.into_inner().next().unwrap()),
+
+        Rule::unary => {
+            let mut inner = pair.into_inner();
+            let p = inner.next().unwrap();
+            let op = p.as_rule();
+            let op: Arc<dyn Op> = match op {
+                // No prefix operator: the unary pair is just a wrapper around a term.
+                Rule::term => return build_expr_primary(p),
+                Rule::negate => Arc::new(OpNegate),
+                Rule::minus => Arc::new(OpMinus),
+                _ => unreachable!(),
+            };
+            let term = build_expr_primary(inner.next().unwrap())?;
+            Ok(Expr::Apply(op, vec![term]))
+        }
+
+        // `_` is removed from decimal and float literals before parsing.
+        Rule::pos_int => Ok(Expr::Const(Value::Int(pair.as_str().replace('_', "").parse::<i64>()?))),
+        Rule::hex_pos_int => Ok(Expr::Const(Value::Int(parse_int(pair.as_str(), 16)))),
+        Rule::octo_pos_int => Ok(Expr::Const(Value::Int(parse_int(pair.as_str(), 8)))),
+        Rule::bin_pos_int => Ok(Expr::Const(Value::Int(parse_int(pair.as_str(), 2)))),
+        Rule::dot_float | Rule::sci_float => Ok(Expr::Const(Value::Float(
+            pair.as_str().replace('_', "").parse::<f64>()?.into(),
+        ))),
+        Rule::null => Ok(Expr::Const(Value::Null)),
+        Rule::boolean => Ok(Expr::Const(Value::Bool(pair.as_str() == "true"))),
+        Rule::quoted_string | Rule::s_quoted_string | Rule::raw_string => {
+            Ok(Expr::Const(Value::Text(Cow::Owned(parse_string(pair)?))))
+        }
+        Rule::list => {
+            // `collected` gathers the current run of plain elements;
+            // `spread_collected` gathers the segments to concatenate once a
+            // spreading element has been seen.
+            let mut spread_collected = vec![];
+            let mut collected = vec![];
+            for p in pair.into_inner() {
+                match p.as_rule() {
+                    Rule::expr => collected.push(Expr::try_from(p)?),
+                    Rule::spreading => {
+                        let el = p.into_inner().next().unwrap();
+                        let to_concat = Expr::try_from(el)?;
+                        // Only these expression kinds are accepted as spread operands.
+                        if !matches!(
+                            to_concat,
+                            Expr::List(_)
+                                | Expr::Variable(_)
+                                | Expr::IdxAcc(_, _)
+                                | Expr::FieldAcc(_, _)
+                                | Expr::Apply(_, _)
+                        ) {
+                            return Err(ExprParseError::SpreadingError(format!("{:?}", to_concat)));
+                        }
+                        // Flush the plain elements seen so far as their own segment.
+                        if !collected.is_empty() {
+                            spread_collected.push(Expr::List(collected));
+                            collected = vec![];
+                        }
+                        spread_collected.push(to_concat);
+                    }
+                    _ => unreachable!(),
+                }
+            }
+            // Without any spreading element the result is a plain list.
+            if spread_collected.is_empty() {
+                return Ok(Expr::List(collected));
+            }
+            if !collected.is_empty() {
+                spread_collected.push(Expr::List(collected));
+            }
+            Ok(Expr::Apply(Arc::new(OpConcat), spread_collected))
+        }
+        Rule::dict => {
+            // Same segment scheme as for lists, with dict segments combined by OpMerge.
+            let mut spread_collected = vec![];
+            let mut collected = BTreeMap::new();
+            for p in pair.into_inner() {
+                match p.as_rule() {
+                    Rule::dict_pair => {
+                        let mut inner = p.into_inner();
+                        let name = parse_string(inner.next().unwrap())?;
+                        let val = Expr::try_from(inner.next().unwrap())?;
+                        collected.insert(name.into(), val);
+                    }
+                    Rule::scoped_accessor => {
+                        // A bare name stands for "field `name` of the variable `_`",
+                        // stored under the same name.
+                        let name = parse_string(p.into_inner().next().unwrap())?;
+                        let val = Expr::FieldAcc(
+                            name.clone().into(),
+                            Expr::Variable("_".into()).into(),
+                        );
+                        collected.insert(name.into(), val);
+                    }
+                    Rule::spreading => {
+                        let el = p.into_inner().next().unwrap();
+                        // NOTE(review): the list branch converts the spread operand
+                        // with `Expr::try_from`, this branch with `build_expr_primary`;
+                        // confirm which one matches the grammar of `spreading`.
+                        let to_concat = build_expr_primary(el)?;
+                        if !matches!(
+                            to_concat,
+                            Expr::Dict(_)
+                                | Expr::Variable(_)
+                                | Expr::IdxAcc(_, _)
+                                | Expr::FieldAcc(_, _)
+                                | Expr::Apply(_, _)
+                        ) {
+                            return Err(ExprParseError::SpreadingError(format!("{:?}", to_concat)));
+                        }
+                        if !collected.is_empty() {
+                            spread_collected.push(Expr::Dict(collected));
+                            collected = BTreeMap::new();
+                        }
+                        spread_collected.push(to_concat);
+                    }
+                    _ => unreachable!(),
+                }
+            }
+
+            if spread_collected.is_empty() {
+                return Ok(Expr::Dict(collected));
+            }
+
+            if !collected.is_empty() {
+                spread_collected.push(Expr::Dict(collected));
+            }
+            Ok(Expr::Apply(Arc::new(OpMerge), spread_collected))
+        }
+        // Parameters and identifiers both become variables named by their source text.
+        Rule::param | Rule::ident => Ok(Expr::Variable(pair.as_str().into())),
+        // Carry the offending rule in the panic message instead of printing it to stdout.
+        other => unimplemented!("Unhandled rule {:?}", other),
+    }
+}
+
+
+/// Combines two already-converted operands with a binary operator pair.
+///
+/// An `Err` on either side is returned unchanged (the left side is checked
+/// first) before the operator is looked at. Otherwise the result is an
+/// `Expr::Apply` holding the operator object for `op`'s rule and
+/// `[lhs, rhs]` as its arguments.
+///
+/// # Panics
+/// Hits `unreachable!()` if `op` carries a rule not listed below.
+fn build_expr_infix<'a>(
+    lhs: Result<Expr<'a>>,
+    op: Pair<Rule>,
+    rhs: Result<Expr<'a>>,
+) -> Result<Expr<'a>> {
+    let (left, right) = (lhs?, rhs?);
+    let operator: Arc<dyn Op> = match op.as_rule() {
+        // arithmetic and string concatenation
+        Rule::op_add => Arc::new(OpAdd),
+        Rule::op_sub => Arc::new(OpSub),
+        Rule::op_mul => Arc::new(OpMul),
+        Rule::op_div => Arc::new(OpDiv),
+        Rule::op_mod => Arc::new(OpMod),
+        Rule::op_pow => Arc::new(OpPow),
+        Rule::op_str_cat => Arc::new(OpStrCat),
+        // equality and ordering
+        Rule::op_eq => Arc::new(OpEq),
+        Rule::op_ne => Arc::new(OpNe),
+        Rule::op_gt => Arc::new(OpGt),
+        Rule::op_ge => Arc::new(OpGe),
+        Rule::op_lt => Arc::new(OpLt),
+        Rule::op_le => Arc::new(OpLe),
+        // boolean connectives and coalescing
+        Rule::op_or => Arc::new(OpOr),
+        Rule::op_and => Arc::new(OpAnd),
+        Rule::op_coalesce => Arc::new(OpCoalesce),
+        _ => unreachable!(),
+    };
+    Ok(Expr::Apply(operator, vec![left, right]))
+}
--- a/src/data/op.rs
+++ b/src/data/op.rs
@ -1,3 +1,5 @@
+use std::fmt::{Debug, Formatter};
+
 pub(crate) trait Op {
    fn is_resolved(&self) -> bool;
    fn name(&self) -> &str;
@ -28,4 +30,279 @@ impl AggOp for UnresolvedOp {
    fn name(&self) -> &str {
        &self.0
    }
+}
+
+pub(crate) struct OpAdd; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_add.
+
+impl Op for OpAdd {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        "+"
+    }
+}
+
+pub(crate) struct OpSub; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_sub.
+
+impl Op for OpSub {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        "-"
+    }
+}
+
+
+pub(crate) struct OpMul; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_mul.
+
+impl Op for OpMul {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        "*"
+    }
+}
+
+
+pub(crate) struct OpDiv; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_div.
+
+impl Op for OpDiv {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        "/"
+    }
+}
+
+
+pub(crate) struct OpStrCat; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_str_cat.
+
+impl Op for OpStrCat {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        "++"
+    }
+}
+
+
+pub(crate) struct OpEq; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_eq.
+
+impl Op for OpEq {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        "=="
+    }
+}
+
+
+pub(crate) struct OpNe; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_ne.
+
+impl Op for OpNe {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        "!="
+    }
+}
+
+
+pub(crate) struct OpOr; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_or.
+
+impl Op for OpOr {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        "||"
+    }
+}
+
+
+
+pub(crate) struct OpAnd; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_and.
+
+impl Op for OpAnd {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        "&&"
+    }
+}
+
+pub(crate) struct OpMod; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_mod.
+
+impl Op for OpMod {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        "%"
+    }
+}
+
+pub(crate) struct OpGt; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_gt.
+
+impl Op for OpGt {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        ">"
+    }
+}
+
+
+pub(crate) struct OpGe; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_ge.
+
+impl Op for OpGe {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        ">="
+    }
+}
+
+pub(crate) struct OpLt; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_lt.
+
+impl Op for OpLt {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        "<"
+    }
+}
+
+pub(crate) struct OpLe; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_le.
+
+impl Op for OpLe {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        "<="
+    }
+}
+
+pub(crate) struct OpPow; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_pow.
+
+impl Op for OpPow {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        "**"
+    }
+}
+
+pub(crate) struct OpCoalesce; // Unit marker for the binary operator that build_expr_infix selects for Rule::op_coalesce.
+
+impl Op for OpCoalesce {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        "~~"
+    }
+}
+
+pub(crate) struct OpNegate; // Unit marker for the prefix operator that build_expr_primary selects for Rule::negate.
+
+impl Op for OpNegate {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // symbol printed by Expr's Debug impl via op.name()
+        "!"
+    }
+}
+
+
+pub(crate) struct OpMinus; // Unit marker for the prefix operator that build_expr_primary selects for Rule::minus.
+
+impl Op for OpMinus {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // differs from OpSub's "-", so the two stay distinguishable in Debug output
+        "--"
+    }
+}
+
+pub(crate) struct OpIsNull; // Unit marker type; not constructed by the parser code visible in this change.
+
+impl Op for OpIsNull {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // name printed by Expr's Debug impl via op.name()
+        "is_null"
+    }
+}
+
+pub(crate) struct OpNotNull; // Unit marker type; not constructed by the parser code visible in this change.
+
+impl Op for OpNotNull {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // name printed by Expr's Debug impl via op.name()
+        "not_null"
+    }
+}
+
+pub(crate) struct OpConcat; // Unit marker applied by build_expr_primary to the segments of a list literal that contains spreading elements.
+
+impl Op for OpConcat {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // name printed by Expr's Debug impl via op.name()
+        "concat"
+    }
+}
+
+
+pub(crate) struct OpMerge; // Unit marker applied by build_expr_primary to the segments of a dict literal that contains spreading elements.
+
+impl Op for OpMerge {
+    fn is_resolved(&self) -> bool { // always true for this operator
+        true
+    }
+
+    fn name(&self) -> &str { // name printed by Expr's Debug impl via op.name()
+        "merge"
+    }
 }
--- a/src/data/tuple_set.rs
+++ b/src/data/tuple_set.rs
@ -60,8 +60,16 @@ impl From<(bool, usize)> for ColId {
    }
 }

+#[derive(Clone, Copy, PartialEq, Ord, PartialOrd, Eq)]
 pub(crate) struct TupleSetIdx { // Three-part index; its Debug output is `@`, t_set, `K` or `D`, then col_idx.
    pub(crate) is_key: bool, // shown as 'K' when true and 'D' when false by the Debug impl
    pub(crate) t_set: usize, // presumably selects which tuple set is addressed — confirm against users of this type
    pub(crate) col_idx: usize, // presumably the column position within the selected part — confirm
 }
+
+
+/// Compact debug form: `@`, then `t_set`, then `K` when `is_key` is set and
+/// `D` otherwise, then `col_idx`, with no separators in between.
+impl Debug for TupleSetIdx {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let part = if self.is_key { 'K' } else { 'D' };
+        write!(f, "@{}{}{}", self.t_set, part, self.col_idx)
+    }
+}