parsing literals
parent
da6f772fd1
commit
f220bf13c7
@ -0,0 +1,156 @@
|
|||||||
|
use std::iter;
|
||||||
|
use pest::iterators::{Pair, Pairs};
|
||||||
|
use pest::Parser as PestParser;
|
||||||
|
use pest::prec_climber::{Assoc, PrecClimber, Operator};
|
||||||
|
use crate::parser::Parser;
|
||||||
|
use crate::parser::Rule;
|
||||||
|
use anyhow::Result;
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use crate::ast::Expr::Const;
|
||||||
|
use crate::value::Value;
|
||||||
|
|
||||||
|
|
||||||
|
lazy_static! {
    /// Operator table for pest's precedence climber, built once on first use.
    ///
    /// Each vector element is one precedence tier; operators joined with `|`
    /// share a tier. Per pest's `PrecClimber::new` contract the tiers are
    /// listed from loosest-binding to tightest-binding.
    ///
    /// NOTE(review): `op_mod` has its own tier between the comparison and the
    /// equality operators rather than sitting with `op_mul`/`op_div` — confirm
    /// this ordering is intended.
    static ref PREC_CLIMBER: PrecClimber<Rule> = {
        // Tiny constructors so that every tier below fits on one line.
        let left = |rule: Rule| Operator::new(rule, Assoc::Left);
        let right = |rule: Rule| Operator::new(rule, Assoc::Right);

        let tiers = vec![
            left(Rule::op_or),
            left(Rule::op_and),
            left(Rule::op_gt) | left(Rule::op_lt) | left(Rule::op_ge) | left(Rule::op_le),
            left(Rule::op_mod),
            left(Rule::op_eq) | left(Rule::op_ne),
            left(Rule::op_add) | left(Rule::op_sub),
            left(Rule::op_mul) | left(Rule::op_div),
            right(Rule::op_pow),
            left(Rule::op_coalesce),
        ];

        PrecClimber::new(tiers)
    };
}
|
||||||
|
|
||||||
|
#[derive(PartialEq, Debug)]
|
||||||
|
pub enum Expr<'a> {
|
||||||
|
UnaryOp,
|
||||||
|
BinaryOp,
|
||||||
|
AssocOp,
|
||||||
|
Accessor,
|
||||||
|
FnCall,
|
||||||
|
Const(Value<'a>),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Infix callback handed to the precedence climber: receives the already
/// parsed left operand, the operator pair and the right operand.
///
/// # Panics
///
/// Always — infix expressions are not implemented yet, so all three arguments
/// are ignored and the body is a bare `unimplemented!()`.
fn parse_expr_infix<'a>(_lhs: Expr<'a>, _op: Pair<Rule>, _rhs: Expr<'a>) -> Expr<'a> {
    unimplemented!()
}
|
||||||
|
|
||||||
|
/// Parses an integer literal that carries a two-character prefix.
///
/// The first two bytes of `s` are skipped without being inspected (callers in
/// this file pass text starting with `0x`, `0o`, `0b` or `\u`), every `_`
/// separator is removed, and the remaining digits are read in base `radix`.
///
/// # Panics
///
/// Panics with a message naming the offending literal when
/// * `s` is shorter than two bytes (or byte 2 is not a character boundary), or
/// * the remaining digits are empty, not valid in `radix`, or overflow `i64`.
///
/// `i64::from_str_radix` itself panics if `radix` is outside `2..=36`.
fn parse_int(s: &str, radix: u32) -> i64 {
    // Checked slicing: a too-short literal produces a descriptive panic
    // instead of an opaque "byte index out of range".
    let digits: String = s
        .get(2..)
        .unwrap_or_else(|| panic!("integer literal {:?} is missing its two-character prefix", s))
        .chars()
        .filter(|&c| c != '_')
        .collect();

    i64::from_str_radix(&digits, radix)
        .unwrap_or_else(|e| panic!("invalid base-{} integer literal {:?}: {}", radix, s, e))
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn parse_quoted_string(pairs: Pairs<Rule>) -> String {
|
||||||
|
let mut ret = String::new();
|
||||||
|
for pair in pairs {
|
||||||
|
let s = pair.as_str();
|
||||||
|
match s {
|
||||||
|
r#"\""# => ret.push('"'),
|
||||||
|
r"\\" => ret.push('\\'),
|
||||||
|
r"\/" => ret.push('/'),
|
||||||
|
r"\b" => ret.push('\x08'),
|
||||||
|
r"\f" => ret.push('\x0c'),
|
||||||
|
r"\n" => ret.push('\n'),
|
||||||
|
r"\r" => ret.push('\r'),
|
||||||
|
r"\t" => ret.push('\t'),
|
||||||
|
s if s.starts_with(r"\u") => {
|
||||||
|
let code = parse_int(s, 16) as u32;
|
||||||
|
let ch = char::from_u32(code).unwrap_or('\u{FFFD}');
|
||||||
|
ret.push(ch);
|
||||||
|
}
|
||||||
|
s => ret.push_str(s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn parse_s_quoted_string(pairs: Pairs<Rule>) -> String {
|
||||||
|
let mut ret = String::new();
|
||||||
|
for pair in pairs {
|
||||||
|
let s = pair.as_str();
|
||||||
|
match s {
|
||||||
|
r#"\'"# => ret.push('\''),
|
||||||
|
r"\\" => ret.push('\\'),
|
||||||
|
r"\/" => ret.push('/'),
|
||||||
|
r"\b" => ret.push('\x08'),
|
||||||
|
r"\f" => ret.push('\x0c'),
|
||||||
|
r"\n" => ret.push('\n'),
|
||||||
|
r"\r" => ret.push('\r'),
|
||||||
|
r"\t" => ret.push('\t'),
|
||||||
|
s if s.starts_with(r"\u") => {
|
||||||
|
let code = parse_int(s, 16) as u32;
|
||||||
|
let ch = char::from_u32(code).unwrap_or('\u{FFFD}');
|
||||||
|
ret.push(ch);
|
||||||
|
}
|
||||||
|
s => ret.push_str(s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_expr_primary(pair: Pair<Rule>) -> Expr {
|
||||||
|
match pair.as_rule() {
|
||||||
|
Rule::expr => parse_expr_primary(pair.into_inner().next().unwrap()),
|
||||||
|
Rule::term => parse_expr_primary(pair.into_inner().next().unwrap()),
|
||||||
|
|
||||||
|
Rule::pos_int => Const(Value::Int(pair.as_str().replace('_', "").parse::<i64>().unwrap())),
|
||||||
|
Rule::hex_pos_int => Const(Value::Int(parse_int(pair.as_str(), 16))),
|
||||||
|
Rule::octo_pos_int => Const(Value::Int(parse_int(pair.as_str(), 8))),
|
||||||
|
Rule::bin_pos_int => Const(Value::Int(parse_int(pair.as_str(), 2))),
|
||||||
|
Rule::dot_float | Rule::sci_float => Const(Value::Float(pair.as_str().replace('_', "").parse::<f64>().unwrap())),
|
||||||
|
Rule::null => Const(Value::Null),
|
||||||
|
Rule::boolean => Const(Value::Bool(pair.as_str() == "true")),
|
||||||
|
Rule::quoted_string => Const(Value::OwnString(Box::new(parse_quoted_string(pair.into_inner().next().unwrap().into_inner())))),
|
||||||
|
Rule::s_quoted_string => Const(Value::OwnString(Box::new(parse_s_quoted_string(pair.into_inner().next().unwrap().into_inner())))),
|
||||||
|
_ => {
|
||||||
|
println!("{:#?}", pair);
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_expr(pair: Pair<Rule>) -> Expr {
|
||||||
|
PREC_CLIMBER.climb(iter::once(pair), parse_expr_primary, parse_expr_infix)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_expr_from_str(inp: &str) -> Result<Expr> {
|
||||||
|
let expr_tree = Parser::parse(Rule::expr, inp)?.next().unwrap();
|
||||||
|
Ok(parse_expr(expr_tree))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Every literal form must parse to the matching `Const` value.
    ///
    /// NOTE(review): the string cases expect `Value::RefString` while
    /// `parse_expr_primary` builds `Value::OwnString`; they can only pass if
    /// `Value`'s equality treats the two alike — confirm.
    #[test]
    fn parse_literals() {
        let int_cases: [(&str, i64); 7] = [
            ("1", 1),
            ("12_3", 123),
            ("0xaf", 0xaf),
            ("0xafcE_f", 0xafcef),
            ("0o1234_567", 0o1234567),
            ("0o0001234_567", 0o1234567),
            ("0b101010", 0b101010),
        ];
        for &(src, expected) in int_cases.iter() {
            assert_eq!(parse_expr_from_str(src).unwrap(), Const(Value::Int(expected)));
        }

        let float_cases: [(&str, f64); 3] = [
            ("0.0", 0.),
            ("10.022_3", 10.0223),
            ("10.022_3e-100", 10.0223e-100),
        ];
        for &(src, expected) in float_cases.iter() {
            assert_eq!(parse_expr_from_str(src).unwrap(), Const(Value::Float(expected)));
        }

        assert_eq!(parse_expr_from_str("null").unwrap(), Const(Value::Null));

        let bool_cases = [("true", true), ("false", false)];
        for &(src, expected) in bool_cases.iter() {
            assert_eq!(parse_expr_from_str(src).unwrap(), Const(Value::Bool(expected)));
        }

        let string_cases = [
            (r#""x \n \ty \"""#, "x \n \ty \""),
            (r#""x'""#, "x'"),
            (r#"'"x"'"#, r##""x""##),
        ];
        for &(src, expected) in string_cases.iter() {
            assert_eq!(parse_expr_from_str(src).unwrap(), Const(Value::RefString(expected)));
        }
    }
}
|
@ -0,0 +1,96 @@
|
|||||||
|
use pest::Parser as PestParser;
|
||||||
|
use pest_derive::Parser;
|
||||||
|
|
||||||
|
|
||||||
|
#[derive(Parser)]
|
||||||
|
#[grammar = "grammar.pest"]
|
||||||
|
pub struct Parser;
|
||||||
|
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `rule` accepts `input` and that the matched text is the
    /// whole input.
    fn assert_consumes_all(rule: Rule, input: &str) {
        assert_eq!(Parser::parse(rule, input).unwrap().as_str(), input);
    }

    /// Asserts that `rule` accepts `input` but the matched text is not the
    /// whole input.
    fn assert_consumes_part(rule: Rule, input: &str) {
        assert_ne!(Parser::parse(rule, input).unwrap().as_str(), input);
    }

    /// Asserts that `rule` refuses `input` outright.
    fn assert_rejected(rule: Rule, input: &str) {
        assert!(Parser::parse(rule, input).is_err());
    }

    /// Smoke test for the RocksDB binding: open, put, read back, delete,
    /// destroy.
    #[test]
    fn db() {
        use rocksdb::{DB, Options};

        let path = "_path_for_rocksdb_storage";
        {
            // NB: the handle is dropped at the end of this scope, which closes
            // the database before it is destroyed below.
            let store = DB::open_default(path).unwrap();
            store.put("真二", "你好👋").unwrap();
            match store.get_pinned("真二") {
                Ok(Some(value)) => {
                    println!("retrieved value {}", std::str::from_utf8(&value).unwrap())
                }
                Ok(None) => println!("value not found"),
                Err(e) => println!("operational problem encountered: {}", e),
            }
            store.delete(b"my key").unwrap();
        }
        // Best-effort cleanup; the result is intentionally ignored.
        let _ = DB::destroy(&Options::default(), path);
    }

    #[test]
    fn identifiers() {
        for input in ["x", "x2", "x_y", "x_", "你好", "你好123"].iter() {
            assert_consumes_all(Rule::normal_ident, input);
        }
        assert_consumes_part(Rule::ident, "x$y");

        // A leading underscore is rejected by `normal_ident` but fine for `ident`.
        for input in ["_x", "_"].iter() {
            assert_rejected(Rule::normal_ident, input);
        }
        for input in ["_x", "_"].iter() {
            assert_consumes_all(Rule::ident, input);
        }

        assert_rejected(Rule::normal_ident, "$x");
        assert_rejected(Rule::ident, "$");
        assert_consumes_all(Rule::ident, "$x");

        for input in ["123x", ".x"].iter() {
            assert_rejected(Rule::ident, input);
        }
        for input in ["x.x", "x~x"].iter() {
            assert_consumes_part(Rule::ident, input);
        }
    }

    #[test]
    fn strings() {
        let accepted = [
            r#""""#,
            r#"" b a c""#,
            r#""你好👋""#,
            r#""\n""#,
            r#""\u5678""#,
        ];
        for input in accepted.iter() {
            assert_consumes_all(Rule::string, input);
        }

        assert_rejected(Rule::string, r#""\ux""#);
        assert_consumes_all(Rule::string, r###"r#"a"#"###);
    }

    #[test]
    fn numbers() {
        for input in ["123", "-123", "0", "-0", "0123", "000_1"].iter() {
            assert_consumes_all(Rule::number, input);
        }
        assert_rejected(Rule::number, "_000_1");

        assert_consumes_all(Rule::number, "0xAf03");
        assert_consumes_all(Rule::number, "0o0_7067");
        assert_consumes_part(Rule::number, "0o0_7068");
        assert_consumes_all(Rule::number, "0b0000_0000_1111");
        assert_consumes_part(Rule::number, "0b0000_0000_1112");

        assert_consumes_all(Rule::number, "123.45");
        assert_consumes_all(Rule::number, "1_23.4_5_");
        assert_consumes_part(Rule::number, "123.");
        for input in ["-123e-456", "123.333e456", "1_23.33_3e45_6"].iter() {
            assert_consumes_all(Rule::number, input);
        }
    }

    #[test]
    fn expressions() {
        let parsed = Parser::parse(Rule::expr, r"(a + b) ~ [] + c.d.e(1,2,x=3).f");
        assert!(parsed.is_ok());
    }
}
|
Loading…
Reference in New Issue