parsing literals

main
Ziyang Hu 2 years ago
parent da6f772fd1
commit f220bf13c7

@ -12,3 +12,5 @@ ordered-float = "2.10.0"
uuid = "0.8"
chrono = "0.4"
rocksdb = "0.18.0"
anyhow = "1.0"
lazy_static = "1.4.0"

@ -0,0 +1,156 @@
use std::iter;
use pest::iterators::{Pair, Pairs};
use pest::Parser as PestParser;
use pest::prec_climber::{Assoc, PrecClimber, Operator};
use crate::parser::Parser;
use crate::parser::Rule;
use anyhow::Result;
use lazy_static::lazy_static;
use crate::ast::Expr::Const;
use crate::value::Value;
lazy_static! {
    /// Binary-operator table used by `parse_expr`.
    ///
    /// `PrecClimber::new` takes the operators in ascending order of precedence;
    /// operators joined with `|` share one precedence level.
    ///
    /// NOTE(review): `op_mod` sits between the comparison tier and the equality
    /// tier rather than next to `op_mul`/`op_div` — confirm this ordering is
    /// intended.
    static ref PREC_CLIMBER: PrecClimber<Rule> = {
        let left = |rule: Rule| Operator::new(rule, Assoc::Left);
        let right = |rule: Rule| Operator::new(rule, Assoc::Right);

        let tiers = vec![
            left(Rule::op_or),
            left(Rule::op_and),
            left(Rule::op_gt) | left(Rule::op_lt) | left(Rule::op_ge) | left(Rule::op_le),
            left(Rule::op_mod),
            left(Rule::op_eq) | left(Rule::op_ne),
            left(Rule::op_add) | left(Rule::op_sub),
            left(Rule::op_mul) | left(Rule::op_div),
            right(Rule::op_pow),
            left(Rule::op_coalesce),
        ];
        PrecClimber::new(tiers)
    };
}
/// Node of the expression tree built by the parsing functions in this file.
///
/// Only `Const` carries data at this point; the remaining variants are
/// payload-free placeholders.
#[derive(PartialEq, Debug)]
pub enum Expr<'a> {
// Placeholder variants: declared without any fields.
UnaryOp,
BinaryOp,
AssocOp,
Accessor,
FnCall,
/// A literal constant, as produced by `parse_expr_primary` for the literal rules.
Const(Value<'a>),
}
/// Infix callback handed to `PREC_CLIMBER.climb` by `parse_expr`.
///
/// Not implemented yet: every argument is ignored and any call panics via
/// `unimplemented!()`.
fn parse_expr_infix<'a>(_lhs: Expr<'a>, _op: Pair<Rule>, _rhs: Expr<'a>) -> Expr<'a> {
unimplemented!()
}
/// Parses the digits of `s` in the given `radix`, skipping the first two bytes
/// of `s` (a prefix such as `0x` or `\u`) and ignoring `_` separators.
///
/// # Panics
///
/// Panics if `s` cannot be sliced at byte offset 2, or if the remaining digits
/// do not form a valid `i64` in `radix`.
#[inline]
fn parse_int(s: &str, radix: u32) -> i64 {
    let digits: String = s[2..].chars().filter(|&c| c != '_').collect();
    i64::from_str_radix(&digits, radix).unwrap()
}
/// Decodes the character pairs of a quoted string literal into its text.
///
/// Each pair is either a plain character (copied verbatim) or one escape
/// sequence. `quote_escape` is the escaped form of the string's delimiter
/// (`\"` or `\'`) and `quote` is the character it decodes to; every other
/// escape is handled identically for both string flavours.
fn unescape_string_pairs(pairs: Pairs<Rule>, quote_escape: &str, quote: char) -> String {
    let mut ret = String::new();
    for pair in pairs {
        let s = pair.as_str();
        match s {
            s if s == quote_escape => ret.push(quote),
            r"\\" => ret.push('\\'),
            r"\/" => ret.push('/'),
            r"\b" => ret.push('\x08'),
            r"\f" => ret.push('\x0c'),
            r"\n" => ret.push('\n'),
            r"\r" => ret.push('\r'),
            r"\t" => ret.push('\t'),
            s if s.starts_with(r"\u") => {
                // `parse_int` skips the two-byte `\u` prefix and reads the hex digits.
                let code = parse_int(s, 16) as u32;
                // Code points that are not valid `char`s (e.g. lone surrogates)
                // become U+FFFD instead of aborting the parse.
                let ch = char::from_u32(code).unwrap_or('\u{FFFD}');
                ret.push(ch);
            }
            // Anything else is an unescaped character and is kept as-is.
            s => ret.push_str(s),
        }
    }
    ret
}

/// Builds the text of a double-quoted string literal from its inner
/// character pairs, decoding `\"` and the shared escape sequences.
#[inline]
fn parse_quoted_string(pairs: Pairs<Rule>) -> String {
    unescape_string_pairs(pairs, r#"\""#, '"')
}

/// Builds the text of a single-quoted string literal from its inner
/// character pairs, decoding `\'` and the shared escape sequences.
#[inline]
fn parse_s_quoted_string(pairs: Pairs<Rule>) -> String {
    unescape_string_pairs(pairs, r"\'", '\'')
}
fn parse_expr_primary(pair: Pair<Rule>) -> Expr {
match pair.as_rule() {
Rule::expr => parse_expr_primary(pair.into_inner().next().unwrap()),
Rule::term => parse_expr_primary(pair.into_inner().next().unwrap()),
Rule::pos_int => Const(Value::Int(pair.as_str().replace('_', "").parse::<i64>().unwrap())),
Rule::hex_pos_int => Const(Value::Int(parse_int(pair.as_str(), 16))),
Rule::octo_pos_int => Const(Value::Int(parse_int(pair.as_str(), 8))),
Rule::bin_pos_int => Const(Value::Int(parse_int(pair.as_str(), 2))),
Rule::dot_float | Rule::sci_float => Const(Value::Float(pair.as_str().replace('_', "").parse::<f64>().unwrap())),
Rule::null => Const(Value::Null),
Rule::boolean => Const(Value::Bool(pair.as_str() == "true")),
Rule::quoted_string => Const(Value::OwnString(Box::new(parse_quoted_string(pair.into_inner().next().unwrap().into_inner())))),
Rule::s_quoted_string => Const(Value::OwnString(Box::new(parse_s_quoted_string(pair.into_inner().next().unwrap().into_inner())))),
_ => {
println!("{:#?}", pair);
unimplemented!()
}
}
}
/// Runs the precedence climber over `pair`.
///
/// The climber is fed a one-element iterator holding `pair` itself, so only
/// `parse_expr_primary` is invoked for it.
/// NOTE(review): presumably the children of `pair` should be fed instead once
/// `parse_expr_infix` is implemented — confirm.
fn parse_expr(pair: Pair<Rule>) -> Expr {
    let operands = iter::once(pair);
    PREC_CLIMBER.climb(operands, parse_expr_primary, parse_expr_infix)
}
/// Parses `inp` with the `expr` grammar rule and converts the result to an [`Expr`].
///
/// # Errors
///
/// Returns the pest error if `inp` does not match `Rule::expr`.
///
/// # Panics
///
/// Panics if a successful parse yields no pair, or if the conversion in
/// `parse_expr` hits an unsupported construct.
pub fn parse_expr_from_str(inp: &str) -> Result<Expr> {
    let mut parsed = Parser::parse(Rule::expr, inp)?;
    let root = parsed.next().unwrap();
    Ok(parse_expr(root))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` as an expression, panicking if it is rejected.
    fn literal(src: &'static str) -> Expr<'static> {
        parse_expr_from_str(src).unwrap()
    }

    /// Every literal form should parse to the matching `Const` value.
    #[test]
    fn parse_literals() {
        // Decimal, hexadecimal, octal and binary integers, with `_` separators.
        let ints: [(&'static str, i64); 7] = [
            ("1", 1),
            ("12_3", 123),
            ("0xaf", 0xaf),
            ("0xafcE_f", 0xafcef),
            ("0o1234_567", 0o1234567),
            ("0o0001234_567", 0o1234567),
            ("0b101010", 0b101010),
        ];
        for &(src, expected) in ints.iter() {
            assert_eq!(literal(src), Const(Value::Int(expected)));
        }

        // Plain and scientific-notation floats.
        let floats: [(&'static str, f64); 3] = [
            ("0.0", 0.),
            ("10.022_3", 10.0223),
            ("10.022_3e-100", 10.0223e-100),
        ];
        for &(src, expected) in floats.iter() {
            assert_eq!(literal(src), Const(Value::Float(expected)));
        }

        assert_eq!(literal("null"), Const(Value::Null));

        let bools: [(&'static str, bool); 2] = [("true", true), ("false", false)];
        for &(src, expected) in bools.iter() {
            assert_eq!(literal(src), Const(Value::Bool(expected)));
        }

        // Double- and single-quoted strings; the parser yields `OwnString`,
        // which compares equal to a `RefString` holding the same text.
        let strings: [(&'static str, &'static str); 3] = [
            (r#""x \n \ty \"""#, "x \n \ty \""),
            (r#""x'""#, "x'"),
            (r#"'"x"'"#, r##""x""##),
        ];
        for &(src, expected) in strings.iter() {
            assert_eq!(literal(src), Const(Value::RefString(expected)));
        }
    }
}

@ -18,28 +18,25 @@ ident = { normal_ident | special_ident | param_ident}
// literals
raw_string = {
"r" ~ PUSH("#"*) ~ "\"" // push the number signs onto the stack
~ raw_string_inner
~ "\"" ~ POP // match a quotation mark and the number signs
}
raw_string_inner = {
(
!("\"" ~ PEEK) // unless the next character is a quotation mark
// followed by the correct amount of number signs,
~ ANY // consume one character
)*
}
quoted_string = ${ "\"" ~ quoted_string_inner ~ "\"" }
quoted_string_inner = @{ char* }
quoted_string_inner = { char* }
char = {
!("\"" | "\\") ~ ANY
| "\\" ~ ("\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t")
| "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4})
}
string = {(raw_string | quoted_string)}
// Single-quoted string literal. `$` makes the rule compound-atomic: no implicit
// whitespace is skipped between the quotes, while inner rules still produce pairs.
s_quoted_string = ${ "\'" ~ s_quoted_string_inner ~ "\'" }
// Body of a single-quoted string: zero or more `s_char` pairs.
s_quoted_string_inner = { s_char* }
// One unit of a single-quoted string, in order of preference:
//   1. any character other than a single quote or a backslash,
//   2. a backslash followed by one of the listed single-character escapes,
//   3. a backslash, `u`, and exactly four hexadecimal digits.
s_char = {
!("\'" | "\\") ~ ANY
| "\\" ~ ("\'" | "\\" | "/" | "b" | "f" | "n" | "r" | "t")
| "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4})
}
string = _{(s_quoted_string | quoted_string)}
// Boolean and null
@ -50,27 +47,24 @@ null = { "null" }
// Numbers
pos_int = @{ASCII_DIGIT ~ ("_" | ASCII_DIGIT)*}
neg_int = @{"-" ~ pos_int}
hex_pos_int = @{"0x" ~ ASCII_HEX_DIGIT ~ ("_" | ASCII_HEX_DIGIT)*}
octo_pos_int = @{"0o" ~ ASCII_OCT_DIGIT ~ ("_" | ASCII_OCT_DIGIT)*}
bin_pos_int = @{"0b" ~ ASCII_BIN_DIGIT ~ ("_" | ASCII_BIN_DIGIT)*}
int = {(hex_pos_int | octo_pos_int | bin_pos_int | pos_int | neg_int)}
int = _{(hex_pos_int | octo_pos_int | bin_pos_int | pos_int)}
dot_float = @{
"-"?
~ ("0" | ASCII_NONZERO_DIGIT ~ ("_" | ASCII_DIGIT)*)
("0" | ASCII_NONZERO_DIGIT ~ ("_" | ASCII_DIGIT)*)
~ ("." ~ ("_" | ASCII_DIGIT)+)
}
sci_float = @{
"-"?
~ ("0" | ASCII_NONZERO_DIGIT ~ ("_" | ASCII_DIGIT)*)
("0" | ASCII_NONZERO_DIGIT ~ ("_" | ASCII_DIGIT)*)
~ ("." ~ ("_" | ASCII_DIGIT)+)?
~ (^"e" ~ ("+" | "-")? ~ ("_" | ASCII_DIGIT)+)
}
float = {(sci_float | dot_float)}
number = {(float | int)}
float = _{(sci_float | dot_float)}
number = _{(float | int)}
literal = _{ null | boolean | number | string}
@ -83,9 +77,8 @@ comma_sep_expr = { expr ~ ("," ~ expr)* }
expr = {(unary | term) ~ (operation ~ (unary | term)) *}
operation = _{ (op_and | op_or | op_pow | op_add | op_sub | op_mul | op_div | op_mod | op_coalesce |
op_ge | op_le | op_gt | op_lt | op_eq | op_ne)}
op_and = { "&&" }
op_or = { "||" }
op_pow = { "^" }
op_and = { "&&" }
op_add = { "+" }
op_sub = { "-" }
op_mul = { "*" }
@ -98,15 +91,17 @@ op_gt = { ">" }
op_lt = { "<" }
op_ge = { ">=" }
op_le = { "<=" }
op_pow = { "^" }
unary = { unary_op ~ term }
unary_op = _{ minus | negate }
minus = { "-" }
negate = { "!" }
term = { (grouping | literal | normal_ident | param_ident | list | dict) ~ (call | accessor)* }
term = { (grouping | literal | normal_ident | param_ident | list | dict) ~ (call | accessor | index_accessor)* }
// Method-style call: `.name(` followed by zero or more arguments and `)`.
call = {"." ~ (normal_ident | special_ident) ~ "(" ~ argument* ~ ")"}
// Field access: `.name`.
accessor = {"." ~ (normal_ident | special_ident)}
// Index access: an expression enclosed in square brackets.
index_accessor = {"[" ~ expr ~ "]"}
// A call argument; silent (`_`), so only the `kw_arg` / `pos_arg` pair is emitted.
argument = _{(kw_arg | pos_arg)}
// Keyword argument: `name = expr`.
kw_arg = {normal_ident ~ "=" ~ expr}
// Positional argument: a bare expression.
pos_arg = { expr }

@ -1,102 +1,6 @@
extern crate pest;
#[macro_use]
extern crate pest_derive;
pub mod value;
pub mod typing;
pub mod env;
pub mod ast;
pub mod parser;
use pest::Parser;
/// Parser type whose implementation is derived by `pest_derive` from the
/// grammar file `cozo.pest`; the derive also provides the `Rule` enum used by
/// the tests below.
#[derive(Parser)]
#[grammar = "cozo.pest"]
pub struct CozoParser;
// Smoke tests: one RocksDB round trip plus grammar checks for identifiers,
// strings, numbers and a compound expression.
#[cfg(test)]
mod tests {
use super::*;
// Opens a RocksDB database at a fixed relative path, writes and reads back one
// UTF-8 key/value pair, then destroys the database.
#[test]
fn db() {
use rocksdb::{DB, Options};
// NB: db is automatically closed at end of lifetime
let path = "_path_for_rocksdb_storage";
{
let db = DB::open_default(path).unwrap();
db.put("真二", "你好👋").unwrap();
match db.get_pinned("真二") {
Ok(Some(value)) => println!("retrieved value {}", std::str::from_utf8(&value).unwrap()),
Ok(None) => println!("value not found"),
Err(e) => println!("operational problem encountered: {}", e),
}
// Note: "my key" is not the key written above.
db.delete(b"my key").unwrap();
}
// The result of the cleanup is deliberately ignored.
let _ = DB::destroy(&Options::default(), path);
}
// `assert_eq!` cases expect the whole input to be consumed; `assert_ne!` cases
// expect the parse to succeed on a shorter prefix only.
#[test]
fn identifiers() {
assert_eq!(CozoParser::parse(Rule::normal_ident, "x").unwrap().as_str(), "x");
assert_eq!(CozoParser::parse(Rule::normal_ident, "x2").unwrap().as_str(), "x2");
assert_eq!(CozoParser::parse(Rule::normal_ident, "x_y").unwrap().as_str(), "x_y");
assert_eq!(CozoParser::parse(Rule::normal_ident, "x_").unwrap().as_str(), "x_");
assert_eq!(CozoParser::parse(Rule::normal_ident, "你好").unwrap().as_str(), "你好");
assert_eq!(CozoParser::parse(Rule::normal_ident, "你好123").unwrap().as_str(), "你好123");
assert_ne!(CozoParser::parse(Rule::ident, "x$y").unwrap().as_str(), "x$y");
assert!(CozoParser::parse(Rule::normal_ident, "_x").is_err());
assert!(CozoParser::parse(Rule::normal_ident, "_").is_err());
assert_eq!(CozoParser::parse(Rule::ident, "_x").unwrap().as_str(), "_x");
assert_eq!(CozoParser::parse(Rule::ident, "_").unwrap().as_str(), "_");
assert!(CozoParser::parse(Rule::normal_ident, "$x").is_err());
assert!(CozoParser::parse(Rule::ident, "$").is_err());
assert_eq!(CozoParser::parse(Rule::ident, "$x").unwrap().as_str(), "$x");
assert!(CozoParser::parse(Rule::ident, "123x").is_err());
assert!(CozoParser::parse(Rule::ident, ".x").is_err());
assert_ne!(CozoParser::parse(Rule::ident, "x.x").unwrap().as_str(), "x.x");
assert_ne!(CozoParser::parse(Rule::ident, "x~x").unwrap().as_str(), "x~x");
}
// Quoted and raw string literals; the matched text includes the delimiters.
#[test]
fn strings() {
assert_eq!(CozoParser::parse(Rule::string, r#""""#).unwrap().as_str(), r#""""#);
assert_eq!(CozoParser::parse(Rule::string, r#"" b a c""#).unwrap().as_str(), r#"" b a c""#);
assert_eq!(CozoParser::parse(Rule::string, r#""你好👋""#).unwrap().as_str(), r#""你好👋""#);
assert_eq!(CozoParser::parse(Rule::string, r#""\n""#).unwrap().as_str(), r#""\n""#);
assert_eq!(CozoParser::parse(Rule::string, r#""\u5678""#).unwrap().as_str(), r#""\u5678""#);
assert!(CozoParser::parse(Rule::string, r#""\ux""#).is_err());
assert_eq!(CozoParser::parse(Rule::string, r###"r#"a"#"###).unwrap().as_str(), r##"r#"a"#"##);
}
// Integer literals in several radixes and float literals, with `_` separators.
#[test]
fn numbers() {
assert_eq!(CozoParser::parse(Rule::number, "123").unwrap().as_str(), "123");
assert_eq!(CozoParser::parse(Rule::number, "-123").unwrap().as_str(), "-123");
assert_eq!(CozoParser::parse(Rule::number, "0").unwrap().as_str(), "0");
assert_eq!(CozoParser::parse(Rule::number, "-0").unwrap().as_str(), "-0");
assert_eq!(CozoParser::parse(Rule::number, "0123").unwrap().as_str(), "0123");
assert_eq!(CozoParser::parse(Rule::number, "000_1").unwrap().as_str(), "000_1");
assert!(CozoParser::parse(Rule::number, "_000_1").is_err());
assert_eq!(CozoParser::parse(Rule::number, "0xAf03").unwrap().as_str(), "0xAf03");
assert_eq!(CozoParser::parse(Rule::number, "0o0_7067").unwrap().as_str(), "0o0_7067");
assert_ne!(CozoParser::parse(Rule::number, "0o0_7068").unwrap().as_str(), "0o0_7068");
assert_eq!(CozoParser::parse(Rule::number, "0b0000_0000_1111").unwrap().as_str(), "0b0000_0000_1111");
assert_ne!(CozoParser::parse(Rule::number, "0b0000_0000_1112").unwrap().as_str(), "0b0000_0000_1112");
assert_eq!(CozoParser::parse(Rule::number, "123.45").unwrap().as_str(), "123.45");
assert_eq!(CozoParser::parse(Rule::number, "1_23.4_5_").unwrap().as_str(), "1_23.4_5_");
assert_ne!(CozoParser::parse(Rule::number, "123.").unwrap().as_str(), "123.");
assert_eq!(CozoParser::parse(Rule::number, "-123e-456").unwrap().as_str(), "-123e-456");
assert_eq!(CozoParser::parse(Rule::number, "123.333e456").unwrap().as_str(), "123.333e456");
assert_eq!(CozoParser::parse(Rule::number, "1_23.33_3e45_6").unwrap().as_str(), "1_23.33_3e45_6");
}
// Only checks that the `expr` rule accepts the input, not how much of it is consumed.
#[test]
fn expressions() {
assert!(CozoParser::parse(Rule::expr, r"(a + b) ~ [] + c.d.e(1,2,x=3).f").is_ok());
}
}

@ -0,0 +1,96 @@
use pest::Parser as PestParser;
use pest_derive::Parser;
/// Parser type whose implementation is derived by `pest_derive` from the
/// grammar file `grammar.pest`; the derive also provides the `Rule` enum used
/// by the tests below.
#[derive(Parser)]
#[grammar = "grammar.pest"]
pub struct Parser;
// Smoke tests: one RocksDB round trip plus grammar checks for identifiers,
// strings, numbers and a compound expression.
#[cfg(test)]
mod tests {
use super::*;
// Opens a RocksDB database at a fixed relative path, writes and reads back one
// UTF-8 key/value pair, then destroys the database.
#[test]
fn db() {
use rocksdb::{DB, Options};
// NB: db is automatically closed at end of lifetime
let path = "_path_for_rocksdb_storage";
{
let db = DB::open_default(path).unwrap();
db.put("真二", "你好👋").unwrap();
match db.get_pinned("真二") {
Ok(Some(value)) => println!("retrieved value {}", std::str::from_utf8(&value).unwrap()),
Ok(None) => println!("value not found"),
Err(e) => println!("operational problem encountered: {}", e),
}
// Note: "my key" is not the key written above.
db.delete(b"my key").unwrap();
}
// The result of the cleanup is deliberately ignored.
let _ = DB::destroy(&Options::default(), path);
}
// `assert_eq!` cases expect the whole input to be consumed; `assert_ne!` cases
// expect the parse to succeed on a shorter prefix only.
#[test]
fn identifiers() {
assert_eq!(Parser::parse(Rule::normal_ident, "x").unwrap().as_str(), "x");
assert_eq!(Parser::parse(Rule::normal_ident, "x2").unwrap().as_str(), "x2");
assert_eq!(Parser::parse(Rule::normal_ident, "x_y").unwrap().as_str(), "x_y");
assert_eq!(Parser::parse(Rule::normal_ident, "x_").unwrap().as_str(), "x_");
assert_eq!(Parser::parse(Rule::normal_ident, "你好").unwrap().as_str(), "你好");
assert_eq!(Parser::parse(Rule::normal_ident, "你好123").unwrap().as_str(), "你好123");
assert_ne!(Parser::parse(Rule::ident, "x$y").unwrap().as_str(), "x$y");
assert!(Parser::parse(Rule::normal_ident, "_x").is_err());
assert!(Parser::parse(Rule::normal_ident, "_").is_err());
assert_eq!(Parser::parse(Rule::ident, "_x").unwrap().as_str(), "_x");
assert_eq!(Parser::parse(Rule::ident, "_").unwrap().as_str(), "_");
assert!(Parser::parse(Rule::normal_ident, "$x").is_err());
assert!(Parser::parse(Rule::ident, "$").is_err());
assert_eq!(Parser::parse(Rule::ident, "$x").unwrap().as_str(), "$x");
assert!(Parser::parse(Rule::ident, "123x").is_err());
assert!(Parser::parse(Rule::ident, ".x").is_err());
assert_ne!(Parser::parse(Rule::ident, "x.x").unwrap().as_str(), "x.x");
assert_ne!(Parser::parse(Rule::ident, "x~x").unwrap().as_str(), "x~x");
}
// String literals; the matched text includes the delimiters.
#[test]
fn strings() {
assert_eq!(Parser::parse(Rule::string, r#""""#).unwrap().as_str(), r#""""#);
assert_eq!(Parser::parse(Rule::string, r#"" b a c""#).unwrap().as_str(), r#"" b a c""#);
assert_eq!(Parser::parse(Rule::string, r#""你好👋""#).unwrap().as_str(), r#""你好👋""#);
assert_eq!(Parser::parse(Rule::string, r#""\n""#).unwrap().as_str(), r#""\n""#);
assert_eq!(Parser::parse(Rule::string, r#""\u5678""#).unwrap().as_str(), r#""\u5678""#);
assert!(Parser::parse(Rule::string, r#""\ux""#).is_err());
// NOTE(review): the grammar change in this commit appears to redefine `string`
// as `s_quoted_string | quoted_string`, without `raw_string`; if so, this
// raw-string case no longer parses — confirm against grammar.pest.
assert_eq!(Parser::parse(Rule::string, r###"r#"a"#"###).unwrap().as_str(), r##"r#"a"#"##);
}
// Integer literals in several radixes and float literals, with `_` separators.
// NOTE(review): the grammar change in this commit appears to drop `neg_int`
// from `int` and the leading `"-"?` from both float rules; if so, the `-123`,
// `-0` and `-123e-456` cases below fail at `unwrap()` — confirm against
// grammar.pest.
#[test]
fn numbers() {
assert_eq!(Parser::parse(Rule::number, "123").unwrap().as_str(), "123");
assert_eq!(Parser::parse(Rule::number, "-123").unwrap().as_str(), "-123");
assert_eq!(Parser::parse(Rule::number, "0").unwrap().as_str(), "0");
assert_eq!(Parser::parse(Rule::number, "-0").unwrap().as_str(), "-0");
assert_eq!(Parser::parse(Rule::number, "0123").unwrap().as_str(), "0123");
assert_eq!(Parser::parse(Rule::number, "000_1").unwrap().as_str(), "000_1");
assert!(Parser::parse(Rule::number, "_000_1").is_err());
assert_eq!(Parser::parse(Rule::number, "0xAf03").unwrap().as_str(), "0xAf03");
assert_eq!(Parser::parse(Rule::number, "0o0_7067").unwrap().as_str(), "0o0_7067");
assert_ne!(Parser::parse(Rule::number, "0o0_7068").unwrap().as_str(), "0o0_7068");
assert_eq!(Parser::parse(Rule::number, "0b0000_0000_1111").unwrap().as_str(), "0b0000_0000_1111");
assert_ne!(Parser::parse(Rule::number, "0b0000_0000_1112").unwrap().as_str(), "0b0000_0000_1112");
assert_eq!(Parser::parse(Rule::number, "123.45").unwrap().as_str(), "123.45");
assert_eq!(Parser::parse(Rule::number, "1_23.4_5_").unwrap().as_str(), "1_23.4_5_");
assert_ne!(Parser::parse(Rule::number, "123.").unwrap().as_str(), "123.");
assert_eq!(Parser::parse(Rule::number, "-123e-456").unwrap().as_str(), "-123e-456");
assert_eq!(Parser::parse(Rule::number, "123.333e456").unwrap().as_str(), "123.333e456");
assert_eq!(Parser::parse(Rule::number, "1_23.33_3e45_6").unwrap().as_str(), "1_23.33_3e45_6");
}
// Only checks that the `expr` rule accepts the input, not how much of it is consumed.
#[test]
fn expressions() {
assert!(Parser::parse(Rule::expr, r"(a + b) ~ [] + c.d.e(1,2,x=3).f").is_ok());
// print!("{:#?}", CozoParser::parse(Rule::expr, r"(a + b) ~ [] + c.d.e(1,2,x=3).f"));
}
}

@ -56,7 +56,7 @@ pub enum EdgeDirKind {
BwdEdgeDir,
}
#[derive(Debug, PartialEq, Clone)]
#[derive(Debug, Clone)]
pub enum Value<'a> {
Null,
Bool(bool),
@ -71,6 +71,29 @@ pub enum Value<'a> {
Dict(Box<BTreeMap<Cow<'a, str>, Value<'a>>>),
}
impl<'a> PartialEq for Value<'a> {
fn eq(&self, other: &Self) -> bool {
use Value::*;
match (self, other) {
(Null, Null) => true,
(Bool(a), Bool(b)) => a == b,
(EdgeDir(a), EdgeDir(b)) => a == b,
(UInt(a), UInt(b)) => a == b,
(Int(a), Int(b)) => a == b,
(Float(a), Float(b)) => a == b,
(Uuid(a), Uuid(b)) => a == b,
(RefString(a), RefString(b)) => a == b,
(RefString(a), OwnString(b)) => *a == **b,
(OwnString(a), RefString(b)) => **a == *b,
(OwnString(a), OwnString(b)) => a == b,
(List(a), List(b)) => a == b,
(Dict(a), Dict(b)) => a == b,
_ => false
}
}
}
pub struct ByteArrayParser<'a> {
bytes: &'a [u8],
current: usize,

Loading…
Cancel
Save