typing parser

main
Ziyang Hu 2 years ago
parent 16f04399cd
commit 5635aa4ff2

@ -94,81 +94,6 @@
// Ok(Apply(op, vec![lhs, rhs]))
// }
//
// #[inline]
// fn parse_int(s: &str, radix: u32) -> i64 {
// i64::from_str_radix(&s[2..].replace('_', ""), radix).unwrap()
// }
//
// #[inline]
// fn parse_raw_string(pair: Pair<Rule>) -> Result<String> {
// Ok(pair.into_inner().into_iter().next().unwrap().as_str().to_string())
// }
//
// #[inline]
// fn parse_quoted_string(pair: Pair<Rule>) -> Result<String> {
// let pairs = pair.into_inner().next().unwrap().into_inner();
// let mut ret = String::with_capacity(pairs.as_str().len());
// for pair in pairs {
// let s = pair.as_str();
// match s {
// r#"\""# => ret.push('"'),
// r"\\" => ret.push('\\'),
// r"\/" => ret.push('/'),
// r"\b" => ret.push('\x08'),
// r"\f" => ret.push('\x0c'),
// r"\n" => ret.push('\n'),
// r"\r" => ret.push('\r'),
// r"\t" => ret.push('\t'),
// s if s.starts_with(r"\u") => {
// let code = parse_int(s, 16) as u32;
// let ch = char::from_u32(code).ok_or(CozoError::InvalidUtfCode)?;
// ret.push(ch);
// }
// s if s.starts_with('\\') => return Err(CozoError::InvalidEscapeSequence),
// s => ret.push_str(s)
// }
// }
// Ok(ret)
// }
//
//
// #[inline]
// fn parse_s_quoted_string(pair: Pair<Rule>) -> Result<String> {
// let pairs = pair.into_inner().next().unwrap().into_inner();
// let mut ret = String::with_capacity(pairs.as_str().len());
// for pair in pairs {
// let s = pair.as_str();
// match s {
// r#"\'"# => ret.push('\''),
// r"\\" => ret.push('\\'),
// r"\/" => ret.push('/'),
// r"\b" => ret.push('\x08'),
// r"\f" => ret.push('\x0c'),
// r"\n" => ret.push('\n'),
// r"\r" => ret.push('\r'),
// r"\t" => ret.push('\t'),
// s if s.starts_with(r"\u") => {
// let code = parse_int(s, 16) as u32;
// let ch = char::from_u32(code).ok_or(CozoError::InvalidUtfCode)?;
// ret.push(ch);
// }
// s if s.starts_with('\\') => return Err(CozoError::InvalidEscapeSequence),
// s => ret.push_str(s)
// }
// }
// Ok(ret)
// }
//
// #[inline]
// pub fn parse_string(pair: Pair<Rule>) -> Result<String> {
// match pair.as_rule() {
// Rule::quoted_string => Ok(parse_quoted_string(pair)?),
// Rule::s_quoted_string => Ok(parse_s_quoted_string(pair)?),
// Rule::raw_string => Ok(parse_raw_string(pair)?),
// Rule::ident => Ok(pair.as_str().to_string()),
// _ => unreachable!()
// }
// }
//
// fn build_expr_primary(pair: Pair<Rule>) -> Result<Expr> {
// match pair.as_rule() {

@ -246,7 +246,7 @@ mod tests {
assert!(engine2.is_err());
println!("create OK");
}
let engine2 = Engine::new(p2.to_string(), true);
let engine2 = Engine::new(p2.to_string(), false);
assert!(engine2.is_ok());
println!("start ok");
let engine2 = Arc::new(Engine::new(p3.to_string(), true).unwrap());
@ -271,14 +271,15 @@ mod tests {
thread_handles.push(thread::spawn(move || {
let mut sess = engine.session().unwrap();
println!("In thread {} {}", i, sess.handle.read().unwrap().cf_ident);
let gname = format!("abc{}", i);
for _ in 0..10000 {
sess.push_env();
sess.define_variable("abc", &"xyz".into(), true);
sess.define_variable("pqr", &"xyz".into(), false);
sess.define_variable(&gname, &"xyz".into(), true).unwrap();
sess.define_variable("pqr", &"xyz".into(), false).unwrap();
}
println!("pqr {:?}", sess.resolve("pqr"));
println!("uvw {:?}", sess.resolve("uvw"));
println!("aaa {:?}", sess.resolve("aaa"));
println!("aaa {:?}", sess.resolve(&gname));
let it = sess.txn.iterator(false, &sess.temp_cf);
it.to_first();
// for (key, val) in it.iter() {

@ -1,30 +1,28 @@
use std::result;
use std::sync::PoisonError;
use std::time::SystemTimeError;
use thiserror::Error;
use cozorocks::BridgeError;
use crate::parser::Rule;
// use crate::parser::Rule;
//
#[derive(Error, Debug)]
pub enum CozoError {
// #[error("Invalid UTF code")]
// InvalidUtfCode,
//
// #[error("Invalid escape sequence")]
// InvalidEscapeSequence,
//
#[error("Invalid UTF code")]
InvalidUtfCode,
#[error("Invalid escape sequence")]
InvalidEscapeSequence,
// #[error("Type mismatch")]
// TypeError,
//
// #[error("Reserved identifier")]
// ReservedIdent,
//
// #[error("The requested name exists")]
#[error("Reserved identifier")]
ReservedIdent,
// #[error("The requested name exists")]
// NameConflict,
//
// #[error("Undefined type")]
// UndefinedType,
#[error("Undefined type '{0}'")]
UndefinedType(String),
//
// #[error("Undefined table")]
// UndefinedTable,
@ -55,10 +53,10 @@ pub enum CozoError {
//
// #[error(transparent)]
// ParseFloat(#[from] std::num::ParseFloatError),
//
// #[error(transparent)]
// Parse(#[from] pest::error::Error<Rule>),
//
#[error(transparent)]
Parse(#[from] pest::error::Error<Rule>),
// #[error(transparent)]
// Storage(#[from] cozo_rocks::BridgeStatus),
//

@ -133,11 +133,15 @@ col_entry = { col_name ~ ":" ~ typing ~ ("=" ~ expr)? }
col_name = { key_marker? ~ name_in_def }
key_marker = { "*" }
typing = { nullable_marker? ~ (simple_type | tuple_type | list_type ) }
nullable_marker = {"?"}
simple_type = {ident}
tuple_type = {"("~ (typing ~ ",")+ ~")"}
list_type = {"[" ~ typing ~ "]"}
typing = _{ simple_type | nullable_type | homogeneous_list_type | unnamed_tuple_type | named_tuple_type }
simple_type = { ident }
nullable_type = { "?" ~ typing }
homogeneous_list_type = { "[" ~ typing ~ "]"}
unnamed_tuple_type = { "(" ~ typing ~ ("," ~ typing)* ~ ")" }
named_tuple_type = { "{" ~ named_type_pair ~ ("," ~ named_type_pair)* ~ "}" }
named_type_pair = { (name_in_def | string) ~ ":" ~ typing }
cols_def = { "{" ~ col_entry ~ ("," ~ col_entry)* ~ ","? ~ "}" }

@ -14,6 +14,7 @@ extern crate core;
pub mod relation;
pub mod db;
pub mod error;
pub mod parser;
#[cfg(test)]
mod tests {

@ -1,3 +1,5 @@
pub mod text_identifier;
pub mod number;
use pest_derive::Parser;

@ -0,0 +1,4 @@
/// Parses an integer literal that carries a two-byte prefix (e.g. `0x`, `0b`, `\u`).
///
/// The first two bytes of `s` are skipped, every `_` separator is dropped, and the
/// remaining text is interpreted in the given `radix`.
///
/// # Panics
///
/// Panics if `s` cannot be sliced at byte offset 2, or if the remaining text is not a
/// valid `i64` in `radix`.
#[inline]
pub fn parse_int(s: &str, radix: u32) -> i64 {
    let digits: String = s[2..].chars().filter(|&c| c != '_').collect();
    i64::from_str_radix(&digits, radix).unwrap()
}

@ -0,0 +1,106 @@
use pest::iterators::Pair;
use crate::parser::Parser;
use crate::parser::Rule;
use crate::error::{CozoError, Result};
use crate::parser::number::parse_int;
/// Extracts the content of a raw string literal.
///
/// The text of the first inner pair is returned verbatim; no escape processing is done.
///
/// # Panics
///
/// Panics if `pair` has no inner pair.
#[inline]
fn parse_raw_string(pair: Pair<Rule>) -> Result<String> {
    let body = pair.into_inner().next().unwrap();
    Ok(body.as_str().to_string())
}
/// Decodes the body of a quoted string literal into its runtime value.
///
/// `quote_escape` is the escape sequence for the literal's own delimiter and `quote`
/// is the character it stands for; every other escape is shared between the
/// double-quoted and single-quoted forms.
///
/// # Errors
///
/// * [`CozoError::InvalidUtfCode`] if a `\u` escape does not name a Unicode scalar value.
/// * [`CozoError::InvalidEscapeSequence`] for any other unrecognised backslash sequence.
///
/// # Panics
///
/// Panics if `pair` has no inner pair, or if the text after `\u` is not valid
/// hexadecimal (presumably ruled out by the grammar; confirm against the `.pest` file).
fn unescape_quoted_body(pair: Pair<Rule>, quote_escape: &str, quote: char) -> Result<String> {
    let pairs = pair.into_inner().next().unwrap().into_inner();
    // The decoded text is never longer than the source text, so this avoids regrowth.
    let mut ret = String::with_capacity(pairs.as_str().len());
    for pair in pairs {
        let s = pair.as_str();
        match s {
            s if s == quote_escape => ret.push(quote),
            r"\\" => ret.push('\\'),
            r"\/" => ret.push('/'),
            r"\b" => ret.push('\x08'),
            r"\f" => ret.push('\x0c'),
            r"\n" => ret.push('\n'),
            r"\r" => ret.push('\r'),
            r"\t" => ret.push('\t'),
            s if s.starts_with(r"\u") => {
                let code = parse_int(s, 16);
                // Range-check before narrowing: a bare `as u32` would wrap an
                // out-of-range code onto some unrelated, possibly valid, character.
                let ch = if (0..=0x10_FFFF).contains(&code) {
                    char::from_u32(code as u32)
                } else {
                    None
                };
                ret.push(ch.ok_or(CozoError::InvalidUtfCode)?);
            }
            s if s.starts_with('\\') => return Err(CozoError::InvalidEscapeSequence),
            s => ret.push_str(s),
        }
    }
    Ok(ret)
}

/// Decodes a double-quoted string literal, resolving `\"` and the common escapes.
///
/// # Errors
///
/// Returns [`CozoError::InvalidUtfCode`] or [`CozoError::InvalidEscapeSequence`] when
/// the literal contains a bad escape.
#[inline]
fn parse_quoted_string(pair: Pair<Rule>) -> Result<String> {
    unescape_quoted_body(pair, r#"\""#, '"')
}

/// Decodes a single-quoted string literal, resolving `\'` and the common escapes.
///
/// # Errors
///
/// Returns [`CozoError::InvalidUtfCode`] or [`CozoError::InvalidEscapeSequence`] when
/// the literal contains a bad escape.
#[inline]
fn parse_s_quoted_string(pair: Pair<Rule>) -> Result<String> {
    unescape_quoted_body(pair, r#"\'"#, '\'')
}
/// Converts any string-like pair into its `String` value.
///
/// Quoted forms are unescaped, raw strings are taken verbatim, and a bare identifier
/// is returned as written.
///
/// # Errors
///
/// Propagates the error of the underlying quoted-string parser.
///
/// # Panics
///
/// Panics if `pair` is not one of the four string-like rules.
#[inline]
pub fn parse_string(pair: Pair<Rule>) -> Result<String> {
    let rule = pair.as_rule();
    match rule {
        Rule::quoted_string => parse_quoted_string(pair),
        Rule::s_quoted_string => parse_s_quoted_string(pair),
        Rule::raw_string => parse_raw_string(pair),
        Rule::ident => Ok(pair.as_str().to_string()),
        _ => unreachable!(),
    }
}
/// Returns the source text of an identifier pair as an owned `String`.
pub fn parse_ident(pair: Pair<Rule>) -> String {
    let text = pair.as_str();
    text.to_string()
}
/// Resolves the name carried by a definition-name pair.
///
/// The first inner pair is either a bare identifier or one of the string literal
/// forms; its decoded text becomes the name.
///
/// # Errors
///
/// * [`CozoError::ReservedIdent`] if `forbid_underscore` is set and the name starts
///   with `_`.
/// * Any error raised while decoding a string literal.
///
/// # Panics
///
/// Panics if `pair` has no inner pair or the inner pair is of an unexpected rule.
pub fn build_name_in_def(pair: Pair<Rule>, forbid_underscore: bool) -> Result<String> {
    let child = pair.into_inner().next().unwrap();
    let name = match child.as_rule() {
        Rule::ident => parse_ident(child),
        Rule::raw_string | Rule::s_quoted_string | Rule::quoted_string => parse_string(child)?,
        _ => unreachable!(),
    };
    let reserved = forbid_underscore && name.starts_with('_');
    if reserved {
        return Err(CozoError::ReservedIdent);
    }
    Ok(name)
}
/// Parses a column name, returning the name and whether it is marked as a key.
///
/// A leading `key_marker` pair sets the flag; the pair that follows it (or the first
/// pair when there is no marker) supplies the name. Names starting with `_` are
/// rejected.
///
/// # Errors
///
/// Propagates any error from [`build_name_in_def`].
///
/// # Panics
///
/// Panics if the expected inner pairs are missing.
pub fn parse_col_name(pair: Pair<Rule>) -> Result<(String, bool)> {
    let mut children = pair.into_inner();
    let first = children.next().unwrap();
    let is_key = first.as_rule() == Rule::key_marker;
    let name_pair = if is_key {
        children.next().unwrap()
    } else {
        first
    };
    let name = build_name_in_def(name_pair, true)?;
    Ok((name, is_key))
}

@ -1,5 +1,12 @@
use std::collections::BTreeMap;
use std::fmt::{Display, Formatter, write};
use pest::iterators::Pair;
use crate::error::{Result, CozoError};
use crate::relation::value::Value;
use pest::Parser as PestParser;
use crate::parser::Parser;
use crate::parser::Rule;
use crate::parser::text_identifier::build_name_in_def;
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Clone)]
@ -14,7 +21,34 @@ pub enum Typing {
Nullable(Box<Typing>),
Homogeneous(Box<Typing>),
UnnamedTuple(Vec<Typing>),
NamedTuple(BTreeMap<String, Typing>),
NamedTuple(Vec<(String, Typing)>),
}
/// Renders a `Typing` in the textual type syntax.
///
/// Primitive types print as their names, `?T` is nullable, `[T]` is a homogeneous
/// list, `(A,B)` is an unnamed tuple and `{"a":A,"b":B}` is a named tuple. Named
/// tuples use braces, matching the `named_tuple_type` grammar rule, so they cannot be
/// mistaken for unnamed tuples.
impl Display for Typing {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            Typing::Any => f.write_str("Any"),
            Typing::Bool => f.write_str("Bool"),
            Typing::Int => f.write_str("Int"),
            Typing::Float => f.write_str("Float"),
            Typing::Text => f.write_str("Text"),
            Typing::Uuid => f.write_str("Uuid"),
            Typing::UInt => f.write_str("UInt"),
            Typing::Nullable(t) => write!(f, "?{}", t),
            Typing::Homogeneous(h) => write!(f, "[{}]", h),
            Typing::UnnamedTuple(u) => {
                f.write_str("(")?;
                for (i, t) in u.iter().enumerate() {
                    if i > 0 {
                        f.write_str(",")?;
                    }
                    write!(f, "{}", t)?;
                }
                f.write_str(")")
            }
            Typing::NamedTuple(n) => {
                f.write_str("{")?;
                for (i, (k, v)) in n.iter().enumerate() {
                    if i > 0 {
                        f.write_str(",")?;
                    }
                    // Field names are always emitted double-quoted.
                    write!(f, r##""{}":{}"##, k, v)?;
                }
                f.write_str("}")
            }
        }
    }
}
impl Typing {
@ -26,4 +60,78 @@ impl Typing {
pub fn to_display(&self, _v: Value) -> Option<Value> {
todo!()
}
}
impl Typing {
    /// Builds a `Typing` from a parsed type-expression pair.
    ///
    /// Composite rules are handled by recursing into their inner pairs.
    ///
    /// # Errors
    ///
    /// * [`CozoError::UndefinedType`] if a simple type name is not one of the built-ins.
    /// * Any error from [`build_name_in_def`] while reading a named-tuple field name.
    ///
    /// # Panics
    ///
    /// Panics if `pair` is not one of the type rules, or an expected inner pair is
    /// missing.
    fn from_pair(pair: Pair<Rule>) -> Result<Self> {
        let typing = match pair.as_rule() {
            Rule::simple_type => {
                let name = pair.as_str();
                match name {
                    "Any" => Typing::Any,
                    "Bool" => Typing::Bool,
                    "Int" => Typing::Int,
                    "Float" => Typing::Float,
                    "Text" => Typing::Text,
                    "Uuid" => Typing::Uuid,
                    "UInt" => Typing::UInt,
                    _ => return Err(CozoError::UndefinedType(name.to_string())),
                }
            }
            Rule::nullable_type => {
                let inner = pair.into_inner().next().unwrap();
                Typing::Nullable(Box::new(Typing::from_pair(inner)?))
            }
            Rule::homogeneous_list_type => {
                let element = pair.into_inner().next().unwrap();
                Typing::Homogeneous(Box::new(Typing::from_pair(element)?))
            }
            Rule::unnamed_tuple_type => {
                let mut members = Vec::new();
                for member in pair.into_inner() {
                    members.push(Typing::from_pair(member)?);
                }
                Typing::UnnamedTuple(members)
            }
            Rule::named_tuple_type => {
                let mut fields = Vec::new();
                for entry in pair.into_inner() {
                    // Each entry holds a name pair followed by a type pair.
                    let mut parts = entry.into_inner();
                    let name = build_name_in_def(parts.next().unwrap(), true)?;
                    let typ = Typing::from_pair(parts.next().unwrap())?;
                    fields.push((name, typ));
                }
                Typing::NamedTuple(fields)
            }
            _ => unreachable!(),
        };
        Ok(typing)
    }
}
/// Parses a type expression such as `?[Text]` from its textual form.
impl TryFrom<&str> for Typing {
    type Error = CozoError;

    /// Runs the `typing` grammar rule over `value` and converts the first resulting pair.
    ///
    /// # Errors
    ///
    /// Returns the parser error if `value` does not match the rule, or any error from
    /// converting the parsed pair.
    ///
    /// # Panics
    ///
    /// Panics if a successful parse yields no pair.
    fn try_from(value: &str) -> Result<Self> {
        let mut parsed = Parser::parse(Rule::typing, value)?;
        let root = parsed.next().unwrap();
        Typing::from_pair(root)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::Result;

    /// A nested nullable list renders in the compact type syntax.
    #[test]
    fn to_string() {
        let typing = Typing::Nullable(Box::new(Typing::Homogeneous(Box::new(Typing::Text))));
        assert_eq!(format!("{}", typing), "?[Text]");
    }

    /// Each sample type expression parses successfully.
    #[test]
    fn from_string() {
        let samples = [
            "?[Text]",
            "?(Text, [Int], ?Uuid)",
            "{xzzx : Text}",
            "?({x : Text, ppqp: ?UInt}, [Int], ?Uuid)",
        ];
        for &sample in samples.iter() {
            let res: Result<Typing> = sample.try_into();
            println!("{:#?}", res);
            assert!(res.is_ok());
        }
    }
}
Loading…
Cancel
Save