Support signed integers

next
Sayan Nandan 2 years ago
parent 9d51dc70fe
commit d1cba5d8b4
No known key found for this signature in database
GPG Key ID: 8BC07A0A4D41DD52

@ -37,15 +37,25 @@ pub enum DataType {
String(String),
/// Bytes
Binary(Vec<u8>),
/// An integer
Number(u64),
/// An unsigned integer
///
/// **NOTE:** This is the default evaluated type for unsigned integers by the query processor. It is the
/// responsibility of the executor to ensure integrity checks depending on actual type width in the declared
/// schema (if any)
UnsignedInt(u64),
/// A signed integer
///
/// **NOTE:** This is the default evaluated type for signed integers by the query processor. It is the
/// responsibility of the executor to ensure integrity checks depending on actual type width in the declared
/// schema (if any)
SignedInt(i64),
/// A boolean
Boolean(bool),
/// A single-type list. Note that you **need** to uphold the invariant that the [`DataType`] discriminant remains the same
/// for all elements to ensure correctness in this specific context
/// FIXME(@ohsayan): Try enforcing this somehow
List(Vec<Self>),
/// Not an actual data type but MUST be translated into an actual data type
/// **☢ WARNING:** Not an actual data type but MUST be translated into an actual data type
AnonymousTypeNeedsEval(RawSlice),
}
@ -53,7 +63,7 @@ enum_impls! {
DataType => {
String as String,
Vec<u8> as Binary,
u64 as Number,
u64 as UnsignedInt,
bool as Boolean,
Vec<Self> as List,
&'static str as String,

@ -81,9 +81,10 @@ pub(super) fn parse_list(
let d = match &tok[i] {
Token::Lit(l) => match l {
Lit::Str(s) => DataType::String(s.to_string()),
Lit::UnsignedInt(n) => DataType::Number(*n),
Lit::UnsignedInt(n) => DataType::UnsignedInt(*n),
Lit::Bool(b) => DataType::Boolean(*b),
Lit::UnsafeLit(l) => DataType::AnonymousTypeNeedsEval(l.clone()),
Lit::SignedInt(uint) => DataType::SignedInt(*uint),
},
Token::Symbol(Symbol::TtOpenSqBracket) => {
// a nested list
@ -152,6 +153,7 @@ pub(super) fn parse_data_tuple_syntax(tok: &[Token]) -> (Vec<Option<DataType>>,
data.push(Some((*b).into()));
}
Lit::UnsafeLit(r) => data.push(Some(DataType::AnonymousTypeNeedsEval(r.clone()))),
Lit::SignedInt(int) => data.push(Some(DataType::SignedInt(*int))),
},
Token::Symbol(Symbol::TtOpenSqBracket) => {
// ah, a list
@ -208,6 +210,7 @@ pub(super) fn parse_data_map_syntax<'a>(
Lit::Bool(b) => (*b).into(),
Lit::UnsignedInt(s) => (*s).into(),
Lit::UnsafeLit(l) => DataType::AnonymousTypeNeedsEval(l.clone()),
Lit::SignedInt(int) => DataType::SignedInt(*int),
};
okay &= data.insert(unsafe { id.as_slice() }, Some(dt)).is_none();
}

@ -75,6 +75,7 @@ pub enum Lit {
Str(Box<str>),
Bool(bool),
UnsignedInt(u64),
SignedInt(i64),
UnsafeLit(RawSlice),
}
@ -219,11 +220,11 @@ pub enum Type {
}
/*
This section implements DAGs, as described by Czech et al in their paper. I wrote these pretty much by brute-force using
a byte-level multiplicative function (inside a script). This unfortunately implies that every time we *do* need to add a
new keyword, I will need to recompute and rewrite the vertices. I don't plan to use any codegen, so I think this is good
as-is. The real challenge here is to keep the graph small, and I couldn't do that for the symbols table even with multiple
trials. Please see if you can improve them.
This section implements lookup tables (LUTs) constructed using DAGs, as described by Czech et al. in their paper. I wrote
these pretty much by brute force using a byte-level multiplicative function (inside a script). Unfortunately, this means that
every time we *do* need to add a new keyword, I will need to recompute and rewrite the vertices. I don't plan to use any
codegen, so I think this is good as-is. The real challenge here is to keep the graph small, and I couldn't do that for the
symbols table even after multiple trials. Please see if you can improve them.
Also, the functions are unique to every graph and every input set, so BE WARNED!
@ -501,7 +502,7 @@ impl<'a> Lexer<'a> {
}
}
fn scan_number(&mut self) {
fn scan_unsigned_integer(&mut self) {
let s = self.cursor();
unsafe {
while self.peek_is(|b| b.is_ascii_digit()) {
@ -615,6 +616,40 @@ impl<'a> Lexer<'a> {
}
}
/// Scans a token that begins with `-`.
///
/// If the byte after the `-` is an ASCII digit, the `-` and the run of digits that follows are parsed together
/// as an `i64` and pushed as a [`Lit::SignedInt`]. If no digit follows, a plain `-` token is pushed instead.
///
/// `last_error` is set to [`LangError::InvalidNumericLiteral`] when the text does not parse as an `i64`, or when
/// the post-literal check below fails.
#[inline(always)]
fn scan_signed_integer(&mut self) {
    unsafe {
        // UNSAFE(@review): steps over the leading byte. The visible caller (`_lex`) only dispatches here after
        // reading a `-` at the cursor; any other caller would need to uphold the same -- TODO confirm
        self.incr_cursor();
    }
    if !self.peek_is(|byte| byte.is_ascii_digit()) {
        // no digits follow, so this is just a minus symbol
        self.push_token(Token![-]);
        return;
    }
    let literal_start = unsafe {
        // UNSAFE(@ohsayan): Take the (-) into the parse
        // TODO(@ohsayan): we can maybe look at a more efficient way later
        self.cursor().sub(1)
    };
    // consume the run of digits (presumably the cursor only advances while the predicate holds)
    while self.peek_is_and_forward(|byte| byte.is_ascii_digit()) {}
    // the literal must not be directly followed by an ASCII letter; reaching the end of input is also accepted
    let terminated = self.peek_is(|byte| !byte.is_ascii_alphabetic()) || self.exhausted();
    let literal = unsafe {
        // UNSAFE(@review): the range spans from the byte before the digits up to the current cursor; it is
        // expected to contain only `-` and ASCII digits, which is valid UTF-8 -- verify against the cursor helpers
        let len = self.cursor().offset_from(literal_start) as usize;
        str::from_utf8_unchecked(slice::from_raw_parts(literal_start, len))
    };
    match literal.parse::<i64>() {
        Ok(value) if compiler::likely(terminated) => {
            self.push_token(Lit::SignedInt(value));
        }
        _ => {
            compiler::cold_err(self.last_error = Some(LangError::InvalidNumericLiteral));
        }
    }
}
fn _lex(&mut self) {
while self.not_exhausted() && self.last_error.is_none() {
match unsafe { self.deref_cursor() } {
@ -628,7 +663,8 @@ impl<'a> Lexer<'a> {
}
}
b'\r' => self.scan_unsafe_literal(),
byte if byte.is_ascii_digit() => self.scan_number(),
byte if byte.is_ascii_digit() => self.scan_unsigned_integer(),
b'-' => self.scan_signed_integer(),
qs @ (b'\'' | b'"') => self.scan_quoted_string(qs),
b' ' | b'\n' | b'\t' => self.trim_ahead(),
b => self.scan_byte(b),

@ -115,7 +115,7 @@ mod lexer_tests {
// literals
#[test]
fn lex_number() {
fn lex_unsigned_int() {
let number = v!("123456");
assert_eq!(
lex(&number).unwrap(),
@ -123,6 +123,14 @@ mod lexer_tests {
);
}
#[test]
fn lex_signed_int() {
    // a `-` directly followed by digits must lex as a single signed integer literal
    let source = v!("-123456");
    let expected = vec![Token::Lit(Lit::SignedInt(-123456))];
    assert_eq!(lex(&source).unwrap(), expected);
}
#[test]
fn lex_bool() {
let (t, f) = v!("true", "false");
assert_eq!(lex(&t).unwrap(), vec![Token::Lit(Lit::Bool(true))]);

Loading…
Cancel
Save