main
Ziyang Hu 2 years ago
parent 55304c621e
commit ca9c3ee831

@ -0,0 +1,130 @@
#[cfg(test)]
mod tests {
use super::*;
use crate::relation::tuple::Tuple;
use std::{fs, thread};
#[test]
fn push_get() {
{
let engine = Engine::new("_push_get".to_string(), false).unwrap();
let sess = engine.session().unwrap();
for i in (-80..-40).step_by(10) {
let mut ikey = Tuple::with_null_prefix();
ikey.push_int(i);
ikey.push_str("pqr");
println!("in {:?} {:?}", ikey, ikey.data);
sess.txn.put(true, &sess.perm_cf, &ikey, &ikey).unwrap();
let out = sess.txn.get(true, &sess.perm_cf, &ikey).unwrap();
let out = out.as_ref().map(Tuple::new);
println!("out {:?}", out);
}
let it = sess.txn.iterator(true, &sess.perm_cf);
it.to_first();
while let Some((key, val)) = unsafe { it.pair() } {
println!("a: {:?} {:?}", key.as_ref(), val.as_ref());
println!("v: {:?} {:?}", Tuple::new(key), Tuple::new(val));
it.next();
}
}
let _ = fs::remove_dir_all("_push_get");
}
#[test]
fn test_create() {
let p1 = "_test_db_create1";
let p2 = "_test_db_create2";
let p3 = "_test_db_create3";
{
{
let engine = Engine::new(p1.to_string(), true);
assert!(engine.is_ok());
let engine = Engine::new(p2.to_string(), false);
assert!(engine.is_ok());
let engine = Engine::new(p3.to_string(), true);
assert!(engine.is_ok());
let engine2 = Engine::new(p1.to_string(), false);
assert!(engine2.is_err());
println!("create OK");
}
let engine2 = Engine::new(p2.to_string(), false);
assert!(engine2.is_ok());
println!("start ok");
let engine2 = Arc::new(Engine::new(p3.to_string(), false).unwrap());
{
for _i in 0..10 {
let _sess = engine2.session().unwrap();
}
println!("sess OK");
let handles = engine2.session_handles.lock().unwrap();
println!("got handles {}", handles.len());
let cf_ident = &handles.first().unwrap().read().unwrap().cf_ident;
println!("Opening ok {}", cf_ident);
let cf = engine2.db.get_cf(cf_ident).unwrap();
assert!(!cf.is_null());
println!("Getting CF ok");
}
let mut thread_handles = vec![];
println!("concurrent");
for i in 0..10 {
let engine = engine2.clone();
thread_handles.push(thread::spawn(move || {
let mut sess = engine.session().unwrap();
println!("In thread {} {}", i, sess.handle.read().unwrap().cf_ident);
let gname = format!("abc{}", i);
for _ in 0..1000 {
sess.push_env().unwrap();
sess.define_variable(&gname, &"xyz".into(), true).unwrap();
sess.define_variable("pqr", &"xyz".into(), false).unwrap();
}
if i & 1 == 0 {
sess.commit().unwrap();
}
println!("pqr {:?}", sess.resolve("pqr"));
println!("uvw {:?}", sess.resolve("uvw"));
println!("aaa {} {:?}", &gname, sess.resolve(&gname));
let it = sess.txn.iterator(false, &sess.temp_cf);
it.to_first();
// for (key, val) in it.iter() {
// println!("a: {:?} {:?}", key.as_ref(), val.as_ref());
// println!("v: {:?}", Tuple::new(key));
// }
for _ in 0..50 {
sess.pop_env().unwrap();
}
// if let Err(e) = sess.commit() {
// println!("Err {} with {:?}", i, e);
// } else {
// println!("OK!!!! {}", i);
// sess.commit().unwrap();
// sess.commit().unwrap();
// println!("OK!!!!!!!! {}", i);
// }
// sess.commit().unwrap();
// sess.commit().unwrap();
println!("pqr {:?}", sess.resolve("pqr"));
println!("In thread {} end", i);
}))
}
for t in thread_handles {
t.join().unwrap();
}
println!("All OK");
{
let handles = engine2.session_handles.lock().unwrap();
println!(
"got handles {:#?}",
handles
.iter()
.map(|h| h.read().unwrap().cf_ident.to_string())
.collect::<Vec<_>>()
);
}
}
let _ = fs::remove_dir_all(p1);
let _ = fs::remove_dir_all(p2);
let _ = fs::remove_dir_all(p3);
}
}

@ -1,9 +1,3 @@
use crate::db::engine::Session;
use crate::error::{CozoError, Result};
use crate::relation::data::{DataKind, EMPTY_DATA};
use crate::relation::tuple::{OwnTuple, SliceTuple, Tuple};
use crate::relation::value::Value;
/// # layouts for sector 0
///
/// `[Null]`: stores information about table_ids

@ -1,17 +1,3 @@
use crate::db::cnf_transform::{cnf_transform, extract_tables};
use crate::db::engine::Session;
use crate::db::plan::AccessorMap;
use crate::db::table::{ColId, TableId};
use crate::error::CozoError::{InvalidArgument, LogicError};
use crate::error::{CozoError, Result};
use crate::relation::data::DataKind;
use crate::relation::table::MegaTuple;
use crate::relation::value;
use crate::relation::value::Value;
use std::borrow::Cow;
use std::cmp::{max, min, Ordering};
use std::collections::{BTreeMap, BTreeSet};
pub fn extract_table_ref<'a>(
tuples: &'a MegaTuple,
tid: &TableId,

@ -98,6 +98,7 @@ pub(crate) trait Op: Send + Sync {
)
}
fn partial_eval<'a>(&self, args: Vec<Expr<'a>>) -> Result<Option<Expr<'a>>> {
// usually those functions that needs specialized implementations are those with arity None
if let Some(arity) = self.arity() {
if arity != args.len() {
return Err(EvalError::ArityMismatch(self.name().to_string(), arity))

@ -1,7 +1,9 @@
use std::result;
use crate::data::eval::EvalError;
use crate::data::expr::Expr;
use crate::data::op::Op;
use crate::data::value::Value;
use crate::data::typing::Typing;
use crate::data::value::{StaticValue, Value};
type Result<T> = result::Result<T, EvalError>;
@ -65,6 +67,9 @@ impl Op for OpCoalesce {
(l, _r) => Ok(l)
}
}
fn partial_eval<'a>(&self, args: Vec<Expr<'a>>) -> crate::data::op::Result<Option<Expr<'a>>> {
todo!()
}
}
pub(crate) struct OpOr;
@ -110,6 +115,9 @@ impl Op for OpOr {
))
}
}
fn partial_eval<'a>(&self, args: Vec<Expr<'a>>) -> crate::data::op::Result<Option<Expr<'a>>> {
todo!()
}
}
pub(crate) struct OpAnd;
@ -155,4 +163,7 @@ impl Op for OpAnd {
))
}
}
fn partial_eval<'a>(&self, args: Vec<Expr<'a>>) -> crate::data::op::Result<Option<Expr<'a>>> {
todo!()
}
}

@ -1,8 +1,10 @@
use std::collections::BTreeMap;
use std::result;
use crate::data::eval::EvalError;
use crate::data::expr::Expr;
use crate::data::op::Op;
use crate::data::value::Value;
use crate::data::typing::Typing;
use crate::data::value::{StaticValue, Value};
type Result<T> = result::Result<T, EvalError>;
@ -32,6 +34,9 @@ impl Op for OpConcat {
}
Ok(coll.into())
}
fn partial_eval<'a>(&self, args: Vec<Expr<'a>>) -> crate::data::op::Result<Option<Expr<'a>>> {
todo!()
}
}
pub(crate) struct OpMerge;
@ -60,4 +65,7 @@ impl Op for OpMerge {
}
Ok(coll.into())
}
fn partial_eval<'a>(&self, args: Vec<Expr<'a>>) -> crate::data::op::Result<Option<Expr<'a>>> {
todo!()
}
}

@ -1,11 +0,0 @@
// pub mod cnf_transform;
// pub mod ddl;
// pub mod engine;
// pub mod env;
// pub mod eval;
// pub mod iterator;
// pub mod mutation;
// pub mod plan;
// pub mod query;
// pub mod table;

@ -1,366 +0,0 @@
// single engine per db storage
// will be shared among threads
use crate::error::CozoError::{Poisoned, SessionErr};
use crate::error::{CozoError, Result};
use crate::relation::tuple::{Tuple, PREFIX_LEN};
use cozorocks::*;
use rand::Rng;
use std::sync::{Arc, Mutex, RwLock};
use std::time::{SystemTime, UNIX_EPOCH};
use uuid::v1::{Context, Timestamp};
use uuid::Uuid;
pub struct EngineOptions {
cmp: RustComparatorPtr,
options: OptionsPtr,
t_options: TDBOptions,
path: String,
uuid_ctx: Context,
}
pub struct Engine {
pub db: DBPtr,
pub options_store: Box<EngineOptions>,
session_handles: Mutex<Vec<Arc<RwLock<SessionHandle>>>>,
}
unsafe impl Send for Engine {}
unsafe impl Sync for Engine {}
impl Engine {
pub fn new(path: String, optimistic: bool) -> Result<Self> {
let t_options = if optimistic {
TDBOptions::Optimistic(OTxnDBOptionsPtr::default())
} else {
TDBOptions::Pessimistic(PTxnDBOptionsPtr::default())
};
let cmp = RustComparatorPtr::new("cozo_cmp_v1", crate::relation::key_order::compare, false);
let mut options = OptionsPtr::default();
options
.set_comparator(&cmp)
.increase_parallelism()
.optimize_level_style_compaction()
.set_create_if_missing(true)
.set_paranoid_checks(false)
.set_bloom_filter(10., true)
.set_fixed_prefix_extractor(PREFIX_LEN);
let mut rng = rand::thread_rng();
let uuid_ctx = Context::new(rng.gen());
let e_options = Box::new(EngineOptions {
cmp,
options,
t_options,
path,
uuid_ctx,
});
let db = DBPtr::open(&e_options.options, &e_options.t_options, &e_options.path)?;
db.drop_non_default_cfs();
Ok(Self {
db,
options_store: e_options,
session_handles: Mutex::new(vec![]),
})
}
pub fn session(&self) -> Result<Session> {
// find a handle if there is one available
// otherwise create a new one
let mut guard = self
.session_handles
.lock()
.map_err(|_| CozoError::Poisoned)?;
let old_handle = guard
.iter()
.find(|v| match v.read() {
Ok(content) => content.status == SessionStatus::Completed,
Err(_) => false,
})
.cloned();
let handle = match old_handle {
None => {
let now = SystemTime::now();
let since_epoch = now.duration_since(UNIX_EPOCH)?;
let ts = Timestamp::from_unix(
&self.options_store.uuid_ctx,
since_epoch.as_secs(),
since_epoch.subsec_nanos(),
);
let mut rng = rand::thread_rng();
let id = Uuid::new_v1(
ts,
&[
rng.gen(),
rng.gen(),
rng.gen(),
rng.gen(),
rng.gen(),
rng.gen(),
],
)?;
let cf_ident = id.to_string();
self.db.create_cf(&self.options_store.options, &cf_ident)?;
let ret = Arc::new(RwLock::new(SessionHandle {
cf_ident,
status: SessionStatus::Prepared,
}));
guard.push(ret.clone());
ret
}
Some(h) => h,
};
let mut sess = Session {
engine: self,
stack_depth: 0,
txn: TransactionPtr::null(),
perm_cf: SharedPtr::null(),
temp_cf: SharedPtr::null(),
handle,
};
sess.start()?;
Ok(sess)
}
}
pub struct Session<'a> {
pub engine: &'a Engine,
pub stack_depth: i32,
pub handle: Arc<RwLock<SessionHandle>>,
pub txn: TransactionPtr,
pub perm_cf: SharedPtr<ColumnFamilyHandle>,
pub temp_cf: SharedPtr<ColumnFamilyHandle>,
}
// every session has its own column family to play with
// metadata are stored in table 0
impl<'a> Session<'a> {
pub fn start(&mut self) -> Result<()> {
self.start_with_total_seek(false)
}
fn start_with_total_seek(&mut self, total_seek: bool) -> Result<()> {
self.perm_cf = self.engine.db.default_cf();
assert!(!self.perm_cf.is_null());
self.temp_cf = self
.engine
.db
.get_cf(&self.handle.read().map_err(|_| Poisoned)?.cf_ident)
.ok_or(SessionErr)?;
assert!(!self.temp_cf.is_null());
let t_options = match self.engine.options_store.t_options {
TDBOptions::Pessimistic(_) => TransactOptions::Pessimistic(PTxnOptionsPtr::default()),
TDBOptions::Optimistic(_) => {
TransactOptions::Optimistic(OTxnOptionsPtr::new(&self.engine.options_store.cmp))
}
};
let mut r_opts = ReadOptionsPtr::default();
let mut rx_opts = ReadOptionsPtr::default();
if total_seek {
r_opts
.set_total_order_seek(true)
.set_prefix_same_as_start(false)
.set_auto_prefix_mode(true);
rx_opts
.set_total_order_seek(true)
.set_prefix_same_as_start(false)
.set_auto_prefix_mode(true);
} else {
r_opts
.set_total_order_seek(false)
.set_prefix_same_as_start(true)
.set_auto_prefix_mode(false);
rx_opts
.set_total_order_seek(false)
.set_prefix_same_as_start(true)
.set_auto_prefix_mode(false);
}
let w_opts = WriteOptionsPtr::default();
let mut wx_opts = WriteOptionsPtr::default();
wx_opts.set_disable_wal(true);
self.txn = self
.engine
.db
.make_transaction(t_options, r_opts, rx_opts, w_opts, wx_opts);
if self.txn.is_null() {
panic!("Starting session failed as opening transaction failed");
}
self.handle.write().map_err(|_| Poisoned)?.status = SessionStatus::Running;
Ok(())
}
pub fn commit(&self) -> Result<()> {
self.txn.commit()?;
Ok(())
}
pub fn rollback(&mut self) -> Result<()> {
self.txn.rollback()?;
Ok(())
}
pub fn finish_work(&mut self) -> Result<()> {
self.txn
.del_range(&self.temp_cf, Tuple::with_null_prefix(), Tuple::max_tuple())?;
self.txn.compact_all(&self.temp_cf)?;
// let mut options = FlushOptionsPtr::default();
// options.set_allow_write_stall(true).set_flush_wait(true);
// self.txn.flush(&self.temp_cf, options)?;
Ok(())
}
}
impl<'a> Drop for Session<'a> {
fn drop(&mut self) {
if let Err(e) = self.finish_work() {
eprintln!("Dropping session failed {:?}", e);
}
if let Ok(mut h) = self.handle.write().map_err(|_| Poisoned) {
h.status = SessionStatus::Completed;
} else {
eprintln!("Accessing lock of session handle failed");
}
}
}
#[derive(Clone, Debug)]
pub struct SessionHandle {
cf_ident: String,
status: SessionStatus,
}
#[derive(Eq, PartialEq, Debug, Clone)]
pub enum SessionStatus {
Prepared,
Running,
Completed,
}
#[cfg(test)]
mod tests {
use super::*;
use crate::relation::tuple::Tuple;
use std::{fs, thread};
#[test]
fn push_get() {
{
let engine = Engine::new("_push_get".to_string(), false).unwrap();
let sess = engine.session().unwrap();
for i in (-80..-40).step_by(10) {
let mut ikey = Tuple::with_null_prefix();
ikey.push_int(i);
ikey.push_str("pqr");
println!("in {:?} {:?}", ikey, ikey.data);
sess.txn.put(true, &sess.perm_cf, &ikey, &ikey).unwrap();
let out = sess.txn.get(true, &sess.perm_cf, &ikey).unwrap();
let out = out.as_ref().map(Tuple::new);
println!("out {:?}", out);
}
let it = sess.txn.iterator(true, &sess.perm_cf);
it.to_first();
while let Some((key, val)) = unsafe { it.pair() } {
println!("a: {:?} {:?}", key.as_ref(), val.as_ref());
println!("v: {:?} {:?}", Tuple::new(key), Tuple::new(val));
it.next();
}
}
let _ = fs::remove_dir_all("_push_get");
}
#[test]
fn test_create() {
let p1 = "_test_db_create1";
let p2 = "_test_db_create2";
let p3 = "_test_db_create3";
{
{
let engine = Engine::new(p1.to_string(), true);
assert!(engine.is_ok());
let engine = Engine::new(p2.to_string(), false);
assert!(engine.is_ok());
let engine = Engine::new(p3.to_string(), true);
assert!(engine.is_ok());
let engine2 = Engine::new(p1.to_string(), false);
assert!(engine2.is_err());
println!("create OK");
}
let engine2 = Engine::new(p2.to_string(), false);
assert!(engine2.is_ok());
println!("start ok");
let engine2 = Arc::new(Engine::new(p3.to_string(), false).unwrap());
{
for _i in 0..10 {
let _sess = engine2.session().unwrap();
}
println!("sess OK");
let handles = engine2.session_handles.lock().unwrap();
println!("got handles {}", handles.len());
let cf_ident = &handles.first().unwrap().read().unwrap().cf_ident;
println!("Opening ok {}", cf_ident);
let cf = engine2.db.get_cf(cf_ident).unwrap();
assert!(!cf.is_null());
println!("Getting CF ok");
}
let mut thread_handles = vec![];
println!("concurrent");
for i in 0..10 {
let engine = engine2.clone();
thread_handles.push(thread::spawn(move || {
let mut sess = engine.session().unwrap();
println!("In thread {} {}", i, sess.handle.read().unwrap().cf_ident);
let gname = format!("abc{}", i);
for _ in 0..1000 {
sess.push_env().unwrap();
sess.define_variable(&gname, &"xyz".into(), true).unwrap();
sess.define_variable("pqr", &"xyz".into(), false).unwrap();
}
if i & 1 == 0 {
sess.commit().unwrap();
}
println!("pqr {:?}", sess.resolve("pqr"));
println!("uvw {:?}", sess.resolve("uvw"));
println!("aaa {} {:?}", &gname, sess.resolve(&gname));
let it = sess.txn.iterator(false, &sess.temp_cf);
it.to_first();
// for (key, val) in it.iter() {
// println!("a: {:?} {:?}", key.as_ref(), val.as_ref());
// println!("v: {:?}", Tuple::new(key));
// }
for _ in 0..50 {
sess.pop_env().unwrap();
}
// if let Err(e) = sess.commit() {
// println!("Err {} with {:?}", i, e);
// } else {
// println!("OK!!!! {}", i);
// sess.commit().unwrap();
// sess.commit().unwrap();
// println!("OK!!!!!!!! {}", i);
// }
// sess.commit().unwrap();
// sess.commit().unwrap();
println!("pqr {:?}", sess.resolve("pqr"));
println!("In thread {} end", i);
}))
}
for t in thread_handles {
t.join().unwrap();
}
println!("All OK");
{
let handles = engine2.session_handles.lock().unwrap();
println!(
"got handles {:#?}",
handles
.iter()
.map(|h| h.read().unwrap().cf_ident.to_string())
.collect::<Vec<_>>()
);
}
}
let _ = fs::remove_dir_all(p1);
let _ = fs::remove_dir_all(p2);
let _ = fs::remove_dir_all(p3);
}
}

@ -1,116 +0,0 @@
use crate::parser::Rule;
use crate::relation::data::DataKind;
use crate::relation::tuple::OwnTuple;
use crate::relation::value::StaticValue;
use cozorocks::BridgeError;
use std::result;
use std::time::SystemTimeError;
use thiserror::Error;
#[derive(Error, Debug)]
pub enum CozoError {
#[error("Invalid UTF code")]
InvalidUtfCode,
#[error("Invalid escape sequence")]
InvalidEscapeSequence,
// #[error("Type mismatch")]
// TypeError,
#[error("Reserved identifier")]
ReservedIdent,
// #[error("The requested name exists")]
// NameConflict,
//
#[error("Undefined type '{0}'")]
UndefinedType(String),
#[error("Undefined data kind {0}")]
UndefinedDataKind(u32),
#[error("Unexpected data kind {0:?}")]
UnexpectedDataKind(DataKind),
#[error("Logic error: {0}")]
LogicError(String),
#[error("Key conflict: {0:?}")]
KeyConflict(OwnTuple),
#[error("Key not found: {0:?}")]
KeyNotFound(OwnTuple),
#[error("Bad data format {0:?}")]
BadDataFormat(Vec<u8>),
#[error("Duplicate names in {0:?}")]
DuplicateNames(Vec<String>),
#[error("Undefined parameter {0}")]
UndefinedParam(String),
//
// #[error("Undefined table")]
// UndefinedTable,
//
// #[error("Undefined parameter")]
// UndefinedParam,
//
// #[error("Value required")]
// ValueRequired,
//
// #[error("Incompatible value")]
// IncompatibleValue,
//
// #[error("Wrong type")]
// WrongType,
//
// #[error("Cannot have global edge between local nodes")]
// IncompatibleEdge,
//
// #[error("Unexpected index columns found")]
// UnexpectedIndexColumns,
//
// #[error("Database already closed")]
// DatabaseClosed,
#[error("InvalidArgument")]
InvalidArgument,
#[error(transparent)]
ParseInt(#[from] std::num::ParseIntError),
#[error(transparent)]
ParseFloat(#[from] std::num::ParseFloatError),
#[error(transparent)]
Parse(#[from] pest::error::Error<Rule>),
#[error("Not null constraint violated")]
NotNullViolated(StaticValue),
#[error("Type mismatch")]
TypeMismatch,
// #[error(transparent)]
// Storage(#[from] cozo_rocks::BridgeStatus),
//
#[error(transparent)]
Io(#[from] std::io::Error),
#[error("Session error")]
SessionErr,
#[error("Poisoned locks")]
Poisoned,
#[error(transparent)]
SysTime(#[from] SystemTimeError),
#[error(transparent)]
Uuid(#[from] uuid::Error),
#[error(transparent)]
Bridge(#[from] BridgeError),
}
pub type Result<T> = result::Result<T, CozoError>;

@ -1,70 +0,0 @@
use std::collections::BTreeMap;
use std::ops::Range;
use std::sync::Arc;
use crate::relation::value::Value;
use crate::error::Result;
use crate::relation::table::MegaTuple;
use crate::relation::typing::Typing;
pub struct InterpretContext;
pub struct ApplyContext;
pub struct ArgsIterator<'a> {
value: Value<'a>
}
pub struct TypingIterator;
pub trait Op {
fn arity(&self) -> Range<usize>;
fn apply_raw<'a>(&self, arg: Value<'a>) -> Result<Value<'a>>;
fn typing(&self, arg_types: TypingIterator) -> Result<Typing>;
fn apply<'a>(&self, ctx: &ApplyContext, args: ArgsIterator<'a>) -> Result<Value<'a>>;
fn interpret<'a>(&self, ctx: &InterpretContext, arg: ArgsIterator<'a>) -> Result<(Value<'a>, bool)>;
}
pub trait AggregationOp {
}
// NOTE: the interpreter can hold global states for itself
// Tiers of values:
//
// * Scalars
// * Aggregates
// * Typings
// * RelPlan
// * QueryTemplate
// Global DB should be a C++ sharedptr
// Every session gets its own temp DB
// Sessions are reused after use
// lower sectors layouts
// [env_stack_depth; string_name, flags*] -> resolvable data
// [env_stack_depth; tid, flags*] -> table definitions
// table 10000 for serial numbers (auto incrementing, no transaction)
// tables start at 10001
pub struct DBInstance;
impl DBInstance {
pub fn get_meta_by_id() {}
pub fn get_meta_by_name() {}
pub fn put_meta() {}
}
pub struct InterpreterSession {
global_db: DBInstance,
local_db: DBInstance,
env_depth: usize,
session_params: BTreeMap<String, Arc<dyn Op>>,
}
impl InterpreterSession {
pub fn push_env() {}
pub fn pop_env() {}
pub fn destroy() {}
}

@ -1,8 +1,3 @@
// pub mod db;
// pub mod error;
// pub mod relation;
// pub(crate) mod eval;
// pub(crate) mod db;
pub(crate) mod data;
pub(crate) mod logger;
pub(crate) mod parser;

@ -1,6 +0,0 @@
pub mod data;
pub mod key_order;
pub mod table;
pub mod tuple;
pub mod typing;
pub mod value;

@ -1,44 +0,0 @@
use crate::error::{CozoError, Result};
use crate::relation::tuple::Tuple;
use crate::relation::typing::Typing;
use std::borrow::Borrow;
//
// #[repr(u32)]
// #[derive(Ord, PartialOrd, Eq, PartialEq, Debug, Clone)]
// pub enum DataKind {
// Data = 0,
// Node = 1,
// Edge = 2,
// Assoc = 3,
// Index = 4,
// Val = 5,
// Type = 6,
// Empty = u32::MAX,
// }
// // In storage, key layout is `[0, name, stack_depth]` where stack_depth is a non-positive number as zigzag
// // Also has inverted index `[0, stack_depth, name]` for easy popping of stacks
//
// pub const EMPTY_DATA: [u8; 4] = u32::MAX.to_be_bytes();
//
// impl<T: AsRef<[u8]>> Tuple<T> {
// pub fn data_kind(&self) -> Result<DataKind> {
// use DataKind::*;
// Ok(match self.get_prefix() {
// 0 => Data,
// 1 => Node,
// 2 => Edge,
// 3 => Assoc,
// 4 => Index,
// 5 => Val,
// 6 => Type,
// u32::MAX => Empty,
// v => return Err(CozoError::UndefinedDataKind(v)),
// })
// }
// pub fn interpret_as_type(&self) -> Result<Typing> {
// let text = self
// .get_text(0)
// .ok_or_else(|| CozoError::BadDataFormat(self.as_ref().to_vec()))?;
// Typing::try_from(text.borrow())
// }
// }

@ -1,70 +0,0 @@
use crate::relation::tuple::Tuple;
use std::cmp::Ordering;
impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialOrd<Tuple<T2>> for Tuple<T> {
fn partial_cmp(&self, other: &Tuple<T2>) -> Option<Ordering> {
match self.get_prefix().cmp(&other.get_prefix()) {
x @ (Ordering::Less | Ordering::Greater) => return Some(x),
Ordering::Equal => {}
}
Some(self.iter().cmp(other.iter()))
}
}
impl<T: AsRef<[u8]>> Ord for Tuple<T> {
fn cmp(&self, other: &Self) -> Ordering {
self.partial_cmp(other).unwrap()
}
}
pub fn compare(a: &[u8], b: &[u8]) -> i8 {
let ta = Tuple::new(a);
let tb = Tuple::new(b);
match ta.cmp(&tb) {
Ordering::Less => -1,
Ordering::Greater => 1,
Ordering::Equal => 0,
}
}
#[cfg(test)]
mod tests {
use crate::relation::key_order::compare;
use crate::relation::tuple::Tuple;
use crate::relation::value::Value;
use std::collections::BTreeMap;
#[test]
fn ordering() {
let mut t = Tuple::with_prefix(0);
let t2 = Tuple::with_prefix(123);
assert_eq!(compare(t.as_ref(), t.as_ref()), 0);
assert_eq!(compare(t.as_ref(), t2.as_ref()), -1);
assert_eq!(compare(t2.as_ref(), t.as_ref()), 1);
let mut t2 = Tuple::with_prefix(0);
t.push_str("aaa");
t2.push_str("aaac");
assert_eq!(compare(t.as_ref(), t2.as_ref()), -1);
let mut t2 = Tuple::with_prefix(0);
t2.push_str("aaa");
t2.push_null();
assert_eq!(compare(t.as_ref(), t2.as_ref()), -1);
t.push_null();
assert_eq!(compare(t.as_ref(), t2.as_ref()), 0);
t.push_int(-123);
t2.push_int(123);
assert_eq!(compare(t.as_ref(), t2.as_ref()), -1);
assert_eq!(compare(t.as_ref(), t.as_ref()), 0);
let vals: Value = vec![
().into(),
BTreeMap::new().into(),
1e23.into(),
false.into(),
Value::from("xxyx"),
]
.into();
t.push_value(&vals);
assert_eq!(compare(t.as_ref(), t.as_ref()), 0);
}
}

@ -1,84 +0,0 @@
use crate::relation::tuple::CowTuple;
use crate::relation::typing::Typing;
use std::cmp::Ordering;
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Clone)]
pub struct StorageId {
pub cf: String,
pub tid: u32,
}
pub struct Column {
pub name: String,
pub typ: Typing,
}
pub struct StoredRelation {
pub keys: Vec<Column>,
pub vals: Vec<Column>,
}
pub enum Table {
NodeTable {
name: String,
stored: StoredRelation,
},
EdgeTable {
name: String,
src: Box<Table>,
dst: Box<Table>,
stored: StoredRelation,
},
AssociateTable {
name: String,
src: Box<Table>,
stored: StoredRelation,
},
IndexTable {
name: String,
src: Box<Table>,
stored: StoredRelation,
},
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MegaTuple {
pub keys: Vec<CowTuple>,
pub vals: Vec<CowTuple>,
}
impl MegaTuple {
pub fn empty_tuple() -> Self {
MegaTuple {
keys: vec![],
vals: vec![],
}
}
pub fn is_empty(&self) -> bool {
self.keys.is_empty()
}
pub fn extend(&mut self, other: Self) {
self.keys.extend(other.keys);
self.vals.extend(other.vals);
}
pub fn all_keys_eq(&self, other: &Self) -> bool {
if self.keys.len() != other.keys.len() {
return false;
}
for (l, r) in self.keys.iter().zip(&other.keys) {
if !l.key_part_eq(r) {
return false;
}
}
true
}
pub fn all_keys_cmp(&self, other: &Self) -> Ordering {
for (l, r) in self.keys.iter().zip(&other.keys) {
match l.key_part_cmp(r) {
Ordering::Equal => {}
v => return v,
}
}
Ordering::Equal
}
}

@ -1,923 +0,0 @@
use crate::db::table::{ColId, TableId};
use crate::relation::data::DataKind;
use crate::relation::value::{Tag, Value};
use cozorocks::SlicePtr;
use std::borrow::Cow;
use std::cell::RefCell;
use std::cmp::{Ordering, Reverse};
use std::collections::BTreeMap;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use uuid::Uuid;
#[derive(Clone)]
pub struct Tuple<T>
where
T: AsRef<[u8]>,
{
pub data: T,
idx_cache: RefCell<Vec<usize>>,
}
impl<T> Tuple<T>
where
T: AsRef<[u8]>,
{
pub fn clear_cache(&self) {
self.idx_cache.borrow_mut().clear()
}
}
impl<T> AsRef<[u8]> for Tuple<T>
where
T: AsRef<[u8]>,
{
fn as_ref(&self) -> &[u8] {
self.data.as_ref()
}
}
#[derive(Clone)]
pub enum CowSlice {
Ptr(SlicePtr),
Own(Vec<u8>),
}
impl From<SlicePtr> for CowSlice {
fn from(p: SlicePtr) -> Self {
CowSlice::Ptr(p)
}
}
impl From<Vec<u8>> for CowSlice {
fn from(v: Vec<u8>) -> Self {
CowSlice::Own(v)
}
}
// impl Clone for CowSlice {
// fn clone(&self) -> Self {
// match self {
// CowSlice::Ptr(p) => { CowSlice::Ptr(p.clone()) }
// CowSlice::Own(o) => { CowSlice::Own(o.clone()) }
// }
// }
// }
impl AsRef<[u8]> for CowSlice {
fn as_ref(&self) -> &[u8] {
match self {
CowSlice::Ptr(s) => s.as_ref(),
CowSlice::Own(o) => o.as_ref(),
}
}
}
impl From<SliceTuple> for CowTuple {
fn from(s: SliceTuple) -> Self {
Tuple::new(CowSlice::Ptr(s.data))
}
}
impl From<OwnTuple> for CowTuple {
fn from(o: OwnTuple) -> Self {
Tuple::new(CowSlice::Own(o.data))
}
}
impl CowTuple {
pub fn to_owned(self) -> OwnTuple {
match self.data {
CowSlice::Ptr(p) => OwnTuple::new(p.as_ref().to_vec()),
CowSlice::Own(o) => OwnTuple::new(o),
}
}
}
pub type CowTuple = Tuple<CowSlice>;
pub type OwnTuple = Tuple<Vec<u8>>;
pub type SliceTuple = Tuple<SlicePtr>;
pub const PREFIX_LEN: usize = 4;
impl<T: AsRef<[u8]>> Tuple<T> {
#[inline]
pub fn starts_with<T2: AsRef<[u8]>>(&self, other: &Tuple<T2>) -> bool {
self.data.as_ref().starts_with(other.data.as_ref())
}
#[inline]
pub fn key_part_eq<T2: AsRef<[u8]>>(&self, other: &Tuple<T2>) -> bool {
self.data.as_ref()[PREFIX_LEN..] == other.data.as_ref()[PREFIX_LEN..]
}
#[inline]
pub fn key_part_cmp<T2: AsRef<[u8]>>(&self, other: &Tuple<T2>) -> Ordering {
self.iter().cmp(other.iter())
}
#[inline]
pub fn new(data: T) -> Self {
Self {
data,
idx_cache: RefCell::new(vec![]),
}
}
#[inline]
pub fn get_prefix(&self) -> u32 {
u32::from_be_bytes(self.data.as_ref()[0..4].try_into().unwrap())
}
#[inline]
fn all_cached(&self) -> bool {
match self.idx_cache.borrow().last() {
None => self.data.as_ref().len() == PREFIX_LEN,
Some(l) => *l == self.data.as_ref().len(),
}
}
#[inline]
fn get_pos(&self, idx: usize) -> Option<usize> {
if idx == 0 {
if self.data.as_ref().len() > PREFIX_LEN {
Some(PREFIX_LEN)
} else {
None
}
} else {
self.cache_until(idx);
self.idx_cache.borrow().get(idx - 1).cloned()
}
}
#[inline]
fn cache_until(&self, idx: usize) {
while self.idx_cache.borrow().len() < idx && !self.all_cached() {
self.skip_and_cache();
}
}
#[inline]
fn skip_and_cache(&self) {
let data = self.data.as_ref();
let tag_start = *self.idx_cache.borrow().last().unwrap_or(&PREFIX_LEN);
let mut start = tag_start + 1;
let nxt;
loop {
nxt = match Tag::try_from(data[tag_start]).unwrap() {
Tag::Null | Tag::BoolTrue | Tag::BoolFalse => start,
Tag::Int => start + self.parse_varint(start).1,
Tag::Float => start + 8,
Tag::Uuid => start + 16,
Tag::Text | Tag::Variable | Tag::Bytes => {
let (slen, offset) = self.parse_varint(start);
let slen = slen as usize;
start + slen + offset
}
Tag::List | Tag::Apply | Tag::Dict | Tag::IdxAccess | Tag::FieldAccess => {
start + u32::from_be_bytes(data[start..start + 4].try_into().unwrap()) as usize
}
Tag::TupleRef => {
let temp = start + 1 + self.parse_varint(start + 1).1 + 1;
temp + self.parse_varint(temp).1
}
Tag::DescVal => {
start += 1;
continue;
}
Tag::MaxTag => panic!(),
};
break;
}
self.idx_cache.borrow_mut().push(nxt);
}
#[inline]
fn parse_varint(&self, idx: usize) -> (u64, usize) {
let data = self.data.as_ref();
let mut cur = idx;
let mut u: u64 = 0;
let mut shift = 0;
loop {
let buf = data[cur];
cur += 1;
u |= ((buf & 0b01111111) as u64) << shift;
if buf & 0b10000000 == 0 {
break;
}
shift += 7;
}
(u, cur - idx)
}
#[inline]
pub fn get(&self, idx: usize) -> Option<Value> {
match self.get_pos(idx) {
Some(v) => {
if v == self.data.as_ref().len() {
return None;
}
let (val, nxt) = self.parse_value_at(v);
if idx == self.idx_cache.borrow().len() {
self.idx_cache.borrow_mut().push(nxt);
}
Some(val)
}
None => None,
}
}
#[inline]
pub fn get_null(&self, idx: usize) -> Option<()> {
match self.get(idx)? {
Value::Null => Some(()),
_ => None,
}
}
#[inline]
pub fn get_int(&self, idx: usize) -> Option<i64> {
match self.get(idx)? {
Value::Int(i) => Some(i),
_ => None,
}
}
#[inline]
pub fn get_text(&self, idx: usize) -> Option<Cow<str>> {
match self.get(idx)? {
Value::Text(d) => Some(d),
_ => None,
}
}
#[inline]
pub fn get_bool(&self, idx: usize) -> Option<bool> {
match self.get(idx)? {
Value::Bool(b) => Some(b),
_ => None,
}
}
#[inline]
pub fn get_float(&self, idx: usize) -> Option<f64> {
match self.get(idx)? {
Value::Float(f) => Some(f.into_inner()),
_ => None,
}
}
#[inline]
pub fn get_uuid(&self, idx: usize) -> Option<Uuid> {
match self.get(idx)? {
Value::Uuid(u) => Some(u),
_ => None,
}
}
#[inline]
pub fn get_list(&self, idx: usize) -> Option<Vec<Value>> {
match self.get(idx)? {
Value::List(u) => Some(u),
_ => None,
}
}
#[inline]
pub fn get_dict(&self, idx: usize) -> Option<BTreeMap<Cow<str>, Value>> {
match self.get(idx)? {
Value::Dict(u) => Some(u),
_ => None,
}
}
#[inline]
pub fn get_variable(&self, idx: usize) -> Option<Cow<str>> {
match self.get(idx)? {
Value::Variable(u) => Some(u),
_ => None,
}
}
#[inline]
pub fn get_apply(&self, idx: usize) -> Option<(Cow<str>, Vec<Value>)> {
match self.get(idx)? {
Value::Apply(n, l) => Some((n, l)),
_ => None,
}
}
#[inline]
fn parse_value_at(&self, pos: usize) -> (Value, usize) {
let data = self.data.as_ref();
let start = pos + 1;
let tag = match Tag::try_from(data[pos]) {
Ok(t) => t,
Err(e) => panic!("Cannot parse tag {} for {:?}", e, data),
};
let (nxt, val): (usize, Value) = match tag {
Tag::Null => (start, ().into()),
Tag::BoolTrue => (start, true.into()),
Tag::BoolFalse => (start, false.into()),
Tag::Int => {
let (u, offset) = self.parse_varint(start);
let val = Self::varint_to_zigzag(u);
(start + offset, val.into())
}
Tag::Float => (
start + 8,
f64::from_be_bytes(data[start..start + 8].try_into().unwrap()).into(),
),
Tag::Uuid => (
start + 16,
Uuid::from_slice(&data[start..start + 16]).unwrap().into(),
),
Tag::Text => {
let (slen, offset) = self.parse_varint(start);
let slen = slen as usize;
let s = unsafe {
std::str::from_utf8_unchecked(&data[start + offset..start + offset + slen])
};
(start + slen + offset, s.into())
}
Tag::Bytes => {
let (slen, offset) = self.parse_varint(start);
let slen = slen as usize;
let s = &data[start + offset..start + offset + slen];
(start + slen + offset, s.into())
}
Tag::Variable => {
let (slen, offset) = self.parse_varint(start);
let slen = slen as usize;
let s = unsafe {
std::str::from_utf8_unchecked(&data[start + offset..start + offset + slen])
};
(start + slen + offset, Value::Variable(s.into()))
}
Tag::List => {
let end_pos =
start + u32::from_be_bytes(data[start..start + 4].try_into().unwrap()) as usize;
let mut start_pos = start + 4;
let mut collected = vec![];
while start_pos < end_pos {
let (val, new_pos) = self.parse_value_at(start_pos);
collected.push(val);
start_pos = new_pos;
}
(end_pos, collected.into())
}
Tag::Apply => {
let end_pos =
start + u32::from_be_bytes(data[start..start + 4].try_into().unwrap()) as usize;
let mut start_pos = start + 4;
let mut collected = vec![];
let (val, new_pos) = self.parse_value_at(start_pos);
start_pos = new_pos;
let op = match val {
Value::Variable(s) => s,
_ => panic!("Corrupt data when parsing Apply"),
};
while start_pos < end_pos {
let (val, new_pos) = self.parse_value_at(start_pos);
collected.push(val);
start_pos = new_pos;
}
(end_pos, Value::Apply(op, collected))
}
Tag::Dict => {
let end_pos =
start + u32::from_be_bytes(data[start..start + 4].try_into().unwrap()) as usize;
let mut start_pos = start + 4;
let mut collected: BTreeMap<Cow<str>, Value> = BTreeMap::new();
while start_pos < end_pos {
let (slen, offset) = self.parse_varint(start_pos);
start_pos += offset;
let key = unsafe {
std::str::from_utf8_unchecked(&data[start_pos..start_pos + slen as usize])
};
start_pos += slen as usize;
let (val, new_pos) = self.parse_value_at(start_pos);
collected.insert(key.into(), val);
start_pos = new_pos;
}
(end_pos, collected.into())
}
Tag::MaxTag => (start, Value::EndSentinel),
Tag::IdxAccess => {
let end_pos =
start + u32::from_be_bytes(data[start..start + 4].try_into().unwrap()) as usize;
let mut start_pos = start + 4;
let (idx, offset) = self.parse_varint(start_pos);
start_pos += offset;
let (val, _) = self.parse_value_at(start_pos);
(end_pos, Value::IdxAccess(idx as usize, val.into()))
}
Tag::FieldAccess => {
let end_pos =
start + u32::from_be_bytes(data[start..start + 4].try_into().unwrap()) as usize;
let mut start_pos = start + 4;
let (slen, offset) = self.parse_varint(start);
let slen = slen as usize;
let field = unsafe {
std::str::from_utf8_unchecked(&data[start + offset..start + offset + slen])
};
start_pos += slen + offset;
let (val, _) = self.parse_value_at(start_pos);
(end_pos, Value::FieldAccess(field.into(), val.into()))
}
Tag::TupleRef => {
let in_root = self.parse_value_at(start).0 == Value::Bool(true);
let (tidu, parse_len) = self.parse_varint(start + 1);
let is_key = self.parse_value_at(parse_len + start + 1).0 == Value::Bool(true);
let (cidu, parse_len2) = self.parse_varint(start + 1 + parse_len + 1);
(
start + 1 + parse_len + 1 + parse_len2,
Value::TupleRef(
TableId {
in_root,
id: Self::varint_to_zigzag(tidu),
},
ColId {
is_key,
id: Self::varint_to_zigzag(cidu),
},
),
)
}
Tag::DescVal => {
let (val, offset) = self.parse_value_at(pos + 1);
(offset, Value::DescSort(Reverse(val.into())))
}
};
(val, nxt)
}
fn varint_to_zigzag(u: u64) -> i64 {
if u & 1 == 0 {
(u >> 1) as i64
} else {
-((u >> 1) as i64) - 1
}
}
pub fn iter(&self) -> TupleIter<T> {
TupleIter {
tuple: self,
pos: 4,
}
}
}
impl<T: AsRef<[u8]>> Debug for Tuple<T> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self.data_kind() {
Ok(data_kind) => {
write!(f, "Tuple<{:?}>{{", data_kind)?;
}
Err(_) => {
write!(f, "Tuple<{}>{{", self.get_prefix())?;
}
}
let strings = self
.iter()
.enumerate()
.map(|(i, v)| format!("{}: {}", i, v))
.collect::<Vec<_>>()
.join(", ");
write!(f, "{}}}", strings)
}
}
pub struct TupleIter<'a, T: AsRef<[u8]>> {
tuple: &'a Tuple<T>,
pos: usize,
}
impl<'a, T: AsRef<[u8]>> Iterator for TupleIter<'a, T> {
type Item = Value<'a>;
fn next(&mut self) -> Option<Self::Item> {
if self.pos == self.tuple.data.as_ref().len() {
return None;
}
let (v, pos) = self.tuple.parse_value_at(self.pos);
self.pos = pos;
Some(v)
}
}
impl OwnTuple {
#[inline]
pub fn truncate_all(&mut self) {
self.clear_cache();
self.data.truncate(PREFIX_LEN);
}
#[inline]
pub fn empty_tuple() -> OwnTuple {
OwnTuple::with_data_prefix(DataKind::Empty)
}
#[inline]
pub fn with_null_prefix() -> Self {
Tuple::with_prefix(0)
}
#[inline]
pub fn with_data_prefix(prefix: DataKind) -> Self {
Tuple::with_prefix(prefix as u32)
}
#[inline]
pub fn with_prefix(prefix: u32) -> Self {
let data = Vec::from(prefix.to_be_bytes());
Self {
data,
idx_cache: RefCell::new(vec![]),
}
}
#[inline]
pub fn overwrite_prefix(&mut self, prefix: u32) {
let bytes = prefix.to_be_bytes();
self.data[..4].clone_from_slice(&bytes[..4]);
}
#[inline]
pub fn max_tuple() -> Self {
let mut ret = Tuple::with_prefix(u32::MAX);
ret.seal_with_sentinel();
ret
}
#[inline]
pub fn seal_with_sentinel(&mut self) {
self.push_tag(Tag::MaxTag);
}
#[inline]
fn push_tag(&mut self, tag: Tag) {
self.data.push(tag as u8);
}
#[inline]
pub fn push_null(&mut self) {
self.push_tag(Tag::Null);
self.idx_cache.borrow_mut().push(self.data.len());
}
#[inline]
pub fn push_bool(&mut self, b: bool) {
self.push_tag(if b { Tag::BoolTrue } else { Tag::BoolFalse });
self.idx_cache.borrow_mut().push(self.data.len());
}
#[inline]
pub fn push_int(&mut self, i: i64) {
self.push_tag(Tag::Int);
self.push_zigzag(i);
self.idx_cache.borrow_mut().push(self.data.len());
}
#[inline]
pub fn push_float(&mut self, f: f64) {
self.push_tag(Tag::Float);
self.data.extend(f.to_be_bytes());
self.idx_cache.borrow_mut().push(self.data.len());
}
#[inline]
pub fn push_uuid(&mut self, u: Uuid) {
self.push_tag(Tag::Uuid);
self.data.extend(u.as_bytes());
self.idx_cache.borrow_mut().push(self.data.len());
}
#[inline]
pub fn push_str(&mut self, s: impl AsRef<str>) {
let s = s.as_ref();
self.push_tag(Tag::Text);
self.push_varint(s.len() as u64);
self.data.extend_from_slice(s.as_bytes());
self.idx_cache.borrow_mut().push(self.data.len());
}
#[inline]
pub fn push_bytes(&mut self, b: impl AsRef<[u8]>) {
let b = b.as_ref();
self.push_tag(Tag::Bytes);
self.push_varint(b.len() as u64);
self.data.extend_from_slice(b);
self.idx_cache.borrow_mut().push(self.data.len());
}
#[inline]
pub fn push_variable(&mut self, s: impl AsRef<str>) {
let s = s.as_ref();
self.push_tag(Tag::Variable);
self.push_varint(s.len() as u64);
self.data.extend_from_slice(s.as_bytes());
self.idx_cache.borrow_mut().push(self.data.len());
}
#[inline]
pub fn push_reverse_value(&mut self, v: &Value) {
self.push_tag(Tag::DescVal);
let start_len = self.idx_cache.borrow().len();
self.push_value(v);
let mut cache = self.idx_cache.borrow_mut();
cache.truncate(start_len);
cache.push(self.data.len());
}
#[inline]
pub fn push_value(&mut self, v: &Value) {
match v {
Value::Null => self.push_null(),
Value::Bool(b) => self.push_bool(*b),
Value::Int(i) => self.push_int(*i),
Value::Float(f) => self.push_float(f.into_inner()),
Value::Uuid(u) => self.push_uuid(*u),
Value::Text(t) => self.push_str(t),
Value::Bytes(b) => self.push_bytes(b),
Value::Variable(s) => self.push_variable(s),
Value::List(l) => {
self.push_tag(Tag::List);
let start_pos = self.data.len();
let start_len = self.idx_cache.borrow().len();
self.data.extend(0u32.to_be_bytes());
for val in l {
self.push_value(val);
}
let length = (self.data.len() - start_pos) as u32;
let length_bytes = length.to_be_bytes();
self.data[start_pos..(4 + start_pos)].clone_from_slice(&length_bytes[..4]);
let mut cache = self.idx_cache.borrow_mut();
cache.truncate(start_len);
cache.push(self.data.len());
}
Value::Apply(op, args) => {
self.push_tag(Tag::Apply);
let start_pos = self.data.len();
let start_len = self.idx_cache.borrow().len();
self.data.extend(0u32.to_be_bytes());
self.push_variable(op);
for val in args {
self.push_value(val);
}
let length = (self.data.len() - start_pos) as u32;
let length_bytes = length.to_be_bytes();
self.data[start_pos..(4 + start_pos)].clone_from_slice(&length_bytes[..4]);
let mut cache = self.idx_cache.borrow_mut();
cache.truncate(start_len);
cache.push(self.data.len());
}
Value::FieldAccess(field, arg) => {
self.push_tag(Tag::IdxAccess);
let start_pos = self.data.len();
let start_len = self.idx_cache.borrow().len();
self.data.extend(0u32.to_be_bytes());
self.push_varint(field.len() as u64);
self.data.extend_from_slice(field.as_bytes());
self.push_value(arg);
let length = (self.data.len() - start_pos) as u32;
let length_bytes = length.to_be_bytes();
self.data[start_pos..(4 + start_pos)].clone_from_slice(&length_bytes[..4]);
let mut cache = self.idx_cache.borrow_mut();
cache.truncate(start_len);
cache.push(self.data.len());
}
Value::IdxAccess(idx, arg) => {
self.push_tag(Tag::IdxAccess);
let start_pos = self.data.len();
let start_len = self.idx_cache.borrow().len();
self.data.extend(0u32.to_be_bytes());
self.push_varint(*idx as u64);
self.push_value(arg);
let length = (self.data.len() - start_pos) as u32;
let length_bytes = length.to_be_bytes();
self.data[start_pos..(4 + start_pos)].clone_from_slice(&length_bytes[..4]);
let mut cache = self.idx_cache.borrow_mut();
cache.truncate(start_len);
cache.push(self.data.len());
}
Value::Dict(d) => {
self.push_tag(Tag::Dict);
let start_pos = self.data.len();
let start_len = self.idx_cache.borrow().len();
self.data.extend(0u32.to_be_bytes());
for (k, v) in d {
self.push_varint(k.len() as u64);
self.data.extend_from_slice(k.as_bytes());
self.push_value(v);
}
let length = (self.data.len() - start_pos) as u32;
let length_bytes = length.to_be_bytes();
self.data[start_pos..(4 + start_pos)].clone_from_slice(&length_bytes[..4]);
let mut cache = self.idx_cache.borrow_mut();
cache.truncate(start_len);
cache.push(self.data.len());
}
Value::TupleRef(tid, cid) => {
self.push_tag(Tag::Dict);
let start_len = self.idx_cache.borrow().len();
self.push_bool(tid.in_root);
self.push_int(tid.id);
self.push_bool(cid.is_key);
self.push_int(cid.id);
let mut cache = self.idx_cache.borrow_mut();
cache.truncate(start_len);
cache.push(self.data.len());
}
Value::EndSentinel => panic!("Cannot push sentinel value"),
Value::DescSort(Reverse(v)) => {
self.push_reverse_value(v);
}
}
}
#[inline]
fn push_varint(&mut self, u: u64) {
let mut u = u;
while u > 0b01111111 {
self.data.push(0b10000000 | (u as u8 & 0b01111111));
u >>= 7;
}
self.data.push(u as u8);
}
#[inline]
fn push_zigzag(&mut self, i: i64) {
let u: u64 = if i >= 0 {
(i as u64) << 1
} else {
// Convoluted, to prevent overflow when calling .abs()
(((i + 1).abs() as u64) << 1) + 1
};
self.push_varint(u);
}
#[inline]
pub fn concat_data<T: AsRef<[u8]>>(&mut self, other: &Tuple<T>) {
let other_data_part = &other.as_ref()[4..];
self.data.extend_from_slice(other_data_part);
}
#[inline]
pub fn insert_values_at<'a, T: AsRef<[Value<'a>]>>(&self, idx: usize, values: T) -> Self {
let mut new_tuple = Tuple::with_prefix(self.get_prefix());
for v in self.iter().take(idx) {
new_tuple.push_value(&v);
}
for v in values.as_ref() {
new_tuple.push_value(v);
}
for v in self.iter().skip(idx) {
new_tuple.push_value(&v);
}
new_tuple
}
}
impl<'a> Extend<Value<'a>> for OwnTuple {
#[inline]
fn extend<T: IntoIterator<Item = Value<'a>>>(&mut self, iter: T) {
for v in iter {
self.push_value(&v)
}
}
}
impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Tuple<T2>> for Tuple<T> {
#[inline]
fn eq(&self, other: &Tuple<T2>) -> bool {
self.data.as_ref() == other.data.as_ref()
}
}
impl<T: AsRef<[u8]>> Hash for Tuple<T> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.data.as_ref().hash(state);
}
}
impl<T: AsRef<[u8]>> Eq for Tuple<T> {}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::BTreeMap;
#[test]
fn serde() {
let mut t = Tuple::with_prefix(0);
t.push_null();
t.push_bool(true);
t.push_bool(false);
t.push_null();
t.push_str("abcdef");
t.push_null();
t.push_value(&vec![true.into(), 1e236.into(), Value::from("xxyyzz")].into());
t.push_int(-123345);
t.push_value(&BTreeMap::from([]).into());
t.push_int(12121212);
t.push_value(&BTreeMap::from([("yzyz".into(), "fifo".into())]).into());
t.push_float(1e245);
t.push_bool(false);
assert!(t.all_cached());
assert_eq!(t.idx_cache.borrow().len(), 13);
let ot = t;
let t = Tuple::new(ot.data.as_slice());
let t3 = Tuple::new(ot.data.as_slice());
assert_eq!(Value::from(()), t.get(0).unwrap());
t3.get_pos(1);
assert_eq!(t.idx_cache.borrow().last(), t3.idx_cache.borrow().last());
assert_eq!(Value::from(true), t.get(1).unwrap());
t3.get_pos(2);
assert_eq!(t.idx_cache.borrow().last(), t3.idx_cache.borrow().last());
assert_eq!(Value::from(false), t.get(2).unwrap());
t3.get_pos(3);
assert_eq!(t.idx_cache.borrow().last(), t3.idx_cache.borrow().last());
assert_eq!(Value::Null, t.get(3).unwrap());
t3.get_pos(4);
assert_eq!(t.idx_cache.borrow().last(), t3.idx_cache.borrow().last());
assert_eq!(Value::from("abcdef"), t.get(4).unwrap());
t3.get_pos(5);
assert_eq!(t.idx_cache.borrow().last(), t3.idx_cache.borrow().last());
assert_eq!(Value::Null, t.get(5).unwrap());
t3.get_pos(6);
assert_eq!(t.idx_cache.borrow().last(), t3.idx_cache.borrow().last());
assert_eq!(
Value::from(Value::from(vec![
true.into(),
1e236.into(),
Value::from("xxyyzz"),
])),
t.get(6).unwrap()
);
t3.get_pos(7);
assert_eq!(t.idx_cache.borrow().last(), t3.idx_cache.borrow().last());
assert_eq!(Value::from(-123345i64), t.get(7).unwrap());
t3.get_pos(8);
assert_eq!(t.idx_cache.borrow().last(), t3.idx_cache.borrow().last());
assert_eq!(Value::from(BTreeMap::new()), t.get(8).unwrap());
t3.get_pos(9);
assert_eq!(t.idx_cache.borrow().last(), t3.idx_cache.borrow().last());
assert_eq!(Value::from(12121212i64), t.get(9).unwrap());
t3.get_pos(10);
assert_eq!(t.idx_cache.borrow().last(), t3.idx_cache.borrow().last());
assert_eq!(
Value::from(BTreeMap::from([("yzyz".into(), "fifo".into())])),
t.get(10).unwrap()
);
t3.get_pos(11);
assert_eq!(t.idx_cache.borrow().last(), t3.idx_cache.borrow().last());
assert_eq!(Value::from(1e245), t.get(11).unwrap());
t3.get_pos(12);
assert_eq!(t.idx_cache.borrow().last(), t3.idx_cache.borrow().last());
assert_eq!(Value::from(false), t.get(12).unwrap());
t3.get_pos(13);
assert_eq!(t.idx_cache.borrow().last(), t3.idx_cache.borrow().last());
assert_eq!(None, t.get(13));
assert_eq!(None, t.get(13131));
let t = Tuple::new(ot.data.as_slice());
assert_eq!(Value::Null, t.get(5).unwrap());
assert_eq!(Value::from(true), t.get(1).unwrap());
assert_eq!(Value::from(true), t.get(1).unwrap());
assert_eq!(Value::from(1e245), t.get(11).unwrap());
assert_eq!(Value::from(false), t.get(12).unwrap());
assert_eq!(Value::from(()), t.get(0).unwrap());
assert_eq!(Value::from(false), t.get(2).unwrap());
assert_eq!(Value::from(12121212i64), t.get(9).unwrap());
assert_eq!(Value::from(BTreeMap::new()), t.get(8).unwrap());
assert_eq!(Value::Null, t.get(3).unwrap());
assert_eq!(Value::from("abcdef"), t.get(4).unwrap());
assert_eq!(
Value::from(Value::from(vec![
true.into(),
1e236.into(),
Value::from("xxyyzz"),
])),
t.get(6).unwrap()
);
assert_eq!(None, t.get(13));
assert_eq!(Value::from(-123345i64), t.get(7).unwrap());
assert_eq!(
Value::from(BTreeMap::from([("yzyz".into(), "fifo".into())])),
t.get(10).unwrap()
);
assert_eq!(None, t.get(13131));
println!("{:?}", t.iter().collect::<Vec<Value>>());
for v in t.iter() {
println!("{}", v);
}
}
/*
#[test]
fn lifetime() {
let v;
{
let s : Vec<u8> = vec![];
let s = s.as_slice();
let p = Tuple::new(s);
v = p.get(0);
}
println!("{:?}", v);
}
*/
#[test]
fn particular() {
let mut v = Tuple::with_prefix(0);
v.push_str("pqr");
v.push_int(-64);
println!("{:?} {:?}", v, v.data);
}
}

@ -1,274 +0,0 @@
use crate::db::engine::Session;
use crate::error::{CozoError, Result};
use crate::parser::text_identifier::build_name_in_def;
use crate::parser::Parser;
use crate::parser::Rule;
use crate::relation::data::DataKind;
use crate::relation::value::Value;
use pest::iterators::Pair;
use pest::Parser as PestParser;
use std::fmt::{Display, Formatter};
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Clone)]
pub enum Typing {
Any,
Bool,
Int,
Float,
Text,
Uuid,
Nullable(Box<Typing>),
Homogeneous(Box<Typing>),
UnnamedTuple(Vec<Typing>),
NamedTuple(Vec<(String, Typing)>),
Function(Vec<Typing>, Box<Typing>),
}
impl Display for Typing {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Typing::Any => write!(f, "Any"),
Typing::Bool => write!(f, "Bool"),
Typing::Int => write!(f, "Int"),
Typing::Float => write!(f, "Float"),
Typing::Text => write!(f, "Text"),
Typing::Uuid => write!(f, "Uuid"),
Typing::Nullable(t) => write!(f, "?{}", t),
Typing::Homogeneous(h) => write!(f, "[{}]", h),
Typing::UnnamedTuple(u) => {
let collected = u.iter().map(|v| v.to_string()).collect::<Vec<_>>();
let joined = collected.join(",");
write!(f, "({})", joined)
}
Typing::NamedTuple(n) => {
let collected = n
.iter()
.map(|(k, v)| format!(r##""{}":{}"##, k, v))
.collect::<Vec<_>>();
let joined = collected.join(",");
write!(f, "{{")?;
write!(f, "{}", joined)?;
write!(f, "}}")
}
Typing::Function(args, ret) => {
let args_display = args
.iter()
.map(|t| t.to_string())
.collect::<Vec<_>>()
.join(",");
write!(f, "<{}>->{}", args_display, ret)
}
}
}
}
impl Typing {
#[inline]
pub fn to_storage(&self, _v: Value) -> Option<Value> {
todo!()
}
#[inline]
pub fn to_display(&self, _v: Value) -> Option<Value> {
todo!()
}
}
impl Typing {
pub fn from_pair<'a>(pair: Pair<Rule>, env: Option<&Session<'a>>) -> Result<Self> {
Ok(match pair.as_rule() {
Rule::simple_type => match pair.as_str() {
"Any" => Typing::Any,
"Bool" => Typing::Bool,
"Int" => Typing::Int,
"Float" => Typing::Float,
"Text" => Typing::Text,
"Uuid" => Typing::Uuid,
t => match env {
None => return Err(CozoError::UndefinedType(t.to_string())),
Some(env) => {
let resolved = env.resolve(t)?;
let resolved =
resolved.ok_or_else(|| CozoError::UndefinedType(t.to_string()))?;
match resolved.data_kind()? {
DataKind::Type => resolved.interpret_as_type()?,
_ => return Err(CozoError::UndefinedType(t.to_string())),
}
}
},
},
Rule::nullable_type => Typing::Nullable(Box::new(Typing::from_pair(
pair.into_inner().next().unwrap(),
env,
)?)),
Rule::homogeneous_list_type => Typing::Homogeneous(Box::new(Typing::from_pair(
pair.into_inner().next().unwrap(),
env,
)?)),
Rule::unnamed_tuple_type => {
let types = pair
.into_inner()
.map(|p| Typing::from_pair(p, env))
.collect::<Result<Vec<Typing>>>()?;
Typing::UnnamedTuple(types)
}
Rule::named_tuple_type => {
let types = pair
.into_inner()
.map(|p| -> Result<(String, Typing)> {
let mut ps = p.into_inner();
let name_pair = ps.next().unwrap();
let name = build_name_in_def(name_pair, true)?;
let typ_pair = ps.next().unwrap();
let typ = Typing::from_pair(typ_pair, env)?;
Ok((name, typ))
})
.collect::<Result<Vec<(String, Typing)>>>()?;
Typing::NamedTuple(types)
}
Rule::function_type => {
let mut pairs = pair.into_inner();
let args = pairs
.next()
.unwrap()
.into_inner()
.map(|p| Typing::from_pair(p, env))
.collect::<Result<Vec<_>>>()?;
let ret = Typing::from_pair(pairs.next().unwrap(), env)?;
Typing::Function(args, ret.into())
}
_ => unreachable!(),
})
}
pub fn extract_named_tuple(self) -> Option<Vec<(String, Typing)>> {
match self {
Typing::NamedTuple(t) => Some(t),
_ => None,
}
}
pub fn coerce<'a>(&self, v: Value<'a>) -> Result<Value<'a>> {
if *self == Typing::Any {
return Ok(v);
}
if v == Value::Null {
return if matches!(self, Typing::Nullable(_)) {
Ok(Value::Null)
} else {
Err(CozoError::NotNullViolated(v.to_static()))
};
}
if let Typing::Nullable(t) = self {
return t.coerce(v);
}
match self {
Typing::Bool => self.coerce_bool(v),
Typing::Int => self.coerce_int(v),
Typing::Float => self.coerce_float(v),
Typing::Text => self.coerce_text(v),
Typing::Uuid => self.coerce_uuid(v),
Typing::Homogeneous(t) => match v {
Value::List(vs) => Ok(Value::List(
vs.into_iter()
.map(|v| t.coerce(v))
.collect::<Result<Vec<_>>>()?,
)),
_ => Err(CozoError::TypeMismatch),
},
Typing::UnnamedTuple(_ut) => {
todo!()
}
Typing::NamedTuple(_nt) => {
todo!()
}
Typing::Any => unreachable!(),
Typing::Nullable(_) => unreachable!(),
Typing::Function(_, _) => Err(CozoError::LogicError(
"Cannot coerce function types".to_string(),
)),
}
}
fn coerce_bool<'a>(&self, v: Value<'a>) -> Result<Value<'a>> {
match v {
v @ Value::Bool(_) => Ok(v),
_ => Err(CozoError::TypeMismatch),
}
}
fn coerce_int<'a>(&self, v: Value<'a>) -> Result<Value<'a>> {
match v {
v @ Value::Int(_) => Ok(v),
_ => Err(CozoError::TypeMismatch),
}
}
fn coerce_float<'a>(&self, v: Value<'a>) -> Result<Value<'a>> {
match v {
v @ Value::Float(_) => Ok(v),
_ => Err(CozoError::TypeMismatch),
}
}
fn coerce_text<'a>(&self, v: Value<'a>) -> Result<Value<'a>> {
match v {
v @ Value::Text(_) => Ok(v),
_ => Err(CozoError::TypeMismatch),
}
}
fn coerce_uuid<'a>(&self, v: Value<'a>) -> Result<Value<'a>> {
match v {
v @ Value::Uuid(_) => Ok(v),
_ => Err(CozoError::TypeMismatch),
}
}
}
impl TryFrom<&str> for Typing {
type Error = CozoError;
fn try_from(value: &str) -> Result<Self> {
let pair = Parser::parse(Rule::typing, value)?.next().unwrap();
Typing::from_pair(pair, None)
}
}
//
// #[cfg(test)]
// mod tests {
// use super::*;
// use crate::error::Result;
//
// #[test]
// fn to_string() {
// assert_eq!(
// format!(
// "{}",
// Typing::Nullable(Box::new(Typing::Homogeneous(Box::new(Typing::Text))))
// ),
// "?[Text]"
// );
// }
//
// #[test]
// fn from_string() {
// let res: Result<Typing> = "?[Text]".try_into();
// println!("{:#?}", res);
// assert!(res.is_ok());
// let res: Result<Typing> = "?(Text, [Int], ?Uuid)".try_into();
// println!("{:#?}", res);
// assert!(res.is_ok());
// let res: Result<Typing> = "{xzzx : Text}".try_into();
// println!("{:#?}", res);
// assert!(res.is_ok());
// let res: Result<Typing> = "?({x : Text, ppqp: ?Int}, [Int], ?Uuid)".try_into();
// println!("{:#?}", res);
// assert!(res.is_ok());
// let res: Result<Typing> = "??Int".try_into();
// println!("{:#?}", res);
// assert!(res.is_err());
// let res: Result<Typing> = "<Int, Int, ?Int>->Any".try_into();
// println!("{:#?}", res);
// assert!(res.is_ok());
// let res: Result<Typing> = "<>->Any".try_into();
// println!("{:#?}", res);
// assert!(res.is_ok());
// }
// }

@ -1,800 +0,0 @@
use crate::db::table::{ColId, TableId};
use crate::error::CozoError::LogicError;
use crate::error::{CozoError, Result};
use crate::parser::number::parse_int;
use crate::parser::text_identifier::parse_string;
use crate::parser::{Parser, Rule};
use lazy_static::lazy_static;
use ordered_float::OrderedFloat;
use pest::iterators::Pair;
use pest::prec_climber::{Assoc, Operator, PrecClimber};
use pest::Parser as PestParser;
use std::borrow::Cow;
use std::cmp::{min, Reverse};
use std::collections::BTreeMap;
use std::fmt::{Debug, Display, Formatter, Write};
use uuid::Uuid;
#[repr(u8)]
#[derive(Ord, PartialOrd, Eq, PartialEq)]
pub enum Tag {
BoolFalse = 1,
Null = 2,
BoolTrue = 3,
Int = 4,
Float = 5,
Text = 6,
Uuid = 7,
Bytes = 64,
List = 128,
Dict = 129,
DescVal = 192,
TupleRef = 250,
IdxAccess = 251,
FieldAccess = 252,
Variable = 253,
Apply = 254,
MaxTag = 255,
}
impl TryFrom<u8> for Tag {
type Error = u8;
#[inline]
fn try_from(u: u8) -> std::result::Result<Tag, u8> {
use self::Tag::*;
Ok(match u {
1 => BoolFalse,
2 => Null,
3 => BoolTrue,
4 => Int,
5 => Float,
6 => Text,
7 => Uuid,
64 => Bytes,
128 => List,
129 => Dict,
192 => DescVal,
250 => TupleRef,
251 => IdxAccess,
252 => FieldAccess,
253 => Variable,
254 => Apply,
255 => MaxTag,
v => return Err(v),
})
}
}
// Timestamp = 23,
// Datetime = 25,
// Timezone = 27,
// Date = 27,
// Time = 29,
// Duration = 31,
// BigInt = 51,
// BigDecimal = 53,
// Inet = 55,
// Crs = 57,
// BitArr = 60,
// U8Arr = 61,
// I8Arr = 62,
// U16Arr = 63,
// I16Arr = 64,
// U32Arr = 65,
// I32Arr = 66,
// U64Arr = 67,
// I64Arr = 68,
// F16Arr = 69,
// F32Arr = 70,
// F64Arr = 71,
// C32Arr = 72,
// C64Arr = 73,
// C128Arr = 74,
#[derive(Clone, PartialEq, Ord, PartialOrd, Eq)]
pub enum Value<'a> {
// evaluated
Null,
Bool(bool),
Int(i64),
Float(OrderedFloat<f64>),
Uuid(Uuid),
Text(Cow<'a, str>),
Bytes(Cow<'a, [u8]>),
// maybe evaluated
List(Vec<Value<'a>>),
Dict(BTreeMap<Cow<'a, str>, Value<'a>>),
// not evaluated
Variable(Cow<'a, str>),
TupleRef(TableId, ColId),
Apply(Cow<'a, str>, Vec<Value<'a>>),
// TODO optimization: special case for small number of args (esp. 0, 1, 2)
FieldAccess(Cow<'a, str>, Box<Value<'a>>),
IdxAccess(usize, Box<Value<'a>>),
DescSort(DescVal<'a>),
// cannot exist
EndSentinel,
}
// DescVal are kind of "write-only": you construct them, write them to keys to let the database do
// the sorting, and never read them back in program logic. In particular there is no need for them
// to support operators and methods.
pub type DescVal<'a> = Reverse<Box<Value<'a>>>;
pub type StaticValue = Value<'static>;
impl<'a> Debug for Value<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "Value {{ {} }}", self)
}
}
impl<'a> Value<'a> {
#[inline]
pub fn to_static(self) -> StaticValue {
match self {
Value::Null => Value::from(()),
Value::Bool(b) => Value::from(b),
Value::Int(i) => Value::from(i),
Value::Float(f) => Value::from(f),
Value::Uuid(u) => Value::from(u),
Value::Text(t) => Value::from(t.into_owned()),
Value::Variable(s) => Value::Variable(Cow::Owned(s.into_owned())),
Value::List(l) => l
.into_iter()
.map(|v| v.to_static())
.collect::<Vec<StaticValue>>()
.into(),
Value::Apply(op, args) => Value::Apply(
Cow::Owned(op.into_owned()),
args.into_iter()
.map(|v| v.to_static())
.collect::<Vec<StaticValue>>(),
),
Value::Dict(d) => d
.into_iter()
.map(|(k, v)| (Cow::Owned(k.into_owned()), v.to_static()))
.collect::<BTreeMap<Cow<'static, str>, StaticValue>>()
.into(),
Value::EndSentinel => panic!("Cannot process sentinel value"),
Value::FieldAccess(field, value) => {
Value::FieldAccess(Cow::from(field.into_owned()), value.to_static().into())
}
Value::IdxAccess(idx, value) => Value::IdxAccess(idx, value.to_static().into()),
Value::TupleRef(tid, cid) => Value::TupleRef(tid, cid),
Value::DescSort(Reverse(val)) => Value::DescSort(Reverse(val.to_static().into())),
Value::Bytes(t) => Value::from(t.into_owned()),
}
}
#[inline]
pub fn is_evaluated(&self) -> bool {
match self {
Value::Null
| Value::Bool(_)
| Value::Int(_)
| Value::Float(_)
| Value::Uuid(_)
| Value::Text(_)
| Value::Bytes(_)
| Value::EndSentinel => true,
Value::List(l) => l.iter().all(|v| v.is_evaluated()),
Value::Dict(d) => d.values().all(|v| v.is_evaluated()),
Value::Variable(_) => false,
Value::Apply(_, _) => false,
Value::FieldAccess(_, _) => false,
Value::IdxAccess(_, _) => false,
Value::TupleRef(_, _) => false,
Value::DescSort(Reverse(v)) => v.is_evaluated(),
}
}
#[inline]
pub fn from_pair(pair: pest::iterators::Pair<'a, Rule>) -> Result<Self> {
PREC_CLIMBER.climb(pair.into_inner(), build_expr_primary, build_expr_infix)
}
#[inline]
pub fn parse_str(s: &'a str) -> Result<Self> {
let pair = Parser::parse(Rule::expr, s)?.next();
let pair = pair.ok_or_else(|| CozoError::LogicError("Parsing value failed".to_string()))?;
Value::from_pair(pair)
}
pub fn extract_relevant_tables<T: Iterator<Item = Self>>(
data: T,
) -> Result<(Vec<Self>, Vec<TableId>)> {
let mut coll = vec![];
let mut res = Vec::with_capacity(data.size_hint().1.unwrap_or(0));
for v in data {
res.push(v.do_extract_relevant_tables(&mut coll)?);
}
Ok((res, coll))
}
fn do_extract_relevant_tables(self, coll: &mut Vec<TableId>) -> Result<Self> {
Ok(match self {
v @ (Value::Null
| Value::Bool(_)
| Value::Int(_)
| Value::Float(_)
| Value::Uuid(_)
| Value::Text(_)
| Value::Bytes(_)
| Value::Variable(_)) => v,
Value::List(l) => Value::List(
l.into_iter()
.map(|v| v.do_extract_relevant_tables(coll))
.collect::<Result<Vec<_>>>()?,
),
Value::Dict(d) => Value::Dict(
d.into_iter()
.map(|(k, v)| v.do_extract_relevant_tables(coll).map(|v| (k, v)))
.collect::<Result<BTreeMap<_, _>>>()?,
),
Value::TupleRef(tid, cid) => {
let pos = coll.iter().position(|id| id == &tid).unwrap_or_else(|| {
let olen = coll.len();
coll.push(tid);
olen
});
Value::TupleRef((false, pos).into(), cid)
}
Value::Apply(op, args) => Value::Apply(
op,
args.into_iter()
.map(|v| v.do_extract_relevant_tables(coll))
.collect::<Result<Vec<_>>>()?,
),
Value::FieldAccess(field, arg) => {
Value::FieldAccess(field, arg.do_extract_relevant_tables(coll)?.into())
}
Value::IdxAccess(idx, arg) => {
Value::IdxAccess(idx, arg.do_extract_relevant_tables(coll)?.into())
}
Value::EndSentinel => {
return Err(LogicError("Encountered end sentinel".to_string()));
}
Value::DescSort(Reverse(v)) => {
return v.do_extract_relevant_tables(coll);
}
})
}
}
impl From<()> for StaticValue {
#[inline]
fn from(_: ()) -> Self {
Value::Null
}
}
impl From<bool> for StaticValue {
#[inline]
fn from(b: bool) -> Self {
Value::Bool(b)
}
}
impl From<i64> for StaticValue {
#[inline]
fn from(i: i64) -> Self {
Value::Int(i)
}
}
impl From<i32> for StaticValue {
#[inline]
fn from(i: i32) -> Self {
Value::Int(i as i64)
}
}
impl From<f64> for StaticValue {
#[inline]
fn from(f: f64) -> Self {
Value::Float(f.into())
}
}
impl From<OrderedFloat<f64>> for StaticValue {
#[inline]
fn from(f: OrderedFloat<f64>) -> Self {
Value::Float(f)
}
}
impl<'a> From<&'a str> for Value<'a> {
#[inline]
fn from(s: &'a str) -> Self {
Value::Text(Cow::Borrowed(s))
}
}
impl<'a> From<&'a [u8]> for Value<'a> {
#[inline]
fn from(v: &'a [u8]) -> Self {
Value::Bytes(Cow::Borrowed(v))
}
}
impl From<String> for StaticValue {
#[inline]
fn from(s: String) -> Self {
Value::Text(Cow::Owned(s))
}
}
impl From<Vec<u8>> for StaticValue {
#[inline]
fn from(v: Vec<u8>) -> Self {
Value::Bytes(Cow::Owned(v))
}
}
impl From<Uuid> for StaticValue {
#[inline]
fn from(u: Uuid) -> Self {
Value::Uuid(u)
}
}
impl<'a> From<Vec<Value<'a>>> for Value<'a> {
#[inline]
fn from(v: Vec<Value<'a>>) -> Self {
Value::List(v)
}
}
impl<'a> From<BTreeMap<Cow<'a, str>, Value<'a>>> for Value<'a> {
#[inline]
fn from(m: BTreeMap<Cow<'a, str>, Value<'a>>) -> Self {
Value::Dict(m)
}
}
impl<'a> Display for Value<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Value::Null => {
write!(f, "null")?;
}
Value::Bool(b) => {
write!(f, "{}", if *b { "true" } else { "false" })?;
}
Value::Int(i) => {
write!(f, "{}", i)?;
}
Value::Float(n) => {
write!(f, "{}", n.into_inner())?;
}
Value::Uuid(u) => {
write!(f, "{}", u)?;
}
Value::Bytes(b) => {
write!(f, "<{} bytes: {:?} ..>", b.len(), &b[..min(8, b.len())])?;
}
Value::Text(t) => {
f.write_char('"')?;
for char in t.chars() {
match char {
'"' => {
f.write_str("\\\"")?;
}
'\\' => {
f.write_str("\\\\")?;
}
'/' => {
f.write_str("\\/")?;
}
'\x08' => {
f.write_str("\\b")?;
}
'\x0c' => {
f.write_str("\\f")?;
}
'\n' => {
f.write_str("\\n")?;
}
'\r' => {
f.write_str("\\r")?;
}
'\t' => {
f.write_str("\\t")?;
}
c => {
f.write_char(c)?;
}
}
}
f.write_char('"')?;
}
Value::List(l) => {
f.write_char('[')?;
let mut first = true;
for v in l.iter() {
if !first {
f.write_char(',')?;
}
Display::fmt(v, f)?;
first = false;
}
f.write_char(']')?;
}
Value::Dict(d) => {
f.write_char('{')?;
let mut first = true;
for (k, v) in d.iter() {
if !first {
f.write_char(',')?;
}
Display::fmt(&Value::Text(k.clone()), f)?;
f.write_char(':')?;
Display::fmt(v, f)?;
first = false;
}
f.write_char('}')?;
}
Value::Variable(s) => write!(f, "`{}`", s)?,
Value::EndSentinel => write!(f, "Sentinel")?,
Value::Apply(op, args) => {
write!(
f,
"({} {})",
op,
args.iter()
.map(|v| v.to_string())
.collect::<Vec<_>>()
.join(" ")
)?;
}
Value::FieldAccess(field, value) => {
write!(f, "(.{} {})", field, value)?;
}
Value::IdxAccess(idx, value) => {
write!(f, "(.{} {})", idx, value)?;
}
Value::TupleRef(tid, cid) => {
write!(
f,
"#{}{}.{}{}",
if tid.in_root { 'G' } else { 'L' },
tid.id,
if cid.is_key { 'K' } else { 'D' },
cid.id
)?;
}
Value::DescSort(Reverse(v)) => {
write!(f, "~{}", v)?;
}
}
Ok(())
}
}
lazy_static! {
static ref PREC_CLIMBER: PrecClimber<Rule> = {
use Assoc::*;
PrecClimber::new(vec![
Operator::new(Rule::op_or, Left),
Operator::new(Rule::op_and, Left),
Operator::new(Rule::op_gt, Left)
| Operator::new(Rule::op_lt, Left)
| Operator::new(Rule::op_ge, Left)
| Operator::new(Rule::op_le, Left),
Operator::new(Rule::op_mod, Left),
Operator::new(Rule::op_eq, Left) | Operator::new(Rule::op_ne, Left),
Operator::new(Rule::op_add, Left)
| Operator::new(Rule::op_sub, Left)
| Operator::new(Rule::op_str_cat, Left),
Operator::new(Rule::op_mul, Left) | Operator::new(Rule::op_div, Left),
Operator::new(Rule::op_pow, Assoc::Right),
Operator::new(Rule::op_coalesce, Assoc::Left),
])
};
}
pub const OP_ADD: &str = "+";
pub const OP_STR_CAT: &str = "++";
pub const OP_SUB: &str = "-";
pub const OP_MUL: &str = "*";
pub const OP_DIV: &str = "/";
pub const OP_EQ: &str = "==";
pub const OP_NE: &str = "!=";
pub const OP_OR: &str = "||";
pub const OP_AND: &str = "&&";
pub const OP_MOD: &str = "%";
pub const OP_GT: &str = ">";
pub const OP_GE: &str = ">=";
pub const OP_LT: &str = "<";
pub const OP_LE: &str = "<=";
pub const OP_POW: &str = "**";
pub const OP_COALESCE: &str = "~~";
pub const OP_NEGATE: &str = "!";
pub const OP_MINUS: &str = "--";
pub const METHOD_IS_NULL: &str = "is_null";
pub const METHOD_NOT_NULL: &str = "not_null";
pub const METHOD_CONCAT: &str = "concat";
pub const METHOD_MERGE: &str = "merge";
fn build_expr_infix<'a>(
lhs: Result<Value<'a>>,
op: Pair<Rule>,
rhs: Result<Value<'a>>,
) -> Result<Value<'a>> {
let lhs = lhs?;
let rhs = rhs?;
let op = match op.as_rule() {
Rule::op_add => OP_ADD,
Rule::op_str_cat => OP_STR_CAT,
Rule::op_sub => OP_SUB,
Rule::op_mul => OP_MUL,
Rule::op_div => OP_DIV,
Rule::op_eq => OP_EQ,
Rule::op_ne => OP_NE,
Rule::op_or => OP_OR,
Rule::op_and => OP_AND,
Rule::op_mod => OP_MOD,
Rule::op_gt => OP_GT,
Rule::op_ge => OP_GE,
Rule::op_lt => OP_LT,
Rule::op_le => OP_LE,
Rule::op_pow => OP_POW,
Rule::op_coalesce => OP_COALESCE,
_ => unreachable!(),
};
Ok(Value::Apply(op.into(), vec![lhs, rhs]))
}
fn build_expr_primary(pair: Pair<Rule>) -> Result<Value> {
match pair.as_rule() {
Rule::expr => build_expr_primary(pair.into_inner().next().unwrap()),
Rule::term => {
let mut pairs = pair.into_inner();
let mut head = build_expr_primary(pairs.next().unwrap())?;
for p in pairs {
match p.as_rule() {
Rule::accessor => {
let accessor_key = p.into_inner().next().unwrap().as_str();
head = Value::FieldAccess(accessor_key.into(), head.into());
}
Rule::index_accessor => {
let accessor_key = p.into_inner().next().unwrap();
let accessor_idx = parse_int(accessor_key.as_str(), 10);
head = Value::IdxAccess(accessor_idx as usize, head.into());
}
Rule::call => {
let mut pairs = p.into_inner();
let method_name = pairs.next().unwrap().as_str();
let mut args = vec![head];
args.extend(pairs.map(Value::from_pair).collect::<Result<Vec<_>>>()?);
head = Value::Apply(method_name.into(), args);
}
_ => todo!(),
}
}
Ok(head)
}
Rule::grouping => Value::from_pair(pair.into_inner().next().unwrap()),
Rule::unary => {
let mut inner = pair.into_inner();
let p = inner.next().unwrap();
let op = p.as_rule();
let op = match op {
Rule::term => return build_expr_primary(p),
Rule::negate => OP_NEGATE,
Rule::minus => OP_MINUS,
_ => unreachable!(),
};
let term = build_expr_primary(inner.next().unwrap())?;
Ok(Value::Apply(op.into(), vec![term]))
}
Rule::pos_int => Ok(Value::Int(pair.as_str().replace('_', "").parse::<i64>()?)),
Rule::hex_pos_int => Ok(Value::Int(parse_int(pair.as_str(), 16))),
Rule::octo_pos_int => Ok(Value::Int(parse_int(pair.as_str(), 8))),
Rule::bin_pos_int => Ok(Value::Int(parse_int(pair.as_str(), 2))),
Rule::dot_float | Rule::sci_float => Ok(Value::Float(
pair.as_str().replace('_', "").parse::<f64>()?.into(),
)),
Rule::null => Ok(Value::Null),
Rule::boolean => Ok(Value::Bool(pair.as_str() == "true")),
Rule::quoted_string | Rule::s_quoted_string | Rule::raw_string => {
Ok(Value::Text(Cow::Owned(parse_string(pair)?)))
}
Rule::list => {
let mut spread_collected = vec![];
let mut collected = vec![];
for p in pair.into_inner() {
match p.as_rule() {
Rule::expr => collected.push(Value::from_pair(p)?),
Rule::spreading => {
let el = p.into_inner().next().unwrap();
let to_concat = Value::from_pair(el)?;
if !matches!(
to_concat,
Value::List(_)
| Value::Variable(_)
| Value::IdxAccess(_, _)
| Value::FieldAccess(_, _)
| Value::Apply(_, _)
) {
return Err(CozoError::LogicError("Cannot spread".to_string()));
}
if !collected.is_empty() {
spread_collected.push(Value::List(collected));
collected = vec![];
}
spread_collected.push(to_concat);
}
_ => unreachable!(),
}
}
if spread_collected.is_empty() {
return Ok(Value::List(collected));
}
if !collected.is_empty() {
spread_collected.push(Value::List(collected));
}
Ok(Value::Apply(METHOD_CONCAT.into(), spread_collected))
}
Rule::dict => {
let mut spread_collected = vec![];
let mut collected = BTreeMap::new();
for p in pair.into_inner() {
match p.as_rule() {
Rule::dict_pair => {
let mut inner = p.into_inner();
let name = parse_string(inner.next().unwrap())?;
let val = Value::from_pair(inner.next().unwrap())?;
collected.insert(name.into(), val);
}
Rule::scoped_accessor => {
let name = parse_string(p.into_inner().next().unwrap())?;
let val = Value::FieldAccess(
name.clone().into(),
Value::Variable("_".into()).into(),
);
collected.insert(name.into(), val);
}
Rule::spreading => {
let el = p.into_inner().next().unwrap();
let to_concat = build_expr_primary(el)?;
if !matches!(
to_concat,
Value::Dict(_)
| Value::Variable(_)
| Value::IdxAccess(_, _)
| Value::FieldAccess(_, _)
| Value::Apply(_, _)
) {
return Err(CozoError::LogicError("Cannot spread".to_string()));
}
if !collected.is_empty() {
spread_collected.push(Value::Dict(collected));
collected = BTreeMap::new();
}
spread_collected.push(to_concat);
}
_ => unreachable!(),
}
}
if spread_collected.is_empty() {
return Ok(Value::Dict(collected));
}
if !collected.is_empty() {
spread_collected.push(Value::Dict(collected));
}
Ok(Value::Apply(METHOD_MERGE.into(), spread_collected))
}
Rule::param => Ok(Value::Variable(pair.as_str().into())),
Rule::ident => Ok(Value::Variable(pair.as_str().into())),
_ => {
println!("Unhandled rule {:?}", pair.as_rule());
unimplemented!()
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::Parser;
use pest::Parser as PestParser;
fn parse_expr_from_str<S: AsRef<str>>(s: S) -> Result<StaticValue> {
let pair = Parser::parse(Rule::expr, s.as_ref())
.unwrap()
.next()
.unwrap();
Value::from_pair(pair).map(|v| v.to_static())
}
#[test]
fn raw_string() {
println!("{:#?}", parse_expr_from_str(r#####"r#"x"#"#####))
}
#[test]
fn unevaluated() {
let val = parse_expr_from_str("a+b*c+d").unwrap();
println!("{}", val);
assert!(!val.is_evaluated());
}
#[test]
fn parse_literals() {
assert_eq!(parse_expr_from_str("1").unwrap(), Value::Int(1));
assert_eq!(parse_expr_from_str("12_3").unwrap(), Value::Int(123));
assert_eq!(parse_expr_from_str("0xaf").unwrap(), Value::Int(0xaf));
assert_eq!(
parse_expr_from_str("0xafcE_f").unwrap(),
Value::Int(0xafcef)
);
assert_eq!(
parse_expr_from_str("0o1234_567").unwrap(),
Value::Int(0o1234567)
);
assert_eq!(
parse_expr_from_str("0o0001234_567").unwrap(),
Value::Int(0o1234567)
);
assert_eq!(
parse_expr_from_str("0b101010").unwrap(),
Value::Int(0b101010)
);
assert_eq!(
parse_expr_from_str("0.0").unwrap(),
Value::Float((0.).into())
);
assert_eq!(
parse_expr_from_str("10.022_3").unwrap(),
Value::Float(10.0223.into())
);
assert_eq!(
parse_expr_from_str("10.022_3e-100").unwrap(),
Value::Float(10.0223e-100.into())
);
assert_eq!(parse_expr_from_str("null").unwrap(), Value::Null);
assert_eq!(parse_expr_from_str("true").unwrap(), Value::Bool(true));
assert_eq!(parse_expr_from_str("false").unwrap(), Value::Bool(false));
assert_eq!(
parse_expr_from_str(r#""x \n \ty \"""#).unwrap(),
Value::Text(Cow::Borrowed("x \n \ty \""))
);
assert_eq!(
parse_expr_from_str(r#""x'""#).unwrap(),
Value::Text("x'".into())
);
assert_eq!(
parse_expr_from_str(r#"'"x"'"#).unwrap(),
Value::Text(r##""x""##.into())
);
assert_eq!(
parse_expr_from_str(r#####"r###"x"yz"###"#####).unwrap(),
(Value::Text(r##"x"yz"##.into()))
);
}
#[test]
fn complex_cases() -> Result<()> {
println!("{}", parse_expr_from_str("{}")?);
println!("{}", parse_expr_from_str("{b:1,a,c:2,d,...e,}")?);
println!("{}", parse_expr_from_str("{...a,...b,c:1,d:2,...e,f:3}")?);
println!("{}", parse_expr_from_str("[]")?);
println!("{}", parse_expr_from_str("[...a,...b,1,2,...e,3]")?);
Ok(())
}
}
Loading…
Cancel
Save