more tests and docs and fixes

2 years ago · 6537cfd01e
parent 363f7b4e24
commit 6537cfd01e
7 changed files with 570 additions and 143 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -36,6 +36,7 @@ pest_derive = "2.2.1"
 rayon = "1.5.3"
 nalgebra = "0.31.1"
 approx = "0.5.1"
+unicode-normalization = "0.1.21"
 cozorocks = { path = "cozorocks" }

 #[target.'cfg(not(target_env = "msvc"))'.dependencies]
--- a/docs/functions.md
+++ b/docs/functions.md
@ -36,7 +36,7 @@ The four _basic arithmetic operators_ `+`, `-`, `*`, and `/` do what you expect,

 !> `negate(...)` is not the same as `not ...`, the former denotes the negation of a boolean expression, whereas the latter denotes the negation of a Horn clause.

-## Maths functions
+## Mathematical functions

 `add(...)`, `sub(x, y)`, `mul(...)`, `div(x, y)`: the function forms of `+`, `-`, `*`, `/`. `add` and `mul` can take multiple arguments (or no arguments).

@ -68,9 +68,19 @@ The four _basic arithmetic operators_ `+`, `-`, `*`, and `/` do what you expect,

 `sinh(x)`, `cosh(x)`, `tanh(x)`, `asinh(x)`, `acosh(x)`, `atanh(x)`: the hyperbolic sine, cosine, tangent and their inverses.

+`deg_to_rad(x)`: converts degrees to radians.
+
+`rad_to_deg(x)`: converts radians to degrees.
+
+`haversine(a_lat, a_lon, b_lat, b_lon)`: returns the spherical distance measured on a unit sphere in radians between two points specified by their latitudes and longitudes. The inputs are in radians as well. If you want to calculate spherical distance on earth, you probably want the next function. See [Haversine formula](https://en.wikipedia.org/wiki/Haversine_formula) for more details.
+
+`haversine_deg_input(a_lat, a_lon, b_lat, b_lon)`: same as the previous function, but the inputs are in degrees instead of radians. The return value is still in radians. If you want approximate distance measured on the surface of the earth instead of spherical distance, multiply the result by the radius of the earth, which is about `6371` kilometres, `3959` miles, or `3440` nautical miles.
+
 ## Functions on strings

-`str_cat(...)` concatenates strings. Takes any number of arguments. The operator form `x ++ y` is also available for binary arguments.
+`length(str)` returns the number of Unicode characters in the string. See the caveat at the end of this section.
+
+`concat(x, ...)` concatenates strings. Takes any number of arguments. The operator form `x ++ y` is also available for binary arguments.

 `str_includes(x, y)` returns `true` if `x` contains the substring `y`, `false` otherwise.

@ -82,6 +92,86 @@ The four _basic arithmetic operators_ `+`, `-`, `*`, and `/` do what you expect,

 ?> `starts_with(?var, str)` is prefered over equivalent (e.g. regex) conditions, since the compiler may more easily compile the clause into a range scan.

+`unicode_normalize(str, norm)`: converts `str` to the normalization specified by `norm`. The valid values of `norm` are `'nfc'`, `'nfd'`, `'nfkc'` and `'nfkd'`. See [Unicode equivalence](https://en.wikipedia.org/wiki/Unicode_equivalence).
+
+!> `length(str)` does not return the number of bytes of the string representation. Also, what is returned depends on the normalization of the string. So if such details are important, apply `unicode_normalize` before `length`.
+
+`chars(str)` returns Unicode characters of the string as a list of substrings.
+
+`from_substrings(list)` combines the strings in `list` into a big string. In a sense, it is the inverse function of `chars`.
+
+!> If you want substring slices, indexing strings, etc., first convert the string to a list with `chars`, do the manipulation on the list, and then recombine with `from_substrings`. Hopefully, the omission of functions doing such things directly can make people more aware of the complexities involved in manipulating strings (and getting the _correct_ result).
+
+## Functions on lists
+
+`list(x, ...)` constructs a list from its arguments, e.g. `list(1, 2, 3)`. You may prefer to use the literal form `[1, 2, 3]`.
+
+`is_in(el, list)` tests the membership of an element in a list, e.g. `is_in(1, [1, 2, 3])` is true, whereas `is_in(5, [1, 2, 3])` is false.
+
+`first(l)`, `last(l)` return the first / last element of the list, respectively.
+
+`get(l, n)` returns the element at index `n` in the list `l`. This function will error if the access is out of bounds. Indices start with 0.
+
+`maybe_get(l, n)` returns the element at index `n` in the list `l`. This function will return `null` if the access is out of bounds. Indices start with 0.
+
+`length(list)` returns the length of the list.
+
+`slice(l, start, end)` returns the slice of the list between the indices `start` (inclusive) and `end` (exclusive). Negative indices may be used and are interpreted as counting from the end of the list. E.g. `slice([1, 2, 3, 4], 1, 3) == [2, 3]`, `slice([1, 2, 3, 4], 1, -1) == [2, 3]`.
+
+?> The spread-unify operator `?var <- ..[1, 2, 3]` is equivalent to `is_in(?var, [1, 2, 3])` if `?var` is bound.
+
+`concat(x, ...)` concatenates lists. Takes any number of arguments. The operator form `x ++ y` is also available for binary arguments.
+
+`prepend(l, x)`, `append(l, x)`: prepends / appends the element `x` to the list `l`.
+
+`sorted(l)`: returns the sorted list as defined by the total order detailed in [datatypes](datatypes.md).
+
+`chunks(l, n)`: splits the list `l` into chunks of `n`, e.g. `chunks([1, 2, 3, 4, 5], 2) == [[1, 2], [3, 4], [5]]`.
+
+`chunks_exact(l, n)`: splits the list `l` into chunks of `n`, discarding any trailing elements, e.g. `chunks_exact([1, 2, 3, 4, 5], 2) == [[1, 2], [3, 4]]`.
+
+`windows(l, n)`: splits the list `l` into overlapping windows of length `n`. e.g. `windows([1, 2, 3, 4, 5], 3) == [[1, 2, 3], [2, 3, 4], [3, 4, 5]]`.
+
+## Functions on bytes
+
+`length(bytes)` returns the length of the byte array.
+
+`bit_and(x, y)`, `bit_or(x, y)`, `bit_not(x)`, `bit_xor(x, y)`: calculate the respective boolean functions on bytes regarded as bit arrays. For the binary functions, the two byte arrays must have the same length.
+
+`pack_bits([x, ...])` packs a list of booleans into a byte array; if the length of the list is not divisible by 8, it is padded with `false`. `unpack_bits(x)` does the reverse. E.g. `unpack_bits(pack_bits([false, true, true])) == [false, true, true, false, false, false, false, false]`.
+
+`encode_base64(b)` encodes the byte array `b` into the [Base64](https://en.wikipedia.org/wiki/Base64) encoded string. Note that this is automatically done on output to JSON since JSON cannot represent bytes natively.
+
+`decode_base64(str)` tries to decode the `str` as a Base64-encoded byte array.
+
+## Type checking and conversion functions
+
+`to_float(x)` tries to convert `x` to a float. Conversion from `Number` always succeeds. Conversion from `String` has the following special cases in addition to the usual string representation:
+
+* `INF` is converted to infinity;
+* `NEG_INF` is converted to negative infinity;
+* `NAN` is converted to NAN (but don't compare NAN by equality, use `is_nan` instead);
+* `PI` is converted to pi (3.14159...);
+* `E` is converted to the base of natural logarithms, also known as Euler's number (2.71828...).
+
+The obvious type checking functions: `is_null(x)`, `is_int(x)`, `is_float(x)`, `is_num(x)`, `is_bytes(x)`, `is_list(x)`, `is_string(x)`.
+
+`is_finite(x)` returns `true` if `x` is `Int` or a finite `Float`.
+
+`is_infinite(x)` returns `true` if `x` is infinity or negative infinity.
+
+`is_nan(x)` returns `true` if `x` is the special float `NAN`.
+
+## Random functions
+
+`rand_float()` generates a float in the half-open interval [0, 1) (0 inclusive, 1 exclusive), sampled uniformly.
+
+`rand_bernoulli(p)` generates a boolean with probability `p` of being `true`.
+
+`rand_int(lower, upper)` generates an integer within the given bounds, both bounds are inclusive.
+
+`rand_choose(list)` randomly chooses an element from `list` and returns it. If the list is empty, it returns `null`.
+
 ## Regex functions

 `regex_matches(x, reg)`: tests if `x` matches the regular expression `reg`.
@ -159,32 +249,4 @@ $     the end of the text
 \z    only the end of the text
 \b    a Unicode word boundary (\w on one side and \W, \A, or \z on the other)
 \B    not a Unicode word boundary
-```
-
-## Functions on lists
-
-`list` constructs a list from its argument, e.g. `list(1, 2, 3)`. You may prefer to use the literal form `[1, 2, 3]`.
-
-`is_in` tests the membership of an element in a list, e.g. `is_in(1, [1, 2, 3])` is true, whereas `is_in(5, [1, 2, 3])` is false.
-
-?> The spread-unify operator `?var <- ..[1, 2, 3]` is equivalent to `is_in(?var, [1, 2, 3])` if `?var` is bound.
-
-## Functions on bytes
-
-`bit_and(x, y)`, `bit_or(x, y)`, `bit_not(x)`, `bit_xor(x, y)`: calculate the respective boolean functions on bytes regarded as bit arrays. The two bytes must have the same lengths.
-
-`pack_bits([x, ...])` packs a list of booleans into a byte array; if the list is not divisible by 8, it is padded with `false`. `unpack_bits(x)` does the reverse. E.g. `unpack_bits(pack_bits([false, true, true])) == [false, true, true, false, false, false, false, false]`.
-
-## Random functions
-
-## Type checking functions
-
-The usual ones: `is_null(x)`, `is_int(x)`, `is_float(x)`, `is_num(x)`, `is_bytes(x)`, `is_list(x)`, `is_string(x)`.
-
-`is_finite(x)` returns `true` if `x` is `Int` or a finite `Float`.
-
-`is_infinite(x)` returns `true` if `x` is infinity or negative infinity.
-
-`is_nan(x)` returns `true` if `x` is the special float `NAN`
-
-## Conversion functions
+```
--- a/src/cozoscript.pest
+++ b/src/cozoscript.pest
@ -58,11 +58,11 @@ apply_args = {(expr ~ ",")* ~ expr?}
 grouped = _{"(" ~ rule_body ~ ")"}

 expr = {unary ~ (operation ~ unary)*}
-operation = _{ (op_and | op_or | op_pow | op_str_cat | op_add | op_sub | op_mul | op_div | op_mod |
+operation = _{ (op_and | op_or | op_pow | op_concat | op_add | op_sub | op_mul | op_div | op_mod |
                op_ge | op_le | op_gt | op_lt | op_eq | op_ne)}
 op_or = { "||" }
 op_and = { "&&" }
-op_str_cat = { "++" }
+op_concat = { "++" }
 op_add = { "+" }
 op_sub = { "-" }
 op_mul = { "*" }
--- a/src/data/expr.rs
+++ b/src/data/expr.rs
@ -409,7 +409,7 @@ pub(crate) fn get_op(name: &str) -> Option<&'static Op> {
        "bit_xor" => &OP_BIT_XOR,
        "pack_bits" => &OP_PACK_BITS,
        "unpack_bits" => &OP_UNPACK_BITS,
-        "str_cat" => &OP_STR_CAT,
+        "concat" => &OP_CONCAT,
        "str_includes" => &OP_STR_INCLUDES,
        "lowercase" => &OP_LOWERCASE,
        "uppercase" => &OP_UPPERCASE,
@ -432,17 +432,17 @@ pub(crate) fn get_op(name: &str) -> Option<&'static Op> {
        "length" => &OP_LENGTH,
        "sorted" => &OP_SORTED,
        "append" => &OP_APPEND,
+        "prepend" => &OP_PREPEND,
+        "unicode_normalize" => &OP_UNICODE_NORMALIZE,
        "haversine" => &OP_HAVERSINE,
        "haversine_deg_input" => &OP_HAVERSINE_DEG_INPUT,
        "deg_to_rad" => &OP_DEG_TO_RAD,
        "rad_to_deg" => &OP_RAD_TO_DEG,
-        "nth" => &OP_NTH,
-        "maybe_nth" => &OP_MAYBE_NTH,
-        "nth_char" => &OP_NTH_CHAR,
+        "get" => &OP_GET,
+        "maybe_get" => &OP_MAYBE_GET,
        "chars" => &OP_CHARS,
-        "maybe_nth_char" => &OP_MAYBE_NTH_CHAR,
+        "from_substrings" => &OP_FROM_SUBSTRINGS,
        "slice" => &OP_SLICE,
-        "str_slice" => &OP_STR_SLICE,
        "regex_matches" => &OP_REGEX_MATCHES,
        "regex_replace" => &OP_REGEX_REPLACE,
        "regex_replace_all" => &OP_REGEX_REPLACE_ALL,
--- a/src/data/functions.rs
+++ b/src/data/functions.rs
@ -6,9 +6,10 @@ use itertools::Itertools;
 use num_traits::FloatConst;
 use rand::prelude::*;
 use smartstring::SmartString;
+use unicode_normalization::UnicodeNormalization;

 use crate::data::expr::Op;
-use crate::data::value::{DataValue, Number, RegexWrapper, same_value_type};
+use crate::data::value::{same_value_type, DataValue, Number, RegexWrapper};

 macro_rules! define_op {
    ($name:ident, $min_arity:expr, $vararg:expr) => {
@ -682,17 +683,33 @@ pub(crate) fn op_pack_bits(args: &[DataValue]) -> Result<DataValue> {
    }
 }

-define_op!(OP_STR_CAT, 0, true);
-pub(crate) fn op_str_cat(args: &[DataValue]) -> Result<DataValue> {
-    let mut ret: String = Default::default();
-    for arg in args {
-        if let DataValue::String(s) = arg {
-            ret += s;
-        } else {
-            bail!("unexpected arg {:?} for OP_ADD", arg);
+define_op!(OP_CONCAT, 1, true);
+pub(crate) fn op_concat(args: &[DataValue]) -> Result<DataValue> {
+    match &args[0] {
+        DataValue::String(_) => {
+            let mut ret: String = Default::default();
+            for arg in args {
+                if let DataValue::String(s) = arg {
+                    ret += s;
+                } else {
+                    bail!("unexpected arg {:?} for OP_CAT", arg);
+                }
+            }
+            Ok(DataValue::String(ret.into()))
        }
+        DataValue::List(_) => {
+            let mut ret = vec![];
+            for arg in args {
+                if let DataValue::List(l) = arg {
+                    ret.extend_from_slice(l);
+                } else {
+                    bail!("unexpected arg {:?} for OP_CAT", arg);
+                }
+            }
+            Ok(DataValue::List(ret.into()))
+        }
+        arg => bail!("unexpected arg {:?} for OP_CAT", arg),
    }
-    Ok(DataValue::String(ret.into()))
 }

 define_op!(OP_STR_INCLUDES, 2, false);
@ -867,7 +884,7 @@ pub(crate) fn op_is_finite(args: &[DataValue]) -> Result<DataValue> {
    Ok(DataValue::Bool(match &args[0] {
        DataValue::Number(Number::Int(_)) => true,
        DataValue::Number(Number::Float(f)) => f.is_finite(),
-        _ => false
+        _ => false,
    }))
 }

@ -875,20 +892,18 @@ define_op!(OP_IS_INFINITE, 1, false);
 pub(crate) fn op_is_infinite(args: &[DataValue]) -> Result<DataValue> {
    Ok(DataValue::Bool(match &args[0] {
        DataValue::Number(Number::Float(f)) => f.is_infinite(),
-        _ => false
+        _ => false,
    }))
 }

-
 define_op!(OP_IS_NAN, 1, false);
 pub(crate) fn op_is_nan(args: &[DataValue]) -> Result<DataValue> {
    Ok(DataValue::Bool(match &args[0] {
        DataValue::Number(Number::Float(f)) => f.is_nan(),
-        _ => false
+        _ => false,
    }))
 }

-
 define_op!(OP_IS_STRING, 1, false);
 pub(crate) fn op_is_string(args: &[DataValue]) -> Result<DataValue> {
    Ok(DataValue::Bool(matches!(args[0], DataValue::String(_))))
@ -911,6 +926,18 @@ pub(crate) fn op_append(args: &[DataValue]) -> Result<DataValue> {
    }
 }

+define_op!(OP_PREPEND, 2, false);
+pub(crate) fn op_prepend(args: &[DataValue]) -> Result<DataValue> {
+    match &args[0] {
+        DataValue::List(pl) => {
+            let mut l = vec![args[1].clone()];
+            l.extend_from_slice(pl);
+            Ok(DataValue::List(l))
+        }
+        v => bail!("cannot prepend to {:?}", v),
+    }
+}
+
 define_op!(OP_IS_BYTES, 1, false);
 pub(crate) fn op_is_bytes(args: &[DataValue]) -> Result<DataValue> {
    Ok(DataValue::Bool(matches!(args[0], DataValue::Bytes(_))))
@ -927,6 +954,20 @@ pub(crate) fn op_length(args: &[DataValue]) -> Result<DataValue> {
    }))
 }

+define_op!(OP_UNICODE_NORMALIZE, 2, false);
+pub(crate) fn op_unicode_normalize(args: &[DataValue]) -> Result<DataValue> {
+    match (&args[0], &args[1]) {
+        (DataValue::String(s), DataValue::String(n)) => Ok(DataValue::String(match n as &str {
+            "nfc" => s.nfc().collect(),
+            "nfd" => s.nfd().collect(),
+            "nfkc" => s.nfkc().collect(),
+            "nfkd" => s.nfkd().collect(),
+            u => bail!("unknown normalization {} for 'unicode_normalize'", u),
+        })),
+        v => bail!("'unicode_normalize' requires string argument, got {:?}", v),
+    }
+}
+
 define_op!(OP_SORTED, 1, false);
 pub(crate) fn op_sorted(args: &[DataValue]) -> Result<DataValue> {
    let mut arg = args[0]
@ -1097,14 +1138,14 @@ fn get_index(mut i: i64, total: usize) -> Result<usize> {
    })
 }

-define_op!(OP_NTH, 2, false);
-pub(crate) fn op_nth(args: &[DataValue]) -> Result<DataValue> {
+define_op!(OP_GET, 2, false);
+pub(crate) fn op_get(args: &[DataValue]) -> Result<DataValue> {
    let l = args[0]
        .get_list()
-        .ok_or_else(|| anyhow!("first argument to 'nth' mut be a list, got args {:?}", args))?;
+        .ok_or_else(|| anyhow!("first argument to 'get' mut be a list, got args {:?}", args))?;
    let n = args[1].get_int().ok_or_else(|| {
        anyhow!(
-            "second argument to 'nth' mut be an integer, got args {:?}",
+            "second argument to 'get' mut be an integer, got args {:?}",
            args
        )
    })?;
@ -1112,14 +1153,17 @@ pub(crate) fn op_nth(args: &[DataValue]) -> Result<DataValue> {
    Ok(l[idx].clone())
 }

-define_op!(OP_MAYBE_NTH, 2, false);
-pub(crate) fn op_maybe_nth(args: &[DataValue]) -> Result<DataValue> {
-    let l = args[0]
-        .get_list()
-        .ok_or_else(|| anyhow!("first argument to 'nth' mut be a list, got args {:?}", args))?;
+define_op!(OP_MAYBE_GET, 2, false);
+pub(crate) fn op_maybe_get(args: &[DataValue]) -> Result<DataValue> {
+    let l = args[0].get_list().ok_or_else(|| {
+        anyhow!(
+            "first argument to 'maybe_get' mut be a list, got args {:?}",
+            args
+        )
+    })?;
    let n = args[1].get_int().ok_or_else(|| {
        anyhow!(
-            "second argument to 'nth' mut be an integer, got args {:?}",
+            "second argument to 'maybe_get' mut be an integer, got args {:?}",
            args
        )
    })?;
@ -1171,75 +1215,21 @@ pub(crate) fn op_chars(args: &[DataValue]) -> Result<DataValue> {
    ))
 }

-define_op!(OP_NTH_CHAR, 2, false);
-pub(crate) fn op_nth_char(args: &[DataValue]) -> Result<DataValue> {
-    let l = args[0].get_string().ok_or_else(|| {
-        anyhow!(
-            "first argument to 'nth_char' mut be a string, got args {:?}",
-            args
-        )
-    })?;
-    let n = args[1].get_int().ok_or_else(|| {
-        anyhow!(
-            "second argument to 'nth_char' mut be an integer, got args {:?}",
-            args
-        )
-    })?;
-    let chars = l.chars().collect_vec();
-    let idx = get_index(n, chars.len())?;
-    let mut c = SmartString::new();
-    c.push(chars[idx]);
-    Ok(DataValue::String(c))
-}
-
-define_op!(OP_MAYBE_NTH_CHAR, 2, false);
-pub(crate) fn op_maybe_nth_char(args: &[DataValue]) -> Result<DataValue> {
-    let l = args[0].get_string().ok_or_else(|| {
-        anyhow!(
-            "first argument to 'nth_char' mut be a string, got args {:?}",
-            args
-        )
-    })?;
-    let n = args[1].get_int().ok_or_else(|| {
-        anyhow!(
-            "second argument to 'nth_char' mut be an integer, got args {:?}",
-            args
-        )
-    })?;
-    let chars = l.chars().collect_vec();
-    if let Ok(idx) = get_index(n, chars.len()) {
-        let mut c = SmartString::new();
-        c.push(chars[idx]);
-        Ok(DataValue::String(c))
-    } else {
-        Ok(DataValue::Null)
+define_op!(OP_FROM_SUBSTRINGS, 1, false);
+pub(crate) fn op_from_substrings(args: &[DataValue]) -> Result<DataValue> {
+    let mut ret = String::new();
+    match &args[0] {
+        DataValue::List(ss) => {
+            for arg in ss {
+                if let DataValue::String(s) = arg {
+                    ret.push_str(s);
+                } else {
+                    bail!("cannot add {:?} to string", arg)
+                }
+            }
+        }
+        v => bail!("cannot apply 'from_substring' to {:?}", v),
    }
-}
-
-define_op!(OP_STR_SLICE, 3, false);
-pub(crate) fn op_str_slice(args: &[DataValue]) -> Result<DataValue> {
-    let l = args[0].get_string().ok_or_else(|| {
-        anyhow!(
-            "first argument to 'str_slice' mut be a string, got args {:?}",
-            args
-        )
-    })?;
-    let m = args[1].get_int().ok_or_else(|| {
-        anyhow!(
-            "second argument to 'str_slice' mut be an integer, got args {:?}",
-            args
-        )
-    })?;
-    let n = args[2].get_int().ok_or_else(|| {
-        anyhow!(
-            "third argument to 'str_slice' mut be an integer, got args {:?}",
-            args
-        )
-    })?;
-    let l = l.chars().collect_vec();
-    let m = get_index(m, l.len())?;
-    let n = get_index(n, l.len())?;
-    let ret: String = l[m..n].iter().collect();
    Ok(DataValue::String(ret.into()))
 }

@ -1273,8 +1263,8 @@ pub(crate) fn op_to_float(args: &[DataValue]) -> Result<DataValue> {
            "PI" => f64::PI().into(),
            "E" => f64::E().into(),
            "NAN" => f64::NAN.into(),
-            "INFINITY" => f64::INFINITY.into(),
-            "NEGATIVE_INFINITY" => f64::NEG_INFINITY.into(),
+            "INF" => f64::INFINITY.into(),
+            "NEG_INF" => f64::NEG_INFINITY.into(),
            s => f64::from_str(s)?.into(),
        },
        v => bail!("'to_float' cannot be applied to {:?}", v),
@ -1286,11 +1276,10 @@ pub(crate) fn op_rand_float(_args: &[DataValue]) -> Result<DataValue> {
    Ok(thread_rng().gen::<f64>().into())
 }

-define_op!(OP_RAND_BERNOULLI, 0, true);
+define_op!(OP_RAND_BERNOULLI, 1, false);
 pub(crate) fn op_rand_bernoulli(args: &[DataValue]) -> Result<DataValue> {
-    let prob = match args.get(0) {
-        None => 0.5,
-        Some(DataValue::Number(n)) => {
+    let prob = match &args[0] {
+        DataValue::Number(n) => {
            let f = n.get_float();
            ensure!(
                f >= 0. && f <= 1.,
@ -1299,7 +1288,7 @@ pub(crate) fn op_rand_bernoulli(args: &[DataValue]) -> Result<DataValue> {
            );
            f
        }
-        Some(v) => bail!(
+        v => bail!(
            "'rand_bernoulli' requires number between 0. and 1., got {:?}",
            v
        ),
--- a/src/data/tests/functions.rs
+++ b/src/data/tests/functions.rs
@ -571,15 +571,28 @@ fn test_unpack_bits() {
 }

 #[test]
-fn test_str_cat() {
+fn test_concat() {
    assert_eq!(
-        op_str_cat(&[
+        op_concat(&[
            DataValue::String("abc".into()),
            DataValue::String("def".into())
        ])
        .unwrap(),
        DataValue::String("abcdef".into())
    );
+
+    assert_eq!(
+        op_concat(&[
+            DataValue::List(vec![DataValue::Bool(true), DataValue::Bool(false)]),
+            DataValue::List(vec![DataValue::Bool(true)])
+        ])
+        .unwrap(),
+        DataValue::List(vec![
+            DataValue::Bool(true),
+            DataValue::Bool(false),
+            DataValue::Bool(true)
+        ])
+    );
 }

 #[test]
@ -861,3 +874,365 @@ fn test_predicates() {
        DataValue::Bool(true)
    );
 }
+
+#[test]
+fn test_prepend_append() {
+    assert_eq!(
+        op_prepend(&[
+            DataValue::List(vec![DataValue::from(1), DataValue::from(2)]),
+            DataValue::Null,
+        ])
+        .unwrap(),
+        DataValue::List(vec![
+            DataValue::Null,
+            DataValue::from(1),
+            DataValue::from(2)
+        ]),
+    );
+    assert_eq!(
+        op_append(&[
+            DataValue::List(vec![DataValue::from(1), DataValue::from(2)]),
+            DataValue::Null,
+        ])
+        .unwrap(),
+        DataValue::List(vec![
+            DataValue::from(1),
+            DataValue::from(2),
+            DataValue::Null,
+        ]),
+    );
+}
+
+#[test]
+fn test_length() {
+    assert_eq!(
+        op_length(&[DataValue::String("abc".into())]).unwrap(),
+        DataValue::from(3)
+    );
+    assert_eq!(
+        op_length(&[DataValue::List(vec![])]).unwrap(),
+        DataValue::from(0)
+    );
+    assert_eq!(
+        op_length(&[DataValue::Bytes([].into())]).unwrap(),
+        DataValue::from(0)
+    );
+}
+
+#[test]
+fn test_unicode_normalize() {
+    assert_eq!(
+        op_unicode_normalize(&[
+            DataValue::String("abc".into()),
+            DataValue::String("nfc".into())
+        ])
+        .unwrap(),
+        DataValue::String("abc".into())
+    )
+}
+
+#[test]
+fn test_sort() {
+    assert_eq!(
+        op_sorted(&[DataValue::List(vec![
+            DataValue::from(2.0),
+            DataValue::from(1),
+            DataValue::from(2),
+            DataValue::Null
+        ])])
+        .unwrap(),
+        DataValue::List(vec![
+            DataValue::Null,
+            DataValue::from(1),
+            DataValue::from(2),
+            DataValue::from(2.0),
+        ])
+    )
+}
+
+#[test]
+fn test_haversine() {
+    let d = op_haversine_deg_input(&[
+        DataValue::from(0),
+        DataValue::from(0),
+        DataValue::from(0),
+        DataValue::from(180),
+    ])
+    .unwrap()
+    .get_float()
+    .unwrap();
+    assert!(d.abs_diff_eq(&f64::PI(), 1e-5));
+
+    let d = op_haversine_deg_input(&[
+        DataValue::from(90),
+        DataValue::from(0),
+        DataValue::from(0),
+        DataValue::from(123),
+    ])
+    .unwrap()
+    .get_float()
+    .unwrap();
+    assert!(d.abs_diff_eq(&(f64::PI() / 2.), 1e-5));
+
+    let d = op_haversine(&[
+        DataValue::from(0),
+        DataValue::from(0),
+        DataValue::from(0),
+        DataValue::from(f64::PI()),
+    ])
+    .unwrap()
+    .get_float()
+    .unwrap();
+    assert!(d.abs_diff_eq(&f64::PI(), 1e-5));
+}
+
+#[test]
+fn test_deg_rad() {
+    assert_eq!(
+        op_deg_to_rad(&[DataValue::from(180)]).unwrap(),
+        DataValue::from(f64::PI())
+    );
+    assert_eq!(
+        op_rad_to_deg(&[DataValue::from(f64::PI())]).unwrap(),
+        DataValue::from(180.0)
+    );
+}
+
+#[test]
+fn test_first_last() {
+    assert_eq!(
+        op_first(&[DataValue::List(vec![])]).unwrap(),
+        DataValue::Null,
+    );
+    assert_eq!(
+        op_last(&[DataValue::List(vec![])]).unwrap(),
+        DataValue::Null,
+    );
+    assert_eq!(
+        op_first(&[DataValue::List(vec![
+            DataValue::from(1),
+            DataValue::from(2)
+        ])])
+        .unwrap(),
+        DataValue::from(1),
+    );
+    assert_eq!(
+        op_last(&[DataValue::List(vec![
+            DataValue::from(1),
+            DataValue::from(2)
+        ])])
+        .unwrap(),
+        DataValue::from(2),
+    );
+}
+
+#[test]
+fn test_chunks() {
+    assert_eq!(
+        op_chunks(&[
+            DataValue::List(vec![
+                DataValue::from(1),
+                DataValue::from(2),
+                DataValue::from(3),
+                DataValue::from(4),
+                DataValue::from(5),
+            ]),
+            DataValue::from(2),
+        ])
+        .unwrap(),
+        DataValue::List(vec![
+            DataValue::List(vec![DataValue::from(1), DataValue::from(2),]),
+            DataValue::List(vec![DataValue::from(3), DataValue::from(4),]),
+            DataValue::List(vec![DataValue::from(5)]),
+        ])
+    );
+    assert_eq!(
+        op_chunks_exact(&[
+            DataValue::List(vec![
+                DataValue::from(1),
+                DataValue::from(2),
+                DataValue::from(3),
+                DataValue::from(4),
+                DataValue::from(5),
+            ]),
+            DataValue::from(2),
+        ])
+        .unwrap(),
+        DataValue::List(vec![
+            DataValue::List(vec![DataValue::from(1), DataValue::from(2),]),
+            DataValue::List(vec![DataValue::from(3), DataValue::from(4),]),
+        ])
+    );
+    assert_eq!(
+        op_windows(&[
+            DataValue::List(vec![
+                DataValue::from(1),
+                DataValue::from(2),
+                DataValue::from(3),
+                DataValue::from(4),
+                DataValue::from(5),
+            ]),
+            DataValue::from(3),
+        ])
+        .unwrap(),
+        DataValue::List(vec![
+            DataValue::List(vec![
+                DataValue::from(1),
+                DataValue::from(2),
+                DataValue::from(3)
+            ]),
+            DataValue::List(vec![
+                DataValue::from(2),
+                DataValue::from(3),
+                DataValue::from(4)
+            ]),
+            DataValue::List(vec![
+                DataValue::from(3),
+                DataValue::from(4),
+                DataValue::from(5)
+            ]),
+        ])
+    )
+}
+
+#[test]
+fn test_get() {
+    assert!(op_get(&[DataValue::List(vec![]), DataValue::from(0)]).is_err());
+    assert_eq!(
+        op_get(&[
+            DataValue::List(vec![
+                DataValue::from(1),
+                DataValue::from(2),
+                DataValue::from(3)
+            ]),
+            DataValue::from(1)
+        ])
+        .unwrap(),
+        DataValue::from(2)
+    );
+    assert_eq!(
+        op_maybe_get(&[DataValue::List(vec![]), DataValue::from(0)]).unwrap(),
+        DataValue::Null
+    );
+    assert_eq!(
+        op_maybe_get(&[
+            DataValue::List(vec![
+                DataValue::from(1),
+                DataValue::from(2),
+                DataValue::from(3)
+            ]),
+            DataValue::from(1)
+        ])
+        .unwrap(),
+        DataValue::from(2)
+    );
+}
+
+#[test]
+fn test_slice() {
+    assert!(op_slice(&[
+        DataValue::List(vec![
+            DataValue::from(1),
+            DataValue::from(2),
+            DataValue::from(3)
+        ]),
+        DataValue::from(1),
+        DataValue::from(4)
+    ])
+    .is_err());
+
+    assert_eq!(
+        op_slice(&[
+            DataValue::List(vec![
+                DataValue::from(1),
+                DataValue::from(2),
+                DataValue::from(3)
+            ]),
+            DataValue::from(1),
+            DataValue::from(-1)
+        ])
+        .unwrap(),
+        DataValue::List(vec![DataValue::from(2)])
+    );
+}
+
+#[test]
+fn test_chars() {
+    assert_eq!(
+        op_from_substrings(&[op_chars(&[DataValue::String("abc".into())]).unwrap()]).unwrap(),
+        DataValue::String("abc".into())
+    )
+}
+
+#[test]
+fn test_encode_decode() {
+    assert_eq!(
+        op_decode_base64(&[op_encode_base64(&[DataValue::Bytes([1, 2, 3].into())]).unwrap()])
+            .unwrap(),
+        DataValue::Bytes([1, 2, 3].into())
+    )
+}
+
+#[test]
+fn test_to_float() {
+    assert_eq!(
+        op_to_float(&[DataValue::from(1)]).unwrap(),
+        DataValue::from(1.0)
+    );
+    assert_eq!(
+        op_to_float(&[DataValue::from(1.0)]).unwrap(),
+        DataValue::from(1.0)
+    );
+    assert!(op_to_float(&[DataValue::String("NAN".into())])
+        .unwrap()
+        .get_float()
+        .unwrap()
+        .is_nan());
+    assert!(op_to_float(&[DataValue::String("INF".into())])
+        .unwrap()
+        .get_float()
+        .unwrap()
+        .is_infinite());
+    assert!(op_to_float(&[DataValue::String("NEG_INF".into())])
+        .unwrap()
+        .get_float()
+        .unwrap()
+        .is_infinite());
+    assert_eq!(
+        op_to_float(&[DataValue::String("3".into())])
+            .unwrap()
+            .get_float()
+            .unwrap(),
+        3.
+    );
+}
+
+#[test]
+fn test_rand() {
+    let n = op_rand_float(&[]).unwrap().get_float().unwrap();
+    assert!(n >= 0.);
+    assert!(n <= 1.);
+    assert_eq!(
+        op_rand_bernoulli(&[DataValue::from(0)]).unwrap(),
+        DataValue::Bool(false)
+    );
+    assert_eq!(
+        op_rand_bernoulli(&[DataValue::from(1)]).unwrap(),
+        DataValue::Bool(true)
+    );
+    assert!(op_rand_bernoulli(&[DataValue::from(2)]).is_err());
+    let n = op_rand_int(&[DataValue::from(100), DataValue::from(200)])
+        .unwrap()
+        .get_int()
+        .unwrap();
+    assert!(n >= 100);
+    assert!(n <= 200);
+    assert_eq!(
+        op_rand_choose(&[DataValue::List(vec![])]).unwrap(),
+        DataValue::Null
+    );
+    assert_eq!(
+        op_rand_choose(&[DataValue::List(vec![DataValue::from(123)])]).unwrap(),
+        DataValue::from(123)
+    );
+}
--- a/src/parse/cozoscript/query.rs
+++ b/src/parse/cozoscript/query.rs
@ -435,7 +435,7 @@ lazy_static! {
            Operator::new(Rule::op_eq, Left) | Operator::new(Rule::op_ne, Left),
            Operator::new(Rule::op_add, Left)
                | Operator::new(Rule::op_sub, Left)
-                | Operator::new(Rule::op_str_cat, Left),
+                | Operator::new(Rule::op_concat, Left),
            Operator::new(Rule::op_mul, Left) | Operator::new(Rule::op_div, Left),
            Operator::new(Rule::op_pow, Assoc::Right),
        ])
@ -461,7 +461,7 @@ fn build_expr_infix(
        Rule::op_ge => "ge",
        Rule::op_lt => "lt",
        Rule::op_le => "le",
-        Rule::op_str_cat => "str_cat",
+        Rule::op_concat => "concat",
        Rule::op_or => "or",
        Rule::op_and => "and",
        _ => unreachable!(),