Fixes https://github.com/cozodb/cozo/issues/90

1 year ago · 147101b7f3
parent 113c91a5de
commit 147101b7f3
3 changed files with 17 additions and 9 deletions
--- a/cozo-core/src/fts/mod.rs
+++ b/cozo-core/src/fts/mod.rs
@@ -22,10 +22,10 @@ use smartstring::{LazyCompact, SmartString};
 use std::collections::HashMap;
 use std::sync::{Arc, RwLock};
+pub(crate) mod ast;
 pub(crate) mod cangjie;
-pub(crate) mod tokenizer;
 pub(crate) mod indexing;
-pub(crate) mod ast;
+pub(crate) mod tokenizer;
 #[derive(Debug, Clone, PartialEq, serde_derive::Serialize, serde_derive::Deserialize)]
 pub(crate) struct FtsIndexManifest {
@@ -139,7 +139,7 @@ impl TokenizerConfig {
        Ok(match &self.name as &str {
            "AlphaNumOnly" => AlphaNumOnlyFilter.into(),
            "AsciiFolding" => AsciiFoldingFilter.into(),
-            "LowerCase" => LowerCaser.into(),
+            "LowerCase" | "Lowercase" => LowerCaser.into(),
            "RemoveLong" => RemoveLongFilter::limit(
                self.args
                    .get(0)
@@ -180,7 +180,10 @@ impl TokenizerConfig {
                    .get_str()
                    .ok_or_else(|| {
                        miette!("First argument `language` to Stemmer must be a string")
-                    })? {
+                    })?
+                    .to_lowercase()
+                    .as_str()
+                {
                    "arabic" => Language::Arabic,
                    "danish" => Language::Danish,
                    "dutch" => Language::Dutch,
@@ -199,7 +202,7 @@ impl TokenizerConfig {
                    "swedish" => Language::Swedish,
                    "tamil" => Language::Tamil,
                    "turkish" => Language::Turkish,
-                    _ => bail!("Unsupported language: {}", self.name),
+                    lang => bail!("Unsupported language: {}", lang),
                };
                Stemmer::new(language).into()
            }
@@ -226,7 +229,7 @@ impl TokenizerConfig {
                    _ => bail!("Filter Stopwords requires language name or a list of stopwords"),
                }
            }
-            _ => bail!("Unknown token filter: {}", self.name),
+            _ => bail!("Unknown token filter: {:?}", self.name),
        })
    }
 }
--- a/cozo-core/src/parse/sys.rs
+++ b/cozo-core/src/parse/sys.rs
@@ -320,7 +320,7 @@ pub(crate) fn parse_sys(
                                expr.partial_eval()?;
                                match expr {
                                    Expr::Apply { op, args, .. } => {
-                                        if op.name != "LIST" {
+                                        if op.name != "OP_LIST" {
                                            bail!("Filters must be a list of filters");
                                        }
                                        for arg in args.iter() {
@@ -454,7 +454,7 @@ pub(crate) fn parse_sys(
                                expr.partial_eval()?;
                                match expr {
                                    Expr::Apply { op, args, .. } => {
-                                        if op.name != "LIST" {
+                                        if op.name != "OP_LIST" {
                                            bail!("Filters must be a list of filters");
                                        }
                                        for arg in args.iter() {
--- a/cozo-core/src/runtime/tests.rs
+++ b/cozo-core/src/runtime/tests.rs
@@ -914,7 +914,11 @@ fn test_fts_indexing() {
    )
    .unwrap();
    db.run_script(
-        r"::fts create a:fts {extractor: v, tokenizer: Simple }",
+        r"::fts create a:fts {
+            extractor: v,
+            tokenizer: Simple,
+            filters: [Lowercase, Stemmer('English'), Stopwords('en')]
+        }",
        Default::default(),
    )
    .unwrap();
@@ -939,6 +943,7 @@ fn test_fts_indexing() {
    for row in res.into_json()["rows"].as_array().unwrap() {
        println!("{}", row);
    }
    println!("query");
    let res = db
        .run_script(
            r"?[k, v, s] := ~a:fts{k, v | query: 'world', k: 2, bind_score: s}",