From 147101b7f305461c3240af9cca1248aee85e93a0 Mon Sep 17 00:00:00 2001 From: Ziyang Hu Date: Fri, 5 May 2023 12:38:55 +0800 Subject: [PATCH] Fixes https://github.com/cozodb/cozo/issues/90 --- cozo-core/src/fts/mod.rs | 15 +++++++++------ cozo-core/src/parse/sys.rs | 4 ++-- cozo-core/src/runtime/tests.rs | 7 ++++++- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/cozo-core/src/fts/mod.rs b/cozo-core/src/fts/mod.rs index 1dd031c3..c0973b16 100644 --- a/cozo-core/src/fts/mod.rs +++ b/cozo-core/src/fts/mod.rs @@ -22,10 +22,10 @@ use smartstring::{LazyCompact, SmartString}; use std::collections::HashMap; use std::sync::{Arc, RwLock}; +pub(crate) mod ast; pub(crate) mod cangjie; -pub(crate) mod tokenizer; pub(crate) mod indexing; -pub(crate) mod ast; +pub(crate) mod tokenizer; #[derive(Debug, Clone, PartialEq, serde_derive::Serialize, serde_derive::Deserialize)] pub(crate) struct FtsIndexManifest { @@ -139,7 +139,7 @@ impl TokenizerConfig { Ok(match &self.name as &str { "AlphaNumOnly" => AlphaNumOnlyFilter.into(), "AsciiFolding" => AsciiFoldingFilter.into(), - "LowerCase" => LowerCaser.into(), + "LowerCase" | "Lowercase" => LowerCaser.into(), "RemoveLong" => RemoveLongFilter::limit( self.args .get(0) @@ -180,7 +180,10 @@ impl TokenizerConfig { .get_str() .ok_or_else(|| { miette!("First argument `language` to Stemmer must be a string") - })? { + })? + .to_lowercase() + .as_str() + { "arabic" => Language::Arabic, "danish" => Language::Danish, "dutch" => Language::Dutch, @@ -199,7 +202,7 @@ impl TokenizerConfig { "swedish" => Language::Swedish, "tamil" => Language::Tamil, "turkish" => Language::Turkish, - _ => bail!("Unsupported language: {}", self.name), + lang => bail!("Unsupported language: {}", lang), }; Stemmer::new(language).into() } @@ -226,7 +229,7 @@ impl TokenizerConfig { _ => bail!("Filter Stopwords requires language name or a list of stopwords"), } } - _ => bail!("Unknown token filter: {}", self.name), + _ => bail!("Unknown token filter: {:?}", self.name), }) } } diff --git a/cozo-core/src/parse/sys.rs b/cozo-core/src/parse/sys.rs index 34b1709f..70282fb3 100644 --- a/cozo-core/src/parse/sys.rs +++ b/cozo-core/src/parse/sys.rs @@ -320,7 +320,7 @@ pub(crate) fn parse_sys( expr.partial_eval()?; match expr { Expr::Apply { op, args, .. } => { - if op.name != "LIST" { + if op.name != "OP_LIST" { bail!("Filters must be a list of filters"); } for arg in args.iter() { @@ -454,7 +454,7 @@ pub(crate) fn parse_sys( expr.partial_eval()?; match expr { Expr::Apply { op, args, .. } => { - if op.name != "LIST" { + if op.name != "OP_LIST" { bail!("Filters must be a list of filters"); } for arg in args.iter() { diff --git a/cozo-core/src/runtime/tests.rs b/cozo-core/src/runtime/tests.rs index 9e283141..5326571c 100644 --- a/cozo-core/src/runtime/tests.rs +++ b/cozo-core/src/runtime/tests.rs @@ -914,7 +914,11 @@ fn test_fts_indexing() { ) .unwrap(); db.run_script( - r"::fts create a:fts {extractor: v, tokenizer: Simple }", + r"::fts create a:fts { + extractor: v, + tokenizer: Simple, + filters: [Lowercase, Stemmer('English'), Stopwords('en')] + }", Default::default(), ) .unwrap(); @@ -939,6 +943,7 @@ fn test_fts_indexing() { for row in res.into_json()["rows"].as_array().unwrap() { println!("{}", row); } + println!("query"); let res = db .run_script( r"?[k, v, s] := ~a:fts{k, v | query: 'world', k: 2, bind_score: s}",