From 7a98125d15906d0fd7786eeb9cc84753e7bf8f95 Mon Sep 17 00:00:00 2001 From: Yuki Omoto Date: Wed, 9 Nov 2022 00:22:17 +0900 Subject: [PATCH 1/5] Add configurable stopwords feature --- src/config/defaults.rs | 5 + src/config/options.rs | 82 ++++++++++ src/lexer/stopwords.rs | 350 +++++++++++++++++++++++++++++++++-------- 3 files changed, 368 insertions(+), 69 deletions(-) diff --git a/src/config/defaults.rs b/src/config/defaults.rs index b60c9809..cbca74d0 100644 --- a/src/config/defaults.rs +++ b/src/config/defaults.rs @@ -4,6 +4,7 @@ // Copyright: 2019, Valerian Saliou // License: Mozilla Public License v2.0 (MPL v2.0) +use super::options::ConfigChannelSearchStopwords; use std::net::SocketAddr; use std::path::PathBuf; @@ -47,6 +48,10 @@ pub fn channel_search_list_limit_maximum() -> u16 { 500 } +pub fn channel_search_stopwords() -> ConfigChannelSearchStopwords { + ConfigChannelSearchStopwords::default() +} + pub fn store_kv_path() -> PathBuf { PathBuf::from("./data/store/kv/") } diff --git a/src/config/options.rs b/src/config/options.rs index 02f82f46..94f0f20e 100644 --- a/src/config/options.rs +++ b/src/config/options.rs @@ -65,6 +65,88 @@ pub struct ConfigChannelSearch { #[serde(default = "defaults::channel_search_list_limit_maximum")] pub list_limit_maximum: u16, + + #[serde(default = "defaults::channel_search_stopwords")] + pub stopwords: ConfigChannelSearchStopwords, +} + +#[derive(Deserialize, Default)] +pub struct ConfigChannelSearchStopwords { + pub epo: Option>, + pub eng: Option>, + pub rus: Option>, + pub cmn: Option>, + pub spa: Option>, + pub por: Option>, + pub ita: Option>, + pub ben: Option>, + pub fra: Option>, + pub deu: Option>, + + pub ukr: Option>, + pub kat: Option>, + pub ara: Option>, + pub hin: Option>, + pub jpn: Option>, + pub heb: Option>, + pub yid: Option>, + pub pol: Option>, + pub amh: Option>, + pub jav: Option>, + + pub kor: Option>, + pub nob: Option>, + pub dan: Option>, + pub swe: Option>, + pub fin: Option>, + pub tur: 
Option>, + pub nld: Option>, + pub hun: Option>, + pub ces: Option>, + pub ell: Option>, + + pub bul: Option>, + pub bel: Option>, + pub mar: Option>, + pub kan: Option>, + pub ron: Option>, + pub slv: Option>, + pub hrv: Option>, + pub srp: Option>, + pub mkd: Option>, + pub lit: Option>, + + pub lav: Option>, + pub est: Option>, + pub tam: Option>, + pub vie: Option>, + pub urd: Option>, + pub tha: Option>, + pub guj: Option>, + pub uzb: Option>, + pub pan: Option>, + pub aze: Option>, + + pub ind: Option>, + pub tel: Option>, + pub pes: Option>, + pub mal: Option>, + pub ori: Option>, + pub mya: Option>, + pub nep: Option>, + pub sin: Option>, + pub khm: Option>, + pub tuk: Option>, + + pub aka: Option>, + pub zul: Option>, + pub sna: Option>, + pub afr: Option>, + pub lat: Option>, + pub slk: Option>, + pub cat: Option>, + pub tgl: Option>, + pub hye: Option>, } #[derive(Deserialize)] diff --git a/src/lexer/stopwords.rs b/src/lexer/stopwords.rs index 18cc304d..e582486f 100644 --- a/src/lexer/stopwords.rs +++ b/src/lexer/stopwords.rs @@ -8,110 +8,322 @@ use hashbrown::HashSet; use whatlang::{Lang, Script}; use crate::stopwords::*; +use crate::APP_CONF; pub struct LexerStopWord; // Recursion group #1 (10 items) lazy_static! 
{ - static ref STOPWORDS_EPO: HashSet<&'static str> = make(epo::STOPWORDS_EPO); - static ref STOPWORDS_ENG: HashSet<&'static str> = make(eng::STOPWORDS_ENG); - static ref STOPWORDS_RUS: HashSet<&'static str> = make(rus::STOPWORDS_RUS); - static ref STOPWORDS_CMN: HashSet<&'static str> = make(cmn::STOPWORDS_CMN); - static ref STOPWORDS_SPA: HashSet<&'static str> = make(spa::STOPWORDS_SPA); - static ref STOPWORDS_POR: HashSet<&'static str> = make(por::STOPWORDS_POR); - static ref STOPWORDS_ITA: HashSet<&'static str> = make(ita::STOPWORDS_ITA); - static ref STOPWORDS_BEN: HashSet<&'static str> = make(ben::STOPWORDS_BEN); - static ref STOPWORDS_FRA: HashSet<&'static str> = make(fra::STOPWORDS_FRA); - static ref STOPWORDS_DEU: HashSet<&'static str> = make(deu::STOPWORDS_DEU); + static ref STOPWORDS_EPO: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.epo { + Some(words) => make_from_vec(words), + None => make(epo::STOPWORDS_EPO), + }; + static ref STOPWORDS_ENG: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.eng { + Some(words) => make_from_vec(words), + None => make(eng::STOPWORDS_ENG), + }; + static ref STOPWORDS_RUS: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.rus { + Some(words) => make_from_vec(words), + None => make(rus::STOPWORDS_RUS), + }; + static ref STOPWORDS_CMN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.cmn { + Some(words) => make_from_vec(words), + None => make(cmn::STOPWORDS_CMN), + }; + static ref STOPWORDS_SPA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.spa { + Some(words) => make_from_vec(words), + None => make(spa::STOPWORDS_SPA), + }; + static ref STOPWORDS_POR: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.por { + Some(words) => make_from_vec(words), + None => make(por::STOPWORDS_POR), + }; + static ref STOPWORDS_ITA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ita { + Some(words) => make_from_vec(words), + 
None => make(ita::STOPWORDS_ITA), + }; + static ref STOPWORDS_BEN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ben { + Some(words) => make_from_vec(words), + None => make(ben::STOPWORDS_BEN), + }; + static ref STOPWORDS_FRA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.fra { + Some(words) => make_from_vec(words), + None => make(fra::STOPWORDS_FRA), + }; + static ref STOPWORDS_DEU: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.deu { + Some(words) => make_from_vec(words), + None => make(deu::STOPWORDS_DEU), + }; } // Recursion group #2 (10 items) lazy_static! { - static ref STOPWORDS_UKR: HashSet<&'static str> = make(ukr::STOPWORDS_UKR); - static ref STOPWORDS_KAT: HashSet<&'static str> = make(kat::STOPWORDS_KAT); - static ref STOPWORDS_ARA: HashSet<&'static str> = make(ara::STOPWORDS_ARA); - static ref STOPWORDS_HIN: HashSet<&'static str> = make(hin::STOPWORDS_HIN); - static ref STOPWORDS_JPN: HashSet<&'static str> = make(jpn::STOPWORDS_JPN); - static ref STOPWORDS_HEB: HashSet<&'static str> = make(heb::STOPWORDS_HEB); - static ref STOPWORDS_YID: HashSet<&'static str> = make(yid::STOPWORDS_YID); - static ref STOPWORDS_POL: HashSet<&'static str> = make(pol::STOPWORDS_POL); - static ref STOPWORDS_AMH: HashSet<&'static str> = make(amh::STOPWORDS_AMH); - static ref STOPWORDS_JAV: HashSet<&'static str> = make(jav::STOPWORDS_JAV); + static ref STOPWORDS_UKR: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ukr { + Some(words) => make_from_vec(words), + None => make(ukr::STOPWORDS_UKR), + }; + static ref STOPWORDS_KAT: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.kat { + Some(words) => make_from_vec(words), + None => make(kat::STOPWORDS_KAT), + }; + static ref STOPWORDS_ARA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ara { + Some(words) => make_from_vec(words), + None => make(ara::STOPWORDS_ARA), + }; + static ref STOPWORDS_HIN: HashSet<&'static str> = 
match &APP_CONF.channel.search.stopwords.hin { + Some(words) => make_from_vec(words), + None => make(hin::STOPWORDS_HIN), + }; + static ref STOPWORDS_JPN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.jpn { + Some(words) => words.iter().clone().map(|word| &**word).collect(), + None => make(jpn::STOPWORDS_JPN), + }; + static ref STOPWORDS_HEB: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.heb { + Some(words) => make_from_vec(words), + None => make(heb::STOPWORDS_HEB), + }; + static ref STOPWORDS_YID: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.yid { + Some(words) => make_from_vec(words), + None => make(yid::STOPWORDS_YID), + }; + static ref STOPWORDS_POL: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.pol { + Some(words) => make_from_vec(words), + None => make(pol::STOPWORDS_POL), + }; + static ref STOPWORDS_AMH: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.amh { + Some(words) => make_from_vec(words), + None => make(amh::STOPWORDS_AMH), + }; + static ref STOPWORDS_JAV: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.jav { + Some(words) => make_from_vec(words), + None => make(jav::STOPWORDS_JAV), + }; } // Recursion group #3 (10 items) lazy_static! 
{ - static ref STOPWORDS_KOR: HashSet<&'static str> = make(kor::STOPWORDS_KOR); - static ref STOPWORDS_NOB: HashSet<&'static str> = make(nob::STOPWORDS_NOB); - static ref STOPWORDS_DAN: HashSet<&'static str> = make(dan::STOPWORDS_DAN); - static ref STOPWORDS_SWE: HashSet<&'static str> = make(swe::STOPWORDS_SWE); - static ref STOPWORDS_FIN: HashSet<&'static str> = make(fin::STOPWORDS_FIN); - static ref STOPWORDS_TUR: HashSet<&'static str> = make(tur::STOPWORDS_TUR); - static ref STOPWORDS_NLD: HashSet<&'static str> = make(nld::STOPWORDS_NLD); - static ref STOPWORDS_HUN: HashSet<&'static str> = make(hun::STOPWORDS_HUN); - static ref STOPWORDS_CES: HashSet<&'static str> = make(ces::STOPWORDS_CES); - static ref STOPWORDS_ELL: HashSet<&'static str> = make(ell::STOPWORDS_ELL); + static ref STOPWORDS_KOR: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.kor { + Some(words) => make_from_vec(words), + None => make(kor::STOPWORDS_KOR), + }; + static ref STOPWORDS_NOB: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.nob { + Some(words) => make_from_vec(words), + None => make(nob::STOPWORDS_NOB), + }; + static ref STOPWORDS_DAN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.dan { + Some(words) => make_from_vec(words), + None => make(dan::STOPWORDS_DAN), + }; + static ref STOPWORDS_SWE: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.swe { + Some(words) => make_from_vec(words), + None => make(swe::STOPWORDS_SWE), + }; + static ref STOPWORDS_FIN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.fin { + Some(words) => make_from_vec(words), + None => make(fin::STOPWORDS_FIN), + }; + static ref STOPWORDS_TUR: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.tur { + Some(words) => make_from_vec(words), + None => make(tur::STOPWORDS_TUR), + }; + static ref STOPWORDS_NLD: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.nld { + Some(words) => make_from_vec(words), + 
None => make(nld::STOPWORDS_NLD), + }; + static ref STOPWORDS_HUN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.hun { + Some(words) => make_from_vec(words), + None => make(hun::STOPWORDS_HUN), + }; + static ref STOPWORDS_CES: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ces { + Some(words) => make_from_vec(words), + None => make(ces::STOPWORDS_CES), + }; + static ref STOPWORDS_ELL: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ell { + Some(words) => make_from_vec(words), + None => make(ell::STOPWORDS_ELL), + }; } // Recursion group #4 (10 items) lazy_static! { - static ref STOPWORDS_BUL: HashSet<&'static str> = make(bul::STOPWORDS_BUL); - static ref STOPWORDS_BEL: HashSet<&'static str> = make(bel::STOPWORDS_BEL); - static ref STOPWORDS_MAR: HashSet<&'static str> = make(mar::STOPWORDS_MAR); - static ref STOPWORDS_KAN: HashSet<&'static str> = make(kan::STOPWORDS_KAN); - static ref STOPWORDS_RON: HashSet<&'static str> = make(ron::STOPWORDS_RON); - static ref STOPWORDS_SLV: HashSet<&'static str> = make(slv::STOPWORDS_SLV); - static ref STOPWORDS_HRV: HashSet<&'static str> = make(hrv::STOPWORDS_HRV); - static ref STOPWORDS_SRP: HashSet<&'static str> = make(srp::STOPWORDS_SRP); - static ref STOPWORDS_MKD: HashSet<&'static str> = make(mkd::STOPWORDS_MKD); - static ref STOPWORDS_LIT: HashSet<&'static str> = make(lit::STOPWORDS_LIT); + static ref STOPWORDS_BUL: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.bul { + Some(words) => make_from_vec(words), + None => make(bul::STOPWORDS_BUL), + }; + static ref STOPWORDS_BEL: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.bel { + Some(words) => make_from_vec(words), + None => make(bel::STOPWORDS_BEL), + }; + static ref STOPWORDS_MAR: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.mar { + Some(words) => make_from_vec(words), + None => make(mar::STOPWORDS_MAR), + }; + static ref STOPWORDS_KAN: HashSet<&'static str> = 
match &APP_CONF.channel.search.stopwords.kan { + Some(words) => make_from_vec(words), + None => make(kan::STOPWORDS_KAN), + }; + static ref STOPWORDS_RON: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ron { + Some(words) => make_from_vec(words), + None => make(ron::STOPWORDS_RON), + }; + static ref STOPWORDS_SLV: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.slv { + Some(words) => make_from_vec(words), + None => make(slv::STOPWORDS_SLV), + }; + static ref STOPWORDS_HRV: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.hrv { + Some(words) => make_from_vec(words), + None => make(hrv::STOPWORDS_HRV), + }; + static ref STOPWORDS_SRP: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.srp { + Some(words) => make_from_vec(words), + None => make(srp::STOPWORDS_SRP), + }; + static ref STOPWORDS_MKD: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.mkd { + Some(words) => make_from_vec(words), + None => make(mkd::STOPWORDS_MKD), + }; + static ref STOPWORDS_LIT: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.lit { + Some(words) => make_from_vec(words), + None => make(lit::STOPWORDS_LIT), + }; } // Recursion group #5 (10 items) lazy_static! 
{ - static ref STOPWORDS_LAV: HashSet<&'static str> = make(lav::STOPWORDS_LAV); - static ref STOPWORDS_EST: HashSet<&'static str> = make(est::STOPWORDS_EST); - static ref STOPWORDS_TAM: HashSet<&'static str> = make(tam::STOPWORDS_TAM); - static ref STOPWORDS_VIE: HashSet<&'static str> = make(vie::STOPWORDS_VIE); - static ref STOPWORDS_URD: HashSet<&'static str> = make(urd::STOPWORDS_URD); - static ref STOPWORDS_THA: HashSet<&'static str> = make(tha::STOPWORDS_THA); - static ref STOPWORDS_GUJ: HashSet<&'static str> = make(guj::STOPWORDS_GUJ); - static ref STOPWORDS_UZB: HashSet<&'static str> = make(uzb::STOPWORDS_UZB); - static ref STOPWORDS_PAN: HashSet<&'static str> = make(pan::STOPWORDS_PAN); - static ref STOPWORDS_AZE: HashSet<&'static str> = make(aze::STOPWORDS_AZE); + static ref STOPWORDS_LAV: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.lav { + Some(words) => make_from_vec(words), + None => make(lav::STOPWORDS_LAV), + }; + static ref STOPWORDS_EST: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.est { + Some(words) => make_from_vec(words), + None => make(est::STOPWORDS_EST), + }; + static ref STOPWORDS_TAM: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.tam { + Some(words) => make_from_vec(words), + None => make(tam::STOPWORDS_TAM), + }; + static ref STOPWORDS_VIE: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.vie { + Some(words) => make_from_vec(words), + None => make(vie::STOPWORDS_VIE), + }; + static ref STOPWORDS_URD: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.urd { + Some(words) => make_from_vec(words), + None => make(urd::STOPWORDS_URD), + }; + static ref STOPWORDS_THA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.tha { + Some(words) => make_from_vec(words), + None => make(tha::STOPWORDS_THA), + }; + static ref STOPWORDS_GUJ: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.guj { + Some(words) => make_from_vec(words), + 
None => make(guj::STOPWORDS_GUJ), + }; + static ref STOPWORDS_UZB: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.uzb { + Some(words) => make_from_vec(words), + None => make(uzb::STOPWORDS_UZB), + }; + static ref STOPWORDS_PAN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.pan { + Some(words) => make_from_vec(words), + None => make(pan::STOPWORDS_PAN), + }; + static ref STOPWORDS_AZE: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.aze { + Some(words) => make_from_vec(words), + None => make(aze::STOPWORDS_AZE), + }; } // Recursion group #6 (10 items) lazy_static! { - static ref STOPWORDS_IND: HashSet<&'static str> = make(ind::STOPWORDS_IND); - static ref STOPWORDS_TEL: HashSet<&'static str> = make(tel::STOPWORDS_TEL); - static ref STOPWORDS_PES: HashSet<&'static str> = make(pes::STOPWORDS_PES); - static ref STOPWORDS_MAL: HashSet<&'static str> = make(mal::STOPWORDS_MAL); - static ref STOPWORDS_ORI: HashSet<&'static str> = make(ori::STOPWORDS_ORI); - static ref STOPWORDS_MYA: HashSet<&'static str> = make(mya::STOPWORDS_MYA); - static ref STOPWORDS_NEP: HashSet<&'static str> = make(nep::STOPWORDS_NEP); - static ref STOPWORDS_SIN: HashSet<&'static str> = make(sin::STOPWORDS_SIN); - static ref STOPWORDS_KHM: HashSet<&'static str> = make(khm::STOPWORDS_KHM); - static ref STOPWORDS_TUK: HashSet<&'static str> = make(tuk::STOPWORDS_TUK); + static ref STOPWORDS_IND: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ind { + Some(words) => words.iter().clone().map(|w| &**w).collect(), + None => make(ind::STOPWORDS_IND), + }; + static ref STOPWORDS_TEL: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.tel { + Some(words) => make_from_vec(words), + None => make(tel::STOPWORDS_TEL), + }; + static ref STOPWORDS_PES: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.pes { + Some(words) => make_from_vec(words), + None => make(pes::STOPWORDS_PES), + }; + static ref STOPWORDS_MAL: 
HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.mal { + Some(words) => make_from_vec(words), + None => make(mal::STOPWORDS_MAL), + }; + static ref STOPWORDS_ORI: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ori { + Some(words) => make_from_vec(words), + None => make(ori::STOPWORDS_ORI), + }; + static ref STOPWORDS_MYA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.mya { + Some(words) => make_from_vec(words), + None => make(mya::STOPWORDS_MYA), + }; + static ref STOPWORDS_NEP: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.nep { + Some(words) => make_from_vec(words), + None => make(nep::STOPWORDS_NEP), + }; + static ref STOPWORDS_SIN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.sin { + Some(words) => make_from_vec(words), + None => make(sin::STOPWORDS_SIN), + }; + static ref STOPWORDS_KHM: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.khm { + Some(words) => make_from_vec(words), + None => make(khm::STOPWORDS_KHM), + }; + static ref STOPWORDS_TUK: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.tuk { + Some(words) => make_from_vec(words), + None => make(tuk::STOPWORDS_TUK), + }; } // Recursion group #7 (9 items) lazy_static! 
{ - static ref STOPWORDS_AKA: HashSet<&'static str> = make(aka::STOPWORDS_AKA); - static ref STOPWORDS_ZUL: HashSet<&'static str> = make(zul::STOPWORDS_ZUL); - static ref STOPWORDS_SNA: HashSet<&'static str> = make(sna::STOPWORDS_SNA); - static ref STOPWORDS_AFR: HashSet<&'static str> = make(afr::STOPWORDS_AFR); - static ref STOPWORDS_LAT: HashSet<&'static str> = make(lat::STOPWORDS_LAT); - static ref STOPWORDS_SLK: HashSet<&'static str> = make(slk::STOPWORDS_SLK); - static ref STOPWORDS_CAT: HashSet<&'static str> = make(cat::STOPWORDS_CAT); - static ref STOPWORDS_TGL: HashSet<&'static str> = make(tgl::STOPWORDS_TGL); - static ref STOPWORDS_HYE: HashSet<&'static str> = make(hye::STOPWORDS_HYE); + static ref STOPWORDS_AKA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.aka { + Some(words) => make_from_vec(words), + None => make(aka::STOPWORDS_AKA), + }; + static ref STOPWORDS_ZUL: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.zul { + Some(words) => make_from_vec(words), + None => make(zul::STOPWORDS_ZUL), + }; + static ref STOPWORDS_SNA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.sna { + Some(words) => make_from_vec(words), + None => make(sna::STOPWORDS_SNA), + }; + static ref STOPWORDS_AFR: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.afr { + Some(words) => make_from_vec(words), + None => make(afr::STOPWORDS_AFR), + }; + static ref STOPWORDS_LAT: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.lat { + Some(words) => make_from_vec(words), + None => make(lat::STOPWORDS_LAT), + }; + static ref STOPWORDS_SLK: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.slk { + Some(words) => make_from_vec(words), + None => make(slk::STOPWORDS_SLK), + }; + static ref STOPWORDS_CAT: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.cat { + Some(words) => make_from_vec(words), + None => make(cat::STOPWORDS_CAT), + }; + static ref STOPWORDS_TGL: 
HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.tgl { + Some(words) => make_from_vec(words), + None => make(tgl::STOPWORDS_TGL), + }; + static ref STOPWORDS_HYE: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.hye { + Some(words) => make_from_vec(words), + None => make(hye::STOPWORDS_HYE), + }; } fn make<'a>(words: &[&'a str]) -> HashSet<&'a str> { words.iter().copied().collect() } +fn make_from_vec<'a>(words: &'a Vec) -> HashSet<&'a str> { + words.iter().clone().map(|word| &**word).collect() +} + impl LexerStopWord { pub fn is(word: &str, locale: Option) -> bool { if let Some(locale) = locale { From 8ef957420ab35efe13daf8e06e5ff5959140deb1 Mon Sep 17 00:00:00 2001 From: Yuki Omoto Date: Wed, 9 Nov 2022 00:39:44 +0900 Subject: [PATCH 2/5] Update default config file for stopwords --- config.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/config.cfg b/config.cfg index 39de816a..e76a49ea 100644 --- a/config.cfg +++ b/config.cfg @@ -28,6 +28,7 @@ suggest_limit_maximum = 20 list_limit_default = 100 list_limit_maximum = 500 +[channel.search.stopwords] [store] From 9d5c8e9524e1d23ff1057439da0a7baa843cae5b Mon Sep 17 00:00:00 2001 From: Yuki Omoto Date: Wed, 9 Nov 2022 01:06:45 +0900 Subject: [PATCH 3/5] Update CONFIGURATION.md for stopwords --- CONFIGURATION.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CONFIGURATION.md b/CONFIGURATION.md index b4bb1638..9d27ef98 100644 --- a/CONFIGURATION.md +++ b/CONFIGURATION.md @@ -25,6 +25,10 @@ Sonic Configuration * `list_limit_default` (type: _integer_, allowed: numbers, default: `100`) — Default listed words limit for a list command (if the LIMIT command modifier is not used when issuing a LIST command) * `list_limit_maximum` (type: _integer_, allowed: numbers, default: `500`) — Maximum listed words limit for a list command (if the LIMIT command modifier is being used when issuing a LIST command) +**[channel.search.stopwords]** + +* `${language_code}` (type: _string[]_, allowed: 
[supported language codes](https://github.com/valeriansaliou/sonic/tree/master/src/stopwords), default: none) — User defined stopwords for the selected language. Use it only if you want to override the preset of Sonic. Setting this value explicitly to `[]` disables stopwords at all. + **[store]** **[store.kv]** From dcc6430b11f3723ca430ddf2f394bb09a2a4cfb1 Mon Sep 17 00:00:00 2001 From: Yuki Omoto Date: Sun, 13 Nov 2022 16:48:23 +0900 Subject: [PATCH 4/5] Add a macro to generate stopwords for each language --- src/lexer/stopwords.rs | 442 ++++++++++++++--------------------------- 1 file changed, 153 insertions(+), 289 deletions(-) diff --git a/src/lexer/stopwords.rs b/src/lexer/stopwords.rs index e582486f..59c2e570 100644 --- a/src/lexer/stopwords.rs +++ b/src/lexer/stopwords.rs @@ -12,316 +12,180 @@ use crate::APP_CONF; pub struct LexerStopWord; -// Recursion group #1 (10 items) -lazy_static! { - static ref STOPWORDS_EPO: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.epo { - Some(words) => make_from_vec(words), - None => make(epo::STOPWORDS_EPO), - }; - static ref STOPWORDS_ENG: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.eng { - Some(words) => make_from_vec(words), - None => make(eng::STOPWORDS_ENG), - }; - static ref STOPWORDS_RUS: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.rus { - Some(words) => make_from_vec(words), - None => make(rus::STOPWORDS_RUS), - }; - static ref STOPWORDS_CMN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.cmn { - Some(words) => make_from_vec(words), - None => make(cmn::STOPWORDS_CMN), - }; - static ref STOPWORDS_SPA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.spa { - Some(words) => make_from_vec(words), - None => make(spa::STOPWORDS_SPA), - }; - static ref STOPWORDS_POR: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.por { - Some(words) => make_from_vec(words), - None => make(por::STOPWORDS_POR), - }; - static ref 
STOPWORDS_ITA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ita { - Some(words) => make_from_vec(words), - None => make(ita::STOPWORDS_ITA), - }; - static ref STOPWORDS_BEN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ben { - Some(words) => make_from_vec(words), - None => make(ben::STOPWORDS_BEN), - }; - static ref STOPWORDS_FRA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.fra { - Some(words) => make_from_vec(words), - None => make(fra::STOPWORDS_FRA), - }; - static ref STOPWORDS_DEU: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.deu { - Some(words) => make_from_vec(words), - None => make(deu::STOPWORDS_DEU), +macro_rules! gen_stopwords { + ($override:expr, $default:expr) => { + match $override { + Some(words) => words.iter().clone().map(|word| &**word).collect(), + None => $default.iter().copied().collect(), + } }; } -// Recursion group #2 (10 items) +// Recursion group #1 (10 items) lazy_static! { - static ref STOPWORDS_UKR: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ukr { - Some(words) => make_from_vec(words), - None => make(ukr::STOPWORDS_UKR), - }; - static ref STOPWORDS_KAT: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.kat { - Some(words) => make_from_vec(words), - None => make(kat::STOPWORDS_KAT), - }; - static ref STOPWORDS_ARA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ara { - Some(words) => make_from_vec(words), - None => make(ara::STOPWORDS_ARA), - }; - static ref STOPWORDS_HIN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.hin { - Some(words) => make_from_vec(words), - None => make(hin::STOPWORDS_HIN), - }; - static ref STOPWORDS_JPN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.jpn { - Some(words) => words.iter().clone().map(|word| &**word).collect(), - None => make(jpn::STOPWORDS_JPN), - }; - static ref STOPWORDS_HEB: HashSet<&'static str> = match 
&APP_CONF.channel.search.stopwords.heb { - Some(words) => make_from_vec(words), - None => make(heb::STOPWORDS_HEB), - }; - static ref STOPWORDS_YID: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.yid { - Some(words) => make_from_vec(words), - None => make(yid::STOPWORDS_YID), - }; - static ref STOPWORDS_POL: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.pol { - Some(words) => make_from_vec(words), - None => make(pol::STOPWORDS_POL), - }; - static ref STOPWORDS_AMH: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.amh { - Some(words) => make_from_vec(words), - None => make(amh::STOPWORDS_AMH), - }; - static ref STOPWORDS_JAV: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.jav { - Some(words) => make_from_vec(words), - None => make(jav::STOPWORDS_JAV), - }; + static ref STOPWORDS_EPO: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.epo, epo::STOPWORDS_EPO); + static ref STOPWORDS_ENG: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.eng, eng::STOPWORDS_ENG); + static ref STOPWORDS_RUS: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.rus, rus::STOPWORDS_RUS); + static ref STOPWORDS_CMN: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.cmn, cmn::STOPWORDS_CMN); + static ref STOPWORDS_SPA: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.spa, spa::STOPWORDS_SPA); + static ref STOPWORDS_POR: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.por, por::STOPWORDS_POR); + static ref STOPWORDS_ITA: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.ita, ita::STOPWORDS_ITA); + static ref STOPWORDS_BEN: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.ben, ben::STOPWORDS_BEN); + static ref STOPWORDS_FRA: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.fra, 
fra::STOPWORDS_FRA); + static ref STOPWORDS_DEU: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.deu, deu::STOPWORDS_DEU); } -// Recursion group #3 (10 items) +// Recursion group #2 (10 items) { lazy_static! { - static ref STOPWORDS_KOR: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.kor { - Some(words) => make_from_vec(words), - None => make(kor::STOPWORDS_KOR), - }; - static ref STOPWORDS_NOB: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.nob { - Some(words) => make_from_vec(words), - None => make(nob::STOPWORDS_NOB), - }; - static ref STOPWORDS_DAN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.dan { - Some(words) => make_from_vec(words), - None => make(dan::STOPWORDS_DAN), - }; - static ref STOPWORDS_SWE: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.swe { - Some(words) => make_from_vec(words), - None => make(swe::STOPWORDS_SWE), - }; - static ref STOPWORDS_FIN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.fin { - Some(words) => make_from_vec(words), - None => make(fin::STOPWORDS_FIN), - }; - static ref STOPWORDS_TUR: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.tur { - Some(words) => make_from_vec(words), - None => make(tur::STOPWORDS_TUR), - }; - static ref STOPWORDS_NLD: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.nld { - Some(words) => make_from_vec(words), - None => make(nld::STOPWORDS_NLD), - }; - static ref STOPWORDS_HUN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.hun { - Some(words) => make_from_vec(words), - None => make(hun::STOPWORDS_HUN), - }; - static ref STOPWORDS_CES: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ces { - Some(words) => make_from_vec(words), - None => make(ces::STOPWORDS_CES), - }; - static ref STOPWORDS_ELL: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ell { - Some(words) => make_from_vec(words), - None => 
make(ell::STOPWORDS_ELL), - }; + static ref STOPWORDS_UKR: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.ukr, ukr::STOPWORDS_UKR); + static ref STOPWORDS_KAT: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.kat, kat::STOPWORDS_KAT); + static ref STOPWORDS_ARA: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.ara, ara::STOPWORDS_ARA); + static ref STOPWORDS_HIN: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.hin, hin::STOPWORDS_HIN); + static ref STOPWORDS_JPN: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.jpn, jpn::STOPWORDS_JPN); + static ref STOPWORDS_HEB: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.heb, heb::STOPWORDS_HEB); + static ref STOPWORDS_YID: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.yid, yid::STOPWORDS_YID); + static ref STOPWORDS_POL: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.pol, pol::STOPWORDS_POL); + static ref STOPWORDS_AMH: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.amh, amh::STOPWORDS_AMH); + static ref STOPWORDS_JAV: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.jav, jav::STOPWORDS_JAV); } -// Recursion group #4 (10 items) +// Recursion group #3 (10 items) { lazy_static! 
{ - static ref STOPWORDS_BUL: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.bul { - Some(words) => make_from_vec(words), - None => make(bul::STOPWORDS_BUL), - }; - static ref STOPWORDS_BEL: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.bel { - Some(words) => make_from_vec(words), - None => make(bel::STOPWORDS_BEL), - }; - static ref STOPWORDS_MAR: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.mar { - Some(words) => make_from_vec(words), - None => make(mar::STOPWORDS_MAR), - }; - static ref STOPWORDS_KAN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.kan { - Some(words) => make_from_vec(words), - None => make(kan::STOPWORDS_KAN), - }; - static ref STOPWORDS_RON: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ron { - Some(words) => make_from_vec(words), - None => make(ron::STOPWORDS_RON), - }; - static ref STOPWORDS_SLV: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.slv { - Some(words) => make_from_vec(words), - None => make(slv::STOPWORDS_SLV), - }; - static ref STOPWORDS_HRV: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.hrv { - Some(words) => make_from_vec(words), - None => make(hrv::STOPWORDS_HRV), - }; - static ref STOPWORDS_SRP: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.srp { - Some(words) => make_from_vec(words), - None => make(srp::STOPWORDS_SRP), - }; - static ref STOPWORDS_MKD: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.mkd { - Some(words) => make_from_vec(words), - None => make(mkd::STOPWORDS_MKD), - }; - static ref STOPWORDS_LIT: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.lit { - Some(words) => make_from_vec(words), - None => make(lit::STOPWORDS_LIT), - }; + static ref STOPWORDS_KOR: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.kor, kor::STOPWORDS_KOR); + static ref STOPWORDS_NOB: HashSet<&'static str> = + 
gen_stopwords!(&APP_CONF.channel.search.stopwords.nob, nob::STOPWORDS_NOB); + static ref STOPWORDS_DAN: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.dan, dan::STOPWORDS_DAN); + static ref STOPWORDS_SWE: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.swe, swe::STOPWORDS_SWE); + static ref STOPWORDS_FIN: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.fin, fin::STOPWORDS_FIN); + static ref STOPWORDS_TUR: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.tur, tur::STOPWORDS_TUR); + static ref STOPWORDS_NLD: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.nld, nld::STOPWORDS_NLD); + static ref STOPWORDS_HUN: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.hun, hun::STOPWORDS_HUN); + static ref STOPWORDS_CES: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.ces, ces::STOPWORDS_CES); + static ref STOPWORDS_ELL: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.ell, ell::STOPWORDS_ELL); } -// Recursion group #5 (10 items) -lazy_static! 
{ - static ref STOPWORDS_LAV: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.lav { - Some(words) => make_from_vec(words), - None => make(lav::STOPWORDS_LAV), - }; - static ref STOPWORDS_EST: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.est { - Some(words) => make_from_vec(words), - None => make(est::STOPWORDS_EST), - }; - static ref STOPWORDS_TAM: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.tam { - Some(words) => make_from_vec(words), - None => make(tam::STOPWORDS_TAM), - }; - static ref STOPWORDS_VIE: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.vie { - Some(words) => make_from_vec(words), - None => make(vie::STOPWORDS_VIE), - }; - static ref STOPWORDS_URD: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.urd { - Some(words) => make_from_vec(words), - None => make(urd::STOPWORDS_URD), - }; - static ref STOPWORDS_THA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.tha { - Some(words) => make_from_vec(words), - None => make(tha::STOPWORDS_THA), - }; - static ref STOPWORDS_GUJ: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.guj { - Some(words) => make_from_vec(words), - None => make(guj::STOPWORDS_GUJ), - }; - static ref STOPWORDS_UZB: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.uzb { - Some(words) => make_from_vec(words), - None => make(uzb::STOPWORDS_UZB), - }; - static ref STOPWORDS_PAN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.pan { - Some(words) => make_from_vec(words), - None => make(pan::STOPWORDS_PAN), - }; - static ref STOPWORDS_AZE: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.aze { - Some(words) => make_from_vec(words), - None => make(aze::STOPWORDS_AZE), - }; -} +// Recursion group #4 (10 items) { -// Recursion group #6 (10 items) lazy_static! 
{ - static ref STOPWORDS_IND: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ind { - Some(words) => words.iter().clone().map(|w| &**w).collect(), - None => make(ind::STOPWORDS_IND), - }; - static ref STOPWORDS_TEL: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.tel { - Some(words) => make_from_vec(words), - None => make(tel::STOPWORDS_TEL), - }; - static ref STOPWORDS_PES: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.pes { - Some(words) => make_from_vec(words), - None => make(pes::STOPWORDS_PES), - }; - static ref STOPWORDS_MAL: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.mal { - Some(words) => make_from_vec(words), - None => make(mal::STOPWORDS_MAL), - }; - static ref STOPWORDS_ORI: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.ori { - Some(words) => make_from_vec(words), - None => make(ori::STOPWORDS_ORI), - }; - static ref STOPWORDS_MYA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.mya { - Some(words) => make_from_vec(words), - None => make(mya::STOPWORDS_MYA), - }; - static ref STOPWORDS_NEP: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.nep { - Some(words) => make_from_vec(words), - None => make(nep::STOPWORDS_NEP), - }; - static ref STOPWORDS_SIN: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.sin { - Some(words) => make_from_vec(words), - None => make(sin::STOPWORDS_SIN), - }; - static ref STOPWORDS_KHM: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.khm { - Some(words) => make_from_vec(words), - None => make(khm::STOPWORDS_KHM), - }; - static ref STOPWORDS_TUK: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.tuk { - Some(words) => make_from_vec(words), - None => make(tuk::STOPWORDS_TUK), - }; + static ref STOPWORDS_BUL: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.bul, bul::STOPWORDS_BUL); + static ref STOPWORDS_BEL: HashSet<&'static str> = + 
gen_stopwords!(&APP_CONF.channel.search.stopwords.bel, bel::STOPWORDS_BEL); + static ref STOPWORDS_MAR: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.mar, mar::STOPWORDS_MAR); + static ref STOPWORDS_KAN: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.kan, kan::STOPWORDS_KAN); + static ref STOPWORDS_RON: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.ron, ron::STOPWORDS_RON); + static ref STOPWORDS_SLV: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.slv, slv::STOPWORDS_SLV); + static ref STOPWORDS_HRV: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.hrv, hrv::STOPWORDS_HRV); + static ref STOPWORDS_SRP: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.srp, srp::STOPWORDS_SRP); + static ref STOPWORDS_MKD: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.mkd, mkd::STOPWORDS_MKD); + static ref STOPWORDS_LIT: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.lit, lit::STOPWORDS_LIT); } -// Recursion group #7 (9 items) +// Recursion group #5 (10 items) { lazy_static! 
{ - static ref STOPWORDS_AKA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.aka { - Some(words) => make_from_vec(words), - None => make(aka::STOPWORDS_AKA), - }; - static ref STOPWORDS_ZUL: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.zul { - Some(words) => make_from_vec(words), - None => make(zul::STOPWORDS_ZUL), - }; - static ref STOPWORDS_SNA: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.sna { - Some(words) => make_from_vec(words), - None => make(sna::STOPWORDS_SNA), - }; - static ref STOPWORDS_AFR: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.afr { - Some(words) => make_from_vec(words), - None => make(afr::STOPWORDS_AFR), - }; - static ref STOPWORDS_LAT: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.lat { - Some(words) => make_from_vec(words), - None => make(lat::STOPWORDS_LAT), - }; - static ref STOPWORDS_SLK: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.slk { - Some(words) => make_from_vec(words), - None => make(slk::STOPWORDS_SLK), - }; - static ref STOPWORDS_CAT: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.cat { - Some(words) => make_from_vec(words), - None => make(cat::STOPWORDS_CAT), - }; - static ref STOPWORDS_TGL: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.tgl { - Some(words) => make_from_vec(words), - None => make(tgl::STOPWORDS_TGL), - }; - static ref STOPWORDS_HYE: HashSet<&'static str> = match &APP_CONF.channel.search.stopwords.hye { - Some(words) => make_from_vec(words), - None => make(hye::STOPWORDS_HYE), - }; + static ref STOPWORDS_LAV: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.lav, lav::STOPWORDS_LAV); + static ref STOPWORDS_EST: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.est, est::STOPWORDS_EST); + static ref STOPWORDS_TAM: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.tam, 
tam::STOPWORDS_TAM); + static ref STOPWORDS_VIE: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.vie, vie::STOPWORDS_VIE); + static ref STOPWORDS_URD: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.urd, urd::STOPWORDS_URD); + static ref STOPWORDS_THA: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.tha, tha::STOPWORDS_THA); + static ref STOPWORDS_GUJ: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.guj, guj::STOPWORDS_GUJ); + static ref STOPWORDS_UZB: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.uzb, uzb::STOPWORDS_UZB); + static ref STOPWORDS_PAN: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.pan, pan::STOPWORDS_PAN); + static ref STOPWORDS_AZE: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.aze, aze::STOPWORDS_AZE); } -fn make<'a>(words: &[&'a str]) -> HashSet<&'a str> { - words.iter().copied().collect() +// Recursion group #6 (10 items) { +lazy_static! 
{ + static ref STOPWORDS_IND: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.ind, ind::STOPWORDS_IND); + static ref STOPWORDS_TEL: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.tel, tel::STOPWORDS_TEL); + static ref STOPWORDS_PES: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.pes, pes::STOPWORDS_PES); + static ref STOPWORDS_MAL: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.mal, mal::STOPWORDS_MAL); + static ref STOPWORDS_ORI: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.ori, ori::STOPWORDS_ORI); + static ref STOPWORDS_MYA: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.mya, mya::STOPWORDS_MYA); + static ref STOPWORDS_NEP: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.nep, nep::STOPWORDS_NEP); + static ref STOPWORDS_SIN: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.sin, sin::STOPWORDS_SIN); + static ref STOPWORDS_KHM: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.khm, khm::STOPWORDS_KHM); + static ref STOPWORDS_TUK: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.tuk, tuk::STOPWORDS_TUK); } -fn make_from_vec<'a>(words: &'a Vec<String>) -> HashSet<&'a str> { - words.iter().clone().map(|word| &**word).collect() +// Recursion group #7 (9 items) { +lazy_static! 
{ + static ref STOPWORDS_AKA: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.aka, aka::STOPWORDS_AKA); + static ref STOPWORDS_ZUL: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.zul, zul::STOPWORDS_ZUL); + static ref STOPWORDS_SNA: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.sna, sna::STOPWORDS_SNA); + static ref STOPWORDS_AFR: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.afr, afr::STOPWORDS_AFR); + static ref STOPWORDS_LAT: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.lat, lat::STOPWORDS_LAT); + static ref STOPWORDS_SLK: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.slk, slk::STOPWORDS_SLK); + static ref STOPWORDS_CAT: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.cat, cat::STOPWORDS_CAT); + static ref STOPWORDS_TGL: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.tgl, tgl::STOPWORDS_TGL); + static ref STOPWORDS_HYE: HashSet<&'static str> = + gen_stopwords!(&APP_CONF.channel.search.stopwords.hye, hye::STOPWORDS_HYE); } impl LexerStopWord { From 270231761faf08739496ed44597e10cdcdd10194 Mon Sep 17 00:00:00 2001 From: Yuki Omoto Date: Sun, 13 Nov 2022 16:52:11 +0900 Subject: [PATCH 5/5] removed unnecessary parts of comments --- src/lexer/stopwords.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/lexer/stopwords.rs b/src/lexer/stopwords.rs index 59c2e570..fd09be6c 100644 --- a/src/lexer/stopwords.rs +++ b/src/lexer/stopwords.rs @@ -45,7 +45,7 @@ lazy_static! { gen_stopwords!(&APP_CONF.channel.search.stopwords.deu, deu::STOPWORDS_DEU); } -// Recursion group #2 (10 items) { +// Recursion group #2 (10 items) lazy_static! { static ref STOPWORDS_UKR: HashSet<&'static str> = gen_stopwords!(&APP_CONF.channel.search.stopwords.ukr, ukr::STOPWORDS_UKR); @@ -69,7 +69,7 @@ lazy_static! 
{ gen_stopwords!(&APP_CONF.channel.search.stopwords.jav, jav::STOPWORDS_JAV); } -// Recursion group #3 (10 items) { +// Recursion group #3 (10 items) lazy_static! { static ref STOPWORDS_KOR: HashSet<&'static str> = gen_stopwords!(&APP_CONF.channel.search.stopwords.kor, kor::STOPWORDS_KOR); @@ -93,8 +93,7 @@ lazy_static! { gen_stopwords!(&APP_CONF.channel.search.stopwords.ell, ell::STOPWORDS_ELL); } -// Recursion group #4 (10 items) { - +// Recursion group #4 (10 items) lazy_static! { static ref STOPWORDS_BUL: HashSet<&'static str> = gen_stopwords!(&APP_CONF.channel.search.stopwords.bul, bul::STOPWORDS_BUL); @@ -118,7 +117,7 @@ lazy_static! { gen_stopwords!(&APP_CONF.channel.search.stopwords.lit, lit::STOPWORDS_LIT); } -// Recursion group #5 (10 items) { +// Recursion group #5 (10 items) lazy_static! { static ref STOPWORDS_LAV: HashSet<&'static str> = gen_stopwords!(&APP_CONF.channel.search.stopwords.lav, lav::STOPWORDS_LAV); @@ -142,7 +141,7 @@ lazy_static! { gen_stopwords!(&APP_CONF.channel.search.stopwords.aze, aze::STOPWORDS_AZE); } -// Recursion group #6 (10 items) { +// Recursion group #6 (10 items) lazy_static! { static ref STOPWORDS_IND: HashSet<&'static str> = gen_stopwords!(&APP_CONF.channel.search.stopwords.ind, ind::STOPWORDS_IND); @@ -166,7 +165,7 @@ lazy_static! { gen_stopwords!(&APP_CONF.channel.search.stopwords.tuk, tuk::STOPWORDS_TUK); } -// Recursion group #7 (9 items) { +// Recursion group #7 (9 items) lazy_static! { static ref STOPWORDS_AKA: HashSet<&'static str> = gen_stopwords!(&APP_CONF.channel.search.stopwords.aka, aka::STOPWORDS_AKA);