diff --git a/tests/search/significance/app_one_significance/models/docv1.json b/tests/search/significance/app_one_significance/models/docv1.json index 6ec2490d4..58cbae7b6 100644 --- a/tests/search/significance/app_one_significance/models/docv1.json +++ b/tests/search/significance/app_one_significance/models/docv1.json @@ -7,6 +7,7 @@ "description" : "english model", "document-count" : 12, "document-frequencies" : { + "the" : 11, "usa" : 2, "hello": 3, "world": 5, diff --git a/tests/search/significance/docs.json b/tests/search/significance/docs.json index 3900c918c..2865fac4a 100644 --- a/tests/search/significance/docs.json +++ b/tests/search/significance/docs.json @@ -1,4 +1,4 @@ [ - { "put": "id:test:doc::1", "fields": { "text": "Hello world" } }, + { "put": "id:test:doc::1", "fields": { "text": "Hello the world" } }, { "put": "id:test:doc::2", "fields": { "text": "Hei verden" } } ] diff --git a/tests/search/significance/significance.rb b/tests/search/significance/significance.rb index 4f8aa86ec..66c0dbab5 100644 --- a/tests/search/significance/significance.rb +++ b/tests/search/significance/significance.rb @@ -107,6 +107,28 @@ def test_significance_searcher_with_multiple_models def verify_default_significance_for_simple_query result = search({'yql' => 'select * from sources * where text contains "hello"', 'format' => 'json'}).json + puts "hello => #{result}" + significance_value = result["root"]["children"][0]["fields"]["summaryfeatures"]["term(0).significance"] + # "hello" { frequency: 3, count: 12 } + exp_significance = calculate_legacy_significance(3, 12) + assert_approx(exp_significance, significance_value) + + result = search({'yql' => 'select * from sources * where text contains "world"', 'format' => 'json'}).json + puts "world => #{result}" + significance_value = result["root"]["children"][0]["fields"]["summaryfeatures"]["term(0).significance"] + # "world" { frequency: 5, count: 12 } + exp_significance = calculate_legacy_significance(5, 12) + assert_approx(exp_significance, significance_value) + + result = search({'yql' => 'select * from sources * where text contains "the"', 'format' => 'json'}).json + puts "the => #{result}" + significance_value = result["root"]["children"][0]["fields"]["summaryfeatures"]["term(0).significance"] + # "the" { frequency: 11, count: 12 } + exp_significance = calculate_legacy_significance(11, 12) + assert_approx(exp_significance, significance_value) + + result = search({'yql' => 'select * from sources * where text contains alternatives({"hello":1.0,"world":1.0})', 'format' => 'json'}).json + puts "hello world => #{result}" significance_value = result["root"]["children"][0]["fields"]["summaryfeatures"]["term(0).significance"] # "hello" { frequency: 3, count: 12 } exp_significance = calculate_legacy_significance(3, 12)