Skip to content

Commit 7656b91

Browse files
authored
Add initial space at the start of Documents to fix word boundary bug (#22)
* add space to fix `word_boundary` matching * bump version * fix test * Update Project.toml
1 parent a81ed29 commit 7656b91

File tree

3 files changed

+9
-4
lines changed

3 files changed

+9
-4
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "KeywordSearch"
22
uuid = "f977ba8c-7144-47e8-b06b-e3f658952959"
33
authors = ["Beacon Biosignals, Inc"]
4-
version = "0.3.0"
4+
version = "0.4.0"
55

66
[deps]
77
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"

src/core.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ struct Document{T<:NamedTuple}
1919
function Document(text::AbstractString, metadata::T) where {T}
2020
check_keys(T)
2121

22-
# Add a final space to ensure that the last word is recognized
22+
# Add an initial and final space to ensure that the first and last words are each recognized
2323
# as a word boundary.
24-
new_text = apply_replacements(text) * " "
24+
new_text = " " * apply_replacements(text) * " "
2525
return new{T}(new_text, metadata)
2626
end
2727
end

test/runtests.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,11 @@ end
9696

9797
# we count hyphens as a word boundary here (since we remove them from the documents and queries)
9898
@test match(word_boundary(Query("ant")), Document("This matches-ant")) !== nothing
99+
100+
# Test that the first/last word in a document can be matched by a word boundary
101+
@test match(word_boundary(Query("abcd")), Document("abcd")) !== nothing
102+
@test match(word_boundary(Query("abcd")), Document("abcd hello")) !== nothing
103+
@test match(word_boundary(Query("abcd")), Document("hello abcd")) !== nothing
99104
end
100105

101106
## A more representative test
@@ -373,7 +378,7 @@ end
373378
(:document, :distance, :indices, :query, :query_name, :corpus_uuid,
374379
:document_uuid)
375380
@test sprint(explain, (first(Tables.rowtable(tbl)))) ===
376-
"The query \" other\" exactly matched the text \"There were other cobras \".\n"
381+
"The query \" other\" exactly matched the text \" There were other cobras \".\n"
377382
end
378383
end
379384

0 commit comments

Comments
 (0)