Skip to content

Commit 34168ef

Browse files
author
Gal Ben David
committed
WordSegmenter is now a static class with static methods
1 parent 4054bb5 commit 34168ef

File tree

6 files changed

+49
-41
lines changed

6 files changed

+49
-41
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "pywordsegment"
3-
version = "0.1.2"
3+
version = "0.1.3"
44
authors = ["Gal Ben David <[email protected]>"]
55
edition = "2018"
66
description = "Concatenated-word segmentation Python library written in Rust"

README.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,24 +50,23 @@ import pywordsegment
5050
# The internal UNIGRAMS & BIGRAMS corpora are lazily initialized
5151
# once for the whole module. Multiple WordSegmenter instances would
5252
# not create new dictionaries.
53-
word_segmenter = pywordsegment.WordSegmenter()
5453

5554
# Segments a word to its parts
56-
word_segmenter.segment(
55+
pywordsegment.WordSegmenter.segment(
5756
text="theusashops",
5857
)
5958
# ["the", "usa", "shops"]
6059

6160

6261
# This function checks whether the substring exists as a whole segment
6362
# inside text.
64-
word_segmenter.exist_as_segment(
63+
pywordsegment.WordSegmenter.exist_as_segment(
6564
substring="inter",
6665
text="internationalairport",
6766
)
6867
# False
6968

70-
word_segmenter.exist_as_segment(
69+
pywordsegment.WordSegmenter.exist_as_segment(
7170
substring="inter",
7271
text="intermilan",
7372
)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ strip = true
1515

1616
[tool.poetry]
1717
name = "pywordsegment"
18-
version = "0.1.2"
18+
version = "0.1.3"
1919
authors = ["Gal Ben David <[email protected]>"]
2020
description = "Concatenated-word segmentation Python library written in Rust"
2121
readme = "README.md"

pywordsegment/__init__.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,8 @@
99
class WordSegmenter:
1010
word_segmenter: pywordsegment.WordSegmenter = None
1111

12-
def __init__(
13-
self,
14-
) -> None:
12+
@staticmethod
13+
def load() -> None:
1514
if WordSegmenter.word_segmenter is None:
1615
current_file_dir = pathlib.Path(__file__).parent.absolute()
1716

@@ -35,20 +34,26 @@ def __init__(
3534
total_words_frequency=1024908267229.0,
3635
)
3736

37+
@staticmethod
3838
def segment(
39-
self,
4039
text: str,
4140
) -> typing.List[str]:
42-
return self.word_segmenter.segment(
41+
if WordSegmenter.word_segmenter is None:
42+
WordSegmenter.load()
43+
44+
return WordSegmenter.word_segmenter.segment(
4345
text=text,
4446
)
4547

48+
@staticmethod
4649
def exist_as_segment(
47-
self,
4850
substring: str,
4951
text: str,
5052
) -> bool:
51-
return self.word_segmenter.exist_as_segment(
53+
if WordSegmenter.word_segmenter is None:
54+
WordSegmenter.load()
55+
56+
return WordSegmenter.word_segmenter.exist_as_segment(
5257
substring=substring,
5358
text=text,
5459
)

pywordsegment/pywordsegment.pyi

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,16 @@ import typing
22

33

44
class WordSegmenter:
5-
def __init__(
6-
self,
7-
) -> None: ...
5+
@staticmethod
6+
def load() -> None: ...
87

8+
@staticmethod
99
def segment(
10-
self,
1110
text: str,
12-
) -> typing.List[str, str]: ...
11+
) -> typing.List[str]: ...
1312

13+
@staticmethod
1414
def exist_as_segment(
15-
self,
1615
substring: str,
1716
text: str,
1817
) -> bool: ...

tests/test_pywordsegment.py

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,8 @@ class WordSegmentTestCase(
99
def test_segment_1(
1010
self,
1111
):
12-
word_segmenter = pywordsegment.WordSegmenter()
1312
self.assertEqual(
14-
first=word_segmenter.segment(
13+
first=pywordsegment.WordSegmenter.segment(
1514
text='theusashops',
1615
),
1716
second=[
@@ -24,9 +23,8 @@ def test_segment_1(
2423
def test_segment_2(
2524
self,
2625
):
27-
word_segmenter = pywordsegment.WordSegmenter()
2826
self.assertEqual(
29-
first=word_segmenter.segment(
27+
first=pywordsegment.WordSegmenter.segment(
3028
text='choosespain',
3129
),
3230
second=[
@@ -38,9 +36,8 @@ def test_segment_2(
3836
def test_segment_3(
3937
self,
4038
):
41-
word_segmenter = pywordsegment.WordSegmenter()
4239
self.assertEqual(
43-
first=word_segmenter.segment(
40+
first=pywordsegment.WordSegmenter.segment(
4441
text='thisisatest',
4542
),
4643
second=[
@@ -54,9 +51,8 @@ def test_segment_3(
5451
def test_segment_4(
5552
self,
5653
):
57-
word_segmenter = pywordsegment.WordSegmenter()
5854
self.assertEqual(
59-
first=word_segmenter.segment(
55+
first=pywordsegment.WordSegmenter.segment(
6056
text='wheninthecourseofhumaneventsitbecomesnecessary',
6157
),
6258
second=[
@@ -76,9 +72,8 @@ def test_segment_4(
7672
def test_segment_5(
7773
self,
7874
):
79-
word_segmenter = pywordsegment.WordSegmenter()
8075
self.assertEqual(
81-
first=word_segmenter.segment(
76+
first=pywordsegment.WordSegmenter.segment(
8277
text='whorepresents',
8378
),
8479
second=[
@@ -90,9 +85,8 @@ def test_segment_5(
9085
def test_segment_6(
9186
self,
9287
):
93-
word_segmenter = pywordsegment.WordSegmenter()
9488
self.assertEqual(
95-
first=word_segmenter.segment(
89+
first=pywordsegment.WordSegmenter.segment(
9690
text='expertsexchange',
9791
),
9892
second=[
@@ -104,9 +98,8 @@ def test_segment_6(
10498
def test_segment_7(
10599
self,
106100
):
107-
word_segmenter = pywordsegment.WordSegmenter()
108101
self.assertEqual(
109-
first=word_segmenter.segment(
102+
first=pywordsegment.WordSegmenter.segment(
110103
text='speedofart',
111104
),
112105
second=[
@@ -119,9 +112,8 @@ def test_segment_7(
119112
def test_segment_8(
120113
self,
121114
):
122-
word_segmenter = pywordsegment.WordSegmenter()
123115
self.assertEqual(
124-
first=word_segmenter.segment(
116+
first=pywordsegment.WordSegmenter.segment(
125117
text='nowisthetimeforallgood',
126118
),
127119
second=[
@@ -138,9 +130,8 @@ def test_segment_8(
138130
def test_segment_9(
139131
self,
140132
):
141-
word_segmenter = pywordsegment.WordSegmenter()
142133
self.assertEqual(
143-
first=word_segmenter.segment(
134+
first=pywordsegment.WordSegmenter.segment(
144135
text='itisatruthuniversallyacknowledged',
145136
),
146137
second=[
@@ -156,9 +147,8 @@ def test_segment_9(
156147
def test_segment_10(
157148
self,
158149
):
159-
word_segmenter = pywordsegment.WordSegmenter()
160150
self.assertEqual(
161-
first=word_segmenter.segment(
151+
first=pywordsegment.WordSegmenter.segment(
162152
text='itwasabrightcolddayinaprilandtheclockswerestrikingthirteen',
163153
),
164154
second=[
@@ -182,13 +172,28 @@ def test_segment_10(
182172
def test_segment_11(
183173
self,
184174
):
185-
word_segmenter = pywordsegment.WordSegmenter()
186175
self.assertEqual(
187-
first=word_segmenter.segment(
176+
first=pywordsegment.WordSegmenter.segment(
188177
text='CaseTest',
189178
),
190179
second=[
191180
'case',
192181
'test',
193182
],
194183
)
184+
185+
def test_exist_as_segment_1(
186+
self,
187+
):
188+
self.assertFalse(
189+
expr=pywordsegment.WordSegmenter.exist_as_segment(
190+
substring='man',
191+
text='manual',
192+
),
193+
)
194+
self.assertTrue(
195+
expr=pywordsegment.WordSegmenter.exist_as_segment(
196+
substring='man',
197+
text='oneman',
198+
),
199+
)

0 commit comments

Comments
 (0)