Skip to content

Commit 988c326

Browse files
authored
Merge pull request #59 from SimpleITK/BibtexSupport
Support of adding dictionary words from Bibtex files
2 parents 17c7243 + 9c7f086 commit 988c326

File tree

6 files changed

+142
-16
lines changed

6 files changed

+142
-16
lines changed

comment_spell_check.py

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,15 @@
2525
import argparse
2626
import re
2727
from pathlib import Path
28+
from importlib.metadata import version, PackageNotFoundError
2829

2930
from enchant.checker import SpellChecker
3031
from enchant.tokenize import EmailFilter, URLFilter
3132
from enchant import Dict
3233

3334
from comment_parser import comment_parser
3435

35-
from importlib.metadata import version, PackageNotFoundError
36+
from lib import bibtex_loader
3637

3738
__version__ = "unknown"
3839

@@ -378,6 +379,13 @@ def parse_args():
378379
help="Set file mime type. File name suffix will be ignored.",
379380
)
380381

382+
parser.add_argument(
383+
"--bibtex",
384+
action="append",
385+
dest="bibtex",
386+
help="Bibtex file to load for additional dictionary words.",
387+
)
388+
381389
parser.add_argument("--version", action="version", version=f"{__version__}")
382390

383391
args = parser.parse_args()
@@ -404,11 +412,38 @@ def add_dict(enchant_dict, filename, verbose=False):
404412
enchant_dict.add(wrd)
405413

406414

415+
def create_spell_checker(args, output_lvl):
    """Create a SpellChecker whose dictionary includes the extra word sources.

    An ``en_US`` dictionary is extended, in order, with:

    1. ``additional_dictionary.txt`` located next to this file, if it exists;
    2. every dictionary file listed in ``args.dict``;
    3. every Bibtex file listed in ``args.bibtex``.

    Args:
        args: Parsed command-line arguments. The ``brief``, ``dict`` and
            ``bibtex`` attributes are read; ``dict`` and ``bibtex`` may be
            ``None`` when the corresponding option was not given.
        output_lvl: Output level chosen by the caller. The loaders are run
            verbosely when ``args.brief`` is set or this value is >= 0.

    Returns:
        A ``SpellChecker`` built on the extended dictionary that uses
        ``EmailFilter`` and ``URLFilter``.
    """
    # The same verbosity flag is handed to every loader, so compute it once
    # instead of rebuilding it for each call.
    verbose = any([args.brief, output_lvl >= 0])

    my_dict = Dict("en_US")

    # Load the dictionary files
    #
    initial_dct = Path(__file__).parent / "additional_dictionary.txt"
    if initial_dct.exists():
        add_dict(my_dict, str(initial_dct), verbose)

    if args.dict is not None:
        for d in args.dict:
            add_dict(my_dict, d, verbose)

    # Load the bibliography files
    #
    if args.bibtex is not None:
        for bib in args.bibtex:
            bibtex_loader.add_bibtex(my_dict, bib, verbose)

    # Create the SpellChecker
    return SpellChecker(my_dict, filters=[EmailFilter, URLFilter])
442+
443+
407444
def main():
408445
args = parse_args()
409446

410-
sitk_dict = Dict("en_US")
411-
412447
# Set the amount of debugging messages to print.
413448
output_lvl = 1
414449
if args.brief:
@@ -419,19 +454,7 @@ def main():
419454
if args.miss:
420455
output_lvl = -1
421456

422-
# Load the dictionary files
423-
#
424-
initial_dct = Path(__file__).parent / "additional_dictionary.txt"
425-
if not initial_dct.exists():
426-
initial_dct = None
427-
else:
428-
add_dict(sitk_dict, str(initial_dct), any([args.brief, output_lvl >= 0]))
429-
430-
if args.dict is not None:
431-
for d in args.dict:
432-
add_dict(sitk_dict, d, any([args.brief, output_lvl >= 0]))
433-
434-
spell_checker = SpellChecker(sitk_dict, filters=[EmailFilter, URLFilter])
457+
spell_checker = create_spell_checker(args, output_lvl)
435458

436459
file_list = []
437460
if len(args.filenames):

lib/bibtex_loader.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import bibtexparser
2+
3+
4+
def split_bibtex_name(name):
    """Split a Bibtex name into the words that surround its digits.

    A Bibtex name is typically two words separated by a number, for
    example ``lowekamp2013design``. Every ASCII digit is treated as a
    separator, so that name yields ``["lowekamp", "design"]``.

    Args:
        name: The Bibtex name (entry key) to split.

    Returns:
        A list of the non-empty pieces of ``name`` left after replacing
        each digit with a space and splitting on whitespace. The list is
        empty when ``name`` contains nothing but digits and whitespace.
    """
    # str.maketrans() requires its two string arguments to have the same
    # length, so derive the replacement from the digit string rather than
    # relying on a hand-counted run of spaces.
    digits = "0123456789"
    digit_to_space = str.maketrans(digits, " " * len(digits))

    # split() with no argument collapses runs of whitespace and drops empty
    # strings, so consecutive digits never produce empty words.
    return name.translate(digit_to_space).split()
16+
17+
18+
def add_bibtex(enchant_dict, filename, verbose=False):
    """Add the words from a Bibtex file's entry names to ``enchant_dict``.

    The file is parsed with ``bibtexparser``; each key of the resulting
    entry dictionary is broken into words by ``split_bibtex_name`` and
    every word is added to the spell checking dictionary.

    Args:
        enchant_dict: Spell checking dictionary to update; only its
            ``add`` method is called.
        filename: Path of the Bibtex file, read as UTF-8 text.
        verbose: When true, print the file name and each word added.
    """
    if verbose:
        print(f"Bibtex file: {filename}")

    with open(filename, "rt", encoding="utf-8") as bib_file:
        database = bibtexparser.load(bib_file)

    # Iterating the entry dictionary walks its keys, i.e. the entry names.
    for entry_name in database.get_entry_dict():
        for word in split_bibtex_name(entry_name):
            enchant_dict.add(word)
            if verbose:
                print("Added Bibtex word:", word)

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
comment_parser
22
pyenchant
3+
bibtexparser

tests/bibtest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# lowekamp2013design
2+
# yaniv2018simpleitk
3+
# ibanez2003itk
4+
# avants2014insight
5+
# yushkevich2017itk
6+
7+
print("Hello World")

tests/itk.bib

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
@article{lowekamp2013design,
2+
title={The design of SimpleITK},
3+
author={Lowekamp, Bradley C and Chen, David T and Ib{\'a}{\~n}ez, Luis and Blezek, Daniel},
4+
journal={Frontiers in neuroinformatics},
5+
volume={7},
6+
pages={45},
7+
year={2013},
8+
publisher={Frontiers Media SA}
9+
}
10+
11+
@article{yaniv2018simpleitk,
12+
title={SimpleITK image-analysis notebooks: a collaborative environment for education and reproducible research},
13+
author={Yaniv, Ziv and Lowekamp, Bradley C and Johnson, Hans J and Beare, Richard},
14+
journal={Journal of digital imaging},
15+
volume={31},
16+
number={3},
17+
pages={290--303},
18+
year={2018},
19+
publisher={Springer}
20+
}
21+
22+
@misc{ibanez2003itk,
23+
title={The ITK software guide},
24+
author={Ibanez, Luis and Schroeder, Will and Ng, Lydia and Cates, Josh and others},
25+
year={2003},
26+
publisher={Kitware, Incorporated Clifton Park, New York}
27+
}
28+
29+
@article{avants2014insight,
30+
title={The Insight ToolKit image registration framework},
31+
author={Avants, Brian B and Tustison, Nicholas J and Stauffer, Michael and Song, Gang and Wu, Baohua and Gee, James C},
32+
journal={Frontiers in neuroinformatics},
33+
volume={8},
34+
pages={44},
35+
year={2014},
36+
publisher={Frontiers Media SA}
37+
}
38+
39+
@article{yushkevich2017itk,
40+
title={ITK-SNAP: an intractive medical image segmentation tool to meet the need for expert-guided segmentation of complex medical images},
41+
author={Yushkevich, Paul A and Gerig, Guido},
42+
journal={IEEE pulse},
43+
volume={8},
44+
number={4},
45+
pages={54--57},
46+
year={2017},
47+
publisher={IEEE}
48+
}

tests/test_comment_spell_check.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,17 @@ def test_version(self):
8181
self.assertNotEqual(
8282
version_string, "unknown", "version string contains 'unknown'"
8383
)
84+
85+
def test_bibtex(self):
    """Bibtex test: run the checker on a file with ``--bibtex`` supplied.

    Runs ``comment_spell_check.py`` on ``tests/bibtest.py`` with
    ``tests/itk.bib`` as the Bibtex file and asserts that the process
    exits with return code 0. The captured standard output is used as the
    failure message.
    """
    # Imported locally so this method does not depend on the module's
    # import block.
    import sys

    runresult = subprocess.run(
        [
            # Use the interpreter running the tests rather than whichever
            # "python" happens to be first on PATH.
            sys.executable,
            "comment_spell_check.py",
            "--bibtex",
            "tests/itk.bib",
            "tests/bibtest.py",
        ],
        stdout=subprocess.PIPE,
    )
    self.assertEqual(runresult.returncode, 0, runresult.stdout)

0 commit comments

Comments
 (0)