Merge pull request #59 from SimpleITK/BibtexSupport

dave3d · web-flow · commit 988c3265412a · 2025-01-27T15:22:11.000-05:00
Support of adding dictionary words from Bibtex files
diff --git a/comment_spell_check.py b/comment_spell_check.py
@@ -25,14 +25,15 @@
 import argparse
 import re
 from pathlib import Path
+from importlib.metadata import version, PackageNotFoundError
 
 from enchant.checker import SpellChecker
 from enchant.tokenize import EmailFilter, URLFilter
 from enchant import Dict
 
 from comment_parser import comment_parser
 
-from importlib.metadata import version, PackageNotFoundError
+from lib import bibtex_loader
 
 __version__ = "unknown"
 
@@ -378,6 +379,13 @@ def parse_args():
         help="Set file mime type. File name suffix will be ignored.",
     )
 
+    parser.add_argument(
+        "--bibtex",
+        action="append",
+        dest="bibtex",
+        help="Bibtex file to load for additional dictionary words.",
+    )
+
     parser.add_argument("--version", action="version", version=f"{__version__}")
 
     args = parser.parse_args()
@@ -404,11 +412,38 @@ def add_dict(enchant_dict, filename, verbose=False):
             enchant_dict.add(wrd)
 
 
+def create_spell_checker(args, output_lvl):
+    """Return a SpellChecker over en_US plus the extra dictionary and Bibtex words."""
+
+    my_dict = Dict("en_US")
+
+    # Load the dictionary files: additional_dictionary.txt next to this script
+    # (when it exists) first, then every file listed in args.dict.
+    initial_dct = Path(__file__).parent / "additional_dictionary.txt"
+    if not initial_dct.exists():
+        initial_dct = None
+    else:
+        add_dict(my_dict, str(initial_dct), any([args.brief, output_lvl >= 0]))
+
+    if args.dict is not None:
+        for d in args.dict:
+            add_dict(my_dict, d, any([args.brief, output_lvl >= 0]))
+
+    # Load the bibliography files (one per --bibtex option; None when the
+    # option was never given).
+    if args.bibtex is not None:
+        for bib in args.bibtex:
+            bibtex_loader.add_bibtex(my_dict, bib, any([args.brief, output_lvl >= 0]))
+
+    # Create the SpellChecker with the e-mail and URL filters installed
+    spell_checker = SpellChecker(my_dict, filters=[EmailFilter, URLFilter])
+
+    return spell_checker
+
+
 def main():
     args = parse_args()
 
-    sitk_dict = Dict("en_US")
-
     # Set the amount of debugging messages to print.
     output_lvl = 1
     if args.brief:
@@ -419,19 +454,7 @@ def main():
     if args.miss:
         output_lvl = -1
 
-    # Load the dictionary files
-    #
-    initial_dct = Path(__file__).parent / "additional_dictionary.txt"
-    if not initial_dct.exists():
-        initial_dct = None
-    else:
-        add_dict(sitk_dict, str(initial_dct), any([args.brief, output_lvl >= 0]))
-
-    if args.dict is not None:
-        for d in args.dict:
-            add_dict(sitk_dict, d, any([args.brief, output_lvl >= 0]))
-
-    spell_checker = SpellChecker(sitk_dict, filters=[EmailFilter, URLFilter])
+    spell_checker = create_spell_checker(args, output_lvl)
 
     file_list = []
     if len(args.filenames):
diff --git a/lib/bibtex_loader.py b/lib/bibtex_loader.py
@@ -0,0 +1,33 @@
+import bibtexparser
+
+
+def split_bibtex_name(name):
+    """
+    Return the words of a Bibtex name, e.g. two words separated by a number.
+    """
+
+    # map any digit to space
+    mytable = str.maketrans("0123456789", "          ")
+    new_name = name.translate(mytable)
+
+    # split on whitespace; runs of spaces yield no empty strings
+    words = new_name.split()
+    return words
+
+
+def add_bibtex(enchant_dict, filename, verbose=False):
+    """Update ``enchant_dict`` spell checking dictionary with the words of each
+    entry-dict key in ``filename``, a Bibtex file; ``verbose`` prints them."""
+
+    if verbose:
+        print(f"Bibtex file: {filename}")
+
+    with open(filename, "rt", encoding="utf-8") as biblatex_file:
+        bib_database = bibtexparser.load(biblatex_file)
+
+        for k in bib_database.get_entry_dict().keys():
+            words = split_bibtex_name(k)
+            for w in words:
+                enchant_dict.add(w)
+                if verbose:
+                    print("Added Bibtex word:", w)
diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,3 @@
 comment_parser
 pyenchant
+bibtexparser
diff --git a/tests/bibtest.py b/tests/bibtest.py
@@ -0,0 +1,7 @@
+# lowekamp2013design
+# yaniv2018simpleitk
+# ibanez2003itk
+# avants2014insight
+# yushkevich2017itk
+
+print("Hello World")
diff --git a/tests/itk.bib b/tests/itk.bib
@@ -0,0 +1,48 @@
+@article{lowekamp2013design,
+  title={The design of SimpleITK},
+  author={Lowekamp, Bradley C and Chen, David T and Ib{\'a}{\~n}ez, Luis and Blezek, Daniel},
+  journal={Frontiers in neuroinformatics},
+  volume={7},
+  pages={45},
+  year={2013},
+  publisher={Frontiers Media SA}
+}
+
+@article{yaniv2018simpleitk,
+  title={SimpleITK image-analysis notebooks: a collaborative environment for education and reproducible research},
+  author={Yaniv, Ziv and Lowekamp, Bradley C and Johnson, Hans J and Beare, Richard},
+  journal={Journal of digital imaging},
+  volume={31},
+  number={3},
+  pages={290--303},
+  year={2018},
+  publisher={Springer}
+}
+
+@misc{ibanez2003itk,
+  title={The ITK software guide},
+  author={Ibanez, Luis and Schroeder, Will and Ng, Lydia and Cates, Josh and others},
+  year={2003},
+  publisher={Kitware, Incorporated Clifton Park, New York}
+}
+
+@article{avants2014insight,
+  title={The Insight ToolKit image registration framework},
+  author={Avants, Brian B and Tustison, Nicholas J and Stauffer, Michael and Song, Gang and Wu, Baohua and Gee, James C},
+  journal={Frontiers in neuroinformatics},
+  volume={8},
+  pages={44},
+  year={2014},
+  publisher={Frontiers Media SA}
+}
+
+@article{yushkevich2017itk,
+  title={ITK-SNAP: an intractive medical image segmentation tool to meet the need for expert-guided segmentation of complex medical images},
+  author={Yushkevich, Paul A and Gerig, Guido},
+  journal={IEEE pulse},
+  volume={8},
+  number={4},
+  pages={54--57},
+  year={2017},
+  publisher={IEEE}
+}
diff --git a/tests/test_comment_spell_check.py b/tests/test_comment_spell_check.py
@@ -81,3 +81,17 @@ def test_version(self):
         self.assertNotEqual(
             version_string, "unknown", "version string contains 'unknown'"
         )
+
+    def test_bibtex(self):
+        """Bibtex test: check tests/bibtest.py with words from tests/itk.bib."""
+        runresult = subprocess.run(
+            [
+                "python",
+                "comment_spell_check.py",
+                "--bibtex",
+                "tests/itk.bib",
+                "tests/bibtest.py",
+            ],
+            stdout=subprocess.PIPE,
+        )
+        self.assertEqual(runresult.returncode, 0, runresult.stdout)

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,3 @@`
`1`	`1`	`comment_parser`
`2`	`2`	`pyenchant`
	`3`	`+bibtexparser`