Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions babel/messages/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,13 +316,31 @@ def check_and_call_extract_file(
if pathmatch(opattern, filename):
options = odict
break

# Merge keywords and comment_tags from per-format options if present.
file_keywords = keywords
file_comment_tags = comment_tags
if keywords_opt := options.get("keywords"):
if not isinstance(keywords_opt, dict): # pragma: no cover
raise TypeError(
f"The `keywords` option must be a dict of parsed keywords, not {keywords_opt!r}",
)
file_keywords = {**keywords, **keywords_opt}

if comments_opt := options.get("add_comments"):
if not isinstance(comments_opt, (list, tuple, set)): # pragma: no cover
raise TypeError(
f"The `add_comments` option must be a collection of comment tags, not {comments_opt!r}.",
)
file_comment_tags = tuple(set(comment_tags) | set(comments_opt))

if callback:
callback(filename, method, options)
for message_tuple in extract_from_file(
method,
filepath,
keywords=keywords,
comment_tags=comment_tags,
keywords=file_keywords,
comment_tags=file_comment_tags,
options=options,
strip_comment_tags=strip_comment_tags,
):
Expand Down
45 changes: 42 additions & 3 deletions babel/messages/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import warnings
from configparser import RawConfigParser
from io import StringIO
from typing import BinaryIO, Iterable, Literal
from typing import Any, BinaryIO, Iterable, Literal

from babel import Locale, localedata
from babel import __version__ as VERSION
Expand Down Expand Up @@ -584,7 +584,7 @@ def _get_mappings(self):
method_map, options_map = [], {}
for pattern, method, options in mapping:
method_map.append((pattern, method))
options_map[pattern] = options or {}
options_map[pattern] = _parse_string_options(options or {})
mappings.append((path, method_map, options_map))

else:
Expand Down Expand Up @@ -1075,7 +1075,7 @@ def parse_mapping_cfg(fileobj, filename=None):
else:
method, pattern = (part.strip() for part in section.split(':', 1))
method_map.append((pattern, method))
options_map[pattern] = dict(parser.items(section))
options_map[pattern] = _parse_string_options(dict(parser.items(section)))

if extractors:
for idx, (pattern, method) in enumerate(method_map):
Expand All @@ -1086,6 +1086,25 @@ def parse_mapping_cfg(fileobj, filename=None):
return method_map, options_map


def _parse_string_options(options: dict[str, str]) -> dict[str, Any]:
    """
    Parse string-formatted options from a mapping configuration.

    The `keywords` and `add_comments` options are parsed into a canonical
    internal format, so they can be merged with global keywords/comment tags
    during extraction. All other options are passed through untouched.

    :param options: the raw options of one mapping section; this mapping is
                    not modified.
    :return: a copy of `options` in which a non-empty `keywords` value has
             been run through ``parse_keywords(listify_value(...))`` and a
             non-empty `add_comments` value through ``listify_value(...)``.
             Empty (falsy) values for either option are dropped from the
             result.
    """
    # Work on a separately named copy: the result holds parsed (non-string)
    # values, so it needs its own annotation instead of re-declaring the
    # `options` parameter with a different type.
    parsed: dict[str, Any] = options.copy()

    # `pop` first so that an empty value removes the key altogether rather
    # than leaving an unparsed placeholder behind.
    if keywords_val := parsed.pop("keywords", None):
        parsed['keywords'] = parse_keywords(listify_value(keywords_val))

    if comments_val := parsed.pop("add_comments", None):
        parsed['add_comments'] = listify_value(comments_val)

    return parsed


def _parse_config_object(config: dict, *, filename="(unknown)"):
extractors = {}
method_map = []
Expand Down Expand Up @@ -1140,6 +1159,26 @@ def _parse_config_object(config: dict, *, filename="(unknown)"):
if not isinstance(pattern, list):
pattern = [pattern]

if keywords_val := entry.pop("keywords", None):
if isinstance(keywords_val, str):
entry["keywords"] = parse_keywords(listify_value(keywords_val))
elif isinstance(keywords_val, list):
entry["keywords"] = parse_keywords(keywords_val)
else:
raise ConfigurationError(
f"{filename}: mappings[{idx}]: 'keywords' must be a string or list, got {keywords_val!r}",
)

if comments_val := entry.pop("add_comments", None):
if isinstance(comments_val, str):
entry["add_comments"] = [comments_val]
elif isinstance(comments_val, list):
entry["add_comments"] = comments_val
else:
raise ConfigurationError(
f"{filename}: mappings[{idx}]: 'add_comments' must be a string or list, got {comments_val!r}",
)

for pat in pattern:
if not isinstance(pat, str):
raise ConfigurationError(
Expand Down
131 changes: 123 additions & 8 deletions docs/messages.rst
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,6 @@ Genshi markup templates and text templates:
[javascript: **.js]
extract_messages = $._, jQuery._

The configuration file syntax is based on the format commonly found in ``.INI``
files on Windows systems, and as supported by the ``ConfigParser`` module in
the Python standard library. Section names (the strings enclosed in square
brackets) specify both the name of the extraction method, and the extended glob
pattern to specify the files that this extraction method should be used for,
separated by a colon. The options in the sections are passed to the extraction
method. Which options are available is specific to the extraction method used.

The extended glob patterns used in this configuration are similar to the glob
patterns provided by most shells. A single asterisk (``*``) is a wildcard for
any number of characters (except for the pathname component separator "/"),
Expand All @@ -155,9 +147,132 @@ two subsequent asterisk characters (``**``) can be used to make the wildcard
match any directory level, so the pattern ``**.txt`` matches any file with the
extension ``.txt`` in any directory.

Babel supports two configuration file formats: INI and TOML.

INI Configuration Format
^^^^^^^^^^^^^^^^^^^^^^^^

The INI configuration file syntax is based on the format commonly found in ``.INI``
files on Windows systems, and as supported by the ``ConfigParser`` module in
the Python standard library. Section names (the strings enclosed in square
brackets) specify both the name of the extraction method, and the extended glob
pattern to specify the files that this extraction method should be used for,
separated by a colon. The options in the sections are passed to the extraction
method. Which options are available is specific to the extraction method used.

Lines that start with a ``#`` or ``;`` character are ignored and can be used
for comments. Empty lines are ignored, too.

TOML Configuration Format
^^^^^^^^^^^^^^^^^^^^^^^^^^

Babel also supports TOML format for configuration files, when the ``tomllib``
module is available (Python 3.11+), or when the ``tomli`` package is installed
(for Python versions prior to 3.11).

TOML provides a more structured format and is particularly useful when combined
with ``pyproject.toml``.

The same configuration examples shown above can be written in TOML format:

.. code-block:: toml

# Extraction from Python source files
[[mappings]]
method = "python"
pattern = "**.py"

# Extraction from Genshi HTML and text templates
[[mappings]]
method = "genshi"
pattern = "**/templates/**.html"
ignore_tags = "script,style"
include_attrs = "alt title summary"

[[mappings]]
method = "genshi"
pattern = "**/templates/**.txt"
template_class = "genshi.template:TextTemplate"
encoding = "ISO-8859-15"

# Extraction from JavaScript files
[[mappings]]
method = "javascript"
pattern = "**.js"
extract_messages = "$._, jQuery._"

In TOML format, each ``[[mappings]]`` section defines a mapping. The ``method``
and ``pattern`` fields are required. The ``pattern`` field can be a string or
an array of strings to match multiple patterns with the same configuration.

If you're using ``pyproject.toml``, nest the configuration under ``[tool.babel]``:

.. code-block:: toml

[tool.babel]
[[tool.babel.mappings]]
method = "python"
pattern = "**.py"

You can reference custom extractors in both formats. In TOML:

.. code-block:: toml

[extractors]
custom = "mypackage.module:extract_custom"

[[mappings]]
method = "custom"
pattern = "**.ctm"
some_option = "foo"

Common Options
^^^^^^^^^^^^^^

In addition to extractor-specific options, the following options can be specified
in any mapping section and will be merged with global settings:

``keywords``
A list of keywords (function names) to extract messages from.
This uses the same syntax as the ``--keyword`` command-line option.
Keywords specified here are added to (rather than replacing) the default
keywords or those specified on the command line.

In INI format, whitespace-separated: ``keywords = _ gettext ngettext:1,2 pgettext:1c,2``

In TOML format, use either a whitespace-separated string or an array:
``keywords = "_ gettext ngettext:1,2"`` or
``keywords = ["_", "gettext", "ngettext:1,2"]``

``add_comments``
A list of comment tag prefixes to extract and include in the
output. This uses the same syntax as the ``--add-comments`` command-line option.
Comment tags specified here are added to those specified via command-line.

In INI format, whitespace-separated: ``add_comments = TRANSLATOR: NOTE:``

In TOML format, use either a string or an array. A string is taken as a
single tag and is not split on whitespace:
``add_comments = "TRANSLATOR NOTE:"`` (one tag, ``TRANSLATOR NOTE:``) or
``add_comments = ["TRANSLATOR:", "NOTE:"]`` (two tags).

**Example in INI format:**

.. code-block:: ini

[python: **.py]
keywords = _ _l _n:1,2
add_comments = TRANSLATOR:

**Example in TOML format:**

.. code-block:: toml

[[mappings]]
method = "python"
pattern = "**.py"
keywords = ["_", "_l", "_n:1,2"]
add_comments = ["TRANSLATOR:"]

.. note:: if you're performing message extraction using the command Babel
provides for integration into ``setup.py`` scripts, you can also
provide this configuration in a different way, namely as a keyword
Expand Down
5 changes: 5 additions & 0 deletions tests/messages/data/mapping_with_keywords.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Test mapping file with keywords option (issue #1224)

[python: **.py]
encoding = utf-8
keywords = _ _l _n:1,2 _nl:1,2 _p:1c,2 _pl:1c,2 _np:1c,2,3 _npl:1c,2,3
8 changes: 8 additions & 0 deletions tests/messages/data/mapping_with_keywords_and_comments.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Test mapping file with keywords and add_comments options (issue #1224)

[[mappings]]
method = "python"
pattern = "**.py"
encoding = "utf-8"
keywords = ["_", "_l", "_n:1,2"]
add_comments = ["SPECIAL:"]
12 changes: 12 additions & 0 deletions tests/messages/data/project/issue_1224_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from myproject.i18n import lazy_gettext as _l, lazy_ngettext as _n


class Choices:
    """
    Extraction fixture: the comments and strings below are test input.

    The ``# SPECIAL:`` comments and the ``_l``/``_n`` calls are what the
    extraction tests look for, so their wording must not be changed.
    """

    # SPECIAL: This comment should be extracted
    CHOICE_X = 1, _l("Choice X")
    # SPECIAL: Another special comment
    CHOICE_Y = 2, _l("Choice Y")
    # No comment...
    OPTION_C = 3, _l("Option C")
    # Test for _n too! (but no comment... shush...)
    OPTION_A = 4, (_n("Option A", "Options of the A kind", 1))
51 changes: 51 additions & 0 deletions tests/messages/frontend/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,54 @@ def test_extraction_add_location_file(extract_cmd, pot_file):

"""
assert expected_content == pot_file.read_text()


def test_extraction_with_mapping_file_with_keywords(extract_cmd, pot_file):
    """
    Keywords listed in an INI mapping file must be parsed and merged with
    the default keywords when extracting.
    """
    extract_cmd.mapping_file = 'mapping_with_keywords.cfg'
    extract_cmd.output_file = pot_file
    extract_cmd.input_paths = 'project'
    extract_cmd.finalize_options()
    extract_cmd.run()

    with pot_file.open() as pot:
        catalog = read_po(pot)

    # Expected plural flag per message id; only 'Option A' is pluralizable.
    expected_plural = {
        'bar': False,
        'Choice X': False,
        'Choice Y': False,
        'Option C': False,
        'Option A': True,
    }
    for msgid, is_plural in expected_plural.items():
        message = catalog[msgid]
        # This mapping file sets no `add_comments`, so the SPECIAL: comments
        # must not show up on any message.
        assert not message.auto_comments
        assert message.pluralizable == is_plural


def test_extraction_with_mapping_file_with_comments(extract_cmd, pot_file):
    """
    `add_comments` given in a mapping file must be parsed and applied.
    The mapping is a TOML file so that the TOML code path is exercised.
    """
    extract_cmd.mapping_file = 'mapping_with_keywords_and_comments.toml'
    extract_cmd.output_file = pot_file
    extract_cmd.input_paths = 'project/issue_1224_test.py'
    extract_cmd.finalize_options()
    extract_cmd.run()

    with pot_file.open() as pot:
        catalog = read_po(pot)

    # Message id -> text fragment expected inside its SPECIAL comment, or
    # None when the message must carry no auto comment at all.
    expected_fragments = {
        'Choice X': 'extracted',
        'Choice Y': 'special',
        'Option C': None,
        'Option A': None,
    }
    for msgid, fragment in expected_fragments.items():
        message = catalog[msgid]
        if not fragment:
            assert not message.auto_comments
        else:
            assert any(
                'SPECIAL' in comment and fragment in comment
                for comment in message.auto_comments
            )
        assert message.pluralizable == (msgid == 'Option A')
Loading
Loading