Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions src/rfc3987_syntax/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,37 @@
from . import syntax_helpers as _syntax_helpers

from .syntax_helpers import *


def __dir__():
result = _syntax_helpers.__all__.copy()
for start_rule in ALL_START_RULES:
result.append(SYNTAX_VALIDATOR_PREFIX + start_rule)
Comment on lines +5 to +9

Copilot AI Apr 30, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Defining `__all__` in syntax_helpers means `from .syntax_helpers import *` (used here) now exports only names listed in `syntax_helpers.__all__`. Since the term-specific validators (e.g., `is_valid_syntax_iri`) are not listed in `__all__`, `from rfc3987_syntax import *` will stop exporting them compared to the previous behavior. If that’s not intended, include the validator names (as strings) in an appropriate `__all__` (package-level and/or syntax_helpers) without eagerly constructing the parser.

Suggested change
def __dir__():
result = _syntax_helpers.__all__.copy()
for start_rule in ALL_START_RULES:
result.append(SYNTAX_VALIDATOR_PREFIX + start_rule)
__all__ = _syntax_helpers.__all__.copy()
__all__.extend(
SYNTAX_VALIDATOR_PREFIX + start_rule for start_rule in ALL_START_RULES
)
def __dir__():
result = __all__.copy()

Copilot uses AI. Check for mistakes.
return result


# Cache for lazily created attributes (validators and the shared parser),
# keyed by attribute name.
_attr_cache = {}


def __getattr__(name):
    """
    Lazily create attributes, in particular syntax validators, when accessed.

    When an attribute like 'is_valid_syntax_iri' is accessed, this function
    will create and cache the corresponding validator function.

    The syntax parser is also obtained lazily: 'syntax_parser' is fetched
    from the syntax_helpers module on first access and then cached here.

    Raises:
        AttributeError: if *name* is neither 'syntax_parser' nor a name for
            which ``get_syntax_validator`` returns a validator.
    """
    if name in _attr_cache:
        return _attr_cache[name]

    # The creation work is deliberately done outside any exception handler so
    # that a failure here (or the AttributeError below) is not reported as
    # having occurred "during handling of" an unrelated KeyError.
    if name == "syntax_parser":
        value = _syntax_helpers.syntax_parser
    else:
        value = get_syntax_validator(name)
        if not value:
            raise AttributeError(f"module '{__name__}' has no attribute '{name}'")

    _attr_cache[name] = value
    return value
147 changes: 69 additions & 78 deletions src/rfc3987_syntax/syntax_helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from lark import Lark, ParseTree, exceptions
import sys

from functools import partial
from pathlib import Path

from rfc3987_syntax.utils import load_grammar
Expand All @@ -12,7 +13,7 @@
"absolute_iri",
"scheme",
"irelative_ref",
"irelative_part"
"irelative_part",
"ihier_part",
"iauthority",
"iuserinfo",
Expand Down Expand Up @@ -45,108 +46,98 @@
"pct_encoded",
]

# Start rules handed to the shared parser: a copy of RFC3987_SYNTAX_TERMS with
# three terms dropped, followed by the five ipath* rules.
ALL_START_RULES: list[str] = list(RFC3987_SYNTAX_TERMS)
for term in ("non_zero", "pct_encoded", "scheme"):
    # list.remove raises ValueError if the term is not present.
    ALL_START_RULES.remove(term)
ALL_START_RULES += [
    "ipath",
    "ipath_absolute",
    "ipath_empty",
    "ipath_noscheme",
    "ipath_rootless",
]
Comment on lines +49 to +55

Copilot AI Apr 30, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ALL_START_RULES removes "scheme" and "pct_encoded" (and "non_zero"), but tests and public API use these terms via is_valid_syntax(term=...). With the shared parser built using start=ALL_START_RULES, parse(term="scheme"/"pct_encoded") will fail (unknown start rule) or return incorrect results. Keep these terms in ALL_START_RULES (or handle them via a separate parser path) so parse()/is_valid_syntax() continue to work for all RFC3987_SYNTAX_TERMS.

Suggested change
# All start rules needed by validators
ALL_START_RULES: list[str] = RFC3987_SYNTAX_TERMS.copy()
for term in ("non_zero", "pct_encoded", "scheme"):
ALL_START_RULES.remove(term)
ALL_START_RULES.extend(
["ipath", "ipath_absolute", "ipath_empty", "ipath_noscheme", "ipath_rootless"]
)
# All start rules needed by validators and public parse()/is_valid_syntax() calls
ALL_START_RULES: list[str] = RFC3987_SYNTAX_TERMS.copy()
for term in ("ipath", "ipath_absolute", "ipath_empty", "ipath_noscheme", "ipath_rootless"):
if term not in ALL_START_RULES:
ALL_START_RULES.append(term)

Copilot uses AI. Check for mistakes.

grammar: str = load_grammar(RFC3987_SYNTAX_GRAMMAR_PATH)

syntax_parser = Lark(grammar, start=["iri", "iri_reference", "absolute_iri"], parser=RFC3987_SYNTAX_PARSER_TYPE)
# Names exported by `from rfc3987_syntax.syntax_helpers import *`.
# The per-term `is_valid_syntax_<term>` validators and `syntax_parser` are not
# listed here; they are created on first access by this module's `__getattr__`.
__all__ = [
"RFC3987_SYNTAX_PARSER_TYPE",
"RFC3987_SYNTAX_GRAMMAR_PATH",
"RFC3987_SYNTAX_TERMS",
"ALL_START_RULES",
"SYNTAX_VALIDATOR_PREFIX",
"grammar",
"parse",
"is_valid_syntax",
"make_syntax_validator",
"get_syntax_validator",
]


def parse(term: str, value: str) -> ParseTree:
return syntax_parser.parse(value, start=term)
def parse(term: str, value: str) -> "lark.ParseTree":
    """Parse *value* with the shared parser, using *term* as the start rule.

    The return annotation is written as a string on purpose. An eagerly
    evaluated ``sys.modules[__name__].lark.ParseTree`` would be resolved while
    this ``def`` statement executes, i.e. during module import, which either
    fails or forces lark to be imported immediately instead of lazily.

    Args:
        term: Name of the grammar start rule to parse against.
        value: The text to parse.

    Returns:
        The parse tree produced by the shared parser.

    Raises:
        Whatever the parser raises for input that does not match *term*;
        ``is_valid_syntax`` treats ``lark.exceptions.LarkError`` as "invalid".
    """
    # Look the parser up on the module object so it is created on first use.
    return sys.modules[__name__].syntax_parser.parse(value, start=term)
Comment on lines +73 to +74

Copilot AI Apr 30, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The return type annotation -> sys.modules[__name__].lark.ParseTree is evaluated at import time (no postponed-evaluation import is present), which will trigger the lazy lark import and defeats the purpose of this PR. It’s also an unusual/fragile typing pattern. Use postponed evaluation (e.g., from __future__ import annotations) and/or a string/TYPE_CHECKING-based annotation so lark isn’t imported just to define parse().

Copilot uses AI. Check for mistakes.


def is_valid_syntax(term: str, value: str):
try:
parse(term=term, value=value)
return True
except exceptions.LarkError:
except sys.modules[__name__].lark.exceptions.LarkError:
return False


def make_syntax_validator(rule_name):
parser = Lark(grammar, start=rule_name, parser=RFC3987_SYNTAX_PARSER_TYPE)

def syntax_validator(text):
try:
parser.parse(text)
return True
except exceptions.LarkError:
return False

return syntax_validator


is_valid_syntax_iri = make_syntax_validator("iri")

is_valid_syntax_iri_reference = make_syntax_validator("iri_reference")

is_valid_syntax_absolute_iri = make_syntax_validator("absolute_iri")

is_valid_syntax_irelative_ref = make_syntax_validator("irelative_ref")

is_valid_syntax_irelative_part = make_syntax_validator("irelative_part")

is_valid_syntax_ihier_part = make_syntax_validator("ihier_part")

is_valid_syntax_iauthority = make_syntax_validator("iauthority")

is_valid_syntax_iuserinfo = make_syntax_validator("iuserinfo")

is_valid_syntax_ihost = make_syntax_validator("ihost")

is_valid_syntax_ireg_name = make_syntax_validator("ireg_name")
"""Create a validator using the shared parser."""
return partial(is_valid_syntax, term=rule_name)

is_valid_syntax_ipath = make_syntax_validator("ipath")

is_valid_syntax_ipath_abempty = make_syntax_validator("ipath_abempty")
# Cache for lazily created validators
_attr_cache = {}

is_valid_syntax_ipath_absolute = make_syntax_validator("ipath_absolute")

is_valid_syntax_ipath_noscheme = make_syntax_validator("ipath_noscheme")
SYNTAX_VALIDATOR_PREFIX = "is_valid_syntax_"

is_valid_syntax_ipath_rootless = make_syntax_validator("ipath_rootless")

is_valid_syntax_ipath_empty = make_syntax_validator("ipath_empty")
def get_syntax_parser():
    """Build the single parser shared by all validators.

    ``lark`` is looked up as an attribute of this module rather than through
    a top-level import. The parser is constructed from ``grammar`` with every
    entry of ``ALL_START_RULES`` as a start rule.

    Returns:
        A new ``lark.Lark`` instance; each call builds a fresh one.
    """
    this_module = sys.modules[__name__]
    lark_cls = this_module.lark.Lark
    return lark_cls(grammar, start=ALL_START_RULES, parser=RFC3987_SYNTAX_PARSER_TYPE)

is_valid_syntax_isegment = make_syntax_validator("isegment")

is_valid_syntax_isegment_nz = make_syntax_validator("isegment_nz")
def get_syntax_validator(attr_name: str):
if not attr_name.startswith(SYNTAX_VALIDATOR_PREFIX):
return None

is_valid_syntax_isegment_nz_nc = make_syntax_validator("isegment_nz_nc")
term_name = attr_name[len(SYNTAX_VALIDATOR_PREFIX) :]

is_valid_syntax_ipchar = make_syntax_validator("ipchar")
if term_name not in ALL_START_RULES:
return None

is_valid_syntax_iquery = make_syntax_validator("iquery")
return make_syntax_validator(term_name)

is_valid_syntax_ifragment = make_syntax_validator("ifragment")

is_valid_syntax_iunreserved = make_syntax_validator("iunreserved")
def __getattr__(name):
"""
Lazily create attributes, in particular syntax validators, when accessed.

is_valid_syntax_ucschar = make_syntax_validator("ucschar")
When an attribute like 'is_valid_syntax_iri' is accessed, this function
will create and cache the corresponding validator function.

is_valid_syntax_iprivate = make_syntax_validator("iprivate")

is_valid_syntax_sub_delims = make_syntax_validator("sub_delims")

is_valid_syntax_ip_literal = make_syntax_validator("ip_literal")

is_valid_syntax_ipvfuture = make_syntax_validator("ipvfuture")

is_valid_syntax_ipv6address = make_syntax_validator("ipv6address")

is_valid_syntax_h16 = make_syntax_validator("h16")

is_valid_syntax_ls32 = make_syntax_validator("ls32")

is_valid_syntax_ipv4address = make_syntax_validator("ipv4address")

is_valid_syntax_dec_octet = make_syntax_validator("dec_octet")

is_valid_syntax_unreserved = make_syntax_validator("unreserved")

is_valid_syntax_alpha = make_syntax_validator("alpha")

is_valid_syntax_digit = make_syntax_validator("digit")

is_valid_syntax_hexdig = make_syntax_validator("hexdig")

is_valid_syntax_port = make_syntax_validator("port")
We also create the syntax parser lazily.
"""
try:
return _attr_cache[name]
except KeyError:
if name == "lark":
import lark

_attr_cache[name] = lark
return lark
if name == "syntax_parser":
syntax_parser = get_syntax_parser()
_attr_cache[name] = syntax_parser
return syntax_parser
if validator := get_syntax_validator(name):
_attr_cache[name] = validator
return validator

raise AttributeError(f"module '{__name__}' has no attribute '{name}'")