Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 32 additions & 80 deletions src/rfc3987_syntax/syntax_helpers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
__all__ = [
"is_valid_syntax",
]
Comment on lines +1 to +3

Copilot AI Apr 30, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are behavior changes here around lazy module attributes (grammar, syntax_parser, is_valid_syntax_*) and around what gets exported at package import time. Existing tests only exercise is_valid_syntax via import rfc3987_syntax as h, so regressions like missing re-exports (parse, RFC3987_SYNTAX_TERMS, is_valid_syntax_iri, etc.) or __getattr__ behavior won't be caught. Add tests that assert these public attributes are available and callable after importing rfc3987_syntax.

Copilot uses AI. Check for mistakes.
Comment on lines +1 to +3

Copilot AI Apr 30, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Defining __all__ as only ['is_valid_syntax'] changes what from .syntax_helpers import * exports. Since src/rfc3987_syntax/__init__.py uses a star import, this will stop re-exporting parse, RFC3987_SYNTAX_TERMS, and the documented is_valid_syntax_* helpers (breaking the public API/README examples). Consider removing the star import from __init__.py and explicitly re-exporting symbols (possibly via package-level __getattr__), or otherwise ensuring the previous public surface remains available without eagerly constructing all validators on import.

Copilot uses AI. Check for mistakes.
import functools
from typing import Any
from lark import Lark, ParseTree, exceptions

from pathlib import Path
Expand All @@ -12,7 +17,7 @@
"absolute_iri",
"scheme",
"irelative_ref",
"irelative_part"
"irelative_part",
"ihier_part",
"iauthority",
"iuserinfo",
Expand Down Expand Up @@ -45,13 +50,24 @@
"pct_encoded",
]

grammar: str = load_grammar(RFC3987_SYNTAX_GRAMMAR_PATH)

syntax_parser = Lark(grammar, start=["iri", "iri_reference", "absolute_iri"], parser=RFC3987_SYNTAX_PARSER_TYPE)
@functools.lru_cache(maxsize=None)
def get_grammar():
    """Return the grammar loaded from ``RFC3987_SYNTAX_GRAMMAR_PATH``.

    The value comes from ``load_grammar``; ``lru_cache`` stores it after the
    first call, so the load runs on first use rather than at import time and
    is not repeated afterwards.
    """
    grammar_text = load_grammar(RFC3987_SYNTAX_GRAMMAR_PATH)
    return grammar_text


@functools.lru_cache(maxsize=None)
def get_syntax_parser():
    """Return the shared Lark parser, constructing it on the first call.

    The parser is built from ``get_grammar()`` with ``iri``,
    ``iri_reference`` and ``absolute_iri`` as its start rules and
    ``RFC3987_SYNTAX_PARSER_TYPE`` as the parser type. ``lru_cache`` keeps
    the instance, so later calls return the same object without rebuilding.
    """
    start_rules = ["iri", "iri_reference", "absolute_iri"]
    return Lark(
        get_grammar(),
        start=start_rules,
        parser=RFC3987_SYNTAX_PARSER_TYPE,
    )


def parse(term: str, value: str) -> ParseTree:
    """Parse *value* using grammar rule *term* as the start symbol.

    Args:
        term: Name of the grammar rule to start parsing from.
        value: Text to parse.

    Returns:
        The parse tree produced by the shared Lark parser.

    No exceptions are caught here; whatever the parser raises for input
    that does not match propagates to the caller.
    """
    # Go through the cached accessor so the parser is only built when first
    # needed, instead of relying on an eagerly constructed module global.
    # NOTE(review): the shared parser is configured with the start rules
    # iri, iri_reference and absolute_iri; presumably other terms are
    # rejected by Lark -- confirm.
    return get_syntax_parser().parse(value, start=term)


def is_valid_syntax(term: str, value: str):
Expand All @@ -62,8 +78,9 @@ def is_valid_syntax(term: str, value: str):
return False


@functools.lru_cache(maxsize=None)
def make_syntax_validator(rule_name):
parser = Lark(grammar, start=rule_name, parser=RFC3987_SYNTAX_PARSER_TYPE)
parser = Lark(get_grammar(), start=rule_name, parser=RFC3987_SYNTAX_PARSER_TYPE)

def syntax_validator(text):
try:
Expand All @@ -75,78 +92,13 @@ def syntax_validator(text):
return syntax_validator


is_valid_syntax_iri = make_syntax_validator("iri")

is_valid_syntax_iri_reference = make_syntax_validator("iri_reference")

is_valid_syntax_absolute_iri = make_syntax_validator("absolute_iri")

is_valid_syntax_irelative_ref = make_syntax_validator("irelative_ref")

is_valid_syntax_irelative_part = make_syntax_validator("irelative_part")

is_valid_syntax_ihier_part = make_syntax_validator("ihier_part")

is_valid_syntax_iauthority = make_syntax_validator("iauthority")

is_valid_syntax_iuserinfo = make_syntax_validator("iuserinfo")

is_valid_syntax_ihost = make_syntax_validator("ihost")

is_valid_syntax_ireg_name = make_syntax_validator("ireg_name")

is_valid_syntax_ipath = make_syntax_validator("ipath")

is_valid_syntax_ipath_abempty = make_syntax_validator("ipath_abempty")

is_valid_syntax_ipath_absolute = make_syntax_validator("ipath_absolute")

is_valid_syntax_ipath_noscheme = make_syntax_validator("ipath_noscheme")

is_valid_syntax_ipath_rootless = make_syntax_validator("ipath_rootless")

is_valid_syntax_ipath_empty = make_syntax_validator("ipath_empty")

is_valid_syntax_isegment = make_syntax_validator("isegment")

is_valid_syntax_isegment_nz = make_syntax_validator("isegment_nz")

is_valid_syntax_isegment_nz_nc = make_syntax_validator("isegment_nz_nc")

is_valid_syntax_ipchar = make_syntax_validator("ipchar")

is_valid_syntax_iquery = make_syntax_validator("iquery")

is_valid_syntax_ifragment = make_syntax_validator("ifragment")

is_valid_syntax_iunreserved = make_syntax_validator("iunreserved")

is_valid_syntax_ucschar = make_syntax_validator("ucschar")

is_valid_syntax_iprivate = make_syntax_validator("iprivate")

is_valid_syntax_sub_delims = make_syntax_validator("sub_delims")

is_valid_syntax_ip_literal = make_syntax_validator("ip_literal")

is_valid_syntax_ipvfuture = make_syntax_validator("ipvfuture")

is_valid_syntax_ipv6address = make_syntax_validator("ipv6address")

is_valid_syntax_h16 = make_syntax_validator("h16")

is_valid_syntax_ls32 = make_syntax_validator("ls32")

is_valid_syntax_ipv4address = make_syntax_validator("ipv4address")

is_valid_syntax_dec_octet = make_syntax_validator("dec_octet")

is_valid_syntax_unreserved = make_syntax_validator("unreserved")

is_valid_syntax_alpha = make_syntax_validator("alpha")

is_valid_syntax_digit = make_syntax_validator("digit")

is_valid_syntax_hexdig = make_syntax_validator("hexdig")

is_valid_syntax_port = make_syntax_validator("port")
def __getattr__(name: str) -> Any:
if name == "grammar":
return get_grammar()
if name == "syntax_parser":
return get_syntax_parser()
if name.startswith("is_valid_syntax_"):
term = name.removeprefix("is_valid_syntax_")
return make_syntax_validator(term)

Copilot AI Apr 30, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

__getattr__ currently treats any is_valid_syntax_* attribute as valid and passes the suffix straight into make_syntax_validator(). Combined with @lru_cache(maxsize=None) this allows unbounded creation/caching of parsers for arbitrary attribute names (and can also surface confusing Lark configuration errors for typos). Consider checking term against RFC3987_SYNTAX_TERMS and raising AttributeError for unknown terms.

Suggested change
return make_syntax_validator(term)
if term in RFC3987_SYNTAX_TERMS:
return make_syntax_validator(term)

Copilot uses AI. Check for mistakes.
msg = f"Attribute {name} not found"
raise AttributeError(msg, name=name)

Copilot AI Apr 30, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

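raise AttributeError(msg, name=name) is only valid on Python 3.10 and later, where AttributeError gained the keyword-only name and obj arguments. On Python 3.9 and earlier the constructor accepts no keyword arguments, so this line raises a TypeError instead of an AttributeError when an unknown attribute is accessed. If the package still supports interpreters older than 3.10, raise AttributeError(msg) instead (passing name positionally only appends it to .args; it does not set .name).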

Suggested change
raise AttributeError(msg, name=name)
raise AttributeError(msg)

Copilot uses AI. Check for mistakes.