diff --git a/lib/python-sdk/.coverage b/lib/python-sdk/.coverage new file mode 100644 index 000000000..9ac35a7ec Binary files /dev/null and b/lib/python-sdk/.coverage differ diff --git a/lib/python-sdk/Makefile b/lib/python-sdk/Makefile index ba7ff350d..bd9d4614e 100644 --- a/lib/python-sdk/Makefile +++ b/lib/python-sdk/Makefile @@ -28,4 +28,4 @@ check-types: plugins checks: check-format check-lint check-types plugins: - $(RUNTIME_PREFIX) python -m common_grants_sdk.extensions.generate --plugin examples/plugins/opportunity_extensions + $(RUNTIME_PREFIX) python -m common_grants_sdk.extensions.generate --plugin examples/plugins/opportunity_extensions examples/plugins/grants_gov diff --git a/lib/python-sdk/common_grants_sdk/extensions/README.md b/lib/python-sdk/common_grants_sdk/extensions/README.md index 9083276ce..0a3786f8f 100644 --- a/lib/python-sdk/common_grants_sdk/extensions/README.md +++ b/lib/python-sdk/common_grants_sdk/extensions/README.md @@ -22,6 +22,10 @@ The `common-grants/sdk/extensions` module contains the utilities for working wit - [Defining a plugin](#defining-a-plugin) - [Publishing a plugin](#publishing-a-plugin) - [Combining Plugins](#combining-plugins) +- [Bidirectional Transforms](#bidirectional-transforms) + - [Defining transforms](#defining-transforms) + - [Mapping format](#mapping-format) + - [Using transforms](#using-transforms) - [Using plugins with the API client](#using-plugins-with-the-api-client) - [Best practices](#best-practices) - [Field naming](#field-naming) @@ -37,11 +41,15 @@ The `common-grants/sdk/extensions` module contains the utilities for working wit Here are some key concepts that are used to define custom fields and plugins that extend base schemas from the CommonGrants protocol. 
| Concept | Description | -| ---------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **Custom field** | A key-value pair attached to a resource's `customFields` property. Each field has a `name`, `fieldType`, `value`, and optional `description`. | -| **`CustomFieldSpec`** | A Python dataclass that _describes_ a custom field: its `field_type`, optional `value` (a Python type for the `value` property), and optional `name` and `description`. | -| **`SchemaExtensions`** | A mapping of extensible model names (e.g. `"Opportunity"`) to dicts of `CustomFieldSpec` objects. This is the shape that `define_plugin()` and `with_custom_fields()` accept. | -| **`Plugin`** | A dataclass with `.extensions` (the raw `SchemaExtensions`) and `.schemas` (Pydantic models with typed `customFields` applied). Created by `define_plugin()`. | +| ----------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Custom field** | A key-value pair attached to a resource's `customFields` property. Each field has a `name`, `fieldType`, `value`, and optional `description`. | +| **`CustomFieldSpec`** | A Python dataclass that _describes_ a custom field: its `field_type`, optional `value` (a Python type for the `value` property), and optional `name` and `description`. | +| **`SchemaExtensions`** | A mapping of extensible model names (e.g. `"Opportunity"`) to dicts of `CustomFieldSpec` objects. This is the shape that `define_plugin()` and `with_custom_fields()` accept. | +| **`Plugin`** | A dataclass with `.extensions` (the raw `SchemaExtensions`) and `.schemas` (Pydantic models with typed `customFields` applied). Created by `define_plugin()`. 
| +| **`PluginExtensionsMeta`** | Optional metadata attached to a plugin: `name`, `version`, `source_system`, and `capabilities` (e.g. `["customFields", "transforms"]`). | +| **`build_transforms()`**| Compiles a pair of mapping dicts into `(to_common, from_common)` callables. Each callable accepts a data dict and returns a `TransformResult`. | +| **`TransformResult`** | A dataclass `(result, errors: list[PluginError])` returned by each transform callable. Errors are reported rather than raised — `result` is always populated alongside any errors, but it may be incomplete (an empty dict when a handler fails) and is a model instance rather than a dict when `common_model` validation succeeds. | +| **`ObjectSchemasInput`**| Bundles a `to_common` and `from_common` callable for a single object type. Passed to `define_plugin()` via the `transform_schemas` parameter. | @@ -433,6 +441,140 @@ Prefer unique, namespaced field names so `"error"` is never triggered. After building your package, import the plugin in a test file and confirm that `.extensions` keys and `.schemas` parse types resolve correctly. Hover over the types in your editor to confirm they are not `any`. +## Bidirectional Transforms + +Plugins can define bidirectional mappings between a source system's native data format and the CommonGrants format. These transforms are authored as plain Python dicts and compiled into callable functions by `build_transforms()`. 
+ +### Defining transforms + +Use `build_transforms()` to compile a pair of mapping dicts into `(to_common, from_common)` callables, then pass them to `define_plugin()` via `transform_schemas`: + +```python +from common_grants_sdk.extensions import ( + CustomFieldSpec, + ObjectSchemasInput, + PluginExtensionsMeta, + build_transforms, + define_plugin, +) +from common_grants_sdk.schemas.pydantic.fields import CustomFieldType + +to_common, from_common = build_transforms( + to_common_mapping={ + "title": {"field": "data.opportunity_title"}, + "status": { + "value": { + "match": { + "field": "data.opportunity_status", + "case": {"posted": "open", "archived": "closed", "forecasted": "forecasted"}, + "default": "custom", + } + }, + "description": {"const": "The opportunity is currently accepting applications"}, + }, + "funding": { + "minAwardAmount": { + "amount": {"field": "data.summary.award_floor"}, + "currency": {"const": "USD"}, + }, + }, + }, + from_common_mapping={ + "data": { + "opportunity_title": {"field": "title"}, + "opportunity_status": { + "match": { + "field": "status.value", + "case": {"open": "posted", "closed": "archived", "forecasted": "forecasted"}, + "default": "custom", + } + }, + } + }, +) + +plugin = define_plugin( + extensions={ + "Opportunity": { + "legacyId": CustomFieldSpec( + field_type=CustomFieldType.INTEGER, + description="Unique identifier in legacy database", + ), + } + }, + meta=PluginExtensionsMeta( + name="my-system", + version="0.1.0", + source_system="my-system.example.gov", + capabilities=["customFields", "transforms"], + ), + transform_schemas={ + "Opportunity": ObjectSchemasInput( + to_common=to_common, + from_common=from_common, + ) + }, +) +``` + +Both directions must be provided explicitly. `build_transforms()` does not invert one mapping from the other, because many-to-one handlers like `match` are not reversible. + +### Mapping format + +A mapping dict describes how to build an output object from a source dict. 
Each leaf node is either a literal value or a single-key dict that invokes a named handler. + +| Handler | Syntax | Description | +|---|---|---| +| `const` | `{"const": "USD"}` | Returns a fixed literal value, ignoring source data | +| `field` | `{"field": "data.summary.award_floor"}` | Extracts a value using a dot-notation path | +| `match` | `{"match": {"field": "...", "case": {...}, "default": "..."}}` | Case-based lookup on a field value (canonical ADR name) | +| `switch` | `{"switch": {...}}` | Alias for `match`, kept for backward compatibility | +| `numberToString` | `{"numberToString": "data.summary.award_floor"}` | Extracts a numeric value and coerces it to a string | +| `stringToNumber` | `{"stringToNumber": "some.string.field"}` | Extracts a string and coerces it to `int` or `float` | + +Bare non-dict values (strings, numbers, booleans) in a mapping are treated as literals and passed through unchanged. Use `{"const": ...}` for literals that are not plain scalars — for example a list (which is otherwise rejected as a mapping node) or a dict that should be emitted as-is rather than interpreted as a nested mapping. + +You can also register custom handlers by passing a `handlers` dict to `build_transforms()`. A handler is called as `handler(data, arg)`, where `arg` is the value given to the handler key in the mapping (the example uses `get_from_path` from `common_grants_sdk.utils.transformation`): + +```python +def handle_upper(data, field_path): + val = get_from_path(data, field_path) + return val.upper() if isinstance(val, str) else val + +to_common, from_common = build_transforms( + to_common_mapping={"title": {"upper": "data.opportunity_title"}}, + from_common_mapping={...}, + handlers={"upper": handle_upper}, +) +``` + +Custom handlers are merged with the defaults; reusing a built-in handler name raises `ValueError` when `build_transforms()` is called. + +### Using transforms + +The compiled callables are stored on the plugin's `transform_schemas` dict, keyed by object name. 
Each callable takes a data dict and returns a `TransformResult`: + +```python +opp_transforms = plugin.transform_schemas["Opportunity"] + +# Source system → CommonGrants +result = opp_transforms.to_common(native_data) +if result.errors: + for err in result.errors: + print(f"[{err.path}] {err}") +# Lenient handling: continue with whatever was produced (strict callers should stop on errors) +cg_data = result.result + +# CommonGrants → source system +result = opp_transforms.from_common(cg_data) +native_data = result.result +``` + +`TransformResult.errors` is always a list (empty on success). A non-empty errors list means the transform encountered a problem; `result` is still populated, but it may be incomplete — callables built by `build_transforms()` return an empty dict when a handler fails. + +See `examples/transforms.py` for a complete working example with roundtrip verification. + + ## Using plugins with the API client Pass a plugin's extended schema to the API client via the `schema` parameter. The client uses it to hydrate API responses into fully typed models. The `schema` parameter accepts any `Type[OpportunityBase]` subclass. diff --git a/lib/python-sdk/common_grants_sdk/extensions/__init__.py b/lib/python-sdk/common_grants_sdk/extensions/__init__.py index 87a089683..7a9ef1185 100644 --- a/lib/python-sdk/common_grants_sdk/extensions/__init__.py +++ b/lib/python-sdk/common_grants_sdk/extensions/__init__.py @@ -2,8 +2,23 @@ from .plugin import Plugin, PluginConfig, define_plugin from .specs import ConflictStrategy, CustomFieldSpec, SchemaExtensions, merge_extensions +from .transforms import build_transforms +from .types import ( + ClientConfig, + Handler, + ObjectMappings, + ObjectSchemas, + ObjectSchemasInput, + PluginCapability, + PluginError, + PluginExtensions, + PluginExtensionsMeta, + PluginExtensionsSchema, + TransformResult, +) __all__ = [ + # Existing exports (unchanged) "ConflictStrategy", "CustomFieldSpec", "Plugin", @@ -11,4 +26,18 @@ "SchemaExtensions", "define_plugin", "merge_extensions", + # New: build_transforms + "build_transforms", + # New: ADR-0022 types + "ClientConfig", + "Handler", + "ObjectMappings", + 
"ObjectSchemas", + "ObjectSchemasInput", + "PluginCapability", + "PluginError", + "PluginExtensions", + "PluginExtensionsMeta", + "PluginExtensionsSchema", + "TransformResult", ] diff --git a/lib/python-sdk/common_grants_sdk/extensions/generate.py b/lib/python-sdk/common_grants_sdk/extensions/generate.py index 40eba082b..9657a84a1 100644 --- a/lib/python-sdk/common_grants_sdk/extensions/generate.py +++ b/lib/python-sdk/common_grants_sdk/extensions/generate.py @@ -487,13 +487,15 @@ def main(argv: list[str] | None = None) -> int: ) parser.add_argument( "--plugin", - default=".", - help="Path to plugin directory containing cg_config.py (default: current directory)", + nargs="+", + default=["."], + help="One or more plugin directories containing cg_config.py (default: current directory)", ) args = parser.parse_args(argv) - generated_dir = generate_plugin(Path(args.plugin)) - print(f"Generated plugin schemas at {generated_dir}") + for plugin_path in args.plugin: + generated_dir = generate_plugin(Path(plugin_path)) + print(f"Generated plugin schemas at {generated_dir}") return 0 diff --git a/lib/python-sdk/common_grants_sdk/extensions/plugin.py b/lib/python-sdk/common_grants_sdk/extensions/plugin.py index 7b4670f34..eab8cb8f4 100644 --- a/lib/python-sdk/common_grants_sdk/extensions/plugin.py +++ b/lib/python-sdk/common_grants_sdk/extensions/plugin.py @@ -1,28 +1,81 @@ """Plugin configuration and composition APIs.""" +from __future__ import annotations + from dataclasses import dataclass -from typing import Generic, TypeVar +from typing import Any, Callable, Generic, TypeVar from .specs import SchemaExtensions +from .types import ClientConfig, ObjectSchemas, ObjectSchemasInput, PluginExtensionsMeta T = TypeVar("T") @dataclass(frozen=True) class PluginConfig: - """Build-time plugin config discoverable by the generator.""" + """Build-time plugin config discoverable by the code generator. + + extensions: custom field declarations (read by generate.py — do not rename). 
+ meta: optional plugin identity and capability declaration. + transform_schemas: optional bidirectional transform callables per object. + Stored as ObjectSchemasInput (not compiled to ObjectSchemas) in the PoC. + Full compilation with model_validate wrapping is a TODO for the real SDK. + + TODO (full SDK): add get_client, filters. + """ extensions: SchemaExtensions + meta: PluginExtensionsMeta | None = None + transform_schemas: dict[str, ObjectSchemasInput[Any, Any]] | None = None -@dataclass(frozen=True) +@dataclass class Plugin(Generic[T]): - """Runtime plugin container with both extension specs and generated schemas.""" + """Runtime plugin container with extension specs and generated schemas. + + extensions: SchemaExtensions used by generate.py (do not rename or reorder — + the generated __init__.py constructs Plugin(extensions=..., schemas=...)). + schemas: generated _Schemas object (typed Pydantic model classes from generate.py). + NOTE: there is a naming collision: ADR-0022 also calls its runtime transform + dict "schemas". These are different concepts sharing the same name — a design + question to resolve in the full SDK (see Design Finding #1 in the spec). + transform_schemas: ADR-0022 runtime transform dict; named distinctly from + `schemas` to avoid collision with the generated schemas field in the PoC. + + TODO (full SDK): memoize get_client. + """ extensions: SchemaExtensions - schemas: T + schemas: T # generated _Schemas object — keep as positional for generate.py compat + meta: PluginExtensionsMeta | None = None + get_client: Callable[[ClientConfig], Any] | None = None # TODO: memoize + # PoC stores ObjectSchemasInput here (no compilation yet); full SDK will store + # ObjectSchemas after model_validate wrapping. Annotated as the union so both + # the current PoC usage and the future compiled form are type-safe. 
+ transform_schemas: ( + dict[str, ObjectSchemasInput[Any, Any] | ObjectSchemas[Any, Any]] | None + ) = None + filters: dict[str, dict[str, Any]] | None = None + + +def define_plugin( + extensions: SchemaExtensions, + meta: PluginExtensionsMeta | None = None, + transform_schemas: dict[str, ObjectSchemasInput[Any, Any]] | None = None, + # TODO (full SDK): get_client, filters +) -> PluginConfig: + """Create a PluginConfig object consumed by the code generator. + Backward-compatible: existing callers passing only `extensions` are unaffected. + New params are stored as-is — no compilation occurs in the PoC. -def define_plugin(extensions: SchemaExtensions) -> PluginConfig: - """Create a plugin config object consumed by the code generator.""" - return PluginConfig(extensions=extensions) + TODO (full SDK): + - Auto-generate transforms from extensions.schemas[obj].mappings when no + explicit to_common/from_common is supplied. + - Wrap transform output with model_validate. + """ + return PluginConfig( + extensions=extensions, + meta=meta, + transform_schemas=transform_schemas, + ) diff --git a/lib/python-sdk/common_grants_sdk/extensions/transforms.py b/lib/python-sdk/common_grants_sdk/extensions/transforms.py new file mode 100644 index 000000000..c2aaacdd4 --- /dev/null +++ b/lib/python-sdk/common_grants_sdk/extensions/transforms.py @@ -0,0 +1,182 @@ +"""build_transforms() — generates to_common/from_common callables from mapping dicts. + +Using this utility is optional — plugin authors may provide plain hand-written +callables instead. + +Mappings are validated at call time. Custom handler names are +registered per call only; name collisions with defaults raise at call time +rather than silently shadowing them. 
+""" + +from __future__ import annotations + +from typing import Any, Callable + +from pydantic import BaseModel, ValidationError + +from common_grants_sdk.utils.transformation import ( + DEFAULT_HANDLERS, + HandlerError, + transform_from_mapping, +) + +from .types import Handler, PluginError, TransformResult + + +def _validate_mapping(mapping: Any, known_handlers: set[str], path: str = "") -> None: + """Walk the mapping tree and raise ValueError on structural malformation. + + For each dict node: + - If a key is a known handler, the node must contain ONLY that handler key. + The corresponding value is a runtime-only handler argument and is NOT + recursed into. + - All other keys are output field names (always valid); their values are + recursed into. + + Raises ValueError if any node is not a dict, string, number, boolean, or None + (e.g. a list where a scalar or dict is expected), or if a handler key appears + alongside sibling keys in the same dict (ambiguous — handler invocations must + be the sole key in their dict). + + Note: this function cannot detect intended-but-unknown handler invocations + because unknown keys are indistinguishable from output field names at static + analysis time. That detection is deferred to the full SDK. + """ + if mapping is None or isinstance(mapping, (str, int, float, bool)): + return # primitives and None are valid literals + + if not isinstance(mapping, dict): + raise ValueError( + f"Invalid mapping node at '{path}': expected dict, str, number, or bool, " + f"got {type(mapping).__name__}" + ) + + handler_keys = [k for k in mapping if k in known_handlers] + if handler_keys and len(mapping) > 1: + label = f" at '{path}'" if path else "" + raise ValueError( + f"Invalid mapping node{label}: handler key {handler_keys[0]!r} " + f"cannot have sibling keys {sorted(k for k in mapping if k not in known_handlers)!r}. " + f"A handler invocation must be the only key in its dict." 
+ ) + + for key, value in mapping.items(): + current_path = f"{path}.{key}" if path else key + if key in known_handlers: + # Handler invocation — argument is runtime-only, do not recurse + continue + _validate_mapping(value, known_handlers, current_path) + + +def build_transforms( + to_common_mapping: dict[str, Any], + from_common_mapping: dict[str, Any], + handlers: dict[str, Handler] | None = None, + common_model: type[BaseModel] | None = None, +) -> tuple[ + Callable[[Any], TransformResult[Any]], + Callable[[Any], TransformResult[Any]], +]: + """Generate to_common and from_common callables from mapping dicts. + + Args: + to_common_mapping: mapping from native source → CommonGrants. + from_common_mapping: mapping from CommonGrants → native source. + handlers: Optional additional handlers registered for this call only. + Keys must not collide with DEFAULT_HANDLERS (raises ValueError if they do). + common_model: Optional Pydantic model class to validate the to_common output + against. Must be the fully extended generated model class (e.g. the + generated Opportunity from generated/schemas.py), NOT the base class + (e.g. OpportunityBase). Passing a base class will silently weaken + validation — custom_fields will only be checked against + dict[str, CustomField] rather than the typed container produced by the + plugin's custom field declarations. When provided, model_validate is + called on the transform result and any ValidationErrors are appended to + TransformResult.errors rather than raised. + + Note on result shape: when common_model is set, TransformResult.result + holds the validated Pydantic instance on success, or the raw transformed + dict on ValidationError (so callers can inspect the malformed data + alongside the errors). This is intentional — check TransformResult.errors + before consuming TransformResult.result. + + Returns: + A (to_common, from_common) tuple. Each callable accepts a dict and returns + TransformResult[Any]. 
Failures surface as PluginError entries in + TransformResult.errors rather than being raised. + + Raises: + ValueError: At call time if handler names collide with defaults, + or if either mapping has structural malformation. + + TODO (full SDK): + - Validate field-path resolvability at call time (requires sample data or + schema introspection). + """ + # Custom handler names must not shadow defaults + if handlers: + collisions = set(handlers) & set(DEFAULT_HANDLERS) + if collisions: + raise ValueError( + f"build_transforms: handler names collide with defaults: {sorted(collisions)}" + ) + + merged = {**DEFAULT_HANDLERS, **(handlers or {})} + known = set(merged) + + # Validate mapping structure at call time + _validate_mapping(to_common_mapping, known) + _validate_mapping(from_common_mapping, known) + + def to_common(native: Any) -> TransformResult[Any]: + try: + result = transform_from_mapping(native, to_common_mapping, handlers=merged) + except HandlerError as exc: + error = PluginError( + str(exc.cause), + path=None, + handler=exc.handler, + source_value=native, + cause=exc.cause, + ) + return TransformResult(result={}, errors=[error]) + except Exception as exc: + error = PluginError(str(exc), path=None, source_value=native, cause=exc) + return TransformResult(result={}, errors=[error]) + + if common_model is None: + return TransformResult(result=result, errors=[]) + + try: + validated = common_model.model_validate(result) + return TransformResult(result=validated, errors=[]) + except ValidationError as exc: + errors = [ + PluginError( + e["msg"], + path=".".join(str(loc) for loc in e["loc"]), + ) + for e in exc.errors() + ] + return TransformResult(result=result, errors=errors) + + def from_common(common: Any) -> TransformResult[Any]: + try: + result = transform_from_mapping( + common, from_common_mapping, handlers=merged + ) + return TransformResult(result=result, errors=[]) + except HandlerError as exc: + error = PluginError( + str(exc.cause), + path=None, + 
handler=exc.handler, + source_value=common, + cause=exc.cause, + ) + return TransformResult(result={}, errors=[error]) + except Exception as exc: + error = PluginError(str(exc), path=None, source_value=common, cause=exc) + return TransformResult(result={}, errors=[error]) + + return to_common, from_common diff --git a/lib/python-sdk/common_grants_sdk/extensions/types.py b/lib/python-sdk/common_grants_sdk/extensions/types.py new file mode 100644 index 000000000..8be07358a --- /dev/null +++ b/lib/python-sdk/common_grants_sdk/extensions/types.py @@ -0,0 +1,151 @@ +"""Plugin framework types for the CommonGrants Python SDK.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Callable, Generic, Literal, TypeVar + +from pydantic import BaseModel, ConfigDict, Field + +from .specs import CustomFieldSpec + +TNative = TypeVar("TNative") +TCommon = TypeVar("TCommon") +T = TypeVar("T") + +# Capability enum — Literal rather than StrEnum to stay JSON-safe. +PluginCapability = Literal["customFields", "customFilters", "transforms", "client"] + +# Type aliases +Handler = Callable[[Any, Any], Any] +ClientConfig = dict[str, Any] + + +class PluginError(Exception): + """Structured transformation error per ADR-0022 Decision #9. + + Carries field path, handler name, source value, and underlying cause so + consumers can reason about failures programmatically without parsing error text. + + Note: source_value may contain PII when transforming applicant data. + Adopters are responsible for redacting it before logging or re-raising. + The SDK does not redact by default. 
+ """ + + def __init__( + self, + message: str, + *, + path: str | None = None, + handler: str | None = None, + source_value: Any = None, + cause: BaseException | None = None, + ) -> None: + super().__init__(message) + self.path = path + self.handler = handler + self.source_value = source_value + self.cause = cause + + +@dataclass +class TransformResult(Generic[T]): + """Unconditional return shape for to_common / from_common (ADR-0022 Decision #7). + + result: the transformed value (may be partial on error). + errors: aggregated PluginErrors; empty on full success. + + Consumers apply their own strict-vs-lenient rule for what counts as success: + - Strict: treat any non-empty errors as failure. + - Lenient: use result despite warnings; inspect errors for context. + """ + + result: T + errors: list[PluginError] + + +class ObjectMappings(BaseModel): + """ADR-0017 mapping dicts for a single object, stored in the serializable extensions config. + + Each direction is author-provided — build_transforms() does not invert one into + the other because many-to-one handlers like switch are not reversible (Decision #6). + """ + + model_config = ConfigDict(populate_by_name=True) + + to_common: dict[str, Any] | None = Field(default=None, alias="toCommon") + from_common: dict[str, Any] | None = Field(default=None, alias="fromCommon") + + +class PluginExtensionsMeta(BaseModel): + """Plugin identity and capability declaration. All fields are optional.""" + + model_config = ConfigDict(populate_by_name=True) + + name: str | None = None + version: str | None = None + source_system: str | None = Field(default=None, alias="sourceSystem") + capabilities: list[PluginCapability] | None = None + + +class PluginExtensionsSchema(BaseModel): + """Per-object config inside extensions.schemas. + + custom_fields: custom field declarations (merged by merge_extensions). + mappings: optional ADR-0017 declarative mappings. 
When present and no explicit + to_common / from_common is supplied in transform_schemas[obj], define_plugin() will + auto-invoke build_transforms() on these (TODO — ADR-0022 Decision #6). + """ + + model_config = ConfigDict(populate_by_name=True) + + custom_fields: dict[str, CustomFieldSpec] | None = Field( + default=None, alias="customFields" + ) + mappings: ObjectMappings | None = None + + +class PluginExtensions(BaseModel): + """Serializable portion of plugin config — safe to store as JSON. + + Used by merge_extensions() to combine declarations from multiple plugin packages. + """ + + model_config = ConfigDict(populate_by_name=True) + + meta: PluginExtensionsMeta | None = None + schemas: dict[str, PluginExtensionsSchema] | None = None + + +@dataclass +class ObjectSchemasInput(Generic[TNative, TCommon]): + """Input type provided by plugin authors inside define_plugin(transform_schemas=...). + + Plugin authors supply to_common and from_common as plain callables — either + hand-written or generated via build_transforms(). native is optional; when + omitted it is None, which stands for dict[str, Any]. + + common is intentionally absent here. It is injected by define_plugin() during + compilation from ObjectSchemasInput → ObjectSchemas, resolved from the generated + model classes produced by the code generator. Plugin authors never set it directly — + cg_config.py cannot import from generated/ (it is the input to generation). + """ + + native: type[TNative] | None = None + to_common: Callable[[TNative], TransformResult[TCommon]] | None = None + from_common: Callable[[TCommon], TransformResult[TNative]] | None = None + + +@dataclass +class ObjectSchemas(Generic[TNative, TCommon]): + """Runtime compiled type produced by define_plugin() — not provided directly by authors. + + In the PoC, define_plugin() stores ObjectSchemasInput as-is; full compilation + (adding common from the base CG model, wrapping with model_validate) is a TODO + for the real SDK (ADR-0022 Decision #7). 
+ """ + + native: type[TNative] + common: type[TCommon] + to_common: Callable[[TNative], TransformResult[TCommon]] + from_common: Callable[[TCommon], TransformResult[TNative]] diff --git a/lib/python-sdk/common_grants_sdk/utils/transformation.py b/lib/python-sdk/common_grants_sdk/utils/transformation.py index 10153a71e..bb0778122 100644 --- a/lib/python-sdk/common_grants_sdk/utils/transformation.py +++ b/lib/python-sdk/common_grants_sdk/utils/transformation.py @@ -64,10 +64,84 @@ def switch_on_value(data: dict, switch_spec: dict) -> Any: return lookup.get(val, switch_spec.get("default")) +def const_value(_data: dict, value: Any) -> Any: + """ + Handles a const transformation by returning a fixed literal value. + + Args: + _data: The source data dictionary (unused) + value: The constant value to return + + Returns: + The constant value exactly as specified + """ + return value + + +def number_to_string(data: dict, field_path: str) -> str | None: + """ + Handles a numberToString transformation by extracting a numeric value and coercing it to a string. + + Args: + data: The source data dictionary + field_path: A dot-separated string representing the path to the numeric value + + Returns: + The value at the specified path converted to a string, or None if the path doesn't exist + """ + val = get_from_path(data, field_path) + return str(val) if val is not None else None + + +def string_to_number(data: dict, field_path: str) -> int | float | None: + """ + Handles a stringToNumber transformation by extracting a string value and coercing it to a number. + + Attempts integer conversion first; falls back to float for decimal strings. 
+ + Args: + data: The source data dictionary + field_path: A dot-separated string representing the path to the string value + + Returns: + The value at the specified path converted to int or float, or None if the path doesn't exist + + Raises: + ValueError: If the extracted value cannot be converted to a number + """ + val = get_from_path(data, field_path) + if val is None: + return None + s = str(val) + try: + return int(s) + except ValueError: + return float(s) + + +class HandlerError(ValueError): + """Raised when a handler function raises, carrying the handler name for attribution. + + Extends ValueError so that existing ``except ValueError`` handlers around + ``transform_from_mapping``, ``dump_with_mapping``, and ``validate_with_mapping`` + continue to work after this class was introduced. Callers that want handler-level + attribution can catch ``HandlerError`` specifically (it is more derived). + """ + + def __init__(self, handler: str, cause: Exception) -> None: + super().__init__(str(cause)) + self.handler = handler + self.cause = cause + + # Registry for handlers DEFAULT_HANDLERS: dict[str, handle_func] = { + "const": const_value, "field": pluck_field_value, - "switch": switch_on_value, + "match": switch_on_value, # ADR-0017 canonical name + "numberToString": number_to_string, + "stringToNumber": string_to_number, + "switch": switch_on_value, # alias kept for backward compatibility } @@ -83,8 +157,12 @@ def transform_from_mapping( The mapping supports both literal values and transformations keyed by the following reserved words: + - `const`: Returns a fixed literal value regardless of input data - `field`: Extracts a value from the data using a dot-notation path - - `switch`: Performs a case-based lookup based on a field value + - `match`: Performs a case-based lookup based on a field value (canonical) + - `numberToString`: Extracts a numeric value and coerces it to a string + - `stringToNumber`: Extracts a string value and coerces it to int or float + - 
`switch`: Alias for `match` (kept for backward compatibility) Args: data: The source data dictionary to transform @@ -123,6 +201,13 @@ def transform_from_mapping( } ``` """ + # Normalize Pydantic model instances to plain dicts so that field path + # extraction works regardless of whether the caller passes a raw dict or a + # validated model (e.g. the output of to_common with common_model set). + # mode="json" matches the convention used by CommonGrantsBaseModel.dump_with_mapping. + if hasattr(data, "model_dump"): + data = data.model_dump(mode="json") + # Check for maximum depth # This is a sanity check to prevent stack overflow from deeply nested mappings # which may be a concern when running this function on third-party mappings @@ -150,7 +235,10 @@ def transform_node(node: Any, depth: int) -> Any: # Returns: `extract_field_value(data, "opportunity_status")` if k in handlers: handler_func = handlers[k] - return handler_func(data, v) + try: + return handler_func(data, v) + except Exception as exc: + raise HandlerError(k, exc) from exc # Otherwise, preserve the dictionary structure and # recursively apply the transformation to each value. diff --git a/lib/python-sdk/examples/README.md b/lib/python-sdk/examples/README.md index d624cffa5..96b7bbc67 100644 --- a/lib/python-sdk/examples/README.md +++ b/lib/python-sdk/examples/README.md @@ -119,6 +119,106 @@ None ``` +# Bidirectional transforms example + +This example demonstrates the plugin transform framework: mapping source system data (grants.gov format) to the CommonGrants format and back again, with a roundtrip consistency check. No API server is required — the script runs entirely offline using sample data defined in the file itself. 
+ +**Step 1:** Generate the typed models for the grants.gov plugin (only needed once, or after changing `cg_config.py`): + +```bash +cd lib/python-sdk +poetry run python -m common_grants_sdk.extensions.generate --plugin examples/plugins/grants_gov +``` + +Or generate all example plugins at once with: + +```bash +make plugins +``` + +**Step 2:** Run the example: + +```bash +poetry run python examples/transforms.py +``` + +**Output Example:** +``` +============================================================ +SOURCE DATA (grants.gov format) +============================================================ +{ + "data": { + "agency_name": "Department of Examples", + "created_at": "2025-01-15T09:00:00Z", + "last_modified_at": "2025-04-01T12:30:00Z", + "opportunity_description": "Funding to advance research into conservation techniques for endangered ecosystems.", + "opportunity_id": 12345, + "opportunity_number": "ABC-123-XYZ-001", + "opportunity_status": "posted", + "opportunity_title": "Research into conservation techniques", + "opportunity_uuid": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "summary": { + "applicant_types": ["state_governments"], + "archive_date": "2025-05-01", + "award_ceiling": 100000, + "award_floor": 10000, + "forecasted_award_date": "2025-09-01", + "forecasted_close_date": "2025-07-15", + "forecasted_post_date": "2025-05-01" + } + } +} + +============================================================ +to_common: grants.gov → CommonGrants +============================================================ +Errors: none + +Result: +{ + "title": "Research into conservation techniques", + "status": { "value": "open", "description": "The opportunity is currently accepting applications" }, + "funding": { + "minAwardAmount": { "amount": 10000, "currency": "USD" }, + "maxAwardAmount": { "amount": 100000, "currency": "USD" } + }, + ... 
+} + +============================================================ +from_common: CommonGrants → grants.gov +============================================================ +Errors: none + +Result: +{ + "data": { + "opportunity_title": "Research into conservation techniques", + "opportunity_status": "posted", + "summary": { + "award_floor": 10000, + "award_ceiling": 100000, + "forecasted_post_date": "2025-05-01", + "forecasted_close_date": "2025-07-15" + } + } +} + +============================================================ +ROUNDTRIP CHECK +============================================================ + [PASS] title: 'Research into conservation techniques' -> 'Research into conservation techniques' + [PASS] status: 'posted' -> 'posted' + [PASS] award_floor: 10000 -> 10000 + [PASS] award_ceiling: 100000 -> 100000 + +Roundtrip result (4 mapped fields checked; unmapped fields dropped by design): ALL PASS +``` + +The transform mappings live in `examples/plugins/grants_gov/cg_config.py`. See the [extensions README](../common_grants_sdk/extensions/README.md#bidirectional-transforms) for a full explanation of the mapping format. + + # Plugin framework example This example uses the plugin framework to define four typed custom fields, generate static Pydantic models, and validate an API payload. diff --git a/lib/python-sdk/examples/plugins/grants_gov/__init__.py b/lib/python-sdk/examples/plugins/grants_gov/__init__.py new file mode 100644 index 000000000..5ae38feac --- /dev/null +++ b/lib/python-sdk/examples/plugins/grants_gov/__init__.py @@ -0,0 +1,14 @@ +# This file is auto-generated. Do not edit it manually — it will be overwritten +# the next time `python -m common_grants_sdk.extensions.generate` is run.
+from __future__ import annotations + +from common_grants_sdk.extensions import Plugin +from .cg_config import config +from .generated import schemas + +grants_gov = Plugin( + extensions=config.extensions, + schemas=schemas, +) + +__all__ = ["grants_gov", "schemas"] diff --git a/lib/python-sdk/examples/plugins/grants_gov/cg_config.py b/lib/python-sdk/examples/plugins/grants_gov/cg_config.py new file mode 100644 index 000000000..b3f28b5e3 --- /dev/null +++ b/lib/python-sdk/examples/plugins/grants_gov/cg_config.py @@ -0,0 +1,143 @@ +"""Grants.gov sample plugin — bidirectional transform PoC. + +Demonstrates the plugin framework shape using the grants.gov scenario. + +Usage (from lib/python-sdk/): + poetry run python examples/transforms.py + +Code generation (generates typed custom-field schemas): + poetry run python -m common_grants_sdk.extensions.generate --plugin examples/plugins/grants_gov +""" + +from common_grants_sdk.extensions import ( + CustomFieldSpec, + ObjectSchemasInput, + PluginExtensionsMeta, + build_transforms, + define_plugin, +) +from common_grants_sdk.schemas.pydantic.fields import CustomFieldType + +# --------------------------------------------------------------------------- +# Bidirectional transforms +# +# Both directions are author-provided — build_transforms() does not invert +# one into the other because many-to-one handlers like switch are not +# reversible. +# +# Convention: field extraction uses {"field": "dot.notation.path"} — bare +# string values are treated as literals by transform_from_mapping(), not +# as field paths. See Design Finding #2 in the spec for the open question +# about which convention is canonical. 
+# --------------------------------------------------------------------------- + +to_common, from_common = build_transforms( + # to_common: grants.gov native → CommonGrants Opportunity + to_common_mapping={ + "title": {"field": "data.opportunity_title"}, + "status": { + "value": { + "match": { + "field": "data.opportunity_status", + "case": { + "forecasted": "forecasted", + "posted": "open", + "archived": "closed", + }, + "default": "custom", + } + }, + "description": { + "const": "The opportunity is currently accepting applications" + }, + }, + "funding": { + "minAwardAmount": { + "amount": {"field": "data.summary.award_floor"}, + "currency": {"const": "USD"}, + }, + "maxAwardAmount": { + "amount": {"field": "data.summary.award_ceiling"}, + "currency": {"const": "USD"}, + }, + }, + "keyDates": { + "appOpens": { + "name": {"const": "Open Date"}, + "date": {"field": "data.summary.forecasted_post_date"}, + "description": {"const": "Applications begin being accepted"}, + }, + "appDeadline": { + "name": {"const": "Application Deadline"}, + "date": {"field": "data.summary.forecasted_close_date"}, + "description": { + "const": "Final submission deadline for all grant applications" + }, + }, + }, + }, + # from_common: CommonGrants Opportunity → grants.gov native + from_common_mapping={ + "data": { + "opportunity_title": {"field": "title"}, + "opportunity_status": { + "match": { + "field": "status.value", + "case": { + "open": "posted", + "closed": "archived", + "forecasted": "forecasted", + }, + "default": "custom", + } + }, + "summary": { + "award_floor": {"field": "funding.minAwardAmount.amount"}, + "award_ceiling": {"field": "funding.maxAwardAmount.amount"}, + "forecasted_post_date": {"field": "keyDates.appOpens.date"}, + "forecasted_close_date": {"field": "keyDates.appDeadline.date"}, + }, + } + }, +) + +# --------------------------------------------------------------------------- +# Plugin config +# 
--------------------------------------------------------------------------- + +plugin = define_plugin( + # extensions: SchemaExtensions — dict[str, dict[str, CustomFieldSpec]] + extensions={ + "Opportunity": { + "legacyId": CustomFieldSpec( + field_type=CustomFieldType.INTEGER, + name="Legacy ID", + description="Unique identifier in legacy database", + ), + "agencyName": CustomFieldSpec( + field_type=CustomFieldType.STRING, + name="Agency", + description="Agency hosting the opportunity", + ), + "applicantTypes": CustomFieldSpec( + field_type=CustomFieldType.ARRAY, + name="Applicant types", + description="Types of applicants eligible to apply", + ), + } + }, + meta=PluginExtensionsMeta( + name="grants-gov", + version="0.1.0", + sourceSystem="grants.gov", + capabilities=["customFields", "transforms"], + ), + transform_schemas={ + "Opportunity": ObjectSchemasInput( + to_common=to_common, + from_common=from_common, + ) + }, +) + +config = plugin diff --git a/lib/python-sdk/examples/transforms.py b/lib/python-sdk/examples/transforms.py new file mode 100644 index 000000000..030521058 --- /dev/null +++ b/lib/python-sdk/examples/transforms.py @@ -0,0 +1,281 @@ +#!/usr/bin/env python3 +"""Bidirectional transform PoC — plugin transformation interface. + +Demonstrates source (grants.gov) → CommonGrants and CommonGrants → source +bidirectional transformations using the grants.gov sample plugin. + +Requires generated schemas (examples/plugins/grants_gov/generated/). +Generate them first (from lib/python-sdk/): + poetry run python -m common_grants_sdk.extensions.generate --plugin examples/plugins/grants_gov +Or run all plugins at once: + make plugins + +Then run (from lib/python-sdk/): + poetry run python examples/transforms.py +""" + +from __future__ import annotations + +import json +from typing import Any + +# When run as `poetry run python examples/transforms.py`, Python automatically +# adds the script's directory (examples/) to sys.path. 
Import from there using +# the `plugins.` prefix (not `examples.plugins.`) — the `examples.` prefix only +# works in -c or interactive contexts where lib/python-sdk/ is sys.path[0]. +from plugins.grants_gov.cg_config import plugin +from plugins.grants_gov.generated.schemas import Opportunity + +from common_grants_sdk.extensions import build_transforms +from common_grants_sdk.utils.transformation import get_from_path + +# --------------------------------------------------------------------------- +# Sample grants.gov source data +# --------------------------------------------------------------------------- + +SOURCE_DATA: dict[str, Any] = { + "data": { + "agency_name": "Department of Examples", + "created_at": "2025-01-15T09:00:00Z", + "last_modified_at": "2025-04-01T12:30:00Z", + "opportunity_description": "Funding to advance research into conservation techniques for endangered ecosystems.", + "opportunity_id": 12345, + "opportunity_number": "ABC-123-XYZ-001", + "opportunity_status": "posted", + "opportunity_title": "Research into conservation techniques", + "opportunity_uuid": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "summary": { + "applicant_types": ["state_governments"], + "archive_date": "2025-05-01", + "award_ceiling": 100000, + "award_floor": 10000, + "forecasted_award_date": "2025-09-01", + "forecasted_close_date": "2025-07-15", + "forecasted_post_date": "2025-05-01", + }, + } +} + + +# --------------------------------------------------------------------------- +# Custom handlers: join_fields and split_field +# +# join_fields concatenates multiple source field values with a configurable +# separator. Mapping spec: {"join": {"fields": ["a.b", "c.d"], "sep": " — "}} +# +# split_field is the inverse: it splits a single field on a separator and +# returns the element at the given index. 
+# Mapping spec: {"split": {"field": "label", "sep": " — ", "index": 0}} +# --------------------------------------------------------------------------- + + +def join_fields(data: dict[str, Any], spec: dict[str, Any]) -> str | None: + """Custom handler that joins multiple field values with a separator.""" + sep = spec.get("sep", " ") + parts = [get_from_path(data, path) for path in spec.get("fields", [])] + values = [str(p) for p in parts if p is not None] + return sep.join(values) if values else None + + +def split_field(data: dict[str, Any], spec: dict[str, Any]) -> str | None: + """Custom handler that splits a field value and returns the element at index.""" + value = get_from_path(data, spec.get("field", "")) + if value is None: + return None + parts = str(value).split(spec.get("sep", " ")) + index = spec.get("index", 0) + return parts[index] if index < len(parts) else None + + +# Transform that uses the custom handlers and validates output against the generated +# Opportunity model. common_model=Opportunity (from generated/schemas.py) ensures +# model_validate runs against the extended class with typed custom fields +# (legacyId, agencyName, applicantTypes), not just the base OpportunityBase. 
+to_common_with_custom, from_common_with_custom = build_transforms( + to_common_mapping={ + "id": {"field": "data.opportunity_uuid"}, + "title": {"field": "data.opportunity_title"}, + "description": {"field": "data.opportunity_description"}, + "createdAt": {"field": "data.created_at"}, + "lastModifiedAt": {"field": "data.last_modified_at"}, + "status": { + "value": { + "match": { + "field": "data.opportunity_status", + "case": { + "posted": "open", + "archived": "closed", + "forecasted": "forecasted", + }, + "default": "custom", + } + }, + }, + "label": { + "join": { + "fields": ["data.opportunity_number", "data.opportunity_title"], + "sep": " — ", + } + }, + "customFields": { + "legacyId": { + "value": {"field": "data.opportunity_id"}, + }, + "agencyName": { + "value": {"field": "data.agency_name"}, + }, + "applicantTypes": { + "value": {"field": "data.summary.applicant_types"}, + }, + }, + }, + from_common_mapping={ + "data": { + # label is produced by the join handler above but gets dropped by + # model_validate (it is not a CG field), so from_common maps directly + # from the standard CG title field instead. 
+ "opportunity_title": {"field": "title"}, + } + }, + handlers={"join": join_fields, "split": split_field}, + common_model=Opportunity, +) + + +def _section(title: str) -> None: + print(f"\n{'=' * 60}") + print(title) + print("=" * 60) + + +def main() -> None: + assert plugin.transform_schemas is not None + opp = plugin.transform_schemas["Opportunity"] + + _section("SOURCE DATA (grants.gov format)") + print(json.dumps(SOURCE_DATA, indent=2)) + + # --- to_common: grants.gov → CommonGrants --- + assert opp.to_common is not None + cg_result = opp.to_common(SOURCE_DATA) + + _section("to_common: grants.gov → CommonGrants") + if cg_result.errors: + print(f"ERRORS ({len(cg_result.errors)}):") + for err in cg_result.errors: + print(f" [path={err.path}] {err}") + else: + print("Errors: none") + print("\nResult:") + print(json.dumps(cg_result.result, indent=2)) + + # --- from_common: CommonGrants → grants.gov --- + assert opp.from_common is not None + native_result = opp.from_common(cg_result.result) + + _section("from_common: CommonGrants → grants.gov") + if native_result.errors: + print(f"ERRORS ({len(native_result.errors)}):") + for err in native_result.errors: + print(f" [path={err.path}] {err}") + else: + print("Errors: none") + print("\nResult:") + print(json.dumps(native_result.result, indent=2)) + + # --- Roundtrip comparison --- + # Note: SOURCE_DATA contains fields not covered by the mappings (agency_name, + # opportunity_id, etc.). Those fields are intentionally absent from the roundtrip + # output — the mapping layer is selective by design. 
+ _section("ROUNDTRIP CHECK") + checks = [ + ( + "title", + SOURCE_DATA["data"]["opportunity_title"], + native_result.result.get("data", {}).get("opportunity_title"), + ), + ( + "status", + SOURCE_DATA["data"]["opportunity_status"], + native_result.result.get("data", {}).get("opportunity_status"), + ), + ( + "award_floor", + SOURCE_DATA["data"]["summary"]["award_floor"], + native_result.result.get("data", {}).get("summary", {}).get("award_floor"), + ), + ( + "award_ceiling", + SOURCE_DATA["data"]["summary"]["award_ceiling"], + native_result.result.get("data", {}) + .get("summary", {}) + .get("award_ceiling"), + ), + ] + all_pass = True + for field, original, roundtripped in checks: + ok = original == roundtripped + if not ok: + all_pass = False + status = "PASS" if ok else "FAIL" + print(f" [{status}] {field}: {original!r} -> {roundtripped!r}") + + print( + f"\nRoundtrip result ({len(checks)} mapped fields checked; unmapped fields dropped by design): {'ALL PASS' if all_pass else 'SOME FIELDS DIFFER'}" + ) + + # --- Custom handler + model_validate demo --- + _section( + "CUSTOM HANDLER + MODEL VALIDATE DEMO (join / split / extended Opportunity)" + ) + print("Custom handlers: join, split") + print("common_model: generated Opportunity (with typed customFields)\n") + + custom_cg = to_common_with_custom(SOURCE_DATA) + + if custom_cg.errors: + print(f"ERRORS ({len(custom_cg.errors)}):") + for err in custom_cg.errors: + print(f" [path={err.path}] {err}") + else: + print("Validation: PASS — result is a typed Opportunity instance") + opp_instance = custom_cg.result + print(f"\n title: {opp_instance.title}") + print(f" id: {opp_instance.id}") + print(f" status: {opp_instance.status.value}") + if opp_instance.custom_fields: + cf = opp_instance.custom_fields + print("\n customFields (typed):") + if cf.legacy_id: + print( + f" legacyId.value: {cf.legacy_id.value!r} ({type(cf.legacy_id.value).__name__})" + ) + if cf.agency_name: + print( + f" agencyName.value: 
{cf.agency_name.value!r} ({type(cf.agency_name.value).__name__})" + ) + if cf.applicant_types: + print( + f" applicantTypes.value: {cf.applicant_types.value!r} ({type(cf.applicant_types.value).__name__})" + ) + + custom_native = from_common_with_custom( + custom_cg.result if not custom_cg.errors else {} + ) + orig_title = SOURCE_DATA["data"]["opportunity_title"] + rt_title = custom_native.result.get("data", {}).get("opportunity_title") + print( + f"\n [{'PASS' if orig_title == rt_title else 'FAIL'}] opportunity_title: {orig_title!r} -> {rt_title!r}" + ) + + # --- Plugin metadata --- + _section("PLUGIN METADATA") + assert plugin.meta is not None + print(f"name: {plugin.meta.name}") + print(f"version: {plugin.meta.version}") + print(f"sourceSystem: {plugin.meta.source_system}") + print(f"capabilities: {plugin.meta.capabilities}") + + +if __name__ == "__main__": + main() diff --git a/lib/python-sdk/tests/extensions/__init__.py b/lib/python-sdk/tests/extensions/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/python-sdk/tests/extensions/test_plugin.py b/lib/python-sdk/tests/extensions/test_plugin.py new file mode 100644 index 000000000..8fcadc799 --- /dev/null +++ b/lib/python-sdk/tests/extensions/test_plugin.py @@ -0,0 +1,73 @@ +"""Tests for expanded plugin.py — backward compat + new optional fields.""" + +from common_grants_sdk.extensions.plugin import Plugin, PluginConfig, define_plugin +from common_grants_sdk.extensions.specs import SchemaExtensions +from common_grants_sdk.extensions.types import ( + ObjectSchemasInput, + PluginExtensionsMeta, + TransformResult, +) + +EXTENSIONS: SchemaExtensions = {} # minimal valid extensions + + +def test_define_plugin_backward_compat(): + """define_plugin(extensions=...) 
still returns PluginConfig with all optional fields None.""" + config = define_plugin(extensions=EXTENSIONS) + assert isinstance(config, PluginConfig) + assert config.extensions is EXTENSIONS + assert config.meta is None + assert config.transform_schemas is None + + +def test_define_plugin_with_meta_and_schemas(): + meta = PluginExtensionsMeta(name="test", source_system="test-system") + + def passthrough(x): + return TransformResult(result=x, errors=[]) + + schemas = { + "Opportunity": ObjectSchemasInput( + to_common=passthrough, from_common=passthrough + ) + } + config = define_plugin(extensions=EXTENSIONS, meta=meta, transform_schemas=schemas) + assert config.meta is meta + assert config.meta.name == "test" + assert config.transform_schemas is schemas + + +def test_plugin_fields(): + """Plugin accepts all fields; optional ones default to None.""" + base = Plugin(extensions=EXTENSIONS, schemas=object()) + assert base.meta is None + assert base.get_client is None + assert base.transform_schemas is None + assert base.filters is None + + meta = PluginExtensionsMeta(name="p", source_system="s") + schemas = {"Opportunity": object()} + full = Plugin( + extensions=EXTENSIONS, schemas=object(), meta=meta, transform_schemas=schemas + ) + assert full.meta is meta + assert full.transform_schemas is schemas + + +def test_transform_schemas_callable_roundtrip(): + """The demo calls config.transform_schemas["Opportunity"].to_common(data).""" + + def always_transformed(_x): + return TransformResult(result={"transformed": True}, errors=[]) + + config = define_plugin( + extensions=EXTENSIONS, + transform_schemas={ + "Opportunity": ObjectSchemasInput( + to_common=always_transformed, from_common=always_transformed + ) + }, + ) + result = config.transform_schemas["Opportunity"].to_common({"raw": "data"}) + assert result.result == {"transformed": True} + assert result.errors == [] diff --git a/lib/python-sdk/tests/extensions/test_transforms.py 
b/lib/python-sdk/tests/extensions/test_transforms.py new file mode 100644 index 000000000..b0353ada1 --- /dev/null +++ b/lib/python-sdk/tests/extensions/test_transforms.py @@ -0,0 +1,213 @@ +"""Tests for build_transforms() in common_grants_sdk.extensions.transforms.""" + +import pytest +from pydantic import BaseModel +from common_grants_sdk.extensions.transforms import build_transforms +from common_grants_sdk.extensions.types import PluginError, TransformResult + +# Shared source data matching the ADR-0017 grants.gov example +SOURCE_DATA = { + "data": { + "opportunity_title": "Research into conservation techniques", + "opportunity_status": "posted", + "summary": { + "award_floor": 10000, + "award_ceiling": 100000, + "forecasted_post_date": "2025-05-01", + "forecasted_close_date": "2025-07-15", + }, + } +} + +TO_COMMON_MAPPING = { + "title": {"field": "data.opportunity_title"}, + "status": { + "value": { + "switch": { + "field": "data.opportunity_status", + "case": { + "posted": "open", + "archived": "closed", + "forecasted": "forecasted", + }, + "default": "custom", + } + }, + "description": "The opportunity is currently accepting applications", + }, + "funding": { + "minAwardAmount": { + "amount": {"field": "data.summary.award_floor"}, + "currency": "USD", + }, + }, +} + +FROM_COMMON_MAPPING = { + "data": { + "opportunity_title": {"field": "title"}, + "opportunity_status": { + "switch": { + "field": "status.value", + "case": { + "open": "posted", + "closed": "archived", + "forecasted": "forecasted", + }, + "default": "custom", + } + }, + "summary": { + "award_floor": {"field": "funding.minAwardAmount.amount"}, + }, + } +} + + +# --- Call-time validation --- + + +@pytest.mark.parametrize("name", ["field", "switch"]) +def test_handler_collision_raises(name): + """build_transforms raises if custom handler shadows a default handler name.""" + with pytest.raises(ValueError, match="collide with defaults"): + build_transforms( + TO_COMMON_MAPPING, + FROM_COMMON_MAPPING, 
+ handlers={name: lambda d, v: v}, + ) + + +def test_structural_error_raises_with_path(): + """build_transforms raises on list nodes and includes the field path.""" + with pytest.raises(ValueError, match="Invalid mapping node"): + build_transforms( + {"title": ["should", "not", "be", "a", "list"]}, FROM_COMMON_MAPPING + ) + with pytest.raises(ValueError, match="funding.amount"): + build_transforms({"funding": {"amount": [1, 2]}}, {}) + + +def test_handler_with_sibling_keys_raises(): + """build_transforms raises when a handler key has siblings in the same dict.""" + with pytest.raises(ValueError, match="sibling keys"): + build_transforms({"title": {"field": "x", "extra": "literal"}}, {}) + # Nested occurrence is also caught, and the path is reported + with pytest.raises(ValueError, match="nested.title"): + build_transforms({"nested": {"title": {"field": "x", "extra": "literal"}}}, {}) + + +# --- to_common transform --- + + +def test_to_common(): + to_common, _ = build_transforms(TO_COMMON_MAPPING, FROM_COMMON_MAPPING) + result = to_common(SOURCE_DATA) + assert isinstance(result, TransformResult) + assert result.errors == [] + assert result.result["title"] == "Research into conservation techniques" + assert result.result["status"]["value"] == "open" + assert ( + result.result["status"]["description"] + == "The opportunity is currently accepting applications" + ) + assert result.result["funding"]["minAwardAmount"]["amount"] == 10000 + assert result.result["funding"]["minAwardAmount"]["currency"] == "USD" + + +# --- from_common roundtrip --- + + +def test_from_common_roundtrip(): + """Status roundtrip: posted → open → posted.""" + to_common, from_common = build_transforms(TO_COMMON_MAPPING, FROM_COMMON_MAPPING) + native = from_common(to_common(SOURCE_DATA).result) + assert isinstance(native, TransformResult) + assert native.errors == [] + assert ( + native.result["data"]["opportunity_title"] + == "Research into conservation techniques" + ) + assert 
native.result["data"]["opportunity_status"] == "posted" + + +# --- Error surfacing --- + + +def test_exception_surfaces_as_plugin_error_not_raised(): + """Exceptions inside handlers surface as PluginError, not raised.""" + + def boom(data, _arg): + raise RuntimeError("handler exploded") + + to_common, _ = build_transforms( + {"title": {"boom": "anything"}}, + {}, + handlers={"boom": boom}, + ) + result = to_common(SOURCE_DATA) + assert len(result.errors) == 1 + err = result.errors[0] + assert isinstance(err, PluginError) + assert "handler exploded" in str(err) + assert err.handler == "boom" + assert isinstance(err.cause, RuntimeError) + assert str(err.cause) == "handler exploded" + + +# --- model_validate via common_model --- + + +class _TitleModel(BaseModel): + title: str + + +class _StrictModel(BaseModel): + title: str + required_field: str # always missing from SOURCE_DATA transform output + + +def test_common_model_validates_result(): + """When common_model is provided, result is a model instance on success.""" + to_common, _ = build_transforms( + {"title": {"field": "data.opportunity_title"}}, + {}, + common_model=_TitleModel, + ) + result = to_common(SOURCE_DATA) + assert result.errors == [] + assert isinstance(result.result, _TitleModel) + assert result.result.title == "Research into conservation techniques" + + +def test_common_model_validation_failure(): + """ValidationError surfaces as PluginError entries; raw dict is still returned.""" + to_common, _ = build_transforms( + {"title": {"field": "data.opportunity_title"}}, + {}, + common_model=_StrictModel, + ) + result = to_common(SOURCE_DATA) + assert len(result.errors) >= 1 + assert all(isinstance(e, PluginError) for e in result.errors) + assert any("required_field" in (e.path or "") for e in result.errors) + assert result.result["title"] == "Research into conservation techniques" + + +def test_custom_handler_registered_per_call(): + """Custom handlers apply only to the call they are registered on.""" + + 
def handle_upper(data, path): + parts = path.split(".") + val = data + for part in parts: + val = val.get(part) if isinstance(val, dict) else None + return str(val).upper() if val is not None else None + + to_common, _ = build_transforms( + {"title": {"upper": "data.opportunity_title"}}, + {}, + handlers={"upper": handle_upper}, + ) + result = to_common(SOURCE_DATA) + assert result.result["title"] == "RESEARCH INTO CONSERVATION TECHNIQUES" diff --git a/lib/python-sdk/tests/extensions/test_types.py b/lib/python-sdk/tests/extensions/test_types.py new file mode 100644 index 000000000..f44a3a86b --- /dev/null +++ b/lib/python-sdk/tests/extensions/test_types.py @@ -0,0 +1,132 @@ +"""Tests for ADR-0022 types defined in common_grants_sdk.extensions.types.""" + +import pytest +from common_grants_sdk.extensions.specs import CustomFieldSpec +from common_grants_sdk.extensions.types import ( + ObjectMappings, + ObjectSchemas, + ObjectSchemasInput, + PluginError, + PluginExtensions, + PluginExtensionsMeta, + PluginExtensionsSchema, + TransformResult, +) +from common_grants_sdk.schemas.pydantic.fields.custom import CustomFieldType + +# --- PluginError --- + + +def test_plugin_error_is_exception_with_defaults(): + err = PluginError("something went wrong") + assert isinstance(err, Exception) + assert str(err) == "something went wrong" + assert err.path is None + assert err.handler is None + assert err.source_value is None + assert err.cause is None + + +def test_plugin_error_structured_fields(): + cause = ValueError("root cause") + err = PluginError( + "msg", + path="status.value", + handler="switch", + source_value={"x": 1}, + cause=cause, + ) + assert err.path == "status.value" + assert err.handler == "switch" + assert err.source_value == {"x": 1} + assert err.cause is cause + + +# --- TransformResult --- + + +def test_transform_result(): + ok = TransformResult(result={"title": "hello"}, errors=[]) + assert ok.result == {"title": "hello"} + assert ok.errors == [] + + err = 
PluginError("bad") + partial = TransformResult(result={}, errors=[err]) + assert len(partial.errors) == 1 + assert partial.errors[0] is err + + +# --- ObjectMappings --- + + +def test_object_mappings(): + assert ObjectMappings().to_common is None + assert ObjectMappings().from_common is None + m = ObjectMappings(toCommon={"title": "x"}, fromCommon={"x": "title"}) + assert m.to_common == {"title": "x"} + assert m.from_common == {"x": "title"} + + +# --- PluginExtensionsMeta --- + + +def test_plugin_extensions_meta(): + assert PluginExtensionsMeta().name is None + assert PluginExtensionsMeta().source_system is None + m = PluginExtensionsMeta(sourceSystem="grants.gov") + assert m.source_system == "grants.gov" + + +# --- PluginExtensionsSchema --- + + +def test_plugin_extensions_schema(): + assert PluginExtensionsSchema().custom_fields is None + assert PluginExtensionsSchema().mappings is None + s = PluginExtensionsSchema(mappings=ObjectMappings(toCommon={"a": "b"})) + assert s.mappings.to_common == {"a": "b"} + + +# --- PluginExtensions --- + + +def test_plugin_extensions(): + assert PluginExtensions().meta is None + assert PluginExtensions().schemas is None + spec = CustomFieldSpec(field_type=CustomFieldType.INTEGER) + schema = PluginExtensionsSchema(customFields={"legacyId": spec}) + ext = PluginExtensions(schemas={"Opportunity": schema}) + assert ext.schemas["Opportunity"].custom_fields == {"legacyId": spec} + + +# --- ObjectSchemasInput --- + + +def test_object_schemas_input(): + assert ObjectSchemasInput().native is None + assert ObjectSchemasInput().to_common is None + + def passthrough(x): + return TransformResult(result=x, errors=[]) + + inp = ObjectSchemasInput(to_common=passthrough, from_common=passthrough) + assert inp.to_common is passthrough + + +# --- ObjectSchemas --- + + +def test_object_schemas(): + def passthrough(x): + return TransformResult(result=x, errors=[]) + + schemas = ObjectSchemas( + native=dict, common=dict, to_common=passthrough, 
from_common=passthrough + ) + assert schemas.native is dict + assert schemas.common is dict + + with pytest.raises(TypeError): + ObjectSchemas( + native=dict, common=dict, to_common=passthrough + ) # missing from_common diff --git a/lib/python-sdk/tests/utils/test_transformation.py b/lib/python-sdk/tests/utils/test_transformation.py index 5365e665f..8f826c03e 100644 --- a/lib/python-sdk/tests/utils/test_transformation.py +++ b/lib/python-sdk/tests/utils/test_transformation.py @@ -1,7 +1,9 @@ import pytest +from pydantic import BaseModel from common_grants_sdk.utils.transformation import ( DEFAULT_HANDLERS, + HandlerError, transform_from_mapping, ) @@ -179,13 +181,15 @@ def test_extend_with_concat(input_data): - The handler works with both field values and constants """ - # Patch in a concat handler for this test def handle_concat(data, concat_spec): return "".join( str(transform_from_mapping(data, part)) for part in concat_spec["parts"] ) - DEFAULT_HANDLERS["concat"] = handle_concat + handlers = { + **DEFAULT_HANDLERS, + "concat": handle_concat, + } mapping = { "opportunity_code": { @@ -198,7 +202,7 @@ def handle_concat(data, concat_spec): } } } - result = transform_from_mapping(input_data, mapping) + result = transform_from_mapping(input_data, mapping, handlers=handlers) assert result == {"opportunity_code": "ABC-123-XYZ-001-12345"} @@ -233,6 +237,121 @@ def handle_type(data, type_spec): assert result == {"id_str": "12345"} +def test_const_string(input_data): + """Test const handler returns a fixed string value.""" + mapping = {"currency": {"const": "USD"}} + result = transform_from_mapping(input_data, mapping) + assert result == {"currency": "USD"} + + +def test_const_number(input_data): + """Test const handler returns a fixed numeric value.""" + mapping = {"version": {"const": 1}} + result = transform_from_mapping(input_data, mapping) + assert result == {"version": 1} + + +def test_const_ignores_source_data(input_data): + """Test const handler is independent of 
source data.""" + mapping = {"x": {"const": "fixed"}} + result = transform_from_mapping({}, mapping) + assert result == {"x": "fixed"} + + +def test_match_key_alias(input_data): + """Test match key (ADR-0017 canonical name) works identically to switch.""" + mapping = { + "status": { + "match": { + "field": "opportunity_status", + "case": {"posted": "open", "archived": "closed"}, + "default": "custom", + } + } + } + result = transform_from_mapping(input_data, mapping) + assert result == {"status": "open"} + + +def test_number_to_string(input_data): + """Test numberToString handler coerces a numeric field to a string.""" + mapping = {"floor_str": {"numberToString": "summary.award_floor"}} + result = transform_from_mapping(input_data, mapping) + assert result == {"floor_str": "10000"} + + +def test_number_to_string_missing_field(input_data): + """Test numberToString returns None when the field path does not exist.""" + mapping = {"x": {"numberToString": "nonexistent.path"}} + result = transform_from_mapping(input_data, mapping) + assert result == {"x": None} + + +def test_string_to_number_integer(input_data): + """Test stringToNumber handler coerces a string integer field to int.""" + data = {**input_data, "amount_str": "50000"} + result = transform_from_mapping(data, {"amount": {"stringToNumber": "amount_str"}}) + assert result == {"amount": 50000} + assert isinstance(result["amount"], int) + + +def test_string_to_number_float(input_data): + """Test stringToNumber handler coerces a decimal string to float.""" + data = {**input_data, "rate_str": "3.14"} + result = transform_from_mapping(data, {"rate": {"stringToNumber": "rate_str"}}) + assert result == {"rate": 3.14} + assert isinstance(result["rate"], float) + + +def test_string_to_number_missing_field(input_data): + """Test stringToNumber returns None when the field path does not exist.""" + mapping = {"x": {"stringToNumber": "nonexistent.path"}} + result = transform_from_mapping(input_data, mapping) + assert result 
== {"x": None} + + +def test_string_to_number_invalid_raises(input_data): + """Test stringToNumber raises HandlerError (wrapping ValueError) for non-numeric strings.""" + + data = {**input_data, "bad": "not-a-number"} + with pytest.raises(HandlerError) as exc_info: + transform_from_mapping(data, {"x": {"stringToNumber": "bad"}}) + assert exc_info.value.handler == "stringToNumber" + assert isinstance(exc_info.value.cause, ValueError) + + +def test_handler_error_is_value_error(): + """HandlerError is a subclass of ValueError for backward compat with existing callers.""" + err = HandlerError("myHandler", ValueError("bad input")) + assert isinstance(err, ValueError) + # Callers catching ValueError continue to work; callers wanting attribution catch HandlerError + with pytest.raises(ValueError): + transform_from_mapping( + {"bad": "not-a-number"}, {"x": {"stringToNumber": "bad"}} + ) + + +def test_pydantic_model_instance_is_normalized(): + """transform_from_mapping accepts a Pydantic model instance and extracts fields correctly.""" + + class Inner(BaseModel): + value: str + + class Source(BaseModel): + title: str + nested: Inner + + model = Source(title="hello", nested=Inner(value="world")) + result = transform_from_mapping( + model, + { + "out_title": {"field": "title"}, + "out_value": {"field": "nested.value"}, + }, + ) + assert result == {"out_title": "hello", "out_value": "world"} + + def test_deeply_nested(input_data): """ Test transformation with deeply nested structures. diff --git a/website/src/content/docs/governance/adr/0022-plugin-framework.mdx b/website/src/content/docs/governance/adr/0022-plugin-framework.mdx index c649b2f7b..52b33b74c 100644 --- a/website/src/content/docs/governance/adr/0022-plugin-framework.mdx +++ b/website/src/content/docs/governance/adr/0022-plugin-framework.mdx @@ -42,7 +42,7 @@ We decided to: 6. 
**Plugin authors provide `toCommon` / `fromCommon` as functions; mappings are one way to generate them.** The SDK exposes `buildTransforms()` / `build_transforms()` as a public utility wrapping the existing mapping runtimes. `PluginExtensions.schemas.<Name>` gains an optional `mappings` key carrying JSON-safe `toCommon` / `fromCommon` mapping objects; when those are declared and no explicit transform is supplied in `schemas.<Name>`, `definePlugin()` invokes `buildTransforms()` automatically. Mappings for each direction are author-provided — `buildTransforms()` does not invert one direction into the other, because many-to-one handlers like `switch` are not reversible. -7. **`toCommon` / `fromCommon` return a `TransformResult` of `{ result, errors }` unconditionally; mapping definitions are validated at `buildTransforms()` call time.** Partial failure is routine for cross-schema transforms — field handlers can emit warnings that do not invalidate a record — so the transform surface is safe by default rather than throwing. `definePlugin()` wraps the underlying transform output with runtime schema validation (Zod `.parse()` / Pydantic `model_validate()`); validation failures surface as entries in `errors` rather than thrown exceptions. Consumers apply their own rule for what counts as success — strict adopters treat any non-empty `errors` as failure, lenient adopters tolerate warnings. Mappings passed to `buildTransforms()` are checked at the call site, failing fast on structural errors, unknown handlers, or unresolvable field paths. +7. **`toCommon` / `fromCommon` return a `TransformResult` of `{ result, errors }` unconditionally; mapping definitions are validated at `buildTransforms()` call time.** Partial failure is routine for cross-schema transforms — field handlers can emit warnings that do not invalidate a record — so the transform surface is safe by default rather than throwing. 
Runtime schema validation failures (Zod `.parse()` / Pydantic `model_validate()`) surface as entries in `errors` rather than thrown exceptions. In the current PoC, this validation is opt-in at the `buildTransforms()` call site via the `commonModel` / `common_model` parameter — when supplied, validation runs inside `toCommon` against the fully extended generated schema. In the full SDK, `definePlugin()` will additionally inject validation when auto-generating transforms from `extensions.schemas.<Name>.mappings`. Plugin authors using hand-written transforms are responsible for their own validation. Consumers apply their own rule for what counts as success — strict adopters treat any non-empty `errors` as failure, lenient adopters tolerate warnings. Mappings passed to `buildTransforms()` are checked at the call site, failing fast on structural errors, unknown handlers, or unresolvable field paths. 8. **Custom handlers are registered per utility call, not globally.** `buildTransforms()` accepts an optional `handlers` argument for registering additional handler names. Per-call scoping keeps behavior explicit and testable; name collisions with the default set raise at `buildTransforms()` call time rather than silently shadowing them. Handler-name lookup must not resolve inherited attributes, because mapping JSON can be reconstituted from untrusted sources via `mergeExtensions()`. @@ -102,6 +102,9 @@ interface ObjectSchemas { } // Input type — provided by plugin authors inside DefinePluginOptions.schemas +// common is intentionally absent: the plugin config file cannot import from generated/ +// since it is the input to generation. definePlugin() injects common during compilation +// from ObjectSchemasInput → ObjectSchemas, resolved from the generated model classes. 
interface ObjectSchemasInput { native?: ZodType; // defaults to Record if omitted toCommon?: (native: TNative) => TransformResult; @@ -203,10 +206,19 @@ type Handler = (value: unknown, context: unknown) => unknown; // may provide plain hand-written functions instead. Mappings are validated at call // time (see Decision #7); the optional `handlers` argument registers custom handler // names for this call only (see Decision #8). +// When commonModel is provided, toCommon calls commonModel.parse (Zod) on its output +// and appends any validation errors to TransformResult.errors rather than throwing. +// commonModel must be the fully extended generated schema (e.g. the generated +// Opportunity with typed customFields), not the base schema — passing a base schema +// silently weakens validation of typed custom fields. +// The underlying mapping runtime normalizes model/schema instances to plain objects +// at the entry point, so fromCommon can receive the validated output of toCommon +// and field paths still resolve correctly. function buildTransforms( toCommonMapping: Record, // ADR-0017 mapping from native → CommonGrants fromCommonMapping: Record, // ADR-0017 mapping from CommonGrants → native handlers?: Record, + commonModel?: ZodType, // must be the generated extended schema, not the base ): { toCommon: (native: TNative) => TransformResult; fromCommon: (common: TCommon) => TransformResult; @@ -261,6 +273,9 @@ class ObjectSchemas(Generic[TNative, TCommon]): from_common: Callable[[TCommon], TransformResult[TNative]] # Input type — provided by plugin authors inside define_plugin(schemas=...) +# common is intentionally absent: cg_config.py cannot import from generated/ since +# it is the input to generation. define_plugin() injects common during compilation +# from ObjectSchemasInput → ObjectSchemas, resolved from the generated model classes. 
@dataclass class ObjectSchemasInput(Generic[TNative, TCommon]): native: type[TNative] | None = None # defaults to dict[str, Any] if omitted @@ -366,10 +381,19 @@ Handler = Callable[[Any, Any], Any] # may provide plain hand-written callables instead. Mappings are validated at call # time (see Decision #7); the optional `handlers` argument registers custom handler # names for this call only (see Decision #8). +# When common_model is provided, to_common calls model_validate on its output and +# appends any ValidationErrors to TransformResult.errors rather than raising. +# common_model must be the fully extended generated model class (e.g. +# generated/schemas.py's Opportunity), not the base class — passing a base class +# silently weakens validation of typed custom fields. +# transform_from_mapping normalizes Pydantic model instances to plain dicts via +# model_dump(mode="json") at the entry point, so from_common can receive the +# validated model output of to_common and field paths still resolve correctly. def build_transforms( to_common_mapping: dict[str, Any], # ADR-0017 mapping from native → CommonGrants from_common_mapping: dict[str, Any], # ADR-0017 mapping from CommonGrants → native handlers: dict[str, Handler] | None = None, + common_model: type[BaseModel] | None = None, # must be the generated extended model, not the base ) -> tuple[ Callable[[Any], TransformResult[Any]], Callable[[Any], TransformResult[Any]],