Skip to content
13 changes: 6 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,16 @@ FROM dhi.io/python:3.11-debian13-dev AS runtime
ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
VIRTUAL_ENV=/opt/venv \
JAVA_HOME=/opt/java/openjdk/17-jre \
PATH=/opt/venv/bin:/opt/java/openjdk/17-jre/bin:$PATH
JAVA_HOME=/opt/java/openjdk/21-jre \
PATH=/opt/venv/bin:/opt/java/openjdk/21-jre/bin:$PATH

# copy the pre-built venv (readable+executable by the nonroot user)
COPY --from=builder --chown=65532:65532 /opt/venv /opt/venv

# Eclipse Temurin JRE 17 — required by PySpark-backed engines (Kafka, Spark).
# Without Java, those engines fail at SparkSession startup. Adding the JRE here
# means `datacontract test` against a kafka/spark server works in the image
# without users having to extend it.
COPY --from=dhi.io/eclipse-temurin:17-debian13 /opt/java/openjdk /opt/java/openjdk
# Eclipse Temurin JRE 21 — required by PySpark-backed engines (Kafka, Spark).
# Spark 4.0 (what `.[all]` resolves to) supports Java 17 and 21. Without Java,
# those engines fail at SparkSession startup.
COPY --from=dhi.io/eclipse-temurin:21-debian13 /opt/java/openjdk /opt/java/openjdk

USER nonroot:nonroot
WORKDIR /home/datacontract
Expand Down
18 changes: 17 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -891,7 +891,7 @@ models:
```

##### Environment Variables
All [parameters supported by Soda](https://docs.soda.io/soda/connect-snowflake.html), uppercased and prepended by `DATACONTRACT_SNOWFLAKE_` prefix.
Any `DATACONTRACT_SNOWFLAKE_`-prefixed variable is passed (lowercased, prefix stripped) as a connection parameter to the [snowflake-connector-python](https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#connect) driver.
Depending on the `authenticator` mode required by your Snowflake workspace, please set your environment variables accordingly.
For example:

Expand Down Expand Up @@ -1824,6 +1824,7 @@ Usage: datacontract import [OPTIONS] COMMAND [ARGS]...
│ spark Import a data contract from a Spark schema. │
│ iceberg Import a data contract from an Iceberg schema. │
│ excel Import a data contract from an Excel file. │
│ powerbi Import a data contract from a Power BI .pbit file. │
│ snowflake Import a data contract from a Snowflake account │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯

Expand Down Expand Up @@ -2065,6 +2066,21 @@ Example:
datacontract import --format snowflake --source account.canada-central.azure --database databaseName --schema schemaName
```

<details markdown="1">
<summary><strong>snowflake</strong></summary>

Import from a Snowflake schema. Specify the Snowflake workspace account in the `source` parameter, the database name in `database`, and the schema in `schema`.
Multiple authentication methods are supported:
- Login/password, when the `DATACONTRACT_SNOWFLAKE_...` environment variables are set.
- MFA via external browser, selected when `DATACONTRACT_SNOWFLAKE_PASSWORD` is missing.
- TOML file authentication using the default profile, when the `SNOWFLAKE_DEFAULT_CONNECTION_NAME` environment variable is defined.

Example:

```bash
datacontract import snowflake --source account.canada-central.azure --database databaseName --schema schemaName
```
</details>

### catalog
```
Expand Down
18 changes: 18 additions & 0 deletions datacontract/command_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,24 @@ def import_excel(
_write_result(result, output)


@import_app.command(
    name="powerbi",
    epilog="Example: datacontract import powerbi --source datacontract.pbit --output datacontract.yaml",
)
def import_powerbi(
    source: Annotated[Optional[str], typer.Option(help="Path to the Power BI .pbit file.")] = None,
    output: output_option = None,
    schema: schema_option = None,
    owner: owner_option = None,
    id: id_option = None,
    debug: debug_option = None,
):
    """Import a data contract from a Power BI .pbit file."""
    # NOTE: the docstring above is also what Typer shows as this command's help
    # text, so it is deliberately kept to a single user-facing sentence.
    enable_debug_logging(debug)

    # Collect the CLI options that are forwarded unchanged to the importer.
    import_kwargs = {
        "source": source,
        "schema": schema,
        "owner": owner,
        "id": id,
    }
    contract = DataContract.import_from_source(format="powerbi", **import_kwargs)

    # Emit the imported contract to the destination selected by --output.
    _write_result(contract, output)


@import_app.command(
name="snowflake",
epilog="Example: datacontract import snowflake --source account --database DEMO_DB --schema PUBLIC --output datacontract.yaml",
Expand Down
1 change: 1 addition & 0 deletions datacontract/imports/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class ImportFormat(str, Enum):
csv = "csv"
protobuf = "protobuf"
excel = "excel"
powerbi = "powerbi"
snowflake = "snowflake"

@classmethod
Expand Down
6 changes: 5 additions & 1 deletion datacontract/imports/importer_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,16 @@ def load_module_class(module_path, class_name):
module_path="datacontract.imports.excel_importer",
class_name="ExcelImporter",
)
# Power BI importer: registered by module path and class name rather than by
# importing the class here (presumably resolved only when the format is used).
importer_factory.register_lazy_importer(
    class_name="PowerBIImporter",
    module_path="datacontract.imports.powerbi_importer",
    name=ImportFormat.powerbi,
)
# Snowflake importer: same lazy registration pattern as above.
importer_factory.register_lazy_importer(
    class_name="SnowflakeImporter",
    module_path="datacontract.imports.snowflake_importer",
    name=ImportFormat.snowflake,
)

importer_factory.register_lazy_importer(
name=ImportFormat.json,
module_path="datacontract.imports.json_importer",
Expand Down
Loading