diff --git a/import-automation/terraform/main.tf b/import-automation/terraform/main.tf
new file mode 100644
index 0000000000..8f74c10710
--- /dev/null
+++ b/import-automation/terraform/main.tf
@@ -0,0 +1,395 @@
+# Terraform deployment for Data Commons Import Automation Workflow
+# This file sets up:
+# - Necessary GCP APIs
+# - Secret Manager for the import-config secret
+# - GCS Buckets for imports, mounting, and Dataflow templates
+# - Spanner Instance and Database with schema
+# - Artifact Registry for hosting Docker images (Flex Template & Executor)
+# - Pub/Sub Topic and Subscription for triggering imports
+# - Cloud Build Triggers for CI/CD of Executor, Functions, Workflows, and Ingestion Pipeline
+# - Unified Service Account with necessary IAM roles for Workflows, Functions, and Pub/Sub
+
+terraform {
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = ">= 5.0.0"
+    }
+    archive = {
+      source = "hashicorp/archive"
+    }
+  }
+}
+
+variable "project_id" {
+  description = "The GCP Project ID"
+  type        = string
+}
+
+variable "region" {
+  description = "The GCP Region"
+  type        = string
+  default     = "us-central1"
+}
+
+variable "github_owner" {
+  description = "The owner of the GitHub repository"
+  type        = string
+  default     = "datacommonsorg"
+}
+
+variable "github_repo_name" {
+  description = "The name of the GitHub repository (data)"
+  type        = string
+  default     = "data"
+}
+
+variable "github_repo_ingestion_name" {
+  description = "The name of the GitHub repository (import)"
+  type        = string
+  default     = "import"
+}
+
+variable "spanner_instance_id" {
+  description = "Spanner Instance ID"
+  type        = string
+  default     = "datcom-import-instance"
+}
+
+variable "spanner_database_id" {
+  description = "Spanner Database ID"
+  type        = string
+  default     = "dc-import-db"
+}
+
+variable "bq_dataset_id" {
+  description = "BigQuery Dataset ID for aggregation"
+  type        = string
+  default     = "datacommons"
+}
+
+variable "dc_api_key" {
+  description = "Data Commons API Key"
+  type        = string
+  sensitive   = true
+}
+
+# --- APIs ---
+
+locals {
+  services = [
+    "artifactregistry.googleapis.com",
+    "batch.googleapis.com",
+    "cloudbuild.googleapis.com",
+    "cloudfunctions.googleapis.com",
+    "cloudscheduler.googleapis.com",
+    "compute.googleapis.com",
+    "dataflow.googleapis.com",
+    "iam.googleapis.com",
+    "pubsub.googleapis.com",
+    "run.googleapis.com",
+    "secretmanager.googleapis.com",
+    "spanner.googleapis.com",
+    "storage.googleapis.com",
+    "workflows.googleapis.com",
+  ]
+}
+
+resource "google_project_service" "services" {
+  for_each = toset(local.services)
+  project  = var.project_id
+  service  = each.key
+
+  disable_on_destroy = false
+}
+
+# --- Secret Manager ---
+
+resource "google_secret_manager_secret" "import_config" {
+  secret_id = "import-config"
+  project   = var.project_id
+
+  replication {
+    auto {}
+  }
+
+  depends_on = [google_project_service.services]
+}
+
+# NOTE(review): the DC API key is stored twice — inside this import-config JSON
+# and again as the standalone "dc-api-key" secret below. Presumably two different
+# consumers read them; confirm both are actually needed.
+resource "google_secret_manager_secret_version" "import_config_v1" {
+  secret      = google_secret_manager_secret.import_config.id
+  secret_data = jsonencode({
+    dc_api_key = var.dc_api_key
+  })
+}
+
+resource "google_secret_manager_secret" "dc_api_key" {
+  secret_id = "dc-api-key"
+  project   = var.project_id
+
+  replication {
+    auto {}
+  }
+
+  depends_on = [google_project_service.services]
+}
+
+resource "google_secret_manager_secret_version" "dc_api_key_v1" {
+  secret      = google_secret_manager_secret.dc_api_key.id
+  secret_data = var.dc_api_key
+}
+
+# --- GCS Buckets ---
+
+resource "google_storage_bucket" "import_bucket" {
+  name                        = "${var.project_id}-imports"
+  location                    = var.region
+  project                     = var.project_id
+  uniform_bucket_level_access = true
+
+  depends_on = [google_project_service.services]
+}
+
+# --- Spanner ---
+
+resource "google_spanner_instance" "import_instance" {
+  name         = var.spanner_instance_id
+  config       = "regional-${var.region}"
+  display_name = "Import Automation Spanner Instance"
+  num_nodes    = 1
+  project      = var.project_id
+
+  depends_on = [google_project_service.services]
+}
+
+# NOTE(review): splitting the schema file on ";" assumes no semicolons occur
+# inside string literals or comments in spanner_schema.sql — verify the schema
+# file keeps that invariant.
+resource "google_spanner_database" "import_db" {
+  instance = google_spanner_instance.import_instance.name
+  name     = var.spanner_database_id
+  project  = var.project_id
+  ddl      = [for s in split(";", file("${path.module}/../workflow/spanner_schema.sql")) : trimspace(s) if trimspace(s) != ""]
+
+  deletion_protection = true
+}
+
+# Initialize IngestionLock (DML)
+resource "null_resource" "init_spanner_lock" {
+  provisioner "local-exec" {
+    command = <