import-automation/terraform/main.tf (new file, 395 additions, 0 deletions)
# Terraform deployment for Data Commons Import Automation Workflow
# This file sets up:
# - Necessary GCP APIs
# - Secret Manager for the import-config secret
# - GCS bucket for imports (also used for Dataflow templates; the "-mount" bucket referenced below is not created here)
# - Spanner Instance and Database with schema
# - Artifact Registry for hosting Docker images (Flex Template & Executor)
# - Pub/Sub Topic and Subscription for triggering imports
# - Cloud Build Triggers for CI/CD of Executor, Functions, Workflows, and Ingestion Pipeline
# - Unified Service Account with necessary IAM roles for Workflows, Functions, and Pub/Sub
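#
# A minimal sketch of how this configuration might be applied (the values
# below are illustrative placeholders, not real project settings):
#
#   terraform init
#   terraform plan  -var="project_id=my-gcp-project"
#   terraform apply -var="project_id=my-gcp-project"
#
# Terraform prompts interactively for any variable without a default
# (here, dc_api_key) that is not supplied another way.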

terraform {
required_providers {
google = {
source = "hashicorp/google"
version = ">= 5.0.0"
}
archive = {
source = "hashicorp/archive"
}
}
}

variable "project_id" {
description = "The GCP Project ID"
type = string
}

variable "region" {
description = "The GCP Region"
type = string
default = "us-central1"
}

variable "github_owner" {
description = "The owner of the GitHub repository"
type = string
default = "datacommonsorg"
}

variable "github_repo_name" {
description = "The name of the GitHub repository (data)"
type = string
default = "data"
}

variable "github_repo_ingestion_name" {
description = "The name of the GitHub repository (import)"
type = string
default = "import"
}

variable "spanner_instance_id" {
description = "Spanner Instance ID"
type = string
default = "datcom-import-instance"
}

variable "spanner_database_id" {
description = "Spanner Database ID"
type = string
default = "dc-import-db"
}

variable "bq_dataset_id" {
description = "BigQuery Dataset ID for aggregation"
type = string
default = "datacommons"
}

variable "dc_api_key" {
description = "Data Commons API Key"
type = string
sensitive = true
}
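# Since dc_api_key is marked sensitive, Terraform's standard TF_VAR_<name>
# environment variable convention is one way to supply it without putting it
# on the command line, e.g.:
#
#   export TF_VAR_dc_api_key="<your-api-key>"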

# --- APIs ---

locals {
services = [
"artifactregistry.googleapis.com",
"batch.googleapis.com",
"cloudbuild.googleapis.com",
"cloudfunctions.googleapis.com",
"cloudscheduler.googleapis.com",
"compute.googleapis.com",
"dataflow.googleapis.com",
"iam.googleapis.com",
"pubsub.googleapis.com",
"run.googleapis.com",
"secretmanager.googleapis.com",
"spanner.googleapis.com",
"storage.googleapis.com",
"workflows.googleapis.com",
]
}

resource "google_project_service" "services" {
for_each = toset(local.services)
project = var.project_id
service = each.key

disable_on_destroy = false
}

# --- Secret Manager ---

resource "google_secret_manager_secret" "import_config" {
secret_id = "import-config"
project = var.project_id

replication {
auto {}
}

depends_on = [google_project_service.services]
}

resource "google_secret_manager_secret_version" "import_config_v1" {
secret = google_secret_manager_secret.import_config.id
secret_data = jsonencode({
dc_api_key = var.dc_api_key
})
}
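# To spot-check the stored value after apply (the secret payload is the JSON
# written above), a standard gcloud read looks like:
#
#   gcloud secrets versions access latest --secret=import-config --project=<project-id>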

resource "google_secret_manager_secret" "dc_api_key" {
secret_id = "dc-api-key"
project = var.project_id

replication {
auto {}
}

depends_on = [google_project_service.services]
}

resource "google_secret_manager_secret_version" "dc_api_key_v1" {
secret = google_secret_manager_secret.dc_api_key.id
secret_data = var.dc_api_key
}

# --- GCS Buckets ---

resource "google_storage_bucket" "import_bucket" {
name = "${var.project_id}-imports"
location = var.region
project = var.project_id
uniform_bucket_level_access = true

depends_on = [google_project_service.services]
}
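# GCS bucket names are globally unique, so prefixing with the project ID keeps
# "${var.project_id}-imports" from colliding with buckets in other projects.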

# --- Spanner ---

resource "google_spanner_instance" "import_instance" {
name = var.spanner_instance_id
config = "regional-${var.region}"
display_name = "Import Automation Spanner Instance"
num_nodes = 1
project = var.project_id

depends_on = [google_project_service.services]
}

resource "google_spanner_database" "import_db" {
instance = google_spanner_instance.import_instance.name
name = var.spanner_database_id
project = var.project_id
ddl = [for s in split(";", file("${path.module}/../workflow/spanner_schema.sql")) : trimspace(s) if trimspace(s) != ""]

deletion_protection = true
}
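# The ddl expression above loads ../workflow/spanner_schema.sql, splits it on
# ";", and drops blank fragments, because google_spanner_database expects one
# DDL statement per list element. Note that this simple split would mangle any
# statement containing a literal ";" inside a string.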

# Initialize IngestionLock (DML)
resource "null_resource" "init_spanner_lock" {
provisioner "local-exec" {
command = <<EOT
gcloud spanner databases execute-sql ${google_spanner_database.import_db.name} \
--instance=${google_spanner_instance.import_instance.name} \
--project=${var.project_id} \
--sql="INSERT INTO IngestionLock (LockID) VALUES ('global_ingestion_lock')" || echo 'Lock already exists'
EOT
}

depends_on = [google_spanner_database.import_db]
}
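# null_resource provisioners run once, when the resource is created. To re-run
# the lock seeding (e.g. after recreating the database), force replacement:
#
#   terraform apply -replace=null_resource.init_spanner_lock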

# --- IAM ---

resource "google_service_account" "automation_sa" {
account_id = "import-automation-sa"
display_name = "Service Account for Import Automation (Workflows & Functions)"
project = var.project_id
}

resource "google_project_iam_member" "automation_roles" {
for_each = toset([
"roles/workflows.admin",
"roles/cloudfunctions.admin",
"roles/run.admin",
"roles/run.invoker",
"roles/batch.jobsEditor",
"roles/dataflow.admin",
"roles/logging.logWriter",
"roles/storage.objectAdmin",
"roles/iam.serviceAccountUser",
"roles/spanner.databaseAdmin",
"roles/bigquery.dataEditor",
"roles/bigquery.jobUser",
"roles/artifactregistry.admin",
"roles/secretmanager.secretAccessor",
"roles/cloudbuild.builds.builder",
])
project = var.project_id
role = each.key
member = "service_account:${google_service_account.automation_sa.email}"
}

# --- Artifact Registry ---

resource "google_artifact_registry_repository" "automation_repo" {
location = var.region
repository_id = "import-automation"
description = "Docker repository for import automation images"
format = "DOCKER"
project = var.project_id

depends_on = [google_project_service.services]
}
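# Images pushed here are addressed as
# <region>-docker.pkg.dev/<project>/import-automation/<image>, the standard
# Artifact Registry Docker path format used by the triggers below.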

# --- Cloud Build Triggers ---

resource "google_cloudbuild_trigger" "executor_trigger" {
name = "dc-import-executor"
location = var.region
project = var.project_id

github {
owner = var.github_owner
name = var.github_repo_name
push {
branch = "^main$"
}
}

filename = "import-automation/executor/cloudbuild.yaml"

substitutions = {
_DOCKER_IMAGE = "${var.region}-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.automation_repo.name}/dc-import-executor"
}

service_account = google_service_account.automation_sa.id
depends_on = [google_artifact_registry_repository.automation_repo]
}

resource "google_cloudbuild_trigger" "workflow_trigger" {
name = "import-workflow-trigger"
location = var.region
project = var.project_id

github {
owner = var.github_owner
name = var.github_repo_name
push {
branch = "^main$"
}
}

filename = "import-automation/workflow/cloudbuild_main.yaml"

substitutions = {
_PROJECT_ID = var.project_id
_SPANNER_PROJECT_ID = var.project_id
_SPANNER_INSTANCE_ID = var.spanner_instance_id
_SPANNER_DATABASE_ID = var.spanner_database_id
_GCS_BUCKET_ID = google_storage_bucket.import_bucket.name
_LOCATION = var.region
_GCS_MOUNT_BUCKET = "${var.project_id}-mount"
_BQ_DATASET_ID = var.bq_dataset_id
}

service_account = google_service_account.automation_sa.id
}
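# Cloud Build exposes the user-defined substitutions above to the build config
# as ${_PROJECT_ID}, ${_SPANNER_INSTANCE_ID}, etc.; user-defined substitution
# keys must begin with an underscore.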

resource "google_cloudbuild_trigger" "ingestion_trigger" {
name = "ingestion-pipeline-trigger"
location = var.region
project = var.project_id

github {
owner = var.github_owner
name = var.github_repo_ingestion_name
push {
branch = "^main$"
}
}

filename = "pipeline/ingestion/cloudbuild.yaml"

substitutions = {
_TEMPLATE_BUCKET = google_storage_bucket.import_bucket.name
_IMAGE_GCR_PATH = "${var.region}-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.automation_repo.name}/dataflow-templates/ingestion"
_VERSION = "0.1-SNAPSHOT"
}

service_account = google_service_account.automation_sa.id
depends_on = [
google_artifact_registry_repository.automation_repo,
google_storage_bucket.import_bucket
]
}

resource "google_cloudbuild_trigger" "import_tool_trigger" {
name = "dc-import-tool-trigger"
location = var.region
project = var.project_id

github {
owner = var.github_owner
name = var.github_repo_ingestion_name
push {
branch = "^main$"
}
}

filename = "cloudbuild.yaml"

substitutions = {
_GCS_BUCKET = google_storage_bucket.import_bucket.name
_DOCKER_IMAGE = "${var.region}-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.automation_repo.name}/dc-import-executor"
}

service_account = google_service_account.automation_sa.id
}

# --- Pub/Sub ---

resource "google_pubsub_topic" "import_automation_trigger" {
name = "import-automation-trigger"
project = var.project_id
}

resource "google_pubsub_subscription" "import_automation_sub" {
name = "import-automation-sub"
topic = google_pubsub_topic.import_automation_trigger.name
project = var.project_id

filter = "attributes.transfer_status=\"TRANSFER_COMPLETED\""

push_config {
# Note: this endpoint is deployed via Cloud Build. The URL below assumes a
# Functions gen2 (Cloud Run) deployment, but real run.app URLs embed a
# generated suffix rather than the literal project ID and region, so this
# value likely needs to be replaced with the actual service URL once the
# function is deployed.
push_endpoint = "https://import-automation-helper-${var.project_id}-${var.region}.a.run.app"
oidc_token {
service_account_email = google_service_account.automation_sa.email
}
}
}
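# The filter above delivers only messages carrying the attribute
# transfer_status="TRANSFER_COMPLETED". With push delivery plus oidc_token,
# Pub/Sub attaches an OIDC identity token for the service account to each
# request, letting an authenticated Cloud Run endpoint verify the caller; the
# account already holds roles/run.invoker via the IAM bindings above.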

# --- Trigger Initial Builds ---

resource "null_resource" "trigger_initial_builds" {
provisioner "local-exec" {
command = <<EOT
gcloud builds triggers run ${google_cloudbuild_trigger.executor_trigger.name} --region=${var.region} --project=${var.project_id} --branch=main
gcloud builds triggers run ${google_cloudbuild_trigger.ingestion_trigger.name} --region=${var.region} --project=${var.project_id} --branch=main
gcloud builds triggers run ${google_cloudbuild_trigger.workflow_trigger.name} --region=${var.region} --project=${var.project_id} --branch=main
gcloud builds triggers run ${google_cloudbuild_trigger.import_tool_trigger.name} --region=${var.region} --project=${var.project_id} --branch=main
EOT
}

depends_on = [
google_cloudbuild_trigger.executor_trigger,
google_cloudbuild_trigger.ingestion_trigger,
google_cloudbuild_trigger.workflow_trigger,
google_cloudbuild_trigger.import_tool_trigger
]
}
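# Like init_spanner_lock above, this fires only when the null_resource is
# created; re-run it with `terraform apply -replace=...`. Note that
# `gcloud builds triggers run` kicks off the builds but does not wait for
# them to finish.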

# Outputs
output "executor_trigger_id" {
value = google_cloudbuild_trigger.executor_trigger.id
}

output "workflow_trigger_id" {
value = google_cloudbuild_trigger.workflow_trigger.id
}

output "ingestion_trigger_id" {
value = google_cloudbuild_trigger.ingestion_trigger.id
}

output "import_tool_trigger_id" {
value = google_cloudbuild_trigger.import_tool_trigger.id
}
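# After apply, individual trigger IDs can be read back with, e.g.:
#
#   terraform output executor_trigger_id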