diff --git a/bin/helm-operations.sh b/bin/helm-operations.sh index 10c57fed2..46d4a75a8 100755 --- a/bin/helm-operations.sh +++ b/bin/helm-operations.sh @@ -11,6 +11,10 @@ CERT_MASTER_EMAIL="${CERT_MASTER_EMAIL:-certmaster@example.com}" # default is set to TRUE to deploy it unless changed DEPLOY_CERT_MANAGER="${DEPLOY_CERT_MANAGER:-TRUE}" +# DEPLOY_CALLING_SERVICES env variable is used to decide if sftd and coturn should get deployed +# default is set to TRUE to deploy them unless changed +DEPLOY_CALLING_SERVICES="${DEPLOY_CALLING_SERVICES:-TRUE}" + # DUMP_LOGS_ON_FAIL to dump logs on failure # it is false by default DUMP_LOGS_ON_FAIL="${DUMP_LOGS_ON_FAIL:-FALSE}" @@ -19,9 +23,7 @@ DUMP_LOGS_ON_FAIL="${DUMP_LOGS_ON_FAIL:-FALSE}" # assuming it to be the public address used by clients to reach public Address HOST_IP="${HOST_IP:-}" -if [ -z "$HOST_IP" ]; then -HOST_IP=$(wget -qO- https://api.ipify.org) -fi +CALLING_NODE="" function dump_debug_logs { local exit_code=$? @@ -32,12 +34,28 @@ function dump_debug_logs { } trap dump_debug_logs ERR -# picking a node for calling traffic (3rd kube worker node) -CALLING_NODE=$(kubectl get nodes --no-headers | tail -n 1 | awk '{print $1}') -if [[ -z "$CALLING_NODE" ]]; then - echo "Error: could not determine the last kube worker node via kubectl" - exit 1 -fi +configure_calling_environment() { + + if [[ "$DEPLOY_CALLING_SERVICES" != "TRUE" ]]; then + return 0 + fi + + if [[ -z "$HOST_IP" ]]; then + HOST_IP=$(wget -qO- https://api.ipify.org) + fi + + if [[ -z "$HOST_IP" ]]; then + echo "Error: could not determine HOST_IP automatically" + exit 1 + fi + + # picking a node for calling traffic (3rd kube worker node) + CALLING_NODE=$(kubectl get nodes --no-headers | tail -n 1 | awk '{print $1}') + if [[ -z "$CALLING_NODE" ]]; then + echo "Error: could not determine the last kube worker node via kubectl" + exit 1 + fi +} sync_pg_secrets() { echo "Retrieving PostgreSQL password from databases-ephemeral for wire-server deployment..." 
@@ -60,7 +78,15 @@ process_values() { ENV=$1 TYPE=$2 - charts=(fake-aws smtp rabbitmq databases-ephemeral reaper wire-server webapp account-pages team-settings ingress-nginx-controller nginx-ingress-services coturn sftd cert-manager) + charts=(fake-aws demo-smtp rabbitmq databases-ephemeral reaper wire-server webapp account-pages team-settings ingress-nginx-controller) + + if [[ "$DEPLOY_CERT_MANAGER" == "TRUE" ]]; then + charts+=(nginx-ingress-services cert-manager) + fi + + if [[ "$DEPLOY_CALLING_SERVICES" == "TRUE" ]]; then + charts+=(coturn sftd) + fi if [[ "$ENV" != "prod" ]] || [[ -z "$TYPE" ]] ; then echo "Error: This function only supports prod deployments with TYPE as values or secrets. ENV must be 'prod', got: '$ENV' and '$TYPE'" @@ -92,41 +118,55 @@ configure_values() { TEMP_DIR=$(mktemp -d) trap 'rm -rf $TEMP_DIR' EXIT - # to find IP address of calling NODE - CALLING_NODE_IP=$(kubectl get node "$CALLING_NODE" -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}') - # Fixing the hosts with TARGET_SYSTEM and setting the turn server sed -e "s/example.com/$TARGET_SYSTEM/g" \ "$BASE_DIR/values/wire-server/values.yaml" > "$TEMP_DIR/wire-server-values.yaml" - # fixing the turnStatic values - yq eval -i ".brig.turnStatic.v2 = [\"turn:$HOST_IP:3478\", \"turn:$HOST_IP:3478?transport=tcp\"]" "$TEMP_DIR/wire-server-values.yaml" - # Fixing the hosts in webapp team-settings and account-pages charts for chart in webapp team-settings account-pages; do sed "s/example.com/$TARGET_SYSTEM/g" "$BASE_DIR/values/$chart/values.yaml" > "$TEMP_DIR/$chart-values.yaml" done - # Setting certManager and DNS records - sed -e 's/useCertManager: false/useCertManager: true/g' \ - -e "/certmasterEmail:$/s/certmasterEmail:/certmasterEmail: $CERT_MASTER_EMAIL/" \ - -e "s/example.com/$TARGET_SYSTEM/" \ - "$BASE_DIR/values/nginx-ingress-services/values.yaml" > "$TEMP_DIR/nginx-ingress-services-values.yaml" + files=(wire-server-values.yaml webapp-values.yaml 
team-settings-values.yaml account-pages-values.yaml) + + if [[ "$DEPLOY_CERT_MANAGER" == "TRUE" ]]; then + # Setting certManager and DNS records for Let's Encrypt based certificate management + sed -e 's/useCertManager: false/useCertManager: true/g' \ + -e "/certmasterEmail:$/s/certmasterEmail:/certmasterEmail: $CERT_MASTER_EMAIL/" \ + -e "s/example.com/$TARGET_SYSTEM/" \ + "$BASE_DIR/values/nginx-ingress-services/values.yaml" > "$TEMP_DIR/nginx-ingress-services-values.yaml" + + files+=(nginx-ingress-services-values.yaml) + fi + + if [[ "$DEPLOY_CALLING_SERVICES" == "TRUE" ]]; then + # to find IP address of calling NODE + CALLING_NODE_IP=$(kubectl get node "$CALLING_NODE" -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}') + + # fixing the turnStatic values + yq eval -i ".brig.turnStatic.v2 = [\"turn:$HOST_IP:3478\", \"turn:$HOST_IP:3478?transport=tcp\"]" "$TEMP_DIR/wire-server-values.yaml" - # Fixing SFTD hosts and setting the cert-manager to http01 - sed -e "s/webapp.example.com/webapp.$TARGET_SYSTEM/" \ - -e "s/sftd.example.com/sftd.$TARGET_SYSTEM/" \ - -e 's/name: letsencrypt-prod/name: letsencrypt-http01/' \ - "$BASE_DIR/values/sftd/values.yaml" > "$TEMP_DIR/sftd-values.yaml" + # Fix SFTD hostnames, and only enable Let's Encrypt specific issuer changes when cert-manager is enabled. 
+ sed -e "s/webapp.example.com/webapp.$TARGET_SYSTEM/" \ + -e "s/sftd.example.com/sftd.$TARGET_SYSTEM/" \ + "$BASE_DIR/values/sftd/values.yaml" > "$TEMP_DIR/sftd-values.yaml" - # Setting coturn node IP values - yq eval -i ".coturnTurnListenIP = \"$CALLING_NODE_IP\"" "$BASE_DIR/values/coturn/values.yaml" - yq eval -i ".coturnTurnRelayIP = \"$CALLING_NODE_IP\"" "$BASE_DIR/values/coturn/values.yaml" - yq eval -i ".coturnTurnExternalIP = \"$HOST_IP\"" "$BASE_DIR/values/coturn/values.yaml" + cp "$BASE_DIR/values/coturn/values.yaml" "$TEMP_DIR/coturn-values.yaml" + + if [[ "$DEPLOY_CERT_MANAGER" == "TRUE" ]]; then + yq eval -i '.tls.issuerRef.name = "letsencrypt-http01"' "$TEMP_DIR/sftd-values.yaml" + fi + + # Setting coturn node IP values + yq eval -i ".coturnTurnListenIP = \"$CALLING_NODE_IP\"" "$TEMP_DIR/coturn-values.yaml" + yq eval -i ".coturnTurnRelayIP = \"$CALLING_NODE_IP\"" "$TEMP_DIR/coturn-values.yaml" + yq eval -i ".coturnTurnExternalIP = \"$HOST_IP\"" "$TEMP_DIR/coturn-values.yaml" + + files+=(sftd-values.yaml coturn-values.yaml) + fi # Compare and copy files if different - for file in wire-server-values.yaml webapp-values.yaml team-settings-values.yaml account-pages-values.yaml \ - nginx-ingress-services-values.yaml sftd-values.yaml; do + for file in "${files[@]}"; do if ! 
cmp -s "$TEMP_DIR/$file" "$BASE_DIR/values/${file%-values.yaml}/values.yaml"; then cp "$TEMP_DIR/$file" "$BASE_DIR/values/${file%-values.yaml}/values.yaml" echo "Updating $BASE_DIR/values/${file%-values.yaml}/values.yaml" @@ -188,6 +228,11 @@ deploy_cert_manager() { deploy_calling_services() { + if [[ "$DEPLOY_CALLING_SERVICES" != "TRUE" ]]; then + echo "Skipping sftd and coturn deployment because DEPLOY_CALLING_SERVICES=$DEPLOY_CALLING_SERVICES" + return 0 + fi + echo "Deploying sftd and coturn" # select the node to deploy sftd kubectl annotate node "$CALLING_NODE" wire.com/external-ip="$HOST_IP" --overwrite @@ -202,6 +247,9 @@ deploy_calling_services() { main() { +# initialize calling-service specific values only when enabled +configure_calling_environment + # Create prod-values.example.yaml to values.yaml and take backup process_values "prod" "values" # Create prod-secrets.example.yaml to secrets.yaml and take backup @@ -228,7 +276,7 @@ if [[ "$DEPLOY_CERT_MANAGER" == "TRUE" ]]; then kubectl get certificate fi -# deploying sft and coturn services +# deploying sft and coturn services when enabled deploy_calling_services } diff --git a/bin/offline-deploy.sh b/bin/offline-deploy.sh index 3bede967a..34f9f57fb 100755 --- a/bin/offline-deploy.sh +++ b/bin/offline-deploy.sh @@ -41,4 +41,7 @@ fi $DOCKER_RUN_BASE $SSH_MOUNT $WSD_CONTAINER ./bin/offline-cluster.sh -sudo docker run --network=host -v $PWD:/wire-server-deploy $WSD_CONTAINER sh -c 'TARGET_SYSTEM="example.dev" CERT_MASTER_EMAIL="certmaster@example.dev" DEPLOY_CERT_MANAGER=TRUE DUMP_LOGS_ON_FAIL=TRUE ./bin/helm-operations.sh' +# verify if all kube-system pods are running well +sudo docker run --network=host -v $PWD:/wire-server-deploy $WSD_CONTAINER sh -c 'kubectl -n kube-system get pods' + +sudo docker run --network=host -v $PWD:/wire-server-deploy $WSD_CONTAINER sh -c 'TARGET_SYSTEM="example.dev" CERT_MASTER_EMAIL="certmaster@example.dev" DEPLOY_CERT_MANAGER=TRUE DUMP_LOGS_ON_FAIL=TRUE 
DEPLOY_CALLING_SERVICES=TRUE ./bin/helm-operations.sh' diff --git a/changelog.d/3-deploy-builds/wiab-staging-fixes b/changelog.d/3-deploy-builds/wiab-staging-fixes new file mode 100644 index 000000000..7eb73e076 --- /dev/null +++ b/changelog.d/3-deploy-builds/wiab-staging-fixes @@ -0,0 +1,3 @@ +Added: documentation around managing staging.yml inventory, how to verify, download artifact, and documentation around cert-manager and calling components +Added: a flag DEPLOY_CALLING_SERVICES to control the calling services and improved the flow based on cert-manager and calling services requirement +Added: instructions around verifying MTU management and calico kernel requirements diff --git a/offline/wiab-staging.md b/offline/wiab-staging.md index 5b37f53b2..52a5b2730 100644 --- a/offline/wiab-staging.md +++ b/offline/wiab-staging.md @@ -59,7 +59,7 @@ Our deployment will be into 7 VMs with [Ubuntu 22](https://releases.ubuntu.com/j ### Internet access for VMs: In most cases, Wire Server components do not require internet access, except in the following situations: -- **External email services** – If your users’ email servers are hosted on the public internet (for example, user@gmail.com etc). +- **External email services** – If your users’ email providers are hosted on the public internet (for example, `user@gmail.com`). If outbound internet access is not allowed and no internal email service is available on your local network, email-based flows such as verification codes, invitations, and some login emails will not be delivered. In that case, you must retrieve the required codes from the logs instead. Read more at [I deployed demo-smtp and I want to skip email configuration and retrieve verification codes directly](https://docs.wire.com/latest/how-to/install/troubleshooting.html?h=smtp#i-deployed-demo-smtp-and-i-want-to-skip-email-configuration-and-retrieve-verification-codes-directly). 
- **Mobile push notifications (FCM/APNS)** – Required to enable notifications for Android and Apple mobile devices. Wire uses [AWS services](https://docs.wire.com/latest/how-to/install/infrastructure-configuration.html#enable-push-notifications-using-the-public-appstore-playstore-mobile-wire-clients) to relay notifications to Firebase Cloud Messaging (FCM) and Apple Push Notification Service (APNS). - **Third-party content previews** – If you want clients to display previews for services such as Giphy, Google, Spotify, or SoundCloud. Wire provides a proxy service for third-party content so clients do not communicate directly with these services, preventing exposure of IP addresses, cookies, or other metadata. - **Federation with other Wire servers** – Required if your deployment needs to federate with another Wire server hosted on the public internet. @@ -115,10 +115,10 @@ cd wire-server-deploy **Step 2: Configure your Ansible inventory for your physical machine** -A sample inventory is available at [ansible/inventory/demo/wiab-staging.yml](https://github.com/wireapp/wire-server-deploy/blob/master/ansible/inventory/demo/wiab-staging.yml). +A sample inventory is available at [ansible/inventory/demo/wiab-staging.yml](../ansible/inventory/demo/wiab-staging.yml). Replace example.com with your physical machine (`adminhost`) address where KVM is available and adjust other variables like `ansible_user` and `ansible_ssh_private_key_file`. The SSH user for ansible `ansible_user` should have password-less `sudo` access. The adminhost should be running Ubuntu 22.04. From here on, we would refer the physical machine as `adminhost`. -The `private_deployment` variable determines whether the VMs created below will have internet access. When set to `true` (default value), no internet access is available to VMs. Check [Internet access for VMs](#internet-access-for-vms) to understand more about it. 
+The `private_deployment` variable determines whether the VMs created below will have internet access. When set to `true` (default value), no internet access is available to VMs. Check [Network Traffic Configuration](#network-traffic-configuration) to understand more about it. **Step 3: Run the VM and network provision** @@ -128,13 +128,83 @@ ansible-playbook -i ansible/inventory/demo/wiab-staging.yml ansible/wiab-staging *Note: Ansible core version 2.16.3 or compatible is required for this step* -## Ensure secondary ansible inventory for VMs +## When VMs are ready Now you should have 7 VMs running on your `adminhost`. If you have used the ansible playbook, you should also have a directory `/home/ansible_user/wire-server-deploy` with all resources required for further deployment. If you didn't use the above playbook, download the `wire-server-deploy` artifact shared by Wire support and extract it with tar. +```bash +wget https://s3-eu-west-1.amazonaws.com/public.wire.com/artifacts/wire-server-deploy-static-.tgz +tar xvzf wire-server-deploy-static-.tgz +cd wire-server-deploy +``` + Ensure the inventory file `ansible/inventory/offline/inventory.yml` in the directory `/home/ansible_user/wire-server-deploy` contains values corresponding to your VMs. If you have already used the [Ansible playbook above](#getting-started-with-ansible-playbook) to set up VMs, this file should have been prepared for you. -The purpose of secondary ansible inventory is to interact only with the VMs. All the operations concerning the secondary inventory are meant to install datastores and k8s services. +The purpose of this secondary Ansible inventory is to interact only with the 7 VMs after they have been created. It is used by the offline deployment steps to install Kubernetes and the stateful services. 
Our kubernetes solution uses `Calico` as the default `Container Network Interface (CNI)` plugin for cluster networking; ensure the [kernel requirements](https://docs.tigera.io/calico/latest/getting-started/kubernetes/requirements#kernel-dependencies) are met on the VMs before deploying Kubernetes. + +If the provisioning playbook did not generate it for you, create it from the template [ansible/inventory/offline/staging.yml](../ansible/inventory/offline/staging.yml): + +```bash +cp ansible/inventory/offline/staging.yml ansible/inventory/offline/inventory.yml +``` + +Then edit `ansible/inventory/offline/inventory.yml` and replace all placeholder values. + +**Critical values to review in the inventory:** + +- `all.vars.ansible_user`: the SSH user present on every VM, should be part of `sudoers` list. +- `all.vars.ansible_ssh_private_key_file`: uncomment and set this if you authenticate with a private key, for example `ssh/id_ed25519`. +- `assethost.hosts.assethost.ansible_host`: IP address of the asset host VM. +- `kube-node.hosts.kubenode1|2|3.ansible_host`: IP addresses of the three Kubernetes VMs. +- `datanodes.hosts.datanode1|2|3.ansible_host`: IP addresses of the three data VMs. +- `cassandra.vars.cassandra_network_interface`, `elasticsearch.vars.elasticsearch_network_interface`, `minio.vars.minio_network_interface`, `rmq-cluster.vars.rabbitmq_network_interface`, `postgresql.vars.postgresql_network_interface`: the network interface name used by those services inside each data VM, for example `enp1s0`. Do not assume this value; verify it on your machines. +- `rmq-cluster.vars.rabbitmq_cluster_master`: the RabbitMQ primary node. Keep this aligned with the hostname of one of the data nodes, typically `datanode1`. +- `postgresql.vars.repmgr_node_config`: The repmgr config to ensure HA PostgreSQL cluster. + +> **Note:** If your environment uses a non-standard MTU (e.g.
cloud providers, VPNs, or overlay networks), you must [configure the MTU](https://github.com/kubernetes-sigs/kubespray/blob/master/docs/CNI/calico.md#configuring-interface-mtu) for Calico in `k8s-cluster.vars`. Ensure all VMs have the same MTU on their primary interface: +> ```bash +> ip link show +> ``` +> Then set: +> ```yaml +> # k8s-cluster.vars. +> calico_mtu: +> calico_veth_mtu: +> ``` +> As a rule of thumb: +> - `calico_mtu = underlying network MTU - encapsulation overhead` +> - `calico_veth_mtu` ≤ `calico_mtu` + +**Hostnames matter:** + +- The inventory hostnames `assethost`, `kubenode1`, `kubenode2`, `kubenode3`, `datanode1`, `datanode2`, and `datanode3` should match the actual hostnames configured inside the VMs. +- This is especially important for RabbitMQ, because the nodes in `rmq-cluster` must match each VM's real hostname. +- `datanode1` is also referenced as the Cassandra seed and as the default RabbitMQ cluster master in the template, so change those only if your topology differs. + +**SSH authentication options:** + +- If the VMs are reachable with a private key, set `ansible_ssh_private_key_file` in the inventory and run Ansible normally. +- If you rely on an SSH agent, keep `ansible_ssh_private_key_file` commented out and ensure the agent on the `adminhost` can reach all VMs. +- If you do not use a private key entry in the inventory and password authentication is enabled on the VMs, add `--ask-pass` when running ansible-playbooks manually and `--ask-become-pass` for sudo access. +- Our installation scripts are non-interactive, define `ansible_password` and `ansible_become_pass` in the inventory instead of relying on interactive password prompts. + +Before running the offline deployment scripts, verify that the inventory resolves to the expected machines. The commands below assume you are running them from `/home/ansible_user/wire-server-deploy` on the `adminhost`. 
+ +```bash +# confirm the inventory hostnames match the actual VM hostnames +ansible all -i ansible/inventory/offline/inventory.yml -m shell -a 'hostname' + +# verify the default IPv4 interface and address reported by Ansible +ansible all -i ansible/inventory/offline/inventory.yml -m setup -a 'filter=ansible_default_ipv4' + +# verify time and timezone consistency across the machines +ansible all -i ansible/inventory/offline/inventory.yml -m shell -a 'date' + +# verify if the MTU is consistent across all the VMs +ansible all -i ansible/inventory/offline/inventory.yml -m shell -a "ip link show | grep mtu" +``` + +If any hostname, IP address, SSH setting, or interface name is wrong at this stage, correct `ansible/inventory/offline/inventory.yml` before continuing. The next deployment steps assume this inventory is accurate. ## Next steps @@ -151,6 +221,7 @@ Once the inventory is ready, please continue with the following steps: ``` - You can always use this alias `d` later to interact with the ansible playbooks, k8s cluster and the helm charts. - The docker container mounts everything here from the `wire-server-deploy` directory, hence this acts an entry point for all the future interactions with ansible, k8s and helm charts. + - Please ensure that this environment doesn't contain `quay.io/wire/wire-server-deploy` docker image from previous installations, if it does then such images need to be removed. - **[Generating secrets](docs_ubuntu_22.04.md#generating-secrets)** - Run `bin/offline-secrets.sh` to generate fresh secrets for Minio and coturn services. It uses the docker container images shipped inside the `wire-server-deploy` directory. @@ -170,32 +241,48 @@ Once the inventory is ready, please continue with the following steps: ``` - Run the above command to deploy Kubernetes and stateful services (Cassandra, PostgreSQL, Elasticsearch, Minio, RabbitMQ). This script deploys all infrastructure needed for Wire backend operations.
+To confirm that the kubernetes cluster has been set up correctly, check that all pods are in `Running` or `Completed` state. Any `CrashLoopBackOff`, `Error`, or `Pending` states indicate a problem: +```bash +d kubectl -n kube-system get pods +``` + ### Helm Operations to install wire services and supporting helm charts **Helm chart deployment (automated):** The script `bin/helm-operations.sh` will deploy the charts for you. It prepares `values.yaml`/`secrets.yaml`, customizes them for your domain/IPs, then runs Helm installs/upgrades in the correct order. Prepare the values before running it. **User-provided inputs (set these before running):** -- `TARGET_SYSTEM`: your domain (e.g., `wire.example.com` or `example.dev`). -- `CERT_MASTER_EMAIL`: email used by cert-manager for ACME registration. -- `HOST_IP`: public IP that matches your DNS A record (auto-detected if empty). +- `TARGET_SYSTEM`: your domain (e.g., `wire.example.com` or `example.dev`) under which you have created the subdomains; see [How to set up DNS records](https://docs.wire.com/latest/how-to/install/demo-wiab.html#dns-requirements). +- `CERT_MASTER_EMAIL`: email used by cert-manager for ACME registration (required when `DEPLOY_CERT_MANAGER=TRUE`). +- `DEPLOY_CALLING_SERVICES`: set to `TRUE` or `FALSE` to control deployment of the calling services (`sftd` and `coturn`). Default is `TRUE`. +- `HOST_IP`: the IP address on which traffic for Wire calling services is expected to arrive. This should match your public DNS A record since we are expected to deploy Wire and calling services behind a single firewall. The calling traffic configuration is described in [Network Traffic Configuration](#network-traffic-configuration) and [Configure the port redirection in Nftables](coturn.md#configure-the-port-redirection-in-nftables).
It is not required if `DEPLOY_CALLING_SERVICES=FALSE` + +**Calling services behavior:** +- When `DEPLOY_CALLING_SERVICES=TRUE` and `HOST_IP` is not passed, the script tries to detect the publicly visible address for this setup by running `wget -qO- https://api.ipify.org`. +- When `DEPLOY_CALLING_SERVICES=FALSE`, the script skips deployment of `sftd` and `coturn`, and it does not evaluate any `HOST_IP`-dependent logic. **TLS / certificate behavior (cert-manager vs. Bring Your Own):** - By default, `bin/helm-operations.sh` has `DEPLOY_CERT_MANAGER=TRUE`, which installs cert-manager and configures a Let’s Encrypt (HTTP-01) issuer for the ingress charts. -- If you **do not** want Let’s Encrypt / cert-manager (for example, you are using **[Bring Your Own certificates](docs_ubuntu_22.04.md#acquiring--deploying-ssl-certificates)**), disable this step by passing the environment variable `DEPLOY_CERT_MANAGER=FALSE` when running `bin/helm-operations.sh`. - - When choosing `DEPLOY_CERT_MANAGER=FALSE`, ensure your ingress is configured with your own TLS secret(s) as described at [Acquiring / Deploying SSL Certificates](docs_ubuntu_22.04.md#acquiring--deploying-ssl-certificates). +- If you **do not** want Let’s Encrypt / cert-manager for TLS certs for the ingress, disable this step by passing the environment variable `DEPLOY_CERT_MANAGER=FALSE` when running `bin/helm-operations.sh`. + - When choosing `DEPLOY_CERT_MANAGER=FALSE`, ensure your ingress is configured with your own TLS secret(s) as described at [Acquiring / Deploying SSL Certificates](docs_ubuntu_22.04.md#acquiring--deploying-ssl-certificates). The `nginx-ingress-services` should be deployed manually. - When choosing `DEPLOY_CERT_MANAGER=TRUE`, ensure if further network configuration is required by following [cert-manager behaviour in NAT / bridge environments](#cert-manager-behaviour-in-nat--bridge-environments). 
**To run the automated helm chart deployment with your variables**: ```bash # example command - verify the variables before running it -d sh -c 'TARGET_SYSTEM="example.dev" CERT_MASTER_EMAIL="certmaster@example.dev" DEPLOY_CERT_MANAGER=TRUE ./bin/helm-operations.sh' +d sh -c 'TARGET_SYSTEM="example.dev" CERT_MASTER_EMAIL="certmaster@example.dev" DEPLOY_CERT_MANAGER=TRUE DEPLOY_CALLING_SERVICES=TRUE HOST_IP="a.a.a.a" ./bin/helm-operations.sh' +``` + +If you do not want to deploy the calling services, run: + +```bash +d sh -c 'TARGET_SYSTEM="example.dev" CERT_MASTER_EMAIL="certmaster@example.dev" DEPLOY_CERT_MANAGER=TRUE DEPLOY_CALLING_SERVICES=FALSE ./bin/helm-operations.sh' ``` **Charts deployed by the script:** - External datastores and helpers: `cassandra-external`, `elasticsearch-external`, `postgresql-external`, `minio-external`, `rabbitmq-external`, `databases-ephemeral`, `reaper`, `fake-aws`, `smtp`. - Wire services: `wire-server`, `webapp`, `account-pages`, `team-settings`. - Ingress and certificates: `ingress-nginx-controller`, `cert-manager`, `nginx-ingress-services`. -- Calling services: `sftd`, `coturn`. +- Calling services: `sftd`, `coturn` when `DEPLOY_CALLING_SERVICES=TRUE`. **Values and secrets generation:** - Creates `values.yaml` and `secrets.yaml` from `prod-values.example.yaml` and `prod-secrets.example.yaml` for each chart under `values/`. 
@@ -203,6 +290,13 @@ d sh -c 'TARGET_SYSTEM="example.dev" CERT_MASTER_EMAIL="certmaster@example.dev" *Note: The `bin/helm-operations.sh` script above deploys these charts; you do not need to run the Helm commands manually unless you want to customize or debug.* +**Manually removing non-required helm charts**: +- If some helm charts are not required in your environment, such as `demo-smtp` for email relaying, then use the following command to uninstall them: +```bash +#d helm uninstall CHART_NAME +d helm uninstall demo-smtp +``` + ## Network Traffic Configuration ### Bring traffic from the adminhost to Wire services in the k8s cluster @@ -228,7 +322,8 @@ The `adminhost` must forward traffic from external clients to the Kubernetes clu - All other inbound traffic to adminhost → drop → default deny policy 4. **Masquerading (If [Internet access for VMs](#internet-access-for-vms) is required)** – Enable outbound connectivity for VMs - - Any traffic from VM subnet leaving via WAN interface → SNAT/masquerade → ensures return traffic from internet. + - Any traffic from VM subnet leaving via WAN interface → SNAT/masquerade → ensures return traffic from internet. + - Controlled by the variable `private_deployment` 5. **Conditional Rules (cert-manager / HTTP-01 in NAT setups)** – Temporary adjustments for certificate validation - DNAT hairpin traffic (VM → public IP → VM) → may require SNAT/masquerade on VM bridge → ensures return path during HTTP-01 self-checks @@ -281,6 +376,9 @@ If you have already used the `wiab-staging-provision.yml` ansible playbook to cr ```bash ansible-playbook -i ansible/inventory/demo/wiab-staging.yml ansible/wiab-staging-provision.yml --tags nftables ``` + +> **Note:** You can use this playbook to change the internet access to VMs by modifying the variable `private_deployment` and re-running the above playbook.
+ Alternatively, if you have not used the `wiab-staging-provision.yml` ansible playbook to create the VMs but would like to configure nftables rules, you can invoke the ansible playbook [wiab-staging-nftables.yaml](https://github.com/wireapp/wire-server-deploy/blob/master/ansible/wiab-staging-nftables.yaml) against the physical node. The playbook is available in the directory `wire-server-deploy/ansible`. The inventory file `inventory.yml` should define the following variables: @@ -318,9 +416,9 @@ When cert-manager performs HTTP-01 self-checks inside the cluster, traffic can h - Pod → Node → host public IP → DNAT → Node → Ingress -> **Note**: Using Let's encrypt with `cert-manager` requires internet access ([to at least `acme-v02.api.letsencrypt.org`](https://letsencrypt.org/docs/acme-protocol-updates/)) to issue TLS certs. If you have chosen to keep the network private i.e. `private_deployment=true` for the VMs when applying nftables rules aka no internet access to VMs, then we need to make a temporary exception for this. +> **Note**: Using Let's Encrypt with `cert-manager` requires internet access ([to at least `acme-v02.api.letsencrypt.org`](https://letsencrypt.org/docs/acme-protocol-updates/)) to issue TLS certificates. If you have chosen to keep the network private, that is `private_deployment=true` for the VMs when applying nftables rules, then you need to make a temporary exception for this traffic. The same outbound access will also be required later for certificate renewals (after 180 days). > -> To add a nftables masquerading rule for all outgoing traffic run the following command on the `adminhost` or make a similar change in your firewall: +> To temporarily provide outbound internet access from the VMs, add the following nftables masquerading rule on the `adminhost`. 
Replace `INF_WAN` with the WAN interface that should carry this traffic, or make an equivalent change in your firewall: > > ```bash > # Host WAN interface name @@ -400,7 +498,7 @@ If you observe HTTP-01 challenge timeouts or self-check failures in a NAT/bridge xargs -r -I {} sudo nft delete rule ip nat POSTROUTING handle {} ``` -> **Note**: If above you had made an exception to allow temporary internet access to VMs by adding a nftables rules, then this should be removed now. +> **Note**: If you added an nftables rule above to allow temporary internet access for the VMs, remove it after certificate issuance is complete. > > To remove the nftables masquerading rule for all outgoing traffic run the following command: > @@ -414,7 +512,7 @@ If you observe HTTP-01 challenge timeouts or self-check failures in a NAT/bridge > > If you are using a different implementation than nftables then please ensure temporary Internet access to VMs has been removed. -For additional background on when hairpin NAT is required and how it relates to WIAB Dev and WIAB Staging, see [Hairpin networking for WIAB Dev and WIAB Staging](tls-certificates.md#hairpin-networking-for-wiab-dev-and-wiab-staging). +> **Note**: If email delivery is not working, or if Android/iOS push notifications are still not working after you have configured the required AWS credentials, ensure the required outbound access is allowed as explained at [Internet access for VMs](#internet-access-for-vms). ## Further Reading