Skip to content

Commit 7ae6e6c

Browse files
authored
feat: add monitors (#64)
1 parent fabacba commit 7ae6e6c

3 files changed

Lines changed: 205 additions & 45 deletions

File tree

.github/test-matrix.json

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
{
2+
"arch": [
3+
{
4+
"runner": "ubuntu-latest",
5+
"rie": "aws-lambda-rie",
6+
"label": "x64"
7+
},
8+
{
9+
"runner": "ubuntu-24.04-arm",
10+
"rie": "aws-lambda-rie-arm64",
11+
"label": "arm64"
12+
}
13+
],
14+
"distro_config": [
15+
{
16+
"distro": "al2023",
17+
"distro_version": "al2023",
18+
"runtime_version": "3.4",
19+
"executable": "/usr/local/bin/aws_lambda_ric"
20+
},
21+
{
22+
"distro": "al2023",
23+
"distro_version": "al2023",
24+
"runtime_version": "3.3",
25+
"executable": "/usr/local/bin/aws_lambda_ric"
26+
},
27+
{
28+
"distro": "alpine",
29+
"distro_version": "3.23",
30+
"runtime_version": "3.4",
31+
"executable": "/usr/local/bundle/bin/aws_lambda_ric"
32+
},
33+
{
34+
"distro": "alpine",
35+
"distro_version": "3.23",
36+
"runtime_version": "3.3",
37+
"executable": "/usr/local/bundle/bin/aws_lambda_ric"
38+
},
39+
{
40+
"distro": "debian",
41+
"distro_version": "bookworm",
42+
"runtime_version": "3.4",
43+
"executable": "/usr/local/bundle/bin/aws_lambda_ric"
44+
},
45+
{
46+
"distro": "debian",
47+
"distro_version": "bookworm",
48+
"runtime_version": "3.3",
49+
"executable": "/usr/local/bundle/bin/aws_lambda_ric"
50+
},
51+
{
52+
"distro": "ubuntu",
53+
"distro_version": "24.04",
54+
"runtime_version": "3.4",
55+
"executable": "/usr/local/bin/aws_lambda_ric"
56+
},
57+
{
58+
"distro": "ubuntu",
59+
"distro_version": "24.04",
60+
"runtime_version": "3.3",
61+
"executable": "/usr/local/bin/aws_lambda_ric"
62+
}
63+
]
64+
}
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# Creates or updates one CloudWatch metric alarm per arch x distro_config
# permutation listed in .github/test-matrix.json, then a composite alarm that
# fires when any of those alarms is in ALARM or INSUFFICIENT_DATA.
name: bootstrap-alarms

permissions:
  id-token: write  # lets the job request the OIDC token used to assume the AWS role
  contents: read

on:
  schedule:
    - cron: '0 8 * * 1'  # Every Monday at 08:00 UTC
  workflow_dispatch:

env:
  AWS_REGION: ${{ secrets.AWS_REGION }}
  ALARM_NAMESPACE: GitHubActions

jobs:
  bootstrap:
    runs-on: ubuntu-latest
    env:
      COMPOSITE_ALARM_NAME: GitHubActions-${{ github.repository_owner }}-${{ github.event.repository.name }}-integration-tests-aggregate

    steps:
      # Prints the ref and event name of this run.
      # NOTE(review): presumably kept to debug the role's OIDC trust conditions - confirm.
      # The runner-provided GITHUB_REF / GITHUB_EVENT_NAME variables are used so
      # no expression is expanded into the script text.
      - name: Debug OIDC token
        run: |
          echo "GitHub ref: ${GITHUB_REF}"
          echo "GitHub event name: ${GITHUB_EVENT_NAME}"

      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Configure AWS credentials (OIDC)
        uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37 # v6.1.0
        with:
          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
          aws-region: ${{ secrets.AWS_REGION }}

      - name: Create individual metric alarms
        run: |
          set -euo pipefail

          MATRIX_FILE=".github/test-matrix.json"
          ALARM_NAMES=()

          # Expand every arch x distro_config permutation from the shared matrix.
          # The output is captured first so that a jq failure (missing or
          # malformed matrix file) aborts the step under `set -e`.
          ROWS=$(jq -c '
            .arch[] as $a |
            .distro_config[] as $d |
            { arch: $a.label, distro: $d.distro, distro_version: $d.distro_version, runtime_version: $d.runtime_version }
          ' "$MATRIX_FILE")

          if [ -z "${ROWS}" ]; then
            echo "No arch/distro_config permutations found in ${MATRIX_FILE}" >&2
            exit 1
          fi

          # Read one JSON object per line (no word splitting on whitespace).
          # The rows arrive on fd 3 so commands inside the loop cannot consume them.
          while IFS= read -r row <&3; do
            arch=$(jq -r '.arch' <<< "$row")
            distro=$(jq -r '.distro' <<< "$row")
            distro_version=$(jq -r '.distro_version' <<< "$row")
            runtime_version=$(jq -r '.runtime_version' <<< "$row")

            ALARM_NAME="GitHubActions-ruby-ric-${distro}-${distro_version}-ruby${runtime_version}-${arch}"

            echo "Creating alarm: ${ALARM_NAME}"

            # Alarms if no success metric is received within 3 days:
            # three consecutive 1-day periods whose Sum is below 1, with a
            # missing datapoint counted as breaching.
            aws cloudwatch put-metric-alarm \
              --alarm-name "${ALARM_NAME}" \
              --alarm-description "Integration test: ${distro} ${distro_version} / ruby ${runtime_version} (${arch})" \
              --namespace "${ALARM_NAMESPACE}" \
              --metric-name "TestResult" \
              --dimensions "Name=Distro,Value=${distro}" "Name=DistroVersion,Value=${distro_version}" "Name=RuntimeVersion,Value=${runtime_version}" "Name=Arch,Value=${arch}" \
              --statistic Sum \
              --period 86400 \
              --evaluation-periods 3 \
              --datapoints-to-alarm 3 \
              --threshold 1 \
              --comparison-operator LessThanThreshold \
              --treat-missing-data breaching

            ALARM_NAMES+=("${ALARM_NAME}")
          done 3<<< "${ROWS}"

          # Save alarm names for the composite alarm step
          printf '%s\n' "${ALARM_NAMES[@]}" > /tmp/alarm_names.txt

      - name: Create composite aggregate alarm
        env:
          # Passed through the environment so the secret is never expanded
          # into the script text.
          ALARM_TARGET_ARN: ${{ secrets.AWS_ALARM_TARGET_ARN }}
        run: |
          set -euo pipefail

          mapfile -t ALARM_NAMES < /tmp/alarm_names.txt

          # Build the composite alarm rule: triggers if ANY sub-alarm is in ALARM or INSUFFICIENT_DATA
          RULE=""
          for name in "${ALARM_NAMES[@]}"; do
            # Skip blank lines so an empty name never reaches the rule.
            if [ -z "$name" ]; then
              continue
            fi
            if [ -n "$RULE" ]; then
              RULE="${RULE} OR "
            fi
            RULE="${RULE}(ALARM(\"${name}\") OR INSUFFICIENT_DATA(\"${name}\"))"
          done

          # Refuse to submit an empty rule.
          if [ -z "$RULE" ]; then
            echo "No alarm names found in /tmp/alarm_names.txt" >&2
            exit 1
          fi

          echo "Composite alarm rule:"
          echo "${RULE}"

          aws cloudwatch put-composite-alarm \
            --alarm-name "${COMPOSITE_ALARM_NAME}" \
            --alarm-description "Aggregate alarm for all Ruby RIC integration test permutations" \
            --alarm-rule "${RULE}" \
            --actions-enabled \
            --alarm-actions "${ALARM_TARGET_ARN}" \
            --insufficient-data-actions "${ALARM_TARGET_ARN}"

          echo "Composite alarm '${COMPOSITE_ALARM_NAME}' created successfully."

.github/workflows/integration-tests.yml

Lines changed: 34 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,71 +1,50 @@
11
name: integration-tests
22

33
permissions:
4+
id-token: write
45
contents: read
56

67
on:
78
push:
89
branches: [main]
910
pull_request:
1011
branches: ['*']
12+
schedule:
13+
- cron: '0 8 * * 1-5' # Every workday (Mon-Fri) at 08:00 UTC
1114
workflow_dispatch:
1215

1316
jobs:
17+
# Reads the shared test matrix from the repository and exposes it, as
# single-line JSON, through the job output named `matrix`.
load-matrix:
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.read_matrix.outputs.matrix }}
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

    - name: Load test matrix
      id: read_matrix
      run: |
        # jq -c compacts the file to one line, which the key=value output format needs.
        compact_json=$(jq -c . .github/test-matrix.json)
        printf 'matrix=%s\n' "${compact_json}" >> "$GITHUB_OUTPUT"
29+
1430
integration-test:
31+
needs: load-matrix
1532
runs-on: ${{ matrix.arch.runner }}
1633
strategy:
1734
fail-fast: false
18-
matrix:
19-
arch:
20-
- runner: ubuntu-latest
21-
rie: aws-lambda-rie
22-
label: x64
23-
- runner: ubuntu-24.04-arm
24-
rie: aws-lambda-rie-arm64
25-
label: arm64
26-
distro_config:
27-
# al2023
28-
- distro: al2023
29-
distro_version: "al2023"
30-
runtime_version: "3.4"
31-
executable: /usr/local/bin/aws_lambda_ric
32-
- distro: al2023
33-
distro_version: "al2023"
34-
runtime_version: "3.3"
35-
executable: /usr/local/bin/aws_lambda_ric
36-
# Alpine
37-
- distro: alpine
38-
distro_version: "3.23"
39-
runtime_version: "3.4"
40-
executable: /usr/local/bundle/bin/aws_lambda_ric
41-
- distro: alpine
42-
distro_version: "3.23"
43-
runtime_version: "3.3"
44-
executable: /usr/local/bundle/bin/aws_lambda_ric
45-
# Debian
46-
- distro: debian
47-
distro_version: bookworm
48-
runtime_version: "3.4"
49-
executable: /usr/local/bundle/bin/aws_lambda_ric
50-
- distro: debian
51-
distro_version: bookworm
52-
runtime_version: "3.3"
53-
executable: /usr/local/bundle/bin/aws_lambda_ric
54-
# Ubuntu
55-
- distro: ubuntu
56-
distro_version: "24.04"
57-
runtime_version: "3.4"
58-
executable: /usr/local/bin/aws_lambda_ric
59-
- distro: ubuntu
60-
distro_version: "24.04"
61-
runtime_version: "3.3"
62-
executable: /usr/local/bin/aws_lambda_ric
35+
matrix: ${{ fromJson(needs.load-matrix.outputs.matrix) }}
6336

6437
name: "${{ matrix.distro_config.distro }} ${{ matrix.distro_config.distro_version }} / ruby ${{ matrix.distro_config.runtime_version }} (${{ matrix.arch.label }})"
6538

6639
steps:
6740
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
6841

42+
# Assumes the AWS role via OIDC. Pull requests opened from forks receive
# neither repository secrets nor an OIDC token, so this step could only fail
# for them; it is skipped in that case.
- name: Configure AWS credentials (OIDC)
  if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
  uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37 # v6.1.0
  with:
    role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
    aws-region: ${{ secrets.AWS_REGION }}
47+
6948
- name: Download RIE
7049
run: |
7150
mkdir -p .scratch
@@ -131,6 +110,16 @@ jobs:
131110
echo "=== Tester container logs ==="
132111
docker logs "${TEST_NAME}-tester" 2>&1 || true
133112
113+
# Records one successful run of this matrix permutation as a CloudWatch
# "TestResult" datapoint. Pull-request runs are excluded: a passing PR branch
# must not count as a success of the default branch.
- name: Publish success metric
  if: success() && github.event_name != 'pull_request'
  run: |
    # put-metric-data takes dimensions as comma-separated Key=Value pairs.
    aws cloudwatch put-metric-data \
      --namespace "GitHubActions" \
      --metric-name "TestResult" \
      --dimensions "Distro=${{ matrix.distro_config.distro }},DistroVersion=${{ matrix.distro_config.distro_version }},RuntimeVersion=${{ matrix.distro_config.runtime_version }},Arch=${{ matrix.arch.label }}" \
      --value 1 \
      --unit Count
122+
134123
- name: Cleanup
135124
if: always()
136125
run: |

0 commit comments

Comments
 (0)