diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 9867810a7419..c6a95b29b4c0 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -357,6 +357,8 @@ PostCommit Jobs run in a schedule against master branch and generally do not get | [ PostCommit Java SingleStoreIO IT ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml) | N/A |`beam_PostCommit_Java_SingleStoreIO_IT.json`| [![.github/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml?query=event%3Aschedule) | | [ PostCommit Java PVR Spark3 Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml) | N/A |`beam_PostCommit_Java_PVR_Spark3_Streaming.json`| [![.github/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml?query=event%3Aschedule) | | [ PostCommit Java PVR Spark Batch ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml) | N/A |`beam_PostCommit_Java_PVR_Spark_Batch.json`| [![.github/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml?query=event%3Aschedule) | +| [ PostCommit Java PVR Spark4 Batch ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark4_Batch.yml) | N/A |`beam_PostCommit_Java_PVR_Spark4_Batch.json`| [![.github/workflows/beam_PostCommit_Java_PVR_Spark4_Batch.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark4_Batch.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark4_Batch.yml?query=event%3Aschedule) | +| [ PostCommit Java PVR Spark4 Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark4_Streaming.yml) | N/A |`beam_PostCommit_Java_PVR_Spark4_Streaming.json`| [![.github/workflows/beam_PostCommit_Java_PVR_Spark4_Streaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark4_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark4_Streaming.yml?query=event%3Aschedule) | | [ PostCommit Java Tpcds Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Dataflow.yml) | N/A |`beam_PostCommit_Java_Tpcds_Dataflow.json`| [![.github/workflows/beam_PostCommit_Java_Tpcds_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Dataflow.yml?query=event%3Aschedule) | | [ PostCommit Java Tpcds Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Flink.yml) | N/A |`beam_PostCommit_Java_Tpcds_Flink.json`| [![.github/workflows/beam_PostCommit_Java_Tpcds_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Flink.yml?query=event%3Aschedule) | | [ PostCommit Java Tpcds Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Spark.yml) | N/A |`beam_PostCommit_Java_Tpcds_Spark.json`| [![.github/workflows/beam_PostCommit_Java_Tpcds_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Spark.yml?query=event%3Aschedule) | @@ -370,7 +372,7 @@ PostCommit Jobs run in a schedule against master branch and generally do not get | [ PostCommit Java ValidatesRunner Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml) | N/A |`beam_PostCommit_Java_ValidatesRunner_Flink.json`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml?query=event%3Aschedule) | | [ PostCommit Java ValidatesRunner Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml) | N/A |`beam_PostCommit_Java_ValidatesRunner_Spark.json`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml?query=event%3Aschedule) | | [ PostCommit Java ValidatesRunner SparkStructuredStreaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml) | N/A |`beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.json`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml?query=event%3Aschedule) | -| [ PostCommit Java ValidatesRunner Spark4StructuredStreaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark4StructuredStreaming.yml) | N/A |`beam_PostCommit_Java_ValidatesRunner_Spark4StructuredStreaming.json`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark4StructuredStreaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark4StructuredStreaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark4StructuredStreaming.yml?query=event%3Aschedule) | +| [ PostCommit Java ValidatesRunner Spark4 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark4.yml) | N/A |`beam_PostCommit_Java_ValidatesRunner_Spark4.json`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark4.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark4.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark4.yml?query=event%3Aschedule) | | [ PostCommit Java ValidatesRunner Twister2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml) | N/A |`beam_PostCommit_Java_ValidatesRunner_Twister2.json`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml?query=event%3Aschedule) | | [ PostCommit Java ValidatesRunner ULR ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml) | N/A |`beam_PostCommit_Java_ValidatesRunner_ULR.json`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml?query=event%3Aschedule) | | [ PostCommit Java ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml) | N/A |`beam_PostCommit_Java.json`| [![.github/workflows/beam_PostCommit_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml?query=event%3Aschedule) | diff --git a/.github/workflows/beam_PostCommit_Java_PVR_Spark4_Batch.yml b/.github/workflows/beam_PostCommit_Java_PVR_Spark4_Batch.yml new file mode 100644 index 000000000000..eb2d65afc2d8 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_PVR_Spark4_Batch.yml @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java PVR Spark4 Batch + +on: + schedule: + - cron: '45 5/6 * * *' + pull_request_target: + paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Java_PVR_Spark4_Batch.json'] + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.pull_request.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_PVR_Spark4_Batch: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-24.04, main] + timeout-minutes: 240 + strategy: + matrix: + job_name: [beam_PostCommit_Java_PVR_Spark4_Batch] + job_phrase: [Run Java Spark4 PortableValidatesRunner Batch] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'pull_request_target' || + (github.event_name == 'schedule' && github.repository == 'apache/beam') + steps: + - uses: actions/checkout@v6 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 17 + - name: run PostCommit Java PortableValidatesRunner Spark4 Batch script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: | + :runners:spark:4:job-server:validatesPortableRunnerBatch \ + :runners:spark:4:job-server:validatesPortableRunnerDocker + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v7 + if: ${{ !success() }} + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + large_files: true + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' + - name: Archive SpotBugs Results + uses: actions/upload-artifact@v7 + with: + name: SpotBugs Results + path: "**/build/reports/spotbugs/*.html" diff --git a/.github/workflows/beam_PostCommit_Java_PVR_Spark4_Streaming.yml b/.github/workflows/beam_PostCommit_Java_PVR_Spark4_Streaming.yml new file mode 100644 index 000000000000..38866bedeb1b --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_PVR_Spark4_Streaming.yml @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java PVR Spark4 Streaming + +on: + schedule: + - cron: '45 5/6 * * *' + pull_request_target: + paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Java_PVR_Spark4_Streaming.json'] + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.pull_request.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_PVR_Spark4_Streaming: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-24.04, main] + timeout-minutes: 180 + strategy: + matrix: + job_name: [beam_PostCommit_Java_PVR_Spark4_Streaming] + job_phrase: [Run Java Spark v4 PortableValidatesRunner Streaming] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'pull_request_target' || + (github.event_name == 'schedule' && github.repository == 'apache/beam') || + github.event.comment.body == 'Run Java Spark v4 PortableValidatesRunner Streaming' + steps: + - uses: actions/checkout@v6 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 17 + - name: run PostCommit Java PortableValidatesRunner Spark4 Streaming script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:4:job-server:validatesPortableRunnerStreaming + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v7 + if: ${{ !success() }} + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' + large_files: true \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark4StructuredStreaming.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark4.yml similarity index 82% rename from .github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark4StructuredStreaming.yml rename to .github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark4.yml index d1f214e6a458..c6e742204894 100644 --- a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark4StructuredStreaming.yml +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark4.yml @@ -13,13 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: PostCommit Java ValidatesRunner Spark4 StructuredStreaming +name: PostCommit Java ValidatesRunner Spark4 on: schedule: - cron: '45 4/6 * * *' pull_request_target: - paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Java_ValidatesRunner_Spark4StructuredStreaming.json'] + paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Java_ValidatesRunner_Spark4.json'] workflow_dispatch: #Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event @@ -49,19 +49,19 @@ env: GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} jobs: - beam_PostCommit_Java_ValidatesRunner_Spark4StructuredStreaming: + beam_PostCommit_Java_ValidatesRunner_Spark4: name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) runs-on: [self-hosted, ubuntu-24.04, main] timeout-minutes: 120 strategy: matrix: - job_name: [beam_PostCommit_Java_ValidatesRunner_Spark4StructuredStreaming] - job_phrase: [Run Spark4 StructuredStreaming ValidatesRunner] + job_name: [beam_PostCommit_Java_ValidatesRunner_Spark4] + job_phrase: [Run Spark4 ValidatesRunner] if: | github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request_target' || (github.event_name == 'schedule' && github.repository == 'apache/beam') || - github.event.comment.body == 'Run Spark4 StructuredStreaming ValidatesRunner' + github.event.comment.body == 'Run Spark4 ValidatesRunner' steps: - uses: actions/checkout@v6 - name: Setup repository @@ -74,13 +74,11 @@ jobs: uses: ./.github/actions/setup-environment-action with: java-version: '17' - - name: run validatesStructuredStreamingRunnerBatch script + - name: run validatesRunner script uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :runners:spark:4:validatesStructuredStreamingRunnerBatch - arguments: | - -PtestJavaVersion=17 \ - -PdisableSpotlessCheck=true \ + gradle-command: :runners:spark:4:validatesRunner + arguments: -PdisableSpotlessCheck=true - name: Archive JUnit Test Results uses: actions/upload-artifact@v7 if: ${{ !success() }} diff --git a/runners/spark/4/build.gradle b/runners/spark/4/build.gradle index 01fb3680b078..ec1af8df38a4 100644 --- a/runners/spark/4/build.gradle +++ b/runners/spark/4/build.gradle @@ -36,13 +36,23 @@ apply from: "$basePath/spark_runner.gradle" // Spark 4 always requires Java 17, so unconditionally add the --add-opens flags // required by Kryo and other libraries that use reflection on JDK internals. -test { +tasks.withType(Test).configureEach { jvmArgs "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens=java.base/java.nio=ALL-UNNAMED", "--add-opens=java.base/java.util=ALL-UNNAMED", "--add-opens=java.base/java.lang.invoke=ALL-UNNAMED" } +tasks.validatesStructuredStreamingRunnerBatch { + filter { + // TODO(https://github.com/apache/beam/issues/36841): currently failing with INTERNAL_ERROR] + // Couldn't find method fromByteArray with arguments + excludeTestsMatching 'org.apache.beam.sdk.transforms.CombineTest$BasicTests.testHotKeyCombining' + } +} + +tasks.validatesRunner.dependsOn(validatesStructuredStreamingRunnerBatch) + // Exclude DStream-based streaming tests from the shared-base copy: the Spark 4 module // supports only structured streaming (batch) and does not include legacy DStream support. // Streaming test utilities also depend on kafka.server.KafkaServerStartable which was