# SynapseML CI/CD pipeline (Azure Pipelines).
#
# Flow: BuildCIImage makes sure the `ci` container image exists in the
# registry; the Style, Publish, E2E and test jobs depend on it and run inside
# that container. BuildDocker and Release run directly on the hosted agent.

resources:
  containers:
    - container: ci
      image: mmlsparkmcr.azurecr.io/synapseml/ci:ci-latest
      endpoint: 'SynapseML MCR'
  repositories:
    - repository: self
      type: self

# CI trigger: pushes to the listed branches, ignoring documentation-only paths.
trigger:
  branches:
    include:
      - master
      - spark3.3
      - spark3.5
  paths:
    exclude:
      - README.md
      - CONTRIBUTORS.md
      - SECURITY.md
      - "docs/Reference/Contributor Guide.md"
      - "docs/Reference/Developer Setup.md"
      - "docs/Reference/Docker Setup.md"
      - CODEOWNERS

# PR trigger: same branch and path filters as the CI trigger.
pr:
  branches:
    include:
      - master
      - spark3.3
      - spark3.5
  paths:
    exclude:
      - README.md
      - CONTRIBUTORS.md
      - SECURITY.md
      - "docs/Reference/Contributor Guide.md"
      - "docs/Reference/Developer Setup.md"
      - "docs/Reference/Docker Setup.md"
      - CODEOWNERS

schedules:
  - cron: "0 0 * * *"
    displayName: Daily midnight build
    # Run even when there are no new commits since the last scheduled run.
    always: true
    branches:
      include:
        - master

# Queue-time switches. Each one gates the job of the same name below.
parameters:
  - name: testStyle
    displayName: Run Style Tests
    type: boolean
    default: true
  - name: testUnit
    displayName: Run Unit Tests
    type: boolean
    default: true
  - name: testPython
    displayName: Run Python Tests
    type: boolean
    default: true
  - name: testR
    displayName: Run R Tests
    type: boolean
    default: true
  - name: testDatabricksE2E
    displayName: Run Databricks E2E Tests
    type: boolean
    default: true
  - name: testFabricE2E
    displayName: Run Fabric E2E Tests
    type: boolean
    default: true
  - name: testWebsiteSamples
    displayName: Run Website Samples Tests
    type: boolean
    default: true
  - name: publishArtifacts
    displayName: Publish Artifacts
    type: boolean
    default: true
  - name: publishDockerImages
    displayName: Publish Docker Images
    type: boolean
    default: false
  - name: publishRelease
    displayName: Publish Release
    type: boolean
    default: false

variables:
  runTests: True
  CONDA_CACHE_DIR: /usr/share/miniconda/envs
  UBUNTU_VERSION: ubuntu-22.04
  ComponentDetection.Timeout: 900
  isMaster: $[eq(variables['Build.SourceBranch'], 'refs/heads/master')]
  isTag: $[startsWith(variables['Build.SourceBranch'], 'refs/tags/')]
  isPR: $[eq(variables['Build.Reason'], 'PullRequest')]
  # Run coverage only on PRs, master, or tag builds to speed up feature branch builds
  runCoverage: $[or(eq(variables['Build.Reason'], 'PullRequest'), eq(variables['Build.SourceBranch'], 'refs/heads/master'), startsWith(variables['Build.SourceBranch'], 'refs/tags/'))]

jobs:
  # Ensures an image tagged with the content hash of the dependency files
  # exists in the registry, and points `ci-latest` at it.
  - job: BuildCIImage
    displayName: 'Ensure CI Image'
    cancelTimeoutInMinutes: 0
    timeoutInMinutes: 60
    pool:
      vmImage: $(UBUNTU_VERSION)
    steps:
      - bash: |
          set -e
          echo "=== Disk space BEFORE cleanup ==="
          df -h / | grep -E 'Filesystem|/$'
          sudo rm -rf /usr/local/lib/android || true
          sudo rm -rf /usr/lib/google-cloud-sdk || true
          sudo rm -rf /usr/share/dotnet || true
          sudo rm -rf /opt/ghc || true
          sudo rm -rf /opt/hostedtoolcache || true
          sudo rm -rf /usr/local/share/boost || true
          sudo rm -rf /usr/share/swift || true
          sudo rm -rf /usr/local/.ghcup || true
          sudo rm -rf /usr/share/miniconda || true
          sudo rm -rf /usr/local/share/chromium || true
          sudo docker image prune -af || true
          echo "=== Disk space AFTER cleanup ==="
          df -h / | grep -E 'Filesystem|/$'
        displayName: 'Free disk space'
      - task: Docker@2
        displayName: 'Login to ACR'
        inputs:
          command: login
          containerRegistry: 'SynapseML MCR'
      - bash: |
          set -e
          # Hash each dependency file individually (with filename) to avoid boundary-shift collisions
          HASH=$(sha256sum environment.yml project/plugins.sbt project/build.properties build.sbt sonatype.sbt tools/docker/ci/Dockerfile | sha256sum | cut -c1-12)
          TAG="ci-${HASH}"
          LATEST_TAG="ci-latest"
          REGISTRY="mmlsparkmcr.azurecr.io"
          REPO="synapseml/ci"
          echo "Content hash: $TAG"
          # Retry manifest inspect to tolerate transient ACR failures
          IMAGE_EXISTS=false
          for i in 1 2 3; do
            if docker manifest inspect "${REGISTRY}/${REPO}:${TAG}" > /dev/null 2>&1; then
              IMAGE_EXISTS=true
              break
            fi
            [ $i -lt 3 ] && echo "Manifest inspect attempt $i failed, retrying..." && sleep 5
          done
          if [ "$IMAGE_EXISTS" = "true" ]; then
            echo "Image ${TAG} exists. Re-tagging server-side as ${LATEST_TAG}."
            docker buildx imagetools create \
              --tag "${REGISTRY}/${REPO}:${LATEST_TAG}" \
              "${REGISTRY}/${REPO}:${TAG}"
          else
            echo "Image ${TAG} not found. Building..."
            # Best-effort pull so the previous image can seed the layer cache.
            docker pull "${REGISTRY}/${REPO}:${LATEST_TAG}" || true
            docker build \
              --cache-from "${REGISTRY}/${REPO}:${LATEST_TAG}" \
              -t "${REGISTRY}/${REPO}:${TAG}" \
              -t "${REGISTRY}/${REPO}:${LATEST_TAG}" \
              -f tools/docker/ci/Dockerfile .
            docker push "${REGISTRY}/${REPO}:${TAG}"
            docker push "${REGISTRY}/${REPO}:${LATEST_TAG}"
          fi
        displayName: 'Check/Build CI Image'

  # Scala (scalastyle) and Python (black) style checks.
  - job: Style
    dependsOn: BuildCIImage
    cancelTimeoutInMinutes: 0
    condition: and(succeeded(), eq(variables.runTests, 'True'), eq('${{ parameters.testStyle }}', true))
    pool:
      vmImage: $(UBUNTU_VERSION)
    container: ci
    steps:
      - template: templates/free_disk.yml
      - task: AzureCLI@2
        displayName: 'Scala Style Check'
        inputs:
          azureSubscription: 'SynapseML Build'
          scriptLocation: inlineScript
          scriptType: bash
          inlineScript: 'sbt scalastyle test:scalastyle'
      - bash: |
          set -e
          source activate synapseml
          black --diff --color . && black --check -q .
        displayName: 'Python Style Check'

  # Publishes build artifacts; only emitted when publishArtifacts is set.
  - ${{ if eq(parameters.publishArtifacts, true) }}:
      - job: Publish
        dependsOn: BuildCIImage
        condition: and(succeeded(), eq('${{ parameters.publishArtifacts }}', true))
        cancelTimeoutInMinutes: 0
        pool:
          vmImage: $(UBUNTU_VERSION)
        container: ci
        steps:
          - template: templates/free_disk.yml
          - template: templates/kv.yml
          - task: MavenAuthenticate@0
            name: mavenAuthPublicPackages
            displayName: Authenticate SynapseML_PublicPackages
            inputs:
              artifactsFeeds: SynapseML_PublicPackages
              mavenServiceConnections: SynapseML_PublicPackages-Feed-Connection
          - task: AzureCLI@2
            displayName: 'Publish Artifacts'
            inputs:
              azureSubscription: 'SynapseML Build'
              scriptLocation: inlineScript
              scriptType: bash
              inlineScript: |
                set -e
                source activate synapseml
                sbt packagePython uploadNotebooks
                sbt -DskipCodegen=true publishBlob publishDocs publishR publishPython
                sbt genBuildInfo
                echo "##vso[task.uploadsummary]$(pwd)/target/Build.md"
                sbt -DskipCodegen=true publishLocalSigned
                python tools/esrp/prepare_jar.py
            env:
              NEXUS-UN: $(nexus-un)
              NEXUS-PW: $(nexus-pw)
              PGP-PRIVATE: $(pgp-private)
              PGP-PUBLIC: $(pgp-public)
              PGP-PW: $(pgp-pw)
              SYNAPSEML_ENABLE_PUBLISH: true
          - task: AzureCLI@2
            inputs:
              azureSubscription: 'SynapseML Build'
              scriptLocation: inlineScript
              scriptType: bash
              inlineScript: |
                set -e
                sbt publishBadges
            condition: and(succeeded(), eq(variables.isMaster, true))
            displayName: Publish Badges

  # Runs one Databricks notebook test class per matrix leg.
  - job: DatabricksE2E
    displayName: 'Databricks E2E'
    dependsOn: BuildCIImage
    condition: and(succeeded(), eq('${{ parameters.testDatabricksE2E }}', true))
    timeoutInMinutes: 120
    cancelTimeoutInMinutes: 0
    pool:
      vmImage: $(UBUNTU_VERSION)
    container: ci
    strategy:
      matrix:
        databricks-cpu-1:
          TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksCPUTests1"
        databricks-cpu-2:
          TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksCPUTests2"
        databricks-cpu-3:
          TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksCPUTests3"
        databricks-cpu-4:
          TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksCPUTests4"
        databricks-cpu-5:
          TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksCPUTests5"
        databricks-gpu-1:
          TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksGPUTests1"
        databricks-gpu-2:
          TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksGPUTests2"
        databricks-gpu-3:
          TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksGPUTests3"
        # The databricks-rapids leg is disabled because its tests are failing.
        # Re-enable it once the underlying issue is fixed.
        # databricks-rapids:
        #   TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksRapidsTests"
    steps:
      - template: templates/free_disk.yml
      - template: templates/kv.yml
      - template: templates/publish.yml
      - task: AzureCLI@2
        displayName: 'E2E'
        inputs:
          azureSubscription: 'SynapseML Build'
          scriptLocation: inlineScript
          scriptType: bash
          inlineScript: |
            set -e
            source activate synapseml
            sbt "testOnly $(TEST-CLASS)"
        condition: and(succeeded(), eq(variables.runTests, 'True'))
      - task: PublishTestResults@2
        displayName: 'Publish Test Results'
        inputs:
          testResultsFiles: '**/test-reports/TEST-*.xml'
          failTaskOnFailedTests: true
        condition: and(eq(variables.runTests, 'True'), succeededOrFailed())

  # FabricE2E runs in the CI container. The Fabric tests are Scala/JVM tests
  # that call Fabric REST APIs — no special Python packages beyond the base env.
  - job: FabricE2E
    displayName: 'Fabric E2E'
    dependsOn: BuildCIImage
    condition: and(succeeded(), eq('${{ parameters.testFabricE2E }}', true))
    timeoutInMinutes: 120
    cancelTimeoutInMinutes: 0
    pool:
      vmImage: $(UBUNTU_VERSION)
    container: ci
    steps:
      - template: templates/free_disk.yml
      - template: templates/kv.yml
      - template: templates/fabric_kv.yml
      - template: templates/publish.yml
      - task: AzureCLI@2
        displayName: 'E2E'
        inputs:
          azureSubscription: 'SynapseML Build'
          scriptLocation: inlineScript
          scriptType: bash
          inlineScript: |
            set -e
            source activate synapseml
            sbt "testOnly com.microsoft.azure.synapse.ml.nbtest.FabricSmokeTests com.microsoft.azure.synapse.ml.nbtest.FabricNotebookTests"
        env:
          INTEGRATION_ENV: $(sempy-integration-region)
          INTEGRATION_ACCOUNT: $(sempy-integration-account)
          INTEGRATION_CERTIFICATE: $(sempy-integration-certificate)
          INTEGRATION_WORKSPACE_PREFIX: $(sempy-integration-workspace-prefix)
        condition: and(succeeded(), eq(variables.runTests, 'True'))
      - task: PublishTestResults@2
        displayName: 'Publish Test Results'
        inputs:
          testResultsFiles: '**/test-reports/TEST-*.xml'
          failTaskOnFailedTests: true
        condition: and(eq(variables.runTests, 'True'), succeededOrFailed())

  # Builds the demo and minimal Docker images on every run; tagging and pushing
  # are gated on master + publishDockerImages (and a `v*` git tag for release).
  - job: BuildDocker
    displayName: BuildDocker
    pool:
      # Uses the shared image variable so this job tracks the other jobs.
      vmImage: $(UBUNTU_VERSION)
    steps:
      - task: AzureCLI@2
        displayName: 'Get Docker Tag + Version'
        inputs:
          azureSubscription: 'SynapseML Build'
          scriptLocation: inlineScript
          scriptType: bash
          inlineScript: |
            # Take the last line of sbt output and strip ANSI colour codes.
            VERSION=$(sbt "core/version" | tail -1 | cut -d' ' -f2 | sed 's/\x1b\[[0-9;]*m//g')
            echo '##vso[task.setvariable variable=version]'$VERSION
            echo '##vso[task.setvariable variable=gittag]'$(git tag -l --points-at HEAD)
      # Build all images (runs on every build to validate Dockerfiles)
      - task: Docker@2
        displayName: Demo Image Build
        inputs:
          command: 'build'
          buildContext: "."
          Dockerfile: 'tools/docker/demo/Dockerfile'
          arguments: --build-arg SYNAPSEML_VERSION=$(version) -t mmlspark-demo:$(version)
      - task: Docker@2
        displayName: Minimal Image Build
        inputs:
          command: 'build'
          buildContext: "."
          Dockerfile: 'tools/docker/minimal/Dockerfile'
          arguments: --build-arg SYNAPSEML_VERSION=$(version) -t mmlspark-minimal:$(version)
      # Push demo and minimal on master builds when publishDockerImages is set.
      # succeeded() is explicit because a custom condition replaces the
      # default one; without it these steps would run after a failed build.
      - bash: |
          set -e
          docker tag mmlspark-demo:$(version) mmlsparkmcr.azurecr.io/public/mmlspark/build-demo:$(version)
          docker tag mmlspark-minimal:$(version) mmlsparkmcr.azurecr.io/public/mmlspark/build-minimal:$(version)
        condition: and(succeeded(), eq(variables.isMaster, true), eq('${{ parameters.publishDockerImages }}', true))
        displayName: Tag Dev Images for ACR
      - task: Docker@2
        condition: and(succeeded(), eq(variables.isMaster, true), eq('${{ parameters.publishDockerImages }}', true))
        displayName: Demo Image Push
        inputs:
          containerRegistry: 'SynapseML MCR'
          repository: 'public/mmlspark/build-demo'
          command: 'push'
          tags: $(version)
      - task: Docker@2
        condition: and(succeeded(), eq(variables.isMaster, true), eq('${{ parameters.publishDockerImages }}', true))
        displayName: Minimal Image Push
        inputs:
          containerRegistry: 'SynapseML MCR'
          repository: 'public/mmlspark/build-minimal'
          command: 'push'
          tags: $(version)
      # Push release only on tagged releases
      - bash: |
          set -e
          docker tag mmlspark-demo:$(version) mmlsparkmcr.azurecr.io/public/mmlspark/release:$(version)
          docker tag mmlspark-demo:$(version) mmlsparkmcr.azurecr.io/public/mmlspark/release:latest
        condition: and(succeeded(), eq('${{ parameters.publishDockerImages }}', true), eq(variables.isMaster, true), startsWith(variables['gittag'], 'v'))
        displayName: Tag Release Image for ACR
      - task: Docker@2
        condition: and(succeeded(), eq('${{ parameters.publishDockerImages }}', true), eq(variables.isMaster, true), startsWith(variables['gittag'], 'v'))
        displayName: Release Image Push
        inputs:
          containerRegistry: 'SynapseML MCR'
          repository: 'public/mmlspark/release'
          command: 'push'
          tags: |
            $(version)
            latest
      - task: ComponentGovernanceComponentDetection@0

  # Release job; only emitted when publishRelease is set. Every step after
  # 'Get Git Tag' runs only on master with a `v*` tag at HEAD, and only if all
  # previous steps succeeded, so a failed step halts the release.
  - ${{ if eq(parameters.publishRelease, true) }}:
      - job: Release
        condition: eq('${{ parameters.publishRelease }}', true)
        cancelTimeoutInMinutes: 0
        pool:
          vmImage: $(UBUNTU_VERSION)
        steps:
          - template: templates/update_cli.yml
          # Sets the `tag` pipeline variable; later bash steps read it as $TAG.
          - bash: |
              echo '##vso[task.setvariable variable=tag]'$(git tag -l --points-at HEAD)
            displayName: 'Get Git Tag'
          - bash: |
              set -e
              wget https://github.com/git-chglog/git-chglog/releases/download/0.8.0/git-chglog_linux_amd64
              chmod +x git-chglog_linux_amd64
              ./git-chglog_linux_amd64 -o CHANGELOG.md $TAG
            condition: and(succeeded(), eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
          - task: GitHubRelease@1
            condition: and(succeeded(), eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
            inputs:
              gitHubConnection: 'MMLSpark Github'
              repositoryName: '$(Build.Repository.Name)'
              action: 'create'
              target: '$(Build.SourceVersion)'
              tagSource: 'gitTag'
              releaseNotesFile: 'CHANGELOG.md'
              isDraft: true
          - bash: echo "##vso[task.prependpath]$CONDA/bin"
            condition: and(succeeded(), eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
            displayName: Add conda to PATH
          - bash: sudo chown -R $(whoami):$(id -ng) $(CONDA_CACHE_DIR)
            displayName: Fix directory permissions
            condition: and(succeeded(), eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
          - task: Cache@2
            displayName: Use cached Anaconda environment
            condition: and(succeeded(), eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
            inputs:
              key: 'conda | "$(Agent.OS)" | environment.yml'
              # NOTE(review): these restore keys start with `python`, not the
              # `conda` prefix used by `key`, so they look unable to match
              # caches saved by this step. If they are changed to `conda`, the
              # env-create condition below must also handle the `inexact`
              # cache-hit value — confirm intent before editing.
              restoreKeys: |
                python | "$(Agent.OS)"
                python
              path: $(CONDA_CACHE_DIR)
              cacheHitVar: CONDA_CACHE_RESTORED
          - bash: |
              conda env create --force -f environment.yml -v
            condition: and(succeeded(), eq(variables.isMaster, true), startsWith(variables['tag'], 'v'), eq(variables.CONDA_CACHE_RESTORED, 'false'))
            displayName: Create Anaconda environment
          - task: AzureKeyVault@2
            condition: and(succeeded(), eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
            inputs:
              azureSubscription: 'SynapseML Build'
              keyVaultName: mmlspark-keys
          - bash: |
              set -e
              source activate synapseml
              sbt publishPypi
            condition: and(succeeded(), eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
            env:
              STORAGE-KEY: $(storage-key)
              NEXUS-UN: $(nexus-un)
              NEXUS-PW: $(nexus-pw)
              PGP-PRIVATE: $(pgp-private)
              PGP-PUBLIC: $(pgp-public)
              PGP-PW: $(pgp-pw)
              PYPI-API-TOKEN: $(pypi-api-token)
              SYNAPSEML_ENABLE_PUBLISH: true
            displayName: 'publish python package to pypi'
          - bash: |
              set -e
              source activate synapseml
              sbt publishLocalSigned
              python tools/esrp/prepare_jar.py
            condition: and(succeeded(), eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
            env:
              STORAGE-KEY: $(storage-key)
              NEXUS-UN: $(nexus-un)
              NEXUS-PW: $(nexus-pw)
              PGP-PRIVATE: $(pgp-private)
              PGP-PUBLIC: $(pgp-public)
              PGP-PW: $(pgp-pw)
              SYNAPSEML_ENABLE_PUBLISH: true
            displayName: 'publish jar package to maven central'
          - task: EsrpRelease@9
            inputs:
              connectedservicename: 'SynapseML-ESRP-Service-Connection'
              usemanagedidentity: true
              keyvaultname: 'esrp-kv-ame'
              signcertname: 'esrp-release-mi-cert'
              clientid: '1fc1c0d1-5a85-4081-8f1e-12a8c225b9a6'
              contenttype: maven
              folderlocation: '/home/vsts/.ivy2/local/com.microsoft.azure/'
              owners: 'zhoubo@microsoft.com,marcozo@microsoft.com,romanbat@microsoft.com'
              approvers: 'romanbat@microsoft.com,markus.weimer@microsoft.com,negust@microsoft.com'
              mainpublisher: synapseml
              domaintenantid: '33e01921-4d64-4f8c-a055-5bdaffd5e33d'
            displayName: 'ESRP Publish Package'
            condition: and(succeeded(), eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))

  # Python tests, one matrix leg per sbt project. The deepLearning project is
  # split into an `hf` leg and a leg that ignores `hf`.
  - job: PythonTests
    dependsOn: BuildCIImage
    timeoutInMinutes: 120
    cancelTimeoutInMinutes: 0
    condition: and(succeeded(), eq(variables.runTests, 'True'), eq('${{ parameters.testPython }}', true))
    pool:
      vmImage: $(UBUNTU_VERSION)
    container: ci
    strategy:
      matrix:
        core:
          PACKAGE: "core"
        deep-learning-hf:
          PACKAGE: "deepLearning"
          TEST_SUB_PATH: "hf"
          IGNORE_TEST_PATH: ""
        deep-learning-nohf:
          PACKAGE: "deepLearning"
          TEST_SUB_PATH: ""
          IGNORE_TEST_PATH: "hf"
        lightgbm:
          PACKAGE: "lightgbm"
        opencv:
          PACKAGE: "opencv"
        vw:
          PACKAGE: "vw"
        cognitive:
          PACKAGE: "cognitive"
    steps:
      - template: templates/free_disk.yml
      - template: templates/kv.yml
      - task: AzureCLI@2
        displayName: 'Install and package deps'
        timeoutInMinutes: 40
        inputs:
          azureSubscription: 'SynapseML Build'
          scriptLocation: inlineScript
          scriptType: bash
          inlineScript: |
            # Fail fast if the conda env cannot be activated.
            set -e
            source activate synapseml
            if [ "$(runCoverage)" = "True" ]; then COV_CMD="coverage"; else COV_CMD=""; fi
            sbt $COV_CMD getDatasets "project core" installPipPackage publishM2 "project $(PACKAGE)" installPipPackage publishM2
      - task: AzureCLI@2
        displayName: 'Test Python Code'
        timeoutInMinutes: 40
        inputs:
          azureSubscription: 'SynapseML Build'
          scriptLocation: inlineScript
          scriptType: bash
          inlineScript: |
            set -e
            source activate synapseml
            export SBT_OPTS="-Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -Xss5M -Duser.timezone=GMT"
            echo "##vso[task.setvariable variable=SBT_OPTS]$SBT_OPTS"
            echo "SBT_OPTS=$SBT_OPTS"
            IGNORE_TEST_PATH_FLAG=""
            TEST_SUB_PATH_FLAG=""
            # Read the matrix values from the environment, not as pipeline
            # macros: on legs that do not define them the macro text is left
            # in place and bash would run it as a command substitution.
            if [ -n "${IGNORE_TEST_PATH:-}" ]; then
              IGNORE_TEST_PATH_FLAG="-DpythonIgnoreTestPath=${IGNORE_TEST_PATH}"
            fi
            if [ -n "${TEST_SUB_PATH:-}" ]; then
              TEST_SUB_PATH_FLAG="-DpythonSubTestPath=${TEST_SUB_PATH}"
            fi
            echo "IGNORE_TEST_PATH=$IGNORE_TEST_PATH"
            echo "TEST_SUB_PATH=$TEST_SUB_PATH"
            echo "IGNORE_TEST_PATH_FLAG=$IGNORE_TEST_PATH_FLAG"
            echo "TEST_SUB_PATH_FLAG=$TEST_SUB_PATH_FLAG"
            # One retry on failure.
            # NOTE(review): `coverage` is unconditional here, unlike the
            # runCoverage-gated COV_CMD in the previous step — confirm intent.
            (sbt $IGNORE_TEST_PATH_FLAG $TEST_SUB_PATH_FLAG "project $(PACKAGE)" coverage testPython) || \
            (sbt $IGNORE_TEST_PATH_FLAG $TEST_SUB_PATH_FLAG "project $(PACKAGE)" coverage testPython)
      - task: PublishTestResults@2
        displayName: 'Publish Test Results'
        inputs:
          testResultsFiles: '**/python-test-*.xml'
          failTaskOnFailedTests: true
        condition: succeededOrFailed()
      - task: AzureCLI@2
        displayName: 'Generate Codecov report'
        retryCountOnTaskFailure: 1
        inputs:
          azureSubscription: 'SynapseML Build'
          scriptLocation: inlineScript
          scriptType: bash
          inlineScript: 'sbt coverageReport'
        condition: and(succeededOrFailed(), eq(variables.runCoverage, true))
      - ${{ if or(eq(variables['Build.Reason'], 'PullRequest'), eq(variables['Build.SourceBranch'], 'refs/heads/master'), startsWith(variables['Build.SourceBranch'], 'refs/tags/')) }}:
          - template: templates/codecov.yml

  # R tests, one matrix leg per sbt project.
  - job: RTests
    dependsOn: BuildCIImage
    timeoutInMinutes: 60
    cancelTimeoutInMinutes: 0
    condition: and(succeeded(), eq(variables.runTests, 'True'), eq('${{ parameters.testR }}', true))
    pool:
      vmImage: $(UBUNTU_VERSION)
    container: ci
    strategy:
      matrix:
        core:
          PACKAGE: "core"
        deep-learning:
          PACKAGE: "deepLearning"
        lightgbm:
          PACKAGE: "lightgbm"
        opencv:
          PACKAGE: "opencv"
        vw:
          PACKAGE: "vw"
        cognitive:
          PACKAGE: "cognitive"
    steps:
      - template: templates/free_disk.yml
      # - template: templates/ivy_cache_2.yml
      - template: templates/kv.yml
      - task: AzureCLI@2
        displayName: 'Prepare for tests'
        retryCountOnTaskFailure: 1
        timeoutInMinutes: 60
        inputs:
          azureSubscription: 'SynapseML Build'
          scriptLocation: inlineScript
          scriptType: bash
          inlineScript: |
            set -e
            export SBT_OPTS="-Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -Xss5M -Duser.timezone=GMT"
            source activate synapseml
            timeout 30m sbt setup codegen publishM2
            SPARK_VERSION=3.5.0
            HADOOP_VERSION=3
            wget -q https://mmlspark.blob.core.windows.net/installers/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
      - task: AzureCLI@2
        displayName: 'Test R Code'
        retryCountOnTaskFailure: 1
        timeoutInMinutes: 20
        inputs:
          azureSubscription: 'SynapseML Build'
          scriptLocation: inlineScript
          scriptType: bash
          inlineScript: |
            set -e
            export SBT_OPTS="-Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -Xss5M -Duser.timezone=GMT"
            source activate synapseml
            # NOTE(review): `coverage` is unconditional here even though the
            # report step is gated on runCoverage — confirm intent.
            timeout 20m sbt -DskipCodegen=true "project $(PACKAGE)" coverage testR
      - task: PublishTestResults@2
        displayName: 'Publish Test Results'
        inputs:
          testResultsFiles: '**/r-test-*.xml'
          failTaskOnFailedTests: true
        condition: succeededOrFailed()
      - task: AzureCLI@2
        retryCountOnTaskFailure: 1
        displayName: 'Generate Codecov report'
        inputs:
          azureSubscription: 'SynapseML Build'
          scriptLocation: inlineScript
          scriptType: bash
          inlineScript: 'sbt coverageReport'
        condition: and(succeededOrFailed(), eq(variables.runCoverage, true))
      - ${{ if or(eq(variables['Build.Reason'], 'PullRequest'), eq(variables['Build.SourceBranch'], 'refs/heads/master'), startsWith(variables['Build.SourceBranch'], 'refs/tags/')) }}:
          - template: templates/codecov.yml

  # Runs the website documentation samples as tests.
  - job: WebsiteSamplesTests
    dependsOn: BuildCIImage
    cancelTimeoutInMinutes: 0
    condition: and(succeeded(), eq(variables.runTests, 'True'), eq('${{ parameters.testWebsiteSamples }}', true))
    pool:
      vmImage: $(UBUNTU_VERSION)
    container: ci
    steps:
      - template: templates/free_disk.yml
      # - template: templates/ivy_cache.yml
      - template: templates/kv.yml
      - template: templates/publish.yml
      - task: AzureCLI@2
        displayName: 'Test Website Samples'
        timeoutInMinutes: 30
        inputs:
          azureSubscription: 'SynapseML Build'
          scriptLocation: inlineScript
          scriptType: bash
          inlineScript: |
            # Stop if `sbt setup` fails twice instead of testing a broken setup.
            set -e
            export SBT_OPTS="-Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -Xss2M -Duser.timezone=GMT"
            if [ "$(runCoverage)" = "True" ]; then COV_CMD="coverage"; else COV_CMD=""; fi
            (timeout 30m sbt setup) || (echo "retrying" && timeout 30m sbt setup)
            sbt $COV_CMD testWebsiteDocs
      - task: PublishTestResults@2
        displayName: 'Publish Test Results'
        inputs:
          testResultsFiles: '**/website-test-result.xml'
          failTaskOnFailedTests: true
        condition: succeededOrFailed()
      - task: AzureCLI@2
        displayName: 'Generate Codecov report'
        retryCountOnTaskFailure: 1
        inputs:
          azureSubscription: 'SynapseML Build'
          scriptLocation: inlineScript
          scriptType: bash
          inlineScript: 'sbt coverageReport'
        condition: and(succeededOrFailed(), eq(variables.runCoverage, true))
      - ${{ if or(eq(variables['Build.Reason'], 'PullRequest'), eq(variables['Build.SourceBranch'], 'refs/heads/master'), startsWith(variables['Build.SourceBranch'], 'refs/tags/')) }}:
          - template: templates/codecov.yml

  # Scala unit tests. Each matrix leg selects an sbt PROJECT and a test
  # PACKAGE (or explicit TEST_CLASSES); legs marked FLAKY get one retry.
  # NOTE(review): the job timeout (50m) is shorter than the 'Unit Test' step
  # timeout (90m) and than two 30m attempts — confirm the intended budget.
  - job: UnitTests
    dependsOn: BuildCIImage
    cancelTimeoutInMinutes: 1
    timeoutInMinutes: 50
    condition: and(succeeded(), eq(variables.runTests, 'True'), eq('${{ parameters.testUnit }}', true))
    pool:
      vmImage: $(UBUNTU_VERSION)
    container: ci
    strategy:
      matrix:
        automl:
          PACKAGE: "automl"
          PROJECT: "core"
        causal:
          PACKAGE: "causal"
          PROJECT: "core"
        onnx:
          PACKAGE: "onnx"
          PROJECT: "deepLearning"
        geospatial:
          PACKAGE: "services.geospatial"
          PROJECT: "cognitive"
        anomaly:
          PACKAGE: "services.anomaly"
          PROJECT: "cognitive"
          FLAKY: "true"
        face:
          PACKAGE: "services.face"
          PROJECT: "cognitive"
          FLAKY: "true"
        form:
          PACKAGE: "services.form"
          PROJECT: "cognitive"
          FLAKY: "true"
        language:
          PACKAGE: "services.language"
          PROJECT: "cognitive"
          FLAKY: "true"
        openai:
          PACKAGE: "services.openai"
          PROJECT: "cognitive"
          FLAKY: "true"
        aifoundry:
          PACKAGE: "services.aifoundry"
          PROJECT: "cognitive"
          FLAKY: "true"
        search1:
          PACKAGE: "services.search.split1"
          PROJECT: "cognitive"
          FFMPEG: "true"
          FLAKY: "true"
        search2:
          PACKAGE: "services.search.split2"
          PROJECT: "cognitive"
          FFMPEG: "true"
          FLAKY: "true"
        speech1:
          PACKAGE: "services.speech"
          TEST_CLASSES: "com.microsoft.azure.synapse.ml.services.speech.SpeechToTextSDKSuite"
          PROJECT: "cognitive"
          FFMPEG: "true"
          FLAKY: "true"
        speech2:
          PACKAGE: "services.speech"
          TEST_CLASSES: "com.microsoft.azure.synapse.ml.services.speech.ConversationTranscriptionSuite com.microsoft.azure.synapse.ml.services.speech.SpeechToTextSuite com.microsoft.azure.synapse.ml.services.speech.TextToSpeechSuite com.microsoft.azure.synapse.ml.services.speech.SpeakerEmotionInferenceSuite"
          PROJECT: "cognitive"
          FFMPEG: "true"
          FLAKY: "true"
        text:
          PACKAGE: "services.text"
          PROJECT: "cognitive"
          FLAKY: "true"
        translate:
          PACKAGE: "services.translate"
          PROJECT: "cognitive"
          FLAKY: "true"
        vision:
          PACKAGE: "services.vision"
          PROJECT: "cognitive"
          FLAKY: "true"
        core:
          PACKAGE: "core"
          PROJECT: "core"
        explainers1:
          PACKAGE: "explainers.split1"
          PROJECT: "core"
        explainers2:
          PACKAGE: "explainers.split2"
          PROJECT: "deepLearning"
        explainers3:
          PACKAGE: "explainers.split3"
          PROJECT: "deepLearning"
        exploratory:
          PACKAGE: "exploratory"
          PROJECT: "core"
        featurize:
          PACKAGE: "featurize"
          PROJECT: "core"
        image:
          PACKAGE: "image"
          PROJECT: "core"
        io1:
          PACKAGE: "io.split1"
          PROJECT: "core"
          FLAKY: "true"
        io2:
          PACKAGE: "io.split2"
          PROJECT: "core"
          FLAKY: "true"
        isolationforest:
          PACKAGE: "isolationforest"
          PROJECT: "core"
        flaky:
          PACKAGE: "flaky"  # TODO fix flaky test so isolation is not needed
          PROJECT: "core"
          FLAKY: "true"
        lightgbm1:
          PACKAGE: "lightgbm.split1"  # TODO speed up LGBM Tests and remove split
          PROJECT: "lightgbm"
          FLAKY: "true"
        lightgbm2:
          PACKAGE: "lightgbm.split2"
          PROJECT: "lightgbm"
          FLAKY: "true"
        lightgbm3:
          PACKAGE: "lightgbm.split3"
          PROJECT: "lightgbm"
          FLAKY: "true"
        lightgbm4:
          PACKAGE: "lightgbm.split4"
          PROJECT: "lightgbm"
          FLAKY: "true"
        lightgbm5:
          PACKAGE: "lightgbm.split5"
          PROJECT: "lightgbm"
          FLAKY: "true"
        lightgbm6:
          PACKAGE: "lightgbm.split6"
          PROJECT: "lightgbm"
          FLAKY: "true"
        opencv:
          PACKAGE: "opencv"
          PROJECT: "opencv"
        recommendation:
          PACKAGE: "recommendation"
          PROJECT: "core"
        stages:
          PACKAGE: "stages"
          PROJECT: "core"
        nn:
          PACKAGE: "nn"
          PROJECT: "core"
        train:
          PACKAGE: "train"
          PROJECT: "core"
        vw:
          PACKAGE: "vw"
          PROJECT: "vw"
    steps:
      - template: templates/free_disk.yml
      # - template: templates/ivy_cache.yml
      - task: AzureCLI@2
        displayName: 'Setup repo'
        retryCountOnTaskFailure: 1
        inputs:
          azureSubscription: 'SynapseML Build'
          scriptLocation: inlineScript
          scriptType: bash
          inlineScript: |
            export SBT_OPTS="-Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -Xss2M -Duser.timezone=GMT"
            sbt getDatasets "project $(PROJECT)" Test/compile
      - task: AzureCLI@2
        displayName: 'Unit Test'
        timeoutInMinutes: 90
        inputs:
          azureSubscription: 'SynapseML Build'
          scriptLocation: inlineScript
          scriptType: bash
          inlineScript: |
            ulimit -c unlimited
            echo "Available CPUs: $(nproc)"
            export SBT_OPTS="-Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -Xss2M -Duser.timezone=GMT -Dscala.concurrent.context.numThreads=8 -Dscala.concurrent.context.maxThreads=8"
            # Only run coverage on PRs, master, or tag builds
            if [ "$(runCoverage)" = "True" ]; then COV_CMD="coverage"; else COV_CMD=""; fi
            # Explicit TEST_CLASSES win; otherwise run every suite under PACKAGE.
            TEST_SPEC="${TEST_CLASSES:-com.microsoft.azure.synapse.ml.$(PACKAGE).**}"
            # Retry once, but only for legs that set FLAKY=true.
            (timeout 30m sbt $COV_CMD "project $(PROJECT)" "testOnly $TEST_SPEC") || (${FLAKY:-false} && timeout 30m sbt $COV_CMD "project $(PROJECT)" "testOnly $TEST_SPEC")
      - task: PublishTestResults@2
        displayName: 'Publish Test Results'
        inputs:
          testResultsFiles: '**/test-reports/TEST-*.xml'
          failTaskOnFailedTests: true
        condition: succeededOrFailed()
      - task: AzureCLI@2
        displayName: 'Generate Codecov report'
        retryCountOnTaskFailure: 1
        inputs:
          azureSubscription: 'SynapseML Build'
          scriptLocation: inlineScript
          scriptType: bash
          inlineScript: 'sbt coverageReport'
        condition: and(succeededOrFailed(), eq(variables.runCoverage, true))
      - template: templates/kv.yml
      - ${{ if or(eq(variables['Build.Reason'], 'PullRequest'), eq(variables['Build.SourceBranch'], 'refs/heads/master'), startsWith(variables['Build.SourceBranch'], 'refs/tags/')) }}:
          - template: templates/codecov.yml