#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

#######################################
# Validate that a BigQuery table exists and has rows.
#
# Runs `SELECT COUNT(*)` against the table up to 10 times, sleeping between
# attempts, and stops as soon as a positive count is returned. When the query
# itself fails, `bq show` is run only as a diagnostic to report whether the
# table appears to exist; its result does not affect the outcome.
#
# Usage: validate_table <table_name> [retry_delay_seconds]
# Globals:
#   GCP_PROJECT_ID (read), BQ_DATASET (read) - both must be non-empty.
# Arguments:
#   $1 - table name, without project or dataset qualifier.
#   $2 - optional non-negative integer: seconds to sleep between retries
#        (default 60). Any other value is ignored and the default is used.
# Outputs:
#   Progress, debug and result messages on stdout.
# Returns:
#   0 if a positive row count was obtained, 1 otherwise (including a missing
#   table name or missing environment variables).
#######################################
validate_table() {
  # ${N:-} / ${VAR:-} keep the function usable from callers running `set -u`.
  local table_name=${1:-}
  local requested_delay=${2:-}
  echo "DEBUG: ===== Starting validate_table for table: $table_name ====="

  # Without a table name the query could never succeed and the function
  # would just burn through every retry before failing.
  if [[ -z "$table_name" ]]; then
    echo "ERROR: validate_table requires a table name as its first argument."
    return 1
  fi

  # Ensure required env vars are set (GCP_PROJECT_ID, BQ_DATASET are inherited).
  # `return` (not `exit`) so that a shell which sourced this file is not
  # terminated; when the script is executed directly the status still
  # becomes the script's exit code.
  if [[ -z "${GCP_PROJECT_ID:-}" || -z "${BQ_DATASET:-}" ]]; then
    echo "ERROR: GCP_PROJECT_ID and BQ_DATASET must be set in the environment."
    return 1
  fi

  # `bq query` (standard SQL) wants project.dataset.table, while `bq show`
  # wants project:dataset.table.
  local full_table_id="${GCP_PROJECT_ID}.${BQ_DATASET}.${table_name}"
  local full_table_id_show="${GCP_PROJECT_ID}:${BQ_DATASET}.${table_name}"
  local count=""
  local exit_code=1
  local retries=10
  local delay=60 # Default seconds between retries
  local query_output=""
  local i

  # Allow overriding delay via second argument (optional).
  if [[ -n "$requested_delay" && "$requested_delay" =~ ^[0-9]+$ ]]; then
    delay=$requested_delay
    echo "DEBUG: Using custom retry delay: ${delay}s for table ${table_name}"
  else
    echo "DEBUG: Using default retry delay: ${delay}s for table ${table_name}"
  fi

  echo "DEBUG: Full table ID: ${full_table_id}, Max retries: ${retries}"

  for ((i = 1; i <= retries; i++)); do
    echo "DEBUG: Starting attempt $i/$retries..."

    # Forget the previous attempt's result so the final report always
    # describes the last attempt, never a stale earlier one.
    count=""

    echo "DEBUG: Executing: bq query --project_id=${GCP_PROJECT_ID} --use_legacy_sql=false --format=sparse --max_rows=1 \"SELECT COUNT(*) FROM \`${full_table_id}\`\""
    # stderr is folded into the captured output so failures show up in the
    # raw-output debug line below. `|| exit_code=$?` records the status
    # without aborting a caller that runs with `set -e`.
    exit_code=0
    query_output=$(bq query --project_id="${GCP_PROJECT_ID}" \
      --use_legacy_sql=false \
      --format=sparse \
      --max_rows=1 \
      "SELECT COUNT(*) FROM \`${full_table_id}\`" 2>&1) || exit_code=$?

    echo "DEBUG: bq query exit code: $exit_code"
    echo "DEBUG: bq query raw output: [$query_output]"

    if ((exit_code == 0)); then
      echo "DEBUG: bq query exited successfully (code 0)."
      # The count is taken from the last line of the output, with all
      # whitespace removed.
      count=$(printf '%s\n' "$query_output" | tail -n 1 | tr -d '[:space:]')
      echo "DEBUG: Processed count after removing whitespace (from last line): [$count]"

      # `[ ... -gt 0 ]` is kept on purpose: it compares in base 10, whereas
      # arithmetic contexts would treat a leading zero as octal.
      if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
        echo "DEBUG: Count [$count] is a positive integer. Validation successful for this attempt."
        break # Success! Found non-zero rows
      fi

      echo "DEBUG: Count [$count] is zero or not a positive integer."
      if [[ "$count" == "0" ]]; then
        echo "DEBUG: Explicit count of 0 received."
      fi
    else
      echo "DEBUG: bq query failed (exit code: $exit_code)."
      echo "DEBUG: Checking table existence with bq show..."
      # Diagnostic only: tells the reader whether the failure is a missing
      # table or something else. The retry loop continues either way.
      if ! bq show --project_id="${GCP_PROJECT_ID}" "${full_table_id_show}" > /dev/null 2>&1; then
        echo "DEBUG: Table ${full_table_id_show} appears not to exist (bq show failed)."
      else
        echo "DEBUG: Table ${full_table_id_show} appears to exist (bq show succeeded), but query failed."
      fi
    fi

    # No sleep after the final attempt; there is nothing left to wait for.
    if ((i < retries)); then
      echo "DEBUG: Validation condition not met on attempt $i. Retrying in $delay seconds..."
      sleep "$delay"
    else
      echo "DEBUG: Final attempt ($i) failed."
    fi
  done

  echo "DEBUG: ===== Final validation check for table: $table_name ====="
  if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
    echo "SUCCESS: Table ${table_name} has ${count} rows. Final validation OK."
    echo "DEBUG: validate_table returning 0 (success)."
    return 0 # Indicate success
  fi

  echo "ERROR: Failed to get a non-zero row count for table ${table_name} after $retries retries (Last exit code: $exit_code, Last processed count: '$count')."
  echo "DEBUG: validate_table returning 1 (failure)."
  return 1 # Indicate failure
}

# Allow the script to be sourced using "source ./script.sh"
# and then call the function directly: "validate_table my_table 30"
# If the script is executed directly, check if arguments are provided and
# call the function; its return status becomes the script's exit status
# because it is the last command executed.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  if [[ $# -eq 0 ]]; then
    echo "Usage: $0 <table_name> [retry_delay_seconds]"
    echo "Requires GCP_PROJECT_ID and BQ_DATASET env vars."
    exit 1
  fi
  validate_table "$@"
fi