# This is an example of how you can use the Google Pipelines API backend
# provider. *This is not a complete configuration file!* The
# content here should be copy pasted into the backend -> providers section
# of cromwell.example.backends/cromwell.examples.conf in the root of the repository.
# You should uncomment lines that you want to define, and read carefully to customize
# the file. If you have any questions, please open an issue at
# https://broadworkbench.atlassian.net/projects/BA/issues

# Documentation
# https://cromwell.readthedocs.io/en/stable/backends/Google/

# Named Google auth modes. Other stanzas in this file select one of them by
# its `name` (for example `auth = "service-account"`).
google {
  application-name = "cromwell"
  auths = [
    {
      name = "application-default"
      scheme = "application_default"
    },
    {
      name = "service-account"
      scheme = "service_account"
      service-account-id = ""
      json-file = "credentials.json"
    },
    {
      name = "user-service-account"
      scheme = "user_service_account"
    }
  ]
}

# Filesystems available in this Cromwell instance.
# They can be enabled individually in the engine.filesystems stanza and in the config.filesystems stanza of backends.
# There is a default built-in local filesystem that can also be referenced as "local".
filesystems {
  gcs {
    class = "cromwell.filesystems.gcs.GcsPathBuilderFactory"
  }
}

engine {
  filesystems {
    gcs {
      auth = "service-account"
    }
  }
}

backend {
  default = PAPIv2

  providers {
    PAPIv2 {
      actor-factory = "cromwell.backend.google.pipelines.v2beta.PipelinesApiLifecycleActorFactory"

      config {
        # Google project
        project = ""

        # Base bucket for workflow executions
        root = "gs://"

        # Make the name of the backend used for call caching purposes insensitive to the PAPI version.
        name-for-call-caching-purposes: PAPI

        # Emit a warning if jobs last longer than this amount of time. This might indicate that something got stuck in PAPI.
        slow-job-warning-time: 24 hours

        # Set this to the lower of the two values "Queries per 100 seconds" and "Queries per 100 seconds per user" for
        # your project.
        #
        # Used to help determine maximum throughput to the Google Genomics API. Setting this value too low will
        # cause a drop in performance. Setting this value too high will cause QPS based locks from Google.
        # 1000 is the default "Queries per 100 seconds per user", 50000 is the default "Queries per 100 seconds"
        # See https://cloud.google.com/genomics/quotas for more information
        genomics-api-queries-per-100-seconds = 1000

        # Polling for completion backs-off gradually for slower-running jobs.
        # This is the maximum polling interval (in seconds):
        maximum-polling-interval = 600

        # Optional Dockerhub Credentials. Can be used to access private docker images.
        dockerhub {
          # account = ""
          # token = ""
        }

        # Number of workers to assign to PAPI requests
        request-workers = 3

        # Optional configuration to use high security network (Virtual Private Cloud) for running jobs.
        # See https://cromwell.readthedocs.io/en/stable/backends/Google/ for more details.
        # virtual-private-cloud {
        #   network-label-key = "network-key"
        #   auth = "application-default"
        # }

        # Global pipeline timeout
        # Defaults to 7 days; max 30 days
        # pipeline-timeout = 7 days

        genomics {
          # A reference to an auth defined in the `google` stanza at the top. This auth is used to create
          # Pipelines and manipulate auth JSONs.
          auth = "service-account"

          # Alternative service account to use on the launched compute instance.
          # NOTE: If combined with service account authorization, both that service account and this service account
          # must be able to read and write to the 'root' GCS path.
          compute-service-account = "default"

          # The Cloud Life Sciences API is available in multiple locations; for the full list see
          # https://cloud.google.com/life-sciences/docs/concepts/locations
          location = "us-central1"

          # Endpoint for APIs. For locations other than us-central1, the endpoint needs to be updated to match the location.
          # For example for "europe-west4" location the endpoint-url should be "https://europe-west4-lifesciences.googleapis.com"
          endpoint-url = "https://lifesciences.googleapis.com"

          # Restrict access to VM metadata. Useful in cases when untrusted containers are running under a service
          # account not owned by the submitting user
          restrict-metadata-access = false

          # Pipelines v2 only: specify the number of times localization and delocalization operations should be attempted
          # There is no logic to determine if the error was transient or not, everything is retried upon failure
          # Defaults to 3
          localization-attempts = 3

          # Specifies the minimum file size for `gsutil cp` to use parallel composite uploads during delocalization.
          # Parallel composite uploads can result in a significant improvement in delocalization speed for large files
          # but may introduce complexities in downloading such files from GCS, please see
          # https://cloud.google.com/storage/docs/gsutil/commands/cp#parallel-composite-uploads for more information.
          #
          # If set to 0 parallel composite uploads are turned off. The default Cromwell configuration turns off
          # parallel composite uploads, this sample configuration turns it on for files of 150M or larger.
          parallel-composite-upload-threshold = "150M"
        }

        # Controls how batched requests to PAPI are handled:
        batch-requests {
          timeouts {
            # Timeout when attempting to connect to PAPI to make requests:
            # connect = 10 seconds

            # Timeout waiting for batch responses from PAPI:
            #
            # Note: Try raising this value if you see errors in logs like:
            #   WARN - PAPI request worker PAPIQueryWorker-[...] terminated. 99 run creation requests, 0 status poll requests, and 0 abort requests will be reconsidered. If any of those succeeded in the cloud before the batch request failed, they might be run twice.
            #   ERROR - Read timed out
            # read = 10 seconds
          }
        }

        filesystems {
          gcs {
            # A reference to a potentially different auth for manipulating files via engine functions.
            auth = "service-account"

            # Google project which will be billed for the requests
            project = ""

            caching {
              # When a cache hit is found, the following duplication strategy will be followed to use the cached outputs
              # Possible values: "copy", "reference". Defaults to "copy"
              # "copy": Copy the output files
              # "reference": DO NOT copy the output files but point to the original output files instead.
              #              Will still make sure that all the original output files exist and are accessible before
              #              going forward with the cache hit.
              duplication-strategy = "copy"
            }
          }
        }

        default-runtime-attributes {
          cpu: 1
          failOnStderr: false
          continueOnReturnCode: 0
          memory: "2048 MB"
          bootDiskSizeGb: 10

          # Allowed to be a String, or a list of Strings
          disks: "local-disk 10 SSD"

          # Worker nodes will be created without External IP address, Private Google Access needs to be enabled to access
          # GCS and Cloud NAT needs to be configured to access Internet resources such as Docker HUB
          noAddress: true

          # Update to 1 to make PVM the default choice unless over-ridden in the task runtime-attribute
          preemptible: 0

          zones: ["us-central1-b", "us-central1-a"]
        }

        include "papi_v2_reference_image_manifest.conf"
      }
    }
  }
}

webservice {
  port = 8000
  interface = 0.0.0.0
  binding-timeout = 5s
  instance.name = "cromwell"
}

akka {
  # Allow longer timeout
  # https://gatkforums.broadinstitute.org/gatk/discussion/comment/41714#Comment_41714
  http {
    server {
      request-timeout = 600s
      idle-timeout = 600s
    }
  }
}

database {
  # mysql example
  profile = "slick.jdbc.MySQLProfile$"
  db {
    driver = "com.mysql.jdbc.Driver"
    url = "jdbc:mysql:///cromwell?rewriteBatchedStatements=true"

    # Update the database user if other than root
    user = "root"
    password = ""
    connectionTimeout = 5000
  }
}