{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/runloop-ai/main/json-schema/runloop-scenario-schema.json",
  "$comment": "Fields marked nullable in the source OpenAPI document are expressed here with a \"null\" type (or anyOf with {\"type\": \"null\"} for $ref fields), because `nullable` is not a JSON Schema 2020-12 keyword.",
  "title": "Runloop Scenario",
  "description": "JSON Schema for the Runloop scenario resource, derived from the Runloop OpenAPI schema ScenarioCreateParameters.",
  "type": "object",
  "additionalProperties": false,
  "properties": {
    "name": {
      "type": "string",
      "description": "Name of the scenario."
    },
    "input_context": {
      "$ref": "#/$defs/InputContext",
      "description": "The input context for the Scenario."
    },
    "scoring_contract": {
      "$ref": "#/$defs/ScoringContract",
      "description": "The scoring contract for the Scenario."
    },
    "environment_parameters": {
      "anyOf": [
        { "$ref": "#/$defs/ScenarioEnvironment" },
        { "type": "null" }
      ],
      "description": "The Environment in which the Scenario will run."
    },
    "metadata": {
      "type": ["object", "null"],
      "additionalProperties": {
        "type": "string"
      },
      "description": "User defined metadata to attach to the scenario for organization."
    },
    "reference_output": {
      "type": ["string", "null"],
      "description": "A string representation of the reference output to solve the scenario. Commonly can be the result of a git diff or a sequence of command actions to apply to the environment."
    },
    "required_environment_variables": {
      "type": ["array", "null"],
      "items": {
        "type": "string"
      },
      "description": "Environment variables required to run the scenario. If these variables are not provided, the scenario will fail to start."
    },
    "required_secret_names": {
      "type": ["array", "null"],
      "items": {
        "type": "string"
      },
      "description": "Secrets required to run the scenario (user secret name to scenario required secret name). If these secrets are not provided or the mapping is incorrect, the scenario will fail to start."
    },
    "validation_type": {
      "anyOf": [
        { "$ref": "#/$defs/ValidationType" },
        { "type": "null" }
      ],
      "description": "Validation strategy."
    },
    "scorer_timeout_sec": {
      "type": ["integer", "null"],
      "format": "int32",
      "description": "Timeout for scoring in seconds. Default 30 minutes (1800s)."
    }
  },
  "required": ["name", "input_context", "scoring_contract"],
  "$defs": {
    "InputContext": {
      "type": "object",
      "additionalProperties": false,
      "description": "InputContextView specifies the problem statement along with all additional context for a Scenario.",
      "properties": {
        "problem_statement": {
          "type": "string",
          "description": "The problem statement for the Scenario."
        },
        "additional_context": {
          "type": ["object", "null"],
          "description": "Additional JSON structured input context."
        }
      },
      "required": ["problem_statement"]
    },
    "AstGrepScoringFunction": {
      "type": "object",
      "additionalProperties": false,
      "description": "AstGrepScoringFunction utilizes structured code search for scoring.",
      "properties": {
        "lang": {
          "type": "string",
          "description": "The language of the pattern."
        },
        "search_directory": {
          "type": "string",
          "description": "The path to search."
        },
        "pattern": {
          "type": "string",
          "description": "AST pattern to match. Pattern will be passed to ast-grep on the command line surrounded by double quotes (\"), so make sure to use proper escaping (for example, \\$\\$\\$)."
        },
        "type": {
          "type": "string",
          "enum": ["ast_grep_scorer"],
          "default": "ast_grep_scorer"
        }
      },
      "required": ["search_directory", "pattern", "type"]
    },
    "BashScriptScoringFunction": {
      "type": "object",
      "additionalProperties": false,
      "description": "BashScriptScoringFunction is a scoring function specified by a bash script that will be run in the context of your environment.",
      "properties": {
        "bash_script": {
          "type": "string",
          "description": "A single bash script that sets up the environment, scores, and prints the final score to standard out. Score should be a float between 0.0 and 1.0, and look like \"score=[0.0..1.0]\"."
        },
        "type": {
          "type": "string",
          "enum": ["bash_script_scorer"],
          "default": "bash_script_scorer"
        }
      },
      "required": ["type"]
    },
    "CommandScoringFunction": {
      "type": "object",
      "additionalProperties": false,
      "description": "CommandScoringFunction executes a single command and checks the result. The output of the command will be printed. Scoring passes if the command returns status code 0; otherwise it fails.",
      "properties": {
        "command": {
          "type": "string",
          "description": "The command to execute."
        },
        "type": {
          "type": "string",
          "enum": ["command_scorer"],
          "default": "command_scorer"
        }
      },
      "required": ["type"]
    },
    "CustomScoringFunction": {
      "type": "object",
      "additionalProperties": false,
      "description": "CustomScoringFunction is a custom, user defined scoring function.",
      "properties": {
        "custom_scorer_type": {
          "type": "string",
          "description": "Type of the scoring function, previously registered with Runloop."
        },
        "scorer_params": {
          "type": ["object", "null"],
          "description": "Additional JSON structured context to pass to the scoring function."
        },
        "type": {
          "type": "string",
          "enum": ["custom_scorer"],
          "default": "custom_scorer"
        }
      },
      "required": ["custom_scorer_type", "type"]
    },
    "PythonScriptScoringFunction": {
      "type": "object",
      "additionalProperties": false,
      "description": "PythonScriptScoringFunction will run a python script in the context of your environment as a ScoringFunction.",
      "properties": {
        "requirements_contents": {
          "type": ["string", "null"],
          "description": "Package dependencies to be installed. The requirements should be a valid requirements.txt file."
        },
        "python_script": {
          "type": "string",
          "description": "Python script to be run. The script should output the score to standard out as a float between 0.0 and 1.0."
        },
        "python_version_constraint": {
          "type": ["string", "null"],
          "description": "Python version to run scoring. Default is \"==3.12.10\""
        },
        "type": {
          "type": "string",
          "enum": ["python_script_scorer"],
          "default": "python_script_scorer"
        }
      },
      "required": ["python_script", "type"]
    },
    "TestFile": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "file_path": {
          "type": "string",
          "description": "Path to write content of the test file, relative to your environment's working directory"
        },
        "file_contents": {
          "type": "string",
          "description": "Content of the test file"
        }
      }
    },
    "TestBasedScoringFunction": {
      "type": "object",
      "additionalProperties": false,
      "description": "TestBasedScoringFunction writes test files to disk and executes a test command to verify the solution.",
      "properties": {
        "test_files": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/TestFile"
          },
          "description": "List of test files to create"
        },
        "test_command": {
          "type": "string",
          "description": "The command to execute for running the tests"
        },
        "type": {
          "type": "string",
          "enum": ["test_based_scorer"],
          "default": "test_based_scorer"
        }
      },
      "required": ["type"]
    },
    "BuiltInScoringFunction": {
      "oneOf": [
        { "$ref": "#/$defs/AstGrepScoringFunction" },
        { "$ref": "#/$defs/BashScriptScoringFunction" },
        { "$ref": "#/$defs/CommandScoringFunction" },
        { "$ref": "#/$defs/CustomScoringFunction" },
        { "$ref": "#/$defs/PythonScriptScoringFunction" },
        { "$ref": "#/$defs/TestBasedScoringFunction" }
      ],
      "discriminator": {
        "propertyName": "type",
        "mapping": {
          "ast_grep_scorer": "#/$defs/AstGrepScoringFunction",
          "bash_script_scorer": "#/$defs/BashScriptScoringFunction",
          "command_scorer": "#/$defs/CommandScoringFunction",
          "custom_scorer": "#/$defs/CustomScoringFunction",
          "python_script_scorer": "#/$defs/PythonScriptScoringFunction",
          "test_based_scorer": "#/$defs/TestBasedScoringFunction"
        }
      }
    },
    "ScoringFunction": {
      "type": "object",
      "additionalProperties": false,
      "description": "ScoringFunction specifies a method of scoring a Scenario.",
      "properties": {
        "name": {
          "type": "string",
          "description": "Name of scoring function. Names must only contain [a-zA-Z0-9_-]."
        },
        "scorer": {
          "$ref": "#/$defs/BuiltInScoringFunction",
          "description": "The scoring function to use for evaluating this scenario. The type field determines which built-in function to use."
        },
        "weight": {
          "type": "number",
          "format": "float",
          "description": "Weight to apply to scoring function score. Weights of all scoring functions should sum to 1.0."
        }
      },
      "required": ["name", "scorer", "weight"]
    },
    "ScoringContract": {
      "type": "object",
      "additionalProperties": false,
      "description": "ScoringContract specifies the scoring functions used to evaluate a Scenario.",
      "properties": {
        "scoring_function_parameters": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/ScoringFunction"
          },
          "description": "A list of scoring functions used to evaluate the Scenario."
        }
      },
      "required": ["scoring_function_parameters"]
    },
    "ResourceSize": {
      "type": "string",
      "enum": [
        "X_SMALL",
        "SMALL",
        "MEDIUM",
        "LARGE",
        "X_LARGE",
        "XX_LARGE",
        "CUSTOM_SIZE"
      ],
      "description": "The size of the Devbox resources for Runloop to allocate.\n\nX_SMALL: 0.5 cpu x 1GiB memory x 4GiB disk\nSMALL: 1 cpu x 2GiB memory x 4GiB disk\nMEDIUM: 2 cpu x 4GiB memory x 8GiB disk\nLARGE: 2 cpu x 8GiB memory x 16GiB disk\nX_LARGE: 4 cpu x 16GiB memory x 16GiB disk\nXX_LARGE: 8 cpu x 32GiB memory x 16GiB disk\nCUSTOM_SIZE: To choose a custom size, set this enum and also the custom_cpu_cores, custom_gb_memory, and optionally custom_disk_size in launch parameters. CPU must be 0.5, 1, or a multiple of 2 (max 16). Memory must be 1 or a multiple of 2 (max 64GiB). Disk must be a multiple of 2 (min 2GiB, max 64GiB). The cpu:memory ratio must be between 1:2 and 1:8 inclusive.\n",
      "x-enum-descriptions": {
        "X_SMALL": "0.5 cpu x 1GiB memory x 4GiB disk",
        "SMALL": "1 cpu x 2GiB memory x 4GiB disk",
        "MEDIUM": "2 cpu x 4GiB memory x 8GiB disk",
        "LARGE": "2 cpu x 8GiB memory x 16GiB disk",
        "X_LARGE": "4 cpu x 16GiB memory x 16GiB disk",
        "XX_LARGE": "8 cpu x 32GiB memory x 16GiB disk",
        "CUSTOM_SIZE": "To choose a custom size, set this enum and also the custom_cpu_cores, custom_gb_memory, and optionally custom_disk_size in launch parameters. CPU must be 0.5, 1, or a multiple of 2 (max 16). Memory must be 1 or a multiple of 2 (max 64GiB). Disk must be a multiple of 2 (min 2GiB, max 64GiB). The cpu:memory ratio must be between 1:2 and 1:8 inclusive."
      }
    },
    "IdleAction": {
      "type": "string",
      "enum": ["shutdown", "suspend"],
      "description": "Action to take after Devbox idle timer is triggered.\n\nshutdown: Shutdown the Devbox.\nsuspend: Suspend the Devbox.\n",
      "x-enum-descriptions": {
        "shutdown": "Shutdown the Devbox.",
        "suspend": "Suspend the Devbox."
      }
    },
    "IdleConfigurationParameters": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "idle_time_seconds": {
          "type": "integer",
          "format": "int32",
          "description": "After idle_time_seconds, on_idle action will be taken."
        },
        "on_idle": {
          "$ref": "#/$defs/IdleAction",
          "description": "Action to take after Devbox becomes idle."
        }
      },
      "required": ["idle_time_seconds", "on_idle"]
    },
    "Architecture": {
      "type": "string",
      "enum": ["x86_64", "arm64"]
    },
    "UserParameters": {
      "type": "object",
      "additionalProperties": false,
      "description": "Configuration for the Linux user in the Devbox environment.",
      "properties": {
        "username": {
          "type": "string",
          "description": "Username for the Linux user."
        },
        "uid": {
          "type": "integer",
          "format": "int32",
          "description": "User ID (UID) for the Linux user. Must be a non-negative integer."
        }
      },
      "required": ["username", "uid"]
    },
    "ResumeTriggers": {
      "type": "object",
      "additionalProperties": false,
      "description": "Triggers that can resume a suspended Devbox.",
      "properties": {
        "http": {
          "type": ["boolean", "null"],
          "description": "When true, HTTP traffic to a suspended Devbox via tunnel will trigger a resume."
        },
        "axon_event": {
          "type": ["boolean", "null"],
          "description": "When true, axon events targeting a suspended Devbox will trigger a resume."
        }
      }
    },
    "LifecycleConfigurationParameters": {
      "type": "object",
      "additionalProperties": false,
      "description": "Lifecycle configuration for Devbox idle and resume behavior. Configure idle policy via after_idle and resume triggers via resume_triggers.",
      "properties": {
        "after_idle": {
          "anyOf": [
            { "$ref": "#/$defs/IdleConfigurationParameters" },
            { "type": "null" }
          ],
          "description": "Configure Devbox lifecycle based on idle activity. If both this and the top-level after_idle are set, they must have the same value. Prefer this field for new integrations."
        },
        "resume_triggers": {
          "anyOf": [
            { "$ref": "#/$defs/ResumeTriggers" },
            { "type": "null" }
          ],
          "description": "Triggers that can resume a suspended Devbox."
        }
      }
    },
    "LaunchParameters": {
      "type": "object",
      "additionalProperties": false,
      "description": "LaunchParameters enable you to customize the resources available to your Devbox as well as the environment set up that should be completed before the Devbox is marked as 'running'.",
      "properties": {
        "launch_commands": {
          "type": ["array", "null"],
          "items": {
            "type": "string"
          },
          "description": "Set of commands to be run at launch time, before the entrypoint process is run."
        },
        "resource_size_request": {
          "anyOf": [
            { "$ref": "#/$defs/ResourceSize" },
            { "type": "null" }
          ],
          "description": "Preset Devbox resources (vCPU, RAM in GiB, ephemeral disk in GiB). If not set, SMALL is used. X_SMALL: 0.5 vCPU, 1 GiB RAM, 4 GiB disk. SMALL: 1 vCPU, 2 GiB RAM, 4 GiB disk. MEDIUM: 2 vCPU, 4 GiB RAM, 8 GiB disk. LARGE: 2 vCPU, 8 GiB RAM, 16 GiB disk. X_LARGE: 4 vCPU, 16 GiB RAM, 16 GiB disk. XX_LARGE: 8 vCPU, 32 GiB RAM, 16 GiB disk. CUSTOM_SIZE: set custom_cpu_cores, custom_gb_memory, and optionally custom_disk_size."
        },
        "available_ports": {
          "type": ["array", "null"],
          "items": {
            "type": "integer",
            "format": "int32"
          },
          "deprecated": true,
          "description": "[Deprecated] A list of ports to make available on the Devbox. This field is ignored."
        },
        "keep_alive_time_seconds": {
          "type": ["integer", "null"],
          "format": "int64",
          "description": "Time in seconds after which Devbox will automatically shutdown. Default is 1 hour. Maximum is 48 hours (172800 seconds)."
        },
        "after_idle": {
          "anyOf": [
            { "$ref": "#/$defs/IdleConfigurationParameters" },
            { "type": "null" }
          ],
          "description": "Configure Devbox lifecycle based on idle activity. If after_idle is set, Devbox will ignore keep_alive_time_seconds. If both after_idle and lifecycle.after_idle are set, they must have the same value. Use lifecycle.after_idle instead."
        },
        "custom_cpu_cores": {
          "type": ["integer", "null"],
          "format": "int32",
          "description": "Custom CPU cores. Must be 0.5, 1, or a multiple of 2. Max is 16."
        },
        "custom_gb_memory": {
          "type": ["integer", "null"],
          "format": "int32",
          "description": "Custom memory size in GiB. Must be 1 or a multiple of 2. Max is 64GiB."
        },
        "custom_disk_size": {
          "type": ["integer", "null"],
          "format": "int32",
          "description": "Custom disk size in GiB. Must be a multiple of 2. Min is 2GiB, max is 64GiB."
        },
        "architecture": {
          "anyOf": [
            { "$ref": "#/$defs/Architecture" },
            { "type": "null" }
          ],
          "description": "The target architecture for the Devbox. If unset, defaults to x86_64."
        },
        "user_parameters": {
          "anyOf": [
            { "$ref": "#/$defs/UserParameters" },
            { "type": "null" }
          ],
          "description": "Specify the user for execution on Devbox. If not set, default `user` will be used."
        },
        "required_services": {
          "type": ["array", "null"],
          "items": {
            "type": "string"
          },
          "description": "A list of ContainerizedService names to be started when a Devbox is created. A valid ContainerizedService must be specified in Blueprint to be started."
        },
        "network_policy_id": {
          "type": ["string", "null"],
          "description": "(Optional) ID of the network policy to apply to Devboxes launched with these parameters. When set on a Blueprint launch parameters, Devboxes created from it will inherit this policy unless explicitly overridden."
        },
        "lifecycle": {
          "anyOf": [
            { "$ref": "#/$defs/LifecycleConfigurationParameters" },
            { "type": "null" }
          ],
          "description": "Lifecycle configuration for idle and resume behavior. Configure idle policy via lifecycle.after_idle (if both this and the top-level after_idle are set, they must match) and resume triggers via lifecycle.resume_triggers."
        }
      }
    },
    "ScenarioEnvironment": {
      "type": "object",
      "additionalProperties": false,
      "description": "ScenarioEnvironmentParameters specify the environment in which a Scenario will be run.",
      "properties": {
        "blueprint_id": {
          "type": ["string", "null"],
          "description": "Use the blueprint with matching ID."
        },
        "snapshot_id": {
          "type": ["string", "null"],
          "description": "Use the snapshot with matching ID."
        },
        "launch_parameters": {
          "anyOf": [
            { "$ref": "#/$defs/LaunchParameters" },
            { "type": "null" }
          ],
          "description": "Optional launch parameters to apply to the devbox environment at launch."
        },
        "working_directory": {
          "type": ["string", "null"],
          "description": "The working directory where the agent is expected to fulfill the scenario. Scoring functions also run from the working directory."
        }
      }
    },
    "ValidationType": {
      "type": "string",
      "enum": ["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]
    }
  }
}