---
# Naftiko capability definition for the Runloop Benchmarks API.
#
# Layout:
#   info       - label, description, tags and dates for this capability
#   binds      - environment keys made available to the capability
#   capability - the consumed HTTP API (`consumes`) and the REST and MCP
#                adapters that expose each consumed operation (`exposes`)
naftiko: 1.0.0-alpha2

info:
  label: Runloop Benchmarks (benchmark)
  description: Runloop benchmarks capability covering one Runloop business surface. 19 operations.
  tags:
    - Runloop
    - Benchmarks
    - AI Agents
    - Sandboxes
  created: '2026-05-25'
  modified: '2026-05-25'

# RUNLOOP_API_KEY is referenced further down as {{env.RUNLOOP_API_KEY}} for
# bearer authentication against the consumed API.
binds:
  - namespace: env
    keys:
      RUNLOOP_API_KEY: RUNLOOP_API_KEY

capability:
  # Upstream HTTP API. Each operation is addressed by the adapters below as
  # `benchmark-benchmarks.<operation name>`.
  consumes:
    - type: http
      namespace: benchmark-benchmarks
      baseUri: https://api.runloop.ai
      description: Runloop benchmarks business capability. Self-contained, no shared references.
      resources:
        - name: v1-benchmarks
          path: /v1/benchmarks
          operations:
            - name: createBenchmark
              method: POST
              description: Create a Benchmark.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: body
                  in: body
                  type: object
                  description: Request body (JSON).
                  required: false
            - name: listBenchmarks
              method: GET
              description: List Benchmarks.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: name
                  in: query
                  type: string
                  description: Filter by name
                  required: false
                - name: limit
                  in: query
                  type: integer
                  description: The limit of items to return. Default is 20. Max is 5000.
                  required: false
                - name: starting_after
                  in: query
                  type: string
                  description: Load the next page of data starting after the item with the given ID.
                  required: false
                - name: include_total_count
                  in: query
                  type: boolean
                  description: >-
                    If true (default), includes total_count in the response. Set to false to
                    skip the count query for better performance on large datasets.
                  required: false

        - name: v1-benchmarks-list-public
          path: /v1/benchmarks/list_public
          operations:
            - name: listPublicBenchmarks
              method: GET
              description: List Public Benchmarks.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: limit
                  in: query
                  type: integer
                  description: The limit of items to return. Default is 20. Max is 5000.
                  required: false
                - name: starting_after
                  in: query
                  type: string
                  description: Load the next page of data starting after the item with the given ID.
                  required: false
                - name: include_total_count
                  in: query
                  type: boolean
                  description: >-
                    If true (default), includes total_count in the response. Set to false to
                    skip the count query for better performance on large datasets.
                  required: false

        - name: v1-benchmarks-metadata-keys
          path: /v1/benchmarks/metadata/keys
          operations:
            - name: getBenchmarkMetadataKeys
              method: GET
              description: List available benchmark metadata keys.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                # NOTE(review): `_` is an optional query parameter with an empty
                # description; it looks like a placeholder for an operation that
                # takes no inputs - confirm before removing or renaming it.
                - name: _
                  in: query
                  type: string
                  description: ''
                  required: false

        - name: v1-benchmarks-metadata-keys-key-values
          path: /v1/benchmarks/metadata/keys/{key}/values
          operations:
            - name: getBenchmarkMetadataValues
              method: GET
              description: List values for a specific benchmark metadata key.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: key
                  in: path
                  type: string
                  description: The metadata key to get values for.
                  required: true

        - name: v1-benchmarks-runs
          path: /v1/benchmarks/runs
          operations:
            - name: listBenchmarkRunsDeprecated
              method: GET
              description: List BenchmarkRuns.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: name
                  in: query
                  type: string
                  description: Filter by name
                  required: false
                - name: benchmark_id
                  in: query
                  type: string
                  description: The Benchmark ID to filter by.
                  required: false
                - name: state
                  in: query
                  type: string
                  description: Filter by state
                  required: false
                - name: limit
                  in: query
                  type: integer
                  description: The limit of items to return. Default is 20. Max is 5000.
                  required: false
                - name: starting_after
                  in: query
                  type: string
                  description: Load the next page of data starting after the item with the given ID.
                  required: false
                - name: include_total_count
                  in: query
                  type: boolean
                  description: >-
                    If true (default), includes total_count in the response. Set to false to
                    skip the count query for better performance on large datasets.
                  required: false

        - name: v1-benchmarks-runs-id
          path: /v1/benchmarks/runs/{id}
          operations:
            - name: getBenchmarkRunDeprecated
              method: GET
              description: Get a previously created BenchmarkRun.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: id
                  in: path
                  type: string
                  description: The BenchmarkRun ID.
                  required: true

        - name: v1-benchmarks-runs-id-cancel
          path: /v1/benchmarks/runs/{id}/cancel
          operations:
            - name: cancelBenchmarkRunDeprecated
              method: POST
              description: Cancel a currently running Benchmark run.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: id
                  in: path
                  type: string
                  description: The BenchmarkRun ID.
                  required: true

        - name: v1-benchmarks-runs-id-complete
          path: /v1/benchmarks/runs/{id}/complete
          operations:
            - name: completeBenchmarkRunDeprecated
              method: POST
              description: Complete a BenchmarkRun.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: id
                  in: path
                  type: string
                  description: The BenchmarkRun ID.
                  required: true

        - name: v1-benchmarks-runs-id-download-logs
          path: /v1/benchmarks/runs/{id}/download_logs
          operations:
            - name: downloadBenchmarkRunLogsDeprecated
              method: POST
              description: Download logs for a Benchmark run.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: id
                  in: path
                  type: string
                  description: The BenchmarkRun ID.
                  required: true

        - name: v1-benchmarks-runs-id-scenario-runs
          path: /v1/benchmarks/runs/{id}/scenario_runs
          operations:
            - name: listBenchmarkRunScenarioRunsDeprecated
              method: GET
              description: List started scenario runs for a benchmark run.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: id
                  in: path
                  type: string
                  description: The BenchmarkRun ID.
                  required: true
                - name: state
                  in: query
                  type: string
                  description: Filter by Scenario Run state
                  required: false
                - name: limit
                  in: query
                  type: integer
                  description: The limit of items to return. Default is 20. Max is 5000.
                  required: false
                - name: starting_after
                  in: query
                  type: string
                  description: Load the next page of data starting after the item with the given ID.
                  required: false
                - name: include_total_count
                  in: query
                  type: boolean
                  description: >-
                    If true (default), includes total_count in the response. Set to false to
                    skip the count query for better performance on large datasets.
                  required: false

        - name: v1-benchmarks-start-run
          path: /v1/benchmarks/start_run
          operations:
            - name: startBenchmarkRun
              method: POST
              description: Start a new BenchmarkRun.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: body
                  in: body
                  type: object
                  description: Request body (JSON).
                  required: false

        - name: v1-benchmarks-id
          path: /v1/benchmarks/{id}
          operations:
            - name: updateBenchmark
              method: POST
              description: Update a Benchmark.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: id
                  in: path
                  type: string
                  description: The Benchmark ID.
                  required: true
                - name: body
                  in: body
                  type: object
                  description: Request body (JSON).
                  required: false
            - name: getBenchmark
              method: GET
              description: Get a Benchmark.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: id
                  in: path
                  type: string
                  description: The Benchmark ID.
                  required: true

        - name: v1-benchmarks-id-archive
          path: /v1/benchmarks/{id}/archive
          operations:
            - name: archiveBenchmark
              method: POST
              description: Archive a Benchmark.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: id
                  in: path
                  type: string
                  description: The ID of the Benchmark to archive.
                  required: true

        - name: v1-benchmarks-id-definitions
          path: /v1/benchmarks/{id}/definitions
          operations:
            - name: getBenchmarkScenarioDefinitions
              method: GET
              description: Get scenario definitions for a Benchmark.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: id
                  in: path
                  type: string
                  description: The Benchmark ID.
                  required: true
                - name: limit
                  in: query
                  type: integer
                  description: The limit of items to return. Default is 20. Max is 5000.
                  required: false
                - name: starting_after
                  in: query
                  type: string
                  description: Load the next page of data starting after the item with the given ID.
                  required: false

        - name: v1-benchmarks-id-runs
          path: /v1/benchmarks/{id}/runs
          operations:
            - name: getBenchmarkRuns
              method: GET
              description: Get runs for a provided Benchmark.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: id
                  in: path
                  type: string
                  description: The Benchmark ID.
                  required: true
                - name: limit
                  in: query
                  type: integer
                  description: The limit of items to return. Default is 20. Max is 5000.
                  required: false
                - name: starting_after
                  in: query
                  type: string
                  description: Load the next page of data starting after the item with the given ID.
                  required: false
                - name: include_total_count
                  in: query
                  type: boolean
                  description: >-
                    If true (default), includes total_count in the response. Set to false to
                    skip the count query for better performance on large datasets.
                  required: false

        - name: v1-benchmarks-id-scenarios
          path: /v1/benchmarks/{id}/scenarios
          operations:
            - name: updateBenchmarkScenarios
              method: POST
              description: Modify scenarios for a Benchmark.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: id
                  in: path
                  type: string
                  description: The Benchmark ID.
                  required: true
                - name: body
                  in: body
                  type: object
                  description: Request body (JSON).
                  required: false

        - name: v1-benchmarks-id-unarchive
          path: /v1/benchmarks/{id}/unarchive
          operations:
            - name: unarchiveBenchmark
              method: POST
              description: Unarchive a Benchmark.
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              inputParameters:
                - name: id
                  in: path
                  type: string
                  description: The ID of the Benchmark to unarchive.
                  required: true

      # Bearer token placed in a header; the value is templated from the
      # env key bound at the top of the file.
      authentication:
        type: bearer
        value: '{{env.RUNLOOP_API_KEY}}'
        placement: header

  exposes:
    # REST adapter: one resource per consumed path; every operation forwards
    # its inputs (`rest.params.*`, `rest.body`) to the consumed operation
    # named in `call`.
    - type: rest
      namespace: benchmark-benchmarks-rest
      port: 8080
      description: REST adapter for Runloop benchmarks. One Spectral-compliant resource per consumed operation.
      resources:
        - path: /v1/benchmarks
          name: v1-benchmarks
          description: REST surface for v1-benchmarks.
          operations:
            - method: POST
              name: createBenchmark
              description: Create a Benchmark.
              call: benchmark-benchmarks.createBenchmark
              with:
                body: rest.body
              outputParameters:
                - type: object
                  mapping: $.
            - method: GET
              name: listBenchmarks
              description: List Benchmarks.
              call: benchmark-benchmarks.listBenchmarks
              with:
                name: rest.params.name
                limit: rest.params.limit
                starting_after: rest.params.starting_after
                include_total_count: rest.params.include_total_count
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/list_public
          name: v1-benchmarks-list-public
          description: REST surface for v1-benchmarks-list-public.
          operations:
            - method: GET
              name: listPublicBenchmarks
              description: List Public Benchmarks.
              call: benchmark-benchmarks.listPublicBenchmarks
              with:
                limit: rest.params.limit
                starting_after: rest.params.starting_after
                include_total_count: rest.params.include_total_count
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/metadata/keys
          name: v1-benchmarks-metadata-keys
          description: REST surface for v1-benchmarks-metadata-keys.
          operations:
            - method: GET
              name: getBenchmarkMetadataKeys
              description: List available benchmark metadata keys.
              call: benchmark-benchmarks.getBenchmarkMetadataKeys
              with:
                _: rest.params._
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/metadata/keys/{key}/values
          name: v1-benchmarks-metadata-keys-key-values
          description: REST surface for v1-benchmarks-metadata-keys-key-values.
          operations:
            - method: GET
              name: getBenchmarkMetadataValues
              description: List values for a specific benchmark metadata key.
              call: benchmark-benchmarks.getBenchmarkMetadataValues
              with:
                key: rest.params.key
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/runs
          name: v1-benchmarks-runs
          description: REST surface for v1-benchmarks-runs.
          operations:
            - method: GET
              name: listBenchmarkRunsDeprecated
              description: List BenchmarkRuns.
              call: benchmark-benchmarks.listBenchmarkRunsDeprecated
              with:
                name: rest.params.name
                benchmark_id: rest.params.benchmark_id
                state: rest.params.state
                limit: rest.params.limit
                starting_after: rest.params.starting_after
                include_total_count: rest.params.include_total_count
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/runs/{id}
          name: v1-benchmarks-runs-id
          description: REST surface for v1-benchmarks-runs-id.
          operations:
            - method: GET
              name: getBenchmarkRunDeprecated
              description: Get a previously created BenchmarkRun.
              call: benchmark-benchmarks.getBenchmarkRunDeprecated
              with:
                id: rest.params.id
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/runs/{id}/cancel
          name: v1-benchmarks-runs-id-cancel
          description: REST surface for v1-benchmarks-runs-id-cancel.
          operations:
            - method: POST
              name: cancelBenchmarkRunDeprecated
              description: Cancel a currently running Benchmark run.
              call: benchmark-benchmarks.cancelBenchmarkRunDeprecated
              with:
                id: rest.params.id
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/runs/{id}/complete
          name: v1-benchmarks-runs-id-complete
          description: REST surface for v1-benchmarks-runs-id-complete.
          operations:
            - method: POST
              name: completeBenchmarkRunDeprecated
              description: Complete a BenchmarkRun.
              call: benchmark-benchmarks.completeBenchmarkRunDeprecated
              with:
                id: rest.params.id
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/runs/{id}/download_logs
          name: v1-benchmarks-runs-id-download-logs
          description: REST surface for v1-benchmarks-runs-id-download-logs.
          operations:
            - method: POST
              name: downloadBenchmarkRunLogsDeprecated
              description: Download logs for a Benchmark run.
              call: benchmark-benchmarks.downloadBenchmarkRunLogsDeprecated
              with:
                id: rest.params.id
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/runs/{id}/scenario_runs
          name: v1-benchmarks-runs-id-scenario-runs
          description: REST surface for v1-benchmarks-runs-id-scenario-runs.
          operations:
            - method: GET
              name: listBenchmarkRunScenarioRunsDeprecated
              description: List started scenario runs for a benchmark run.
              call: benchmark-benchmarks.listBenchmarkRunScenarioRunsDeprecated
              with:
                id: rest.params.id
                state: rest.params.state
                limit: rest.params.limit
                starting_after: rest.params.starting_after
                include_total_count: rest.params.include_total_count
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/start_run
          name: v1-benchmarks-start-run
          description: REST surface for v1-benchmarks-start-run.
          operations:
            - method: POST
              name: startBenchmarkRun
              description: Start a new BenchmarkRun.
              call: benchmark-benchmarks.startBenchmarkRun
              with:
                body: rest.body
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/{id}
          name: v1-benchmarks-id
          description: REST surface for v1-benchmarks-id.
          operations:
            - method: POST
              name: updateBenchmark
              description: Update a Benchmark.
              call: benchmark-benchmarks.updateBenchmark
              with:
                id: rest.params.id
                body: rest.body
              outputParameters:
                - type: object
                  mapping: $.
            - method: GET
              name: getBenchmark
              description: Get a Benchmark.
              call: benchmark-benchmarks.getBenchmark
              with:
                id: rest.params.id
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/{id}/archive
          name: v1-benchmarks-id-archive
          description: REST surface for v1-benchmarks-id-archive.
          operations:
            - method: POST
              name: archiveBenchmark
              description: Archive a Benchmark.
              call: benchmark-benchmarks.archiveBenchmark
              with:
                id: rest.params.id
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/{id}/definitions
          name: v1-benchmarks-id-definitions
          description: REST surface for v1-benchmarks-id-definitions.
          operations:
            - method: GET
              name: getBenchmarkScenarioDefinitions
              description: Get scenario definitions for a Benchmark.
              call: benchmark-benchmarks.getBenchmarkScenarioDefinitions
              with:
                id: rest.params.id
                limit: rest.params.limit
                starting_after: rest.params.starting_after
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/{id}/runs
          name: v1-benchmarks-id-runs
          description: REST surface for v1-benchmarks-id-runs.
          operations:
            - method: GET
              name: getBenchmarkRuns
              description: Get runs for a provided Benchmark.
              call: benchmark-benchmarks.getBenchmarkRuns
              with:
                id: rest.params.id
                limit: rest.params.limit
                starting_after: rest.params.starting_after
                include_total_count: rest.params.include_total_count
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/{id}/scenarios
          name: v1-benchmarks-id-scenarios
          description: REST surface for v1-benchmarks-id-scenarios.
          operations:
            - method: POST
              name: updateBenchmarkScenarios
              description: Modify scenarios for a Benchmark.
              call: benchmark-benchmarks.updateBenchmarkScenarios
              with:
                id: rest.params.id
                body: rest.body
              outputParameters:
                - type: object
                  mapping: $.

        - path: /v1/benchmarks/{id}/unarchive
          name: v1-benchmarks-id-unarchive
          description: REST surface for v1-benchmarks-id-unarchive.
          operations:
            - method: POST
              name: unarchiveBenchmark
              description: Unarchive a Benchmark.
              call: benchmark-benchmarks.unarchiveBenchmark
              with:
                id: rest.params.id
              outputParameters:
                - type: object
                  mapping: $.

    # MCP adapter: one tool per consumed operation. In this file every GET
    # operation is hinted readOnly/idempotent and every POST operation is not.
    - type: mcp
      namespace: benchmark-benchmarks-mcp
      port: 9090
      transport: http
      description: MCP adapter for Runloop benchmarks. One tool per consumed operation.
      tools:
        - name: runloop-benchmark-benchmarks-createBenchmark
          description: Create a Benchmark.
          hints:
            readOnly: false
            destructive: false
            idempotent: false
          call: benchmark-benchmarks.createBenchmark
          with:
            body: tools.body
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-listBenchmarks
          description: List Benchmarks.
          hints:
            readOnly: true
            destructive: false
            idempotent: true
          call: benchmark-benchmarks.listBenchmarks
          with:
            name: tools.name
            limit: tools.limit
            starting_after: tools.starting_after
            include_total_count: tools.include_total_count
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-listPublicBenchmarks
          description: List Public Benchmarks.
          hints:
            readOnly: true
            destructive: false
            idempotent: true
          call: benchmark-benchmarks.listPublicBenchmarks
          with:
            limit: tools.limit
            starting_after: tools.starting_after
            include_total_count: tools.include_total_count
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-getBenchmarkMetadataKeys
          description: List available benchmark metadata keys.
          hints:
            readOnly: true
            destructive: false
            idempotent: true
          call: benchmark-benchmarks.getBenchmarkMetadataKeys
          with:
            _: tools._
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-getBenchmarkMetadataValues
          description: List values for a specific benchmark metadata key.
          hints:
            readOnly: true
            destructive: false
            idempotent: true
          call: benchmark-benchmarks.getBenchmarkMetadataValues
          with:
            key: tools.key
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-listBenchmarkRunsDeprecated
          description: List BenchmarkRuns.
          hints:
            readOnly: true
            destructive: false
            idempotent: true
          call: benchmark-benchmarks.listBenchmarkRunsDeprecated
          with:
            name: tools.name
            benchmark_id: tools.benchmark_id
            state: tools.state
            limit: tools.limit
            starting_after: tools.starting_after
            include_total_count: tools.include_total_count
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-getBenchmarkRunDeprecated
          description: Get a previously created BenchmarkRun.
          hints:
            readOnly: true
            destructive: false
            idempotent: true
          call: benchmark-benchmarks.getBenchmarkRunDeprecated
          with:
            id: tools.id
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-cancelBenchmarkRunDeprecated
          description: Cancel a currently running Benchmark run.
          hints:
            readOnly: false
            destructive: false
            idempotent: false
          call: benchmark-benchmarks.cancelBenchmarkRunDeprecated
          with:
            id: tools.id
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-completeBenchmarkRunDeprecated
          description: Complete a BenchmarkRun.
          hints:
            readOnly: false
            destructive: false
            idempotent: false
          call: benchmark-benchmarks.completeBenchmarkRunDeprecated
          with:
            id: tools.id
          outputParameters:
            - type: object
              mapping: $.

        # NOTE(review): this tool name stops mid-word ("...Depreca") at exactly
        # 60 characters, while `call` keeps the full operation name. Presumably
        # a length cap applied when the file was generated - confirm before
        # renaming, since clients address the tool by this name.
        - name: runloop-benchmark-benchmarks-downloadBenchmarkRunLogsDepreca
          description: Download logs for a Benchmark run.
          hints:
            readOnly: false
            destructive: false
            idempotent: false
          call: benchmark-benchmarks.downloadBenchmarkRunLogsDeprecated
          with:
            id: tools.id
          outputParameters:
            - type: object
              mapping: $.

        # NOTE(review): this tool name also stops mid-word ("...Dep") at exactly
        # 60 characters; `call` keeps the full operation name. Confirm before
        # renaming.
        - name: runloop-benchmark-benchmarks-listBenchmarkRunScenarioRunsDep
          description: List started scenario runs for a benchmark run.
          hints:
            readOnly: true
            destructive: false
            idempotent: true
          call: benchmark-benchmarks.listBenchmarkRunScenarioRunsDeprecated
          with:
            id: tools.id
            state: tools.state
            limit: tools.limit
            starting_after: tools.starting_after
            include_total_count: tools.include_total_count
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-startBenchmarkRun
          description: Start a new BenchmarkRun.
          hints:
            readOnly: false
            destructive: false
            idempotent: false
          call: benchmark-benchmarks.startBenchmarkRun
          with:
            body: tools.body
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-updateBenchmark
          description: Update a Benchmark.
          hints:
            readOnly: false
            destructive: false
            idempotent: false
          call: benchmark-benchmarks.updateBenchmark
          with:
            id: tools.id
            body: tools.body
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-getBenchmark
          description: Get a Benchmark.
          hints:
            readOnly: true
            destructive: false
            idempotent: true
          call: benchmark-benchmarks.getBenchmark
          with:
            id: tools.id
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-archiveBenchmark
          description: Archive a Benchmark.
          hints:
            readOnly: false
            destructive: false
            idempotent: false
          call: benchmark-benchmarks.archiveBenchmark
          with:
            id: tools.id
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-getBenchmarkScenarioDefinitions
          description: Get scenario definitions for a Benchmark.
          hints:
            readOnly: true
            destructive: false
            idempotent: true
          call: benchmark-benchmarks.getBenchmarkScenarioDefinitions
          with:
            id: tools.id
            limit: tools.limit
            starting_after: tools.starting_after
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-getBenchmarkRuns
          description: Get runs for a provided Benchmark.
          hints:
            readOnly: true
            destructive: false
            idempotent: true
          call: benchmark-benchmarks.getBenchmarkRuns
          with:
            id: tools.id
            limit: tools.limit
            starting_after: tools.starting_after
            include_total_count: tools.include_total_count
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-updateBenchmarkScenarios
          description: Modify scenarios for a Benchmark.
          hints:
            readOnly: false
            destructive: false
            idempotent: false
          call: benchmark-benchmarks.updateBenchmarkScenarios
          with:
            id: tools.id
            body: tools.body
          outputParameters:
            - type: object
              mapping: $.

        - name: runloop-benchmark-benchmarks-unarchiveBenchmark
          description: Unarchive a Benchmark.
          hints:
            readOnly: false
            destructive: false
            idempotent: false
          call: benchmark-benchmarks.unarchiveBenchmark
          with:
            id: tools.id
          outputParameters:
            - type: object
              mapping: $.