// Component manifest for the SkillsBench AgentBeats service running with an
// embedded worker: a single container starts both the worker process and the
// server that listens on the exported endpoint port.
{
  manifest_version: "0.4.0",

  // NOTE(review): presumably this flag is what permits the "framework.docker"
  // socket mount declared under program.mounts — confirm against the manifest docs.
  experimental_features: ["docker"],

  // Closed, empty schema: no properties are declared and additional ones are rejected.
  config_schema: {
    type: "object",
    properties: {},
    additionalProperties: false,
  },

  slots: {
    // Source of ${slots.proxy.url}, which is injected into two env variables below.
    proxy: { kind: "a2a" },
    // Optional. Nothing in this manifest interpolates this slot;
    // SKILLSBENCH_WORKER_URL below is a fixed loopback address instead.
    worker: { kind: "http", optional: true },
  },

  program: {
    // Digest-pinned image; the same digest is repeated in SKILLSBENCH_WORKER_IMAGE
    // and SKILLSBENCH_WORKER_IMAGE_DIGEST below, so update all three together.
    image: "ghcr.io/benchflow-ai/skillsbench-agentbeats-worker@sha256:4c4bff9b8596eec10f88d4999526b6bf9b7d7abe1e084688c515b63e495be6ef",

    // Runs under `sh -lc`: starts the worker module in the background on
    // 127.0.0.1:9010, sleeps 2 seconds, then execs the server module on
    // 0.0.0.0:9009 (the port published as a2a_endpoint under network.endpoints).
    // NOTE(review): the fixed `sleep 2` is the only ordering between the two
    // processes — confirm the server tolerates a worker that is not yet listening.
    entrypoint: "sh -lc 'uv run --no-sync python -m skillsbench_agentbeats.worker --host 127.0.0.1 --port 9010 --jobs-dir jobs/agentbeats-worker --environment docker & sleep 2; exec uv run --no-sync python -m skillsbench_agentbeats.server --host 0.0.0.0 --port 9009'",

    env: {
      // Resolved from the `proxy` slot.
      SKILLSBENCH_AGENTBEATS_PROXY_URL: "${slots.proxy.url}",

      // Loopback address/port that the entrypoint passes to the worker process.
      SKILLSBENCH_WORKER_URL: "http://127.0.0.1:9010",
      SKILLSBENCH_WORKER_TIMEOUT_SEC: "3600",
      SKILLSBENCH_WORKER_POLL_INTERVAL_SEC: "1.0",

      // Pinned revisions and worker image identity.
      SKILLSBENCH_WORKER_REVISION: "d5b00898638c9d8d537f527d8715a2732c808408",
      SKILLSBENCH_REVISION: "d5b00898638c9d8d537f527d8715a2732c808408",
      BENCHFLOW_REVISION: "50df6238abf88dc43da7ae8eb4c895f0cc7d40ae",
      SKILLSBENCH_WORKER_IMAGE: "ghcr.io/benchflow-ai/skillsbench-agentbeats-worker@sha256:4c4bff9b8596eec10f88d4999526b6bf9b7d7abe1e084688c515b63e495be6ef",
      SKILLSBENCH_WORKER_IMAGE_DIGEST: "sha256:4c4bff9b8596eec10f88d4999526b6bf9b7d7abe1e084688c515b63e495be6ef",

      // JSON object serialized into a single string: task name -> digest-pinned
      // task-env image reference. Keep the literal on one physical line; a raw
      // line break inside a string literal is not valid JSON5.
      SKILLSBENCH_WORKER_PREBUILT_IMAGES: "{\"3d-scan-calc\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:a4012960d8166369e6e827ffa95b4338a7c2ba08ec737596e18e514f4e760abb\",\"ada-bathroom-plan-repair\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:cf2939af13075c6e5b06205878f80f62687cae33d3d960345d9e83a13ec0c015\",\"adaptive-cruise-control\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:b2648ffa9567f3d4159675ea4c5d12352e462c5024461d8f0fce36d13cbc48ba\",\"azure-bgp-oscillation-route-leak\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:2ef9a1adaaf347621cf20f4fc9596182b814352e9c61c6c39eb9a570ea22998f\",\"bike-rebalance\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:4db525c293739ca3d590f95e54c6c4ed90b4d9bbdef67954ea1683988c1b8887\",\"citation-check\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:fde88f44b02f1c31585c9371e44a6f4bc88eb049ff14a1291cdd2957830dd6a7\",\"civ6-adjacency-optimizer\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:f6011886fb77aeff01b3cec164a3d6254868eed64cce4c4e25503325f4267455\",\"court-form-filling\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:0439fd3638ec6e78002e2db55d56a78cd54e3f70d8fb663c32c4c12259ce3353\",\"crystallographic-wyckoff-position-analysis\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:24539cc37ef2add6d2b22f08b751281b6c5bb7efe7ca73895dd2211b4f0ea26f\",\"dapt-intrusion-detection\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:62c6a25a78f9d650b51ba8890e9a6f8db32f9644ec47642da96211841b1c597d\",\"data-to-d3\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:01eaf428a9c84ce60fb193fbc537b46d1056e4268c9b35c709d678f41ace70b1\",\"debug-trl-grpo\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:5156312af44aeaa8b6ba1f78a4af3bda438c0bf36ee18bc3f122fb3616957b8a\",\"dialogue-parser\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:f295fb432a2b747e12b6129b78b5a9b2b59807f7be9dddb8187811df4ad716c1\",\"drone-planning-control\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:10396b506ecceb6aa7a5d9c188f2459c338a16820798e8e1fefe489d08b8d657\",\"dynamic-object-aware-egomotion\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:bf1729709b261d775ebd7477f096286578c6c11281ecb0032667783a73d43515\",\"earthquake-phase-association\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:a1f3e13dbe76c35879bb1ab1fff9f8ddba686e132625dc6c4ab45f1371533959\",\"earthquake-plate-calculation\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:d9c54ef2eaf8759a474520bf8fd86b64d02fd3547b56629f64b7fa967ef91ffa\",\"econ-detrending-correlation\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:7d2ef1456147ec8900421eaedde36d06a4792739d251dca8336fb3082a639d27\",\"edit-pdf\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:1b9bcb64edb7f2c6d730d053b69da8c098853e0f8dd19a2b057e914c3ff8975a\",\"energy-ac-optimal-power-flow\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:3f22546118c9adede618b291e15c0ff1338d897245f8713ba526b41e6556edde\",\"energy-market-pricing\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:9f58f3d18012ad3cc8dd568a9f18fce085d6d6b87a676ddb247d1b713d4b61e5\",\"energy-unit-commitment\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:0ce8532d63e87ed6d4b897b5af1764a0162444a39dd62dba1d254775abfd5826\",\"enterprise-information-search\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:ee02498cc8ccacd110c6b2cbcd758fd2769d29a8af7385fbfc106e601846c572\",\"exam-block-sequencing\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:9bde26b3154bba062fc92d2074179a5450bd18c7d1e7f305f2961815018e5bfa\",\"exceltable-in-ppt\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:9f95e56c962e7620b29e39d819dc5b8074d18e7204829ac911f4ea219530fac7\",\"exoplanet-detection-period\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:879941c8463c1d972c299e16d14568e2006a76a56a040754da01a90dce0ca15a\",\"financial-modeling-qa\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:430c4fd92d2caf55faba76b76ace722b4514080065ef2fb8a2a206befedf6d51\",\"find-topk-similiar-chemicals\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:67bac9926ae2830b10389f7ee7ee071accf40b702e6e14dce69fc86d548aa723\",\"fix-build-agentops\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:b17f26fefe85e4a5c6fb5181c2e92b8e003b5754d8dcc22dab8f110d6042c1f5\",\"fix-build-google-auto\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:e4f880c859ed7726734211f54a2e3312d0f6012e84c826fce6d7a0be632eaa41\",\"fix-druid-loophole-cve\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:c3f78054507986dd165be7dcedc98f1fb8373867c62e824272cebda532eebb8f\",\"fix-erlang-ssh-cve\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:7380b01f4df1779f8ca2bc62cf483fe97477be5e6b45a8fcda32a215e3960f92\",\"fix-visual-stability\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:c4e2d1540d4e93c039db3c63fb62bc4de5f048024ac421ef5680aa8e1ebf6688\",\"flink-query\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:0112e398f59f9392b3548301497c31578946b621b5b119b6865ac72b7a01038e\",\"flood-risk-analysis\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:73a9ff87d9abe5c1f66589130a3ee096cb561c85a64b0e95cb0e062abf814310\",\"gh-repo-analytics\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:2e048d79f79e1fd85239d1eab5a9c0e1a8bad890ce7f7ddf5325154965e884ee\",\"glm-lake-mendota\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:28970a09192d223b09f71185a5eeb1f630c2a074ee90f9d08437eb80dc3c6dab\",\"gravitational-wave-detection\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:8ba7eb9e36c86d09858410fc72207990fd6c3e9a51fed74910171c481f4b744f\",\"grid-dispatch-operator\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:b2b96cb399e6893b2933ff60655084c8f83823b925a7d5a8badc146353c63393\",\"hvac-control\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:432611ecea9e4b31389d3f2cff3f63ec3baf9889afaf101fadae17ba4cdb3e14\",\"invoice-fraud-detection\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:60f398662b5aae895631fcfd1b4ca49f0bdd3c08f538b30dcb391b558160b44e\",\"jax-computing-basics\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:99015cad1a83b00f4e48a14e1ac7b1f7525dce33cb21edd7886f4c8cbc2ac027\",\"jpg-ocr-stat\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:b65e33996519b6e02620ad1bc485a9a4aa18903652b5de732ce0407b26a422bd\",\"lab-unit-harmonization\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:e964e1d54f4569cee89ecb5d1ac679dc54f24c72d97fba60fc8182d758c5dc51\",\"lake-warming-attribution\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:13b85ccdb9b609dd1820f62026e12597476259c02b7415fc352afdbeacf5bab8\",\"latex-formula-extraction\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:e67d42582aa9606a37868fe17eacca9e926c1c4358b93a670968cf58a3478f4e\",\"lean4-proof\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:f1aa9c878d0a8edf1bf3cde90ab71138b85e4b0a51e7634765a28bc973c55c33\",\"llm-prefix-cache-replay\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:ce19ecee9dbb45ff94e76c5209e1c5ddc434aa4cd8e50ace1100a77ca0d041ec\",\"manufacturing-codebook-normalization\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:32676116d2bd46fb39538a3c1f95dd61582220c90fc5110bfa9e4ce45cb3ab82\",\"manufacturing-equipment-maintenance\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:440ca8e28e222b557909713eff401a790c5f8df40c8f39060e31141e3fa5823d\",\"manufacturing-fjsp-optimization\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:6ca761758d744626a6c3f45eb719801a8f123382bfddeafcd57533db89d13b38\",\"mario-coin-counting\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:7a4d047548d858326ab1639b8b012c79c9769386bc4c89896be97b345fb0c3d5\",\"mars-clouds-clustering\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:ddf418a52116e2d4ec04622b950a3bd69f6a9ffa44d298aed9dbdacd0d886e8b\",\"multilingual-video-dubbing\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:2b01a90e72c02661451337ddfcd2ca248b0bdfeb364f5a402cf1d0abf9095f72\",\"offer-letter-generator\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:980a9cdcd34834393d9174513fde8f803d855c6a76a3d5edcb4bca91edbf1f1c\",\"organize-messy-files\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:292fbafe5e7316762d003adc384eff9d05c63351c667ee3357c8b4e1fb68ee93\",\"paper-anonymizer\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:520ecd97152a4425d0ec50a45c68957b9fca5277402bda1eb516408888cc16e5\",\"parallel-tfidf-search\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:716eea005f786a4c9b4c5f5df1b60ef14b7dcc0ab1d99dfecb137c195fddbd68\",\"paratransit-routing\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:49ac2ad86f6567e30fbf97754cbf88b8922ec972996eff93eafd5abf17e2208f\",\"pddl-airport-planning\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:a8e7833c8c488a9978b64d758c7655c7db2ee6890c94873ad5f87ba567067b5c\",\"pddl-tpp-planning\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:0f382fc23dd616b1f671f0acf631273f42a2051ae6a0072d371284f85f7cdd2a\",\"pdf-excel-diff\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:c3c094c9c8003209f9485bcde66cdb6621e900ec674f7eb7fdfae01f581c917d\",\"pedestrian-traffic-counting\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:4774441c3d636115f0a24d5858fb48e316931eadaaf1cb71ba563e1cff2b5993\",\"pg-essay-to-audiobook\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:e17466262e01c86ef20eef4bb176723994de4c64209238aeb70f5fda7cd8baad\",\"powerlifting-coef-calc\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:5a44dff35b51b83bfe279c2c5ef9c81a5bf7871432962e77140d529ea2a0fcfa\",\"pptx-reference-formatting\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:b5d1b01905e198bfd38b8b388666a830fa75684b4052152fc95ed701ed536701\",\"protein-expression-analysis\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:7adb886d0277e6230cb871201f566606edbf843a2477e0465ce80a7e23ef631f\",\"python-scala-translation\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:fd7db9690267e9bb9329598100ab88a6e90e3a8bbf3e19307520e9e7b4efb652\",\"quantum-numerical-simulation\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:26e25c820d6f89098cf94b9303075d30b0aee4ae935a0aaac495370de10508cc\",\"r2r-mpc-control\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:b36f13cc4cb45361d85dba24f3915b684ae056bfe5b75738dfd2f801e2401dbd\",\"radar-vital-signs\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:664103865ea12d5bf16f67b4b13d9ae11db1b1cc3aa834172479ad693b594f95\",\"react-performance-debugging\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:bba91792e34a93c16a5df578902130407f62265ceb3f52c17ecffc07ee34a6de\",\"reserves-at-risk-calc\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:15e0e22806e6a58d4ef44cc1f3cc1c496a9ad3b53afc153208626c916e211153\",\"sales-pivot-analysis\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:af779f2e5a1ae554841c71585eb7838705f6b022248c09b9e0dc3682a2123a37\",\"sec-financial-report\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:a6ad9277cc2fd2677d06eb22c0f0ca049a70adff07188a79d2cedab6e400e7d8\",\"seismic-phase-picking\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:0951eebea681e7063dcf7f06e5cdb076da53f0b020a2140592952317b456632d\",\"setup-fuzzing-py\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:5e3190442a0d17307cbf5acad3c19a9bc4d85f31e5fce2e1c72fb58b458aadff\",\"shock-analysis-demand\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:fdf0f9e5e158cb9ae1847dcee8907d9d3f641dec7c7e7ec7055931143eae1050\",\"shock-analysis-supply\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:102e063a0a54c21cce07c231bf28ad70d472f04a59d1b23d68bfa6d341a0d8b6\",\"simpo-code-reproduction\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:429fa0fc10f8be943679d064c7d22505ee998ef14c6337128be68401076eea0c\",\"software-dependency-audit\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:c16904616b1537b858c77378c096571e1db365f4352a5a1d8e9ed48eeed48283\",\"spring-boot-jakarta-migration\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:b046e8a4c560136f340c523aef847e0d793f37379da0ee48a537494e0e2da788\",\"suricata-custom-exfil\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:bc46dd94f314b99aec9322998767be23e20f94e39c1bcbde51c43c780734db8f\",\"syzkaller-ppdev-syzlang\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:ee87285d61fb045c2c905aae7079183d685188ec4b574eb9c31f94f0ec96b3f8\",\"taxonomy-tree-merge\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:73b04d6aa8daf2b6017f800fba3bb7cb77ca8dc71e52239190483b7b8a3569bc\",\"threejs-structure-parser\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:e78398c30a91a19e09bd9ca518ff2e4996dead63a80b69fef0fe32c79b7e38f8\",\"threejs-to-obj\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:015e7b0d3c88cda86d8bc0e455b7f6eab1f91b2b5e98e0afd354a61104581472\",\"tictoc-unnecessary-abort-detection\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:5bf90f2ee4ca7641ddd0ca7b38e38dd07c8ab99f506efd3e871dd15b5dcc66fe\",\"travel-planning\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:b84609da17110735c044c76978be601d67b83b91610a22a0a518661f61905d5f\",\"video-filler-word-remover\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:0691c08959fdea394bba37cb5b91ac34ab39d2c2d6acd018c8405fc456550ee5\",\"video-silence-remover\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:8db3b869022169e85e647c74f3b3ad44075df44f578a572f02e9597d5a379962\",\"video-tutorial-indexer\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:d20e8435215ffd0d40f93b68efc8c98ba5a3a05abea3f3f1d9470b39e2237878\",\"weighted-gdp-calc\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:f6b829a4aaf958c88ddf689fd40849d48ca3420aa8c42af8017c7a7e6cfaa61a\",\"xlsx-recover-data\":\"ghcr.io/benchflow-ai/skillsbench-task-env@sha256:d96c53042d74f41055b4048191a3b0c7f29eef67ea274b0aa1110459a0fbc8e3\"}",
      SKILLSBENCH_WORKER_VERIFY_PREBUILT_IMAGES: "true",

      // Private-proof storage settings.
      SKILLSBENCH_PRIVATE_PROOF_DIR: "jobs/agentbeats-private-proof",
      SKILLSBENCH_PRIVATE_PROOF_URI_PREFIX: "local://agentbeats-private-proof",
      SKILLSBENCH_PRIVATE_PROOF_RETENTION: "github-actions-smoke-debug-only",
      SKILLSBENCH_REQUIRE_DURABLE_PRIVATE_PROOF: "false",

      // Docker tooling toggles.
      BENCHFLOW_DOCKER_LOGS_HOST_MOUNTED: "false",
      DOCKER_BUILDKIT: "0",
      COMPOSE_DOCKER_CLI_BUILD: "0",

      // Same slot URL as SKILLSBENCH_AGENTBEATS_PROXY_URL above.
      SKILLSBENCH_WORKER_PARTICIPANT_PROXY_URL: "${slots.proxy.url}",
    },

    // Docker socket supplied by the framework; the worker is started with
    // `--environment docker` in the entrypoint.
    mounts: [
      { name: "docker_socket", path: "/var/run/docker.sock", from: "framework.docker" },
    ],

    network: {
      endpoints: [
        // Port matches the server's `--port 9009` in the entrypoint.
        { name: "a2a_endpoint", port: 9009 },
      ],
    },
  },

  // Publishes the program's a2a_endpoint as the `a2a` capability and exports it.
  provides: {
    a2a: { kind: "a2a", endpoint: "a2a_endpoint" },
  },
  exports: { a2a: "a2a" },

  metadata: {
    // Assessment selection: shard 0 of 7 over the "standard-v1" task set.
    // NOTE(review): shard_index looks zero-based — confirm with the consumer.
    assessment_config: {
      tasks: "all",
      task_set: "standard-v1",
      condition: "with_skills",
      allow_excluded_tasks: false,
      shard_index: 0,
      num_shards: 7,
    },
    worker_mode: "embedded_green_worker",
    participant_roles: ["agent"],
  },
}