{"dataset": {"crypto": 52, "forensics": 15, "misc": 24, "pwn": 39, "rev": 51, "web": 19, "total": 200}, "submissions": [{"name": "EnIGMA", "comment": "pass@1", "link": "https://enigma-agent.github.io/", "model": "claude-3.5-sonnet-20240620", "date": "2024/10/02", "logs": "https://github.com/NYU-LLM-CTF/leaderboard_submissions/tree/main/transcripts/enigma_claude3.5_sonnet", "solved": 27, "per_category": {"crypto": 4, "forensics": 3, "misc": 4, "pwn": 7, "rev": 9, "web": 0}}, {"name": "CRAKEN (Self-RAG)", "comment": "pass@1", "link": "https://arxiv.org/abs/2505.17107", "model": "claude-3.7-sonnet-20250219", "date": "2025/08/12", "logs": "https://github.com/NYU-LLM-CTF/leaderboard_submissions/tree/main/transcripts/craken_claude3.7_sonnet", "solved": 37, "per_category": {"crypto": 7, "forensics": 3, "misc": 7, "pwn": 5, "rev": 13, "web": 2}}, {"name": "NYU CTF Baseline", "comment": "pass@5", "link": "https://github.com/NYU-LLM-CTF/llm_ctf_automation/", "model": "gpt-3.5-turbo-1106", "date": "2024/09/25", "logs": "https://github.com/NYU-LLM-CTF/leaderboard_submissions/tree/main/transcripts/baseline_gpt3.5", "solved": 8, "per_category": {"crypto": 1, "forensics": 0, "misc": 3, "pwn": 1, "rev": 3, "web": 0}}, {"name": "EnIGMA", "comment": "pass@1", "link": "https://enigma-agent.github.io/", "model": "gpt-4-1106-preview", "date": "2024/10/02", "logs": "https://github.com/NYU-LLM-CTF/leaderboard_submissions/tree/main/transcripts/enigma_gpt4", "solved": 14, "per_category": {"crypto": 1, "forensics": 2, "misc": 4, "pwn": 2, "rev": 5, "web": 0}}, {"name": "D-CIPHER", "comment": "pass@1", "link": "https://arxiv.org/abs/2502.10931", "model": "claude-3.7-sonnet-20250219", "date": "2025/08/12", "logs": "https://github.com/NYU-LLM-CTF/leaderboard_submissions/tree/main/transcripts/dcipher_claude3.7_sonnet", "solved": 35, "per_category": {"crypto": 6, "forensics": 3, "misc": 7, "pwn": 6, "rev": 11, "web": 2}}, {"name": "CRAKEN (Self-RAG)", "comment": "pass@1", "link": "https://arxiv.org/abs/2505.17107", "model": "gpt-4o", "date": "2025/08/12", "logs": "https://github.com/NYU-LLM-CTF/leaderboard_submissions/tree/main/transcripts/craken_gpt4o", "solved": 23, "per_category": {"crypto": 3, "forensics": 3, "misc": 5, "pwn": 2, "rev": 8, "web": 2}}, {"name": "D-CIPHER", "comment": "pass@1", "link": "https://arxiv.org/abs/2502.10931", "model": "gpt-4o", "date": "2025/03/01", "logs": "https://github.com/NYU-LLM-CTF/leaderboard_submissions/tree/main/transcripts/dcipher_gpt4o", "solved": 21, "per_category": {"crypto": 3, "forensics": 2, "misc": 4, "pwn": 3, "rev": 7, "web": 2}}, {"name": "CRAKEN (Self-RAG)", "comment": "pass@1", "link": "https://arxiv.org/abs/2505.17107", "model": "gpt-4.1", "date": "2025/08/12", "logs": "https://github.com/NYU-LLM-CTF/leaderboard_submissions/tree/main/transcripts/craken_gpt4.1", "solved": 23, "per_category": {"crypto": 4, "forensics": 3, "misc": 5, "pwn": 3, "rev": 6, "web": 2}}, {"name": "D-CIPHER", "comment": "pass@1", "link": "https://arxiv.org/abs/2502.10931", "model": "gpt-4.1", "date": "2025/08/12", "logs": "https://github.com/NYU-LLM-CTF/leaderboard_submissions/tree/main/transcripts/dcipher_gpt4.1", "solved": 27, "per_category": {"crypto": 5, "forensics": 1, "misc": 5, "pwn": 5, "rev": 9, "web": 2}}, {"name": "EnIGMA", "comment": "pass@1", "link": "https://enigma-agent.github.io/", "model": "gpt-4o", "date": "2024/10/02", "logs": "https://github.com/NYU-LLM-CTF/leaderboard_submissions/tree/main/transcripts/enigma_gpt4o", "solved": 19, "per_category": {"crypto": 2, "forensics": 2, "misc": 4, "pwn": 3, "rev": 7, "web": 1}}, {"name": "CRAKEN (Self-RAG)", "comment": "pass@1", "link": "https://arxiv.org/abs/2505.17107", "model": "claude-3.5-sonnet-20241022", "date": "2025/08/12", "logs": "https://github.com/NYU-LLM-CTF/leaderboard_submissions/tree/main/transcripts/craken_claude3.5_sonnet", "solved": 42, "per_category": {"crypto": 6, "forensics": 3, "misc": 6, "pwn": 7, "rev": 17, "web": 3}}, {"name": "NYU CTF Baseline", "comment": "pass@5", "link": "https://github.com/NYU-LLM-CTF/llm_ctf_automation/", "model": "claude-3-haiku-20240307", "date": "2024/09/25", "logs": "https://github.com/NYU-LLM-CTF/leaderboard_submissions/tree/main/transcripts/baseline_claude3_haiku", "solved": 8, "per_category": {"crypto": 3, "forensics": 0, "misc": 2, "pwn": 1, "rev": 2, "web": 0}}, {"name": "NYU CTF Baseline", "comment": "pass@5", "link": "https://github.com/NYU-LLM-CTF/llm_ctf_automation/", "model": "gpt-4-0125-preview", "date": "2024/09/25", "logs": "https://github.com/NYU-LLM-CTF/leaderboard_submissions/tree/main/transcripts/baseline_gpt4", "solved": 10, "per_category": {"crypto": 0, "forensics": 1, "misc": 0, "pwn": 3, "rev": 5, "web": 1}}, {"name": "CRAKEN (Self-RAG + Graph-RAG)", "comment": "pass@1", "link": "https://arxiv.org/abs/2505.17107", "model": "claude-3.5-sonnet-20241022", "date": "2025/08/12", "logs": "https://github.com/NYU-LLM-CTF/leaderboard_submissions/tree/main/transcripts/craken_graph_claude3.5_sonnet", "solved": 44, "per_category": {"crypto": 8, "forensics": 4, "misc": 7, "pwn": 8, "rev": 14, "web": 3}}, {"name": "D-CIPHER", "comment": "pass@1", "link": "https://arxiv.org/abs/2502.10931", "model": "claude-3.5-sonnet-20241022", "date": "2025/03/01", "logs": "https://github.com/NYU-LLM-CTF/leaderboard_submissions/tree/main/transcripts/dcipher_claude3.5_sonnet", "solved": 38, "per_category": {"crypto": 8, "forensics": 3, "misc": 6, "pwn": 5, "rev": 15, "web": 1}}]}