# Two-stage image for the gRPC inference server.
#   builder: installs the Python dependencies and generates the gRPC stubs.
#   runtime: receives the installed packages, the generated stubs and the
#            application sources, and runs the server as an unprivileged user.

# ---------------------------------------------------------------------------
# Stage 1: builder
# ---------------------------------------------------------------------------
FROM nvidia/cuda:12.8.0-runtime-ubuntu22.04 AS builder

# This RUN is kept textually identical to the one in the runtime stage so the
# layer can be shared by the build cache. DEBIAN_FRONTEND is set inline (not
# via ENV) so that it never leaks into the runtime environment.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ffmpeg \
        python3-pip \
        python3.10 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Only the manifest is copied before installing, so the dependency layer is
# rebuilt only when pyproject.toml changes.
# NOTE(review): this is an editable install performed with no package sources
# present yet; it relies on the build backend tolerating that — confirm.
COPY pyproject.toml .
RUN pip install --no-cache-dir -e ".[inference,rag]" \
    && pip install --no-cache-dir grpcio-tools grpcio-health-checking scipy

# Generate the gRPC Python code from the .proto definitions.
COPY proto/ ./proto/
RUN mkdir -p inference/generated \
    && python3 -m grpc_tools.protoc \
        -I proto/ \
        --python_out=inference/generated \
        --grpc_python_out=inference/generated \
        proto/*.proto

# ---------------------------------------------------------------------------
# Stage 2: runtime
# ---------------------------------------------------------------------------
FROM nvidia/cuda:12.8.0-runtime-ubuntu22.04 AS runtime

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ffmpeg \
        python3-pip \
        python3.10 \
    && rm -rf /var/lib/apt/lists/*

# Dedicated unprivileged account with a fixed numeric UID/GID so orchestrators
# can verify the container does not run as root. /app is created up front and
# handed to that account so the server keeps write access to its working
# directory after the switch away from root.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --create-home --shell /usr/sbin/nologin app \
    && install -d -o app -g app /app

WORKDIR /app

# Python packages and console scripts installed by pip in the builder stage.
COPY --from=builder /usr/local/lib/python3.10 /usr/local/lib/python3.10
COPY --from=builder /usr/local/bin /usr/local/bin

# Application files, owned by the unprivileged account.
COPY --chown=app:app pyproject.toml .
COPY --chown=app:app inference/ ./inference/
COPY --chown=app:app models/flash_head/ ./models/flash_head/
COPY --chown=app:app cyberverse_config.yaml .

# Copied after inference/ on purpose: if the build context happens to contain
# an inference/generated/ directory, the stubs generated in the builder stage
# take precedence over it.
COPY --from=builder --chown=app:app /app/inference/generated ./inference/generated/

USER app

# Probe: succeeds once a gRPC channel to localhost:50051 becomes ready within
# 3 seconds. Failures during the start period do not count toward the retry
# limit.
# NOTE(review): 60s start period is an estimate of server start-up time — tune.
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
    CMD python3 -c "import grpc; ch = grpc.insecure_channel('localhost:50051'); grpc.channel_ready_future(ch).result(timeout=3)" || exit 1

EXPOSE 50051

CMD ["python3", "-m", "inference.server", "--config", "cyberverse_config.yaml"]