package inference import ( "context" "fmt" "time" pb "github.com/cyberverse/server/internal/pb" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" healthpb "google.golang.org/grpc/health/grpc_health_v1" "google.golang.org/grpc/keepalive" ) // Client manages the gRPC connection to the Python inference server // and provides typed access to all service clients. type Client struct { conn *grpc.ClientConn avatar pb.AvatarServiceClient llm pb.LLMServiceClient rag pb.RAGServiceClient tts pb.TTSServiceClient asr pb.ASRServiceClient voiceLLM pb.VoiceLLMServiceClient } // Compile-time check that Client implements InferenceService. var _ InferenceService = (*Client)(nil) // NewClient creates a new gRPC client connected to the inference server. func NewClient(addr string) (*Client, error) { opts := []grpc.DialOption{ grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithKeepaliveParams(keepalive.ClientParameters{ Time: 60 * time.Second, Timeout: 20 * time.Second, PermitWithoutStream: true, }), grpc.WithDefaultCallOptions( grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB for video frames grpc.MaxCallSendMsgSize(10*1024*1024), ), } conn, err := grpc.NewClient(addr, opts...) if err != nil { return nil, fmt.Errorf("failed to connect to inference server at %s: %w", addr, err) } return &Client{ conn: conn, avatar: pb.NewAvatarServiceClient(conn), llm: pb.NewLLMServiceClient(conn), rag: pb.NewRAGServiceClient(conn), tts: pb.NewTTSServiceClient(conn), asr: pb.NewASRServiceClient(conn), voiceLLM: pb.NewVoiceLLMServiceClient(conn), }, nil } // Close shuts down the gRPC connection. func (c *Client) Close() error { if c.conn != nil { return c.conn.Close() } return nil } // HealthCheck verifies the inference server is reachable. func (c *Client) HealthCheck(ctx context.Context) error { ctx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() resp, err := healthpb.NewHealthClient(c.conn).Check(ctx, &healthpb.HealthCheckRequest{}) if err != nil { return err } if resp.GetStatus() != healthpb.HealthCheckResponse_SERVING { return fmt.Errorf("inference health status is %s", resp.GetStatus().String()) } return nil } func (c *Client) AvatarInfo(ctx context.Context) (*pb.AvatarInfo, error) { ctx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() return c.avatar.GetInfo(ctx, &pb.GetInfoRequest{}) }