feat(server): wire voice messages into device handler

2026-02-20 01:59:50 +05:30
parent 63276d3573
commit 3522b66b02
1 changed files with 104 additions and 0 deletions
--- a/server/src/ws/device.ts
+++ b/server/src/ws/device.ts
@@ -6,6 +6,12 @@ import { apikey, llmConfig, device } from "../schema.js";
 import { sessions, type WebSocketData } from "./sessions.js";
 import { runPipeline } from "../agent/pipeline.js";
 import type { LLMConfig } from "../agent/llm.js";
 import {
  handleVoiceStart,
  handleVoiceChunk,
  handleVoiceSend,
  handleVoiceCancel,
 } from "./voice.js";
 /**
 * Hash an API key the same way better-auth does:
@@ -361,6 +367,104 @@ export async function handleDeviceMessage(
      break;
    }
    case "voice_start": {
      const deviceId = ws.data.deviceId!;
      const userId = ws.data.userId!;
      // Fetch user's LLM config to get API key for Groq Whisper
      const configs = await db
        .select()
        .from(llmConfig)
        .where(eq(llmConfig.userId, userId))
        .limit(1);
      if (configs.length === 0 || !configs[0].apiKey) {
        sendToDevice(ws, { type: "transcript_final", text: "" });
        break;
      }
      handleVoiceStart(ws, deviceId, configs[0].apiKey);
      break;
    }
    case "voice_chunk": {
      const deviceId = ws.data.deviceId!;
      handleVoiceChunk(deviceId, (msg as unknown as { data: string }).data);
      break;
    }
    case "voice_stop": {
      const deviceId = ws.data.deviceId!;
      const userId = ws.data.userId!;
      const voiceAction = (msg as unknown as { action: string }).action;
      if (voiceAction === "cancel") {
        handleVoiceCancel(deviceId);
        break;
      }
      // action === "send" — finalize and fire goal
      const configs = await db
        .select()
        .from(llmConfig)
        .where(eq(llmConfig.userId, userId))
        .limit(1);
      const groqKey = configs[0]?.apiKey ?? "";
      const transcript = await handleVoiceSend(ws, deviceId, groqKey);
      if (transcript) {
        const persistentDeviceId = ws.data.persistentDeviceId!;
        if (activeSessions.has(deviceId)) {
          sendToDevice(ws, { type: "goal_failed", message: "Agent already running" });
          break;
        }
        const userLlmConfig: LLMConfig = {
          provider: configs[0].provider,
          apiKey: configs[0].apiKey,
          model: configs[0].model ?? undefined,
        };
        console.log(`[Pipeline] Starting voice goal for device ${deviceId}: ${transcript}`);
        const abortController = new AbortController();
        activeSessions.set(deviceId, { goal: transcript, abort: abortController });
        sendToDevice(ws, { type: "goal_started", sessionId: deviceId, goal: transcript });
        runPipeline({
          deviceId,
          persistentDeviceId,
          userId,
          goal: transcript,
          llmConfig: userLlmConfig,
          signal: abortController.signal,
          onStep(step) {
            sendToDevice(ws, {
              type: "step",
              step: step.stepNumber,
              action: step.action,
              reasoning: step.reasoning,
            });
          },
          onComplete(result) {
            activeSessions.delete(deviceId);
            sendToDevice(ws, {
              type: "goal_completed",
              success: result.success,
              stepsUsed: result.stepsUsed,
            });
          },
        }).catch((err) => {
          activeSessions.delete(deviceId);
          sendToDevice(ws, { type: "goal_failed", message: String(err) });
        });
      }
      break;
    }
    default: {
      console.warn(
        `Unknown message type from device ${ws.data.deviceId}:`,