feat: workflow automation via NotificationListenerService

Add workflow system that lets users describe automations in natural language through the same input field. The server LLM classifies input as either an immediate goal or a workflow rule, then: - Parses workflow descriptions into structured trigger conditions - Stores workflows per-user in Postgres - Syncs workflows to device via WebSocket - NotificationListenerService monitors notifications and triggers matching workflows as agent goals Also cleans up overlay text and adds network security config.
2026-02-18 18:49:14 +05:30
parent 45766621f2
commit 4d4b7059e4
18 changed files with 921 additions and 9 deletions
--- a/server/src/agent/input-classifier.ts
+++ b/server/src/agent/input-classifier.ts
@@ -0,0 +1,54 @@
+/**
+ * Classifies user input as either an immediate goal or a workflow (automation rule).
+ *
+ * Uses the user's LLM to determine intent. Workflows describe recurring
+ * automations ("when X happens, do Y"), goals are one-time tasks ("open WhatsApp").
+ */
+
+import type { LLMConfig } from "./llm.js";
+import { getLlmProvider, parseJsonResponse } from "./llm.js";
+
+export type InputType = "goal" | "workflow";
+
+export interface ClassificationResult {
+  type: InputType;
+}
+
+const CLASSIFIER_PROMPT = `You classify user input for an Android automation agent.
+
+Decide if the input is:
+- "goal": A one-time task to execute right now (e.g. "open WhatsApp", "search for pizza", "take a screenshot", "reply to John with hello")
+- "workflow": An automation rule that should be saved and triggered later when a condition is met (e.g. "when I get a notification from WhatsApp saying where are you, reply with Bangalore", "whenever someone messages me on Telegram, auto-reply with I'm busy", "reply to all notifications that have a reply button")
+
+Key signals for "workflow":
+- Uses words like "when", "whenever", "if", "every time", "automatically", "always"
+- Describes a trigger condition + a response action
+- Refers to future/recurring events
+
+Key signals for "goal":
+- Describes a single task to do now
+- Imperative commands ("open", "send", "search", "go to")
+- No conditional/temporal trigger
+
+Respond with ONLY: {"type": "goal"} or {"type": "workflow"}`;
+
+export async function classifyInput(
+  text: string,
+  llmConfig: LLMConfig
+): Promise<ClassificationResult> {
+  const provider = getLlmProvider(llmConfig);
+
+  try {
+    const raw = await provider.getAction(CLASSIFIER_PROMPT, text);
+    const parsed = parseJsonResponse(raw);
+
+    if (parsed?.type === "workflow") {
+      return { type: "workflow" };
+    }
+  } catch (err) {
+    console.error(`[Classifier] Failed to classify input, defaulting to goal:`, err);
+  }
+
+  // Default to goal — safer to execute once than to accidentally create a rule
+  return { type: "goal" };
+}
--- a/server/src/agent/workflow-parser.ts
+++ b/server/src/agent/workflow-parser.ts
@@ -0,0 +1,75 @@
+/**
+ * Parses a natural-language workflow description into structured
+ * trigger conditions and a goal template using the user's LLM.
+ */
+
+import type { LLMConfig } from "./llm.js";
+import { getLlmProvider, parseJsonResponse } from "./llm.js";
+
+export interface ParsedWorkflow {
+  name: string;
+  triggerType: "notification";
+  conditions: Array<{
+    field: "app_package" | "title" | "text";
+    matchMode: "contains" | "exact" | "regex";
+    value: string;
+  }>;
+  goalTemplate: string;
+}
+
+const PARSER_PROMPT = `You are a workflow parser for an Android automation agent.
+
+The user describes an automation rule in plain English. Parse it into a structured workflow.
+
+A workflow has:
+1. **name**: A short human-readable name (3-6 words).
+2. **triggerType**: Always "notification" for now.
+3. **conditions**: An array of matching rules for incoming notifications. Each condition has:
+   - "field": one of "app_package", "title", or "text"
+   - "matchMode": one of "contains", "exact", or "regex"
+   - "value": the string or regex to match
+4. **goalTemplate**: The goal string to send to the agent when triggered. Use {{title}}, {{text}}, {{app}} as placeholders that get filled from the notification.
+
+Example input: "When I get a WhatsApp message saying 'where are you', reply with 'Bangalore'"
+Example output:
+{
+  "name": "Auto-reply where are you",
+  "triggerType": "notification",
+  "conditions": [
+    {"field": "app_package", "matchMode": "contains", "value": "whatsapp"},
+    {"field": "text", "matchMode": "contains", "value": "where are you"}
+  ],
+  "goalTemplate": "Open the WhatsApp notification from {{title}} and reply with 'Bangalore'"
+}
+
+Example input: "Reply to all notifications that have a reply button with 'I am busy'"
+Example output:
+{
+  "name": "Auto-reply I am busy",
+  "triggerType": "notification",
+  "conditions": [],
+  "goalTemplate": "Open the notification '{{title}}' from {{app}} and reply with 'I am busy'"
+}
+
+Respond with ONLY a valid JSON object. No explanation.`;
+
+export async function parseWorkflowDescription(
+  description: string,
+  llmConfig: LLMConfig
+): Promise<ParsedWorkflow> {
+  const provider = getLlmProvider(llmConfig);
+
+  const raw = await provider.getAction(PARSER_PROMPT, description);
+  const parsed = parseJsonResponse(raw);
+
+  if (!parsed || !parsed.name || !parsed.goalTemplate) {
+    throw new Error("Failed to parse workflow description into structured format");
+  }
+
+  return {
+    name: parsed.name as string,
+    triggerType: "notification",
+    conditions: (parsed.conditions as ParsedWorkflow["conditions"]) ?? [],
+    goalTemplate: parsed.goalTemplate as string,
+  };
+}
--- a/server/src/schema.ts
+++ b/server/src/schema.ts
@@ -128,6 +128,24 @@ export const agentSession = pgTable("agent_session", {
  completedAt: timestamp("completed_at"),
 });

+export const workflow = pgTable("workflow", {
+  id: text("id").primaryKey(),
+  userId: text("user_id")
+    .notNull()
+    .references(() => user.id, { onDelete: "cascade" }),
+  name: text("name").notNull(),
+  description: text("description").notNull(),
+  triggerType: text("trigger_type").notNull().default("notification"),
+  conditions: jsonb("conditions").notNull().default("[]"),
+  goalTemplate: text("goal_template").notNull(),
+  enabled: boolean("enabled").default(true).notNull(),
+  createdAt: timestamp("created_at").defaultNow().notNull(),
+  updatedAt: timestamp("updated_at")
+    .defaultNow()
+    .$onUpdate(() => new Date())
+    .notNull(),
+});
+
 export const agentStep = pgTable("agent_step", {
  id: text("id").primaryKey(),
  sessionId: text("session_id")
--- a/server/src/ws/device.ts
+++ b/server/src/ws/device.ts
@@ -6,6 +6,14 @@ import { apikey, llmConfig, device } from "../schema.js";
 import { sessions, type WebSocketData } from "./sessions.js";
 import { runPipeline } from "../agent/pipeline.js";
 import type { LLMConfig } from "../agent/llm.js";
+import {
+  handleWorkflowCreate,
+  handleWorkflowUpdate,
+  handleWorkflowDelete,
+  handleWorkflowSync,
+  handleWorkflowTrigger,
+} from "./workflow-handlers.js";
+import { classifyInput } from "../agent/input-classifier.js";

 /**
 * Hash an API key the same way better-auth does:
@@ -251,6 +259,20 @@ export async function handleDeviceMessage(
        break;
      }

+      // Classify: is this an immediate goal or a workflow?
+      try {
+        const classification = await classifyInput(goal, userLlmConfig);
+        if (classification.type === "workflow") {
+          console.log(`[Classifier] Input classified as workflow: ${goal}`);
+          handleWorkflowCreate(ws, goal).catch((err) =>
+            console.error(`[Workflow] Auto-create error:`, err)
+          );
+          break;
+        }
+      } catch (err) {
+        console.warn(`[Classifier] Classification failed, treating as goal:`, err);
+      }
+
      console.log(`[Pipeline] Starting goal for device ${deviceId}: ${goal}`);
      const abortController = new AbortController();
      activeSessions.set(deviceId, { goal, abort: abortController });
@@ -361,6 +383,59 @@ export async function handleDeviceMessage(
      break;
    }

+    case "workflow_create": {
+      const description = (msg as unknown as { description: string }).description;
+      if (description) {
+        handleWorkflowCreate(ws, description).catch((err) =>
+          console.error(`[Workflow] Create error:`, err)
+        );
+      }
+      break;
+    }
+
+    case "workflow_update": {
+      const { workflowId, enabled } = msg as unknown as { workflowId: string; enabled?: boolean };
+      if (workflowId) {
+        handleWorkflowUpdate(ws, workflowId, enabled).catch((err) =>
+          console.error(`[Workflow] Update error:`, err)
+        );
+      }
+      break;
+    }
+
+    case "workflow_delete": {
+      const { workflowId } = msg as unknown as { workflowId: string };
+      if (workflowId) {
+        handleWorkflowDelete(ws, workflowId).catch((err) =>
+          console.error(`[Workflow] Delete error:`, err)
+        );
+      }
+      break;
+    }
+
+    case "workflow_sync": {
+      handleWorkflowSync(ws).catch((err) =>
+        console.error(`[Workflow] Sync error:`, err)
+      );
+      break;
+    }
+
+    case "workflow_trigger": {
+      const { workflowId, notificationApp, notificationTitle, notificationText } =
+        msg as unknown as {
+          workflowId: string;
+          notificationApp?: string;
+          notificationTitle?: string;
+          notificationText?: string;
+        };
+      if (workflowId) {
+        handleWorkflowTrigger(ws, workflowId, notificationApp, notificationTitle, notificationText).catch(
+          (err) => console.error(`[Workflow] Trigger error:`, err)
+        );
+      }
+      break;
+    }
+
    default: {
      console.warn(
        `Unknown message type from device ${ws.data.deviceId}:`,
--- a/server/src/ws/workflow-handlers.ts
+++ b/server/src/ws/workflow-handlers.ts
@@ -0,0 +1,229 @@
+/**
+ * Server-side handlers for workflow CRUD and trigger messages
+ * from the Android device WebSocket.
+ */
+
+import type { ServerWebSocket } from "bun";
+import { eq, and } from "drizzle-orm";
+import { db } from "../db.js";
+import { workflow, llmConfig } from "../schema.js";
+import { parseWorkflowDescription } from "../agent/workflow-parser.js";
+import type { LLMConfig } from "../agent/llm.js";
+import type { WebSocketData } from "./sessions.js";
+
+function sendToDevice(ws: ServerWebSocket<WebSocketData>, msg: Record<string, unknown>) {
+  try {
+    ws.send(JSON.stringify(msg));
+  } catch {
+    // device disconnected
+  }
+}
+
+async function getUserLlmConfig(userId: string): Promise<LLMConfig | null> {
+  const configs = await db
+    .select()
+    .from(llmConfig)
+    .where(eq(llmConfig.userId, userId))
+    .limit(1);
+
+  if (configs.length === 0) return null;
+
+  const cfg = configs[0];
+  return {
+    provider: cfg.provider,
+    apiKey: cfg.apiKey,
+    model: cfg.model ?? undefined,
+  };
+}
+
+function workflowToJson(wf: typeof workflow.$inferSelect): string {
+  return JSON.stringify({
+    id: wf.id,
+    name: wf.name,
+    description: wf.description,
+    triggerType: wf.triggerType,
+    conditions: wf.conditions,
+    goalTemplate: wf.goalTemplate,
+    enabled: wf.enabled,
+    createdAt: new Date(wf.createdAt).getTime(),
+  });
+}
+
+export async function handleWorkflowCreate(
+  ws: ServerWebSocket<WebSocketData>,
+  description: string
+): Promise<void> {
+  const userId = ws.data.userId!;
+
+  const userLlm = await getUserLlmConfig(userId);
+  if (!userLlm) {
+    sendToDevice(ws, {
+      type: "error",
+      message: "No LLM provider configured. Set it up in the web dashboard.",
+    });
+    return;
+  }
+
+  try {
+    const parsed = await parseWorkflowDescription(description, userLlm);
+
+    // Validate regexes before persisting
+    for (const cond of parsed.conditions) {
+      if (cond.matchMode === "regex") {
+        try {
+          new RegExp(cond.value, "i");
+        } catch {
+          throw new Error(`Invalid regex in condition: ${cond.value}`);
+        }
+      }
+    }
+
+    const id = crypto.randomUUID();
+    const now = new Date();
+
+    await db.insert(workflow).values({
+      id,
+      userId,
+      name: parsed.name,
+      description,
+      triggerType: parsed.triggerType,
+      conditions: parsed.conditions,
+      goalTemplate: parsed.goalTemplate,
+      enabled: true,
+      createdAt: now,
+      updatedAt: now,
+    });
+
+    const inserted = await db
+      .select()
+      .from(workflow)
+      .where(eq(workflow.id, id))
+      .limit(1);
+
+    if (inserted.length > 0) {
+      sendToDevice(ws, {
+        type: "workflow_created",
+        workflowId: id,
+        workflowJson: workflowToJson(inserted[0]),
+      });
+    }
+
+    console.log(`[Workflow] Created '${parsed.name}' for user ${userId}`);
+  } catch (err) {
+    console.error(`[Workflow] Failed to create workflow:`, err);
+    sendToDevice(ws, {
+      type: "error",
+      message: `Failed to parse workflow: ${err}`,
+    });
+  }
+}
+
+export async function handleWorkflowUpdate(
+  ws: ServerWebSocket<WebSocketData>,
+  workflowId: string,
+  enabled?: boolean
+): Promise<void> {
+  const userId = ws.data.userId!;
+
+  const updates: Record<string, unknown> = {};
+  if (enabled !== undefined) updates.enabled = enabled;
+
+  await db
+    .update(workflow)
+    .set(updates)
+    .where(and(eq(workflow.id, workflowId), eq(workflow.userId, userId)));
+
+  console.log(`[Workflow] Updated ${workflowId}: enabled=${enabled}`);
+}
+
+export async function handleWorkflowDelete(
+  ws: ServerWebSocket<WebSocketData>,
+  workflowId: string
+): Promise<void> {
+  const userId = ws.data.userId!;
+
+  await db
+    .delete(workflow)
+    .where(and(eq(workflow.id, workflowId), eq(workflow.userId, userId)));
+
+  sendToDevice(ws, {
+    type: "workflow_deleted",
+    workflowId,
+  });
+
+  console.log(`[Workflow] Deleted ${workflowId}`);
+}
+
+export async function handleWorkflowSync(
+  ws: ServerWebSocket<WebSocketData>
+): Promise<void> {
+  const userId = ws.data.userId!;
+
+  const workflows = await db
+    .select()
+    .from(workflow)
+    .where(eq(workflow.userId, userId));
+
+  const workflowsJson = JSON.stringify(
+    workflows.map((wf) => ({
+      id: wf.id,
+      name: wf.name,
+      description: wf.description,
+      triggerType: wf.triggerType,
+      conditions: wf.conditions,
+      goalTemplate: wf.goalTemplate,
+      enabled: wf.enabled,
+      createdAt: new Date(wf.createdAt).getTime(),
+    }))
+  );
+
+  sendToDevice(ws, {
+    type: "workflow_synced",
+    workflowsJson,
+  });
+
+  console.log(`[Workflow] Synced ${workflows.length} workflows for user ${userId}`);
+}
+
+export async function handleWorkflowTrigger(
+  ws: ServerWebSocket<WebSocketData>,
+  workflowId: string,
+  notificationApp?: string,
+  notificationTitle?: string,
+  notificationText?: string
+): Promise<void> {
+  const userId = ws.data.userId!;
+
+  const workflows = await db
+    .select()
+    .from(workflow)
+    .where(and(eq(workflow.id, workflowId), eq(workflow.userId, userId)))
+    .limit(1);
+
+  if (workflows.length === 0) {
+    console.warn(`[Workflow] Trigger for unknown workflow ${workflowId}`);
+    return;
+  }
+
+  const wf = workflows[0];
+  if (!wf.enabled) return;
+
+  // Expand goal template placeholders
+  let goal = wf.goalTemplate;
+  goal = goal.replace(/\{\{app\}\}/g, notificationApp ?? "unknown app");
+  goal = goal.replace(/\{\{title\}\}/g, notificationTitle ?? "");
+  goal = goal.replace(/\{\{text\}\}/g, notificationText ?? "");
+
+  console.log(`[Workflow] Triggering '${wf.name}' with goal: ${goal}`);
+
+  // Send as a goal — reuse existing goal handling by injecting a goal message
+  sendToDevice(ws, { type: "ping" }); // keep-alive before goal injection
+
+  // The device will receive this as a workflow-triggered goal
+  // We send the goal text back to the device to be submitted as a regular goal
+  sendToDevice(ws, {
+    type: "workflow_goal",
+    workflowId: wf.id,
+    goal,
+  });
+}