feat: installed apps, stop goal, auth fixes, remote commands

- Android: fetch installed apps via PackageManager, send to server on connect
- Android: add QUERY_ALL_PACKAGES permission for full app visibility
- Android: fix duplicate Intent import, increase accessibility retry window
- Android: default server URL to ws:// instead of wss://
- Server: store installed apps in device metadata JSONB
- Server: inject installed apps context into LLM prompt
- Server: preprocessor resolves app names from device's actual installed apps
- Server: add POST /goals/stop endpoint with AbortController cancellation
- Server: rewrite session middleware to direct DB token lookup
- Server: goals route fetches user's saved LLM config from DB
- Web: show installed apps in device detail Overview tab with search
- Web: add Stop button for running goals
- Web: replace API routes with remote commands (submitGoal, stopGoal)
- Web: add error display for goal submission failures
- Shared: add InstalledApp type and apps message to protocol
This commit is contained in:
Sanju Sivalingam
2026-02-17 22:50:18 +05:30
parent fae5fd3534
commit e300f04e13
17 changed files with 410 additions and 88 deletions

View File

@@ -26,7 +26,7 @@ import {
} from "./llm.js";
import { createStuckDetector } from "./stuck.js";
import { db } from "../db.js";
import { agentSession, agentStep } from "../schema.js";
import { agentSession, agentStep, device as deviceTable } from "../schema.js";
import { eq } from "drizzle-orm";
import type { UIElement, ActionDecision } from "@droidclaw/shared";
@@ -42,6 +42,8 @@ export interface AgentLoopOptions {
originalGoal?: string;
llmConfig: LLMConfig;
maxSteps?: number;
/** Abort signal for cancellation */
signal?: AbortSignal;
onStep?: (step: AgentStep) => void;
onComplete?: (result: AgentResult) => void;
}
@@ -224,6 +226,7 @@ export async function runAgentLoop(
originalGoal,
llmConfig,
maxSteps = 30,
signal,
onStep,
onComplete,
} = options;
@@ -239,6 +242,28 @@ export async function runAgentLoop(
const recentActions: string[] = [];
let lastActionFeedback = "";
// Fetch installed apps from device metadata for LLM context
let installedAppsContext = "";
if (persistentDeviceId) {
try {
const rows = await db
.select({ info: deviceTable.deviceInfo })
.from(deviceTable)
.where(eq(deviceTable.id, persistentDeviceId))
.limit(1);
const info = rows[0]?.info as Record<string, unknown> | null;
const apps = info?.installedApps as Array<{ packageName: string; label: string }> | undefined;
if (apps && apps.length > 0) {
installedAppsContext =
`\nINSTALLED_APPS (use exact packageName for "launch" action):\n` +
apps.map((a) => ` ${a.label}: ${a.packageName}`).join("\n") +
"\n";
}
} catch {
// Non-critical — continue without apps context
}
}
// Persist session to DB
if (persistentDeviceId) {
try {
@@ -268,6 +293,12 @@ export async function runAgentLoop(
try {
for (let step = 0; step < maxSteps; step++) {
// Check for cancellation
if (signal?.aborted) {
console.log(`[Agent ${sessionId}] Stopped by user at step ${step + 1}`);
break;
}
stepsUsed = step + 1;
// ── 1. Get screen state from device ─────────────────────
@@ -371,6 +402,7 @@ export async function runAgentLoop(
let userPrompt =
`GOAL: ${goal}\n\n` +
`STEP: ${step + 1}/${maxSteps}\n\n` +
installedAppsContext +
foregroundLine +
actionFeedbackLine +
`SCREEN_CONTEXT:\n${JSON.stringify(elements, null, 2)}` +

View File

@@ -8,10 +8,21 @@
*/
import { sessions } from "../ws/sessions.js";
import { db } from "../db.js";
import { device as deviceTable } from "../schema.js";
import { eq } from "drizzle-orm";
// ─── App Name → Package Name Map ────────────────────────────
// ─── Installed App type ──────────────────────────────────────
const APP_PACKAGES: Record<string, string> = {
/** One app entry as read from the device record's `installedApps` list. */
interface InstalledApp {
/** Package identifier passed on as the resolved package, e.g. "com.google.android.youtube". */
packageName: string;
/** App label; lowercased to form the lookup key (e.g. "youtube", "play store"). */
label: string;
}
// ─── Fallback App Name → Package Name Map ────────────────────
// Used only when device has no installed apps data in DB.
const FALLBACK_PACKAGES: Record<string, string> = {
youtube: "com.google.android.youtube",
gmail: "com.google.android.gm",
chrome: "com.android.chrome",
@@ -79,12 +90,32 @@ interface PreprocessResult {
}
/**
* Try to find a known app name at the start of a goal string.
* Build a label→packageName lookup from the device's installed apps.
* Keys are lowercase app labels (e.g. "youtube", "play store").
*/
function buildInstalledAppMap(apps: InstalledApp[]): Record<string, string> {
  const map: Record<string, string> = {};
  // The list originates from device-reported JSON, so entries are not
  // guaranteed to match the InstalledApp shape at runtime.
  for (const app of apps ?? []) {
    const label = typeof app?.label === "string" ? app.label.trim().toLowerCase() : "";
    const packageName = typeof app?.packageName === "string" ? app.packageName : "";
    // An empty label would become an empty lookup key (and an empty app-name
    // pattern for callers); an empty package can never be launched. Skip both.
    if (label === "" || packageName === "") continue;
    // Later entries win when two apps share a label.
    map[label] = packageName;
  }
  return map;
}
/**
* Try to find an app name at the start of a goal string.
* Checks device's installed apps first, then falls back to hardcoded map.
* Returns the package name and remaining text, or null.
*/
function matchAppName(lower: string): { pkg: string; appName: string; rest: string } | null {
function matchAppName(
lower: string,
installedApps: InstalledApp[]
): { pkg: string; appName: string; rest: string } | null {
// Build combined lookup: installed apps take priority over fallback
const installedMap = buildInstalledAppMap(installedApps);
const combined: Record<string, string> = { ...FALLBACK_PACKAGES, ...installedMap };
// Try longest app names first (e.g. "google meet" before "meet")
const sorted = Object.keys(APP_PACKAGES).sort((a, b) => b.length - a.length);
const sorted = Object.keys(combined).sort((a, b) => b.length - a.length);
for (const name of sorted) {
// Match: "open <app> [app] and <rest>" or "open <app> [app]"
@@ -93,7 +124,7 @@ function matchAppName(lower: string): { pkg: string; appName: string; rest: stri
);
const m = lower.match(pattern);
if (m) {
return { pkg: APP_PACKAGES[name], appName: name, rest: m[1]?.trim() ?? "" };
return { pkg: combined[name], appName: name, rest: m[1]?.trim() ?? "" };
}
}
return null;
@@ -103,6 +134,23 @@ function escapeRegex(s: string): string {
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
/**
 * Fetch installed apps from the device record in DB.
 *
 * Reads `deviceInfo.installedApps` for the given device row and returns only
 * entries that really have a string `label` and `packageName`. The column is
 * free-form JSON, so the stored value is checked at runtime instead of being
 * trusted via a type assertion.
 *
 * @param persistentDeviceId - Primary key of the device row to read.
 * @returns The validated app list, or an empty array when the device has no
 *          usable app data or the query fails (best-effort: never throws).
 */
async function fetchInstalledApps(persistentDeviceId: string): Promise<InstalledApp[]> {
  try {
    const rows = await db
      .select({ info: deviceTable.deviceInfo })
      .from(deviceTable)
      .where(eq(deviceTable.id, persistentDeviceId))
      .limit(1);
    const info = rows[0]?.info as Record<string, unknown> | null | undefined;
    const raw: unknown = info?.installedApps;
    if (!Array.isArray(raw)) {
      return [];
    }
    return raw.filter(
      (entry): entry is InstalledApp =>
        typeof entry === "object" &&
        entry !== null &&
        typeof (entry as Record<string, unknown>).label === "string" &&
        typeof (entry as Record<string, unknown>).packageName === "string"
    );
  } catch (err) {
    // Non-critical: callers fall back to the hardcoded map, but leave a trace
    // so a broken DB connection is not completely invisible.
    console.warn(`[Preprocessor] Failed to load installed apps for device ${persistentDeviceId}: ${err}`);
    return [];
  }
}
/**
* Attempt to preprocess a goal before the LLM loop.
*
@@ -113,12 +161,16 @@ function escapeRegex(s: string): string {
*/
export async function preprocessGoal(
deviceId: string,
goal: string
goal: string,
persistentDeviceId?: string
): Promise<PreprocessResult> {
const lower = goal.toLowerCase().trim();
// Fetch device's actual installed apps for accurate package resolution
const installedApps = persistentDeviceId ? await fetchInstalledApps(persistentDeviceId) : [];
// ── Pattern: "open <app> [and <remaining>]" ───────────────
const appMatch = matchAppName(lower);
const appMatch = matchAppName(lower, installedApps);
if (appMatch) {
try {

View File

@@ -1,5 +1,8 @@
import type { Context, Next } from "hono";
import { auth } from "../auth.js";
import { db } from "../db.js";
import { session as sessionTable, user as userTable } from "../schema.js";
import { eq } from "drizzle-orm";
import { getCookie } from "hono/cookie";
/** Hono Env type for routes protected by sessionMiddleware */
export type AuthEnv = {
@@ -10,15 +13,43 @@ export type AuthEnv = {
};
/**
 * Hono middleware that authenticates a request and populates `user` and
 * `session` on the context before calling `next()`.
 *
 * Cookie-based path visible below: read the `better-auth.session_token`
 * cookie, look the token up in the session table, load the matching user row,
 * then set both on the context. Every failure responds 401 `unauthorized`.
 *
 * NOTE(review): this span also contains the `auth.api.getSession` lines
 * (`const session = await auth.api.getSession(...)`, `if (!session) {`,
 * `c.set("user", session.user)`, `c.set("session", session.session)`), which
 * look like the pre-change implementation shown alongside the new one —
 * confirm which lines are live before editing.
 */
export async function sessionMiddleware(c: Context, next: Next) {
const session = await auth.api.getSession({
headers: c.req.raw.headers,
});
if (!session) {
// Extract session token from cookie (same approach as dashboard WS auth)
const rawCookie = getCookie(c, "better-auth.session_token");
if (!rawCookie) {
return c.json({ error: "unauthorized" }, 401);
}
c.set("user", session.user);
c.set("session", session.session);
// Token may have a signature appended after a dot — use only the token part
// NOTE(review): the part after the dot is discarded here, not verified.
const token = rawCookie.split(".")[0];
// Direct DB lookup (proven to work, unlike auth.api.getSession)
// NOTE(review): the query below filters on the token only; no expiry
// condition is visible here — confirm expired sessions are rejected somewhere.
const rows = await db
.select({
sessionId: sessionTable.id,
userId: sessionTable.userId,
})
.from(sessionTable)
.where(eq(sessionTable.token, token))
.limit(1);
if (rows.length === 0) {
return c.json({ error: "unauthorized" }, 401);
}
const { sessionId, userId } = rows[0];
// Fetch user info
const users = await db
.select({ id: userTable.id, name: userTable.name, email: userTable.email })
.from(userTable)
.where(eq(userTable.id, userId))
.limit(1);
// A session row whose user no longer exists is treated as unauthenticated.
if (users.length === 0) {
return c.json({ error: "unauthorized" }, 401);
}
// Only id/name/email of the user and id/userId of the session are exposed downstream.
c.set("user", users[0]);
c.set("session", { id: sessionId, userId });
await next();
}

View File

@@ -1,14 +1,17 @@
import { Hono } from "hono";
import { eq } from "drizzle-orm";
import { sessionMiddleware, type AuthEnv } from "../middleware/auth.js";
import { sessions } from "../ws/sessions.js";
import { runAgentLoop, type AgentLoopOptions } from "../agent/loop.js";
import type { LLMConfig } from "../agent/llm.js";
import { db } from "../db.js";
import { llmConfig as llmConfigTable } from "../schema.js";
const goals = new Hono<AuthEnv>();
goals.use("*", sessionMiddleware);
/** Track running agent sessions so we can prevent duplicates */
const activeSessions = new Map<string, { sessionId: string; goal: string }>();
/** Track running agent sessions so we can prevent duplicates and cancel them */
const activeSessions = new Map<string, { sessionId: string; goal: string; abort: AbortController }>();
goals.post("/", async (c) => {
const user = c.get("user");
@@ -46,15 +49,39 @@ goals.post("/", async (c) => {
);
}
// Build LLM config from request body or environment defaults
const llmConfig: LLMConfig = {
provider: body.llmProvider ?? process.env.LLM_PROVIDER ?? "openai",
apiKey: body.llmApiKey ?? process.env.LLM_API_KEY ?? "",
model: body.llmModel,
};
// Build LLM config: request body → user's DB config → env defaults
let llmCfg: LLMConfig;
if (!llmConfig.apiKey) {
return c.json({ error: "LLM API key is required (provide llmApiKey or set LLM_API_KEY env var)" }, 400);
if (body.llmApiKey) {
llmCfg = {
provider: body.llmProvider ?? process.env.LLM_PROVIDER ?? "openai",
apiKey: body.llmApiKey,
model: body.llmModel,
};
} else {
// Fetch user's saved LLM config from DB (same as device WS handler)
const configs = await db
.select()
.from(llmConfigTable)
.where(eq(llmConfigTable.userId, user.id))
.limit(1);
if (configs.length > 0) {
const cfg = configs[0];
llmCfg = {
provider: cfg.provider,
apiKey: cfg.apiKey,
model: body.llmModel ?? cfg.model ?? undefined,
};
} else if (process.env.LLM_API_KEY) {
llmCfg = {
provider: process.env.LLM_PROVIDER ?? "openai",
apiKey: process.env.LLM_API_KEY,
model: body.llmModel,
};
} else {
return c.json({ error: "No LLM provider configured. Set it up in the web dashboard Settings." }, 400);
}
}
const options: AgentLoopOptions = {
@@ -62,16 +89,20 @@ goals.post("/", async (c) => {
persistentDeviceId: device.persistentDeviceId,
userId: user.id,
goal: body.goal,
llmConfig,
llmConfig: llmCfg,
maxSteps: body.maxSteps,
};
// Create abort controller for this session
const abort = new AbortController();
options.signal = abort.signal;
// Start the agent loop in the background (fire-and-forget).
// The client observes progress via the /ws/dashboard WebSocket.
const loopPromise = runAgentLoop(options);
// Track as active until it completes
const sessionPlaceholder = { sessionId: "pending", goal: body.goal };
const sessionPlaceholder = { sessionId: "pending", goal: body.goal, abort };
activeSessions.set(trackingKey, sessionPlaceholder);
loopPromise
@@ -96,4 +127,33 @@ goals.post("/", async (c) => {
});
});
/**
 * POST /stop — request cancellation of the agent running on a device.
 *
 * Body: `{ deviceId: string }`, where the id may be either the connection's
 * device id or the persistent device id.
 *
 * Responses:
 * - 400 when the body is missing, not valid JSON, or has no string `deviceId`
 * - 404 when the device is not connected or has no running agent
 * - 403 when the device belongs to another user
 * - 200 `{ status: "stopping" }` once the abort signal has been raised
 */
goals.post("/stop", async (c) => {
  const user = c.get("user");

  // c.req.json() rejects on an empty or malformed body; report that as a
  // client error instead of letting it surface as an unhandled 500.
  let deviceId: unknown;
  try {
    const body = await c.req.json<{ deviceId?: unknown } | null>();
    deviceId = body?.deviceId;
  } catch {
    deviceId = undefined;
  }
  if (typeof deviceId !== "string" || deviceId.length === 0) {
    return c.json({ error: "deviceId is required" }, 400);
  }

  // Look up device to verify ownership
  const device = sessions.getDevice(deviceId)
    ?? sessions.getDeviceByPersistentId(deviceId);
  if (!device) {
    return c.json({ error: "device not connected" }, 404);
  }
  if (device.userId !== user.id) {
    return c.json({ error: "device does not belong to you" }, 403);
  }

  const trackingKey = device.persistentDeviceId ?? device.deviceId;
  const active = activeSessions.get(trackingKey);
  if (!active) {
    return c.json({ error: "no agent running on this device" }, 404);
  }

  // Only raises the signal; this handler does not touch activeSessions itself.
  active.abort.abort();
  console.log(`[Agent] Stop requested for device ${deviceId}`);
  return c.json({ status: "stopping" });
});
export { goals };

View File

@@ -227,7 +227,7 @@ export async function handleDeviceMessage(
// Preprocess: handle simple goals directly, or extract "open X" prefix
let effectiveGoal = goal;
try {
const preResult = await preprocessGoal(deviceId, goal);
const preResult = await preprocessGoal(deviceId, goal, persistentDeviceId);
if (preResult.handled) {
await new Promise((r) => setTimeout(r, 1500));
@@ -328,6 +328,26 @@ export async function handleDeviceMessage(
break;
}
case "apps": {
  // Device reported its installed apps: merge them into the stored deviceInfo.
  const persistentDeviceId = ws.data.persistentDeviceId;
  if (persistentDeviceId) {
    // The payload is device-supplied; verify it is a list before using it.
    const rawApps: unknown = (msg as unknown as { apps?: unknown }).apps;
    if (!Array.isArray(rawApps)) {
      console.error(`[Device] Ignoring malformed apps message for device ${persistentDeviceId}`);
      break;
    }
    // Keep only entries that carry both fields the server reads later.
    const apps = rawApps.filter(
      (a): a is { packageName: string; label: string } =>
        typeof a === "object" &&
        a !== null &&
        typeof (a as Record<string, unknown>).packageName === "string" &&
        typeof (a as Record<string, unknown>).label === "string"
    );

    try {
      // Read the current deviceInfo first so other keys survive the update.
      // Both queries sit inside the try so a failed read is logged too,
      // rather than escaping the message handler.
      const rows = await db
        .select({ info: device.deviceInfo })
        .from(device)
        .where(eq(device.id, persistentDeviceId))
        .limit(1);
      const existing = (rows[0]?.info as Record<string, unknown> | null | undefined) ?? {};

      await db
        .update(device)
        .set({ deviceInfo: { ...existing, installedApps: apps } })
        .where(eq(device.id, persistentDeviceId));

      console.log(`[Device] Received ${apps.length} installed apps for device ${persistentDeviceId}`);
    } catch (err) {
      console.error(`[DB] Failed to store installed apps: ${err}`);
    }
  }
  break;
}
case "heartbeat": {
const persistentDeviceId = ws.data.persistentDeviceId;
const userId = ws.data.userId;