feat: installed apps, stop goal, auth fixes, remote commands
- Android: fetch installed apps via PackageManager, send to server on connect
- Android: add QUERY_ALL_PACKAGES permission for full app visibility
- Android: fix duplicate Intent import, increase accessibility retry window
- Android: default server URL to ws:// instead of wss://
- Server: store installed apps in device metadata JSONB
- Server: inject installed apps context into LLM prompt
- Server: preprocessor resolves app names from device's actual installed apps
- Server: add POST /goals/stop endpoint with AbortController cancellation
- Server: rewrite session middleware to direct DB token lookup
- Server: goals route fetches user's saved LLM config from DB
- Web: show installed apps in device detail Overview tab with search
- Web: add Stop button for running goals
- Web: replace API routes with remote commands (submitGoal, stopGoal)
- Web: add error display for goal submission failures
- Shared: add InstalledApp type and apps message to protocol
This commit is contained in:
@@ -26,7 +26,7 @@ import {
|
||||
} from "./llm.js";
|
||||
import { createStuckDetector } from "./stuck.js";
|
||||
import { db } from "../db.js";
|
||||
import { agentSession, agentStep } from "../schema.js";
|
||||
import { agentSession, agentStep, device as deviceTable } from "../schema.js";
|
||||
import { eq } from "drizzle-orm";
|
||||
import type { UIElement, ActionDecision } from "@droidclaw/shared";
|
||||
|
||||
@@ -42,6 +42,8 @@ export interface AgentLoopOptions {
|
||||
originalGoal?: string;
|
||||
llmConfig: LLMConfig;
|
||||
maxSteps?: number;
|
||||
/** Abort signal for cancellation */
|
||||
signal?: AbortSignal;
|
||||
onStep?: (step: AgentStep) => void;
|
||||
onComplete?: (result: AgentResult) => void;
|
||||
}
|
||||
@@ -224,6 +226,7 @@ export async function runAgentLoop(
|
||||
originalGoal,
|
||||
llmConfig,
|
||||
maxSteps = 30,
|
||||
signal,
|
||||
onStep,
|
||||
onComplete,
|
||||
} = options;
|
||||
@@ -239,6 +242,28 @@ export async function runAgentLoop(
|
||||
const recentActions: string[] = [];
|
||||
let lastActionFeedback = "";
|
||||
|
||||
// Fetch installed apps from device metadata for LLM context
|
||||
let installedAppsContext = "";
|
||||
if (persistentDeviceId) {
|
||||
try {
|
||||
const rows = await db
|
||||
.select({ info: deviceTable.deviceInfo })
|
||||
.from(deviceTable)
|
||||
.where(eq(deviceTable.id, persistentDeviceId))
|
||||
.limit(1);
|
||||
const info = rows[0]?.info as Record<string, unknown> | null;
|
||||
const apps = info?.installedApps as Array<{ packageName: string; label: string }> | undefined;
|
||||
if (apps && apps.length > 0) {
|
||||
installedAppsContext =
|
||||
`\nINSTALLED_APPS (use exact packageName for "launch" action):\n` +
|
||||
apps.map((a) => ` ${a.label}: ${a.packageName}`).join("\n") +
|
||||
"\n";
|
||||
}
|
||||
} catch {
|
||||
// Non-critical — continue without apps context
|
||||
}
|
||||
}
|
||||
|
||||
// Persist session to DB
|
||||
if (persistentDeviceId) {
|
||||
try {
|
||||
@@ -268,6 +293,12 @@ export async function runAgentLoop(
|
||||
|
||||
try {
|
||||
for (let step = 0; step < maxSteps; step++) {
|
||||
// Check for cancellation
|
||||
if (signal?.aborted) {
|
||||
console.log(`[Agent ${sessionId}] Stopped by user at step ${step + 1}`);
|
||||
break;
|
||||
}
|
||||
|
||||
stepsUsed = step + 1;
|
||||
|
||||
// ── 1. Get screen state from device ─────────────────────
|
||||
@@ -371,6 +402,7 @@ export async function runAgentLoop(
|
||||
let userPrompt =
|
||||
`GOAL: ${goal}\n\n` +
|
||||
`STEP: ${step + 1}/${maxSteps}\n\n` +
|
||||
installedAppsContext +
|
||||
foregroundLine +
|
||||
actionFeedbackLine +
|
||||
`SCREEN_CONTEXT:\n${JSON.stringify(elements, null, 2)}` +
|
||||
|
||||
@@ -8,10 +8,21 @@
|
||||
*/
|
||||
|
||||
import { sessions } from "../ws/sessions.js";
|
||||
import { db } from "../db.js";
|
||||
import { device as deviceTable } from "../schema.js";
|
||||
import { eq } from "drizzle-orm";
|
||||
|
||||
// ─── App Name → Package Name Map ────────────────────────────
|
||||
// ─── Installed App type ──────────────────────────────────────
|
||||
|
||||
const APP_PACKAGES: Record<string, string> = {
|
||||
/**
 * One installed application, as read from the `installedApps` entry of the
 * device's stored `deviceInfo`.
 */
interface InstalledApp {
|
||||
/** Package identifier; this is the value returned as `pkg` when an app name is matched. */
packageName: string;
|
||||
/** App label; lowercased to form the lookup key when resolving app names. */
label: string;
|
||||
}
|
||||
|
||||
// ─── Fallback App Name → Package Name Map ────────────────────
|
||||
// Used only when device has no installed apps data in DB.
|
||||
|
||||
const FALLBACK_PACKAGES: Record<string, string> = {
|
||||
youtube: "com.google.android.youtube",
|
||||
gmail: "com.google.android.gm",
|
||||
chrome: "com.android.chrome",
|
||||
@@ -79,12 +90,32 @@ interface PreprocessResult {
|
||||
}
|
||||
|
||||
/**
 * Build a label→packageName lookup from the device's installed apps.
 *
 * Keys are trimmed, lowercase app labels (e.g. "youtube", "play store").
 * When two apps share the same label, the later entry in `apps` wins.
 *
 * The app list comes from device metadata that is only type-cast, never
 * validated, so this function is defensive: a non-array input yields an empty
 * map, and entries whose `label` or `packageName` is not a non-empty string
 * are skipped instead of throwing.
 *
 * @param apps - Installed apps reported by the device.
 * @returns Map from lowercase label to package name.
 */
function buildInstalledAppMap(apps: InstalledApp[]): Record<string, string> {
  if (!Array.isArray(apps)) {
    return {};
  }

  const entries: Array<[string, string]> = [];
  for (const app of apps) {
    const label: unknown = app?.label;
    const packageName: unknown = app?.packageName;
    if (typeof label !== "string" || typeof packageName !== "string") {
      continue;
    }

    const key = label.trim().toLowerCase();
    // An empty key is meaningless as an app name, and an empty package cannot be launched.
    if (key === "" || packageName === "") {
      continue;
    }
    entries.push([key, packageName]);
  }

  // Object.fromEntries defines own properties, so a label such as "__proto__"
  // becomes a regular key instead of touching the object's prototype.
  return Object.fromEntries(entries);
}
|
||||
|
||||
/**
|
||||
* Try to find an app name at the start of a goal string.
|
||||
* Checks device's installed apps first, then falls back to hardcoded map.
|
||||
* Returns the package name and remaining text, or null.
|
||||
*/
|
||||
function matchAppName(lower: string): { pkg: string; appName: string; rest: string } | null {
|
||||
function matchAppName(
|
||||
lower: string,
|
||||
installedApps: InstalledApp[]
|
||||
): { pkg: string; appName: string; rest: string } | null {
|
||||
// Build combined lookup: installed apps take priority over fallback
|
||||
const installedMap = buildInstalledAppMap(installedApps);
|
||||
const combined: Record<string, string> = { ...FALLBACK_PACKAGES, ...installedMap };
|
||||
|
||||
// Try longest app names first (e.g. "google meet" before "meet")
|
||||
const sorted = Object.keys(APP_PACKAGES).sort((a, b) => b.length - a.length);
|
||||
const sorted = Object.keys(combined).sort((a, b) => b.length - a.length);
|
||||
|
||||
for (const name of sorted) {
|
||||
// Match: "open <app> [app] and <rest>" or "open <app> [app]"
|
||||
@@ -93,7 +124,7 @@ function matchAppName(lower: string): { pkg: string; appName: string; rest: stri
|
||||
);
|
||||
const m = lower.match(pattern);
|
||||
if (m) {
|
||||
return { pkg: APP_PACKAGES[name], appName: name, rest: m[1]?.trim() ?? "" };
|
||||
return { pkg: combined[name], appName: name, rest: m[1]?.trim() ?? "" };
|
||||
}
|
||||
}
|
||||
return null;
|
||||
@@ -103,6 +134,23 @@ function escapeRegex(s: string): string {
|
||||
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
||||
}
|
||||
|
||||
/**
 * Fetch installed apps from the device record in DB.
 *
 * Reads the `installedApps` entry of the device's `deviceInfo` column. The
 * lookup is best-effort: any failure, or a missing or malformed value, results
 * in an empty list so the caller can fall back to the hardcoded package map.
 *
 * @param persistentDeviceId - Primary key of the device row to read.
 * @returns The well-formed installed-app entries, or `[]` when none are available.
 */
async function fetchInstalledApps(persistentDeviceId: string): Promise<InstalledApp[]> {
  try {
    const rows = await db
      .select({ info: deviceTable.deviceInfo })
      .from(deviceTable)
      .where(eq(deviceTable.id, persistentDeviceId))
      .limit(1);

    const info = rows[0]?.info as Record<string, unknown> | null | undefined;
    const raw = info?.installedApps;

    // The column is free-form JSON, so verify the shape instead of trusting a cast.
    if (!Array.isArray(raw)) {
      return [];
    }

    return raw.filter(
      (entry: unknown): entry is InstalledApp =>
        typeof entry === "object" &&
        entry !== null &&
        typeof (entry as Record<string, unknown>).label === "string" &&
        typeof (entry as Record<string, unknown>).packageName === "string"
    );
  } catch (err) {
    // Non-critical: log for diagnosis and continue without installed-app data.
    console.error(`[DB] Failed to load installed apps: ${err}`);
    return [];
  }
}
|
||||
|
||||
/**
|
||||
* Attempt to preprocess a goal before the LLM loop.
|
||||
*
|
||||
@@ -113,12 +161,16 @@ function escapeRegex(s: string): string {
|
||||
*/
|
||||
export async function preprocessGoal(
|
||||
deviceId: string,
|
||||
goal: string
|
||||
goal: string,
|
||||
persistentDeviceId?: string
|
||||
): Promise<PreprocessResult> {
|
||||
const lower = goal.toLowerCase().trim();
|
||||
|
||||
// Fetch device's actual installed apps for accurate package resolution
|
||||
const installedApps = persistentDeviceId ? await fetchInstalledApps(persistentDeviceId) : [];
|
||||
|
||||
// ── Pattern: "open <app> [and <remaining>]" ───────────────
|
||||
const appMatch = matchAppName(lower);
|
||||
const appMatch = matchAppName(lower, installedApps);
|
||||
|
||||
if (appMatch) {
|
||||
try {
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
import type { Context, Next } from "hono";
|
||||
import { auth } from "../auth.js";
|
||||
import { db } from "../db.js";
|
||||
import { session as sessionTable, user as userTable } from "../schema.js";
|
||||
import { eq } from "drizzle-orm";
|
||||
import { getCookie } from "hono/cookie";
|
||||
|
||||
/** Hono Env type for routes protected by sessionMiddleware */
|
||||
export type AuthEnv = {
|
||||
@@ -10,15 +13,43 @@ export type AuthEnv = {
|
||||
};
|
||||
|
||||
/**
 * Hono middleware that authenticates a request from the session cookie.
 *
 * Reads the `better-auth.session_token` cookie, looks the token up directly in
 * the session table, rejects sessions that have expired, then loads the owning
 * user. On success it stores `user` and `session` on the context and calls
 * `next()`; on any failure it responds with `401 { error: "unauthorized" }`.
 *
 * @param c - Hono request context.
 * @param next - Next handler in the middleware chain.
 */
export async function sessionMiddleware(c: Context, next: Next) {
  // Extract session token from cookie (same approach as dashboard WS auth)
  const rawCookie = getCookie(c, "better-auth.session_token");
  if (!rawCookie) {
    return c.json({ error: "unauthorized" }, 401);
  }

  // Token may have a signature appended after a dot — use only the token part
  const token = rawCookie.split(".")[0];
  if (!token) {
    return c.json({ error: "unauthorized" }, 401);
  }

  // Direct DB lookup by token.
  // NOTE(review): assumes the session table exposes `expiresAt`, as in
  // better-auth's standard schema — confirm against ../schema.js.
  const rows = await db
    .select({
      sessionId: sessionTable.id,
      userId: sessionTable.userId,
      expiresAt: sessionTable.expiresAt,
    })
    .from(sessionTable)
    .where(eq(sessionTable.token, token))
    .limit(1);

  const row = rows[0];
  if (!row) {
    return c.json({ error: "unauthorized" }, 401);
  }

  // A row that still exists may belong to an expired session; without this
  // check a stale cookie would authenticate indefinitely. The comparison is
  // written so that a missing or unparsable expiry (NaN) is also rejected.
  const expiresAtMs = new Date(row.expiresAt).getTime();
  if (!(expiresAtMs > Date.now())) {
    return c.json({ error: "unauthorized" }, 401);
  }

  const { sessionId, userId } = row;

  // Fetch user info
  const users = await db
    .select({ id: userTable.id, name: userTable.name, email: userTable.email })
    .from(userTable)
    .where(eq(userTable.id, userId))
    .limit(1);

  const user = users[0];
  if (!user) {
    return c.json({ error: "unauthorized" }, 401);
  }

  c.set("user", user);
  c.set("session", { id: sessionId, userId });
  await next();
}
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
import { Hono } from "hono";
|
||||
import { eq } from "drizzle-orm";
|
||||
import { sessionMiddleware, type AuthEnv } from "../middleware/auth.js";
|
||||
import { sessions } from "../ws/sessions.js";
|
||||
import { runAgentLoop, type AgentLoopOptions } from "../agent/loop.js";
|
||||
import type { LLMConfig } from "../agent/llm.js";
|
||||
import { db } from "../db.js";
|
||||
import { llmConfig as llmConfigTable } from "../schema.js";
|
||||
|
||||
const goals = new Hono<AuthEnv>();
|
||||
goals.use("*", sessionMiddleware);
|
||||
|
||||
/** Track running agent sessions so we can prevent duplicates */
|
||||
const activeSessions = new Map<string, { sessionId: string; goal: string }>();
|
||||
/** Track running agent sessions so we can prevent duplicates and cancel them */
|
||||
const activeSessions = new Map<string, { sessionId: string; goal: string; abort: AbortController }>();
|
||||
|
||||
goals.post("/", async (c) => {
|
||||
const user = c.get("user");
|
||||
@@ -46,15 +49,39 @@ goals.post("/", async (c) => {
|
||||
);
|
||||
}
|
||||
|
||||
// Build LLM config from request body or environment defaults
|
||||
const llmConfig: LLMConfig = {
|
||||
provider: body.llmProvider ?? process.env.LLM_PROVIDER ?? "openai",
|
||||
apiKey: body.llmApiKey ?? process.env.LLM_API_KEY ?? "",
|
||||
model: body.llmModel,
|
||||
};
|
||||
// Build LLM config: request body → user's DB config → env defaults
|
||||
let llmCfg: LLMConfig;
|
||||
|
||||
if (!llmConfig.apiKey) {
|
||||
return c.json({ error: "LLM API key is required (provide llmApiKey or set LLM_API_KEY env var)" }, 400);
|
||||
if (body.llmApiKey) {
|
||||
llmCfg = {
|
||||
provider: body.llmProvider ?? process.env.LLM_PROVIDER ?? "openai",
|
||||
apiKey: body.llmApiKey,
|
||||
model: body.llmModel,
|
||||
};
|
||||
} else {
|
||||
// Fetch user's saved LLM config from DB (same as device WS handler)
|
||||
const configs = await db
|
||||
.select()
|
||||
.from(llmConfigTable)
|
||||
.where(eq(llmConfigTable.userId, user.id))
|
||||
.limit(1);
|
||||
|
||||
if (configs.length > 0) {
|
||||
const cfg = configs[0];
|
||||
llmCfg = {
|
||||
provider: cfg.provider,
|
||||
apiKey: cfg.apiKey,
|
||||
model: body.llmModel ?? cfg.model ?? undefined,
|
||||
};
|
||||
} else if (process.env.LLM_API_KEY) {
|
||||
llmCfg = {
|
||||
provider: process.env.LLM_PROVIDER ?? "openai",
|
||||
apiKey: process.env.LLM_API_KEY,
|
||||
model: body.llmModel,
|
||||
};
|
||||
} else {
|
||||
return c.json({ error: "No LLM provider configured. Set it up in the web dashboard Settings." }, 400);
|
||||
}
|
||||
}
|
||||
|
||||
const options: AgentLoopOptions = {
|
||||
@@ -62,16 +89,20 @@ goals.post("/", async (c) => {
|
||||
persistentDeviceId: device.persistentDeviceId,
|
||||
userId: user.id,
|
||||
goal: body.goal,
|
||||
llmConfig,
|
||||
llmConfig: llmCfg,
|
||||
maxSteps: body.maxSteps,
|
||||
};
|
||||
|
||||
// Create abort controller for this session
|
||||
const abort = new AbortController();
|
||||
options.signal = abort.signal;
|
||||
|
||||
// Start the agent loop in the background (fire-and-forget).
|
||||
// The client observes progress via the /ws/dashboard WebSocket.
|
||||
const loopPromise = runAgentLoop(options);
|
||||
|
||||
// Track as active until it completes
|
||||
const sessionPlaceholder = { sessionId: "pending", goal: body.goal };
|
||||
const sessionPlaceholder = { sessionId: "pending", goal: body.goal, abort };
|
||||
activeSessions.set(trackingKey, sessionPlaceholder);
|
||||
|
||||
loopPromise
|
||||
@@ -96,4 +127,33 @@ goals.post("/", async (c) => {
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * POST /stop — request cancellation of the agent running on a device.
 *
 * Expects a JSON body `{ deviceId: string }`, where `deviceId` may be either
 * the connection-level device id or the persistent device id. Responds with:
 *  - 400 when the body is not valid JSON or `deviceId` is missing,
 *  - 404 when the device is not connected or no agent is running on it,
 *  - 403 when the device belongs to another user,
 *  - 200 `{ status: "stopping" }` once the session's AbortController is triggered.
 */
goals.post("/stop", async (c) => {
  const user = c.get("user");

  // A malformed body makes c.req.json() reject; report that as a client error
  // instead of letting it surface as an unhandled 500.
  let body: { deviceId?: unknown } | null;
  try {
    body = await c.req.json<{ deviceId?: unknown } | null>();
  } catch {
    return c.json({ error: "invalid JSON body" }, 400);
  }

  const deviceId = body?.deviceId;
  if (typeof deviceId !== "string" || deviceId === "") {
    return c.json({ error: "deviceId is required" }, 400);
  }

  // Look up device to verify ownership
  const device = sessions.getDevice(deviceId)
    ?? sessions.getDeviceByPersistentId(deviceId);
  if (!device) {
    return c.json({ error: "device not connected" }, 404);
  }
  if (device.userId !== user.id) {
    return c.json({ error: "device does not belong to you" }, 403);
  }

  // Sessions are tracked under the persistent id when the device has one.
  const trackingKey = device.persistentDeviceId ?? device.deviceId;
  const active = activeSessions.get(trackingKey);
  if (!active) {
    return c.json({ error: "no agent running on this device" }, 404);
  }

  active.abort.abort();
  console.log(`[Agent] Stop requested for device ${deviceId}`);
  return c.json({ status: "stopping" });
});
|
||||
|
||||
export { goals };
|
||||
|
||||
@@ -227,7 +227,7 @@ export async function handleDeviceMessage(
|
||||
// Preprocess: handle simple goals directly, or extract "open X" prefix
|
||||
let effectiveGoal = goal;
|
||||
try {
|
||||
const preResult = await preprocessGoal(deviceId, goal);
|
||||
const preResult = await preprocessGoal(deviceId, goal, persistentDeviceId);
|
||||
if (preResult.handled) {
|
||||
await new Promise((r) => setTimeout(r, 1500));
|
||||
|
||||
@@ -328,6 +328,26 @@ export async function handleDeviceMessage(
|
||||
break;
|
||||
}
|
||||
|
||||
case "apps": {
  // Store the device-reported installed-app list under deviceInfo.installedApps.
  const persistentDeviceId = ws.data.persistentDeviceId;
  if (persistentDeviceId) {
    const apps: unknown = (msg as unknown as { apps?: unknown }).apps;

    // The payload comes from the device; ignore it unless it is actually a list,
    // otherwise `apps.length` below would throw.
    if (!Array.isArray(apps)) {
      console.error(`[Device] Ignoring malformed apps message for device ${persistentDeviceId}`);
      break;
    }

    console.log(`[Device] Received ${apps.length} installed apps for device ${persistentDeviceId}`);

    try {
      // Merge apps into existing deviceInfo. Both the read and the write sit
      // inside the try so a failure of either is logged rather than rejecting
      // the message handler.
      const rows = await db
        .select({ info: device.deviceInfo })
        .from(device)
        .where(eq(device.id, persistentDeviceId))
        .limit(1);
      const existingInfo = (rows[0]?.info as Record<string, unknown> | null | undefined) ?? {};

      await db
        .update(device)
        .set({
          deviceInfo: {
            ...existingInfo,
            installedApps: apps,
          },
        })
        .where(eq(device.id, persistentDeviceId));
    } catch (err) {
      console.error(`[DB] Failed to store installed apps: ${err}`);
    }
  }
  break;
}
|
||||
|
||||
case "heartbeat": {
|
||||
const persistentDeviceId = ws.data.persistentDeviceId;
|
||||
const userId = ws.data.userId;
|
||||
|
||||
Reference in New Issue
Block a user