diff --git a/examples/flows/clear-notifications.yaml b/examples/flows/clear-notifications.yaml
new file mode 100644
index 0000000..e050fbc
--- /dev/null
+++ b/examples/flows/clear-notifications.yaml
@@ -0,0 +1,8 @@
+name: Clear Notifications
+---
+- shell: "cmd statusbar expand-notifications"
+- wait: 2
+- tap: "Clear all"
+- wait: 1
+- home
+- done: "Notifications cleared"
diff --git a/examples/flows/create-contact.yaml b/examples/flows/create-contact.yaml
new file mode 100644
index 0000000..002d8cf
--- /dev/null
+++ b/examples/flows/create-contact.yaml
@@ -0,0 +1,16 @@
+appId: com.google.android.contacts
+name: Create Contact
+---
+- launchApp
+- wait: 2
+- tap: "Create new contact"
+- wait: 1
+- tap: "First name"
+- type: "John"
+- tap: "Last name"
+- type: "Snow"
+- tap: "Phone"
+- type: "5551234567"
+- tap: "Save"
+- wait: 2
+- done: "Contact created"
diff --git a/examples/flows/google-search.yaml b/examples/flows/google-search.yaml
new file mode 100644
index 0000000..83244d6
--- /dev/null
+++ b/examples/flows/google-search.yaml
@@ -0,0 +1,10 @@
+appId: com.google.android.googlequicksearchbox
+name: Google Search
+---
+- launchApp
+- wait: 2
+- tap: "Search"
+- type: "weather today"
+- enter
+- wait: 3
+- done: "Search results loaded"
diff --git a/examples/flows/send-whatsapp.yaml b/examples/flows/send-whatsapp.yaml
new file mode 100644
index 0000000..4927d17
--- /dev/null
+++ b/examples/flows/send-whatsapp.yaml
@@ -0,0 +1,14 @@
+appId: com.whatsapp
+name: Send WhatsApp Message
+---
+- home
+- wait: 1
+- launchApp
+- wait: 2
+- tap: "Vi"
+- wait: 1
+- tap: "Message"
+- type: "hello from Droidclaw"
+- tap: "Send"
+- wait: 1
+- done: "Message sent"
diff --git a/examples/flows/toggle-wifi.yaml b/examples/flows/toggle-wifi.yaml
new file mode 100644
index 0000000..1922992
--- /dev/null
+++ b/examples/flows/toggle-wifi.yaml
@@ -0,0 +1,7 @@
+name: Toggle WiFi
+---
+- settings: "wifi"
+- wait: 2
+- tap: "Wi-Fi"
+- wait: 1
+- done: "WiFi toggled"
diff --git a/examples/send-whatsapp-vi.json b/examples/send-whatsapp-vi.json
new file mode 100644
index 0000000..91a1c35
--- /dev/null
+++ b/examples/send-whatsapp-vi.json
@@ -0,0 +1,10 @@
+{
+  "name": "Send WhatsApp to Vi",
+  "steps": [
+    {
+      "app": "com.whatsapp",
+      "goal": "You are on WhatsApp. First check if 'Vi' is visible in the chat list. If it is, tap on it directly. If not, use the Search bar to search for 'Vi' and then tap the result. Once inside the Vi chat, type 'hello from Droidclaw' in the message field and send it.",
+      "maxSteps": 12
+    }
+  ]
+}
diff --git a/package.json b/package.json
index 7c8eee0..14ed2fe 100644
--- a/package.json
+++ b/package.json
@@ -14,6 +14,7 @@
     "ai": "^6.0.72",
     "fast-xml-parser": "^4.5.0",
     "openai": "^4.73.0",
+    "yaml": "^2.8.2",
     "zod": "^4.3.6"
   },
   "devDependencies": {
diff --git a/src/flow.ts b/src/flow.ts
new file mode 100644
index 0000000..86ea0b8
--- /dev/null
+++ b/src/flow.ts
@@ -0,0 +1,373 @@
+/**
+ * Flow runner for DroidClaw.
+ * Executes deterministic YAML flows (Maestro-style) — no LLM needed.
+ *
+ * Usage:
+ *   bun run src/kernel.ts --flow examples/flows/create-contact.yaml
+ *
+ * YAML format:
+ *   appId: com.android.contacts   # optional frontmatter
+ *   name: Create Contact          # optional flow name
+ *   ---
+ *   - launchApp
+ *   - tap: "First Name"
+ *   - type: "John"
+ *   - back
+ *   - done: "Contact created"
+ */
+
+import { existsSync, readFileSync } from "fs";
+import { basename } from "path";
+import { parseAllDocuments } from "yaml";
+import { Config } from "./config.js";
+import {
+  executeAction,
+  runAdbCommand,
+  getScreenResolution,
+  initDeviceContext,
+  type ActionResult,
+} from "./actions.js";
+import { getInteractiveElements, type UIElement } from "./sanitizer.js";
+
+// ===========================================
+// Types
+// ===========================================
+
+/** Optional metadata read from the frontmatter document of a flow file. */
+interface FlowFrontmatter {
+  appId?: string;
+  name?: string;
+}
+
+/** One flow step: a bare command name or a single-key `{ command: argument }` map. */
+type FlowStep =
+  | string // "launchApp", "back", "home", "enter", "clear"
+  | { [key: string]: string | number | [number, number] }; // "tap: text", "type: text", etc.
+
+/** Result of parsing a flow file. */
+interface ParsedFlow {
+  frontmatter: FlowFrontmatter;
+  steps: FlowStep[];
+}
+
+// ===========================================
+// YAML Parsing
+// ===========================================
+
+/**
+ * Reads and parses a flow file.
+ *
+ * A file holds either one YAML document (the step list) or two documents
+ * separated by `---` (frontmatter first, step list second).
+ *
+ * @param filePath Path to the YAML flow file.
+ * @returns The frontmatter (possibly empty) and a non-empty list of steps.
+ * @throws Error if the file is missing, contains invalid YAML, or has no steps.
+ */
+function parseFlowFile(filePath: string): ParsedFlow {
+  if (!existsSync(filePath)) {
+    throw new Error(`Flow file not found: ${filePath}`);
+  }
+
+  const raw = readFileSync(filePath, "utf-8");
+  const docs = parseAllDocuments(raw);
+
+  // parseAllDocuments records syntax errors on each document instead of
+  // throwing, so check them here rather than running a half-parsed flow.
+  for (const doc of docs) {
+    if (doc.errors.length > 0) {
+      throw new Error(`Invalid YAML in ${filePath}: ${doc.errors[0].message}`);
+    }
+  }
+
+  let frontmatter: FlowFrontmatter = {};
+  let steps: FlowStep[] = [];
+
+  if (docs.length === 1) {
+    // Single document — either the step list or frontmatter without steps
+    const content = docs[0].toJSON();
+    if (Array.isArray(content)) {
+      steps = content;
+    } else if (content && typeof content === "object") {
+      // Frontmatter only; the empty-steps check below rejects the file.
+      frontmatter = content as FlowFrontmatter;
+    }
+  } else if (docs.length >= 2) {
+    // First doc = frontmatter, second doc = steps
+    frontmatter = (docs[0].toJSON() ?? {}) as FlowFrontmatter;
+    steps = (docs[1].toJSON() ?? []) as FlowStep[];
+  }
+
+  if (!Array.isArray(steps) || steps.length === 0) {
+    throw new Error("Flow file contains no steps");
+  }
+
+  return { frontmatter, steps };
+}
+
+// ===========================================
+// Element Finding
+// ===========================================
+
+/**
+ * Dumps the current UI hierarchy via uiautomator, pulls it to the local
+ * dump path, and returns the interactive elements parsed from it.
+ * Returns an empty list if the capture fails or the dump file is absent.
+ */
+function scanScreen(): UIElement[] {
+  try {
+    runAdbCommand(["shell", "uiautomator", "dump", Config.SCREEN_DUMP_PATH]);
+    runAdbCommand(["pull", Config.SCREEN_DUMP_PATH, Config.LOCAL_DUMP_PATH]);
+  } catch {
+    console.log("Warning: Screen capture failed during flow step.");
+    return [];
+  }
+  if (!existsSync(Config.LOCAL_DUMP_PATH)) return [];
+  return getInteractiveElements(readFileSync(Config.LOCAL_DUMP_PATH, "utf-8"));
+}
+
+/**
+ * Finds the element that best matches `query`, case-insensitively.
+ * Tries, in order: exact text, text substring (shortest text wins),
+ * hint substring, resource-id substring.
+ *
+ * @returns The matching element, or null if nothing matches.
+ */
+function findElementByText(elements: UIElement[], query: string): UIElement | null {
+  const q = query.toLowerCase();
+
+  // Exact match first
+  const exact = elements.find(
+    (el) => el.text && el.text.toLowerCase() === q
+  );
+  if (exact) return exact;
+
+  // Substring match — prefer shorter text (more specific)
+  const matches = elements
+    .filter((el) => el.text && el.text.toLowerCase().includes(q))
+    .sort((a, b) => a.text.length - b.text.length);
+  if (matches.length > 0) return matches[0];
+
+  // Hint match
+  const hintMatch = elements.find(
+    (el) => el.hint && el.hint.toLowerCase().includes(q)
+  );
+  if (hintMatch) return hintMatch;
+
+  // Resource ID match
+  const idMatch = elements.find(
+    (el) => el.id && el.id.toLowerCase().includes(q)
+  );
+  if (idMatch) return idMatch;
+
+  return null;
+}
+
+// ===========================================
+// Step Execution
+// ===========================================
+
+/**
+ * Executes a single flow step.
+ *
+ * @param step Bare command string or single-key command object.
+ * @param frontmatter Flow frontmatter; `appId` is required by `launchApp`.
+ * @param stepIndex Zero-based step position, used in error messages.
+ * @returns The action result; unknown or malformed steps yield `success: false`.
+ */
+function executeFlowStep(
+  step: FlowStep,
+  frontmatter: FlowFrontmatter,
+  stepIndex: number
+): ActionResult {
+  // Simple string commands: "launchApp", "back", "home", "enter", "clear"
+  if (typeof step === "string") {
+    switch (step) {
+      case "launchApp":
+        if (!frontmatter.appId) {
+          return { success: false, message: "launchApp requires appId in frontmatter" };
+        }
+        return executeAction({ action: "launch", package: frontmatter.appId });
+      case "back":
+        return executeAction({ action: "back" });
+      case "home":
+        return executeAction({ action: "home" });
+      case "enter":
+        return executeAction({ action: "enter" });
+      case "clear":
+        return executeAction({ action: "clear" });
+      case "done":
+        return executeAction({ action: "done", reason: "Flow complete" });
+      default:
+        return { success: false, message: `Unknown step: ${step}` };
+    }
+  }
+
+  // Object commands: { tap: "text", type: "hello", wait: 2, ... }
+  if (typeof step === "object" && step !== null) {
+    // Only the first key is used; an empty map (`- {}`) has none.
+    const entry = Object.entries(step)[0];
+    if (!entry) {
+      return { success: false, message: `Empty step at index ${stepIndex}` };
+    }
+    const [command, value] = entry;
+
+    switch (command) {
+      case "tap": {
+        if (Array.isArray(value)) {
+          // Coordinate tap: tap: [x, y]
+          return executeAction({ action: "tap", coordinates: value as [number, number] });
+        }
+        // Text tap: find element and tap
+        const elements = scanScreen();
+        const el = findElementByText(elements, String(value));
+        if (!el) {
+          const available = elements.filter((e) => e.text).map((e) => e.text).slice(0, 10);
+          return { success: false, message: `Element "${value}" not found. Available: ${available.join(", ")}` };
+        }
+        console.log(` Found "${el.text}" at (${el.center[0]}, ${el.center[1]})`);
+        return executeAction({ action: "tap", coordinates: el.center });
+      }
+
+      case "longpress": {
+        if (Array.isArray(value)) {
+          return executeAction({ action: "longpress", coordinates: value as [number, number] });
+        }
+        const elements = scanScreen();
+        const el = findElementByText(elements, String(value));
+        if (!el) {
+          return { success: false, message: `Element "${value}" not found for longpress` };
+        }
+        console.log(` Found "${el.text}" at (${el.center[0]}, ${el.center[1]})`);
+        return executeAction({ action: "longpress", coordinates: el.center });
+      }
+
+      case "type":
+        return executeAction({ action: "type", text: String(value) });
+
+      case "swipe":
+        return executeAction({ action: "swipe", direction: String(value) });
+
+      case "scroll":
+        return executeAction({ action: "scroll", direction: String(value) });
+
+      case "wait": {
+        // Honor an explicit `wait: 0`; default to 2s only for missing,
+        // non-numeric, or negative values.
+        const parsed = typeof value === "number" ? value : Number.parseFloat(String(value));
+        const seconds = Number.isFinite(parsed) && parsed >= 0 ? parsed : 2;
+        console.log(` Waiting ${seconds}s...`);
+        Bun.sleepSync(seconds * 1000);
+        return { success: true, message: `Waited ${seconds}s` };
+      }
+
+      case "launch":
+        return executeAction({ action: "launch", package: String(value) });
+
+      case "openUrl":
+        return executeAction({ action: "open_url", url: String(value) });
+
+      case "clipboard":
+        return executeAction({ action: "clipboard_set", text: String(value) });
+
+      case "paste": {
+        if (Array.isArray(value)) {
+          return executeAction({ action: "paste", coordinates: value as [number, number] });
+        }
+        return executeAction({ action: "paste" });
+      }
+
+      case "shell":
+        return executeAction({ action: "shell", command: String(value) });
+
+      case "keyevent":
+        return executeAction({ action: "keyevent", code: Number(value) });
+
+      case "settings":
+        return executeAction({ action: "open_settings", setting: String(value) });
+
+      case "done":
+        return executeAction({ action: "done", reason: String(value) });
+
+      default:
+        return { success: false, message: `Unknown command: ${command}` };
+    }
+  }
+
+  return { success: false, message: `Invalid step at index ${stepIndex}: ${JSON.stringify(step)}` };
+}
+
+// ===========================================
+// Flow Runner
+// ===========================================
+
+/** Outcome of a flow run, including how many steps completed. */
+export interface FlowResult {
+  name: string;
+  success: boolean;
+  stepsCompleted: number;
+  totalSteps: number;
+  error?: string;
+}
+
+/** Builds the one-line label printed for a step; tolerates malformed steps. */
+function describeStep(step: FlowStep): string {
+  if (typeof step === "string") return step;
+  if (typeof step !== "object" || step === null) return String(step);
+  const entry = Object.entries(step)[0];
+  return entry ? entry.join(": ") : "(empty step)";
+}
+
+/**
+ * Runs every step of a flow file in order, stopping at the first failure.
+ *
+ * @param filePath Path to the YAML flow file.
+ * @returns A summary with the flow name, success flag, and step counts.
+ * @throws Error if the flow file cannot be parsed (see parseFlowFile).
+ */
+export async function runFlow(filePath: string): Promise<FlowResult> {
+  const { frontmatter, steps } = parseFlowFile(filePath);
+  const name = frontmatter.name ?? (basename(filePath) || "flow");
+
+  // Auto-detect screen resolution
+  const resolution = getScreenResolution();
+  if (resolution) {
+    initDeviceContext(resolution);
+    console.log(`Screen resolution: ${resolution[0]}x${resolution[1]}`);
+  }
+
+  console.log(`\nFlow: ${name}`);
+  if (frontmatter.appId) console.log(`App: ${frontmatter.appId}`);
+  console.log(`Steps: ${steps.length}\n`);
+
+  for (let i = 0; i < steps.length; i++) {
+    const step = steps[i];
+    console.log(`[${i + 1}/${steps.length}] ${describeStep(step)}`);
+
+    const result = executeFlowStep(step, frontmatter, i);
+
+    if (!result.success) {
+      console.log(` FAILED: ${result.message}`);
+      return {
+        name,
+        success: false,
+        stepsCompleted: i,
+        totalSteps: steps.length,
+        error: result.message,
+      };
+    }
+
+    console.log(` OK: ${result.message}`);
+
+    // Brief pause between steps for UI to settle. Skipped after the last
+    // step and after `wait` steps, which already paused explicitly.
+    const isLastStep = i === steps.length - 1;
+    const isWaitStep = typeof step === "object" && step !== null && "wait" in step;
+    if (!isLastStep && !isWaitStep) {
+      await Bun.sleep(Config.STEP_DELAY * 1000);
+    }
+  }
+
+  console.log(`\nFlow "${name}" completed successfully.`);
+  return { name, success: true, stepsCompleted: steps.length, totalSteps: steps.length };
+}
diff --git a/src/kernel.ts b/src/kernel.ts
index 53ea033..7bc53af 100644
--- a/src/kernel.ts
+++ b/src/kernel.ts
@@ -467,6 +467,31 @@ async function main(): Promise {
     return;
   }
 
+  // Check for --flow flag (deterministic YAML flows, no LLM)
+  const flowIdx = process.argv.findIndex((a) => a === "--flow" || a.startsWith("--flow="));
+  if (flowIdx !== -1) {
+    const flowArg = process.argv[flowIdx];
+    // Take everything after the first "=" so paths containing "=" survive
+    const flowFile = flowArg.includes("=")
+      ? flowArg.slice(flowArg.indexOf("=") + 1)
+      : process.argv[flowIdx + 1];
+
+    if (!flowFile) {
+      console.log("Error: --flow requires a YAML file path.");
+      process.exit(1);
+    }
+
+    const { runFlow } = await import("./flow.js");
+    try {
+      const result = await runFlow(flowFile);
+      console.log(`\nResult: ${result.success ? "OK" : "FAILED"} (${result.stepsCompleted}/${result.totalSteps} steps)`);
+      process.exit(result.success ? 0 : 1);
+    } catch (e) {
+      console.log(`Flow Error: ${e instanceof Error ? e.message : String(e)}`);
+      process.exit(1);
+    }
+  }
+
   // Check for --workflow flag
   const workflowIdx = process.argv.findIndex((a) => a === "--workflow" || a.startsWith("--workflow="));
   if (workflowIdx !== -1) {