Add Maestro-style YAML flow runner for deterministic automation
New --flow mode executes scripted YAML steps without an LLM, mapping 17 commands (tap, type, swipe, scroll, etc.) to existing actions. Element finding matches accessibility-tree text, hint, and resource ID. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
8
examples/flows/clear-notifications.yaml
Normal file
8
examples/flows/clear-notifications.yaml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
name: Clear Notifications
|
||||||
|
---
|
||||||
|
- shell: "cmd statusbar expand-notifications"
|
||||||
|
- wait: 2
|
||||||
|
- tap: "Clear all"
|
||||||
|
- wait: 1
|
||||||
|
- home
|
||||||
|
- done: "Notifications cleared"
|
||||||
16
examples/flows/create-contact.yaml
Normal file
16
examples/flows/create-contact.yaml
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
appId: com.google.android.contacts
|
||||||
|
name: Create Contact
|
||||||
|
---
|
||||||
|
- launchApp
|
||||||
|
- wait: 2
|
||||||
|
- tap: "Create new contact"
|
||||||
|
- wait: 1
|
||||||
|
- tap: "First name"
|
||||||
|
- type: "John"
|
||||||
|
- tap: "Last name"
|
||||||
|
- type: "Snow"
|
||||||
|
- tap: "Phone"
|
||||||
|
- type: "5551234567"
|
||||||
|
- tap: "Save"
|
||||||
|
- wait: 2
|
||||||
|
- done: "Contact created"
|
||||||
10
examples/flows/google-search.yaml
Normal file
10
examples/flows/google-search.yaml
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
appId: com.google.android.googlequicksearchbox
|
||||||
|
name: Google Search
|
||||||
|
---
|
||||||
|
- launchApp
|
||||||
|
- wait: 2
|
||||||
|
- tap: "Search"
|
||||||
|
- type: "weather today"
|
||||||
|
- enter
|
||||||
|
- wait: 3
|
||||||
|
- done: "Search results loaded"
|
||||||
14
examples/flows/send-whatsapp.yaml
Normal file
14
examples/flows/send-whatsapp.yaml
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
appId: com.whatsapp
|
||||||
|
name: Send WhatsApp Message
|
||||||
|
---
|
||||||
|
- home
|
||||||
|
- wait: 1
|
||||||
|
- launchApp
|
||||||
|
- wait: 2
|
||||||
|
- tap: "Vi"
|
||||||
|
- wait: 1
|
||||||
|
- tap: "Message"
|
||||||
|
- type: "hello from Droidclaw"
|
||||||
|
- tap: "Send"
|
||||||
|
- wait: 1
|
||||||
|
- done: "Message sent"
|
||||||
7
examples/flows/toggle-wifi.yaml
Normal file
7
examples/flows/toggle-wifi.yaml
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
name: Toggle WiFi
|
||||||
|
---
|
||||||
|
- settings: "wifi"
|
||||||
|
- wait: 2
|
||||||
|
- tap: "Wi-Fi"
|
||||||
|
- wait: 1
|
||||||
|
- done: "WiFi toggled"
|
||||||
10
examples/send-whatsapp-vi.json
Normal file
10
examples/send-whatsapp-vi.json
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"name": "Send WhatsApp to Vi",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"app": "com.whatsapp",
|
||||||
|
"goal": "You are on WhatsApp. First check if 'Vi' is visible in the chat list. If it is, tap on it directly. If not, use the Search bar to search for 'Vi' and then tap the result. Once inside the Vi chat, type 'hello from Droidclaw' in the message field and send it.",
|
||||||
|
"maxSteps": 12
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -14,6 +14,7 @@
|
|||||||
"ai": "^6.0.72",
|
"ai": "^6.0.72",
|
||||||
"fast-xml-parser": "^4.5.0",
|
"fast-xml-parser": "^4.5.0",
|
||||||
"openai": "^4.73.0",
|
"openai": "^4.73.0",
|
||||||
|
"yaml": "^2.8.2",
|
||||||
"zod": "^4.3.6"
|
"zod": "^4.3.6"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
|||||||
305
src/flow.ts
Normal file
305
src/flow.ts
Normal file
@@ -0,0 +1,305 @@
|
|||||||
|
/**
|
||||||
|
* Flow runner for DroidClaw.
|
||||||
|
* Executes deterministic YAML flows (Maestro-style) — no LLM needed.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* bun run src/kernel.ts --flow examples/flows/create-contact.yaml
|
||||||
|
*
|
||||||
|
* YAML format:
|
||||||
|
* appId: com.android.contacts # optional frontmatter
|
||||||
|
* name: Create Contact # optional flow name
|
||||||
|
* ---
|
||||||
|
* - launchApp
|
||||||
|
* - tap: "First Name"
|
||||||
|
* - type: "John"
|
||||||
|
* - back
|
||||||
|
* - done: "Contact created"
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { existsSync, readFileSync } from "fs";
|
||||||
|
import { parseAllDocuments } from "yaml";
|
||||||
|
import { Config } from "./config.js";
|
||||||
|
import {
|
||||||
|
executeAction,
|
||||||
|
runAdbCommand,
|
||||||
|
getScreenResolution,
|
||||||
|
initDeviceContext,
|
||||||
|
type ActionResult,
|
||||||
|
} from "./actions.js";
|
||||||
|
import { getInteractiveElements, type UIElement } from "./sanitizer.js";
|
||||||
|
|
||||||
|
// ===========================================
|
||||||
|
// Types
|
||||||
|
// ===========================================
|
||||||
|
|
||||||
|
/**
 * Optional metadata that may precede the step list in a flow file
 * (the YAML document before the `---` separator).
 */
interface FlowFrontmatter {
  /** Human-readable flow name; the runner falls back to the file name when absent. */
  name?: string;
  /** Package passed to the `launch` action by the bare `launchApp` step. */
  appId?: string;
}
|
||||||
|
|
||||||
|
/** Payload of a keyed step: text, a number, or an `[x, y]` coordinate pair. */
type FlowStepValue = string | number | [number, number];

/**
 * One entry of the step list.
 *
 * - A bare string is an argument-less command: "launchApp", "back", "home", "enter", "clear".
 * - A single-key mapping is a command with a value: "tap: text", "type: text", etc.
 */
type FlowStep = string | Record<string, FlowStepValue>;
|
||||||
|
|
||||||
|
/** Result of parsing a flow file: its optional metadata plus the ordered steps. */
interface ParsedFlow {
  /** Ordered list of steps to execute. */
  steps: FlowStep[];
  /** Metadata from the first YAML document; an empty object when the file has none. */
  frontmatter: FlowFrontmatter;
}
|
||||||
|
|
||||||
|
// ===========================================
|
||||||
|
// YAML Parsing
|
||||||
|
// ===========================================
|
||||||
|
|
||||||
|
/**
 * Reads a YAML flow file and splits it into frontmatter and steps.
 *
 * Accepted layouts:
 *  - one YAML document that is a list of steps (no frontmatter), or
 *  - two documents separated by `---`: a frontmatter mapping, then the step list.
 *    Any documents after the second are ignored.
 *
 * @param filePath Path to the flow file.
 * @returns The frontmatter (empty object when absent) and a non-empty step list.
 * @throws Error when the file is missing, contains YAML syntax errors, or has no steps.
 */
function parseFlowFile(filePath: string): ParsedFlow {
  if (!existsSync(filePath)) {
    throw new Error(`Flow file not found: ${filePath}`);
  }

  const raw = readFileSync(filePath, "utf-8");
  const docs = parseAllDocuments(raw);

  // parseAllDocuments does not throw on malformed input; it records problems on
  // each document. Without this check a broken file would be silently executed
  // as whatever partial structure the parser managed to recover.
  for (const doc of docs) {
    if (doc.errors.length > 0) {
      throw new Error(`Invalid YAML in flow file ${filePath}: ${doc.errors[0].message}`);
    }
  }

  let frontmatter: FlowFrontmatter = {};
  let steps: FlowStep[] = [];

  if (docs.length === 1) {
    // Single document — either a bare step list or a mapping with no steps.
    const content = docs[0].toJSON();
    if (Array.isArray(content)) {
      steps = content;
    } else if (content && typeof content === "object") {
      // Frontmatter only; the empty-steps check below rejects the file.
      frontmatter = content as FlowFrontmatter;
    }
  } else if (docs.length >= 2) {
    // First doc = frontmatter, second doc = steps.
    frontmatter = (docs[0].toJSON() ?? {}) as FlowFrontmatter;
    steps = (docs[1].toJSON() ?? []) as FlowStep[];
  }

  // Also covers a second document that parsed to something other than a list.
  if (!Array.isArray(steps) || steps.length === 0) {
    throw new Error("Flow file contains no steps");
  }

  return { frontmatter, steps };
}
|
||||||
|
|
||||||
|
// ===========================================
|
||||||
|
// Element Finding
|
||||||
|
// ===========================================
|
||||||
|
|
||||||
|
/**
 * Captures the current screen's UI hierarchy via `uiautomator dump`, pulls the
 * dump to the local path configured in `Config`, and returns the elements
 * extracted by `getInteractiveElements`.
 *
 * @returns The extracted elements, or an empty array when either adb command
 *          throws or the local dump file does not exist afterwards.
 */
function scanScreen(): UIElement[] {
  try {
    // Dump on the device first, then copy the result to the host.
    runAdbCommand(["shell", "uiautomator", "dump", Config.SCREEN_DUMP_PATH]);
    runAdbCommand(["pull", Config.SCREEN_DUMP_PATH, Config.LOCAL_DUMP_PATH]);
  } catch {
    console.log("Warning: Screen capture failed during flow step.");
    return [];
  }

  if (existsSync(Config.LOCAL_DUMP_PATH)) {
    const dumpXml = readFileSync(Config.LOCAL_DUMP_PATH, "utf-8");
    return getInteractiveElements(dumpXml);
  }
  return [];
}
|
||||||
|
|
||||||
|
/**
 * Finds the element that best matches `query`, case-insensitively.
 *
 * Match priority:
 *  1. first element whose text equals the query,
 *  2. element with the shortest text containing the query (first one wins on ties),
 *  3. first element whose hint contains the query,
 *  4. first element whose resource id contains the query.
 *
 * @param elements Elements of the current screen.
 * @param query Text to look for.
 * @returns The best match, or null when nothing matches or the query is blank.
 */
function findElementByText(elements: UIElement[], query: string): UIElement | null {
  const q = query.toLowerCase();

  // An empty string is a substring of every string, so a blank query would
  // otherwise "match" an arbitrary element and tap it.
  if (q.trim() === "") return null;

  // Single pass over the text: return on an exact hit, otherwise remember the
  // shortest containing text. Strict `<` keeps the earliest element on ties.
  let shortestContaining: UIElement | null = null;
  for (const el of elements) {
    if (!el.text) continue;
    const text = el.text.toLowerCase();
    if (text === q) return el;
    if (
      text.includes(q) &&
      (shortestContaining === null || el.text.length < shortestContaining.text.length)
    ) {
      shortestContaining = el;
    }
  }

  // A later exact match must still win over an earlier substring match.
  // The loop above only returns early on exact hits, so reaching here means none exist.
  if (shortestContaining) return shortestContaining;

  // Hint match
  const hintMatch = elements.find((el) => el.hint && el.hint.toLowerCase().includes(q));
  if (hintMatch) return hintMatch;

  // Resource ID match
  const idMatch = elements.find((el) => el.id && el.id.toLowerCase().includes(q));
  if (idMatch) return idMatch;

  return null;
}
|
||||||
|
|
||||||
|
// ===========================================
|
||||||
|
// Step Execution
|
||||||
|
// ===========================================
|
||||||
|
|
||||||
|
/** Returns true when `value` is a two-element array of finite numbers, i.e. usable as `[x, y]`. */
function isCoordinatePair(value: unknown): value is [number, number] {
  return (
    Array.isArray(value) &&
    value.length === 2 &&
    value.every((n) => typeof n === "number" && Number.isFinite(n))
  );
}

/**
 * Executes one flow step by translating it into an `executeAction` call
 * (or, for `wait`, a blocking sleep).
 *
 * Bare-string steps: launchApp, back, home, enter, clear, done.
 * Keyed steps (only the first key of the mapping is used): tap, longpress, type,
 * swipe, scroll, wait, launch, openUrl, clipboard, paste, shell, keyevent,
 * settings, done.
 *
 * `tap` and `longpress` accept either `[x, y]` coordinates or a text query that
 * is resolved against a fresh screen scan.
 *
 * @param step The step to run.
 * @param frontmatter Flow metadata; `appId` is required by `launchApp`.
 * @param stepIndex Zero-based index of the step, used in error messages.
 * @returns The action's result; malformed or unknown steps yield `success: false`
 *          rather than throwing.
 */
function executeFlowStep(
  step: FlowStep,
  frontmatter: FlowFrontmatter,
  stepIndex: number
): ActionResult {
  // Simple string commands: "launchApp", "back", "home", "enter", "clear"
  if (typeof step === "string") {
    switch (step) {
      case "launchApp":
        if (!frontmatter.appId) {
          return { success: false, message: "launchApp requires appId in frontmatter" };
        }
        return executeAction({ action: "launch", package: frontmatter.appId });
      case "back":
        return executeAction({ action: "back" });
      case "home":
        return executeAction({ action: "home" });
      case "enter":
        return executeAction({ action: "enter" });
      case "clear":
        return executeAction({ action: "clear" });
      case "done":
        return executeAction({ action: "done", reason: "Flow complete" });
      default:
        return { success: false, message: `Unknown step: ${step}` };
    }
  }

  // Object commands: { tap: "text", type: "hello", wait: 2, ... }
  if (typeof step === "object" && step !== null) {
    const entry = Object.entries(step)[0];
    if (entry === undefined) {
      // Empty mapping (`- {}`): nothing to destructure, report it instead of crashing.
      return { success: false, message: `Invalid step at index ${stepIndex}: ${JSON.stringify(step)}` };
    }
    const [command, value] = entry;

    switch (command) {
      case "tap": {
        if (Array.isArray(value)) {
          // Coordinate tap: tap: [x, y]
          if (!isCoordinatePair(value)) {
            return { success: false, message: `tap expects [x, y] coordinates, got: ${JSON.stringify(value)}` };
          }
          return executeAction({ action: "tap", coordinates: value });
        }
        // Text tap: find element and tap
        const elements = scanScreen();
        const el = findElementByText(elements, String(value));
        if (!el) {
          // List a few visible texts to help the flow author fix the query.
          const available = elements.filter((e) => e.text).map((e) => e.text).slice(0, 10);
          return { success: false, message: `Element "${value}" not found. Available: ${available.join(", ")}` };
        }
        console.log(`  Found "${el.text}" at (${el.center[0]}, ${el.center[1]})`);
        return executeAction({ action: "tap", coordinates: el.center });
      }

      case "longpress": {
        if (Array.isArray(value)) {
          if (!isCoordinatePair(value)) {
            return { success: false, message: `longpress expects [x, y] coordinates, got: ${JSON.stringify(value)}` };
          }
          return executeAction({ action: "longpress", coordinates: value });
        }
        const elements = scanScreen();
        const el = findElementByText(elements, String(value));
        if (!el) {
          return { success: false, message: `Element "${value}" not found for longpress` };
        }
        console.log(`  Found "${el.text}" at (${el.center[0]}, ${el.center[1]})`);
        return executeAction({ action: "longpress", coordinates: el.center });
      }

      case "type":
        return executeAction({ action: "type", text: String(value) });

      case "swipe":
        return executeAction({ action: "swipe", direction: String(value) });

      case "scroll":
        return executeAction({ action: "scroll", direction: String(value) });

      case "wait": {
        // Missing, blank, non-numeric or negative values fall back to 2 seconds.
        // An explicit 0 is honoured (`||` previously turned it into 2).
        const requested =
          typeof value === "number"
            ? value
            : typeof value === "string" && value.trim() !== ""
              ? Number(value)
              : NaN;
        const seconds = Number.isFinite(requested) && requested >= 0 ? requested : 2;
        console.log(`  Waiting ${seconds}s...`);
        Bun.sleepSync(seconds * 1000);
        return { success: true, message: `Waited ${seconds}s` };
      }

      case "launch":
        return executeAction({ action: "launch", package: String(value) });

      case "openUrl":
        return executeAction({ action: "open_url", url: String(value) });

      case "clipboard":
        return executeAction({ action: "clipboard_set", text: String(value) });

      case "paste": {
        if (Array.isArray(value)) {
          if (!isCoordinatePair(value)) {
            return { success: false, message: `paste expects [x, y] coordinates, got: ${JSON.stringify(value)}` };
          }
          return executeAction({ action: "paste", coordinates: value });
        }
        return executeAction({ action: "paste" });
      }

      case "shell":
        return executeAction({ action: "shell", command: String(value) });

      case "keyevent": {
        const code = Number(value);
        if (!Number.isInteger(code)) {
          // Never forward NaN (e.g. `keyevent: "abc"`) to the action layer.
          return { success: false, message: `keyevent requires a numeric key code, got: ${JSON.stringify(value)}` };
        }
        return executeAction({ action: "keyevent", code });
      }

      case "settings":
        return executeAction({ action: "open_settings", setting: String(value) });

      case "done":
        return executeAction({ action: "done", reason: String(value) });

      default:
        return { success: false, message: `Unknown command: ${command}` };
    }
  }

  return { success: false, message: `Invalid step at index ${stepIndex}: ${JSON.stringify(step)}` };
}
|
||||||
|
|
||||||
|
// ===========================================
|
||||||
|
// Flow Runner
|
||||||
|
// ===========================================
|
||||||
|
|
||||||
|
/** Outcome of running a flow, as returned by `runFlow`. */
export interface FlowResult {
  /** Flow name from the frontmatter, or the file name when none is given. */
  name: string;
  /** True when every step succeeded. */
  success: boolean;
  /** Number of steps that finished successfully before the run ended. */
  stepsCompleted: number;
  /** Total number of steps in the flow. */
  totalSteps: number;
  /** Message of the failing step; absent on success. */
  error?: string;
}
|
||||||
|
|
||||||
|
/**
 * Runs every step of a YAML flow file in order, stopping at the first failure.
 *
 * Before the first step the screen resolution is detected and, when available,
 * handed to `initDeviceContext`. After each successful step except the last one
 * and except `wait` steps, the runner pauses for `Config.STEP_DELAY`
 * (multiplied by 1000 for `Bun.sleep`) so the UI can settle.
 *
 * @param filePath Path to the YAML flow file.
 * @returns A summary with the number of completed steps and, on failure, the
 *          failing step's message.
 * @throws Error propagated from `parseFlowFile` when the file is missing or has no steps.
 */
export async function runFlow(filePath: string): Promise<FlowResult> {
  const { frontmatter, steps } = parseFlowFile(filePath);
  const name = frontmatter.name ?? filePath.split("/").pop() ?? "flow";

  // Auto-detect screen resolution
  const resolution = getScreenResolution();
  if (resolution) {
    initDeviceContext(resolution);
    console.log(`Screen resolution: ${resolution[0]}x${resolution[1]}`);
  }

  console.log(`\nFlow: ${name}`);
  if (frontmatter.appId) console.log(`App: ${frontmatter.appId}`);
  console.log(`Steps: ${steps.length}\n`);

  for (let i = 0; i < steps.length; i++) {
    const step = steps[i];

    // YAML can yield null (`- `) or an empty mapping (`- {}`); build the label
    // defensively so such steps reach executeFlowStep and fail with a clear
    // message instead of crashing here.
    const firstEntry =
      typeof step === "object" && step !== null ? Object.entries(step)[0] : undefined;
    const label =
      typeof step === "string"
        ? step
        : firstEntry !== undefined
          ? firstEntry.join(": ")
          : String(JSON.stringify(step));
    console.log(`[${i + 1}/${steps.length}] ${label}`);

    const result = executeFlowStep(step, frontmatter, i);

    if (!result.success) {
      console.log(`  FAILED: ${result.message}`);
      return {
        name,
        success: false,
        stepsCompleted: i,
        totalSteps: steps.length,
        error: result.message,
      };
    }

    console.log(`  OK: ${result.message}`);

    // Brief pause between steps for UI to settle. Skipped after the final step
    // (nothing follows) and after `wait` steps (they already paused).
    // The previous condition mixed `&&` and `||` without parentheses, which
    // skipped the pause after string steps such as "launchApp" or "back".
    const isLastStep = i === steps.length - 1;
    const isWaitStep = typeof step === "object" && step !== null && "wait" in step;
    if (!isLastStep && !isWaitStep) {
      await Bun.sleep(Config.STEP_DELAY * 1000);
    }
  }

  console.log(`\nFlow "${name}" completed successfully.`);
  return { name, success: true, stepsCompleted: steps.length, totalSteps: steps.length };
}
|
||||||
@@ -467,6 +467,30 @@ async function main(): Promise<void> {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for --flow flag (deterministic YAML flows, no LLM)
|
||||||
|
const flowIdx = process.argv.findIndex((a) => a === "--flow" || a.startsWith("--flow="));
|
||||||
|
if (flowIdx !== -1) {
|
||||||
|
const flowArg = process.argv[flowIdx];
|
||||||
|
const flowFile = flowArg.includes("=")
|
||||||
|
? flowArg.split("=")[1]
|
||||||
|
: process.argv[flowIdx + 1];
|
||||||
|
|
||||||
|
if (!flowFile) {
|
||||||
|
console.log("Error: --flow requires a YAML file path.");
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
const { runFlow } = await import("./flow.js");
|
||||||
|
try {
|
||||||
|
const result = await runFlow(flowFile);
|
||||||
|
console.log(`\nResult: ${result.success ? "OK" : "FAILED"} (${result.stepsCompleted}/${result.totalSteps} steps)`);
|
||||||
|
process.exit(result.success ? 0 : 1);
|
||||||
|
} catch (e) {
|
||||||
|
console.log(`Flow Error: ${(e as Error).message}`);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Check for --workflow flag
|
// Check for --workflow flag
|
||||||
const workflowIdx = process.argv.findIndex((a) => a === "--workflow" || a.startsWith("--workflow="));
|
const workflowIdx = process.argv.findIndex((a) => a === "--workflow" || a.startsWith("--workflow="));
|
||||||
if (workflowIdx !== -1) {
|
if (workflowIdx !== -1) {
|
||||||
|
|||||||
Reference in New Issue
Block a user