From 477d99861ca24a6525698d5aa23220f023fa3ac3 Mon Sep 17 00:00:00 2001
From: Sanju Sivalingam <spikeysanju98@gmail.com>
Date: Fri, 6 Feb 2026 08:59:43 +0530
Subject: [PATCH] initial commit

---
 .DS_Store                                  | Bin 0 -> 6148 bytes
 android-action-kernel/.DS_Store            | Bin 0 -> 6148 bytes
 android-action-kernel/.env.example         |  63 ++++
 android-action-kernel/.gitignore           |   4 +
 android-action-kernel/package.json         |  22 ++
 android-action-kernel/src/actions.ts       | 322 ++++++++++++++++++++
 android-action-kernel/src/config.ts        |  82 ++++++
 android-action-kernel/src/constants.ts     |  78 +++++
 android-action-kernel/src/kernel.ts        | 298 +++++++++++++++++++
 android-action-kernel/src/llm-providers.ts | 327 +++++++++++++++++++++
 android-action-kernel/src/sanitizer.ts     | 171 +++++++++++
 android-action-kernel/tsconfig.json        |  19 ++
 12 files changed, 1386 insertions(+)
 create mode 100644 .DS_Store
 create mode 100644 android-action-kernel/.DS_Store
 create mode 100644 android-action-kernel/.env.example
 create mode 100644 android-action-kernel/.gitignore
 create mode 100644 android-action-kernel/package.json
 create mode 100644 android-action-kernel/src/actions.ts
 create mode 100644 android-action-kernel/src/config.ts
 create mode 100644 android-action-kernel/src/constants.ts
 create mode 100644 android-action-kernel/src/kernel.ts
 create mode 100644 android-action-kernel/src/llm-providers.ts
 create mode 100644 android-action-kernel/src/sanitizer.ts
 create mode 100644 android-action-kernel/tsconfig.json
diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..fddfd185c7c5c68d7ab71e605dd01797c8576d54
GIT binary patch
literal 6148
zcmeHKPfyf96n_J&c0r`TE+WySaW5u-f`4L+F{}$lR}T<Na1n5AyTei`Gn=-sA|ctM
zUi<)l06&89$i<^q4_^EN9z1!~H-8$y)dLrzrZ1WKy?OKAo6fJDnGOJu(nf6pzytsT
zi@;15R=*L^FG`zqXiEl>$Q&GesKFRG;I(<DhLnMnfxnFbv3B#gUKy_25caR%7}qjK
z`xvYN#Xg30P!71~2HeFcv1OF7T>K#gwL@icn^8k&dgnF*KZ?i4Kaq~k%(3Gq48!O)
zmhDyEv|}gUh)PblC|Yxz2X3M}?z~T%RWmomS?u_XhBCmfQFK{)<g*%YmiPv%MKab_
z9Y!{rt(t?|+t;sL8?`2OMlGe?xID45lQr^%OE(`@*MfD%U&<K)!JSP?kH!REp>hc3
zd~Z|FmwZg-`^7Yq8E3n?PxhSZHT(Jp`f`2we17os(3#=kvsH7zb-cBDC}uLn10I%w
z4O;O%&TRh$t;F<sER?F*|BK&4T6?l*n%ioiPj1g3bKMOoWsCLDug$EiL^Q7K(-*2{
zZ{&qM2xfWY$3ABZOA)?=a!l9DY=z^9oA?4|8lo$jr(uNH7E@Vp!q8!LTH0C;9G3<&
zti<D(uM1C+4JR@iQjtlHZ8sdxXZ6D5H^p<IIGZRJlwv|bDR?U75=|p1?0UOWpJSZS
zY_?E*pt}qhf!lB&Y*>XRyoNXM7T&`L_zYj+JIRnia-NKktK=4$B=<;}JSHAlB|dqk
z;yaSh;GE8<fkdbr;xpK^Ll$sW$EYOZ!GuTH(hp#CAK{qkP$>f`1OEmCVt=r(2rO!B
zDU@3WR`LpfD1IZPpe}w4M2w}esIjFGdr*i9MO2|gUonUZN4ur{iW*xARX7kiGUm}E
z6MaJ=a&)v?(j7=qVOpmQqzoKnpi?zXasJ==^Zox|P})ivNE!IA7$BKxds<xO_1W4s
zIdRrnSe{}LBK%qkWe8UKIF^byiifaB!9GhUh((Ppg=j%Ze*`2AX_Yc?qzwE7Q;zI*

literal 0
HcmV?d00001

diff --git a/android-action-kernel/.DS_Store b/android-action-kernel/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..58880b8ba8a9e74239d28f62e695a1564ba31a6c
GIT binary patch
literal 6148
zcmeHLPfyf96rZ=O+69pUyNE=S#=V$8BKRl97{j_?boBsn2`&PzZFg8IWoFZMS&?A&
zs24wgAHa`bJaX~q)q@wmfCo=r_069K0lgYy<|Q+~-^_b&rt@ps>68!x<#uC=5StKU
zVG)?^#p*XE{GuGOjPA)If*8XKxrbZ~R@OcPMggP1zpH?Fb~7X)aV6yb!RHrV{E#Lu
zUdDKxxTRy>d95&rlgY_Xkm<=DJAT5ltUhbCy3RY*#7)|9*^QP(Yk~97OS$VU2DDSR
z^D~?!Zop_H6M_cCkd=o4Yw%8)w^<{WxmD$`a=BdH9@*KscKParGqpS6D0cnQ)b4K1
zDikl?cu?O6HyMAaga&*YN$b~`!Yd?lC4s-CR{|f?m3*=7bS=5wzLWi@2JFG1;lcc1
zp->n(J$hzr>}=g0_FR9X8HsD0;USO8VVl+hpR;Q4oYoThED@w`4;_koKpT(uyB4-v
z7?a=ckYnA6C}qpdC}_;Bt;IB{{lb^(_CV}MJPhY~93%l}ODi$TTS@3<g{^U%aSNrL
zYYA7pNTV3>NKS3ZjUtydX?c4!bUhl*u`*8*z9}NbXt<F1h>DTq+|`!r2dr6~{-zWx
zmgZA(K@po26~R3spE8Y5-1GM&UtpZkT&`HUuU!@yC%4EwQYGu8Ltc|N<SltmK9JAk
zEBOvt7=iOJ4p-nNOv7EMz$5Ts9Rhf&@-yiTKBvdi05KAIgSV;?3ps0IJB>XUYFeo<
zR-e(rC}0#Y3LH{Eyf_^!0!tdZ3gtT$D|rM!6rT~&kc-cMk>hAAY3wRQ4;rXcM3oBq
zhyf}c`yCxu(%4m~(h10!aU4A}=o1Ra*|Fb|;RH$wO>Gn~3LI3RM>TD+|KI-e`Tt;&
zX&D8K0{@i)ke#j0iXV1*Z#}X&vDZ3So?sCs;<^fD3RZeMmPBmDqgbS&&k+Q%q_L|I
Rd(g~>fRw>hMuESoz)$=v&hr2O

literal 0
HcmV?d00001

diff --git a/android-action-kernel/.env.example b/android-action-kernel/.env.example
new file mode 100644
index 0000000..19990e2
--- /dev/null
+++ b/android-action-kernel/.env.example
@@ -0,0 +1,63 @@
+# Android Action Kernel Configuration (TypeScript/Bun)
+# Copy this file to .env and fill in your settings
+# cp .env.example .env
+
+# ===========================================
+# Agent Configuration
+# ===========================================
+MAX_STEPS=30              # Maximum steps before stopping (30 for complex multi-app tasks)
+STEP_DELAY=2              # Seconds to wait between steps
+MAX_RETRIES=3             # Retries on ADB/network failures
+STUCK_THRESHOLD=3         # Steps before stuck-loop recovery kicks in
+
+# ===========================================
+# Vision Fallback (when accessibility tree is empty)
+# ===========================================
+VISION_ENABLED=true       # Auto-capture screenshot when UI elements not found
+
+# ===========================================
+# LLM Provider: "groq", "openai", "bedrock", or "openrouter"
+# ===========================================
+LLM_PROVIDER=groq
+
+# ===========================================
+# Groq Configuration (Free tier available)
+# Get your key at: https://console.groq.com
+# ===========================================
+GROQ_API_KEY=gsk_your_key_here
+GROQ_MODEL=llama-3.3-70b-versatile
+# Other models: llama-3.1-8b-instant (faster, higher rate limits)
+
+# ===========================================
+# OpenAI Configuration
+# Get your key at: https://platform.openai.com
+# ===========================================
+OPENAI_API_KEY=sk-your_key_here
+OPENAI_MODEL=gpt-4o
+# Other models: gpt-4o-mini (faster, cheaper)
+
+# ===========================================
+# AWS Bedrock Configuration
+# Uses AWS credential chain (run 'aws configure' first)
+# ===========================================
+AWS_REGION=us-east-1
+BEDROCK_MODEL=us.meta.llama3-3-70b-instruct-v1:0
+# Other models:
+#   anthropic.claude-3-sonnet-20240229-v1:0
+#   anthropic.claude-3-haiku-20240307-v1:0
+#   meta.llama3-8b-instruct-v1:0
+
+# ===========================================
+# OpenRouter Configuration (via Vercel AI SDK)
+# Access 200+ models through a single API
+# Get your key at: https://openrouter.ai/keys
+# ===========================================
+OPENROUTER_API_KEY=sk-or-v1-your_key_here
+OPENROUTER_MODEL=anthropic/claude-3.5-sonnet
+# Popular models:
+#   anthropic/claude-3.5-sonnet (best reasoning)
+#   openai/gpt-4o (multimodal)
+#   google/gemini-2.0-flash-001 (fast + cheap)
+#   meta-llama/llama-3.3-70b-instruct (open source)
+#   mistralai/mistral-large-latest (European)
+#   deepseek/deepseek-chat (cost efficient)
diff --git a/android-action-kernel/.gitignore b/android-action-kernel/.gitignore
new file mode 100644
index 0000000..fb16f59
--- /dev/null
+++ b/android-action-kernel/.gitignore
@@ -0,0 +1,4 @@
+node_modules/
+dist/
+bun.lock
+.env
diff --git a/android-action-kernel/package.json b/android-action-kernel/package.json
new file mode 100644
index 0000000..b0820fe
--- /dev/null
+++ b/android-action-kernel/package.json
@@ -0,0 +1,22 @@
+{
+  "name": "android-action-kernel",
+  "version": "1.0.0",
+  "description": "AI agent that controls Android devices through the accessibility API - TypeScript/Bun edition",
+  "type": "module",
+  "scripts": {
+    "start": "bun run src/kernel.ts",
+    "build": "bun build src/kernel.ts --outdir dist --target bun",
+    "typecheck": "tsc --noEmit"
+  },
+  "dependencies": {
+    "@aws-sdk/client-bedrock-runtime": "^3.700.0",
+    "@openrouter/ai-sdk-provider": "^2.1.1",
+    "ai": "^6.0.72",
+    "fast-xml-parser": "^4.5.0",
+    "openai": "^4.73.0"
+  },
+  "devDependencies": {
+    "@types/bun": "^1.1.0",
+    "typescript": "^5.6.0"
+  }
+}
diff --git a/android-action-kernel/src/actions.ts b/android-action-kernel/src/actions.ts
new file mode 100644
index 0000000..14d1839
--- /dev/null
+++ b/android-action-kernel/src/actions.ts
@@ -0,0 +1,322 @@
+/**
+ * Action execution module for Android Action Kernel.
+ * Handles all ADB commands for interacting with Android devices.
+ *
+ * Supported actions:
+ *   tap, type, enter, swipe, home, back, wait, done,
+ *   longpress, screenshot, launch, clear, clipboard_get, clipboard_set, shell
+ */
+
+import { Config } from "./config.js";
+import {
+  KEYCODE_ENTER,
+  KEYCODE_HOME,
+  KEYCODE_BACK,
+  KEYCODE_DEL,
+  KEYCODE_MOVE_HOME,
+  KEYCODE_MOVE_END,
+  SWIPE_COORDS,
+  SWIPE_DURATION_MS,
+  LONG_PRESS_DURATION_MS,
+  DEVICE_SCREENSHOT_PATH,
+  LOCAL_SCREENSHOT_PATH,
+} from "./constants.js";
+
+export interface ActionDecision {
+  action: string;
+  coordinates?: [number, number];
+  text?: string;
+  direction?: string;
+  reason?: string;
+  // launch action
+  package?: string;
+  activity?: string;
+  uri?: string;
+  extras?: Record<string, string>;
+  // shell action
+  command?: string;
+  // screenshot action
+  filename?: string;
+}
+
+export interface ActionResult {
+  success: boolean;
+  message: string;
+  data?: string;
+}
+
+/**
+ * Runs `adb <command...>` and returns its trimmed stdout. Retries with exponential
+ * backoff (1s, 2s, 4s, ...) while stderr mentions "error"; once retries are exhausted
+ * the error is logged and whatever stdout was produced is returned (no throw here).
+ */
+export function runAdbCommand(command: string[], retries = Config.MAX_RETRIES): string {
+  // An unparsable MAX_RETRIES yields NaN; the command must still run once instead of being skipped.
+  const maxRetries = Number.isFinite(retries) && retries > 0 ? Math.floor(retries) : 0;
+  for (let attempt = 0; ; attempt++) {
+    const result = Bun.spawnSync([Config.ADB_PATH, ...command], {
+      stdout: "pipe",
+      stderr: "pipe",
+    });
+
+    const stdout = result.stdout.toString().trim();
+    const stderr = result.stderr.toString().trim();
+    if (!stderr.toLowerCase().includes("error")) return stdout;
+
+    if (attempt >= maxRetries) {
+      console.log(`ADB Error (all retries exhausted): ${stderr}`);
+      return stdout;
+    }
+
+    const delay = Math.pow(2, attempt) * 1000;
+    console.log(`ADB Error (attempt ${attempt + 1}/${maxRetries + 1}): ${stderr}`);
+    console.log(`Retrying in ${delay / 1000}s...`);
+    Bun.sleepSync(delay);
+  }
+}
+
+/**
+ * Executes the action decided by the LLM. Returns a result for the kernel to track.
+ */
+export function executeAction(action: ActionDecision): ActionResult {
+  switch (action.action) {
+    case "tap":
+      return executeTap(action);
+    case "type":
+      return executeType(action);
+    case "enter":
+      return executeEnter();
+    case "swipe":
+      return executeSwipe(action);
+    case "home":
+      return executeHome();
+    case "back":
+      return executeBack();
+    case "wait":
+      return executeWait();
+    case "done":
+      return executeDone(action);
+    case "longpress":
+      return executeLongPress(action);
+    case "screenshot":
+      return executeScreenshot(action);
+    case "launch":
+      return executeLaunch(action);
+    case "clear":
+      return executeClear();
+    case "clipboard_get":
+      return executeClipboardGet();
+    case "clipboard_set":
+      return executeClipboardSet(action);
+    case "shell":
+      return executeShell(action);
+    default:
+      console.log(`Warning: Unknown action: ${action.action}`);
+      return { success: false, message: `Unknown action: ${action.action}` };
+  }
+}
+
+// ===========================================
+// Original actions (enhanced)
+// ===========================================
+
+function executeTap(action: ActionDecision): ActionResult {
+  const [x, y] = action.coordinates ?? [0, 0];
+  console.log(`Tapping: (${x}, ${y})`);
+  runAdbCommand(["shell", "input", "tap", String(x), String(y)]);
+  return { success: true, message: `Tapped (${x}, ${y})` };
+}
+
+/**
+ * Types `action.text` into the focused field via `adb shell input text`.
+ * Spaces are sent as `%s`, which is how this file encodes a space for `input text`.
+ * Returns a failure result (and sends nothing) when the text is missing or empty.
+ */
+function executeType(action: ActionDecision): ActionResult {
+  const text = action.text ?? "";
+  if (!text) return { success: false, message: "No text to type" };
+  // `adb shell` hands the joined arguments to the device shell, so every shell
+  // metacharacter is backslash-escaped in one pass. Besides quotes, backslash and
+  // `&|;()<>`, this covers `$` and backticks (otherwise LLM-supplied text such as
+  // `$(reboot)` would be executed on the device) and the glob characters `*?[]{}~#!`.
+  const escapedText = text
+    .replace(/[\\"'`$&|;()<>*?\[\]{}~#!]/g, "\\$&")
+    .replaceAll(" ", "%s");
+
+  console.log(`Typing: ${text}`);
+  runAdbCommand(["shell", "input", "text", escapedText]);
+  return { success: true, message: `Typed "${text}"` };
+}
+
+function executeEnter(): ActionResult {
+  console.log("Pressing Enter");
+  runAdbCommand(["shell", "input", "keyevent", KEYCODE_ENTER]);
+  return { success: true, message: "Pressed Enter" };
+}
+
+function executeSwipe(action: ActionDecision): ActionResult {
+  const direction = action.direction ?? "up";
+  const coords = SWIPE_COORDS[direction] ?? SWIPE_COORDS["up"];
+
+  console.log(`Swiping ${direction}`);
+  runAdbCommand([
+    "shell", "input", "swipe",
+    String(coords[0]), String(coords[1]),
+    String(coords[2]), String(coords[3]),
+    SWIPE_DURATION_MS,
+  ]);
+  return { success: true, message: `Swiped ${direction}` };
+}
+
+function executeHome(): ActionResult {
+  console.log("Going Home");
+  runAdbCommand(["shell", "input", "keyevent", KEYCODE_HOME]);
+  return { success: true, message: "Went to home screen" };
+}
+
+function executeBack(): ActionResult {
+  console.log("Going Back");
+  runAdbCommand(["shell", "input", "keyevent", KEYCODE_BACK]);
+  return { success: true, message: "Went back" };
+}
+
+function executeWait(): ActionResult {
+  console.log("Waiting...");
+  Bun.sleepSync(2000);
+  return { success: true, message: "Waited 2s" };
+}
+
+function executeDone(action: ActionDecision): ActionResult {
+  console.log(`Goal Achieved: ${action.reason ?? "Task complete"}`);
+  return { success: true, message: "done" };
+}
+
+// ===========================================
+// New actions
+// ===========================================
+
+/**
+ * Long press at coordinates (opens context menus, triggers drag mode, etc.)
+ */
+function executeLongPress(action: ActionDecision): ActionResult {
+  const [x, y] = action.coordinates ?? [0, 0];
+  console.log(`Long pressing: (${x}, ${y})`);
+  // A swipe from the same point to the same point with long duration = long press
+  runAdbCommand([
+    "shell", "input", "swipe",
+    String(x), String(y), String(x), String(y),
+    LONG_PRESS_DURATION_MS,
+  ]);
+  return { success: true, message: `Long pressed (${x}, ${y})` };
+}
+
+/**
+ * Captures a screenshot and saves it locally.
+ */
+function executeScreenshot(action: ActionDecision): ActionResult {
+  const filename = action.filename ?? LOCAL_SCREENSHOT_PATH;
+  console.log(`Taking screenshot → ${filename}`);
+  runAdbCommand(["shell", "screencap", "-p", DEVICE_SCREENSHOT_PATH]);
+  runAdbCommand(["pull", DEVICE_SCREENSHOT_PATH, filename]);
+  return { success: true, message: `Screenshot saved to ${filename}`, data: filename };
+}
+
+/**
+ * Launches an app by package name, activity, or URI intent.
+ *
+ * Examples the LLM can produce:
+ *   { action: "launch", package: "com.whatsapp" }
+ *   { action: "launch", package: "com.whatsapp", activity: ".HomeActivity" }
+ *   { action: "launch", uri: "https://maps.google.com/?q=pizza+near+me" }
+ *   { action: "launch", package: "com.whatsapp", uri: "content://media/external/images/1",
+ *     extras: { "android.intent.extra.TEXT": "Check this out" } }
+ */
+function executeLaunch(action: ActionDecision): ActionResult {
+  // `adb shell` re-parses its arguments with the device shell, so LLM-supplied values are
+  // single-quoted: `&` in URIs, spaces in extras and `$(...)` then reach `am` literally.
+  const quote = (value: string): string => `'${String(value).replaceAll("'", "'\\''")}'`;
+  const args: string[] = ["shell", "am", "start"];
+  if (action.uri) {
+    args.push("-a", "android.intent.action.VIEW", "-d", quote(action.uri));
+  }
+  if (action.package && action.activity) {
+    args.push("-n", quote(`${action.package}/${action.activity}`));
+  } else if (action.package && action.uri) {
+    // Keep the URI and extras (the monkey branch below ignores both) and target this package.
+    args.push("-p", quote(action.package));
+  } else if (action.package) {
+    // Launch the default activity for the package
+    const launchResult = runAdbCommand([
+      "shell", "monkey", "-p", quote(action.package), "-c",
+      "android.intent.category.LAUNCHER", "1",
+    ]);
+    console.log(`Launching: ${action.package}`);
+    return { success: true, message: `Launched ${action.package}`, data: launchResult };
+  }
+
+  // Attach intent extras
+  for (const [key, value] of Object.entries(action.extras ?? {})) {
+    args.push("--es", quote(key), quote(value));
+  }
+  const label = action.package ?? action.uri ?? "intent";
+  console.log(`Launching: ${label}`);
+  const result = runAdbCommand(args);
+  return { success: true, message: `Launched ${label}`, data: result };
+}
+
+/**
+ * Attempts to clear the currently focused text field with three key events:
+ * MOVE_END, a long-pressed MOVE_HOME, then DEL. Always reports success.
+ */
+function executeClear(): ActionResult {
+  console.log("Clearing text field");
+  // Move to end of field
+  runAdbCommand(["shell", "input", "keyevent", KEYCODE_MOVE_END]);
+  // NOTE(review): intended as select-all, but no Shift is sent here — confirm this selects text
+  runAdbCommand(["shell", "input", "keyevent", "--longpress", KEYCODE_MOVE_HOME]);
+  // Deletes the selection if one exists; presumably only one character otherwise — verify
+  runAdbCommand(["shell", "input", "keyevent", KEYCODE_DEL]);
+  return { success: true, message: "Cleared text field" };
+}
+
+/**
+ * Reads the clipboard; always reports success, even when both attempts return nothing.
+ */
+function executeClipboardGet(): ActionResult {
+  console.log("Reading clipboard");
+  // First attempt: `cmd clipboard get-text`; any non-empty stdout is taken as the clipboard text.
+  // NOTE(review): an earlier note here said direct clipboard access via ADB is restricted on
+  // Android 10+, so this may come back empty — confirm on the target devices.
+  const result = runAdbCommand(["shell", "cmd", "clipboard", "get-text"]);
+  if (result) {
+    console.log(`Clipboard: ${result.slice(0, 100)}`);
+    return { success: true, message: `Clipboard: ${result}`, data: result };
+  }
+  // Fallback (intended for older Android versions): raw `service call` output, returned undecoded
+  const fallback = runAdbCommand([
+    "shell", "service", "call", "clipboard", "2", "i32", "1",
+  ]);
+  return { success: true, message: `Clipboard (raw): ${fallback}`, data: fallback };
+}
+
+/**
+ * Sets the clipboard to `action.text`, single-quoted so the device shell keeps it as one literal argument.
+ */
+function executeClipboardSet(action: ActionDecision): ActionResult {
+  const text = action.text ?? "";
+  if (!text) return { success: false, message: "No text to set on clipboard" };
+  console.log(`Setting clipboard: ${text.slice(0, 50)}...`);
+  runAdbCommand(["shell", "cmd", "clipboard", "set-text", `'${text.replaceAll("'", "'\\''")}'`]);
+  return { success: true, message: `Clipboard set to "${text.slice(0, 50)}"` };
+}
+
+/**
+ * Runs an arbitrary ADB shell command. Use sparingly for edge cases.
+ */
+function executeShell(action: ActionDecision): ActionResult {
+  const cmd = action.command ?? "";
+  if (!cmd) return { success: false, message: "No command provided" };
+  console.log(`Shell: ${cmd}`);
+  const result = runAdbCommand(["shell", ...cmd.split(" ")]);
+  return { success: true, message: `Shell output: ${result.slice(0, 200)}`, data: result };
+}
diff --git a/android-action-kernel/src/config.ts b/android-action-kernel/src/config.ts
new file mode 100644
index 0000000..1f95493
--- /dev/null
+++ b/android-action-kernel/src/config.ts
@@ -0,0 +1,82 @@
+/**
+ * Configuration management for Android Action Kernel.
+ * Bun natively loads .env files — no dotenv needed.
+ */
+
+import {
+  DEVICE_DUMP_PATH,
+  LOCAL_DUMP_PATH,
+  DEVICE_SCREENSHOT_PATH,
+  LOCAL_SCREENSHOT_PATH,
+  DEFAULT_MAX_STEPS,
+  DEFAULT_STEP_DELAY,
+  DEFAULT_GROQ_MODEL,
+  DEFAULT_OPENAI_MODEL,
+  DEFAULT_BEDROCK_MODEL,
+  DEFAULT_MAX_RETRIES,
+  DEFAULT_STUCK_THRESHOLD,
+  DEFAULT_VISION_ENABLED,
+} from "./constants.js";
+
+function env(key: string, fallback = ""): string {
+  return process.env[key] ?? fallback;
+}
+
+export const Config = {
+  // ADB Configuration
+  ADB_PATH: env("ADB_PATH", "adb"),
+  SCREEN_DUMP_PATH: DEVICE_DUMP_PATH,
+  LOCAL_DUMP_PATH: LOCAL_DUMP_PATH,
+  DEVICE_SCREENSHOT_PATH: DEVICE_SCREENSHOT_PATH,
+  LOCAL_SCREENSHOT_PATH: LOCAL_SCREENSHOT_PATH,
+
+  // Agent Configuration
+  MAX_STEPS: parseInt(env("MAX_STEPS", String(DEFAULT_MAX_STEPS)), 10),
+  STEP_DELAY: parseFloat(env("STEP_DELAY", String(DEFAULT_STEP_DELAY))),
+  MAX_RETRIES: parseInt(env("MAX_RETRIES", String(DEFAULT_MAX_RETRIES)), 10),
+  STUCK_THRESHOLD: parseInt(env("STUCK_THRESHOLD", String(DEFAULT_STUCK_THRESHOLD)), 10),
+
+  // Vision fallback (when accessibility tree is empty)
+  VISION_ENABLED: env("VISION_ENABLED", String(DEFAULT_VISION_ENABLED)) === "true",
+
+  // LLM Provider: "groq", "openai", "bedrock", or "openrouter"
+  LLM_PROVIDER: env("LLM_PROVIDER", "groq"),
+
+  // Groq Configuration
+  GROQ_API_KEY: env("GROQ_API_KEY"),
+  GROQ_MODEL: env("GROQ_MODEL", DEFAULT_GROQ_MODEL),
+
+  // OpenAI Configuration
+  OPENAI_API_KEY: env("OPENAI_API_KEY"),
+  OPENAI_MODEL: env("OPENAI_MODEL", DEFAULT_OPENAI_MODEL),
+
+  // AWS Bedrock Configuration
+  AWS_REGION: env("AWS_REGION", "us-east-1"),
+  BEDROCK_MODEL: env("BEDROCK_MODEL", DEFAULT_BEDROCK_MODEL),
+
+  // OpenRouter Configuration (via Vercel AI SDK)
+  OPENROUTER_API_KEY: env("OPENROUTER_API_KEY"),
+  OPENROUTER_MODEL: env("OPENROUTER_MODEL", "anthropic/claude-3.5-sonnet"),
+
+  getModel(): string {
+    const provider = Config.LLM_PROVIDER;
+    if (provider === "groq") return Config.GROQ_MODEL;
+    if (provider === "bedrock") return Config.BEDROCK_MODEL;
+    if (provider === "openrouter") return Config.OPENROUTER_MODEL;
+    return Config.OPENAI_MODEL;
+  },
+
+  validate(): void {
+    const provider = Config.LLM_PROVIDER;
+    if (provider === "groq" && !Config.GROQ_API_KEY) {
+      throw new Error("GROQ_API_KEY is required when using Groq provider");
+    }
+    if (provider === "openai" && !Config.OPENAI_API_KEY) {
+      throw new Error("OPENAI_API_KEY is required when using OpenAI provider");
+    }
+    if (provider === "openrouter" && !Config.OPENROUTER_API_KEY) {
+      throw new Error("OPENROUTER_API_KEY is required when using OpenRouter provider");
+    }
+    // Bedrock uses AWS credential chain, no explicit validation needed
+  },
+};
diff --git a/android-action-kernel/src/constants.ts b/android-action-kernel/src/constants.ts
new file mode 100644
index 0000000..498468a
--- /dev/null
+++ b/android-action-kernel/src/constants.ts
@@ -0,0 +1,78 @@
+/**
+ * Constants for Android Action Kernel.
+ * All magic strings, URLs, and fixed values in one place.
+ */
+
+// ===========================================
+// API Endpoints
+// ===========================================
+export const GROQ_API_BASE_URL = "https://api.groq.com/openai/v1";
+
+// ===========================================
+// ADB Key Codes
+// ===========================================
+export const KEYCODE_ENTER = "66";
+export const KEYCODE_HOME = "KEYCODE_HOME";
+export const KEYCODE_BACK = "KEYCODE_BACK";
+export const KEYCODE_DEL = "67";
+export const KEYCODE_FORWARD_DEL = "112";
+export const KEYCODE_MOVE_HOME = "122";
+export const KEYCODE_MOVE_END = "123";
+export const KEYCODE_MENU = "82";
+export const KEYCODE_TAB = "61";
+export const KEYCODE_ESCAPE = "111";
+export const KEYCODE_DPAD_UP = "19";
+export const KEYCODE_DPAD_DOWN = "20";
+export const KEYCODE_DPAD_LEFT = "21";
+export const KEYCODE_DPAD_RIGHT = "22";
+export const KEYCODE_VOLUME_UP = "24";
+export const KEYCODE_VOLUME_DOWN = "25";
+export const KEYCODE_POWER = "26";
+
+// ===========================================
+// Default Screen Coordinates (for swipe actions)
+// Adjust based on target device resolution
+// ===========================================
+export const SCREEN_CENTER_X = 540;
+export const SCREEN_CENTER_Y = 1200;
+
+// Swipe coordinates: [start_x, start_y, end_x, end_y]
+export const SWIPE_COORDS: Record<string, [number, number, number, number]> = {
+  up: [SCREEN_CENTER_X, 1500, SCREEN_CENTER_X, 500],
+  down: [SCREEN_CENTER_X, 500, SCREEN_CENTER_X, 1500],
+  left: [800, SCREEN_CENTER_Y, 200, SCREEN_CENTER_Y],
+  right: [200, SCREEN_CENTER_Y, 800, SCREEN_CENTER_Y],
+};
+export const SWIPE_DURATION_MS = "300";
+export const LONG_PRESS_DURATION_MS = "1000";
+
+// ===========================================
+// Default Models
+// ===========================================
+export const DEFAULT_GROQ_MODEL = "llama-3.3-70b-versatile";
+export const DEFAULT_OPENAI_MODEL = "gpt-4o";
+export const DEFAULT_BEDROCK_MODEL = "us.meta.llama3-3-70b-instruct-v1:0";
+export const DEFAULT_OPENROUTER_MODEL = "anthropic/claude-3.5-sonnet";
+
+// ===========================================
+// Bedrock Model Identifiers
+// ===========================================
+export const BEDROCK_ANTHROPIC_MODELS = ["anthropic"];
+export const BEDROCK_META_MODELS = ["meta", "llama"];
+
+// ===========================================
+// File Paths
+// ===========================================
+export const DEVICE_DUMP_PATH = "/sdcard/window_dump.xml";
+export const LOCAL_DUMP_PATH = "window_dump.xml";
+export const DEVICE_SCREENSHOT_PATH = "/sdcard/kernel_screenshot.png";
+export const LOCAL_SCREENSHOT_PATH = "kernel_screenshot.png";
+
+// ===========================================
+// Agent Defaults
+// ===========================================
+export const DEFAULT_MAX_STEPS = 30;
+export const DEFAULT_STEP_DELAY = 2.0;
+export const DEFAULT_MAX_RETRIES = 3;
+export const DEFAULT_STUCK_THRESHOLD = 3;
+export const DEFAULT_VISION_ENABLED = true;
diff --git a/android-action-kernel/src/kernel.ts b/android-action-kernel/src/kernel.ts
new file mode 100644
index 0000000..fdb36bf
--- /dev/null
+++ b/android-action-kernel/src/kernel.ts
@@ -0,0 +1,298 @@
+/**
+ * Android Action Kernel - Main Agent Loop (TypeScript/Bun Edition)
+ *
+ * An AI agent that controls Android devices through the accessibility API.
+ * Uses LLMs to make decisions based on screen context.
+ *
+ * Features:
+ *   - Perception → Reasoning → Action loop
+ *   - Screen state diffing (stuck loop detection)
+ *   - Error recovery with retries
+ *   - Vision fallback when accessibility tree is empty
+ *   - Dynamic early exit on goal completion
+ *   - 15 actions: tap, type, enter, swipe, home, back, wait, done,
+ *     longpress, screenshot, launch, clear, clipboard_get, clipboard_set, shell
+ *
+ * Usage:
+ *     bun run src/kernel.ts
+ */
+
+import { existsSync, readFileSync, rmSync } from "fs";
+
+import { Config } from "./config.js";
+import {
+  executeAction,
+  runAdbCommand,
+  type ActionDecision,
+  type ActionResult,
+} from "./actions.js";
+import { getLlmProvider, type LLMProvider } from "./llm-providers.js";
+import {
+  getInteractiveElements,
+  computeScreenHash,
+  type UIElement,
+} from "./sanitizer.js";
+import {
+  DEVICE_SCREENSHOT_PATH,
+  LOCAL_SCREENSHOT_PATH,
+} from "./constants.js";
+
+// ===========================================
+// Screen Perception
+// ===========================================
+
+/**
+ * Dumps the current UI XML and returns parsed elements + JSON string; stale dumps (local and on-device) are removed first.
+ */
+function getScreenState(): { elements: UIElement[]; json: string } {
+  try {
+    rmSync(Config.LOCAL_DUMP_PATH, { force: true });
+    runAdbCommand(["shell", "rm", "-f", Config.SCREEN_DUMP_PATH]);
+    runAdbCommand(["shell", "uiautomator", "dump", Config.SCREEN_DUMP_PATH]);
+    runAdbCommand(["pull", Config.SCREEN_DUMP_PATH, Config.LOCAL_DUMP_PATH]);
+  } catch {
+    console.log("Warning: ADB screen capture failed.");
+    return { elements: [], json: "Error: Could not capture screen." };
+  }
+  if (!existsSync(Config.LOCAL_DUMP_PATH)) {
+    return { elements: [], json: "Error: Could not capture screen." };
+  }
+  const xmlContent = readFileSync(Config.LOCAL_DUMP_PATH, "utf-8");
+  const elements = getInteractiveElements(xmlContent);
+  return { elements, json: JSON.stringify(elements, null, 2) };
+}
+
+/**
+ * Captures a screenshot and returns the local file path.
+ * Used as a vision fallback when the accessibility tree is empty.
+ */
+function captureScreenshot(): string | null {
+  try {
+    runAdbCommand(["shell", "screencap", "-p", DEVICE_SCREENSHOT_PATH]);
+    runAdbCommand(["pull", DEVICE_SCREENSHOT_PATH, LOCAL_SCREENSHOT_PATH]);
+    if (existsSync(LOCAL_SCREENSHOT_PATH)) {
+      return LOCAL_SCREENSHOT_PATH;
+    }
+  } catch {
+    console.log("Warning: Screenshot capture failed.");
+  }
+  return null;
+}
+
+// ===========================================
+// Screen State Diffing
+// ===========================================
+
+interface ScreenDiff {
+  changed: boolean;
+  addedTexts: string[];
+  removedTexts: string[];
+  summary: string;
+}
+
+function diffScreenState(
+  prevElements: UIElement[],
+  currElements: UIElement[]
+): ScreenDiff {
+  const prevTexts = new Set(prevElements.map((e) => e.text).filter(Boolean));
+  const currTexts = new Set(currElements.map((e) => e.text).filter(Boolean));
+
+  const addedTexts = [...currTexts].filter((t) => !prevTexts.has(t));
+  const removedTexts = [...prevTexts].filter((t) => !currTexts.has(t));
+
+  const prevHash = computeScreenHash(prevElements);
+  const currHash = computeScreenHash(currElements);
+  const changed = prevHash !== currHash;
+
+  let summary = "";
+  if (!changed) {
+    summary = "Screen has NOT changed since last action.";
+  } else {
+    const parts: string[] = [];
+    if (addedTexts.length > 0) {
+      parts.push(`New on screen: ${addedTexts.slice(0, 5).join(", ")}`);
+    }
+    if (removedTexts.length > 0) {
+      parts.push(`Gone from screen: ${removedTexts.slice(0, 5).join(", ")}`);
+    }
+    summary = parts.join(". ") || "Screen layout changed.";
+  }
+
+  return { changed, addedTexts, removedTexts, summary };
+}
+
+// ===========================================
+// Action History Formatting
+// ===========================================
+
+function formatActionHistory(
+  actionHistory: ActionDecision[],
+  resultHistory: ActionResult[]
+): string {
+  if (actionHistory.length === 0) return "";
+
+  const lines = actionHistory.map((entry, i) => {
+    const actionType = entry.action ?? "unknown";
+    const reason = entry.reason ?? "N/A";
+    const result = resultHistory[i];
+    const outcome = result ? (result.success ? "OK" : "FAILED") : "";
+
+    if (actionType === "type") {
+      return `Step ${i + 1}: typed "${entry.text ?? ""}" - ${reason} [${outcome}]`;
+    }
+    if (actionType === "tap") {
+      return `Step ${i + 1}: tapped ${JSON.stringify(entry.coordinates ?? [])} - ${reason} [${outcome}]`;
+    }
+    if (actionType === "launch") {
+      return `Step ${i + 1}: launched ${entry.package ?? entry.uri ?? ""} - ${reason} [${outcome}]`;
+    }
+    if (actionType === "screenshot") {
+      return `Step ${i + 1}: took screenshot - ${reason} [${outcome}]`;
+    }
+    return `Step ${i + 1}: ${actionType} - ${reason} [${outcome}]`;
+  });
+
+  return "\n\nPREVIOUS_ACTIONS:\n" + lines.join("\n");
+}
+
+// ===========================================
+// Main Agent Loop
+// ===========================================
+
+async function runAgent(goal: string, maxSteps?: number): Promise<void> {
+  const steps = maxSteps ?? Config.MAX_STEPS;
+
+  console.log("Android Action Kernel Started");
+  console.log(`Goal: ${goal}`);
+  console.log(`Provider: ${Config.LLM_PROVIDER} (${Config.getModel()})`);
+  console.log(`Max steps: ${steps} | Step delay: ${Config.STEP_DELAY}s`);
+  console.log(`Vision fallback: ${Config.VISION_ENABLED ? "ON" : "OFF"}`);
+
+  const llm = getLlmProvider();
+  const actionHistory: ActionDecision[] = [];
+  const resultHistory: ActionResult[] = [];
+  let prevElements: UIElement[] = [];
+  let stuckCount = 0;
+
+  for (let step = 0; step < steps; step++) {
+    console.log(`\n--- Step ${step + 1}/${steps} ---`);
+
+    // 1. Perception: Capture screen state
+    console.log("Scanning screen...");
+    const { elements, json: screenContext } = getScreenState();
+
+    // 2. Screen diff: detect stuck loops
+    let diffContext = "";
+    if (step > 0) {
+      const diff = diffScreenState(prevElements, elements);
+      diffContext = `\n\nSCREEN_CHANGE: ${diff.summary}`;
+
+      if (!diff.changed) {
+        stuckCount++;
+        console.log(
+          `Warning: Screen unchanged for ${stuckCount} step(s).`
+        );
+        if (stuckCount >= Config.STUCK_THRESHOLD) {
+          console.log(
+            `Stuck for ${stuckCount} steps. Injecting recovery hint.`
+          );
+          diffContext +=
+            `\nWARNING: You have been stuck for ${stuckCount} steps. ` +
+            `The screen is NOT changing. Try a DIFFERENT action: ` +
+            `swipe to scroll, press back, go home, or launch a different app.`;
+        }
+      } else {
+        stuckCount = 0;
+      }
+    }
+    prevElements = elements;
+
+    // 3. Vision fallback: if accessibility tree is empty, use screenshot
+    let visionContext = "";
+    if (elements.length === 0 && Config.VISION_ENABLED) {
+      console.log("Accessibility tree empty. Attempting vision fallback...");
+      const screenshotPath = captureScreenshot();
+      if (screenshotPath) {
+        visionContext =
+          "\n\nVISION_FALLBACK: The accessibility tree returned NO elements. " +
+          "A screenshot has been captured. The screen likely contains custom-drawn " +
+          "content (game, WebView, or Flutter). Try using coordinate-based taps on " +
+          "common UI positions, or use 'back'/'home' to navigate away. " +
+          "If you know the app package name, use 'launch' to restart it.";
+        console.log("Vision fallback: screenshot captured for context.");
+      }
+    }
+
+    // 4. Reasoning: Get LLM decision
+    console.log("Thinking...");
+    const historyStr = formatActionHistory(actionHistory, resultHistory);
+    const fullContext = screenContext + historyStr + diffContext + visionContext;
+
+    let decision: ActionDecision;
+    try {
+      decision = await llm.getDecision(goal, fullContext, actionHistory);
+    } catch (err) {
+      console.log(`LLM Error: ${(err as Error).message}`);
+      console.log("Falling back to wait action.");
+      decision = { action: "wait", reason: "LLM request failed, waiting for retry" };
+    }
+
+    console.log(`Decision: ${decision.action} — ${decision.reason ?? "no reason"}`);
+
+    // 5. Action: Execute the decision
+    let result: ActionResult;
+    try {
+      result = executeAction(decision);
+    } catch (err) {
+      console.log(`Action Error: ${(err as Error).message}`);
+      result = { success: false, message: (err as Error).message };
+    }
+
+    // Track history
+    actionHistory.push(decision);
+    resultHistory.push(result);
+
+    // 6. Check for goal completion
+    if (decision.action === "done") {
+      console.log("\nTask completed successfully.");
+      return;
+    }
+
+    // Wait for UI to update
+    await Bun.sleep(Config.STEP_DELAY * 1000);
+  }
+
+  console.log("\nMax steps reached. Task may be incomplete.");
+}
+
+// ===========================================
+// Entry Point
+// ===========================================
+
+async function main(): Promise<void> {
+  try {
+    Config.validate();
+  } catch (e) {
+    console.log(`Configuration Error: ${(e as Error).message}`);
+    return;
+  }
+
+  // Read user input from stdin
+  process.stdout.write("Enter your goal: ");
+  const goal = await new Promise<string>((resolve) => {
+    const reader = Bun.stdin.stream().getReader();
+    reader.read().then(({ value }) => {
+      resolve(new TextDecoder().decode(value).trim());
+      reader.releaseLock();
+    });
+  });
+
+  if (!goal) {
+    console.log("No goal provided. Exiting.");
+    return;
+  }
+
+  await runAgent(goal);
+}
+
+main();
diff --git a/android-action-kernel/src/llm-providers.ts b/android-action-kernel/src/llm-providers.ts
new file mode 100644
index 0000000..64a654b
--- /dev/null
+++ b/android-action-kernel/src/llm-providers.ts
@@ -0,0 +1,327 @@
+/**
+ * LLM Provider module for Android Action Kernel.
+ * Supports OpenAI, Groq, AWS Bedrock, and OpenRouter (via Vercel AI SDK).
+ */
+
+import OpenAI from "openai";
+import {
+  BedrockRuntimeClient,
+  InvokeModelCommand,
+} from "@aws-sdk/client-bedrock-runtime";
+import { generateText } from "ai";
+import { createOpenRouter } from "@openrouter/ai-sdk-provider";
+
+import { Config } from "./config.js";
+import {
+  GROQ_API_BASE_URL,
+  BEDROCK_ANTHROPIC_MODELS,
+  BEDROCK_META_MODELS,
+} from "./constants.js";
+import type { ActionDecision } from "./actions.js";
+
+// ===========================================
+// System Prompt — sent unchanged by every provider; documents 15 actions + element fields
+// ===========================================
+
+const SYSTEM_PROMPT = `You are an Android Driver Agent. Your job is to achieve the user's goal by navigating the Android UI.
+
+You will receive:
+1. GOAL — the user's task.
+2. SCREEN_CONTEXT — JSON array of interactive UI elements with coordinates, states, and hierarchy.
+3. PREVIOUS_ACTIONS — your action history with outcomes (OK/FAILED).
+4. SCREEN_CHANGE — what changed since your last action (or if the screen is stuck).
+5. VISION_FALLBACK — present when the accessibility tree is empty (custom UI / WebView).
+
+You must output ONLY a valid JSON object with your next action.
+
+═══════════════════════════════════════════
+AVAILABLE ACTIONS (15 total)
+═══════════════════════════════════════════
+
+Navigation:
+  {"action": "tap", "coordinates": [x, y], "reason": "..."}
+  {"action": "longpress", "coordinates": [x, y], "reason": "..."}
+  {"action": "swipe", "direction": "up|down|left|right", "reason": "..."}
+  {"action": "enter", "reason": "Press Enter/submit"}
+  {"action": "back", "reason": "Navigate back"}
+  {"action": "home", "reason": "Go to home screen"}
+
+Text Input:
+  {"action": "type", "text": "Hello World", "reason": "..."}
+  {"action": "clear", "reason": "Clear current text field before typing"}
+
+App Control:
+  {"action": "launch", "package": "com.whatsapp", "reason": "Open WhatsApp"}
+  {"action": "launch", "uri": "https://maps.google.com/?q=pizza", "reason": "Open URL"}
+  {"action": "launch", "package": "com.whatsapp", "uri": "content://media/external/images/1", "extras": {"android.intent.extra.TEXT": "Check this"}, "reason": "Share image to WhatsApp"}
+
+Data:
+  {"action": "screenshot", "reason": "Capture current screen"}
+  {"action": "screenshot", "filename": "order_confirmation.png", "reason": "Save proof"}
+  {"action": "clipboard_get", "reason": "Read clipboard contents"}
+  {"action": "clipboard_set", "text": "copied text", "reason": "Set clipboard"}
+
+System:
+  {"action": "shell", "command": "am force-stop com.app.broken", "reason": "Kill crashed app"}
+  {"action": "wait", "reason": "Wait for screen to load"}
+  {"action": "done", "reason": "Task is complete"}
+
+═══════════════════════════════════════════
+ELEMENT PROPERTIES YOU WILL SEE
+═══════════════════════════════════════════
+
+Each element in SCREEN_CONTEXT has:
+- text: visible label or content description
+- center: [x, y] coordinates to tap
+- size: [width, height] in pixels
+- enabled: whether the element can be interacted with (DO NOT tap disabled elements!)
+- checked: checkbox/toggle state (true = ON)
+- focused: whether this field currently has input focus
+- selected: whether this item is currently selected (tabs, list items)
+- scrollable: whether this container can be scrolled
+- longClickable: supports long-press for context menu
+- editable: text input field
+- password: password input (don't read/log the text)
+- hint: placeholder text shown when field is empty
+- parent: the containing element (helps understand layout hierarchy)
+- action: suggested action — "tap", "type", "longpress", "scroll", or "read"
+
+═══════════════════════════════════════════
+CRITICAL RULES
+═══════════════════════════════════════════
+
+1. DISABLED ELEMENTS: If "enabled": false, DO NOT tap or interact with it. Find an alternative.
+2. TEXT INPUT: If "editable": true, use "clear" first if field has existing text, then "type".
+3. ALREADY TYPED: Check PREVIOUS_ACTIONS. Do NOT re-type text you already entered.
+4. REPETITION: Do NOT tap the same coordinates twice in a row. If it didn't work, try something else.
+5. STUCK: If SCREEN_CHANGE says "NOT changed", your last action had no effect. Change strategy.
+6. APP LAUNCH: Use "launch" to directly open apps instead of hunting for icons on the home screen.
+7. SCREENSHOTS: Use "screenshot" to capture proof of completed tasks (order confirmations, etc).
+8. LONG PRESS: Use "longpress" when you see "longClickable": true (context menus, copy/paste, etc).
+9. SCROLLING: If the item you need isn't visible, "swipe" up/down to scroll and find it.
+10. MULTI-APP: To switch apps, use "home" then "launch" the next app. Or use "back" to return.
+11. PASSWORDS: Never log or output the text of password fields.
+12. DONE: Say "done" as soon as the goal is achieved. Don't keep acting after success.
+13. SEARCH: After typing in a search field, use "enter" to submit the search.
+14. SHARE: To send files/images between apps, use "launch" with uri + extras for Android intents.
+15. CLEANUP: If a popup/ad appears, dismiss it with "back" or tap the close button, then continue.`;
+
+// ===========================================
+// Provider Interface
+// ===========================================
+
+interface ActionHistoryEntry { // one prior decision as passed to getDecision; all fields optional
+  action?: string;
+  reason?: string;
+  text?: string;
+  coordinates?: [number, number]; // [x, y], matching the "coordinates" shape in SYSTEM_PROMPT
+  package?: string;
+  uri?: string;
+}
+
+export interface LLMProvider { // contract implemented by the OpenAI/Groq, OpenRouter and Bedrock classes
+  getDecision( // resolves to the next ActionDecision for the given goal
+    goal: string,
+    screenContext: string, // embedded verbatim after "SCREEN_CONTEXT:" in each provider's prompt
+    actionHistory: ActionHistoryEntry[] // not read by any provider in this file
+  ): Promise<ActionDecision>;
+}
+
+// ===========================================
+// OpenAI / Groq Provider
+// ===========================================
+
+/** Chat-completions provider for OpenAI, or Groq through the same client with GROQ_API_BASE_URL. */
+class OpenAIProvider implements LLMProvider {
+  private client: OpenAI;
+  private model: string;
+  /** Uses the Groq key/base URL/model when LLM_PROVIDER is "groq", else the OpenAI ones. */
+  constructor() {
+    if (Config.LLM_PROVIDER === "groq") {
+      this.client = new OpenAI({
+        apiKey: Config.GROQ_API_KEY,
+        baseURL: GROQ_API_BASE_URL,
+      });
+      this.model = Config.GROQ_MODEL;
+    } else {
+      this.client = new OpenAI({ apiKey: Config.OPENAI_API_KEY });
+      this.model = Config.OPENAI_MODEL;
+    }
+  }
+
+  /** screenContext already carries history, diff and vision context; `_actionHistory` is unused. */
+  async getDecision(
+    goal: string,
+    screenContext: string,
+    _actionHistory: ActionHistoryEntry[]
+  ): Promise<ActionDecision> {
+    const userContent = `GOAL: ${goal}\n\nSCREEN_CONTEXT:\n${screenContext}`;
+    const response = await this.client.chat.completions.create({
+      model: this.model,
+      response_format: { type: "json_object" },
+      messages: [
+        { role: "system", content: SYSTEM_PROMPT },
+        { role: "user", content: userContent },
+      ],
+    });
+    // Empty or malformed content degrades to the shared "wait" fallback instead of `{}`.
+    return parseJsonResponse(response.choices[0]?.message.content ?? "");
+  }
+}
+
+// ===========================================
+// OpenRouter Provider (Vercel AI SDK)
+// ===========================================
+
+/** OpenRouter-backed provider that calls the model through the Vercel AI SDK's generateText. */
+class OpenRouterProvider implements LLMProvider {
+  private openrouter: ReturnType<typeof createOpenRouter>;
+  private model: string;
+
+  /** Builds the OpenRouter client and model id from Config. */
+  constructor() {
+    this.model = Config.OPENROUTER_MODEL;
+    this.openrouter = createOpenRouter({ apiKey: Config.OPENROUTER_API_KEY });
+  }
+
+  /**
+   * Sends goal + screen context and parses the reply into an ActionDecision.
+   * `_actionHistory` is accepted for interface conformance but not read.
+   */
+  async getDecision(
+    goal: string,
+    screenContext: string,
+    _actionHistory: ActionHistoryEntry[]
+  ): Promise<ActionDecision> {
+    const jsonOnlySuffix = "\n\nRespond with ONLY a valid JSON object.";
+    const prompt = `GOAL: ${goal}\n\nSCREEN_CONTEXT:\n${screenContext}${jsonOnlySuffix}`;
+    const chatModel = this.openrouter.chat(this.model);
+    const { text } = await generateText({ model: chatModel, system: SYSTEM_PROMPT, prompt });
+    return parseJsonResponse(text);
+  }
+}
+
+// ===========================================
+// AWS Bedrock Provider
+// ===========================================
+
+/** AWS Bedrock provider; request and response JSON shapes depend on the model family. */
+class BedrockProvider implements LLMProvider {
+  private client: BedrockRuntimeClient;
+  private model: string;
+
+  /** Creates the runtime client for Config.AWS_REGION and stores Config.BEDROCK_MODEL. */
+  constructor() {
+    this.client = new BedrockRuntimeClient({ region: Config.AWS_REGION });
+    this.model = Config.BEDROCK_MODEL;
+  }
+  /** Invokes the model with goal + screen context; `_actionHistory` is not read. */
+  async getDecision(
+    goal: string,
+    screenContext: string,
+    _actionHistory: ActionHistoryEntry[]
+  ): Promise<ActionDecision> {
+    const userContent = `GOAL: ${goal}\n\nSCREEN_CONTEXT:\n${screenContext}`;
+    const requestBody = this.buildRequest(userContent);
+    // The request body is sent as encoded bytes and the response body decoded back to JSON.
+    const command = new InvokeModelCommand({
+      modelId: this.model,
+      body: new TextEncoder().encode(requestBody),
+      contentType: "application/json",
+      accept: "application/json",
+    });
+
+    const response = await this.client.send(command);
+    const responseBody = JSON.parse(new TextDecoder().decode(response.body));
+    const resultText = this.extractResponse(responseBody);
+
+    return parseJsonResponse(resultText);
+  }
+  /** True when the model id contains any BEDROCK_ANTHROPIC_MODELS entry (case-sensitive). */
+  private isAnthropicModel(): boolean {
+    return BEDROCK_ANTHROPIC_MODELS.some((id) => this.model.includes(id));
+  }
+  /** True when the lower-cased model id contains any BEDROCK_META_MODELS entry. */
+  private isMetaModel(): boolean {
+    return BEDROCK_META_MODELS.some((id) => this.model.toLowerCase().includes(id));
+  }
+  /** Serializes the request in the Anthropic, Meta or (default) inputText format. */
+  private buildRequest(userContent: string): string {
+    if (this.isAnthropicModel()) {
+      return JSON.stringify({
+        anthropic_version: "bedrock-2023-05-31",
+        max_tokens: 1024,
+        system: SYSTEM_PROMPT,
+        messages: [
+          {
+            role: "user",
+            content:
+              userContent + "\n\nRespond with ONLY a valid JSON object.",
+          },
+        ],
+      });
+    }
+
+    if (this.isMetaModel()) {
+      return JSON.stringify({
+        prompt: `<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n${SYSTEM_PROMPT}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n${userContent}\n\nRespond with ONLY a valid JSON object, no other text.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n`,
+        max_gen_len: 512,
+        temperature: 0.1,
+      });
+    }
+
+    return JSON.stringify({
+      inputText: `${SYSTEM_PROMPT}\n\n${userContent}\n\nRespond with ONLY a valid JSON object.`,
+      textGenerationConfig: {
+        maxTokenCount: 512,
+        temperature: 0.1,
+      },
+    });
+  }
+  /** Extracts the generated text; a missing field yields "" so parsing falls back to "wait". */
+  private extractResponse(responseBody: Record<string, any>): string {
+    if (this.isAnthropicModel()) {
+      return responseBody.content?.[0]?.text ?? "";
+    }
+    if (this.isMetaModel()) {
+      return responseBody.generation ?? "";
+    }
+    return responseBody.results?.[0]?.outputText ?? "";
+  }
+}
+
+// ===========================================
+// Shared JSON Parsing
+// ===========================================
+
+/** Parses an LLM reply into an ActionDecision, tolerating code fences or surrounding prose;
+ *  returns a "wait" action when nothing parses. */
+function parseJsonResponse(text: string): ActionDecision {
+  // Try the raw text, then the greedy "{"..."}" span (keeps nested objects such as
+  // "extras" intact), then the original lazy match that stops at the first "}".
+  const candidates = [text, text.match(/\{[\s\S]*\}/)?.[0], text.match(/\{[\s\S]*?\}/)?.[0]];
+  for (const candidate of candidates) {
+    if (candidate === undefined) continue;
+    try {
+      return JSON.parse(candidate);
+    } catch {
+      // not valid JSON; try the next candidate
+    }
+  }
+  console.log(`Warning: Could not parse LLM response: ${text.slice(0, 200)}`);
+  return { action: "wait", reason: "Failed to parse response, waiting" };
+}
+
+// ===========================================
+// Factory
+// ===========================================
+
+/**
+ * Factory: maps Config.LLM_PROVIDER to a provider instance.
+ * "bedrock" and "openrouter" get dedicated classes; anything else uses OpenAIProvider.
+ */
+export function getLlmProvider(): LLMProvider {
+  const provider = Config.LLM_PROVIDER;
+  if (provider === "bedrock") return new BedrockProvider();
+  return provider === "openrouter" ? new OpenRouterProvider() : new OpenAIProvider();
+}
diff --git a/android-action-kernel/src/sanitizer.ts b/android-action-kernel/src/sanitizer.ts
new file mode 100644
index 0000000..684b009
--- /dev/null
+++ b/android-action-kernel/src/sanitizer.ts
@@ -0,0 +1,171 @@
+/**
+ * XML Sanitizer for Android Action Kernel.
+ * Parses Android Accessibility XML and extracts interactive UI elements
+ * with full state information and parent-child hierarchy context.
+ */
+
+import { XMLParser } from "fast-xml-parser";
+
+export interface UIElement { // one entry of getInteractiveElements' result
+  id: string; // resource-id attribute, "" when absent
+  text: string; // text attribute, falling back to content-desc
+  type: string; // last dot-separated segment of the class attribute
+  bounds: string; // raw bounds attribute
+  center: [number, number]; // floored midpoint of the bounds, [x, y]
+  size: [number, number]; // [width, height] derived from the bounds
+  clickable: boolean;
+  editable: boolean; // class contains EditText/AutoCompleteTextView, or editable="true"
+  enabled: boolean; // true unless the enabled attribute is "false"
+  checked: boolean;
+  focused: boolean;
+  selected: boolean;
+  scrollable: boolean;
+  longClickable: boolean;
+  password: boolean;
+  hint: string;
+  action: "tap" | "type" | "longpress" | "scroll" | "read"; // suggested interaction
+  parent: string; // label of the nearest ancestor that has bounds, "root" at the top
+  depth: number; // count of ancestors that have bounds
+}
+
+/**
+ * Builds a string key for screen state comparison (a plain join, not a digest).
+ * Each element contributes `id|text|x,y|enabled|checked`; entries are joined with ";".
+ */
+export function computeScreenHash(elements: UIElement[]): string {
+  const toSignature = ({ id, text, center, enabled, checked }: UIElement): string =>
+    `${id}|${text}|${center[0]},${center[1]}|${enabled}|${checked}`;
+  return elements.map(toSignature).join(";");
+}
+
+/**
+ * Parses Android Accessibility XML and returns a rich list of interactive elements.
+ * Preserves state (enabled, checked, focused) and hierarchy context.
+ *
+ * @param xmlContent Raw accessibility XML dump of the current screen.
+ * @returns Elements that are interactive or carry text, in document order; an empty
+ *   array when the XML cannot be parsed.
+ */
+export function getInteractiveElements(xmlContent: string): UIElement[] {
+  const parser = new XMLParser({
+    ignoreAttributes: false,
+    attributeNamePrefix: "@_",
+    allowBooleanAttributes: true,
+  });
+
+  let parsed: unknown;
+  try {
+    parsed = parser.parse(xmlContent);
+  } catch {
+    console.log("Warning: Error parsing XML. The screen might be loading.");
+    return [];
+  }
+
+  const elements: UIElement[] = [];
+
+  // Visits one parsed node; nodes with a `bounds` attribute may be emitted as elements.
+  function walk(node: any, parentLabel: string, depth: number): void {
+    if (!node || typeof node !== "object") return;
+
+    if (node["@_bounds"]) {
+      const isClickable = node["@_clickable"] === "true";
+      const isLongClickable = node["@_long-clickable"] === "true";
+      const isScrollable = node["@_scrollable"] === "true";
+      const isEnabled = node["@_enabled"] !== "false"; // default true
+      const isChecked = node["@_checked"] === "true";
+      const isFocused = node["@_focused"] === "true";
+      const isSelected = node["@_selected"] === "true";
+      const isPassword = node["@_password"] === "true";
+
+      const elementClass = node["@_class"] ?? "";
+      const isEditable =
+        elementClass.includes("EditText") ||
+        elementClass.includes("AutoCompleteTextView") ||
+        node["@_editable"] === "true";
+
+      const text: string = node["@_text"] ?? "";
+      const desc: string = node["@_content-desc"] ?? "";
+      const resourceId: string = node["@_resource-id"] ?? "";
+      const hint: string = node["@_hint"] ?? "";
+
+      // Build a label for this node to use as parent context for children
+      const typeName = elementClass.split(".").pop() ?? "";
+      const nodeLabel = text || desc || resourceId.split("/").pop() || typeName;
+
+      // Determine if this element should be included
+      const isInteractive = isClickable || isEditable || isLongClickable || isScrollable;
+      const hasContent = !!(text || desc);
+
+      if (isInteractive || hasContent) {
+        const bounds = String(node["@_bounds"]);
+        // Expected form is "[x1,y1][x2,y2]". Validate with a regex: Number() never
+        // throws, so malformed bounds used to become NaN coordinates that passed the
+        // size check below and were emitted as elements.
+        const boundsMatch = /^\[(-?\d+),(-?\d+)\]\[(-?\d+),(-?\d+)\]$/.exec(bounds);
+        if (boundsMatch) {
+          const [x1, y1, x2, y2] = boundsMatch.slice(1).map(Number);
+          const centerX = Math.floor((x1 + x2) / 2);
+          const centerY = Math.floor((y1 + y2) / 2);
+          const width = x2 - x1;
+          const height = y2 - y1;
+
+          // Zero-size elements are invisible: skip them, but still walk their children
+          if (width > 0 && height > 0) {
+            // Priority: editable > long-press-only > scroll-only > clickable > read
+            let suggestedAction: UIElement["action"];
+            if (isEditable) suggestedAction = "type";
+            else if (isLongClickable && !isClickable) suggestedAction = "longpress";
+            else if (isScrollable && !isClickable) suggestedAction = "scroll";
+            else if (isClickable) suggestedAction = "tap";
+            else suggestedAction = "read";
+
+            elements.push({
+              id: resourceId,
+              text: text || desc,
+              type: typeName,
+              bounds,
+              center: [centerX, centerY],
+              size: [width, height],
+              clickable: isClickable,
+              editable: isEditable,
+              enabled: isEnabled,
+              checked: isChecked,
+              focused: isFocused,
+              selected: isSelected,
+              scrollable: isScrollable,
+              longClickable: isLongClickable,
+              password: isPassword,
+              hint: hint,
+              action: suggestedAction,
+              parent: parentLabel,
+              depth,
+            });
+          }
+        }
+      }
+
+      // Recurse with updated parent label
+      walkChildren(node, nodeLabel, depth + 1);
+      return;
+    }
+
+    // No bounds on this node — just recurse
+    walkChildren(node, parentLabel, depth);
+  }
+  // Recurses into `node.node` children and into a `hierarchy` property when present.
+  function walkChildren(node: any, parentLabel: string, depth: number): void {
+    if (node.node) {
+      // `node.node` may be a single object or an array; normalize to an array
+      const children = Array.isArray(node.node) ? node.node : [node.node];
+      for (const child of children) {
+        walk(child, parentLabel, depth);
+      }
+    }
+    if (node.hierarchy) {
+      walk(node.hierarchy, parentLabel, depth);
+    }
+  }
+
+  walk(parsed, "root", 0);
+  return elements;
+}
diff --git a/android-action-kernel/tsconfig.json b/android-action-kernel/tsconfig.json
new file mode 100644
index 0000000..ea8d4f1
--- /dev/null
+++ b/android-action-kernel/tsconfig.json
@@ -0,0 +1,19 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ES2022",
+    "moduleResolution": "bundler",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "outDir": "dist",
+    "rootDir": "src",
+    "types": ["bun-types"],
+    "resolveJsonModule": true,
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true
+  },
+  "include": ["src/**/*.ts"],
+  "exclude": ["node_modules", "dist"]
+}