Files
droidclaw/src/sanitizer.ts
Sanju Sivalingam 76b7bce405 Rename project to DroidClaw
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 17:51:08 +05:30

250 lines
7.5 KiB
TypeScript

/**
* XML Sanitizer for DroidClaw.
* Parses Android Accessibility XML and extracts interactive UI elements
* with full state information and parent-child hierarchy context.
*/
import { XMLParser } from "fast-xml-parser";
/**
 * Full description of one on-screen element extracted from the accessibility XML.
 * The per-field notes below describe how getInteractiveElements populates each field.
 */
export interface UIElement {
/** Value of the node's `resource-id` attribute; "" when the attribute is absent. */
id: string;
/** The node's `text` attribute, falling back to `content-desc` when text is empty. */
text: string;
/** Last dot-separated segment of the node's `class` attribute. */
type: string;
/** Raw `bounds` attribute string, kept exactly as it appears in the XML. */
bounds: string;
/** Midpoint of the bounds as [x, y], each coordinate rounded down. */
center: [number, number];
/** [width, height] computed as [x2 - x1, y2 - y1] from the bounds. */
size: [number, number];
/** True when the `clickable` attribute is "true". */
clickable: boolean;
/**
 * True when the class name contains "EditText" or "AutoCompleteTextView",
 * or when the `editable` attribute is "true".
 */
editable: boolean;
/** True unless the `enabled` attribute is explicitly "false". */
enabled: boolean;
/** True when the `checked` attribute is "true". */
checked: boolean;
/** True when the `focused` attribute is "true". */
focused: boolean;
/** True when the `selected` attribute is "true". */
selected: boolean;
/** True when the `scrollable` attribute is "true". */
scrollable: boolean;
/** True when the `long-clickable` attribute is "true". */
longClickable: boolean;
/** True when the `password` attribute is "true". */
password: boolean;
/** Value of the node's `hint` attribute; "" when the attribute is absent. */
hint: string;
/**
 * Suggested interaction, chosen in this order: "type" if editable; "longpress"
 * if long-clickable but not clickable; "scroll" if scrollable but not clickable;
 * "tap" if clickable; otherwise "read".
 */
action: "tap" | "type" | "longpress" | "scroll" | "read";
/**
 * Label of the nearest ancestor that has bounds (its text, content-desc,
 * resource-id tail or type name), or "root" when there is no such ancestor.
 */
parent: string;
/** Number of ancestors that carry a `bounds` attribute; 0 for top-level nodes. */
depth: number;
}
/**
 * Builds a fingerprint string for a screen so two snapshots can be compared.
 *
 * Each element contributes `id|text|centerX,centerY|enabled|checked`; the
 * per-element entries are concatenated with ";". An empty list yields "".
 *
 * @param elements Elements of the screen, in the order they should be compared.
 * @returns The concatenated fingerprint (a plain string, not a digest).
 */
export function computeScreenHash(elements: UIElement[]): string {
  const fingerprints: string[] = [];
  for (const el of elements) {
    const position = `${el.center[0]},${el.center[1]}`;
    const fields = [el.id, el.text, position, el.enabled, el.checked].map(String);
    fingerprints.push(fields.join("|"));
  }
  return fingerprints.join(";");
}
/**
 * Parses Android Accessibility XML and returns a rich list of interactive elements.
 * Preserves state (enabled, checked, focused) and hierarchy context.
 *
 * A node is emitted when all of the following hold:
 *  - it carries a `bounds` attribute that parses to four finite numbers
 *    (expected form `[x1,y1][x2,y2]`),
 *  - its width and height are both positive,
 *  - it is interactive (clickable, long-clickable, scrollable or editable)
 *    or has content (`text` or `content-desc`).
 * Nodes that are not emitted are still traversed, so their descendants are kept.
 *
 * @param xmlContent Raw XML text of the accessibility hierarchy.
 * @returns Matching elements in depth-first traversal order. Returns an empty
 *   array (after logging a warning) when the XML parser throws.
 */
export function getInteractiveElements(xmlContent: string): UIElement[] {
  const parser = new XMLParser({
    ignoreAttributes: false,
    attributeNamePrefix: "@_",
    allowBooleanAttributes: true,
  });

  let parsed: unknown;
  try {
    parsed = parser.parse(xmlContent);
  } catch {
    console.log("Warning: Error parsing XML. The screen might be loading.");
    return [];
  }

  type XmlNode = Record<string, unknown>;
  type Box = [number, number, number, number];

  const elements: UIElement[] = [];

  const isXmlNode = (value: unknown): value is XmlNode =>
    typeof value === "object" && value !== null;

  // Reads an attribute as a string. Missing attributes and non-string values
  // (a valueless attribute is parsed as boolean `true`) are treated as "".
  const attr = (node: XmlNode, name: string): string => {
    const value = node[`@_${name}`];
    return typeof value === "string" ? value : "";
  };

  // Boolean state attributes are serialized as the literal string "true".
  const flag = (node: XmlNode, name: string): boolean => attr(node, name) === "true";

  // Turns "[x1,y1][x2,y2]" into [x1, y1, x2, y2]. Returns null when fewer than
  // four coordinates are present or any of the first four is not a finite
  // number, so malformed bounds never produce NaN centers or sizes.
  const parseBounds = (raw: string): Box | null => {
    const coords = raw
      .replace("][", ",")
      .replace("[", "")
      .replace("]", "")
      .split(",")
      .map(Number)
      .slice(0, 4);
    if (coords.length < 4 || !coords.every((n) => Number.isFinite(n))) {
      return null;
    }
    return coords as Box;
  };

  function walk(node: unknown, parentLabel: string, depth: number): void {
    if (!isXmlNode(node)) return;

    const rawBounds = node["@_bounds"];
    if (!rawBounds) {
      // No bounds on this node — just recurse with the inherited context.
      walkChildren(node, parentLabel, depth);
      return;
    }

    const elementClass = attr(node, "class");
    const text = attr(node, "text");
    const desc = attr(node, "content-desc");
    const resourceId = attr(node, "resource-id");
    const hint = attr(node, "hint");

    const isClickable = flag(node, "clickable");
    const isLongClickable = flag(node, "long-clickable");
    const isScrollable = flag(node, "scrollable");
    const isEnabled = attr(node, "enabled") !== "false"; // default true
    const isEditable =
      elementClass.includes("EditText") ||
      elementClass.includes("AutoCompleteTextView") ||
      flag(node, "editable");

    // Label used as parent context for this node's children.
    const typeName = elementClass.split(".").pop() ?? "";
    const nodeLabel = text || desc || resourceId.split("/").pop() || typeName;

    const isInteractive = isClickable || isEditable || isLongClickable || isScrollable;
    const hasContent = text !== "" || desc !== "";

    const bounds = typeof rawBounds === "string" ? rawBounds : "";
    const box = isInteractive || hasContent ? parseBounds(bounds) : null;

    if (box) {
      const [x1, y1, x2, y2] = box;
      const width = x2 - x1;
      const height = y2 - y1;

      // Zero-size (invisible) elements are skipped, but their children are
      // still walked below.
      if (width > 0 && height > 0) {
        let suggestedAction: UIElement["action"];
        if (isEditable) suggestedAction = "type";
        else if (isLongClickable && !isClickable) suggestedAction = "longpress";
        else if (isScrollable && !isClickable) suggestedAction = "scroll";
        else if (isClickable) suggestedAction = "tap";
        else suggestedAction = "read";

        elements.push({
          id: resourceId,
          text: text || desc,
          type: typeName,
          bounds,
          center: [Math.floor((x1 + x2) / 2), Math.floor((y1 + y2) / 2)],
          size: [width, height],
          clickable: isClickable,
          editable: isEditable,
          enabled: isEnabled,
          checked: flag(node, "checked"),
          focused: flag(node, "focused"),
          selected: flag(node, "selected"),
          scrollable: isScrollable,
          longClickable: isLongClickable,
          password: flag(node, "password"),
          hint,
          action: suggestedAction,
          parent: parentLabel,
          depth,
        });
      }
    }

    // Recurse with this node's label as the new parent context.
    walkChildren(node, nodeLabel, depth + 1);
  }

  function walkChildren(node: XmlNode, parentLabel: string, depth: number): void {
    const children = node["node"];
    if (children) {
      // A single child is parsed as an object, several children as an array.
      const list: unknown[] = Array.isArray(children) ? children : [children];
      for (const child of list) {
        walk(child, parentLabel, depth);
      }
    }
    const hierarchy = node["hierarchy"];
    if (hierarchy) {
      walk(hierarchy, parentLabel, depth);
    }
  }

  walk(parsed, "root", 0);
  return elements;
}
// ===========================================
// Smart Element Filtering (Phase 2A)
// ===========================================
/**
 * Compact representation sent to the LLM — only essential fields.
 * Non-default flags are included conditionally to minimize tokens.
 *
 * As built by compactElement, `text`, `center` and `action` are always copied
 * from the source UIElement; every optional field is omitted entirely when the
 * source holds its default value. All other UIElement fields are dropped.
 */
export interface CompactUIElement {
/** Display text copied from the source element. */
text: string;
/** Center point [x, y] copied from the source element. */
center: [number, number];
/** Suggested interaction copied from the source element. */
action: UIElement["action"];
// Only included when non-default
/** Present (as `false`) only when the source element is not enabled. */
enabled?: false;
/** Present (as `true`) only when the source element is checked. */
checked?: true;
/** Present (as `true`) only when the source element is focused. */
focused?: true;
/** Present only when the source element has a non-empty hint. */
hint?: string;
/** Present (as `true`) only when the source element is editable. */
editable?: true;
/** Present (as `true`) only when the source element is scrollable. */
scrollable?: true;
}
/**
 * Reduces a full UIElement to the compact shape sent to the LLM.
 *
 * `text`, `center` and `action` are always copied (the `center` tuple is
 * shared with the input, not cloned). The optional flags are added only when
 * they differ from their defaults: `enabled` when falsy, `checked`, `focused`,
 * `editable` and `scrollable` when truthy, and `hint` when non-empty.
 *
 * @param el The full element to compact.
 * @returns A new object containing only the essential fields.
 */
export function compactElement(el: UIElement): CompactUIElement {
  return {
    text: el.text,
    center: el.center,
    action: el.action,
    ...(el.enabled ? {} : { enabled: false as const }),
    ...(el.checked ? { checked: true as const } : {}),
    ...(el.focused ? { focused: true as const } : {}),
    ...(el.hint ? { hint: el.hint } : {}),
    ...(el.editable ? { editable: true as const } : {}),
    ...(el.scrollable ? { scrollable: true as const } : {}),
  };
}
/**
 * Computes a relevance score used to rank elements for the LLM.
 *
 * Points are additive: enabled +10, editable +8, focused +6,
 * clickable or long-clickable +5, non-empty text +3.
 *
 * @param el Element to score.
 * @returns The summed score, from 0 to 32.
 */
function scoreElement(el: UIElement): number {
  const isTappable = el.clickable || el.longClickable;
  return (
    (el.enabled ? 10 : 0) +
    (el.editable ? 8 : 0) +
    (el.focused ? 6 : 0) +
    (isTappable ? 5 : 0) +
    (el.text ? 3 : 0)
  );
}
/**
 * Picks the most relevant elements and returns them in compact form.
 *
 * Steps:
 *  1. Centers are snapped to a 5px grid; elements landing on the same grid
 *     point are treated as duplicates, and the one with the highest score is
 *     kept (the earlier element wins a tie).
 *  2. Survivors are sorted by score, highest first.
 *  3. The first `limit` survivors are converted with compactElement.
 *
 * @param elements Candidate elements.
 * @param limit Maximum number of elements to return.
 * @returns Up to `limit` compact elements, best-scoring first.
 */
export function filterElements(
  elements: UIElement[],
  limit: number
): CompactUIElement[] {
  const GRID = 5;
  const snap = (value: number): number => Math.round(value / GRID) * GRID;

  const bestPerCell = new Map<string, UIElement>();
  for (const candidate of elements) {
    const cell = `${snap(candidate.center[0])},${snap(candidate.center[1])}`;
    const incumbent = bestPerCell.get(cell);
    // Keep the incumbent unless the candidate scores strictly higher.
    if (incumbent && scoreElement(candidate) <= scoreElement(incumbent)) {
      continue;
    }
    bestPerCell.set(cell, candidate);
  }

  return [...bestPerCell.values()]
    .sort((left, right) => scoreElement(right) - scoreElement(left))
    .slice(0, limit)
    .map(compactElement);
}