/**
 * XML Sanitizer for DroidClaw.
 * Parses Android Accessibility XML and extracts interactive UI elements
 * with full state information and parent-child hierarchy context.
 */
|
|
|
|
import { XMLParser } from "fast-xml-parser";
|
|
|
|
/**
 * One UI node extracted from an Android Accessibility XML dump, together with
 * its geometry, state flags, and hierarchy context.
 */
export interface UIElement {
  // --- Identity -----------------------------------------------------------

  /** Value of the node's `resource-id` attribute; empty string when absent. */
  id: string;
  /** The node's `text` attribute, falling back to its `content-desc`. */
  text: string;
  /** Last dot-separated segment of the node's `class` attribute. */
  type: string;
  /** Value of the node's `hint` attribute; empty string when absent. */
  hint: string;

  // --- Geometry -----------------------------------------------------------

  /** Raw `bounds` attribute, in the `[x1,y1][x2,y2]` form. */
  bounds: string;
  /** Midpoint of the bounds as `[x, y]`, rounded down to whole pixels. */
  center: [number, number];
  /** Extent of the bounds as `[width, height]`. */
  size: [number, number];

  // --- State flags --------------------------------------------------------

  clickable: boolean;
  longClickable: boolean;
  scrollable: boolean;
  /** True for EditText / AutoCompleteTextView classes or `editable="true"`. */
  editable: boolean;
  /** True unless the node explicitly declares `enabled="false"`. */
  enabled: boolean;
  checked: boolean;
  focused: boolean;
  selected: boolean;
  password: boolean;

  // --- Derived context ----------------------------------------------------

  /** Interaction suggested for this element, derived from its flags. */
  action: "tap" | "type" | "longpress" | "scroll" | "read";
  /** Label of the nearest ancestor that has bounds, or "root" at the top. */
  parent: string;
  /** Number of ancestors that have bounds. */
  depth: number;
}
|
|
|
|
/**
 * Builds a fingerprint string of the screen for state comparison.
 *
 * Each element contributes `id|text|centerX,centerY|enabled|checked`, and the
 * per-element entries are joined with `;`. The result is a plain concatenation
 * rather than a fixed-length digest.
 *
 * @param elements - Elements describing the current screen.
 * @returns The joined fingerprint; an empty string for an empty list.
 */
export function computeScreenHash(elements: UIElement[]): string {
  const fingerprints: string[] = [];

  for (const element of elements) {
    const position = `${element.center[0]},${element.center[1]}`;
    fingerprints.push(
      `${element.id}|${element.text}|${position}|${element.enabled}|${element.checked}`
    );
  }

  return fingerprints.join(";");
}
|
|
|
|
/**
 * Parses Android Accessibility XML and returns a rich list of UI elements.
 *
 * A node is reported when it has a well-formed `bounds` attribute with a
 * positive width and height, and it is either interactive (clickable,
 * long-clickable, scrollable, or editable) or carries text / a content
 * description. Each reported element keeps its state flags plus the label of
 * the nearest ancestor that has bounds.
 *
 * @param xmlContent - Raw XML of the accessibility hierarchy.
 * @returns The extracted elements in document order, or an empty array when
 *   the XML parser throws.
 */
export function getInteractiveElements(xmlContent: string): UIElement[] {
  /** A parsed XML node; attributes are stored under keys prefixed with `@_`. */
  type XmlNode = Record<string, unknown>;

  const parser = new XMLParser({
    ignoreAttributes: false,
    attributeNamePrefix: "@_",
    allowBooleanAttributes: true,
  });

  let parsed: unknown;
  try {
    parsed = parser.parse(xmlContent);
  } catch {
    console.log("Warning: Error parsing XML. The screen might be loading.");
    return [];
  }

  const elements: UIElement[] = [];

  /** Narrows an arbitrary parsed value to an object that can carry attributes. */
  function isNode(value: unknown): value is XmlNode {
    return typeof value === "object" && value !== null;
  }

  /**
   * Reads an attribute as a string. Missing or non-string values (which a
   * valueless attribute can produce with `allowBooleanAttributes`) become "",
   * so string methods below never throw.
   */
  function attr(node: XmlNode, name: string): string {
    const value = node[`@_${name}`];
    return typeof value === "string" ? value : "";
  }

  /**
   * Parses a `[x1,y1][x2,y2]` bounds string into its four coordinates.
   * Returns null unless exactly four finite numbers are present; `Number()`
   * never throws, so malformed input must be rejected explicitly.
   */
  function parseBounds(raw: string): [number, number, number, number] | null {
    const parts = raw.replace("][", ",").replace("[", "").replace("]", "").split(",");
    if (parts.length !== 4) return null;

    // Number("") is 0, so treat empty fields as invalid rather than as zero.
    const values = parts.map((part) => (part.trim() === "" ? Number.NaN : Number(part)));
    if (!values.every((value) => Number.isFinite(value))) return null;

    const [x1, y1, x2, y2] = values;
    return [x1, y1, x2, y2];
  }

  function walk(node: unknown, parentLabel: string, depth: number): void {
    if (!isNode(node)) return;

    const rawBounds = node["@_bounds"];
    if (!rawBounds) {
      // No bounds on this node — just recurse
      walkChildren(node, parentLabel, depth);
      return;
    }

    const isClickable = attr(node, "clickable") === "true";
    const isLongClickable = attr(node, "long-clickable") === "true";
    const isScrollable = attr(node, "scrollable") === "true";
    const isEnabled = attr(node, "enabled") !== "false"; // default true
    const isChecked = attr(node, "checked") === "true";
    const isFocused = attr(node, "focused") === "true";
    const isSelected = attr(node, "selected") === "true";
    const isPassword = attr(node, "password") === "true";

    const elementClass = attr(node, "class");
    const isEditable =
      elementClass.includes("EditText") ||
      elementClass.includes("AutoCompleteTextView") ||
      attr(node, "editable") === "true";

    const text = attr(node, "text");
    const desc = attr(node, "content-desc");
    const resourceId = attr(node, "resource-id");
    const hint = attr(node, "hint");

    // Build a label for this node to use as parent context for children
    const typeName = elementClass.split(".").pop() ?? "";
    const nodeLabel = text || desc || resourceId.split("/").pop() || typeName;

    // Determine if this element should be included
    const isInteractive = isClickable || isEditable || isLongClickable || isScrollable;
    const hasContent = text !== "" || desc !== "";

    if ((isInteractive || hasContent) && typeof rawBounds === "string") {
      // Malformed bounds yield null and the element is skipped; its children
      // are still walked below.
      const rect = parseBounds(rawBounds);
      if (rect) {
        const [x1, y1, x2, y2] = rect;
        const width = x2 - x1;
        const height = y2 - y1;

        // Skip zero-size elements (invisible)
        if (width > 0 && height > 0) {
          let suggestedAction: UIElement["action"];
          if (isEditable) suggestedAction = "type";
          else if (isLongClickable && !isClickable) suggestedAction = "longpress";
          else if (isScrollable && !isClickable) suggestedAction = "scroll";
          else if (isClickable) suggestedAction = "tap";
          else suggestedAction = "read";

          elements.push({
            id: resourceId,
            text: text || desc,
            type: typeName,
            bounds: rawBounds,
            center: [Math.floor((x1 + x2) / 2), Math.floor((y1 + y2) / 2)],
            size: [width, height],
            clickable: isClickable,
            editable: isEditable,
            enabled: isEnabled,
            checked: isChecked,
            focused: isFocused,
            selected: isSelected,
            scrollable: isScrollable,
            longClickable: isLongClickable,
            password: isPassword,
            hint,
            action: suggestedAction,
            parent: parentLabel,
            depth,
          });
        }
      }
    }

    // Recurse with updated parent label
    walkChildren(node, nodeLabel, depth + 1);
  }

  function walkChildren(node: XmlNode, parentLabel: string, depth: number): void {
    const childNodes = node.node;
    if (childNodes) {
      // A single child is parsed as an object, several children as an array.
      const children: unknown[] = Array.isArray(childNodes) ? childNodes : [childNodes];
      for (const child of children) {
        walk(child, parentLabel, depth);
      }
    }
    if (node.hierarchy) {
      walk(node.hierarchy, parentLabel, depth);
    }
  }

  walk(parsed, "root", 0);
  return elements;
}
|
|
|
|
// ===========================================
// Smart Element Filtering (Phase 2A)
// ===========================================
|
|
|
|
/**
 * Token-lean view of a UI element for the LLM.
 *
 * The three required fields are always present. Every optional field is a
 * flag or value that appears only when it differs from its default, which is
 * why the flag types are the single literal they can hold.
 */
export interface CompactUIElement {
  /** Label shown for the element. */
  text: string;
  /** Point to interact with, as `[x, y]`. */
  center: [number, number];
  /** Suggested interaction. */
  action: UIElement["action"];

  // Only included when non-default

  /** Present (as `false`) only for disabled elements. */
  enabled?: false;
  /** Present only for checked elements. */
  checked?: true;
  /** Present only for focused elements. */
  focused?: true;
  /** Present only when the element has a non-empty hint. */
  hint?: string;
  /** Present only for editable elements. */
  editable?: true;
  /** Present only for scrollable elements. */
  scrollable?: true;
}
|
|
|
|
/**
 * Reduces a full UIElement to the compact shape sent to the LLM.
 *
 * `text`, `center`, and `action` are always copied. The remaining flags are
 * added only when they carry a non-default value: `enabled` when false;
 * `checked`, `focused`, `editable`, and `scrollable` when true; `hint` when
 * non-empty.
 *
 * @param el - The full element to compact.
 * @returns A new compact object; `center` is the same tuple instance as `el.center`.
 */
export function compactElement(el: UIElement): CompactUIElement {
  const { text, center, action } = el;

  return {
    text,
    center,
    action,
    ...(el.enabled ? {} : { enabled: false as const }),
    ...(el.checked ? { checked: true as const } : {}),
    ...(el.focused ? { focused: true as const } : {}),
    ...(el.hint ? { hint: el.hint } : {}),
    ...(el.editable ? { editable: true as const } : {}),
    ...(el.scrollable ? { scrollable: true as const } : {}),
  };
}
|
|
|
|
/**
 * Rates how relevant an element is for the LLM; higher is more relevant.
 *
 * Points are additive: enabled 10, editable 8, focused 6, clickable or
 * long-clickable 5, has text 3.
 *
 * @param el - The element to rate.
 * @returns The summed weight of every criterion the element satisfies.
 */
function scoreElement(el: UIElement): number {
  const criteria: Array<[unknown, number]> = [
    [el.enabled, 10],
    [el.editable, 8],
    [el.focused, 6],
    [el.clickable || el.longClickable, 5],
    [el.text, 3],
  ];

  return criteria.reduce(
    (total, [satisfied, weight]) => (satisfied ? total + weight : total),
    0
  );
}
|
|
|
|
/**
 * Deduplicates elements by center coordinates, ranks them by relevance, and
 * returns the best ones in compact form.
 *
 * Centers are snapped to a 5px grid; elements that land on the same grid
 * point count as duplicates and only the highest-scoring one is kept (the
 * earliest wins a tie). Survivors are sorted by descending score.
 *
 * @param elements - Candidate elements; the array is not modified.
 * @param limit - Maximum number of elements to return. Negative values are
 *   treated as 0 rather than being read as an offset from the end.
 * @returns At most `limit` compact elements, most relevant first.
 */
export function filterElements(
  elements: UIElement[],
  limit: number
): CompactUIElement[] {
  const gridSize = 5;

  // Keep each element's score next to it so it is computed once, not on every
  // dedupe and sort comparison.
  const bestByCell = new Map<string, { element: UIElement; score: number }>();
  for (const element of elements) {
    const bucketX = Math.round(element.center[0] / gridSize) * gridSize;
    const bucketY = Math.round(element.center[1] / gridSize) * gridSize;
    const key = `${bucketX},${bucketY}`;

    const score = scoreElement(element);
    const current = bestByCell.get(key);
    if (!current || score > current.score) {
      bestByCell.set(key, { element, score });
    }
  }

  // Sort descending by score, then take the top N.
  const ranked = Array.from(bestByCell.values()).sort((a, b) => b.score - a.score);

  // slice() reads a negative end as "count from the end", so clamp first.
  const count = Math.max(0, limit);
  return ranked.slice(0, count).map((entry) => compactElement(entry.element));
}
|