fix(agent): prevent stuck loop by adding action history to LLM prompt

The UI agent had no memory of previous actions — each step was a fresh
single-shot LLM call. After typing and sending a message, the LLM saw
an empty text field and retyped the message in a loop.

- Add RECENT_ACTIONS (last 5 actions with text/result) to user prompt
- Add chat app completion detection rule to dynamic prompt
- Add send-success hints for WhatsApp and Messages apps
- Add git convention to CLAUDE.md (no co-author lines)
This commit is contained in:
Sanju Sivalingam
2026-02-18 00:53:13 +05:30
parent 9193b02d36
commit db995e4913
4 changed files with 26 additions and 0 deletions

View File

@@ -63,3 +63,7 @@ Copy `.env.example` to `.env` and configure `LLM_PROVIDER` + the corresponding A
## Device Assumptions
Swipe coordinates in `constants.ts` are hardcoded for 1080px-wide screens (center X=540, center Y=1200). Adjust `SWIPE_COORDS` and `SCREEN_CENTER_*` for different resolutions.
## Git Conventions
- Do NOT add `Co-Authored-By: Claude` lines to commit messages.

View File

@@ -13,6 +13,7 @@ const APP_HINTS: Record<string, string[]> = {
], ],
"com.whatsapp": [ "com.whatsapp": [
"To send a message, tap the green arrow/send button — do NOT use 'enter' key.", "To send a message, tap the green arrow/send button — do NOT use 'enter' key.",
"SEND SUCCESS: After tapping send, the text field clears and your message appears as a chat bubble. If the field is EMPTY and you see your message in the conversation — it was sent. Say done.",
"New chat: tap the green floating button (bottom-right), then search for contact.", "New chat: tap the green floating button (bottom-right), then search for contact.",
"Media: tap the + or paperclip icon to attach files/images.", "Media: tap the + or paperclip icon to attach files/images.",
], ],
@@ -43,6 +44,7 @@ const APP_HINTS: Record<string, string[]> = {
"New message: tap the floating button with + or pencil icon.", "New message: tap the floating button with + or pencil icon.",
"To field: type the contact name or number, then select from suggestions.", "To field: type the contact name or number, then select from suggestions.",
"Send: tap the arrow/send icon, not Enter.", "Send: tap the arrow/send icon, not Enter.",
"SEND SUCCESS: After send, if the text field is EMPTY and your message appears in the conversation, it was sent. Say done.",
], ],
"com.google.android.dialer": [ "com.google.android.dialer": [
"Dial pad: tap the floating phone icon if dial pad isn't visible.", "Dial pad: tap the floating phone icon if dial pad isn't visible.",

View File

@@ -327,6 +327,8 @@ RULES:
- If SCREEN_CHANGE says "NOT changed", your last action had no effect — change strategy. - If SCREEN_CHANGE says "NOT changed", your last action had no effect — change strategy.
- Do NOT repeat an action that already failed. - Do NOT repeat an action that already failed.
- Say "done" as soon as the goal is achieved. - Say "done" as soon as the goal is achieved.
- CHECK RECENT_ACTIONS before every step: if you already typed text and tapped send, do NOT type it again.
- CHAT APP COMPLETION: After typing a message and tapping send in a chat app (WhatsApp, Messages, etc.), if the text field is now EMPTY and your message text appears in the conversation above, the message was SENT SUCCESSFULLY. Say "done" immediately.
- COPY-PASTE: Use clipboard_set with text from SCREEN_CONTEXT (most reliable), then paste. Or just type directly.`; - COPY-PASTE: Use clipboard_set with text from SCREEN_CONTEXT (most reliable), then paste. Or just type directly.`;
if (isStuck) { if (isStuck) {

View File

@@ -260,6 +260,7 @@ export async function runAgentLoop(
let stuckCount = 0; let stuckCount = 0;
const recentActions: string[] = []; const recentActions: string[] = [];
let lastActionFeedback = ""; let lastActionFeedback = "";
const actionHistory: string[] = []; // Human-readable log of recent actions
// Fetch installed apps from device metadata for LLM context // Fetch installed apps from device metadata for LLM context
let installedAppsContext = ""; let installedAppsContext = "";
@@ -440,12 +441,18 @@ export async function runAgentLoop(
? `LAST_ACTION_RESULT: ${lastActionFeedback}\n\n` ? `LAST_ACTION_RESULT: ${lastActionFeedback}\n\n`
: ""; : "";
// Include recent action history so LLM knows what it already did
const historyContext = actionHistory.length > 0
? `RECENT_ACTIONS (what you already did — do NOT repeat completed work):\n${actionHistory.map((h) => ` ${h}`).join("\n")}\n\n`
: "";
let userPrompt = let userPrompt =
`GOAL: ${goal}\n\n` + `GOAL: ${goal}\n\n` +
`STEP: ${step + 1}/${maxSteps}\n\n` + `STEP: ${step + 1}/${maxSteps}\n\n` +
(useDynamicPrompt ? "" : installedAppsContext) + (useDynamicPrompt ? "" : installedAppsContext) +
foregroundLine + foregroundLine +
actionFeedbackLine + actionFeedbackLine +
historyContext +
`SCREEN_CONTEXT:\n${JSON.stringify(elements, null, 2)}` + `SCREEN_CONTEXT:\n${JSON.stringify(elements, null, 2)}` +
diffContext + diffContext +
visionContext; visionContext;
@@ -510,6 +517,13 @@ export async function runAgentLoop(
recentActions.push(actionSig); recentActions.push(actionSig);
if (recentActions.length > 8) recentActions.shift(); if (recentActions.length > 8) recentActions.shift();
// Build human-readable history entry for LLM context
const historyParts = [`Step ${step + 1}: ${actionSig}`];
if (action.text) historyParts.push(`text="${action.text}"`);
if (action.reason) historyParts.push(`${action.reason}`);
actionHistory.push(historyParts.join(" "));
if (actionHistory.length > 5) actionHistory.shift();
// ── 7. Log + Done check ──────────────────────────────── // ── 7. Log + Done check ────────────────────────────────
const reason = action.reason ?? ""; const reason = action.reason ?? "";
console.log(`[Agent ${sessionId}] Step ${step + 1}: ${actionSig}${reason}`); console.log(`[Agent ${sessionId}] Step ${step + 1}: ${actionSig}${reason}`);
@@ -565,6 +579,10 @@ export async function runAgentLoop(
const resultSuccess = result.success !== false; const resultSuccess = result.success !== false;
lastActionFeedback = `${actionSig} -> ${resultSuccess ? "OK" : "FAILED"}: ${result.error ?? result.data ?? "completed"}`; lastActionFeedback = `${actionSig} -> ${resultSuccess ? "OK" : "FAILED"}: ${result.error ?? result.data ?? "completed"}`;
console.log(`[Agent ${sessionId}] Step ${step + 1} result: ${lastActionFeedback}`); console.log(`[Agent ${sessionId}] Step ${step + 1} result: ${lastActionFeedback}`);
// Append result to last history entry
if (actionHistory.length > 0) {
actionHistory[actionHistory.length - 1] += `${resultSuccess ? "OK" : "FAILED"}`;
}
// Update step result in DB // Update step result in DB
if (persistentDeviceId) { if (persistentDeviceId) {
db.update(agentStep).set({ result: lastActionFeedback }).where(eq(agentStep.id, stepId)) db.update(agentStep).set({ result: lastActionFeedback }).where(eq(agentStep.id, stepId))