fix(agent): prevent stuck loop by adding action history to LLM prompt
The UI agent had no memory of previous actions — each step was a fresh single-shot LLM call. After typing and sending a message, the LLM saw an empty text field and retyped the message in a loop.

- Add RECENT_ACTIONS (last 5 actions with text/result) to user prompt
- Add chat app completion detection rule to dynamic prompt
- Add send-success hints for WhatsApp and Messages apps
- Add git convention to CLAUDE.md (no co-author lines)
This commit is contained in:
@@ -63,3 +63,7 @@ Copy `.env.example` to `.env` and configure `LLM_PROVIDER` + the corresponding A
|
|||||||
## Device Assumptions
|
## Device Assumptions
|
||||||
|
|
||||||
Swipe coordinates in `constants.ts` are hardcoded for 1080px-wide screens (center X=540, center Y=1200). Adjust `SWIPE_COORDS` and `SCREEN_CENTER_*` for different resolutions.
|
Swipe coordinates in `constants.ts` are hardcoded for 1080px-wide screens (center X=540, center Y=1200). Adjust `SWIPE_COORDS` and `SCREEN_CENTER_*` for different resolutions.
|
||||||
|
|
||||||
|
## Git Conventions
|
||||||
|
|
||||||
|
- Do NOT add `Co-Authored-By: Claude` lines to commit messages.
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ const APP_HINTS: Record<string, string[]> = {
|
|||||||
],
|
],
|
||||||
"com.whatsapp": [
|
"com.whatsapp": [
|
||||||
"To send a message, tap the green arrow/send button — do NOT use 'enter' key.",
|
"To send a message, tap the green arrow/send button — do NOT use 'enter' key.",
|
||||||
|
"SEND SUCCESS: After tapping send, the text field clears and your message appears as a chat bubble. If the field is EMPTY and you see your message in the conversation — it was sent. Say done.",
|
||||||
"New chat: tap the green floating button (bottom-right), then search for contact.",
|
"New chat: tap the green floating button (bottom-right), then search for contact.",
|
||||||
"Media: tap the + or paperclip icon to attach files/images.",
|
"Media: tap the + or paperclip icon to attach files/images.",
|
||||||
],
|
],
|
||||||
@@ -43,6 +44,7 @@ const APP_HINTS: Record<string, string[]> = {
|
|||||||
"New message: tap the floating button with + or pencil icon.",
|
"New message: tap the floating button with + or pencil icon.",
|
||||||
"To field: type the contact name or number, then select from suggestions.",
|
"To field: type the contact name or number, then select from suggestions.",
|
||||||
"Send: tap the arrow/send icon, not Enter.",
|
"Send: tap the arrow/send icon, not Enter.",
|
||||||
|
"SEND SUCCESS: After send, if the text field is EMPTY and your message appears in the conversation, it was sent. Say done.",
|
||||||
],
|
],
|
||||||
"com.google.android.dialer": [
|
"com.google.android.dialer": [
|
||||||
"Dial pad: tap the floating phone icon if dial pad isn't visible.",
|
"Dial pad: tap the floating phone icon if dial pad isn't visible.",
|
||||||
|
|||||||
@@ -327,6 +327,8 @@ RULES:
|
|||||||
- If SCREEN_CHANGE says "NOT changed", your last action had no effect — change strategy.
|
- If SCREEN_CHANGE says "NOT changed", your last action had no effect — change strategy.
|
||||||
- Do NOT repeat an action that already failed.
|
- Do NOT repeat an action that already failed.
|
||||||
- Say "done" as soon as the goal is achieved.
|
- Say "done" as soon as the goal is achieved.
|
||||||
|
- CHECK RECENT_ACTIONS before every step: if you already typed text and tapped send, do NOT type it again.
|
||||||
|
- CHAT APP COMPLETION: After typing a message and tapping send in a chat app (WhatsApp, Messages, etc.), if the text field is now EMPTY and your message text appears in the conversation above, the message was SENT SUCCESSFULLY. Say "done" immediately.
|
||||||
- COPY-PASTE: Use clipboard_set with text from SCREEN_CONTEXT (most reliable), then paste. Or just type directly.`;
|
- COPY-PASTE: Use clipboard_set with text from SCREEN_CONTEXT (most reliable), then paste. Or just type directly.`;
|
||||||
|
|
||||||
if (isStuck) {
|
if (isStuck) {
|
||||||
|
|||||||
@@ -260,6 +260,7 @@ export async function runAgentLoop(
|
|||||||
let stuckCount = 0;
|
let stuckCount = 0;
|
||||||
const recentActions: string[] = [];
|
const recentActions: string[] = [];
|
||||||
let lastActionFeedback = "";
|
let lastActionFeedback = "";
|
||||||
|
const actionHistory: string[] = []; // Human-readable log of recent actions
|
||||||
|
|
||||||
// Fetch installed apps from device metadata for LLM context
|
// Fetch installed apps from device metadata for LLM context
|
||||||
let installedAppsContext = "";
|
let installedAppsContext = "";
|
||||||
@@ -440,12 +441,18 @@ export async function runAgentLoop(
|
|||||||
? `LAST_ACTION_RESULT: ${lastActionFeedback}\n\n`
|
? `LAST_ACTION_RESULT: ${lastActionFeedback}\n\n`
|
||||||
: "";
|
: "";
|
||||||
|
|
||||||
|
// Include recent action history so LLM knows what it already did
|
||||||
|
const historyContext = actionHistory.length > 0
|
||||||
|
? `RECENT_ACTIONS (what you already did — do NOT repeat completed work):\n${actionHistory.map((h) => ` ${h}`).join("\n")}\n\n`
|
||||||
|
: "";
|
||||||
|
|
||||||
let userPrompt =
|
let userPrompt =
|
||||||
`GOAL: ${goal}\n\n` +
|
`GOAL: ${goal}\n\n` +
|
||||||
`STEP: ${step + 1}/${maxSteps}\n\n` +
|
`STEP: ${step + 1}/${maxSteps}\n\n` +
|
||||||
(useDynamicPrompt ? "" : installedAppsContext) +
|
(useDynamicPrompt ? "" : installedAppsContext) +
|
||||||
foregroundLine +
|
foregroundLine +
|
||||||
actionFeedbackLine +
|
actionFeedbackLine +
|
||||||
|
historyContext +
|
||||||
`SCREEN_CONTEXT:\n${JSON.stringify(elements, null, 2)}` +
|
`SCREEN_CONTEXT:\n${JSON.stringify(elements, null, 2)}` +
|
||||||
diffContext +
|
diffContext +
|
||||||
visionContext;
|
visionContext;
|
||||||
@@ -510,6 +517,13 @@ export async function runAgentLoop(
|
|||||||
recentActions.push(actionSig);
|
recentActions.push(actionSig);
|
||||||
if (recentActions.length > 8) recentActions.shift();
|
if (recentActions.length > 8) recentActions.shift();
|
||||||
|
|
||||||
|
// Build human-readable history entry for LLM context
|
||||||
|
const historyParts = [`Step ${step + 1}: ${actionSig}`];
|
||||||
|
if (action.text) historyParts.push(`text="${action.text}"`);
|
||||||
|
if (action.reason) historyParts.push(`— ${action.reason}`);
|
||||||
|
actionHistory.push(historyParts.join(" "));
|
||||||
|
if (actionHistory.length > 5) actionHistory.shift();
|
||||||
|
|
||||||
// ── 7. Log + Done check ────────────────────────────────
|
// ── 7. Log + Done check ────────────────────────────────
|
||||||
const reason = action.reason ?? "";
|
const reason = action.reason ?? "";
|
||||||
console.log(`[Agent ${sessionId}] Step ${step + 1}: ${actionSig} — ${reason}`);
|
console.log(`[Agent ${sessionId}] Step ${step + 1}: ${actionSig} — ${reason}`);
|
||||||
@@ -565,6 +579,10 @@ export async function runAgentLoop(
|
|||||||
const resultSuccess = result.success !== false;
|
const resultSuccess = result.success !== false;
|
||||||
lastActionFeedback = `${actionSig} -> ${resultSuccess ? "OK" : "FAILED"}: ${result.error ?? result.data ?? "completed"}`;
|
lastActionFeedback = `${actionSig} -> ${resultSuccess ? "OK" : "FAILED"}: ${result.error ?? result.data ?? "completed"}`;
|
||||||
console.log(`[Agent ${sessionId}] Step ${step + 1} result: ${lastActionFeedback}`);
|
console.log(`[Agent ${sessionId}] Step ${step + 1} result: ${lastActionFeedback}`);
|
||||||
|
// Append result to last history entry
|
||||||
|
if (actionHistory.length > 0) {
|
||||||
|
actionHistory[actionHistory.length - 1] += ` → ${resultSuccess ? "OK" : "FAILED"}`;
|
||||||
|
}
|
||||||
// Update step result in DB
|
// Update step result in DB
|
||||||
if (persistentDeviceId) {
|
if (persistentDeviceId) {
|
||||||
db.update(agentStep).set({ result: lastActionFeedback }).where(eq(agentStep.id, stepId))
|
db.update(agentStep).set({ result: lastActionFeedback }).where(eq(agentStep.id, stepId))
|
||||||
|
|||||||
Reference in New Issue
Block a user