diff --git a/android/app/src/main/java/com/thisux/droidclaw/accessibility/GestureExecutor.kt b/android/app/src/main/java/com/thisux/droidclaw/accessibility/GestureExecutor.kt
index dcf3f00..dccd4a7 100644
--- a/android/app/src/main/java/com/thisux/droidclaw/accessibility/GestureExecutor.kt
+++ b/android/app/src/main/java/com/thisux/droidclaw/accessibility/GestureExecutor.kt
@@ -6,6 +6,9 @@ import android.content.Intent
 import android.graphics.Path
 import android.net.Uri
 import android.os.Bundle
+import android.provider.ContactsContract
+import android.provider.CalendarContract
+import android.provider.Settings
 import android.util.Log
 import android.view.accessibility.AccessibilityNodeInfo
 import com.thisux.droidclaw.model.ServerMessage
@@ -35,16 +38,17 @@ class GestureExecutor(private val service: DroidClawAccessibilityService) {
                     msg.x2 ?: 0, msg.y2 ?: 0,
                     msg.duration ?: 300
                 )
-                "launch" -> executeLaunch(msg.packageName ?: "")
+                "launch" -> executeLaunch(msg)
                 "clear" -> executeClear()
                 "clipboard_set" -> executeClipboardSet(msg.text ?: "")
                 "clipboard_get" -> executeClipboardGet()
                 "paste" -> executePaste()
                 "open_url" -> executeOpenUrl(msg.url ?: "")
-                "switch_app" -> executeLaunch(msg.packageName ?: "")
+                "switch_app" -> executeLaunch(msg)
                 "keyevent" -> executeKeyEvent(msg.code ?: 0)
-                "open_settings" -> executeOpenSettings()
+                "open_settings" -> executeOpenSettings(msg.setting)
                 "wait" -> executeWait(msg.duration ?: 1000)
+                "intent" -> executeIntent(msg)
                 else -> ActionResult(false, "Unknown action: ${msg.type}")
             }
         } catch (e: Exception) {
@@ -125,10 +129,40 @@ class GestureExecutor(private val service: DroidClawAccessibilityService) {
         return dispatchSwipeGesture(x1, y1, x2, y2, duration)
     }
 
-    private fun executeLaunch(packageName: String): ActionResult {
+    /**
+     * Launches an app through a deep link or its launcher activity.
+     *
+     * A non-empty [ServerMessage.intentUri] is opened with ACTION_VIEW, restricted to
+     * [ServerMessage.packageName] when one is given. Otherwise the package's launcher
+     * intent is started. Entries of [ServerMessage.intentExtras] are attached as string
+     * extras in both cases.
+     */
+    private fun executeLaunch(msg: ServerMessage): ActionResult {
+        val packageName = msg.packageName ?: ""
+        val uri = msg.intentUri
+        val extras = msg.intentExtras
+
+        // If URI is provided, use ACTION_VIEW intent (deep link / intent with data)
+        if (!uri.isNullOrEmpty()) {
+            val intent = Intent(Intent.ACTION_VIEW, Uri.parse(uri)).apply {
+                addFlags(Intent.FLAG_ACTIVITY_NEW_TASK)
+                if (packageName.isNotEmpty()) setPackage(packageName)
+                extras?.forEach { (k, v) -> putExtra(k, v) }
+            }
+            return try {
+                service.startActivity(intent)
+                ActionResult(true)
+            } catch (e: Exception) {
+                ActionResult(false, "Intent failed: ${e.message}")
+            }
+        }
+
+        // Standard package launch
+        if (packageName.isEmpty()) return ActionResult(false, "No package or URI provided")
         val intent = service.packageManager.getLaunchIntentForPackage(packageName)
             ?: return ActionResult(false, "Package not found: $packageName")
         intent.addFlags(Intent.FLAG_ACTIVITY_NEW_TASK)
+        extras?.forEach { (k, v) -> intent.putExtra(k, v) }
         service.startActivity(intent)
         return ActionResult(true)
     }
@@ -194,12 +228,98 @@ class GestureExecutor(private val service: DroidClawAccessibilityService) {
         }
     }
 
-    private fun executeOpenSettings(): ActionResult {
-        val intent = Intent(android.provider.Settings.ACTION_SETTINGS).apply {
+    /**
+     * Fires an arbitrary intent described by [msg].
+     *
+     * [ServerMessage.intentAction] is required. [ServerMessage.intentUri],
+     * [ServerMessage.intentType] and [ServerMessage.packageName] are optional; empty
+     * strings are treated as absent, matching the empty-URI check in [executeLaunch].
+     * Each entry of [ServerMessage.intentExtras] is attached via [putTypedExtra].
+     *
+     * @return a successful result when startActivity does not throw, otherwise a failed
+     *         result carrying the exception message.
+     */
+    private fun executeIntent(msg: ServerMessage): ActionResult {
+        val intentAction = msg.intentAction?.takeIf { it.isNotEmpty() }
+            ?: return ActionResult(false, "No intentAction provided")
+        val uri = msg.intentUri?.takeIf { it.isNotEmpty() }?.let(Uri::parse)
+        val mimeType = msg.intentType?.takeIf { it.isNotEmpty() }
+        val targetPackage = msg.packageName?.takeIf { it.isNotEmpty() }
+
+        val intent = Intent(intentAction).apply {
+            addFlags(Intent.FLAG_ACTIVITY_NEW_TASK)
+
+            // setData() and setType() each clear the other, so set both in one call.
+            when {
+                uri != null && mimeType != null -> setDataAndType(uri, mimeType)
+                uri != null -> data = uri
+                mimeType != null -> type = mimeType
+            }
+
+            targetPackage?.let { setPackage(it) }
+            msg.intentExtras?.forEach { (key, value) -> putTypedExtra(key, value) }
+        }
+
+        return try {
+            service.startActivity(intent)
+            ActionResult(true)
+        } catch (e: Exception) {
+            ActionResult(false, "Intent failed: ${e.message}")
+        }
+    }
+
+    /**
+     * Stores [value] under [key] with the narrowest type its text parses as: Boolean for
+     * "true"/"false", then Int, then Long, otherwise the raw String.
+     *
+     * Extras arrive as strings, but SET_ALARM-style receivers read typed values (HOUR and
+     * MINUTES as ints, SKIP_UI as a boolean), which a String extra does not satisfy.
+     */
+    private fun Intent.putTypedExtra(key: String, value: String) {
+        val asInt = value.toIntOrNull()
+        val asLong = value.toLongOrNull()
+        when {
+            value == "true" || value == "false" -> putExtra(key, value == "true")
+            asInt != null -> putExtra(key, asInt)
+            asLong != null -> putExtra(key, asLong)
+            else -> putExtra(key, value)
+        }
+    }
+
+    /**
+     * Opens a settings screen.
+     *
+     * [setting] is a short key such as "wifi" or "bluetooth", matched case-insensitively
+     * with surrounding whitespace ignored. Null or an unrecognised key opens the
+     * top-level Settings screen.
+     */
+    private fun executeOpenSettings(setting: String?): ActionResult {
+        val action = when (setting?.trim()?.lowercase()) {
+            "wifi" -> Settings.ACTION_WIFI_SETTINGS
+            "bluetooth" -> Settings.ACTION_BLUETOOTH_SETTINGS
+            "display" -> Settings.ACTION_DISPLAY_SETTINGS
+            "sound" -> Settings.ACTION_SOUND_SETTINGS
+            "battery" -> Intent.ACTION_POWER_USAGE_SUMMARY
+            "location" -> Settings.ACTION_LOCATION_SOURCE_SETTINGS
+            "apps" -> Settings.ACTION_APPLICATION_SETTINGS
+            "date" -> Settings.ACTION_DATE_SETTINGS
+            "accessibility" -> Settings.ACTION_ACCESSIBILITY_SETTINGS
+            "developer" -> Settings.ACTION_APPLICATION_DEVELOPMENT_SETTINGS
+            "dnd" -> "android.settings.ZEN_MODE_SETTINGS"
+            "network" -> Settings.ACTION_WIRELESS_SETTINGS
+            "storage" -> Settings.ACTION_INTERNAL_STORAGE_SETTINGS
+            "security" -> Settings.ACTION_SECURITY_SETTINGS
+            else -> Settings.ACTION_SETTINGS
+        }
+        val intent = Intent(action).apply {
             addFlags(Intent.FLAG_ACTIVITY_NEW_TASK)
         }
-        service.startActivity(intent)
-        return ActionResult(true)
+        return try {
+            service.startActivity(intent)
+            ActionResult(true)
+        } catch (e: Exception) {
+            ActionResult(false, "Settings intent failed: ${e.message}")
+        }
     }
 
     private suspend fun executeWait(duration: Int): ActionResult {
diff --git a/android/app/src/main/java/com/thisux/droidclaw/connection/CommandRouter.kt b/android/app/src/main/java/com/thisux/droidclaw/connection/CommandRouter.kt
index 80b855c..6558e68 100644
--- a/android/app/src/main/java/com/thisux/droidclaw/connection/CommandRouter.kt
+++ b/android/app/src/main/java/com/thisux/droidclaw/connection/CommandRouter.kt
@@ -44,7 +44,7 @@ class
CommandRouter(
             "tap", "type", "enter", "back", "home", "notifications",
             "longpress", "swipe", "launch", "clear", "clipboard_set", "clipboard_get",
             "paste", "open_url", "switch_app",
-            "keyevent", "open_settings", "wait" -> handleAction(msg)
+            "keyevent", "open_settings", "wait", "intent" -> handleAction(msg)
 
             "goal_started" -> {
                 currentSessionId.value = msg.sessionId
diff --git a/android/app/src/main/java/com/thisux/droidclaw/connection/ConnectionService.kt b/android/app/src/main/java/com/thisux/droidclaw/connection/ConnectionService.kt
index 69ba273..04468a5 100644
--- a/android/app/src/main/java/com/thisux/droidclaw/connection/ConnectionService.kt
+++ b/android/app/src/main/java/com/thisux/droidclaw/connection/ConnectionService.kt
@@ -26,6 +26,7 @@ import com.thisux.droidclaw.model.AppsMessage
 import com.thisux.droidclaw.model.InstalledAppInfo
 import com.thisux.droidclaw.util.DeviceInfoHelper
 import android.content.pm.PackageManager
+import android.net.Uri
 import kotlinx.coroutines.delay
 import kotlinx.coroutines.flow.MutableStateFlow
 import kotlinx.coroutines.flow.first
@@ -192,11 +193,82 @@ class ConnectionService : LifecycleService() {
         val pm = packageManager
         val intent = Intent(Intent.ACTION_MAIN).addCategory(Intent.CATEGORY_LAUNCHER)
         val activities = pm.queryIntentActivities(intent, PackageManager.MATCH_ALL)
-        return activities.mapNotNull { resolveInfo ->
+        val apps = activities.mapNotNull { resolveInfo ->
             val pkg = resolveInfo.activityInfo.packageName
             val label = resolveInfo.loadLabel(pm).toString()
             InstalledAppInfo(packageName = pkg, label = label)
         }.distinctBy { it.packageName }.sortedBy { it.label.lowercase() }
+
+        // Discover intent capabilities per app
+        val intentMap = discoverIntentCapabilities()
+        return apps.map { app ->
+            val intents = intentMap[app.packageName]
+            if (intents != null) app.copy(intents = intents.toList()) else app
+        }
+    }
+
+    /**
+     * Probes installed apps to discover which URI schemes and intent actions each app
+     * can handle.
+     *
+     * @return packageName -> capabilities, each formatted as "VIEW:scheme",
+     *         "SENDTO:scheme", "SEND:mime", or a bare action name. Packages that
+     *         matched no probe are absent from the map.
+     */
+    private fun discoverIntentCapabilities(): Map<String, List<String>> {
+        val pm = packageManager
+        val result = mutableMapOf<String, MutableSet<String>>()
+
+        // Adds [capability] to every package that has an activity resolving the probe
+        // under MATCH_DEFAULT_ONLY. The probe is built inside the try so a URI or
+        // query failure only skips that one capability.
+        fun record(capability: String, buildProbe: () -> Intent) {
+            try {
+                pm.queryIntentActivities(buildProbe(), PackageManager.MATCH_DEFAULT_ONLY)
+                    .forEach { info ->
+                        result.getOrPut(info.activityInfo.packageName) { mutableSetOf() }
+                            .add(capability)
+                    }
+            } catch (_: Exception) { /* skip this probe */ }
+        }
+
+        // Probe ACTION_VIEW with common URI schemes
+        // NOTE(review): "maps.google.com" looks like a host rather than a URI scheme — confirm
+        val viewSchemes = listOf(
+            "tel", "sms", "smsto", "mailto", "geo", "https", "http",
+            "whatsapp", "instagram", "twitter", "fb", "spotify",
+            "vnd.youtube", "zoomus", "upi", "phonepe", "paytm",
+            "gpay", "tez", "google.navigation", "uber", "skype",
+            "viber", "telegram", "snapchat", "linkedin", "reddit",
+            "swiggy", "zomato", "ola", "maps.google.com"
+        )
+        for (scheme in viewSchemes) {
+            record("VIEW:$scheme") { Intent(Intent.ACTION_VIEW, Uri.parse("$scheme://test")) }
+        }
+
+        // Probe ACTION_SENDTO (sms, mailto)
+        for (scheme in listOf("sms", "mailto")) {
+            record("SENDTO:$scheme") { Intent(Intent.ACTION_SENDTO, Uri.parse("$scheme:test")) }
+        }
+
+        // Probe ACTION_SEND (share) with common MIME types
+        for (mime in listOf("text/plain", "image/*")) {
+            record("SEND:$mime") { Intent(Intent.ACTION_SEND).apply { type = mime } }
+        }
+
+        // Probe special actions
+        val specialActions = listOf(
+            "android.intent.action.SET_ALARM",
+            "android.intent.action.SET_TIMER",
+            "android.intent.action.DIAL",
+            "android.intent.action.INSERT",
+            "android.intent.action.CALL"
+        )
+        for (action in specialActions) {
+            record(action) { Intent(action) }
+        }
+
+        return result.mapValues { it.value.toList() }
     }
 
     private fun createNotificationChannel() {
diff --git a/android/app/src/main/java/com/thisux/droidclaw/model/Protocol.kt b/android/app/src/main/java/com/thisux/droidclaw/model/Protocol.kt
index 25dbbf7..0c9b4ba 100644
--- a/android/app/src/main/java/com/thisux/droidclaw/model/Protocol.kt
+++ b/android/app/src/main/java/com/thisux/droidclaw/model/Protocol.kt
@@ -61,7 +61,8 @@ data class HeartbeatMessage(
 @Serializable
 data class InstalledAppInfo(
     val packageName: String,
-    val label: String
+    val label: String,
+    val intents: List<String> = emptyList()
 )
 
 @Serializable
@@ -94,5 +95,11 @@ data class ServerMessage(
     val text: String? = null,
     val packageName: String? = null,
     val url: String? = null,
-    val code: Int? = null
+    val code: Int? = null,
+    // Intent fields
+    val intentAction: String? = null,
+    val intentUri: String? = null,
+    val intentType: String? = null,
+    val intentExtras: Map<String, String>? = null,
+    val setting: String? = null
 )
diff --git a/packages/shared/src/protocol.ts b/packages/shared/src/protocol.ts
index caacd57..38f3755 100644
--- a/packages/shared/src/protocol.ts
+++ b/packages/shared/src/protocol.ts
@@ -20,7 +20,7 @@ export type ServerToDeviceMessage =
   | { type: "back"; requestId: string }
   | { type: "home"; requestId: string }
   | { type: "longpress"; requestId: string; x: number; y: number }
-  | { type: "launch"; requestId: string; packageName: string }
+  | { type: "launch"; requestId: string; packageName: string; intentUri?: string; intentExtras?: Record<string, string> }
   | { type: "clear"; requestId: string }
   | { type: "clipboard_set"; requestId: string; text: string }
   | { type: "clipboard_get"; requestId: string }
@@ -29,8 +29,9 @@ export type ServerToDeviceMessage =
   | { type: "switch_app"; requestId: string; packageName: string }
   | { type: "notifications"; requestId: string }
   | { type: "keyevent"; requestId: string; code: number }
-  | { type: "open_settings"; requestId: string }
+  | { type: "open_settings"; requestId: string; setting?: string }
   | { type: "wait"; requestId: string; duration?: number }
+  | { type: "intent"; requestId: string; intentAction: string; intentUri?: string; intentType?: string; intentExtras?: Record<string, string>; packageName?: string }
   | { type: "ping" }
   | { type: "goal_started"; sessionId: string; goal: string }
   | { type: "goal_completed"; sessionId: string; success: boolean; stepsUsed: number };
diff --git a/server/src/agent/preprocessor.ts b/server/src/agent/preprocessor.ts
deleted file mode 100644
index f9cddc9..0000000
--- a/server/src/agent/preprocessor.ts
+++ /dev/null
@@ -1,218 +0,0 @@
-/**
- * Goal preprocessor for DroidClaw agent loop.
- *
- * Intercepts simple goals (like "open youtube") and executes direct
- * actions before the LLM loop starts. This avoids wasting 20 steps
- * on what should be a 2-step task, especially with weaker LLMs that
- * navigate via UI instead of using programmatic launch commands.
- */ - -import { sessions } from "../ws/sessions.js"; -import { db } from "../db.js"; -import { device as deviceTable } from "../schema.js"; -import { eq } from "drizzle-orm"; - -// ─── Installed App type ────────────────────────────────────── - -interface InstalledApp { - packageName: string; - label: string; -} - -// ─── Fallback App Name → Package Name Map ──────────────────── -// Used only when device has no installed apps data in DB. - -const FALLBACK_PACKAGES: Record = { - youtube: "com.google.android.youtube", - gmail: "com.google.android.gm", - chrome: "com.android.chrome", - maps: "com.google.android.apps.maps", - photos: "com.google.android.apps.photos", - drive: "com.google.android.apps.docs", - calendar: "com.google.android.calendar", - contacts: "com.google.android.contacts", - messages: "com.google.android.apps.messaging", - phone: "com.google.android.dialer", - clock: "com.google.android.deskclock", - calculator: "com.google.android.calculator", - camera: "com.android.camera", - settings: "com.android.settings", - files: "com.google.android.apps.nbu.files", - play: "com.android.vending", - "play store": "com.android.vending", - "google play": "com.android.vending", - whatsapp: "com.whatsapp", - telegram: "org.telegram.messenger", - instagram: "com.instagram.android", - facebook: "com.facebook.katana", - twitter: "com.twitter.android", - x: "com.twitter.android", - spotify: "com.spotify.music", - netflix: "com.netflix.mediaclient", - tiktok: "com.zhiliaoapp.musically", - snapchat: "com.snapchat.android", - reddit: "com.reddit.frontpage", - discord: "com.discord", - slack: "com.Slack", - zoom: "us.zoom.videomeetings", - teams: "com.microsoft.teams", - outlook: "com.microsoft.office.outlook", - "google meet": "com.google.android.apps.tachyon", - meet: "com.google.android.apps.tachyon", - keep: "com.google.android.keep", - notes: "com.google.android.keep", - sheets: "com.google.android.apps.docs.editors.sheets", - docs: 
"com.google.android.apps.docs.editors.docs", - slides: "com.google.android.apps.docs.editors.slides", - translate: "com.google.android.apps.translate", - weather: "com.google.android.apps.weather", - news: "com.google.android.apps.magazines", - podcasts: "com.google.android.apps.podcasts", - fitbit: "com.fitbit.FitbitMobile", - uber: "com.ubercab", - lyft: "me.lyft.android", - amazon: "com.amazon.mShop.android.shopping", - ebay: "com.ebay.mobile", - linkedin: "com.linkedin.android", - pinterest: "com.pinterest", - twitch: "tv.twitch.android.app", -}; - -// ─── Goal Pattern Matching ─────────────────────────────────── - -interface PreprocessResult { - /** Whether the preprocessor handled the goal */ - handled: boolean; - /** Command sent to device (if any) */ - command?: Record; - /** Updated goal text for the LLM (optional) */ - refinedGoal?: string; -} - -/** - * Build a label→packageName lookup from the device's installed apps. - * Keys are lowercase app labels (e.g. "youtube", "play store"). - */ -function buildInstalledAppMap(apps: InstalledApp[]): Record { - const map: Record = {}; - for (const app of apps) { - map[app.label.toLowerCase()] = app.packageName; - } - return map; -} - -/** - * Try to find an app name at the start of a goal string. - * Checks device's installed apps first, then falls back to hardcoded map. - * Returns the package name and remaining text, or null. - */ -function matchAppName( - lower: string, - installedApps: InstalledApp[] -): { pkg: string; appName: string; rest: string } | null { - // Build combined lookup: installed apps take priority over fallback - const installedMap = buildInstalledAppMap(installedApps); - const combined: Record = { ...FALLBACK_PACKAGES, ...installedMap }; - - // Try longest app names first (e.g. 
"google meet" before "meet") - const sorted = Object.keys(combined).sort((a, b) => b.length - a.length); - - for (const name of sorted) { - // Match: "open [app] and " or "open [app]" - const pattern = new RegExp( - `^(?:open|launch|start|go to)\\s+(?:the\\s+)?${escapeRegex(name)}(?:\\s+app)?(?:\\s+(?:and|then)\\s+(.+))?$` - ); - const m = lower.match(pattern); - if (m) { - return { pkg: combined[name], appName: name, rest: m[1]?.trim() ?? "" }; - } - } - return null; -} - -function escapeRegex(s: string): string { - return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); -} - -/** - * Fetch installed apps from the device record in DB. - */ -async function fetchInstalledApps(persistentDeviceId: string): Promise { - try { - const rows = await db - .select({ info: deviceTable.deviceInfo }) - .from(deviceTable) - .where(eq(deviceTable.id, persistentDeviceId)) - .limit(1); - const info = rows[0]?.info as Record | null; - return (info?.installedApps as InstalledApp[]) ?? []; - } catch { - return []; - } -} - -/** - * Attempt to preprocess a goal before the LLM loop. - * - * Three outcomes: - * 1. { handled: true, refinedGoal: undefined } — goal fully handled (pure "open X") - * 2. { handled: true, refinedGoal: "..." } — app launched, LLM continues with refined goal - * 3. { handled: false } — preprocessor can't help, LLM gets full goal - */ -export async function preprocessGoal( - deviceId: string, - goal: string, - persistentDeviceId?: string -): Promise { - const lower = goal.toLowerCase().trim(); - - // Fetch device's actual installed apps for accurate package resolution - const installedApps = persistentDeviceId ? 
await fetchInstalledApps(persistentDeviceId) : []; - - // ── Pattern: "open [and ]" ─────────────── - const appMatch = matchAppName(lower, installedApps); - - if (appMatch) { - try { - await sessions.sendCommand(deviceId, { - type: "launch", - packageName: appMatch.pkg, - }); - - if (appMatch.rest) { - // Compound goal: app launched, pass remaining instructions to LLM - console.log(`[Preprocessor] Launched ${appMatch.pkg}, refined goal: ${appMatch.rest}`); - return { - handled: true, - command: { type: "launch", packageName: appMatch.pkg }, - refinedGoal: appMatch.rest, - }; - } - - // Pure "open X" — fully handled - console.log(`[Preprocessor] Launched ${appMatch.pkg} for goal: ${goal}`); - return { handled: true, command: { type: "launch", packageName: appMatch.pkg } }; - } catch (err) { - console.warn(`[Preprocessor] Failed to launch ${appMatch.pkg}: ${err}`); - // Fall through to LLM - } - } - - // ── Pattern: "open " or "go to " ──────────────── - const urlMatch = lower.match( - /^(?:open|go to|visit|navigate to)\s+(https?:\/\/\S+)$/ - ); - - if (urlMatch) { - const url = urlMatch[1]; - try { - await sessions.sendCommand(deviceId, { type: "open_url", url }); - console.log(`[Preprocessor] Opened URL: ${url}`); - return { handled: true, command: { type: "open_url", url } }; - } catch (err) { - console.warn(`[Preprocessor] Failed to open URL: ${err}`); - } - } - - return { handled: false }; -}