diff --git a/android/app/build.gradle.kts b/android/app/build.gradle.kts index 3150bcd..5b540ce 100644 --- a/android/app/build.gradle.kts +++ b/android/app/build.gradle.kts @@ -48,7 +48,7 @@ dependencies { implementation(libs.androidx.compose.material3) // Ktor WebSocket - implementation(libs.ktor.client.cio) + implementation(libs.ktor.client.okhttp) implementation(libs.ktor.client.websockets) implementation(libs.ktor.client.content.negotiation) implementation(libs.ktor.serialization.kotlinx.json) diff --git a/android/app/src/main/AndroidManifest.xml b/android/app/src/main/AndroidManifest.xml index 8111fbf..f23f06f 100644 --- a/android/app/src/main/AndroidManifest.xml +++ b/android/app/src/main/AndroidManifest.xml @@ -11,6 +11,7 @@ + + android:theme="@style/Theme.DroidClaw" + android:networkSecurityConfig="@xml/network_security_config"> + + + + + + \ No newline at end of file diff --git a/android/app/src/main/java/com/thisux/droidclaw/DroidClawApp.kt b/android/app/src/main/java/com/thisux/droidclaw/DroidClawApp.kt index 31752b6..87fb727 100644 --- a/android/app/src/main/java/com/thisux/droidclaw/DroidClawApp.kt +++ b/android/app/src/main/java/com/thisux/droidclaw/DroidClawApp.kt @@ -2,13 +2,17 @@ package com.thisux.droidclaw import android.app.Application import com.thisux.droidclaw.data.SettingsStore +import com.thisux.droidclaw.data.WorkflowStore class DroidClawApp : Application() { lateinit var settingsStore: SettingsStore private set + lateinit var workflowStore: WorkflowStore + private set override fun onCreate() { super.onCreate() settingsStore = SettingsStore(this) + workflowStore = WorkflowStore(this) } } diff --git a/android/app/src/main/java/com/thisux/droidclaw/accessibility/DroidClawAccessibilityService.kt b/android/app/src/main/java/com/thisux/droidclaw/accessibility/DroidClawAccessibilityService.kt index 82b98da..00d9a2a 100644 --- a/android/app/src/main/java/com/thisux/droidclaw/accessibility/DroidClawAccessibilityService.kt +++ 
b/android/app/src/main/java/com/thisux/droidclaw/accessibility/DroidClawAccessibilityService.kt @@ -1,8 +1,11 @@ package com.thisux.droidclaw.accessibility import android.accessibilityservice.AccessibilityService +import android.content.ComponentName +import android.content.Context import android.util.Log import android.view.accessibility.AccessibilityEvent +import android.view.accessibility.AccessibilityManager import android.view.accessibility.AccessibilityNodeInfo import com.thisux.droidclaw.model.UIElement import kotlinx.coroutines.delay @@ -16,6 +19,15 @@ class DroidClawAccessibilityService : AccessibilityService() { val isRunning = MutableStateFlow(false) val lastScreenTree = MutableStateFlow>(emptyList()) var instance: DroidClawAccessibilityService? = null + + fun isEnabledOnDevice(context: Context): Boolean { + val am = context.getSystemService(Context.ACCESSIBILITY_SERVICE) as AccessibilityManager + val ourComponent = ComponentName(context, DroidClawAccessibilityService::class.java) + return am.getEnabledAccessibilityServiceList(AccessibilityEvent.TYPES_ALL_MASK) + .any { it.resolveInfo.serviceInfo.let { si -> + ComponentName(si.packageName, si.name) == ourComponent + }} + } } override fun onServiceConnected() { diff --git a/android/app/src/main/java/com/thisux/droidclaw/accessibility/ScreenTreeBuilder.kt b/android/app/src/main/java/com/thisux/droidclaw/accessibility/ScreenTreeBuilder.kt index 918f9ab..3f28b58 100644 --- a/android/app/src/main/java/com/thisux/droidclaw/accessibility/ScreenTreeBuilder.kt +++ b/android/app/src/main/java/com/thisux/droidclaw/accessibility/ScreenTreeBuilder.kt @@ -21,6 +21,9 @@ object ScreenTreeBuilder { parentDesc: String ) { try { + // Skip DroidClaw's own overlay nodes so the agent never sees them + if (node.packageName?.toString() == "com.thisux.droidclaw") return + val rect = Rect() node.getBoundsInScreen(rect) diff --git a/android/app/src/main/java/com/thisux/droidclaw/capture/ScreenCaptureManager.kt 
b/android/app/src/main/java/com/thisux/droidclaw/capture/ScreenCaptureManager.kt index d006d42..67386f5 100644 --- a/android/app/src/main/java/com/thisux/droidclaw/capture/ScreenCaptureManager.kt +++ b/android/app/src/main/java/com/thisux/droidclaw/capture/ScreenCaptureManager.kt @@ -22,6 +22,9 @@ class ScreenCaptureManager(private val context: Context) { companion object { private const val TAG = "ScreenCapture" + private const val PREFS_NAME = "screen_capture" + private const val KEY_RESULT_CODE = "consent_result_code" + private const val KEY_CONSENT_URI = "consent_data_uri" val isAvailable = MutableStateFlow(false) // Stores MediaProjection consent for use by ConnectionService @@ -37,6 +40,37 @@ class ScreenCaptureManager(private val context: Context) { hasConsentState.value = (resultCode == Activity.RESULT_OK && data != null) } + fun storeConsent(context: Context, resultCode: Int, data: Intent?) { + storeConsent(resultCode, data) + if (resultCode == Activity.RESULT_OK && data != null) { + context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE).edit() + .putInt(KEY_RESULT_CODE, resultCode) + .putString(KEY_CONSENT_URI, data.toUri(0)) + .apply() + } + } + + fun restoreConsent(context: Context) { + if (consentResultCode != null && consentData != null) return + val prefs = context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE) + val code = prefs.getInt(KEY_RESULT_CODE, 0) + val uri = prefs.getString(KEY_CONSENT_URI, null) + if (code == Activity.RESULT_OK && uri != null) { + consentResultCode = code + consentData = Intent.parseUri(uri, 0) + hasConsentState.value = true + } + } + + fun clearConsent(context: Context) { + consentResultCode = null + consentData = null + hasConsentState.value = false + context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE).edit() + .clear() + .apply() + } + fun hasConsent(): Boolean = consentResultCode != null && consentData != null } diff --git 
a/android/app/src/main/java/com/thisux/droidclaw/connection/CommandRouter.kt b/android/app/src/main/java/com/thisux/droidclaw/connection/CommandRouter.kt index 6558e68..84b9421 100644 --- a/android/app/src/main/java/com/thisux/droidclaw/connection/CommandRouter.kt +++ b/android/app/src/main/java/com/thisux/droidclaw/connection/CommandRouter.kt @@ -12,14 +12,20 @@ import com.thisux.droidclaw.model.PongMessage import com.thisux.droidclaw.model.ResultResponse import com.thisux.droidclaw.model.ScreenResponse import com.thisux.droidclaw.model.ServerMessage +import com.thisux.droidclaw.model.Workflow import kotlinx.coroutines.flow.MutableStateFlow +import kotlinx.serialization.json.Json class CommandRouter( private val webSocket: ReliableWebSocket, - private val captureManager: ScreenCaptureManager? + private val captureManager: ScreenCaptureManager?, + private val onWorkflowSync: (suspend (List) -> Unit)? = null, + private val onWorkflowCreated: (suspend (Workflow) -> Unit)? = null, + private val onWorkflowDeleted: (suspend (String) -> Unit)? 
= null ) { companion object { private const val TAG = "CommandRouter" + private val json = Json { ignoreUnknownKeys = true; encodeDefaults = true } } val currentGoalStatus = MutableStateFlow(GoalStatus.Idle) @@ -71,6 +77,38 @@ class CommandRouter( Log.i(TAG, "Goal failed: ${msg.message}") } + "workflow_created" -> { + val wfJson = msg.workflowJson ?: return + try { + val wf = json.decodeFromString(wfJson) + onWorkflowCreated?.invoke(wf) + Log.i(TAG, "Workflow created: ${wf.name}") + } catch (e: Exception) { + Log.e(TAG, "Failed to parse workflow_created: ${e.message}") + } + } + "workflow_synced" -> { + val wfsJson = msg.workflowsJson ?: return + try { + val wfs = json.decodeFromString>(wfsJson) + onWorkflowSync?.invoke(wfs) + Log.i(TAG, "Workflows synced: ${wfs.size} workflows") + } catch (e: Exception) { + Log.e(TAG, "Failed to parse workflow_synced: ${e.message}") + } + } + "workflow_deleted" -> { + val id = msg.workflowId ?: return + onWorkflowDeleted?.invoke(id) + Log.i(TAG, "Workflow deleted: $id") + } + "workflow_goal" -> { + val goal = msg.goal ?: return + Log.i(TAG, "Workflow-triggered goal: $goal") + // Submit as a regular goal via the WebSocket + webSocket.sendTyped(com.thisux.droidclaw.model.GoalMessage(text = goal)) + } + else -> Log.w(TAG, "Unknown message type: ${msg.type}") } } diff --git a/android/app/src/main/java/com/thisux/droidclaw/connection/ConnectionService.kt b/android/app/src/main/java/com/thisux/droidclaw/connection/ConnectionService.kt index 04468a5..ee08d12 100644 --- a/android/app/src/main/java/com/thisux/droidclaw/connection/ConnectionService.kt +++ b/android/app/src/main/java/com/thisux/droidclaw/connection/ConnectionService.kt @@ -27,6 +27,14 @@ import com.thisux.droidclaw.model.InstalledAppInfo import com.thisux.droidclaw.util.DeviceInfoHelper import android.content.pm.PackageManager import android.net.Uri +import android.provider.Settings +import com.thisux.droidclaw.model.StopGoalMessage +import 
com.thisux.droidclaw.model.WorkflowCreateMessage +import com.thisux.droidclaw.model.WorkflowDeleteMessage +import com.thisux.droidclaw.model.WorkflowSyncMessage +import com.thisux.droidclaw.model.WorkflowTriggerMessage +import com.thisux.droidclaw.model.WorkflowUpdateMessage +import com.thisux.droidclaw.overlay.AgentOverlay import kotlinx.coroutines.delay import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.first @@ -56,11 +64,13 @@ class ConnectionService : LifecycleService() { private var commandRouter: CommandRouter? = null private var captureManager: ScreenCaptureManager? = null private var wakeLock: PowerManager.WakeLock? = null + private var overlay: AgentOverlay? = null override fun onCreate() { super.onCreate() instance = this createNotificationChannel() + overlay = AgentOverlay(this) } override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int { @@ -97,6 +107,7 @@ class ConnectionService : LifecycleService() { return@launch } + ScreenCaptureManager.restoreConsent(this@ConnectionService) captureManager = ScreenCaptureManager(this@ConnectionService).also { mgr -> if (ScreenCaptureManager.hasConsent()) { try { @@ -105,7 +116,8 @@ class ConnectionService : LifecycleService() { ScreenCaptureManager.consentData!! 
) } catch (e: SecurityException) { - Log.w(TAG, "Screen capture unavailable (needs mediaProjection service type): ${e.message}") + Log.w(TAG, "Screen capture unavailable: ${e.message}") + ScreenCaptureManager.clearConsent(this@ConnectionService) } } } @@ -115,7 +127,12 @@ class ConnectionService : LifecycleService() { } webSocket = ws - val router = CommandRouter(ws, captureManager) + val router = CommandRouter( + ws, captureManager, + onWorkflowSync = { workflows -> app.workflowStore.replaceAll(workflows) }, + onWorkflowCreated = { workflow -> app.workflowStore.save(workflow) }, + onWorkflowDeleted = { id -> app.workflowStore.delete(id) } + ) commandRouter = router launch { @@ -131,9 +148,14 @@ class ConnectionService : LifecycleService() { ) // Send installed apps list once connected if (state == ConnectionState.Connected) { + if (Settings.canDrawOverlays(this@ConnectionService)) { + overlay?.show() + } val apps = getInstalledApps() webSocket?.sendTyped(AppsMessage(apps = apps)) Log.i(TAG, "Sent ${apps.size} installed apps to server") + // Sync workflows from server + webSocket?.sendTyped(WorkflowSyncMessage()) } } } @@ -167,7 +189,32 @@ class ConnectionService : LifecycleService() { webSocket?.sendTyped(GoalMessage(text = text)) } + fun stopGoal() { + webSocket?.sendTyped(StopGoalMessage()) + } + + fun sendWorkflowCreate(description: String) { + webSocket?.sendTyped(WorkflowCreateMessage(description = description)) + } + + fun sendWorkflowUpdate(workflowId: String, enabled: Boolean?) 
{ + webSocket?.sendTyped(WorkflowUpdateMessage(workflowId = workflowId, enabled = enabled)) + } + + fun sendWorkflowDelete(workflowId: String) { + webSocket?.sendTyped(WorkflowDeleteMessage(workflowId = workflowId)) + } + + fun sendWorkflowSync() { + webSocket?.sendTyped(WorkflowSyncMessage()) + } + + fun sendWorkflowTrigger(msg: WorkflowTriggerMessage) { + webSocket?.sendTyped(msg) + } + private fun disconnect() { + overlay?.hide() webSocket?.disconnect() webSocket = null commandRouter?.reset() @@ -179,6 +226,8 @@ class ConnectionService : LifecycleService() { } override fun onDestroy() { + overlay?.destroy() + overlay = null disconnect() instance = null super.onDestroy() diff --git a/android/app/src/main/java/com/thisux/droidclaw/connection/ReliableWebSocket.kt b/android/app/src/main/java/com/thisux/droidclaw/connection/ReliableWebSocket.kt index f3dab02..7e8d434 100644 --- a/android/app/src/main/java/com/thisux/droidclaw/connection/ReliableWebSocket.kt +++ b/android/app/src/main/java/com/thisux/droidclaw/connection/ReliableWebSocket.kt @@ -6,8 +6,7 @@ import com.thisux.droidclaw.model.ConnectionState import com.thisux.droidclaw.model.DeviceInfoMsg import com.thisux.droidclaw.model.ServerMessage import io.ktor.client.HttpClient -import io.ktor.client.engine.cio.CIO -import io.ktor.client.engine.cio.endpoint +import io.ktor.client.engine.okhttp.OkHttp import io.ktor.client.plugins.websocket.WebSockets import io.ktor.client.plugins.websocket.webSocket import io.ktor.websocket.Frame @@ -78,14 +77,7 @@ class ReliableWebSocket( } private suspend fun connectOnce(serverUrl: String, apiKey: String, deviceInfo: DeviceInfoMsg) { - val httpClient = HttpClient(CIO) { - engine { - requestTimeout = 30_000 - endpoint { - connectTimeout = 10_000 - keepAliveTime = 30_000 - } - } + val httpClient = HttpClient(OkHttp) { install(WebSockets) { pingIntervalMillis = 30_000 } diff --git a/android/app/src/main/java/com/thisux/droidclaw/data/WorkflowStore.kt 
b/android/app/src/main/java/com/thisux/droidclaw/data/WorkflowStore.kt new file mode 100644 index 0000000..e96a0fd --- /dev/null +++ b/android/app/src/main/java/com/thisux/droidclaw/data/WorkflowStore.kt @@ -0,0 +1,48 @@ +package com.thisux.droidclaw.data + +import android.content.Context +import androidx.datastore.preferences.core.edit +import androidx.datastore.preferences.core.stringPreferencesKey +import com.thisux.droidclaw.model.Workflow +import kotlinx.coroutines.flow.Flow +import kotlinx.coroutines.flow.map +import kotlinx.serialization.encodeToString +import kotlinx.serialization.json.Json + +private val WORKFLOWS_KEY = stringPreferencesKey("workflows_json") +private val json = Json { ignoreUnknownKeys = true; encodeDefaults = true } + +class WorkflowStore(private val context: Context) { + + val workflows: Flow> = context.dataStore.data.map { prefs -> + val raw = prefs[WORKFLOWS_KEY] ?: "[]" + try { json.decodeFromString>(raw) } catch (_: Exception) { emptyList() } + } + + suspend fun save(workflow: Workflow) { + context.dataStore.edit { prefs -> + val list = currentList(prefs).toMutableList() + val idx = list.indexOfFirst { it.id == workflow.id } + if (idx >= 0) list[idx] = workflow else list.add(workflow) + prefs[WORKFLOWS_KEY] = json.encodeToString(list) + } + } + + suspend fun delete(workflowId: String) { + context.dataStore.edit { prefs -> + val list = currentList(prefs).filter { it.id != workflowId } + prefs[WORKFLOWS_KEY] = json.encodeToString(list) + } + } + + suspend fun replaceAll(workflows: List) { + context.dataStore.edit { prefs -> + prefs[WORKFLOWS_KEY] = json.encodeToString(workflows) + } + } + + private fun currentList(prefs: androidx.datastore.preferences.core.Preferences): List { + val raw = prefs[WORKFLOWS_KEY] ?: "[]" + return try { json.decodeFromString>(raw) } catch (_: Exception) { emptyList() } + } +} diff --git a/android/app/src/main/java/com/thisux/droidclaw/model/Protocol.kt 
b/android/app/src/main/java/com/thisux/droidclaw/model/Protocol.kt index 0c9b4ba..3397879 100644 --- a/android/app/src/main/java/com/thisux/droidclaw/model/Protocol.kt +++ b/android/app/src/main/java/com/thisux/droidclaw/model/Protocol.kt @@ -71,6 +71,44 @@ data class AppsMessage( val apps: List ) +@Serializable +data class StopGoalMessage( + val type: String = "stop_goal" +) + +@Serializable +data class WorkflowCreateMessage( + val type: String = "workflow_create", + val description: String // natural-language workflow description +) + +@Serializable +data class WorkflowUpdateMessage( + val type: String = "workflow_update", + val workflowId: String, + val enabled: Boolean? = null +) + +@Serializable +data class WorkflowDeleteMessage( + val type: String = "workflow_delete", + val workflowId: String +) + +@Serializable +data class WorkflowSyncMessage( + val type: String = "workflow_sync" +) + +@Serializable +data class WorkflowTriggerMessage( + val type: String = "workflow_trigger", + val workflowId: String, + val notificationApp: String? = null, + val notificationTitle: String? = null, + val notificationText: String? = null +) + @Serializable data class ServerMessage( val type: String, @@ -101,5 +139,9 @@ data class ServerMessage( val intentUri: String? = null, val intentType: String? = null, val intentExtras: Map? = null, - val setting: String? = null + val setting: String? = null, + // Workflow fields + val workflowId: String? = null, + val workflowJson: String? = null, // single workflow as JSON + val workflowsJson: String? 
= null // array of workflows as JSON (for sync) ) diff --git a/android/app/src/main/java/com/thisux/droidclaw/model/Workflow.kt b/android/app/src/main/java/com/thisux/droidclaw/model/Workflow.kt new file mode 100644 index 0000000..730e2db --- /dev/null +++ b/android/app/src/main/java/com/thisux/droidclaw/model/Workflow.kt @@ -0,0 +1,32 @@ +package com.thisux.droidclaw.model + +import kotlinx.serialization.Serializable + +@Serializable +enum class TriggerType { + notification +} + +@Serializable +enum class MatchMode { + contains, exact, regex +} + +@Serializable +data class TriggerCondition( + val field: String, // "app_package", "title", "text" + val matchMode: MatchMode, + val value: String +) + +@Serializable +data class Workflow( + val id: String, + val name: String, + val description: String, // original natural-language input + val triggerType: TriggerType = TriggerType.notification, + val conditions: List = emptyList(), + val goalTemplate: String, // sent to agent as a goal + val enabled: Boolean = true, + val createdAt: Long = System.currentTimeMillis() +) diff --git a/android/app/src/main/java/com/thisux/droidclaw/overlay/AgentOverlay.kt b/android/app/src/main/java/com/thisux/droidclaw/overlay/AgentOverlay.kt new file mode 100644 index 0000000..2818101 --- /dev/null +++ b/android/app/src/main/java/com/thisux/droidclaw/overlay/AgentOverlay.kt @@ -0,0 +1,92 @@ +package com.thisux.droidclaw.overlay + +import android.graphics.PixelFormat +import android.view.Gravity +import android.view.MotionEvent +import android.view.View +import android.view.WindowManager +import androidx.compose.ui.platform.ComposeView +import androidx.lifecycle.Lifecycle +import androidx.lifecycle.LifecycleService +import androidx.lifecycle.setViewTreeLifecycleOwner +import androidx.savedstate.SavedStateRegistry +import androidx.savedstate.SavedStateRegistryController +import androidx.savedstate.SavedStateRegistryOwner +import androidx.savedstate.setViewTreeSavedStateRegistryOwner + 
+class AgentOverlay(private val service: LifecycleService) { + + private val windowManager = service.getSystemService(WindowManager::class.java) + private var composeView: ComposeView? = null + + private val savedStateOwner = object : SavedStateRegistryOwner { + private val controller = SavedStateRegistryController.create(this) + override val lifecycle: Lifecycle get() = service.lifecycle + override val savedStateRegistry: SavedStateRegistry get() = controller.savedStateRegistry + init { controller.performRestore(null) } + } + + private val layoutParams = WindowManager.LayoutParams( + WindowManager.LayoutParams.WRAP_CONTENT, + WindowManager.LayoutParams.WRAP_CONTENT, + WindowManager.LayoutParams.TYPE_APPLICATION_OVERLAY, + WindowManager.LayoutParams.FLAG_NOT_FOCUSABLE, + PixelFormat.TRANSLUCENT + ).apply { + gravity = Gravity.TOP or Gravity.START + x = 0 + y = 200 + } + + fun show() { + if (composeView != null) return + + val view = ComposeView(service).apply { + importantForAccessibility = View.IMPORTANT_FOR_ACCESSIBILITY_NO_HIDE_DESCENDANTS + setViewTreeLifecycleOwner(service) + setViewTreeSavedStateRegistryOwner(savedStateOwner) + setContent { OverlayContent() } + setupDrag(this) + } + + composeView = view + windowManager.addView(view, layoutParams) + } + + fun hide() { + composeView?.let { + windowManager.removeView(it) + } + composeView = null + } + + fun destroy() { + hide() + } + + private fun setupDrag(view: View) { + var initialX = 0 + var initialY = 0 + var initialTouchX = 0f + var initialTouchY = 0f + + view.setOnTouchListener { _, event -> + when (event.action) { + MotionEvent.ACTION_DOWN -> { + initialX = layoutParams.x + initialY = layoutParams.y + initialTouchX = event.rawX + initialTouchY = event.rawY + true + } + MotionEvent.ACTION_MOVE -> { + layoutParams.x = initialX + (event.rawX - initialTouchX).toInt() + layoutParams.y = initialY + (event.rawY - initialTouchY).toInt() + windowManager.updateViewLayout(view, layoutParams) + true + } + else -> 
false + } + } + } +} diff --git a/android/app/src/main/java/com/thisux/droidclaw/overlay/OverlayContent.kt b/android/app/src/main/java/com/thisux/droidclaw/overlay/OverlayContent.kt new file mode 100644 index 0000000..a793ba7 --- /dev/null +++ b/android/app/src/main/java/com/thisux/droidclaw/overlay/OverlayContent.kt @@ -0,0 +1,171 @@ +package com.thisux.droidclaw.overlay + +import androidx.compose.animation.animateColorAsState +import androidx.compose.animation.core.LinearEasing +import androidx.compose.animation.core.RepeatMode +import androidx.compose.animation.core.animateFloat +import androidx.compose.animation.core.infiniteRepeatable +import androidx.compose.animation.core.rememberInfiniteTransition +import androidx.compose.animation.core.tween +import androidx.compose.foundation.background +import androidx.compose.foundation.layout.Arrangement +import androidx.compose.foundation.layout.Box +import androidx.compose.foundation.layout.Row +import androidx.compose.foundation.layout.height +import androidx.compose.foundation.layout.padding +import androidx.compose.foundation.layout.size +import androidx.compose.foundation.layout.widthIn +import androidx.compose.foundation.shape.CircleShape +import androidx.compose.foundation.shape.RoundedCornerShape +import androidx.compose.material3.Icon +import androidx.compose.material3.IconButton +import androidx.compose.material3.IconButtonDefaults +import androidx.compose.material3.MaterialTheme +import androidx.compose.material3.Text +import androidx.compose.material.icons.Icons +import androidx.compose.material.icons.filled.Close +import androidx.compose.runtime.Composable +import androidx.compose.runtime.LaunchedEffect +import androidx.compose.runtime.collectAsState +import androidx.compose.runtime.getValue +import androidx.compose.runtime.mutableStateOf +import androidx.compose.runtime.remember +import androidx.compose.runtime.setValue +import androidx.compose.ui.Alignment +import androidx.compose.ui.Modifier +import 
androidx.compose.ui.draw.alpha +import androidx.compose.ui.draw.clip +import androidx.compose.ui.graphics.Color +import androidx.compose.ui.text.style.TextOverflow +import androidx.compose.ui.unit.dp +import androidx.compose.ui.unit.sp +import com.thisux.droidclaw.connection.ConnectionService +import com.thisux.droidclaw.model.ConnectionState +import com.thisux.droidclaw.model.GoalStatus +import com.thisux.droidclaw.ui.theme.DroidClawTheme +import kotlinx.coroutines.delay + +private val Green = Color(0xFF4CAF50) +private val Blue = Color(0xFF2196F3) +private val Red = Color(0xFFF44336) +private val Gray = Color(0xFF9E9E9E) +private val PillBackground = Color(0xE6212121) + +@Composable +fun OverlayContent() { + DroidClawTheme { + val connectionState by ConnectionService.connectionState.collectAsState() + val goalStatus by ConnectionService.currentGoalStatus.collectAsState() + val steps by ConnectionService.currentSteps.collectAsState() + + // Auto-reset Completed/Failed back to Idle after 3s + var displayStatus by remember { mutableStateOf(goalStatus) } + LaunchedEffect(goalStatus) { + displayStatus = goalStatus + if (goalStatus == GoalStatus.Completed || goalStatus == GoalStatus.Failed) { + delay(3000) + displayStatus = GoalStatus.Idle + } + } + + val isConnected = connectionState == ConnectionState.Connected + + val dotColor by animateColorAsState( + targetValue = when { + !isConnected -> Gray + displayStatus == GoalStatus.Running -> Blue + displayStatus == GoalStatus.Failed -> Red + else -> Green + }, + label = "dotColor" + ) + + val statusText = when { + !isConnected -> "Offline" + displayStatus == GoalStatus.Running -> { + val last = steps.lastOrNull() + if (last != null) { + val label = last.reasoning.ifBlank { + // Extract just the action name from the JSON string + Regex("""action[=:]?\s*(\w+)""").find(last.action)?.groupValues?.get(1) ?: "working" + } + "${last.step}: $label" + } else "Running..." 
+ } + displayStatus == GoalStatus.Completed -> "Done" + displayStatus == GoalStatus.Failed -> "Stopped" + else -> "Ready" + } + + Row( + modifier = Modifier + .clip(RoundedCornerShape(24.dp)) + .background(PillBackground) + .height(48.dp) + .widthIn(min = 100.dp, max = 220.dp) + .padding(horizontal = 12.dp), + verticalAlignment = Alignment.CenterVertically, + horizontalArrangement = Arrangement.spacedBy(8.dp) + ) { + StatusDot( + color = dotColor, + pulse = isConnected && displayStatus == GoalStatus.Running + ) + + Text( + text = statusText, + color = Color.White, + fontSize = 13.sp, + maxLines = 1, + overflow = TextOverflow.Ellipsis, + modifier = Modifier.weight(1f, fill = false) + ) + + if (isConnected && displayStatus == GoalStatus.Running) { + IconButton( + onClick = { ConnectionService.instance?.stopGoal() }, + modifier = Modifier.size(28.dp), + colors = IconButtonDefaults.iconButtonColors( + contentColor = Color.White.copy(alpha = 0.8f) + ) + ) { + Icon( + imageVector = Icons.Default.Close, + contentDescription = "Stop goal", + modifier = Modifier.size(16.dp) + ) + } + } + } + } +} + +@Composable +private fun StatusDot(color: Color, pulse: Boolean) { + if (pulse) { + val transition = rememberInfiniteTransition(label = "pulse") + val alpha by transition.animateFloat( + initialValue = 1f, + targetValue = 0.3f, + animationSpec = infiniteRepeatable( + animation = tween(800, easing = LinearEasing), + repeatMode = RepeatMode.Reverse + ), + label = "pulseAlpha" + ) + Box( + modifier = Modifier + .size(10.dp) + .alpha(alpha) + .clip(CircleShape) + .background(color) + ) + } else { + Box( + modifier = Modifier + .size(10.dp) + .clip(CircleShape) + .background(color) + ) + } +} diff --git a/android/app/src/main/java/com/thisux/droidclaw/ui/screens/HomeScreen.kt b/android/app/src/main/java/com/thisux/droidclaw/ui/screens/HomeScreen.kt index de4ce07..5e04328 100644 --- a/android/app/src/main/java/com/thisux/droidclaw/ui/screens/HomeScreen.kt +++ 
b/android/app/src/main/java/com/thisux/droidclaw/ui/screens/HomeScreen.kt @@ -1,7 +1,9 @@ package com.thisux.droidclaw.ui.screens import android.content.Intent +import androidx.compose.animation.AnimatedVisibility import androidx.compose.foundation.background +import androidx.compose.foundation.clickable import androidx.compose.foundation.layout.Arrangement import androidx.compose.foundation.layout.Box import androidx.compose.foundation.layout.Column @@ -15,35 +17,49 @@ import androidx.compose.foundation.layout.size import androidx.compose.foundation.lazy.LazyColumn import androidx.compose.foundation.lazy.items import androidx.compose.foundation.shape.CircleShape +import androidx.compose.material.icons.Icons +import androidx.compose.material.icons.filled.Delete +import androidx.compose.material.icons.filled.ExpandLess +import androidx.compose.material.icons.filled.ExpandMore import androidx.compose.material3.Button import androidx.compose.material3.Card +import androidx.compose.material3.CardDefaults +import androidx.compose.material3.Icon +import androidx.compose.material3.IconButton import androidx.compose.material3.MaterialTheme import androidx.compose.material3.OutlinedTextField +import androidx.compose.material3.Switch import androidx.compose.material3.Text import androidx.compose.runtime.Composable import androidx.compose.runtime.collectAsState import androidx.compose.runtime.getValue import androidx.compose.runtime.mutableStateOf import androidx.compose.runtime.remember +import androidx.compose.runtime.saveable.rememberSaveable import androidx.compose.runtime.setValue import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier import androidx.compose.ui.draw.clip import androidx.compose.ui.graphics.Color import androidx.compose.ui.platform.LocalContext +import androidx.compose.ui.text.style.TextOverflow import androidx.compose.ui.unit.dp +import com.thisux.droidclaw.DroidClawApp import com.thisux.droidclaw.connection.ConnectionService import 
com.thisux.droidclaw.model.ConnectionState import com.thisux.droidclaw.model.GoalStatus +import com.thisux.droidclaw.model.Workflow @Composable fun HomeScreen() { val context = LocalContext.current + val app = context.applicationContext as DroidClawApp val connectionState by ConnectionService.connectionState.collectAsState() val goalStatus by ConnectionService.currentGoalStatus.collectAsState() val steps by ConnectionService.currentSteps.collectAsState() val currentGoal by ConnectionService.currentGoal.collectAsState() val errorMessage by ConnectionService.errorMessage.collectAsState() + val workflows by app.workflowStore.workflows.collectAsState(initial = emptyList()) var goalInput by remember { mutableStateOf("") } @@ -108,7 +124,7 @@ fun HomeScreen() { Spacer(modifier = Modifier.height(16.dp)) - // Goal Input + // Goal Input — same field for goals and workflows Row( modifier = Modifier.fillMaxWidth(), horizontalArrangement = Arrangement.spacedBy(8.dp) @@ -116,7 +132,7 @@ fun HomeScreen() { OutlinedTextField( value = goalInput, onValueChange = { goalInput = it }, - label = { Text("Enter a goal...") }, + label = { Text("Goal or workflow...") }, modifier = Modifier.weight(1f), enabled = connectionState == ConnectionState.Connected && goalStatus != GoalStatus.Running, singleLine = true @@ -136,7 +152,7 @@ fun HomeScreen() { && goalStatus != GoalStatus.Running && goalInput.isNotBlank() ) { - Text("Run") + Text("Send") } } @@ -192,5 +208,103 @@ fun HomeScreen() { } ) } + + // Saved Workflows section + if (workflows.isNotEmpty()) { + Spacer(modifier = Modifier.height(8.dp)) + WorkflowsSection(workflows) + } + } +} + +@Composable +private fun WorkflowsSection(workflows: List) { + var expanded by rememberSaveable { mutableStateOf(false) } + + Column { + Row( + modifier = Modifier + .fillMaxWidth() + .clickable { expanded = !expanded } + .padding(vertical = 4.dp), + verticalAlignment = Alignment.CenterVertically, + horizontalArrangement = Arrangement.SpaceBetween + ) { + 
Text( + text = "Saved Workflows (${workflows.size})", + style = MaterialTheme.typography.titleSmall + ) + Icon( + imageVector = if (expanded) Icons.Default.ExpandLess else Icons.Default.ExpandMore, + contentDescription = if (expanded) "Collapse" else "Expand" + ) + } + + AnimatedVisibility(visible = expanded) { + Column(verticalArrangement = Arrangement.spacedBy(6.dp)) { + workflows.forEach { wf -> + WorkflowChip(wf) + } + } + } + } +} + +@Composable +private fun WorkflowChip(workflow: Workflow) { + Card( + modifier = Modifier.fillMaxWidth(), + colors = CardDefaults.cardColors( + containerColor = if (workflow.enabled) { + MaterialTheme.colorScheme.secondaryContainer + } else { + MaterialTheme.colorScheme.surfaceVariant + } + ) + ) { + Row( + modifier = Modifier + .fillMaxWidth() + .padding(horizontal = 12.dp, vertical = 8.dp), + verticalAlignment = Alignment.CenterVertically, + horizontalArrangement = Arrangement.SpaceBetween + ) { + Column(modifier = Modifier.weight(1f)) { + Text( + text = workflow.name, + style = MaterialTheme.typography.bodyMedium, + maxLines = 1, + overflow = TextOverflow.Ellipsis + ) + Text( + text = workflow.description, + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + maxLines = 1, + overflow = TextOverflow.Ellipsis + ) + } + Row(verticalAlignment = Alignment.CenterVertically) { + Switch( + checked = workflow.enabled, + onCheckedChange = { enabled -> + ConnectionService.instance?.sendWorkflowUpdate(workflow.id, enabled) + } + ) + IconButton( + onClick = { + ConnectionService.instance?.sendWorkflowDelete(workflow.id) + }, + modifier = Modifier.size(32.dp) + ) { + Icon( + Icons.Default.Delete, + contentDescription = "Delete", + tint = MaterialTheme.colorScheme.error, + modifier = Modifier.size(18.dp) + ) + } + } + } } } diff --git a/android/app/src/main/java/com/thisux/droidclaw/ui/screens/SettingsScreen.kt b/android/app/src/main/java/com/thisux/droidclaw/ui/screens/SettingsScreen.kt index 
2696d84..c5d21d4 100644 --- a/android/app/src/main/java/com/thisux/droidclaw/ui/screens/SettingsScreen.kt +++ b/android/app/src/main/java/com/thisux/droidclaw/ui/screens/SettingsScreen.kt @@ -2,7 +2,10 @@ package com.thisux.droidclaw.ui.screens import android.app.Activity import android.content.Context +import android.content.Intent import android.media.projection.MediaProjectionManager +import android.net.Uri +import android.provider.Settings import androidx.activity.compose.rememberLauncherForActivityResult import androidx.activity.result.contract.ActivityResultContracts import androidx.compose.foundation.layout.Arrangement @@ -46,6 +49,7 @@ import com.thisux.droidclaw.DroidClawApp import com.thisux.droidclaw.accessibility.DroidClawAccessibilityService import com.thisux.droidclaw.capture.ScreenCaptureManager import com.thisux.droidclaw.util.BatteryOptimization +import com.thisux.droidclaw.workflow.WorkflowNotificationService import kotlinx.coroutines.launch @Composable @@ -62,18 +66,31 @@ fun SettingsScreen() { var editingServerUrl by remember { mutableStateOf(null) } val displayServerUrl = editingServerUrl ?: serverUrl - val isAccessibilityEnabled by DroidClawAccessibilityService.isRunning.collectAsState() val isCaptureAvailable by ScreenCaptureManager.isAvailable.collectAsState() - val hasConsent by ScreenCaptureManager.hasConsentState.collectAsState() - val hasCaptureConsent = isCaptureAvailable || hasConsent + var isAccessibilityEnabled by remember { + mutableStateOf(DroidClawAccessibilityService.isEnabledOnDevice(context)) + } + var hasCaptureConsent by remember { + ScreenCaptureManager.restoreConsent(context) + mutableStateOf(isCaptureAvailable || ScreenCaptureManager.hasConsent()) + } var isBatteryExempt by remember { mutableStateOf(BatteryOptimization.isIgnoringBatteryOptimizations(context)) } + var hasOverlayPermission by remember { mutableStateOf(Settings.canDrawOverlays(context)) } + var isNotificationListenerEnabled by remember { + 
mutableStateOf(WorkflowNotificationService.isEnabled(context)) + } val lifecycleOwner = LocalLifecycleOwner.current DisposableEffect(lifecycleOwner) { val observer = LifecycleEventObserver { _, event -> if (event == Lifecycle.Event.ON_RESUME) { + isAccessibilityEnabled = DroidClawAccessibilityService.isEnabledOnDevice(context) + ScreenCaptureManager.restoreConsent(context) + hasCaptureConsent = isCaptureAvailable || ScreenCaptureManager.hasConsent() isBatteryExempt = BatteryOptimization.isIgnoringBatteryOptimizations(context) + hasOverlayPermission = Settings.canDrawOverlays(context) + isNotificationListenerEnabled = WorkflowNotificationService.isEnabled(context) } } lifecycleOwner.lifecycle.addObserver(observer) @@ -84,7 +101,8 @@ fun SettingsScreen() { ActivityResultContracts.StartActivityForResult() ) { result -> if (result.resultCode == Activity.RESULT_OK && result.data != null) { - ScreenCaptureManager.storeConsent(result.resultCode, result.data) + ScreenCaptureManager.storeConsent(context, result.resultCode, result.data) + hasCaptureConsent = true } } @@ -172,6 +190,31 @@ fun SettingsScreen() { actionLabel = "Disable", onAction = { BatteryOptimization.requestExemption(context) } ) + + ChecklistItem( + label = "Overlay permission", + isOk = hasOverlayPermission, + actionLabel = "Grant", + onAction = { + context.startActivity( + Intent( + Settings.ACTION_MANAGE_OVERLAY_PERMISSION, + Uri.parse("package:${context.packageName}") + ) + ) + } + ) + + ChecklistItem( + label = "Notification listener (for workflows)", + isOk = isNotificationListenerEnabled, + actionLabel = "Enable", + onAction = { + context.startActivity( + Intent("android.settings.ACTION_NOTIFICATION_LISTENER_SETTINGS") + ) + } + ) } } diff --git a/android/app/src/main/java/com/thisux/droidclaw/workflow/WorkflowNotificationService.kt b/android/app/src/main/java/com/thisux/droidclaw/workflow/WorkflowNotificationService.kt new file mode 100644 index 0000000..8508c28 --- /dev/null +++ 
package com.thisux.droidclaw.workflow

import android.content.ComponentName
import android.content.Context
import android.provider.Settings
import android.service.notification.NotificationListenerService
import android.service.notification.StatusBarNotification
import android.util.Log
import com.thisux.droidclaw.DroidClawApp
import com.thisux.droidclaw.connection.ConnectionService
import com.thisux.droidclaw.model.ConnectionState
import com.thisux.droidclaw.model.MatchMode
import com.thisux.droidclaw.model.TriggerCondition
import com.thisux.droidclaw.model.Workflow
import com.thisux.droidclaw.model.WorkflowTriggerMessage
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.flow.first
import kotlinx.coroutines.launch

/**
 * Notification listener that checks every posted notification against the
 * enabled workflows in the app's workflow store and, on a match, forwards a
 * [WorkflowTriggerMessage] through [ConnectionService].
 */
class WorkflowNotificationService : NotificationListenerService() {

    companion object {
        private const val TAG = "WorkflowNotifSvc"

        /**
         * Returns true when this service is listed in the secure setting
         * `enabled_notification_listeners`.
         *
         * The setting is a ':'-separated list of flattened component names, so
         * each entry is parsed and compared as a [ComponentName]; a plain
         * substring test would also accept a longer component name that merely
         * contains ours.
         */
        fun isEnabled(context: Context): Boolean {
            val flat = Settings.Secure.getString(
                context.contentResolver,
                "enabled_notification_listeners"
            ) ?: return false
            val ourComponent = ComponentName(context, WorkflowNotificationService::class.java)
            return flat.split(':').any { entry ->
                // unflattenFromString returns null for malformed/empty entries.
                ComponentName.unflattenFromString(entry) == ourComponent
            }
        }
    }

    // Parent job of every coroutine launched by this service; cancelled in onDestroy
    // so no matching work outlives the listener.
    private val serviceJob = SupervisorJob()
    private val scope = CoroutineScope(serviceJob + Dispatchers.IO)

    override fun onDestroy() {
        serviceJob.cancel()
        super.onDestroy()
    }

    /**
     * Extracts package, title and text from the posted notification and matches
     * them against enabled workflows on the IO dispatcher.
     */
    override fun onNotificationPosted(sbn: StatusBarNotification?) {
        sbn ?: return
        val pkg = sbn.packageName ?: return
        // Ignore our own notifications
        if (pkg == packageName) return

        val extras = sbn.notification?.extras ?: return
        val title = extras.getCharSequence("android.title")?.toString() ?: ""
        val text = extras.getCharSequence("android.text")?.toString() ?: ""

        // Only the source package is logged: title/text are other apps' private
        // content (messages, one-time codes) and must not end up in logcat.
        Log.d(TAG, "Notification from=$pkg")

        scope.launch {
            try {
                val app = application as? DroidClawApp
                if (app == null) {
                    Log.w(TAG, "Application is not DroidClawApp; skipping workflow matching")
                    return@launch
                }
                val workflows = app.workflowStore.workflows.first()
                val enabled = workflows.filter { it.enabled }

                for (wf in enabled) {
                    if (matchesWorkflow(wf, pkg, title, text)) {
                        Log.i(TAG, "Workflow '${wf.name}' matched notification from $pkg")
                        triggerWorkflow(wf, pkg, title, text)
                    }
                }
            } catch (e: kotlin.coroutines.cancellation.CancellationException) {
                // Cancellation is not a failure; let it propagate to the scope.
                throw e
            } catch (e: Exception) {
                Log.e(TAG, "Failed to process notification for workflows: ${e.message}", e)
            }
        }
    }

    /**
     * A workflow matches only when it has at least one condition and every
     * condition matches; a workflow without conditions never matches.
     */
    private fun matchesWorkflow(
        wf: Workflow,
        pkg: String,
        title: String,
        text: String
    ): Boolean =
        wf.conditions.isNotEmpty() &&
            wf.conditions.all { cond -> matchesCondition(cond, pkg, title, text) }

    /**
     * Evaluates a single condition, case-insensitively, against the
     * notification field it names. Unknown field names and invalid regex
     * patterns evaluate to false.
     */
    private fun matchesCondition(
        cond: TriggerCondition,
        pkg: String,
        title: String,
        text: String
    ): Boolean {
        val actual = when (cond.field) {
            "app_package" -> pkg
            "title" -> title
            "text" -> text
            else -> return false
        }
        return when (cond.matchMode) {
            MatchMode.contains -> actual.contains(cond.value, ignoreCase = true)
            MatchMode.exact -> actual.equals(cond.value, ignoreCase = true)
            MatchMode.regex -> try {
                Regex(cond.value, RegexOption.IGNORE_CASE).containsMatchIn(actual)
            } catch (_: Exception) {
                // A pattern that does not compile simply does not match.
                false
            }
        }
    }

    /**
     * Sends the trigger for [wf] to the server. Does nothing when the
     * connection service is absent or not in the Connected state.
     */
    private fun triggerWorkflow(wf: Workflow, pkg: String, title: String, text: String) {
        val svc = ConnectionService.instance ?: return
        if (ConnectionService.connectionState.value != ConnectionState.Connected) {
            Log.w(TAG, "Cannot trigger workflow '${wf.name}': not connected")
            return
        }
        svc.sendWorkflowTrigger(
            WorkflowTriggerMessage(
                workflowId = wf.id,
                notificationApp = pkg,
                notificationTitle = title,
                notificationText = text
            )
        )
    }
}
/**
 * Classifies user input as either an immediate goal or a workflow (automation rule).
 *
 * Uses the user's LLM to determine intent. Workflows describe recurring
 * automations ("when X happens, do Y"), goals are one-time tasks ("open WhatsApp").
 */

import type { LLMConfig } from "./llm.js";
import { getLlmProvider, parseJsonResponse } from "./llm.js";

export type InputType = "goal" | "workflow";

export interface ClassificationResult {
  type: InputType;
}

const CLASSIFIER_PROMPT = `You classify user input for an Android automation agent.

Decide if the input is:
- "goal": A one-time task to execute right now (e.g. "open WhatsApp", "search for pizza", "take a screenshot", "reply to John with hello")
- "workflow": An automation rule that should be saved and triggered later when a condition is met (e.g. "when I get a notification from WhatsApp saying where are you, reply with Bangalore", "whenever someone messages me on Telegram, auto-reply with I'm busy", "reply to all notifications that have a reply button")

Key signals for "workflow":
- Uses words like "when", "whenever", "if", "every time", "automatically", "always"
- Describes a trigger condition + a response action
- Refers to future/recurring events

Key signals for "goal":
- Describes a single task to do now
- Imperative commands ("open", "send", "search", "go to")
- No conditional/temporal trigger

Respond with ONLY: {"type": "goal"} or {"type": "workflow"}`;

/**
 * Asks the configured LLM whether `text` is a one-off goal or a workflow rule.
 *
 * The verdict is "workflow" only when the parsed reply says so explicitly.
 * Any other reply, or an error while calling or parsing, yields "goal":
 * executing once is the safer fallback than silently saving a rule.
 */
export async function classifyInput(
  text: string,
  llmConfig: LLMConfig
): Promise<ClassificationResult> {
  const llm = getLlmProvider(llmConfig);
  let verdict: InputType = "goal";

  try {
    const reply = await llm.getAction(CLASSIFIER_PROMPT, text);
    const decoded = parseJsonResponse(reply);
    if (decoded?.type === "workflow") {
      verdict = "workflow";
    }
  } catch (err) {
    console.error(`[Classifier] Failed to classify input, defaulting to goal:`, err);
  }

  return { type: verdict };
}
/**
 * Parses a natural-language workflow description into structured
 * trigger conditions and a goal template using the user's LLM.
 */

import type { LLMConfig } from "./llm.js";
import { getLlmProvider, parseJsonResponse } from "./llm.js";

export interface ParsedWorkflow {
  name: string;
  triggerType: "notification";
  conditions: Array<{
    field: "app_package" | "title" | "text";
    matchMode: "contains" | "exact" | "regex";
    value: string;
  }>;
  goalTemplate: string;
}

// NOTE(review): the second example below yields an empty `conditions` array, while the
// device-side matcher (WorkflowNotificationService.matchesWorkflow) returns false for a
// workflow with no conditions — confirm whether such workflows are meant to fire.
const PARSER_PROMPT = `You are a workflow parser for an Android automation agent.

The user describes an automation rule in plain English. Parse it into a structured workflow.

A workflow has:
1. **name**: A short human-readable name (3-6 words).
2. **triggerType**: Always "notification" for now.
3. **conditions**: An array of matching rules for incoming notifications. Each condition has:
   - "field": one of "app_package", "title", or "text"
   - "matchMode": one of "contains", "exact", or "regex"
   - "value": the string or regex to match
4. **goalTemplate**: The goal string to send to the agent when triggered. Use {{title}}, {{text}}, {{app}} as placeholders that get filled from the notification.

Example input: "When I get a WhatsApp message saying 'where are you', reply with 'Bangalore'"
Example output:
{
  "name": "Auto-reply where are you",
  "triggerType": "notification",
  "conditions": [
    {"field": "app_package", "matchMode": "contains", "value": "whatsapp"},
    {"field": "text", "matchMode": "contains", "value": "where are you"}
  ],
  "goalTemplate": "Open the WhatsApp notification from {{title}} and reply with 'Bangalore'"
}

Example input: "Reply to all notifications that have a reply button with 'I am busy'"
Example output:
{
  "name": "Auto-reply I am busy",
  "triggerType": "notification",
  "conditions": [],
  "goalTemplate": "Open the notification '{{title}}' from {{app}} and reply with 'I am busy'"
}

Respond with ONLY a valid JSON object. No explanation.`;

const CONDITION_FIELDS: ReadonlySet<string> = new Set(["app_package", "title", "text"]);
const CONDITION_MATCH_MODES: ReadonlySet<string> = new Set(["contains", "exact", "regex"]);

/**
 * Validates the LLM-produced `conditions` value.
 *
 * A missing value becomes an empty list (same as before). Anything that is not
 * an array, or any entry with an unknown field / match mode or a non-string
 * value, is rejected outright: silently dropping a bad condition would make
 * the trigger broader than the user asked for.
 */
function toConditions(raw: unknown): ParsedWorkflow["conditions"] {
  if (raw === undefined || raw === null) return [];
  if (!Array.isArray(raw)) {
    throw new Error("Workflow conditions must be an array");
  }

  return raw.map((entry, index) => {
    const cond = entry as { field?: unknown; matchMode?: unknown; value?: unknown } | null;
    const isValid =
      cond !== null &&
      typeof cond === "object" &&
      typeof cond.field === "string" &&
      CONDITION_FIELDS.has(cond.field) &&
      typeof cond.matchMode === "string" &&
      CONDITION_MATCH_MODES.has(cond.matchMode) &&
      typeof cond.value === "string";

    if (!isValid) {
      throw new Error(`Invalid workflow condition at index ${index}`);
    }

    return {
      field: cond!.field as ParsedWorkflow["conditions"][number]["field"],
      matchMode: cond!.matchMode as ParsedWorkflow["conditions"][number]["matchMode"],
      value: cond!.value as string,
    };
  });
}

/**
 * Sends `description` to the user's LLM with the parser prompt and returns the
 * structured workflow.
 *
 * @throws Error when the reply cannot be parsed, lacks a string `name` or
 *   `goalTemplate`, or carries malformed `conditions`. Errors from the provider
 *   call itself propagate unchanged.
 */
export async function parseWorkflowDescription(
  description: string,
  llmConfig: LLMConfig
): Promise<ParsedWorkflow> {
  const provider = getLlmProvider(llmConfig);

  const raw = await provider.getAction(PARSER_PROMPT, description);
  const parsed = parseJsonResponse(raw);

  // The reply is untrusted model output: require non-empty strings, not just truthy values.
  if (
    !parsed ||
    typeof parsed.name !== "string" ||
    !parsed.name ||
    typeof parsed.goalTemplate !== "string" ||
    !parsed.goalTemplate
  ) {
    throw new Error("Failed to parse workflow description into structured format");
  }

  return {
    name: parsed.name,
    // Only notification triggers exist, so whatever the model returned is ignored.
    triggerType: "notification",
    conditions: toConditions(parsed.conditions),
    goalTemplate: parsed.goalTemplate,
  };
}
WebSocket (safe — catches send errors). @@ -251,8 +259,23 @@ export async function handleDeviceMessage( break; } + // Classify: is this an immediate goal or a workflow? + try { + const classification = await classifyInput(goal, userLlmConfig); + if (classification.type === "workflow") { + console.log(`[Classifier] Input classified as workflow: ${goal}`); + handleWorkflowCreate(ws, goal).catch((err) => + console.error(`[Workflow] Auto-create error:`, err) + ); + break; + } + } catch (err) { + console.warn(`[Classifier] Classification failed, treating as goal:`, err); + } + console.log(`[Pipeline] Starting goal for device ${deviceId}: ${goal}`); - activeSessions.set(deviceId, goal); + const abortController = new AbortController(); + activeSessions.set(deviceId, { goal, abort: abortController }); sendToDevice(ws, { type: "goal_started", sessionId: deviceId, goal }); @@ -262,6 +285,7 @@ export async function handleDeviceMessage( userId, goal, llmConfig: userLlmConfig, + signal: abortController.signal, onStep(step) { sendToDevice(ws, { type: "step", @@ -294,6 +318,23 @@ export async function handleDeviceMessage( break; } + case "stop_goal": { + const deviceId = ws.data.deviceId!; + const active = activeSessions.get(deviceId); + if (active) { + console.log(`[Pipeline] Stop requested for device ${deviceId}`); + active.abort.abort(); + activeSessions.delete(deviceId); + sendToDevice(ws, { + type: "goal_completed", + sessionId: deviceId, + success: false, + stepsUsed: 0, + }); + } + break; + } + case "apps": { const persistentDeviceId = ws.data.persistentDeviceId; if (persistentDeviceId) { @@ -342,6 +383,59 @@ export async function handleDeviceMessage( break; } + case "workflow_create": { + const description = (msg as unknown as { description: string }).description; + if (description) { + handleWorkflowCreate(ws, description).catch((err) => + console.error(`[Workflow] Create error:`, err) + ); + } + break; + } + + case "workflow_update": { + const { workflowId, enabled } = 
msg as unknown as { workflowId: string; enabled?: boolean }; + if (workflowId) { + handleWorkflowUpdate(ws, workflowId, enabled).catch((err) => + console.error(`[Workflow] Update error:`, err) + ); + } + break; + } + + case "workflow_delete": { + const { workflowId } = msg as unknown as { workflowId: string }; + if (workflowId) { + handleWorkflowDelete(ws, workflowId).catch((err) => + console.error(`[Workflow] Delete error:`, err) + ); + } + break; + } + + case "workflow_sync": { + handleWorkflowSync(ws).catch((err) => + console.error(`[Workflow] Sync error:`, err) + ); + break; + } + + case "workflow_trigger": { + const { workflowId, notificationApp, notificationTitle, notificationText } = + msg as unknown as { + workflowId: string; + notificationApp?: string; + notificationTitle?: string; + notificationText?: string; + }; + if (workflowId) { + handleWorkflowTrigger(ws, workflowId, notificationApp, notificationTitle, notificationText).catch( + (err) => console.error(`[Workflow] Trigger error:`, err) + ); + } + break; + } + default: { console.warn( `Unknown message type from device ${ws.data.deviceId}:`, @@ -360,7 +454,11 @@ export function handleDeviceClose( const { deviceId, userId, persistentDeviceId } = ws.data; if (!deviceId) return; - activeSessions.delete(deviceId); + const active = activeSessions.get(deviceId); + if (active) { + active.abort.abort(); + activeSessions.delete(deviceId); + } sessions.removeDevice(deviceId); // Update device status in DB diff --git a/server/src/ws/workflow-handlers.ts b/server/src/ws/workflow-handlers.ts new file mode 100644 index 0000000..271f8d3 --- /dev/null +++ b/server/src/ws/workflow-handlers.ts @@ -0,0 +1,229 @@ +/** + * Server-side handlers for workflow CRUD and trigger messages + * from the Android device WebSocket. 
+ */ + +import type { ServerWebSocket } from "bun"; +import { eq, and } from "drizzle-orm"; +import { db } from "../db.js"; +import { workflow, llmConfig } from "../schema.js"; +import { parseWorkflowDescription } from "../agent/workflow-parser.js"; +import type { LLMConfig } from "../agent/llm.js"; +import type { WebSocketData } from "./sessions.js"; + +function sendToDevice(ws: ServerWebSocket, msg: Record) { + try { + ws.send(JSON.stringify(msg)); + } catch { + // device disconnected + } +} + +async function getUserLlmConfig(userId: string): Promise { + const configs = await db + .select() + .from(llmConfig) + .where(eq(llmConfig.userId, userId)) + .limit(1); + + if (configs.length === 0) return null; + + const cfg = configs[0]; + return { + provider: cfg.provider, + apiKey: cfg.apiKey, + model: cfg.model ?? undefined, + }; +} + +function workflowToJson(wf: typeof workflow.$inferSelect): string { + return JSON.stringify({ + id: wf.id, + name: wf.name, + description: wf.description, + triggerType: wf.triggerType, + conditions: wf.conditions, + goalTemplate: wf.goalTemplate, + enabled: wf.enabled, + createdAt: new Date(wf.createdAt).getTime(), + }); +} + +export async function handleWorkflowCreate( + ws: ServerWebSocket, + description: string +): Promise { + const userId = ws.data.userId!; + + const userLlm = await getUserLlmConfig(userId); + if (!userLlm) { + sendToDevice(ws, { + type: "error", + message: "No LLM provider configured. 
Set it up in the web dashboard.", + }); + return; + } + + try { + const parsed = await parseWorkflowDescription(description, userLlm); + + // Validate regexes before persisting + for (const cond of parsed.conditions) { + if (cond.matchMode === "regex") { + try { + new RegExp(cond.value, "i"); + } catch { + throw new Error(`Invalid regex in condition: ${cond.value}`); + } + } + } + + const id = crypto.randomUUID(); + const now = new Date(); + + await db.insert(workflow).values({ + id, + userId, + name: parsed.name, + description, + triggerType: parsed.triggerType, + conditions: parsed.conditions, + goalTemplate: parsed.goalTemplate, + enabled: true, + createdAt: now, + updatedAt: now, + }); + + const inserted = await db + .select() + .from(workflow) + .where(eq(workflow.id, id)) + .limit(1); + + if (inserted.length > 0) { + sendToDevice(ws, { + type: "workflow_created", + workflowId: id, + workflowJson: workflowToJson(inserted[0]), + }); + } + + console.log(`[Workflow] Created '${parsed.name}' for user ${userId}`); + } catch (err) { + console.error(`[Workflow] Failed to create workflow:`, err); + sendToDevice(ws, { + type: "error", + message: `Failed to parse workflow: ${err}`, + }); + } +} + +export async function handleWorkflowUpdate( + ws: ServerWebSocket, + workflowId: string, + enabled?: boolean +): Promise { + const userId = ws.data.userId!; + + const updates: Record = {}; + if (enabled !== undefined) updates.enabled = enabled; + + await db + .update(workflow) + .set(updates) + .where(and(eq(workflow.id, workflowId), eq(workflow.userId, userId))); + + console.log(`[Workflow] Updated ${workflowId}: enabled=${enabled}`); +} + +export async function handleWorkflowDelete( + ws: ServerWebSocket, + workflowId: string +): Promise { + const userId = ws.data.userId!; + + await db + .delete(workflow) + .where(and(eq(workflow.id, workflowId), eq(workflow.userId, userId))); + + sendToDevice(ws, { + type: "workflow_deleted", + workflowId, + }); + + console.log(`[Workflow] 
Deleted ${workflowId}`); +} + +export async function handleWorkflowSync( + ws: ServerWebSocket +): Promise { + const userId = ws.data.userId!; + + const workflows = await db + .select() + .from(workflow) + .where(eq(workflow.userId, userId)); + + const workflowsJson = JSON.stringify( + workflows.map((wf) => ({ + id: wf.id, + name: wf.name, + description: wf.description, + triggerType: wf.triggerType, + conditions: wf.conditions, + goalTemplate: wf.goalTemplate, + enabled: wf.enabled, + createdAt: new Date(wf.createdAt).getTime(), + })) + ); + + sendToDevice(ws, { + type: "workflow_synced", + workflowsJson, + }); + + console.log(`[Workflow] Synced ${workflows.length} workflows for user ${userId}`); +} + +export async function handleWorkflowTrigger( + ws: ServerWebSocket, + workflowId: string, + notificationApp?: string, + notificationTitle?: string, + notificationText?: string +): Promise { + const userId = ws.data.userId!; + + const workflows = await db + .select() + .from(workflow) + .where(and(eq(workflow.id, workflowId), eq(workflow.userId, userId))) + .limit(1); + + if (workflows.length === 0) { + console.warn(`[Workflow] Trigger for unknown workflow ${workflowId}`); + return; + } + + const wf = workflows[0]; + if (!wf.enabled) return; + + // Expand goal template placeholders + let goal = wf.goalTemplate; + goal = goal.replace(/\{\{app\}\}/g, notificationApp ?? "unknown app"); + goal = goal.replace(/\{\{title\}\}/g, notificationTitle ?? ""); + goal = goal.replace(/\{\{text\}\}/g, notificationText ?? ""); + + console.log(`[Workflow] Triggering '${wf.name}' with goal: ${goal}`); + + // Send as a goal — reuse existing goal handling by injecting a goal message + sendToDevice(ws, { type: "ping" }); // keep-alive before goal injection + + // The device will receive this as a workflow-triggered goal + // We send the goal text back to the device to be submitted as a regular goal + sendToDevice(ws, { + type: "workflow_goal", + workflowId: wf.id, + goal, + }); +}