From ac7fc85891e9307f8c83c0e5b738ccff4cc34aa5 Mon Sep 17 00:00:00 2001
From: Sanju Sivalingam
Date: Tue, 17 Feb 2026 17:47:10 +0530
Subject: [PATCH] feat(android): add GestureExecutor and ScreenCaptureManager

Co-Authored-By: Claude Opus 4.6
---
 .../accessibility/GestureExecutor.kt          | 303 ++++++++++++++++++
 .../droidclaw/capture/ScreenCaptureManager.kt | 168 ++++++++++
 2 files changed, 471 insertions(+)
 create mode 100644 android/app/src/main/java/com/thisux/droidclaw/accessibility/GestureExecutor.kt
 create mode 100644 android/app/src/main/java/com/thisux/droidclaw/capture/ScreenCaptureManager.kt

diff --git a/android/app/src/main/java/com/thisux/droidclaw/accessibility/GestureExecutor.kt b/android/app/src/main/java/com/thisux/droidclaw/accessibility/GestureExecutor.kt
new file mode 100644
index 0000000..dcf3f00
--- /dev/null
+++ b/android/app/src/main/java/com/thisux/droidclaw/accessibility/GestureExecutor.kt
@@ -0,0 +1,303 @@
+package com.thisux.droidclaw.accessibility
+
+import android.accessibilityservice.AccessibilityService
+import android.accessibilityservice.GestureDescription
+import android.content.Intent
+import android.graphics.Path
+import android.net.Uri
+import android.os.Bundle
+import android.util.Log
+import android.view.accessibility.AccessibilityNodeInfo
+import com.thisux.droidclaw.model.ServerMessage
+import kotlinx.coroutines.CancellationException
+import kotlinx.coroutines.suspendCancellableCoroutine
+import kotlin.coroutines.resume
+
+/**
+ * Outcome of a single action run by [GestureExecutor].
+ *
+ * @property success whether the action reported success.
+ * @property error failure description, or null when none was produced.
+ * @property data optional payload; in this file only `clipboard_get` sets it.
+ */
+data class ActionResult(val success: Boolean, val error: String? = null, val data: String? = null)
+
+/**
+ * Runs [ServerMessage] actions (tap, type, swipe, launch, ...) through the
+ * accessibility [service].
+ */
+class GestureExecutor(private val service: DroidClawAccessibilityService) {
+
+    companion object {
+        private const val TAG = "GestureExecutor"
+    }
+
+    /**
+     * Routes [msg] to the handler selected by `msg.type`.
+     *
+     * Absent coordinates and key codes fall back to 0, absent strings to "", and absent
+     * durations to 300 ms (swipe) or 1000 ms (wait). Exceptions thrown by a handler are
+     * logged and returned as a failed [ActionResult]; cancellation is rethrown.
+     */
+    suspend fun execute(msg: ServerMessage): ActionResult {
+        return try {
+            when (msg.type) {
+                "tap" -> executeTap(msg.x ?: 0, msg.y ?: 0)
+                "type" -> executeType(msg.text ?: "")
+                "enter" -> executeEnter()
+                "back" -> executeGlobalAction(AccessibilityService.GLOBAL_ACTION_BACK)
+                "home" -> executeGlobalAction(AccessibilityService.GLOBAL_ACTION_HOME)
+                "notifications" -> executeGlobalAction(AccessibilityService.GLOBAL_ACTION_NOTIFICATIONS)
+                "longpress" -> executeLongPress(msg.x ?: 0, msg.y ?: 0)
+                "swipe" -> executeSwipe(
+                    msg.x1 ?: 0, msg.y1 ?: 0,
+                    msg.x2 ?: 0, msg.y2 ?: 0,
+                    msg.duration ?: 300
+                )
+                "launch" -> executeLaunch(msg.packageName ?: "")
+                "clear" -> executeClear()
+                "clipboard_set" -> executeClipboardSet(msg.text ?: "")
+                "clipboard_get" -> executeClipboardGet()
+                "paste" -> executePaste()
+                "open_url" -> executeOpenUrl(msg.url ?: "")
+                "switch_app" -> executeLaunch(msg.packageName ?: "")
+                "keyevent" -> executeKeyEvent(msg.code ?: 0)
+                "open_settings" -> executeOpenSettings()
+                "wait" -> executeWait(msg.duration ?: 1000)
+                else -> ActionResult(false, "Unknown action: ${msg.type}")
+            }
+        } catch (e: CancellationException) {
+            // Cancellation must reach the caller's scope instead of becoming a failed result.
+            throw e
+        } catch (e: Exception) {
+            Log.e(TAG, "Action ${msg.type} failed", e)
+            ActionResult(false, e.message)
+        }
+    }
+
+    /** Clicks the node found at ([x], [y]); falls back to a dispatched tap gesture. */
+    private suspend fun executeTap(x: Int, y: Int): ActionResult {
+        val node = service.findNodeAt(x, y)
+        if (node != null) {
+            try {
+                if (node.performAction(AccessibilityNodeInfo.ACTION_CLICK)) {
+                    return ActionResult(true)
+                }
+            } finally {
+                node.recycle()
+            }
+        }
+        return dispatchTapGesture(x, y)
+    }
+
+    /** Replaces the content of the input-focused node with [text] via ACTION_SET_TEXT. */
+    private suspend fun executeType(text: String): ActionResult {
+        val focused = findFocusedNode()
+        if (focused != null) {
+            try {
+                val args = Bundle().apply {
+                    putCharSequence(AccessibilityNodeInfo.ACTION_ARGUMENT_SET_TEXT_CHARSEQUENCE, text)
+                }
+                if (focused.performAction(AccessibilityNodeInfo.ACTION_SET_TEXT, args)) {
+                    return ActionResult(true)
+                }
+            } finally {
+                focused.recycle()
+            }
+        }
+        return ActionResult(false, "No focused editable node found")
+    }
+
+    /** Sends IME enter to the focused node (Android R and later); otherwise falls back to a key event. */
+    private fun executeEnter(): ActionResult {
+        val focused = findFocusedNode()
+        if (focused != null) {
+            try {
+                if (android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.R) {
+                    val action = AccessibilityNodeInfo.AccessibilityAction.ACTION_IME_ENTER
+                    if (focused.performAction(action.id)) {
+                        return ActionResult(true)
+                    }
+                }
+            } finally {
+                focused.recycle()
+            }
+        }
+        // Fallback: dispatch Enter keyevent
+        return executeKeyEvent(android.view.KeyEvent.KEYCODE_ENTER)
+    }
+
+    /** Performs the given accessibility global [action] (back, home, notifications, ...). */
+    private fun executeGlobalAction(action: Int): ActionResult {
+        val success = service.performGlobalAction(action)
+        return ActionResult(success, if (!success) "Global action failed" else null)
+    }
+
+    /** Long-clicks the node at ([x], [y]); falls back to a 1000 ms stationary gesture. */
+    private suspend fun executeLongPress(x: Int, y: Int): ActionResult {
+        val node = service.findNodeAt(x, y)
+        if (node != null) {
+            try {
+                if (node.performAction(AccessibilityNodeInfo.ACTION_LONG_CLICK)) {
+                    return ActionResult(true)
+                }
+            } finally {
+                node.recycle()
+            }
+        }
+        return dispatchSwipeGesture(x, y, x, y, 1000)
+    }
+
+    /** Swipes from ([x1], [y1]) to ([x2], [y2]) over [duration] milliseconds. */
+    private suspend fun executeSwipe(x1: Int, y1: Int, x2: Int, y2: Int, duration: Int): ActionResult {
+        return dispatchSwipeGesture(x1, y1, x2, y2, duration)
+    }
+
+    /** Starts the launch intent of [packageName] in a new task. */
+    private fun executeLaunch(packageName: String): ActionResult {
+        val intent = service.packageManager.getLaunchIntentForPackage(packageName)
+            ?: return ActionResult(false, "Package not found: $packageName")
+        intent.addFlags(Intent.FLAG_ACTIVITY_NEW_TASK)
+        service.startActivity(intent)
+        return ActionResult(true)
+    }
+
+    /** Empties the input-focused node by setting its text to "". */
+    private fun executeClear(): ActionResult {
+        val focused = findFocusedNode()
+        if (focused != null) {
+            try {
+                val args = Bundle().apply {
+                    putCharSequence(AccessibilityNodeInfo.ACTION_ARGUMENT_SET_TEXT_CHARSEQUENCE, "")
+                }
+                if (focused.performAction(AccessibilityNodeInfo.ACTION_SET_TEXT, args)) {
+                    return ActionResult(true)
+                }
+            } finally {
+                focused.recycle()
+            }
+        }
+        return ActionResult(false, "No focused editable node to clear")
+    }
+
+    /** Puts [text] on the primary clipboard under the label "droidclaw". */
+    private fun executeClipboardSet(text: String): ActionResult {
+        val clipboard = service.getSystemService(android.content.Context.CLIPBOARD_SERVICE) as android.content.ClipboardManager
+        val clip = android.content.ClipData.newPlainText("droidclaw", text)
+        clipboard.setPrimaryClip(clip)
+        return ActionResult(true)
+    }
+
+    /** Returns the first primary-clip item's text in `data`, or "" when there is none. */
+    private fun executeClipboardGet(): ActionResult {
+        val clipboard = service.getSystemService(android.content.Context.CLIPBOARD_SERVICE) as android.content.ClipboardManager
+        val text = clipboard.primaryClip?.getItemAt(0)?.text?.toString() ?: ""
+        return ActionResult(true, data = text)
+    }
+
+    /** Performs ACTION_PASTE on the input-focused node. */
+    private fun executePaste(): ActionResult {
+        val focused = findFocusedNode()
+        if (focused != null) {
+            try {
+                if (focused.performAction(AccessibilityNodeInfo.ACTION_PASTE)) {
+                    return ActionResult(true)
+                }
+            } finally {
+                focused.recycle()
+            }
+        }
+        return ActionResult(false, "No focused node to paste into")
+    }
+
+    /** Opens [url] with an ACTION_VIEW intent in a new task. */
+    private fun executeOpenUrl(url: String): ActionResult {
+        val intent = Intent(Intent.ACTION_VIEW, Uri.parse(url)).apply {
+            addFlags(Intent.FLAG_ACTIVITY_NEW_TASK)
+        }
+        service.startActivity(intent)
+        return ActionResult(true)
+    }
+
+    /**
+     * Spawns `input keyevent CODE` as a child process.
+     *
+     * NOTE(review): the exit status is never read, so `success` only means the process
+     * was started. Presumably `input` needs elevated privileges to inject events from an
+     * app process; confirm this path works on target devices.
+     */
+    private fun executeKeyEvent(code: Int): ActionResult {
+        return try {
+            Runtime.getRuntime().exec(arrayOf("input", "keyevent", code.toString()))
+            ActionResult(true)
+        } catch (e: Exception) {
+            ActionResult(false, "keyevent failed: ${e.message}")
+        }
+    }
+
+    /** Opens the screen for `Settings.ACTION_SETTINGS` in a new task. */
+    private fun executeOpenSettings(): ActionResult {
+        val intent = Intent(android.provider.Settings.ACTION_SETTINGS).apply {
+            addFlags(Intent.FLAG_ACTIVITY_NEW_TASK)
+        }
+        service.startActivity(intent)
+        return ActionResult(true)
+    }
+
+    /** Suspends for [duration] milliseconds. */
+    private suspend fun executeWait(duration: Int): ActionResult {
+        kotlinx.coroutines.delay(duration.toLong())
+        return ActionResult(true)
+    }
+
+    /** Dispatches a 50 ms single-point stroke at ([x], [y]). */
+    private suspend fun dispatchTapGesture(x: Int, y: Int): ActionResult {
+        val path = Path().apply { moveTo(x.toFloat(), y.toFloat()) }
+        val stroke = GestureDescription.StrokeDescription(path, 0, 50)
+        val gesture = GestureDescription.Builder().addStroke(stroke).build()
+        return dispatchGesture(gesture)
+    }
+
+    /** Dispatches a straight-line stroke between the two points lasting [duration] ms. */
+    private suspend fun dispatchSwipeGesture(
+        x1: Int, y1: Int, x2: Int, y2: Int, duration: Int
+    ): ActionResult {
+        val path = Path().apply {
+            moveTo(x1.toFloat(), y1.toFloat())
+            lineTo(x2.toFloat(), y2.toFloat())
+        }
+        val stroke = GestureDescription.StrokeDescription(path, 0, duration.toLong())
+        val gesture = GestureDescription.Builder().addStroke(stroke).build()
+        return dispatchGesture(gesture)
+    }
+
+    /**
+     * Dispatches [gesture] and suspends until the service reports completion or cancellation.
+     *
+     * If the service refuses the gesture outright, a failed result is returned immediately.
+     */
+    private suspend fun dispatchGesture(gesture: GestureDescription): ActionResult =
+        suspendCancellableCoroutine { cont ->
+            val dispatched = service.dispatchGesture(
+                gesture,
+                object : AccessibilityService.GestureResultCallback() {
+                    override fun onCompleted(gestureDescription: GestureDescription?) {
+                        if (cont.isActive) cont.resume(ActionResult(true))
+                    }
+                    override fun onCancelled(gestureDescription: GestureDescription?) {
+                        if (cont.isActive) cont.resume(ActionResult(false, "Gesture cancelled"))
+                    }
+                },
+                null
+            )
+            // A false return means no callback will ever fire; resume here so callers don't hang.
+            if (!dispatched && cont.isActive) {
+                cont.resume(ActionResult(false, "Gesture dispatch rejected"))
+            }
+        }
+
+    /** Returns the node holding input focus in the active window, or null. */
+    private fun findFocusedNode(): AccessibilityNodeInfo? {
+        return service.rootInActiveWindow?.findFocus(AccessibilityNodeInfo.FOCUS_INPUT)
+    }
+}
diff --git a/android/app/src/main/java/com/thisux/droidclaw/capture/ScreenCaptureManager.kt b/android/app/src/main/java/com/thisux/droidclaw/capture/ScreenCaptureManager.kt
new file mode 100644
index 0000000..18ae3e4
--- /dev/null
+++ b/android/app/src/main/java/com/thisux/droidclaw/capture/ScreenCaptureManager.kt
@@ -0,0 +1,168 @@
+package com.thisux.droidclaw.capture
+
+import android.content.Context
+import android.content.Intent
+import android.graphics.Bitmap
+import android.graphics.PixelFormat
+import android.hardware.display.DisplayManager
+import android.hardware.display.VirtualDisplay
+import android.media.ImageReader
+import android.media.projection.MediaProjection
+import android.media.projection.MediaProjectionManager
+import android.util.DisplayMetrics
+import android.util.Log
+import android.view.WindowManager
+import kotlinx.coroutines.flow.MutableStateFlow
+import java.io.ByteArrayOutputStream
+
+/**
+ * Mirrors the screen into an [ImageReader] through a [MediaProjection] and hands out
+ * frames as JPEG bytes.
+ */
+class ScreenCaptureManager(private val context: Context) {
+
+    companion object {
+        private const val TAG = "ScreenCapture"
+        private const val CAPTURE_WIDTH = 720
+        const val REQUEST_CODE = 1001
+        val isAvailable = MutableStateFlow(false)
+    }
+
+    private var mediaProjection: MediaProjection? = null
+    private var projectionCallback: MediaProjection.Callback? = null
+    private var virtualDisplay: VirtualDisplay? = null
+    private var imageReader: ImageReader? = null
+    private var screenWidth = 720
+    private var screenHeight = 1280
+    private var screenDensity = DisplayMetrics.DENSITY_DEFAULT
+
+    /**
+     * Starts a capture session from the [resultCode] and [data] handed to
+     * [MediaProjectionManager.getMediaProjection].
+     *
+     * Any earlier session is released first. Frames are captured [CAPTURE_WIDTH] pixels wide
+     * with the height scaled by the same factor. If no projection is obtained the call logs
+     * an error and returns with [isAvailable] still false; otherwise [isAvailable] becomes
+     * true once the virtual display has been created.
+     */
+    fun initialize(resultCode: Int, data: Intent) {
+        // Re-initialising must not leak the previous display, reader or projection.
+        release()
+
+        val mgr = context.getSystemService(Context.MEDIA_PROJECTION_SERVICE) as MediaProjectionManager
+        val projection: MediaProjection? = mgr.getMediaProjection(resultCode, data)
+        if (projection == null) {
+            Log.e(TAG, "MediaProjection unavailable (resultCode=$resultCode)")
+            return
+        }
+
+        val wm = context.getSystemService(Context.WINDOW_SERVICE) as WindowManager
+        val metrics = DisplayMetrics()
+        @Suppress("DEPRECATION")
+        wm.defaultDisplay.getRealMetrics(metrics)
+        screenWidth = metrics.widthPixels
+        screenHeight = metrics.heightPixels
+        screenDensity = metrics.densityDpi
+
+        // Guard against a zero-sized metrics report so the reader never gets a 0 dimension.
+        val scale = CAPTURE_WIDTH.toFloat() / screenWidth.coerceAtLeast(1)
+        val captureWidth = CAPTURE_WIDTH
+        val captureHeight = (screenHeight * scale).toInt().coerceAtLeast(1)
+
+        // Android 14+ throws unless a callback is registered before createVirtualDisplay().
+        val callback = object : MediaProjection.Callback() {
+            override fun onStop() {
+                Log.i(TAG, "MediaProjection stopped")
+                // A late onStop from a replaced session must not tear down the current one.
+                if (mediaProjection === projection) release()
+            }
+        }
+        projection.registerCallback(callback, null)
+
+        val reader = ImageReader.newInstance(captureWidth, captureHeight, PixelFormat.RGBA_8888, 2)
+        mediaProjection = projection
+        projectionCallback = callback
+        imageReader = reader
+        virtualDisplay = projection.createVirtualDisplay(
+            "DroidClaw",
+            captureWidth, captureHeight, screenDensity,
+            DisplayManager.VIRTUAL_DISPLAY_FLAG_AUTO_MIRROR,
+            reader.surface, null, null
+        )
+
+        isAvailable.value = true
+        Log.i(TAG, "Screen capture initialized: ${captureWidth}x${captureHeight}")
+    }
+
+    /**
+     * Grabs the most recent frame and encodes it as JPEG (quality 50).
+     *
+     * @return the JPEG bytes, or null when capture is not initialized, no frame is
+     * available, or the frame looks fully black (treated as FLAG_SECURE content).
+     */
+    fun capture(): ByteArray? {
+        val reader = imageReader ?: return null
+        val image = reader.acquireLatestImage() ?: return null
+        return try {
+            val planes = image.planes
+            val buffer = planes[0].buffer
+            val pixelStride = planes[0].pixelStride
+            val rowStride = planes[0].rowStride
+            val rowPadding = rowStride - pixelStride * image.width
+
+            // Rows may be padded, so build a bitmap as wide as the stride, then crop.
+            val bitmap = Bitmap.createBitmap(
+                image.width + rowPadding / pixelStride,
+                image.height,
+                Bitmap.Config.ARGB_8888
+            )
+            bitmap.copyPixelsFromBuffer(buffer)
+
+            val cropped = Bitmap.createBitmap(bitmap, 0, 0, image.width, image.height)
+            if (cropped != bitmap) bitmap.recycle()
+
+            if (isBlackFrame(cropped)) {
+                cropped.recycle()
+                Log.w(TAG, "Detected FLAG_SECURE (black frame)")
+                return null
+            }
+
+            val stream = ByteArrayOutputStream()
+            cropped.compress(Bitmap.CompressFormat.JPEG, 50, stream)
+            cropped.recycle()
+            stream.toByteArray()
+        } finally {
+            image.close()
+        }
+    }
+
+    /** Returns true when the four corners and the centre of [bitmap] all equal Color.BLACK. */
+    private fun isBlackFrame(bitmap: Bitmap): Boolean {
+        val points = listOf(
+            0 to 0,
+            bitmap.width - 1 to 0,
+            0 to bitmap.height - 1,
+            bitmap.width - 1 to bitmap.height - 1,
+            bitmap.width / 2 to bitmap.height / 2
+        )
+        return points.all { (x, y) -> bitmap.getPixel(x, y) == android.graphics.Color.BLACK }
+    }
+
+    /** Tears down the display, reader and projection and marks capture unavailable. */
+    fun release() {
+        virtualDisplay?.release()
+        virtualDisplay = null
+        imageReader?.close()
+        imageReader = null
+        // Detach the projection before stop() so its onStop callback cannot re-enter release().
+        val projection = mediaProjection
+        val callback = projectionCallback
+        mediaProjection = null
+        projectionCallback = null
+        if (projection != null) {
+            if (callback != null) projection.unregisterCallback(callback)
+            projection.stop()
+        }
+        isAvailable.value = false
+    }
+}