feat(android): add GestureExecutor and ScreenCaptureManager

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Sanju Sivalingam
2026-02-17 17:47:10 +05:30
parent 0e8ff24e08
commit ac7fc85891
2 changed files with 371 additions and 0 deletions

View File

@@ -0,0 +1,248 @@
package com.thisux.droidclaw.accessibility
import android.accessibilityservice.AccessibilityService
import android.accessibilityservice.GestureDescription
import android.content.Intent
import android.graphics.Path
import android.net.Uri
import android.os.Bundle
import android.util.Log
import android.view.accessibility.AccessibilityNodeInfo
import com.thisux.droidclaw.model.ServerMessage
import kotlinx.coroutines.suspendCancellableCoroutine
import kotlin.coroutines.resume
/**
 * Outcome of a single executed action.
 *
 * @property success `true` when the action was carried out.
 * @property error description of the failure, or `null` (the default) when none is given.
 * @property data optional payload produced by the action, or `null` (the default).
 */
data class ActionResult(
    val success: Boolean,
    val error: String? = null,
    val data: String? = null
)
/**
 * Translates [ServerMessage] commands into accessibility actions, gestures,
 * intents and clipboard operations performed through [service].
 *
 * Every command is reported back as an [ActionResult]; failures are returned
 * as `success = false` rather than thrown (coroutine cancellation excepted).
 */
class GestureExecutor(private val service: DroidClawAccessibilityService) {

    /**
     * Runs the action named by `msg.type` and returns its outcome.
     *
     * Missing numeric arguments default to `0` (300 ms for a swipe duration,
     * 1000 ms for a wait) and missing string arguments default to `""`.
     * Any exception raised by an action is logged and converted into a failed
     * [ActionResult]; [kotlinx.coroutines.CancellationException] is rethrown so
     * that cancelling the calling coroutine still works.
     */
    suspend fun execute(msg: ServerMessage): ActionResult {
        return try {
            when (msg.type) {
                "tap" -> executeTap(msg.x ?: 0, msg.y ?: 0)
                "type" -> executeType(msg.text ?: "")
                "enter" -> executeEnter()
                "back" -> executeGlobalAction(AccessibilityService.GLOBAL_ACTION_BACK)
                "home" -> executeGlobalAction(AccessibilityService.GLOBAL_ACTION_HOME)
                "notifications" -> executeGlobalAction(AccessibilityService.GLOBAL_ACTION_NOTIFICATIONS)
                "longpress" -> executeLongPress(msg.x ?: 0, msg.y ?: 0)
                "swipe" -> executeSwipe(
                    msg.x1 ?: 0, msg.y1 ?: 0,
                    msg.x2 ?: 0, msg.y2 ?: 0,
                    msg.duration ?: 300
                )
                "launch" -> executeLaunch(msg.packageName ?: "")
                "clear" -> executeClear()
                "clipboard_set" -> executeClipboardSet(msg.text ?: "")
                "clipboard_get" -> executeClipboardGet()
                "paste" -> executePaste()
                "open_url" -> executeOpenUrl(msg.url ?: "")
                "switch_app" -> executeLaunch(msg.packageName ?: "")
                "keyevent" -> executeKeyEvent(msg.code ?: 0)
                "open_settings" -> executeOpenSettings()
                "wait" -> executeWait(msg.duration ?: 1000)
                else -> ActionResult(false, "Unknown action: ${msg.type}")
            }
        } catch (e: kotlinx.coroutines.CancellationException) {
            // Cancellation is control flow, not an action failure: never swallow it.
            throw e
        } catch (e: Exception) {
            Log.e(TAG, "Action ${msg.type} failed", e)
            ActionResult(false, e.message)
        }
    }

    /** Clicks the node at ([x], [y]); falls back to a synthetic tap gesture if that does not succeed. */
    private suspend fun executeTap(x: Int, y: Int): ActionResult {
        if (performOnNodeAt(x, y, AccessibilityNodeInfo.ACTION_CLICK)) {
            return ActionResult(true)
        }
        return dispatchTapGesture(x, y)
    }

    /** Replaces the text of the input-focused node with [text]. */
    private fun executeType(text: String): ActionResult =
        setFocusedText(text, "No focused editable node found")

    /**
     * Sends "enter": on API 30+ first tries the IME enter action on the
     * input-focused node, otherwise (or if that fails) sends KEYCODE_ENTER.
     */
    private suspend fun executeEnter(): ActionResult {
        val focused = findFocusedNode()
        if (focused != null) {
            try {
                if (android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.R) {
                    val action = AccessibilityNodeInfo.AccessibilityAction.ACTION_IME_ENTER
                    if (focused.performAction(action.id)) {
                        return ActionResult(true)
                    }
                }
            } finally {
                focused.recycle()
            }
        }
        // Fallback: dispatch Enter keyevent
        return executeKeyEvent(android.view.KeyEvent.KEYCODE_ENTER)
    }

    /** Performs one of the `AccessibilityService.GLOBAL_ACTION_*` actions. */
    private fun executeGlobalAction(action: Int): ActionResult {
        val success = service.performGlobalAction(action)
        return ActionResult(success, if (!success) "Global action failed" else null)
    }

    /** Long-clicks the node at ([x], [y]); falls back to a 1000 ms stationary press gesture. */
    private suspend fun executeLongPress(x: Int, y: Int): ActionResult {
        if (performOnNodeAt(x, y, AccessibilityNodeInfo.ACTION_LONG_CLICK)) {
            return ActionResult(true)
        }
        return dispatchSwipeGesture(x, y, x, y, 1000)
    }

    /** Swipes from ([x1], [y1]) to ([x2], [y2]) over [duration] milliseconds. */
    private suspend fun executeSwipe(x1: Int, y1: Int, x2: Int, y2: Int, duration: Int): ActionResult =
        dispatchSwipeGesture(x1, y1, x2, y2, duration)

    /** Starts the launcher activity of [packageName], if the package manager resolves one. */
    private fun executeLaunch(packageName: String): ActionResult {
        val intent = service.packageManager.getLaunchIntentForPackage(packageName)
            ?: return ActionResult(false, "Package not found: $packageName")
        intent.addFlags(Intent.FLAG_ACTIVITY_NEW_TASK)
        service.startActivity(intent)
        return ActionResult(true)
    }

    /** Empties the text of the input-focused node. */
    private fun executeClear(): ActionResult =
        setFocusedText("", "No focused editable node to clear")

    /** Puts [text] on the primary clipboard as plain text. */
    private fun executeClipboardSet(text: String): ActionResult {
        val clipboard = service.getSystemService(android.content.Context.CLIPBOARD_SERVICE) as android.content.ClipboardManager
        val clip = android.content.ClipData.newPlainText("droidclaw", text)
        clipboard.setPrimaryClip(clip)
        return ActionResult(true)
    }

    /**
     * Returns the text of the first primary-clip item in [ActionResult.data],
     * or `""` when there is no clip, the clip has no items, or the item has no text.
     */
    private fun executeClipboardGet(): ActionResult {
        val clipboard = service.getSystemService(android.content.Context.CLIPBOARD_SERVICE) as android.content.ClipboardManager
        val text = clipboard.primaryClip
            ?.takeIf { it.itemCount > 0 } // guard getItemAt(0) against an item-less clip
            ?.getItemAt(0)
            ?.text
            ?.toString()
            .orEmpty()
        return ActionResult(true, data = text)
    }

    /** Pastes the clipboard into the input-focused node. */
    private fun executePaste(): ActionResult =
        performOnFocusedNode("No focused node to paste into") { node ->
            node.performAction(AccessibilityNodeInfo.ACTION_PASTE)
        }

    /** Opens [url] with an `ACTION_VIEW` intent in a new task. */
    private fun executeOpenUrl(url: String): ActionResult {
        val intent = Intent(Intent.ACTION_VIEW, Uri.parse(url)).apply {
            addFlags(Intent.FLAG_ACTIVITY_NEW_TASK)
        }
        service.startActivity(intent)
        return ActionResult(true)
    }

    /**
     * Runs `input keyevent <code>` and reports whether the command succeeded.
     *
     * The command is executed on [kotlinx.coroutines.Dispatchers.IO] because it
     * blocks until the child process exits. Its combined stdout/stderr is
     * drained (so the child cannot stall on a full pipe) and a non-zero exit
     * code is reported as a failure instead of being ignored.
     *
     * NOTE(review): whether the `input` command is allowed to inject events
     * from this process depends on the platform/permissions — confirm on
     * target devices.
     */
    private suspend fun executeKeyEvent(code: Int): ActionResult =
        kotlinx.coroutines.withContext(kotlinx.coroutines.Dispatchers.IO) {
            // No suspension points inside this try, so the broad catch cannot
            // swallow a CancellationException.
            try {
                val process = ProcessBuilder("input", "keyevent", code.toString())
                    .redirectErrorStream(true)
                    .start()
                try {
                    val output = process.inputStream.bufferedReader().use { it.readText() }.trim()
                    val exitCode = process.waitFor()
                    if (exitCode == 0) {
                        ActionResult(true)
                    } else {
                        val detail = if (output.isEmpty()) "" else ": ${output.take(200)}"
                        ActionResult(false, "keyevent failed: exit code $exitCode$detail")
                    }
                } finally {
                    process.destroy()
                }
            } catch (e: Exception) {
                ActionResult(false, "keyevent failed: ${e.message}")
            }
        }

    /** Opens the system settings screen in a new task. */
    private fun executeOpenSettings(): ActionResult {
        val intent = Intent(android.provider.Settings.ACTION_SETTINGS).apply {
            addFlags(Intent.FLAG_ACTIVITY_NEW_TASK)
        }
        service.startActivity(intent)
        return ActionResult(true)
    }

    /** Suspends for [duration] milliseconds. */
    private suspend fun executeWait(duration: Int): ActionResult {
        kotlinx.coroutines.delay(duration.toLong())
        return ActionResult(true)
    }

    /** Dispatches a 50 ms single-point stroke at ([x], [y]). */
    private suspend fun dispatchTapGesture(x: Int, y: Int): ActionResult {
        val path = Path().apply { moveTo(x.toFloat(), y.toFloat()) }
        val stroke = GestureDescription.StrokeDescription(path, 0, 50)
        val gesture = GestureDescription.Builder().addStroke(stroke).build()
        return dispatchGesture(gesture)
    }

    /** Dispatches a straight-line stroke from ([x1], [y1]) to ([x2], [y2]) lasting [duration] ms. */
    private suspend fun dispatchSwipeGesture(
        x1: Int, y1: Int, x2: Int, y2: Int, duration: Int
    ): ActionResult {
        val path = Path().apply {
            moveTo(x1.toFloat(), y1.toFloat())
            lineTo(x2.toFloat(), y2.toFloat())
        }
        val stroke = GestureDescription.StrokeDescription(path, 0, duration.toLong())
        val gesture = GestureDescription.Builder().addStroke(stroke).build()
        return dispatchGesture(gesture)
    }

    /**
     * Dispatches [gesture] through the accessibility service and suspends until
     * it completes or is cancelled.
     *
     * If the service reports that the gesture was not dispatched, no callback
     * will arrive, so the coroutine is resumed immediately with a failure
     * instead of suspending forever.
     */
    private suspend fun dispatchGesture(gesture: GestureDescription): ActionResult =
        suspendCancellableCoroutine { cont ->
            val callback = object : AccessibilityService.GestureResultCallback() {
                override fun onCompleted(gestureDescription: GestureDescription?) {
                    if (cont.isActive) cont.resume(ActionResult(true))
                }

                override fun onCancelled(gestureDescription: GestureDescription?) {
                    if (cont.isActive) cont.resume(ActionResult(false, "Gesture cancelled"))
                }
            }
            val dispatched = service.dispatchGesture(gesture, callback, null)
            if (!dispatched && cont.isActive) {
                cont.resume(ActionResult(false, "Gesture could not be dispatched"))
            }
        }

    /** Returns the node holding input focus in the active window, or `null` if there is none. */
    private fun findFocusedNode(): AccessibilityNodeInfo? {
        return service.rootInActiveWindow?.findFocus(AccessibilityNodeInfo.FOCUS_INPUT)
    }

    /**
     * Performs [action] on the node the service finds at ([x], [y]) and recycles it.
     *
     * @return `true` only if a node was found and the action succeeded.
     */
    private fun performOnNodeAt(x: Int, y: Int, action: Int): Boolean {
        val node = service.findNodeAt(x, y) ?: return false
        return try {
            node.performAction(action)
        } finally {
            node.recycle()
        }
    }

    /**
     * Runs [action] on the input-focused node and recycles it afterwards.
     *
     * @return success if [action] returned `true`; otherwise (including when no
     * node has input focus) a failed result carrying [failure].
     */
    private inline fun performOnFocusedNode(
        failure: String,
        action: (AccessibilityNodeInfo) -> Boolean
    ): ActionResult {
        val focused = findFocusedNode() ?: return ActionResult(false, failure)
        return try {
            if (action(focused)) ActionResult(true) else ActionResult(false, failure)
        } finally {
            focused.recycle()
        }
    }

    /** Sets the input-focused node's text to [text] via `ACTION_SET_TEXT`; [failure] is the error on failure. */
    private fun setFocusedText(text: String, failure: String): ActionResult =
        performOnFocusedNode(failure) { node ->
            val args = Bundle().apply {
                putCharSequence(AccessibilityNodeInfo.ACTION_ARGUMENT_SET_TEXT_CHARSEQUENCE, text)
            }
            node.performAction(AccessibilityNodeInfo.ACTION_SET_TEXT, args)
        }

    companion object {
        private const val TAG = "GestureExecutor"
    }
}

View File

@@ -0,0 +1,123 @@
package com.thisux.droidclaw.capture
import android.content.Context
import android.content.Intent
import android.graphics.Bitmap
import android.graphics.PixelFormat
import android.hardware.display.DisplayManager
import android.hardware.display.VirtualDisplay
import android.media.ImageReader
import android.media.projection.MediaProjection
import android.media.projection.MediaProjectionManager
import android.util.DisplayMetrics
import android.util.Log
import android.view.WindowManager
import kotlinx.coroutines.flow.MutableStateFlow
import java.io.ByteArrayOutputStream
/**
 * Captures the screen through a [MediaProjection] mirrored into a virtual
 * display that is 720 pixels wide, and hands frames out as JPEG bytes.
 *
 * [isAvailable] is `true` only while a capture session is fully set up.
 */
class ScreenCaptureManager(private val context: Context) {
    private var mediaProjection: MediaProjection? = null
    private var virtualDisplay: VirtualDisplay? = null
    private var imageReader: ImageReader? = null
    private var screenWidth = 720
    private var screenHeight = 1280
    private var screenDensity = DisplayMetrics.DENSITY_DEFAULT

    /**
     * Starts a capture session from the result of the screen-capture consent
     * request.
     *
     * Any previous session is released first. If the projection or the virtual
     * display cannot be obtained, everything is released again and
     * [isAvailable] stays `false`. Exceptions thrown while building the
     * session are rethrown after cleanup.
     *
     * @param resultCode result code delivered with the consent result.
     * @param data intent delivered with the consent result.
     */
    fun initialize(resultCode: Int, data: Intent) {
        // Drop a previous session so its display, reader and projection are not leaked.
        release()

        val mgr = context.getSystemService(Context.MEDIA_PROJECTION_SERVICE) as MediaProjectionManager
        val projection: MediaProjection? = mgr.getMediaProjection(resultCode, data)
        if (projection == null) {
            Log.e(TAG, "MediaProjection unavailable (resultCode=$resultCode)")
            return
        }

        // The callback must be registered BEFORE createVirtualDisplay():
        // Android 14+ throws IllegalStateException otherwise.
        projection.registerCallback(object : MediaProjection.Callback() {
            override fun onStop() {
                Log.i(TAG, "MediaProjection stopped")
                // Ignore a stop notification from a projection that has already
                // been replaced or released, so it cannot tear down a newer session.
                if (mediaProjection === projection) release()
            }
        }, null)
        mediaProjection = projection

        try {
            val wm = context.getSystemService(Context.WINDOW_SERVICE) as WindowManager
            val metrics = DisplayMetrics()
            @Suppress("DEPRECATION")
            wm.defaultDisplay.getRealMetrics(metrics)
            screenWidth = metrics.widthPixels
            screenHeight = metrics.heightPixels
            screenDensity = metrics.densityDpi

            // Scale to a fixed width, keeping the screen's aspect ratio.
            val scale = CAPTURE_WIDTH.toFloat() / screenWidth
            val captureWidth = CAPTURE_WIDTH
            val captureHeight = (screenHeight * scale).toInt()

            val reader = ImageReader.newInstance(captureWidth, captureHeight, PixelFormat.RGBA_8888, 2)
            imageReader = reader
            val display: VirtualDisplay? = projection.createVirtualDisplay(
                "DroidClaw",
                captureWidth, captureHeight, screenDensity,
                DisplayManager.VIRTUAL_DISPLAY_FLAG_AUTO_MIRROR,
                reader.surface, null, null
            )
            if (display == null) {
                Log.e(TAG, "Failed to create virtual display")
                release()
                return
            }
            virtualDisplay = display

            isAvailable.value = true
            Log.i(TAG, "Screen capture initialized: ${captureWidth}x${captureHeight}")
        } catch (e: Exception) {
            // Do not leave a half-built session behind.
            release()
            throw e
        }
    }

    /**
     * Grabs the most recent frame and encodes it as JPEG (quality 50).
     *
     * @return the JPEG bytes, or `null` when capture is not initialized, no
     * frame is available, or the frame is judged to be black (see [isBlackFrame]).
     */
    fun capture(): ByteArray? {
        val reader = imageReader ?: return null
        val image = reader.acquireLatestImage() ?: return null
        return try {
            val plane = image.planes[0]
            val pixelStride = plane.pixelStride
            // Rows may be padded beyond image.width; size the bitmap to the
            // full row stride, then crop the padding away.
            val rowPadding = plane.rowStride - pixelStride * image.width
            val padded = Bitmap.createBitmap(
                image.width + rowPadding / pixelStride,
                image.height,
                Bitmap.Config.ARGB_8888
            )
            val frame = try {
                padded.copyPixelsFromBuffer(plane.buffer)
                Bitmap.createBitmap(padded, 0, 0, image.width, image.height)
            } catch (e: Exception) {
                padded.recycle()
                throw e
            }
            if (frame !== padded) padded.recycle()

            try {
                if (isBlackFrame(frame)) {
                    Log.w(TAG, "Detected FLAG_SECURE (black frame)")
                    null
                } else {
                    val stream = ByteArrayOutputStream()
                    frame.compress(Bitmap.CompressFormat.JPEG, JPEG_QUALITY, stream)
                    stream.toByteArray()
                }
            } finally {
                frame.recycle()
            }
        } finally {
            image.close()
        }
    }

    /**
     * Heuristic black-frame check: `true` when the four corner pixels and the
     * centre pixel of [bitmap] are all opaque black.
     *
     * NOTE(review): a legitimately dark screen with black corners and centre
     * would also match — confirm this is acceptable.
     */
    private fun isBlackFrame(bitmap: Bitmap): Boolean {
        val points = listOf(
            0 to 0,
            bitmap.width - 1 to 0,
            0 to bitmap.height - 1,
            bitmap.width - 1 to bitmap.height - 1,
            bitmap.width / 2 to bitmap.height / 2
        )
        return points.all { (x, y) -> bitmap.getPixel(x, y) == android.graphics.Color.BLACK }
    }

    /**
     * Releases the virtual display, image reader and projection (if any) and
     * marks capture as unavailable. Safe to call repeatedly.
     */
    fun release() {
        virtualDisplay?.release()
        virtualDisplay = null
        imageReader?.close()
        imageReader = null
        // Clear the field before stopping so the onStop callback triggered by
        // stop() sees no current projection and does nothing.
        val projection = mediaProjection
        mediaProjection = null
        projection?.stop()
        isAvailable.value = false
    }

    companion object {
        private const val TAG = "ScreenCapture"
        private const val CAPTURE_WIDTH = 720
        private const val JPEG_QUALITY = 50
        const val REQUEST_CODE = 1001
        val isAvailable = MutableStateFlow(false)
    }
}