Skip to content

Commit ef31852

Browse files
Background run (#3)
* Add files via upload * Add files via upload * Add files via upload * Add files via upload * Add files via upload * Add files via upload * Add files via upload * Add files via upload * Add files via upload * Add files via upload
1 parent 789d04b commit ef31852

File tree

4 files changed

+422
-38
lines changed

4 files changed

+422
-38
lines changed

app/src/main/AndroidManifest.xml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,12 @@
2626

2727
<!-- Accessibility service permission -->
2828
<uses-permission android:name="android.permission.BIND_ACCESSIBILITY_SERVICE" />
29+
30+
<!-- Foreground service permission for background operation -->
31+
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
2932

3033
<application
34+
android:name=".PhotoReasoningApplication"
3135
android:allowBackup="true"
3236
android:dataExtractionRules="@xml/data_extraction_rules"
3337
android:fullBackupContent="@xml/backup_rules"
@@ -51,7 +55,8 @@
5155
<service
5256
android:name=".ScreenOperatorAccessibilityService"
5357
android:permission="android.permission.BIND_ACCESSIBILITY_SERVICE"
54-
android:exported="true">
58+
android:exported="true"
59+
android:enabled="true">
5560
<intent-filter>
5661
<action android:name="android.accessibilityservice.AccessibilityService" />
5762
</intent-filter>
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package com.google.ai.sample
2+
3+
import kotlinx.coroutines.CoroutineExceptionHandler
4+
import kotlinx.coroutines.CoroutineScope
5+
import kotlinx.coroutines.Dispatchers
6+
import kotlinx.coroutines.SupervisorJob
7+
import android.app.Application
8+
import android.util.Log
9+
10+
/**
 * Custom [Application] subclass that holds process-wide helpers.
 *
 * It exposes two things through its companion object:
 *  - [applicationScope], a [CoroutineScope] built from a [SupervisorJob],
 *    [Dispatchers.Default] and a logging [CoroutineExceptionHandler]. Nothing in
 *    this class cancels it, so it is not bound to any Activity/Service lifecycle.
 *  - [getInstance], which returns the instance recorded in [onCreate].
 */
class PhotoReasoningApplication : Application() {

    /** Records this instance for [getInstance] and logs that the application started. */
    override fun onCreate() {
        super.onCreate()
        instance = this
        Log.d(TAG, "Application created")
    }

    companion object {
        private const val TAG = "PhotoReasoningApp"

        // Logs any exception that escapes a coroutine launched in [applicationScope].
        private val uncaughtExceptionLogger = CoroutineExceptionHandler { _, throwable ->
            Log.e(TAG, "Uncaught exception in application scope: ${throwable.message}", throwable)
        }

        // Scope shared by the whole app; the SupervisorJob keeps one failing child
        // from cancelling its siblings.
        val applicationScope = CoroutineScope(
            SupervisorJob() + Dispatchers.Default + uncaughtExceptionLogger
        )

        // Assigned in onCreate(); reading it earlier throws
        // UninitializedPropertyAccessException (lateinit semantics).
        private lateinit var instance: PhotoReasoningApplication

        /** Returns the application instance stored by [onCreate]. */
        fun getInstance(): PhotoReasoningApplication = instance
    }
}

app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt

Lines changed: 273 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -604,19 +604,281 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
604604
showToast("Nehme Screenshot auf durch Simulation der Hardware-Tasten...", false)
605605

606606
try {
607+
// Capture screen information before taking the screenshot
608+
val screenInfo = captureScreenInformation()
609+
607610
// Simulate pressing Power + Volume Down buttons to take a screenshot
608611
simulateScreenshotButtonCombination()
609612

610613
// Wait a moment for the screenshot to be saved, then retrieve it
611614
handler.postDelayed({
612-
retrieveLatestScreenshot()
615+
retrieveLatestScreenshot(screenInfo)
613616
}, 1000) // Wait 1 second for the screenshot to be saved
614617
} catch (e: Exception) {
615618
Log.e(TAG, "Error taking screenshot: ${e.message}")
616619
showToast("Fehler beim Aufnehmen des Screenshots: ${e.message}", true)
617620
}
618621
}
619622

623+
/**
 * Builds a textual (German) listing of every interactive element currently on screen.
 *
 * The root node is refreshed first. Each element found by [findAllInteractiveElements]
 * is rendered on one line: optional ID, text, content description and inferred name,
 * then class name, centre position of its screen bounds and clickable flag.
 *
 * The node copies returned by [findAllInteractiveElements] are recycled before this
 * function returns, even if describing one of them throws.
 *
 * @return the listing, or a fixed German message when no root node is available.
 */
private fun captureScreenInformation(): String {
    Log.d(TAG, "Capturing screen information")

    // Refresh the root node to ensure we have the latest information
    refreshRootNode()

    // Snapshot the mutable property once so the null check and the use below
    // are guaranteed to refer to the same node (no `!!` needed).
    val root = rootNode
    if (root == null) {
        Log.e(TAG, "Root node is null, cannot capture screen information")
        return "Keine Bildschirminformationen verfügbar (Root-Knoten ist null)"
    }

    val screenInfo = StringBuilder()
    screenInfo.append("Bildschirmelemente:\n")

    val interactiveElements = findAllInteractiveElements(root)
    try {
        if (interactiveElements.isEmpty()) {
            screenInfo.append("Keine interaktiven Elemente gefunden.")
        } else {
            screenInfo.append("Gefundene interaktive Elemente (${interactiveElements.size}):\n\n")
            interactiveElements.forEachIndexed { index, element ->
                appendElementDescription(screenInfo, index + 1, element)
            }
        }
    } finally {
        // Recycle every obtained copy exactly once, including when a description failed.
        interactiveElements.forEach { it.recycle() }
    }

    Log.d(TAG, "Screen information captured: ${screenInfo.length} characters")
    return screenInfo.toString()
}

/**
 * Appends the one-line description of [element] (numbered [position]) to [target].
 * Does not recycle [element].
 */
private fun appendElementDescription(
    target: StringBuilder,
    position: Int,
    element: AccessibilityNodeInfo
) {
    target.append("$position. ")

    // View ID, when the node exposes one
    val elementId = getNodeId(element)
    if (elementId.isNotEmpty()) {
        target.append("ID: \"$elementId\" ")
    }

    // Visible text
    if (!element.text.isNullOrEmpty()) {
        target.append("Text: \"${element.text}\" ")
    }

    // Content description
    if (!element.contentDescription.isNullOrEmpty()) {
        target.append("Beschreibung: \"${element.contentDescription}\" ")
    }

    // Name inferred from the view hierarchy
    val buttonName = getButtonName(element)
    if (buttonName.isNotEmpty()) {
        target.append("Name: \"$buttonName\" ")
    }

    target.append("Klasse: ${element.className} ")

    // Centre of the element's bounds in screen coordinates
    val rect = Rect()
    element.getBoundsInScreen(rect)
    target.append("Position: (${rect.centerX()}, ${rect.centerY()}) ")

    target.append("Klickbar: ${if (element.isClickable) "Ja" else "Nein"}")
    target.append("\n")
}
696+
697+
/**
 * Returns the short view ID of [node].
 *
 * For a resource name of the form `package:id/name` the segment after the first
 * `/` is returned; a resource name without `/` is returned unchanged.
 *
 * @return the ID, or an empty string when the node has no resource name or
 *         reading it threw (the exception is logged).
 */
private fun getNodeId(node: AccessibilityNodeInfo): String {
    return try {
        val resourceName = node.viewIdResourceName
        if (resourceName.isNullOrEmpty()) {
            ""
        } else {
            // Second "/"-separated segment if there is one, otherwise the full name
            resourceName.split("/").getOrNull(1) ?: resourceName
        }
    } catch (e: Exception) {
        Log.e(TAG, "Error getting node ID: ${e.message}")
        ""
    }
}
716+
717+
/**
 * Derives a human-readable name for [node], trying progressively weaker sources:
 *
 *  1. the node's text,
 *  2. its content description,
 *  3. its view ID (unless it starts with `android:`), converted from
 *     snake_case/camelCase to a capitalised, space-separated phrase,
 *  4. for classes whose name contains "Button": the parent's text, a sibling's
 *     text, "New" for a FloatingActionButton that has a parent, "New" for a small
 *     square element in the bottom-left of the screen, else "Button",
 *  5. for classes whose name contains "EditText": "Textfeld: <hint>" or "Textfeld",
 *  6. for plain `View`/`ImageView`: "New"/"Send" for a small square element in the
 *     bottom-left/bottom-right corner.
 *
 * @param node the node to name; it is not recycled here.
 * @return the best name found, or an empty string when nothing applies or an
 *         exception occurred (the exception is logged).
 */
private fun getButtonName(node: AccessibilityNodeInfo): String {
    try {
        // First check if the node has text
        val text = node.text
        if (!text.isNullOrEmpty()) {
            return text.toString()
        }

        // Then check content description
        val description = node.contentDescription
        if (!description.isNullOrEmpty()) {
            return description.toString()
        }

        // Get the node ID which might contain a name
        val nodeId = getNodeId(node)
        if (nodeId.isNotEmpty() && !nodeId.startsWith("android:")) {
            // Convert camelCase or snake_case to a readable phrase. Resource IDs are
            // ASCII identifiers, so case mapping uses Locale.ROOT to stay independent
            // of the device language (e.g. Turkish dotless i).
            return nodeId
                .replace("_", " ")
                .replace(Regex("([a-z])([A-Z])"), "$1 $2")
                .lowercase(Locale.ROOT)
                .replaceFirstChar { if (it.isLowerCase()) it.titlecase(Locale.ROOT) else it.toString() }
        }

        val className = node.className?.toString().orEmpty()

        // "Button" also matches ImageButton and FloatingActionButton
        if (className.contains("Button", ignoreCase = true)) {
            // For buttons without text, try to infer a name from the parent or siblings
            val inferredName = inferButtonNameFromParent(node, className)
            if (inferredName != null) {
                return inferredName
            }

            val rect = Rect()
            node.getBoundsInScreen(rect)
            val displayMetrics = resources.displayMetrics
            val screenHeight = displayMetrics.heightPixels

            // Small element with square bounds in the bottom 20% of the screen ...
            if (rect.height() == rect.width() && rect.height() < displayMetrics.densityDpi / 4 &&
                rect.bottom > screenHeight * 0.8) {
                // ... located in the left 30% is treated as a "New"/"Add" button
                if (rect.centerX() < displayMetrics.widthPixels * 0.3) {
                    return "New"
                }
            }

            // It is a button but no better name was found
            return "Button"
        }

        if (className.contains("EditText", ignoreCase = true)) {
            // Look up getHintText reflectively; the method may not exist on the node class
            try {
                val hintTextMethod = node.javaClass.getMethod("getHintText")
                val hintText = hintTextMethod.invoke(node)?.toString()
                if (!hintText.isNullOrEmpty()) {
                    return "Textfeld: $hintText"
                }
            } catch (e: Exception) {
                // Reflection failed; fall back to the generic label below
            }

            return "Textfeld"
        }

        // Generic views that are commonly used as buttons
        if (className == "android.view.View" || className == "android.widget.ImageView") {
            val rect = Rect()
            node.getBoundsInScreen(rect)
            val displayMetrics = resources.displayMetrics
            val screenHeight = displayMetrics.heightPixels
            val screenWidth = displayMetrics.widthPixels

            // Small element with square bounds in the bottom 10% of the screen
            if (rect.width() == rect.height() && rect.width() < displayMetrics.densityDpi / 3 &&
                rect.bottom > screenHeight * 0.9) {
                // Bottom left is treated as "New"
                if (rect.centerX() < screenWidth * 0.2) {
                    return "New"
                }

                // Bottom right is treated as "Send"
                if (rect.centerX() > screenWidth * 0.8) {
                    return "Send"
                }
            }
        }
    } catch (e: Exception) {
        Log.e(TAG, "Error getting button name: ${e.message}")
    }
    return ""
}

/**
 * Tries to name a text-less button from its surroundings: the parent's text, then the
 * text of the first sibling that has one, then "New" when [className] contains
 * "FloatingActionButton". Returns null when [node] has no parent or nothing matched.
 *
 * The parent and every sibling obtained here are recycled on all paths.
 */
private fun inferButtonNameFromParent(node: AccessibilityNodeInfo, className: String): String? {
    val parent = node.parent ?: return null
    try {
        // Check if the parent has text that might describe this button
        val parentText = parent.text
        if (!parentText.isNullOrEmpty()) {
            return parentText.toString()
        }

        // Check siblings for text that might be related
        for (i in 0 until parent.childCount) {
            val sibling = parent.getChild(i) ?: continue
            try {
                val siblingText = sibling.text
                if (sibling != node && !siblingText.isNullOrEmpty()) {
                    return siblingText.toString()
                }
            } finally {
                sibling.recycle()
            }
        }

        // A Floating Action Button is treated as "New"
        return if (className.contains("FloatingActionButton", ignoreCase = true)) "New" else null
    } finally {
        parent.recycle()
    }
}
856+
857+
/**
 * Recursively collects the interactive nodes in the subtree rooted at [node].
 *
 * A node counts as interactive when it is clickable, long-clickable or focusable.
 * Matches are added as copies made with [AccessibilityNodeInfo.obtain]; child nodes
 * fetched during the walk are recycled after their subtree has been visited.
 *
 * @return the collected copies in depth-first order; if an exception occurs it is
 *         logged and whatever was collected so far is returned.
 */
private fun findAllInteractiveElements(node: AccessibilityNodeInfo): List<AccessibilityNodeInfo> {
    val collected = mutableListOf<AccessibilityNodeInfo>()

    try {
        val isInteractive = node.isClickable || node.isLongClickable || node.isFocusable
        if (isInteractive) {
            collected.add(AccessibilityNodeInfo.obtain(node))
        }

        // Descend into every child that can be retrieved
        (0 until node.childCount).forEach { childIndex ->
            val child = node.getChild(childIndex) ?: return@forEach
            collected.addAll(findAllInteractiveElements(child))
            child.recycle()
        }
    } catch (e: Exception) {
        Log.e(TAG, "Error finding interactive elements: ${e.message}")
    }

    return collected
}
881+
620882
/**
621883
* Simulate pressing Power + Volume Down buttons to take a screenshot
622884
*/
@@ -686,7 +948,7 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
686948
/**
687949
* Retrieve the latest screenshot from the standard screenshot folder
688950
*/
689-
private fun retrieveLatestScreenshot() {
951+
private fun retrieveLatestScreenshot(screenInfo: String) {
690952
try {
691953
Log.d(TAG, "Retrieving latest screenshot")
692954
showToast("Suche nach dem aufgenommenen Screenshot...", false)
@@ -701,8 +963,8 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
701963
// Convert file to URI
702964
val screenshotUri = Uri.fromFile(screenshotFile)
703965

704-
// Add the screenshot to the conversation
705-
addScreenshotToConversation(screenshotUri)
966+
// Add the screenshot to the conversation with screen information
967+
addScreenshotToConversation(screenshotUri, screenInfo)
706968
} else {
707969
Log.e(TAG, "No screenshot file found")
708970
showToast("Kein Screenshot gefunden. Bitte prüfen Sie die Berechtigungen.", true)
@@ -834,11 +1096,11 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
8341096
}
8351097

8361098
/**
837-
* Add the screenshot to the conversation
1099+
* Add the screenshot to the conversation with screen information
8381100
*/
839-
private fun addScreenshotToConversation(screenshotUri: Uri) {
1101+
private fun addScreenshotToConversation(screenshotUri: Uri, screenInfo: String) {
8401102
try {
841-
Log.d(TAG, "Adding screenshot to conversation: $screenshotUri")
1103+
Log.d(TAG, "Adding screenshot to conversation with screen information: $screenshotUri")
8421104

8431105
// Get the MainActivity instance
8441106
val mainActivity = MainActivity.getInstance()
@@ -856,11 +1118,11 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
8561118
return
8571119
}
8581120

859-
// Add the screenshot to the conversation
860-
photoReasoningViewModel.addScreenshotToConversation(screenshotUri, applicationContext)
1121+
// Add the screenshot to the conversation with screen information
1122+
photoReasoningViewModel.addScreenshotToConversation(screenshotUri, applicationContext, screenInfo)
8611123

862-
Log.d(TAG, "Screenshot added to conversation")
863-
showToast("Screenshot zur Konversation hinzugefügt", false)
1124+
Log.d(TAG, "Screenshot added to conversation with screen information")
1125+
showToast("Screenshot mit Bildschirminformationen zur Konversation hinzugefügt", false)
8641126
} catch (e: Exception) {
8651127
Log.e(TAG, "Error adding screenshot to conversation: ${e.message}")
8661128
showToast("Fehler beim Hinzufügen des Screenshots zur Konversation: ${e.message}", true)

0 commit comments

Comments
 (0)