Skip to content

Commit d45ac54

Browse files
authored
ADFA-3290 | Implement smart boundary detection for dynamic margins (#1171)
* feat: implement smart boundary detection for dynamic margins. Replaces hardcoded bounds with a vertical projection edge-detection algorithm.
* fix: Guard against invalid `copyOfRange` indices for small or unusual images
1 parent 140716a commit d45ac54

File tree

3 files changed

+166
-15
lines changed

3 files changed

+166
-15
lines changed

cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/ui/viewmodel/ComputerVisionViewModel.kt

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import android.util.Log
99
import androidx.exifinterface.media.ExifInterface
1010
import androidx.lifecycle.ViewModel
1111
import androidx.lifecycle.viewModelScope
12-
import com.google.gson.Gson
12+
import kotlinx.coroutines.Dispatchers
1313
import org.appdevforall.codeonthego.computervision.data.repository.ComputerVisionRepository
1414
import org.appdevforall.codeonthego.computervision.domain.MarginAnnotationParser
1515
import org.appdevforall.codeonthego.computervision.ui.ComputerVisionEffect
@@ -23,8 +23,10 @@ import kotlinx.coroutines.flow.asStateFlow
2323
import kotlinx.coroutines.flow.receiveAsFlow
2424
import kotlinx.coroutines.flow.update
2525
import kotlinx.coroutines.launch
26+
import kotlinx.coroutines.withContext
2627
import org.appdevforall.codeonthego.computervision.R
2728
import org.appdevforall.codeonthego.computervision.utils.CvAnalyticsUtil
29+
import org.appdevforall.codeonthego.computervision.utils.SmartBoundaryDetector
2830

2931
class ComputerVisionViewModel(
3032
private val repository: ComputerVisionRepository,
@@ -107,22 +109,36 @@ class ComputerVisionViewModel(
107109
private fun loadImageFromUri(uri: Uri) {
108110
viewModelScope.launch {
109111
try {
110-
val bitmap = uriToBitmap(uri)
111-
if (bitmap != null) {
112+
val result = withContext(Dispatchers.Default) {
113+
val bitmap = uriToBitmap(uri) ?: return@withContext null
114+
112115
val rotatedBitmap = handleImageRotation(uri, bitmap)
113-
_uiState.update {
114-
it.copy(
115-
currentBitmap = rotatedBitmap,
116-
imageUri = uri,
117-
detections = emptyList(),
118-
visualizedBitmap = null,
119-
leftGuidePct = 0.2f, // Reset to default
120-
rightGuidePct = 0.8f, // Reset to default
121-
parsedAnnotations = emptyMap() // Reset on new image
122-
)
123-
}
124-
} else {
116+
val (leftBoundPx, rightBoundPx) = SmartBoundaryDetector.detectSmartBoundaries(rotatedBitmap)
117+
118+
val widthFloat = rotatedBitmap.width.toFloat()
119+
val leftPct = leftBoundPx / widthFloat
120+
val rightPct = rightBoundPx / widthFloat
121+
122+
Triple(rotatedBitmap, leftPct, rightPct)
123+
}
124+
125+
if (result == null) {
125126
_uiEffect.send(ComputerVisionEffect.ShowToast(R.string.msg_no_image_selected))
127+
return@launch
128+
}
129+
130+
val (rotatedBitmap, leftPct, rightPct) = result
131+
132+
_uiState.update {
133+
it.copy(
134+
currentBitmap = rotatedBitmap,
135+
imageUri = uri,
136+
detections = emptyList(),
137+
visualizedBitmap = null,
138+
leftGuidePct = leftPct,
139+
rightGuidePct = rightPct,
140+
parsedAnnotations = emptyMap() // Reset on new image
141+
)
126142
}
127143
} catch (e: Exception) {
128144
Log.e(TAG, "Error loading image from URI", e)

cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/utils/BitmapUtils.kt

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,14 @@ package org.appdevforall.codeonthego.computervision.utils
33
import android.graphics.Bitmap
44
import android.graphics.Color
55
import android.graphics.RectF
6+
import kotlin.math.abs
67
import kotlin.math.exp
78
import kotlin.math.roundToInt
89

910
object BitmapUtils {
1011

12+
private const val EDGE_DETECTION_THRESHOLD = 30
13+
1114
fun preprocessForOcr(bitmap: Bitmap, blockSize: Int = 31, c: Int = 15): Bitmap {
1215
val width = bitmap.width
1316
val height = bitmap.height
@@ -38,6 +41,35 @@ object BitmapUtils {
3841
return Bitmap.createBitmap(bitmap, left, top, w, h)
3942
}
4043

44+
/**
 * Builds a per-column count of strong horizontal intensity changes.
 *
 * For every interior column `x`, each row contributes 1 when the red channels of the
 * pixels at `x - 1` and `x + 1` differ by more than [EDGE_DETECTION_THRESHOLD]. Only the
 * red channel of each packed ARGB pixel is compared.
 *
 * The image is read one row at a time, so the extra memory needed is a single row of
 * pixels rather than a full copy of the bitmap.
 *
 * @param bitmap image to analyse.
 * @return an array of length `bitmap.width`; the first and last entries are always 0,
 * and the whole array is 0 when the bitmap is narrower than 3 pixels or has no rows.
 */
fun calculateVerticalProjection(bitmap: Bitmap): FloatArray {
    val width = bitmap.width
    val height = bitmap.height
    val projection = FloatArray(width)

    // A central difference needs a neighbour on both sides, so bail out before
    // allocating any pixel buffer for degenerate images.
    if (width < 3 || height == 0) {
        return projection
    }

    // Reused for every row to keep peak memory at one row of pixels.
    val rowPixels = IntArray(width)
    for (y in 0 until height) {
        bitmap.getPixels(rowPixels, 0, width, 0, y, width, 1)
        for (x in 1 until width - 1) {
            val redLeft = (rowPixels[x - 1] shr 16) and 0xFF
            val redRight = (rowPixels[x + 1] shr 16) and 0xFF
            if (abs(redLeft - redRight) > EDGE_DETECTION_THRESHOLD) {
                projection[x] += 1f
            }
        }
    }
    return projection
}
72+
4173
private fun toGrayscale(pixels: IntArray): IntArray {
4274
val gray = IntArray(pixels.size)
4375
for (i in pixels.indices) {
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
package org.appdevforall.codeonthego.computervision.utils
2+
3+
import android.graphics.Bitmap
4+
import org.appdevforall.codeonthego.computervision.utils.BitmapUtils.calculateVerticalProjection
5+
6+
/**
 * Estimates the left and right guide positions of an image from its vertical edge
 * projection (see [calculateVerticalProjection]).
 *
 * Each guide is placed at the midpoint of the widest low-activity run of columns found
 * inside a search zone: the left zone spans from the ignored edge strip to 40% of the
 * width, the right zone from 60% of the width to the opposite ignored edge strip.
 */
object SmartBoundaryDetector {

    private const val DEFAULT_EDGE_IGNORE_PERCENT = 0.05f
    private const val LEFT_ZONE_END_PERCENT = 0.4f
    private const val RIGHT_ZONE_START_PERCENT = 0.6f
    private const val MIN_GAP_WIDTH_PERCENT = 0.02
    private const val PRIMARY_ACTIVITY_THRESHOLD = 0.05f
    private const val FALLBACK_ACTIVITY_THRESHOLD = 0.01f
    private const val LEFT_FALLBACK_BOUND_PERCENT = 0.15f
    private const val RIGHT_FALLBACK_BOUND_PERCENT = 0.85f

    /**
     * Detects the left and right boundaries of [bitmap] in pixels.
     *
     * @param bitmap image to analyse.
     * @param edgeIgnorePercent fraction of the width skipped at each outer edge before
     * searching. Negative values are treated as 0; values large enough to leave an empty
     * search zone make the function return the fallback bounds.
     * @return `(left, right)` x-coordinates in pixels. When a zone yields no usable gap,
     * that side falls back to 15% (left) or 85% (right) of the width.
     */
    fun detectSmartBoundaries(
        bitmap: Bitmap,
        edgeIgnorePercent: Float = DEFAULT_EDGE_IGNORE_PERCENT
    ): Pair<Int, Int> {
        val width = bitmap.width
        val fallbackLeft = (width * LEFT_FALLBACK_BOUND_PERCENT).toInt()
        val fallbackRight = (width * RIGHT_FALLBACK_BOUND_PERCENT).toInt()

        // Clamp to zero: a negative strip would produce a negative start index and an
        // end index beyond the projection, both of which make copyOfRange throw.
        val ignoredEdgePixels = (width * edgeIgnorePercent).toInt().coerceAtLeast(0)
        val leftZoneEnd = (width * LEFT_ZONE_END_PERCENT).toInt()
        val rightZoneStart = (width * RIGHT_ZONE_START_PERCENT).toInt()
        val rightZoneEnd = width - ignoredEdgePixels

        // Tiny images or a large ignore strip can leave a zone empty or inverted.
        if (ignoredEdgePixels >= leftZoneEnd || rightZoneStart >= rightZoneEnd) {
            return Pair(fallbackLeft, fallbackRight)
        }

        // Computed only after the cheap guard above, since it scans the whole image.
        val projection = calculateVerticalProjection(bitmap)
        val minimumGapWidth = (width * MIN_GAP_WIDTH_PERCENT).toInt()

        val leftBound = locateBoundary(projection, ignoredEdgePixels, leftZoneEnd, minimumGapWidth)
        val rightBound = locateBoundary(projection, rightZoneStart, rightZoneEnd, minimumGapWidth)

        return Pair(leftBound ?: fallbackLeft, rightBound ?: fallbackRight)
    }

    /**
     * Searches `projection[zoneStart until zoneEnd]` for a boundary.
     *
     * The primary pass uses the raw signal; if it finds no gap, or only one narrower than
     * [minimumGapWidth], the result of a second pass on the min-normalised signal with a
     * lower activity threshold is used instead (which may itself be `null`).
     */
    private fun locateBoundary(
        projection: FloatArray,
        zoneStart: Int,
        zoneEnd: Int,
        minimumGapWidth: Int
    ): Int? {
        val zoneSignal = projection.copyOfRange(zoneStart, zoneEnd)
        val (primaryBound, primaryGapLength) = findBestGapMidpoint(zoneSignal, offset = zoneStart)
        if (primaryBound != null && primaryGapLength >= minimumGapWidth) {
            return primaryBound
        }
        return findBestGapMidpoint(zoneSignal, offset = zoneStart, normalizeSignal = true).first
    }

    /**
     * Finds the longest run of inactive samples that directly follows an active sample.
     *
     * A sample is active when it exceeds a fraction of the segment's maximum
     * ([PRIMARY_ACTIVITY_THRESHOLD], or [FALLBACK_ACTIVITY_THRESHOLD] when
     * [normalizeSignal] is set). A run ends just before the next active sample or at the
     * end of the segment. Inactive samples at the very start of the segment are not
     * counted, because a run only opens on an active-to-inactive transition.
     *
     * @param signalSegment slice of the projection to scan.
     * @param offset value added to the midpoint index to map it back to image columns.
     * @param normalizeSignal when true, the segment minimum is subtracted from every
     * sample before thresholding.
     * @return the offset midpoint of the longest run (or `null` if none) and its length.
     */
    private fun findBestGapMidpoint(
        signalSegment: FloatArray,
        offset: Int = 0,
        normalizeSignal: Boolean = false
    ): Pair<Int?, Int> {
        if (signalSegment.isEmpty()) {
            return Pair(null, 0)
        }

        val signal = if (normalizeSignal) {
            val minValue = signalSegment.minOrNull() ?: 0f
            FloatArray(signalSegment.size) { index -> signalSegment[index] - minValue }
        } else {
            signalSegment
        }

        val activityThresholdMultiplier =
            if (normalizeSignal) FALLBACK_ACTIVITY_THRESHOLD else PRIMARY_ACTIVITY_THRESHOLD
        val threshold = (signal.maxOrNull() ?: 0f) * activityThresholdMultiplier

        var maxGapLength = 0
        var maxGapMidpoint: Int? = null
        var currentGapStart = -1
        var previousIsActive = false

        for (index in signal.indices) {
            val isActive = signal[index] > threshold

            // A gap opens only on an active -> inactive transition.
            if (previousIsActive && !isActive) {
                currentGapStart = index
            }

            if (currentGapStart != -1) {
                val isLastSample = index == signal.lastIndex
                val nextIsActive = !isLastSample && signal[index + 1] > threshold
                if (isLastSample || (!isActive && nextIsActive)) {
                    val gapLength = index - currentGapStart + 1
                    // Strict comparison keeps the earliest gap when lengths tie.
                    if (gapLength > maxGapLength) {
                        maxGapLength = gapLength
                        maxGapMidpoint = currentGapStart + (gapLength / 2)
                    }
                    currentGapStart = -1
                }
            }

            previousIsActive = isActive
        }

        return Pair(maxGapMidpoint?.plus(offset), maxGapLength)
    }
}

0 commit comments

Comments
 (0)