Skip to content

Commit 0425d51

Browse files
committed
feat: implement smart boundary detection for dynamic margins
Replaces hardcoded bounds with a vertical projection edge-detection algorithm.
1 parent e0a588a commit 0425d51

File tree

3 files changed

+148
-5
lines changed

3 files changed

+148
-5
lines changed

cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/ui/viewmodel/ComputerVisionViewModel.kt

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import android.util.Log
99
import androidx.exifinterface.media.ExifInterface
1010
import androidx.lifecycle.ViewModel
1111
import androidx.lifecycle.viewModelScope
12-
import com.google.gson.Gson
12+
import kotlinx.coroutines.Dispatchers
1313
import org.appdevforall.codeonthego.computervision.data.repository.ComputerVisionRepository
1414
import org.appdevforall.codeonthego.computervision.domain.MarginAnnotationParser
1515
import org.appdevforall.codeonthego.computervision.ui.ComputerVisionEffect
@@ -23,8 +23,10 @@ import kotlinx.coroutines.flow.asStateFlow
2323
import kotlinx.coroutines.flow.receiveAsFlow
2424
import kotlinx.coroutines.flow.update
2525
import kotlinx.coroutines.launch
26+
import kotlinx.coroutines.withContext
2627
import org.appdevforall.codeonthego.computervision.R
2728
import org.appdevforall.codeonthego.computervision.utils.CvAnalyticsUtil
29+
import org.appdevforall.codeonthego.computervision.utils.SmartBoundaryDetector
2830

2931
class ComputerVisionViewModel(
3032
private val repository: ComputerVisionRepository,
@@ -107,17 +109,30 @@ class ComputerVisionViewModel(
107109
private fun loadImageFromUri(uri: Uri) {
108110
viewModelScope.launch {
109111
try {
110-
val bitmap = uriToBitmap(uri)
111-
if (bitmap != null) {
112+
val result = withContext(Dispatchers.Default) {
113+
val bitmap = uriToBitmap(uri) ?: return@withContext null
114+
112115
val rotatedBitmap = handleImageRotation(uri, bitmap)
116+
val (leftBoundPx, rightBoundPx) = SmartBoundaryDetector.detectSmartBoundaries(rotatedBitmap)
117+
118+
val widthFloat = rotatedBitmap.width.toFloat()
119+
val leftPct = leftBoundPx / widthFloat
120+
val rightPct = rightBoundPx / widthFloat
121+
122+
Triple(rotatedBitmap, leftPct, rightPct)
123+
}
124+
125+
if (result != null) {
126+
val (rotatedBitmap, leftPct, rightPct) = result
127+
113128
_uiState.update {
114129
it.copy(
115130
currentBitmap = rotatedBitmap,
116131
imageUri = uri,
117132
detections = emptyList(),
118133
visualizedBitmap = null,
119-
leftGuidePct = 0.2f, // Reset to default
120-
rightGuidePct = 0.8f, // Reset to default
134+
leftGuidePct = leftPct,
135+
rightGuidePct = rightPct,
121136
parsedAnnotations = emptyMap() // Reset on new image
122137
)
123138
}

cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/utils/BitmapUtils.kt

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,14 @@ package org.appdevforall.codeonthego.computervision.utils
33
import android.graphics.Bitmap
44
import android.graphics.Color
55
import android.graphics.RectF
6+
import kotlin.math.abs
67
import kotlin.math.exp
78
import kotlin.math.roundToInt
89

910
object BitmapUtils {
1011

12+
private const val EDGE_DETECTION_THRESHOLD = 30
13+
1114
fun preprocessForOcr(bitmap: Bitmap, blockSize: Int = 31, c: Int = 15): Bitmap {
1215
val width = bitmap.width
1316
val height = bitmap.height
@@ -38,6 +41,35 @@ object BitmapUtils {
3841
return Bitmap.createBitmap(bitmap, left, top, w, h)
3942
}
4043

44+
/**
 * Builds a per-column histogram of horizontal edge activity.
 *
 * For every row, each interior column `x` is scored by comparing the red channel of its
 * left neighbour (`x - 1`) with the red channel of its right neighbour (`x + 1`). When the
 * absolute difference exceeds [EDGE_DETECTION_THRESHOLD], the counter for column `x` is
 * incremented. The first and last columns are never scored and therefore stay at zero.
 *
 * Pixels are fetched one row at a time, so the working buffer holds `width` ints instead
 * of `width * height`.
 *
 * @param bitmap image to analyse.
 * @return an array of length `bitmap.width`; entry `x` is the number of rows in which
 *   column `x` was classified as an edge. All entries are zero when the bitmap is
 *   narrower than 3 pixels or has no rows.
 */
fun calculateVerticalProjection(bitmap: Bitmap): FloatArray {
    val width = bitmap.width
    val height = bitmap.height

    val projection = FloatArray(width)
    // Guard first: no pixel buffer is needed when there is no interior column to score.
    if (width < 3 || height == 0) {
        return projection
    }

    val row = IntArray(width)
    for (y in 0 until height) {
        // Copy only row y (a width x 1 region) into the reusable buffer.
        bitmap.getPixels(row, 0, width, 0, y, width, 1)
        for (x in 1 until width - 1) {
            // Only the red channel (bits 16..23 of the packed pixel) is compared.
            val redLeft = (row[x - 1] shr 16) and 0xFF
            val redRight = (row[x + 1] shr 16) and 0xFF
            if (abs(redLeft - redRight) > EDGE_DETECTION_THRESHOLD) {
                projection[x] += 1f
            }
        }
    }
    return projection
}
72+
4173
private fun toGrayscale(pixels: IntArray): IntArray {
4274
val gray = IntArray(pixels.size)
4375
for (i in pixels.indices) {
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
package org.appdevforall.codeonthego.computervision.utils
2+
3+
import android.graphics.Bitmap
4+
import org.appdevforall.codeonthego.computervision.utils.BitmapUtils.calculateVerticalProjection
5+
6+
/**
 * Picks left and right boundary columns for an image by searching its vertical edge
 * projection for the widest low-activity band on each side.
 */
object SmartBoundaryDetector {

    /** Default fraction of the image width skipped at each outer edge. */
    private const val DEFAULT_EDGE_IGNORE_PERCENT = 0.05f

    /** The left search zone ends at this fraction of the width (exclusive). */
    private const val LEFT_ZONE_END_PERCENT = 0.4f

    /** The right search zone starts at this fraction of the width. */
    private const val RIGHT_ZONE_START_PERCENT = 0.6f

    /**
     * Minimum width, as a fraction of the image width, that the primary scan's gap must
     * reach to be accepted. Kept as a Double so the product with the width is computed
     * exactly as before.
     */
    private const val MIN_GAP_WIDTH_PERCENT = 0.02

    /** Activity threshold, as a fraction of the segment maximum, for the primary scan. */
    private const val PRIMARY_ACTIVITY_THRESHOLD = 0.05f

    /** Activity threshold, as a fraction of the segment maximum, for the normalised scan. */
    private const val FALLBACK_ACTIVITY_THRESHOLD = 0.01f

    /** Left bound, as a fraction of the width, used when no gap is found. */
    private const val LEFT_FALLBACK_BOUND_PERCENT = 0.15f

    /** Right bound, as a fraction of the width, used when no gap is found. */
    private const val RIGHT_FALLBACK_BOUND_PERCENT = 0.85f

    /**
     * Detects the left and right boundary columns of [bitmap].
     *
     * The left bound is searched between the ignored edge strip and
     * [LEFT_ZONE_END_PERCENT] of the width; the right bound between
     * [RIGHT_ZONE_START_PERCENT] of the width and the ignored strip on the right.
     * Each side first runs a primary scan; if that finds no gap, or a gap narrower than
     * [MIN_GAP_WIDTH_PERCENT] of the width, a second scan on the min-normalised signal is
     * used instead. If that also finds nothing, the fixed fallback fraction is returned.
     *
     * @param bitmap image to analyse.
     * @param edgeIgnorePercent fraction of the width to skip at each outer edge. The
     *   resulting pixel count is clamped to `0..leftZoneEnd`, so out-of-range values
     *   produce an empty search zone (and thus the fallback bound) instead of an exception.
     * @return `(leftBound, rightBound)` as pixel columns of [bitmap].
     */
    fun detectSmartBoundaries(
        bitmap: Bitmap,
        edgeIgnorePercent: Float = DEFAULT_EDGE_IGNORE_PERCENT
    ): Pair<Int, Int> {
        val width = bitmap.width
        val projection = calculateVerticalProjection(bitmap)
        val minimumGapWidth = (width * MIN_GAP_WIDTH_PERCENT).toInt()

        val leftZoneEnd = (width * LEFT_ZONE_END_PERCENT).toInt()
        val rightZoneStart = (width * RIGHT_ZONE_START_PERCENT).toInt()

        // Clamp so both copyOfRange calls below always receive from <= to within bounds,
        // whatever edgeIgnorePercent the caller passes.
        val ignoredEdgePixels = (width * edgeIgnorePercent).toInt().coerceIn(0, leftZoneEnd)
        val rightZoneEnd = (width - ignoredEdgePixels).coerceAtLeast(rightZoneStart)

        val leftBound = detectBound(
            signalSegment = projection.copyOfRange(ignoredEdgePixels, leftZoneEnd),
            offset = ignoredEdgePixels,
            minimumGapWidth = minimumGapWidth
        )
        val rightBound = detectBound(
            signalSegment = projection.copyOfRange(rightZoneStart, rightZoneEnd),
            offset = rightZoneStart,
            minimumGapWidth = minimumGapWidth
        )

        return Pair(
            leftBound ?: (width * LEFT_FALLBACK_BOUND_PERCENT).toInt(),
            rightBound ?: (width * RIGHT_FALLBACK_BOUND_PERCENT).toInt()
        )
    }

    /**
     * Runs the primary scan on [signalSegment] and, when it yields no gap or one shorter
     * than [minimumGapWidth], returns the result of the normalised scan instead.
     *
     * @return the chosen gap midpoint shifted by [offset], or `null` if neither scan
     *   found a gap.
     */
    private fun detectBound(signalSegment: FloatArray, offset: Int, minimumGapWidth: Int): Int? {
        val (primaryMidpoint, primaryGapLength) = findBestGapMidpoint(signalSegment, offset = offset)
        if (primaryMidpoint != null && primaryGapLength >= minimumGapWidth) {
            return primaryMidpoint
        }
        // The normalised scan's result is taken as-is; no minimum width is applied to it.
        return findBestGapMidpoint(signalSegment, offset = offset, normalizeSignal = true).first
    }

    /**
     * Finds the longest run of inactive columns in [signalSegment].
     *
     * A column is active when its value is strictly greater than the threshold, which is
     * the segment maximum multiplied by [PRIMARY_ACTIVITY_THRESHOLD] (or by
     * [FALLBACK_ACTIVITY_THRESHOLD] when [normalizeSignal] is set). A gap only starts on
     * an active-to-inactive transition, so an inactive run at the very beginning of the
     * segment is not counted. A gap ends just before the next active column or at the end
     * of the segment. When several gaps share the maximum length, the first one wins.
     *
     * @param signalSegment slice of the projection to scan.
     * @param offset value added to the midpoint index to map it back to image columns.
     * @param normalizeSignal when `true`, the segment minimum is subtracted from every
     *   value before thresholding.
     * @return `(midpoint + offset, gapLength)`; the midpoint is `null` and the length `0`
     *   when no gap exists.
     */
    private fun findBestGapMidpoint(
        signalSegment: FloatArray,
        offset: Int = 0,
        normalizeSignal: Boolean = false
    ): Pair<Int?, Int> {
        if (signalSegment.isEmpty()) {
            return Pair(null, 0)
        }

        val signal = if (normalizeSignal) {
            val minValue = signalSegment.minOrNull() ?: 0f
            FloatArray(signalSegment.size) { index -> signalSegment[index] - minValue }
        } else {
            signalSegment
        }

        val thresholdFraction =
            if (normalizeSignal) FALLBACK_ACTIVITY_THRESHOLD else PRIMARY_ACTIVITY_THRESHOLD
        val threshold = (signal.maxOrNull() ?: 0f) * thresholdFraction

        var longestGapLength = 0
        var longestGapMidpoint: Int? = null
        var gapStart = -1 // -1 means no gap is currently open
        var previousIsActive = false

        // Closes the open gap covering [gapStart, endExclusive) and keeps it if it is
        // strictly longer than the best gap seen so far.
        fun closeGap(endExclusive: Int) {
            val gapLength = endExclusive - gapStart
            if (gapLength > longestGapLength) {
                longestGapLength = gapLength
                longestGapMidpoint = gapStart + gapLength / 2
            }
            gapStart = -1
        }

        for (index in signal.indices) {
            val isActive = signal[index] > threshold
            if (isActive) {
                if (gapStart != -1) {
                    closeGap(endExclusive = index)
                }
            } else if (previousIsActive) {
                gapStart = index
            }
            previousIsActive = isActive
        }
        // A gap still open here runs to the end of the segment.
        if (gapStart != -1) {
            closeGap(endExclusive = signal.size)
        }

        return Pair(longestGapMidpoint?.plus(offset), longestGapLength)
    }
}

0 commit comments

Comments
 (0)