Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import android.util.Log
import androidx.exifinterface.media.ExifInterface
import androidx.lifecycle.ViewModel
import androidx.lifecycle.viewModelScope
import com.google.gson.Gson
import kotlinx.coroutines.Dispatchers
import org.appdevforall.codeonthego.computervision.data.repository.ComputerVisionRepository
import org.appdevforall.codeonthego.computervision.domain.MarginAnnotationParser
import org.appdevforall.codeonthego.computervision.ui.ComputerVisionEffect
Expand All @@ -23,8 +23,10 @@ import kotlinx.coroutines.flow.asStateFlow
import kotlinx.coroutines.flow.receiveAsFlow
import kotlinx.coroutines.flow.update
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
import org.appdevforall.codeonthego.computervision.R
import org.appdevforall.codeonthego.computervision.utils.CvAnalyticsUtil
import org.appdevforall.codeonthego.computervision.utils.SmartBoundaryDetector

class ComputerVisionViewModel(
private val repository: ComputerVisionRepository,
Expand Down Expand Up @@ -107,22 +109,36 @@ class ComputerVisionViewModel(
private fun loadImageFromUri(uri: Uri) {
viewModelScope.launch {
try {
val bitmap = uriToBitmap(uri)
if (bitmap != null) {
val result = withContext(Dispatchers.Default) {
val bitmap = uriToBitmap(uri) ?: return@withContext null

val rotatedBitmap = handleImageRotation(uri, bitmap)
_uiState.update {
it.copy(
currentBitmap = rotatedBitmap,
imageUri = uri,
detections = emptyList(),
visualizedBitmap = null,
leftGuidePct = 0.2f, // Reset to default
rightGuidePct = 0.8f, // Reset to default
parsedAnnotations = emptyMap() // Reset on new image
)
}
} else {
val (leftBoundPx, rightBoundPx) = SmartBoundaryDetector.detectSmartBoundaries(rotatedBitmap)

val widthFloat = rotatedBitmap.width.toFloat()
val leftPct = leftBoundPx / widthFloat
val rightPct = rightBoundPx / widthFloat

Triple(rotatedBitmap, leftPct, rightPct)
}

if (result == null) {
_uiEffect.send(ComputerVisionEffect.ShowToast(R.string.msg_no_image_selected))
return@launch
}

val (rotatedBitmap, leftPct, rightPct) = result

_uiState.update {
it.copy(
currentBitmap = rotatedBitmap,
imageUri = uri,
detections = emptyList(),
visualizedBitmap = null,
leftGuidePct = leftPct,
rightGuidePct = rightPct,
parsedAnnotations = emptyMap() // Reset on new image
)
}
} catch (e: Exception) {
Log.e(TAG, "Error loading image from URI", e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@ package org.appdevforall.codeonthego.computervision.utils
import android.graphics.Bitmap
import android.graphics.Color
import android.graphics.RectF
import kotlin.math.abs
import kotlin.math.exp
import kotlin.math.roundToInt

object BitmapUtils {

private const val EDGE_DETECTION_THRESHOLD = 30

fun preprocessForOcr(bitmap: Bitmap, blockSize: Int = 31, c: Int = 15): Bitmap {
val width = bitmap.width
val height = bitmap.height
Expand Down Expand Up @@ -38,6 +41,35 @@ object BitmapUtils {
return Bitmap.createBitmap(bitmap, left, top, w, h)
}

/**
 * Builds a per-column histogram of strong horizontal intensity changes.
 *
 * For every interior column `x`, the red channel of the pixel at `x - 1` is compared with the
 * red channel of the pixel at `x + 1` on each row; whenever the absolute difference exceeds
 * [EDGE_DETECTION_THRESHOLD] the counter for column `x` is incremented.
 *
 * @param bitmap image to analyse; it is only read, never modified.
 * @return an array of length `bitmap.width` where entry `x` is the number of rows on which
 * column `x` showed an edge. The first and last entries are always `0f` because those columns
 * lack a neighbour on one side. An all-zero array is returned when the bitmap is narrower than
 * three pixels or has no rows.
 */
fun calculateVerticalProjection(bitmap: Bitmap): FloatArray {
    val width = bitmap.width
    val height = bitmap.height
    val projection = FloatArray(width)

    // Bail out before touching pixel data: there are no interior columns to score.
    if (width < 3 || height == 0) {
        return projection
    }

    // Read one row at a time into a reusable buffer instead of copying the whole frame
    // (width * height ints), which keeps the peak allocation proportional to the width.
    val rowPixels = IntArray(width)
    for (y in 0 until height) {
        bitmap.getPixels(rowPixels, 0, width, 0, y, width, 1)
        for (x in 1 until width - 1) {
            // Packed ARGB: bits 16..23 hold the red component.
            val redLeft = (rowPixels[x - 1] shr 16) and 0xFF
            val redRight = (rowPixels[x + 1] shr 16) and 0xFF

            if (abs(redLeft - redRight) > EDGE_DETECTION_THRESHOLD) {
                projection[x] += 1f
            }
        }
    }
    return projection
}

private fun toGrayscale(pixels: IntArray): IntArray {
val gray = IntArray(pixels.size)
for (i in pixels.indices) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package org.appdevforall.codeonthego.computervision.utils

import android.graphics.Bitmap
import org.appdevforall.codeonthego.computervision.utils.BitmapUtils.calculateVerticalProjection

/**
 * Estimates the horizontal positions of a left and a right boundary in an image by looking
 * for the widest "quiet" run of columns in the vertical edge projection produced by
 * [calculateVerticalProjection].
 *
 * The left boundary is searched between the ignored edge strip and [LEFT_ZONE_END_PERCENT] of
 * the width; the right boundary between [RIGHT_ZONE_START_PERCENT] of the width and the
 * ignored strip on the right edge.
 */
object SmartBoundaryDetector {

    private const val DEFAULT_EDGE_IGNORE_PERCENT = 0.05f
    private const val LEFT_ZONE_END_PERCENT = 0.4f
    private const val RIGHT_ZONE_START_PERCENT = 0.6f

    // Intentionally a Double literal: the product with the width is truncated to Int and
    // switching to Float could change that truncation for some widths.
    private const val MIN_GAP_WIDTH_PERCENT = 0.02
    private const val PRIMARY_ACTIVITY_THRESHOLD = 0.05f
    private const val FALLBACK_ACTIVITY_THRESHOLD = 0.01f
    private const val LEFT_FALLBACK_BOUND_PERCENT = 0.15f
    private const val RIGHT_FALLBACK_BOUND_PERCENT = 0.85f

    /**
     * Detects the left and right boundary columns of [bitmap].
     *
     * @param bitmap image to analyse.
     * @param edgeIgnorePercent fraction of the width skipped at each outer edge before
     * searching. Negative values are treated as `0`; values large enough to leave an empty
     * search zone make the function return the fixed fallback positions.
     * @return a pair `(left, right)` of x coordinates in pixels. When no usable gap is found
     * on a side, that side falls back to [LEFT_FALLBACK_BOUND_PERCENT] or
     * [RIGHT_FALLBACK_BOUND_PERCENT] of the width.
     */
    fun detectSmartBoundaries(
        bitmap: Bitmap,
        edgeIgnorePercent: Float = DEFAULT_EDGE_IGNORE_PERCENT
    ): Pair<Int, Int> {
        val width = bitmap.width
        val fallbackLeft = (width * LEFT_FALLBACK_BOUND_PERCENT).toInt()
        val fallbackRight = (width * RIGHT_FALLBACK_BOUND_PERCENT).toInt()

        // A negative fraction would yield a negative start index and an end index past the
        // array, making copyOfRange throw; clamp it so the call degrades gracefully.
        val ignoredEdgePixels = (width * edgeIgnorePercent.coerceAtLeast(0f)).toInt()
        val leftZoneEnd = (width * LEFT_ZONE_END_PERCENT).toInt()
        val rightZoneStart = (width * RIGHT_ZONE_START_PERCENT).toInt()
        val rightZoneEnd = width - ignoredEdgePixels

        val projection = calculateVerticalProjection(bitmap)

        // Either search zone is empty (tiny image or oversized ignore strip): nothing to scan.
        if (ignoredEdgePixels >= leftZoneEnd || rightZoneStart >= rightZoneEnd) {
            return Pair(fallbackLeft, fallbackRight)
        }

        val minimumGapWidth = (width * MIN_GAP_WIDTH_PERCENT).toInt()

        val leftBound = findBoundaryInZone(
            zoneSignal = projection.copyOfRange(ignoredEdgePixels, leftZoneEnd),
            offset = ignoredEdgePixels,
            minimumGapWidth = minimumGapWidth
        )
        val rightBound = findBoundaryInZone(
            zoneSignal = projection.copyOfRange(rightZoneStart, rightZoneEnd),
            offset = rightZoneStart,
            minimumGapWidth = minimumGapWidth
        )

        return Pair(leftBound ?: fallbackLeft, rightBound ?: fallbackRight)
    }

    /**
     * Runs the primary gap search on one zone and, when it finds nothing or only a gap
     * narrower than [minimumGapWidth], retries with the normalised signal and the lower
     * activity threshold. The retry result is accepted regardless of its gap width.
     *
     * @return the boundary x coordinate in image space, or `null` when both passes fail.
     */
    private fun findBoundaryInZone(
        zoneSignal: FloatArray,
        offset: Int,
        minimumGapWidth: Int
    ): Int? {
        val (primaryBound, primaryGapLength) = findBestGapMidpoint(zoneSignal, offset = offset)
        if (primaryBound != null && primaryGapLength >= minimumGapWidth) {
            return primaryBound
        }
        return findBestGapMidpoint(zoneSignal, offset = offset, normalizeSignal = true).first
    }

    /**
     * Finds the longest run of inactive samples in [signalSegment] and returns its midpoint.
     *
     * A sample is active when it is strictly greater than `max(signal) * multiplier`, where
     * the multiplier is [PRIMARY_ACTIVITY_THRESHOLD], or [FALLBACK_ACTIVITY_THRESHOLD] when
     * [normalizeSignal] is set. A run only counts if it begins right after an active sample,
     * so inactive samples at the very start of the segment are ignored; a run that reaches
     * the end of the segment does count. On ties the earliest run wins.
     *
     * @param signalSegment slice of the projection to scan.
     * @param offset value added to the midpoint to translate it back to image coordinates.
     * @param normalizeSignal when `true`, the segment minimum is subtracted from every sample
     * before thresholding.
     * @return the midpoint (plus [offset]) of the longest run, or `null` if none exists,
     * paired with that run's length (`0` if none).
     */
    private fun findBestGapMidpoint(
        signalSegment: FloatArray,
        offset: Int = 0,
        normalizeSignal: Boolean = false
    ): Pair<Int?, Int> {
        if (signalSegment.isEmpty()) {
            return Pair(null, 0)
        }

        val signal = if (normalizeSignal) {
            val minValue = signalSegment.minOrNull() ?: 0f
            FloatArray(signalSegment.size) { index -> signalSegment[index] - minValue }
        } else {
            signalSegment
        }

        val multiplier = if (normalizeSignal) FALLBACK_ACTIVITY_THRESHOLD else PRIMARY_ACTIVITY_THRESHOLD
        val threshold = (signal.maxOrNull() ?: 0f) * multiplier

        var longestGap = 0
        var longestGapMidpoint: Int? = null
        var gapStart = -1
        var wasActive = false

        for (index in signal.indices) {
            val isActive = signal[index] > threshold

            // Active -> inactive transition opens a new gap.
            if (wasActive && !isActive) {
                gapStart = index
            }

            if (gapStart != -1) {
                val isLastSample = index == signal.lastIndex
                // The gap ends at the segment end or just before the next active sample.
                val closesHere = isLastSample || (!isActive && signal[index + 1] > threshold)
                if (closesHere) {
                    val gapLength = index - gapStart + 1
                    if (gapLength > longestGap) {
                        longestGap = gapLength
                        longestGapMidpoint = gapStart + gapLength / 2
                    }
                    gapStart = -1
                }
            }

            wasActive = isActive
        }

        return Pair(longestGapMidpoint?.plus(offset), longestGap)
    }
}
Loading