diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/ui/viewmodel/ComputerVisionViewModel.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/ui/viewmodel/ComputerVisionViewModel.kt
index b5c00731d2..b41c6fd9d4 100644
--- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/ui/viewmodel/ComputerVisionViewModel.kt
+++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/ui/viewmodel/ComputerVisionViewModel.kt
@@ -9,7 +9,7 @@ import android.util.Log
 import androidx.exifinterface.media.ExifInterface
 import androidx.lifecycle.ViewModel
 import androidx.lifecycle.viewModelScope
-import com.google.gson.Gson
+import kotlinx.coroutines.Dispatchers
 import org.appdevforall.codeonthego.computervision.data.repository.ComputerVisionRepository
 import org.appdevforall.codeonthego.computervision.domain.MarginAnnotationParser
 import org.appdevforall.codeonthego.computervision.ui.ComputerVisionEffect
@@ -23,8 +23,10 @@ import kotlinx.coroutines.flow.asStateFlow
 import kotlinx.coroutines.flow.receiveAsFlow
 import kotlinx.coroutines.flow.update
 import kotlinx.coroutines.launch
+import kotlinx.coroutines.withContext
 import org.appdevforall.codeonthego.computervision.R
 import org.appdevforall.codeonthego.computervision.utils.CvAnalyticsUtil
+import org.appdevforall.codeonthego.computervision.utils.SmartBoundaryDetector
 
 class ComputerVisionViewModel(
     private val repository: ComputerVisionRepository,
@@ -107,22 +109,36 @@ class ComputerVisionViewModel(
     private fun loadImageFromUri(uri: Uri) {
         viewModelScope.launch {
             try {
-                val bitmap = uriToBitmap(uri)
-                if (bitmap != null) {
+                val result = withContext(Dispatchers.Default) {
+                    val bitmap = uriToBitmap(uri) ?: return@withContext null
+
                     val rotatedBitmap = handleImageRotation(uri, bitmap)
-                    _uiState.update {
-                        it.copy(
-                            currentBitmap = rotatedBitmap,
-                            imageUri = uri,
-                            detections = emptyList(),
-                            visualizedBitmap = null,
-                            leftGuidePct = 0.2f, // Reset to default
-                            rightGuidePct = 0.8f, // Reset to default
-                            parsedAnnotations = emptyMap() // Reset on new image
-                        )
-                    }
-                } else {
+                    val (leftBoundPx, rightBoundPx) = SmartBoundaryDetector.detectSmartBoundaries(rotatedBitmap)
+
+                    val widthFloat = rotatedBitmap.width.toFloat()
+                    val leftPct = leftBoundPx / widthFloat
+                    val rightPct = rightBoundPx / widthFloat
+
+                    Triple(rotatedBitmap, leftPct, rightPct)
+                }
+
+                if (result == null) {
                     _uiEffect.send(ComputerVisionEffect.ShowToast(R.string.msg_no_image_selected))
+                    return@launch
+                }
+
+                val (rotatedBitmap, leftPct, rightPct) = result
+
+                _uiState.update {
+                    it.copy(
+                        currentBitmap = rotatedBitmap,
+                        imageUri = uri,
+                        detections = emptyList(),
+                        visualizedBitmap = null,
+                        leftGuidePct = leftPct,
+                        rightGuidePct = rightPct,
+                        parsedAnnotations = emptyMap() // Reset on new image
+                    )
                 }
             } catch (e: Exception) {
                 Log.e(TAG, "Error loading image from URI", e)
diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/utils/BitmapUtils.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/utils/BitmapUtils.kt
index 98006dae5a..2f34757d71 100644
--- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/utils/BitmapUtils.kt
+++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/utils/BitmapUtils.kt
@@ -3,11 +3,14 @@ package org.appdevforall.codeonthego.computervision.utils
 import android.graphics.Bitmap
 import android.graphics.Color
 import android.graphics.RectF
+import kotlin.math.abs
 import kotlin.math.exp
 import kotlin.math.roundToInt
 
 object BitmapUtils {
 
+    private const val EDGE_DETECTION_THRESHOLD = 30
+
     fun preprocessForOcr(bitmap: Bitmap, blockSize: Int = 31, c: Int = 15): Bitmap {
         val width = bitmap.width
         val height = bitmap.height
@@ -38,6 +41,40 @@ object BitmapUtils {
         return Bitmap.createBitmap(bitmap, left, top, w, h)
     }
 
+    /**
+     * Builds a per-column histogram of strong horizontal intensity changes.
+     *
+     * For every interior column the red channels of the left and right neighbour pixels
+     * are compared; a difference above [EDGE_DETECTION_THRESHOLD] increments that
+     * column's counter. Only the red channel is inspected.
+     *
+     * @param bitmap image to analyse; its pixels are only read.
+     * @return an array of length `bitmap.width` whose first and last entries stay 0; it is
+     * all zeros when the bitmap is narrower than 3 pixels or has no rows.
+     */
+    fun calculateVerticalProjection(bitmap: Bitmap): FloatArray {
+        val width = bitmap.width
+        val height = bitmap.height
+        val projection = FloatArray(width)
+        if (width < 3 || height == 0) {
+            return projection
+        }
+
+        // Read one row at a time so the scratch buffer is `width` ints, not `width * height`.
+        val row = IntArray(width)
+        for (y in 0 until height) {
+            bitmap.getPixels(row, 0, width, 0, y, width, 1)
+            for (x in 1 until width - 1) {
+                val redLeft = (row[x - 1] shr 16) and 0xFF
+                val redRight = (row[x + 1] shr 16) and 0xFF
+                if (abs(redLeft - redRight) > EDGE_DETECTION_THRESHOLD) {
+                    projection[x] += 1f
+                }
+            }
+        }
+        return projection
+    }
+
     private fun toGrayscale(pixels: IntArray): IntArray {
         val gray = IntArray(pixels.size)
         for (i in pixels.indices) {
diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/utils/SmartBoundaryDetector.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/utils/SmartBoundaryDetector.kt
new file mode 100644
index 0000000000..5ed36a8162
--- /dev/null
+++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/utils/SmartBoundaryDetector.kt
@@ -0,0 +1,146 @@
+package org.appdevforall.codeonthego.computervision.utils
+
+import android.graphics.Bitmap
+import org.appdevforall.codeonthego.computervision.utils.BitmapUtils.calculateVerticalProjection
+
+/**
+ * Suggests left and right guide positions for an image by looking for the widest
+ * low-activity column gap in the projection from [calculateVerticalProjection].
+ */
+object SmartBoundaryDetector {
+
+    private const val DEFAULT_EDGE_IGNORE_PERCENT = 0.05f
+    private const val LEFT_ZONE_END_PERCENT = 0.4f
+    private const val RIGHT_ZONE_START_PERCENT = 0.6f
+    private const val MIN_GAP_WIDTH_PERCENT = 0.02
+    private const val PRIMARY_ACTIVITY_THRESHOLD = 0.05f
+    private const val FALLBACK_ACTIVITY_THRESHOLD = 0.01f
+    private const val LEFT_FALLBACK_BOUND_PERCENT = 0.15f
+    private const val RIGHT_FALLBACK_BOUND_PERCENT = 0.85f
+
+    /**
+     * Detects the left and right boundary columns of [bitmap].
+     *
+     * The left zone runs from the ignored edge strip to [LEFT_ZONE_END_PERCENT] of the
+     * width, the right zone from [RIGHT_ZONE_START_PERCENT] of the width to the right
+     * ignored edge strip. A zone without a usable gap falls back to
+     * [LEFT_FALLBACK_BOUND_PERCENT] / [RIGHT_FALLBACK_BOUND_PERCENT] of the width.
+     *
+     * @param bitmap image to analyse.
+     * @param edgeIgnorePercent fraction of the width skipped at each outer edge; clamped
+     * to `0f..LEFT_ZONE_END_PERCENT` so the zone ranges can never be negative or inverted.
+     * @return the left and right boundary as x coordinates in pixels.
+     */
+    fun detectSmartBoundaries(
+        bitmap: Bitmap,
+        edgeIgnorePercent: Float = DEFAULT_EDGE_IGNORE_PERCENT
+    ): Pair<Int, Int> {
+        val width = bitmap.width
+        val projection = calculateVerticalProjection(bitmap)
+        val minimumGapWidth = (width * MIN_GAP_WIDTH_PERCENT).toInt()
+
+        // copyOfRange throws on a negative or inverted range, so keep the indices ordered.
+        val safeEdgePercent = edgeIgnorePercent.coerceIn(0f, LEFT_ZONE_END_PERCENT)
+        val ignoredEdgePixels = (width * safeEdgePercent).toInt()
+        val leftZoneEnd = (width * LEFT_ZONE_END_PERCENT).toInt()
+        val rightZoneStart = (width * RIGHT_ZONE_START_PERCENT).toInt()
+        val rightZoneEnd = (width - ignoredEdgePixels).coerceAtLeast(rightZoneStart)
+
+        val leftBound = findBoundInZone(projection, ignoredEdgePixels, leftZoneEnd, minimumGapWidth)
+        val rightBound = findBoundInZone(projection, rightZoneStart, rightZoneEnd, minimumGapWidth)
+
+        return Pair(
+            leftBound ?: (width * LEFT_FALLBACK_BOUND_PERCENT).toInt(),
+            rightBound ?: (width * RIGHT_FALLBACK_BOUND_PERCENT).toInt()
+        )
+    }
+
+    /**
+     * Searches `projection[zoneStart, zoneEnd)` for a boundary column.
+     *
+     * The primary scan is used when its gap is at least [minimumGapWidth] wide; otherwise
+     * the result of the normalised fallback scan is returned, which may be `null`.
+     */
+    private fun findBoundInZone(
+        projection: FloatArray,
+        zoneStart: Int,
+        zoneEnd: Int,
+        minimumGapWidth: Int
+    ): Int? {
+        val zoneSignal = projection.copyOfRange(zoneStart, zoneEnd)
+        val (primaryBound, primaryGapLength) = findBestGapMidpoint(zoneSignal, offset = zoneStart)
+        if (primaryBound != null && primaryGapLength >= minimumGapWidth) {
+            return primaryBound
+        }
+        return findBestGapMidpoint(zoneSignal, offset = zoneStart, normalizeSignal = true).first
+    }
+
+    /**
+     * Finds the longest run of inactive samples that directly follows an active sample.
+     *
+     * A sample is active when it is strictly greater than the segment maximum times
+     * [PRIMARY_ACTIVITY_THRESHOLD], or times [FALLBACK_ACTIVITY_THRESHOLD] when
+     * [normalizeSignal] is set (the segment minimum is subtracted first in that case).
+     * A run that reaches the end of the segment counts; inactive samples before the
+     * first active one do not. On equal lengths the earliest run wins.
+     *
+     * NOTE(review): because leading runs are skipped, a zone that opens with blank
+     * columns never reports them - confirm that this is intended for the right zone.
+     *
+     * @param signalSegment projection values of one search zone.
+     * @param offset added to the midpoint to express it in full-image columns.
+     * @param normalizeSignal selects the fallback scan described above.
+     * @return the midpoint column, or `null` when no run was found, and the run length.
+     */
+    private fun findBestGapMidpoint(
+        signalSegment: FloatArray,
+        offset: Int = 0,
+        normalizeSignal: Boolean = false
+    ): Pair<Int?, Int> {
+        if (signalSegment.isEmpty()) {
+            return Pair(null, 0)
+        }
+
+        val signal = if (normalizeSignal) {
+            val minValue = signalSegment.minOrNull() ?: 0f
+            FloatArray(signalSegment.size) { index -> signalSegment[index] - minValue }
+        } else {
+            signalSegment
+        }
+
+        val activityThresholdMultiplier = if (normalizeSignal) {
+            FALLBACK_ACTIVITY_THRESHOLD
+        } else {
+            PRIMARY_ACTIVITY_THRESHOLD
+        }
+        val threshold = (signal.maxOrNull() ?: 0f) * activityThresholdMultiplier
+
+        var maxGapLength = 0
+        var maxGapMidpoint: Int? = null
+        var currentGapStart = -1
+        var previousIsActive = false
+
+        signal.forEachIndexed { index, value ->
+            val isActive = value > threshold
+            if (previousIsActive && !isActive) {
+                currentGapStart = index
+            }
+
+            val isLastSample = index + 1 == signal.size
+            val isGapClosing = currentGapStart != -1 &&
+                (isLastSample || (!isActive && signal[index + 1] > threshold))
+            if (isGapClosing) {
+                val gapLength = index - currentGapStart + 1
+                if (gapLength > maxGapLength) {
+                    maxGapLength = gapLength
+                    maxGapMidpoint = currentGapStart + (gapLength / 2)
+                }
+                currentGapStart = -1
+            }
+
+            previousIsActive = isActive
+        }
+
+        return Pair(maxGapMidpoint?.plus(offset), maxGapLength)
+    }
+}