Skip to content

Commit 9adb0b0

Browse files
committed
Vision-OCR:fix:多选识别内容的bug
1 parent f39e6e6 commit 9adb0b0

1 file changed

Lines changed: 83 additions & 2 deletions

File tree

Vision-OCR/App.tsx

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,86 @@ type RecognizedItem = {
3636

3737
// Key string for a persisted flag. NOTE(review): presumably the storage key of the
// "auto paste from clipboard" preference — its readers are not visible here, confirm.
const AUTO_PASTE_KEY = 'vision_ocr_auto_paste_clipboard'
3838

39+
/** Axis-aligned rectangle described by the position of its left/top edges and its size. */
type PixelBox = { left: number; top: number; width: number; height: number }
40+
41+
/**
 * Builds a comparison key for a piece of text.
 *
 * The text is lowercased, then every whitespace character and every character
 * from a fixed set of ASCII punctuation marks is removed, so strings that
 * differ only in case, spacing, or those marks yield the same key.
 *
 * @param text Text to normalize.
 * @returns The normalized key; empty when nothing else remains.
 */
function normalizeTextKey(text: string): string {
  const lowered = text.toLowerCase()
  // Whitespace and the punctuation set are stripped together in one pass.
  const stripped = lowered.replace(/[\s:;,.!?()\[\]{}<>'"`~\-_/\\|]/g, '')
  return stripped.trim()
}
48+
49+
/**
 * Converts a bounding box into a PixelBox for an image of the given size.
 *
 * The vertical axis is flipped: the resulting `top` is the distance from the
 * image edge opposite to the one `box.y` is measured from. When x, y, width
 * and height are all <= 1 the box is treated as normalized and scaled by the
 * image dimensions; otherwise its values are taken as-is.
 *
 * @param box  Source rectangle (x, y, width, height).
 * @param imgW Image width used for scaling.
 * @param imgH Image height used for scaling and for the vertical flip.
 * @returns The rectangle expressed as left/top/width/height.
 */
function toPixelBox(
  box: { x: number; y: number; width: number; height: number },
  imgW: number,
  imgH: number
): PixelBox {
  const { x, y, width, height } = box
  // Every component must be <= 1 for the box to count as normalized.
  const unitScaled = [x, y, width, height].every(v => v <= 1)

  if (!unitScaled) {
    // Values are used unchanged; only the vertical origin is flipped.
    return {
      left: x,
      top: imgH - (y + height),
      width,
      height,
    }
  }

  return {
    left: x * imgW,
    top: (1 - y - height) * imgH,
    width: width * imgW,
    height: height * imgH,
  }
}
70+
71+
/**
 * Intersection-over-union of two axis-aligned boxes.
 *
 * @param a First box.
 * @param b Second box.
 * @returns Intersection area divided by union area, or 0 when the boxes do not
 *          overlap or the union area is not positive.
 */
function boxIou(a: PixelBox, b: PixelBox): number {
  // Length of the overlap of two 1-D segments given as (start, length); never negative.
  const overlap1d = (startA: number, lenA: number, startB: number, lenB: number): number =>
    Math.max(0, Math.min(startA + lenA, startB + lenB) - Math.max(startA, startB))

  const shared = overlap1d(a.left, a.width, b.left, b.width) * overlap1d(a.top, a.height, b.top, b.height)
  if (shared <= 0) {
    return 0
  }

  const combined = a.width * a.height + b.width * b.height - shared
  if (combined > 0) {
    return shared / combined
  }
  return 0
}
83+
84+
/**
 * Drops recognition results that repeat the same text at nearly the same place.
 *
 * Entries are examined from highest to lowest confidence, so when two entries
 * collide the one examined first is the one kept. Two entries collide when
 * their normalized text keys are equal and either
 *   - their boxes have an IoU of at least 0.45, or
 *   - their centres are closer than 25% of the mean width horizontally and
 *     35% of the mean height vertically (each mean is clamped to at least 1).
 * Entries whose normalized key is empty are always kept.
 *
 * @param list Recognized items to filter.
 * @param imgW Image width passed to toPixelBox.
 * @param imgH Image height passed to toPixelBox.
 * @returns The surviving items in their original relative order, each with
 *          `id` replaced by its new index as a string.
 */
function dedupeRecognizedItems(
  list: RecognizedItem[],
  imgW: number,
  imgH: number
): RecognizedItem[] {
  const ordered = list.map((item, idx) => ({
    item,
    idx,
    key: normalizeTextKey(item.content),
    box: toPixelBox(item.boundingBox, imgW, imgH),
  }))
  // Highest confidence first, so the first entry seen wins a collision.
  ordered.sort((p, q) => q.item.confidence - p.item.confidence)

  type Entry = (typeof ordered)[number]

  // True when the two box centres are close relative to the mean box size.
  const centresClose = (p: Entry['box'], q: Entry['box']): boolean => {
    const dx = Math.abs(p.left + p.width / 2 - (q.left + q.width / 2))
    const dy = Math.abs(p.top + p.height / 2 - (q.top + q.height / 2))
    const meanW = Math.max(1, (p.width + q.width) / 2)
    const meanH = Math.max(1, (p.height + q.height) / 2)
    return dx < meanW * 0.25 && dy < meanH * 0.35
  }

  const survivors: Entry[] = []
  for (const candidate of ordered) {
    let isRepeat = false
    // Entries with an empty key skip the comparison entirely and are kept.
    if (candidate.key.length !== 0) {
      for (const earlier of survivors) {
        if (earlier.key !== candidate.key) continue
        if (boxIou(earlier.box, candidate.box) >= 0.45 || centresClose(earlier.box, candidate.box)) {
          isRepeat = true
          break
        }
      }
    }
    if (!isRepeat) {
      survivors.push(candidate)
    }
  }

  // Restore input order before renumbering the ids.
  survivors.sort((p, q) => p.idx - q.idx)
  return survivors.map((entry, position) => ({ ...entry.item, id: String(position) }))
}
118+
39119
function readStoredBool(key: string, fallback = false): boolean {
40120
const st: any = (globalThis as any).Storage
41121
if (!st) return fallback
@@ -94,7 +174,7 @@ function RectOverlay({
94174
}) {
95175
const stroke = selected ? 'rgba(0,122,255,1)' : 'rgba(0,200,0,1)'
96176
const fill = selected ? 'rgba(0,122,255,0.18)' : 'rgba(0,200,0,0.06)'
97-
const hitPad = 6
177+
const hitPad = 3
98178
const handleTap = () => {
99179
fireHaptic('light')
100180
onTap()
@@ -254,8 +334,9 @@ export default function App({ initialImage }: { initialImage?: UIImage | null })
254334
boundingBox: c.boundingBox as { x: number; y: number; width: number; height: number },
255335
edited: undefined,
256336
}))
337+
const deduped = dedupeRecognizedItems(recognized, clean.width, clean.height)
257338
if (seq === loadSeqRef.current) {
258-
setItems(recognized)
339+
setItems(deduped)
259340
}
260341
} catch (e) {
261342
// use global Dialog

0 commit comments

Comments
 (0)