Skip to content

Commit e6bd07c

Browse files
committed
dev(stats): some metrics API and prepare better metrics
1 parent a23affa commit e6bd07c

3 files changed

Lines changed: 92 additions & 2 deletions

File tree

src/Models.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,22 @@ class ScanTypeEnum(str, Enum):
217217
V10_MASK = "V10MASK"
218218

219219

220+
class ScanStats(BaseModel):
    # Per-scan counters describing how far images went through the
    # segmentation / ML separation / manual separation pipeline.

    # Name identifying the scan (the stats endpoint fills it with the
    # subsample name).
    name: str
    # How many images the segmenter produced; all of them go to the
    # ML classifier.
    segmented: int
    # How many images were forwarded to the ML separator, i.e. those with
    # an ML classifier score > 0.4 and which are not too large.
    sentToSeparator: int
    # How many ML-separated images the users left unmodified.
    untouchedByUser: int
    # How many images the users modified although they were never sent to
    # the ML separator.
    addedByUser: int
    # How many (re-)separated images the users modified without clearing
    # them.
    separatedByUser: int
    # How many (re-)separated images the users cleared.
    clearedByUser: int
234+
235+
220236
class ScanPostRsp(BaseModel):
221237
id: str
222238
image: str

src/modern/filesystem.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,11 @@ def mark_ML_separation_done(self):
185185
self.ensure_meta_dir()
186186
event_date = datetime.now()
187187
with open(self.SEP_generated_file_path, "w") as f:
188-
f.write(event_date.strftime("%Y-%m-%d %H:%M:%S"))
188+
f.write(event_date.strftime("%Y-%m-%d %H:%M:%S") + "\n")
189+
# Dump content of multiples directory after ML separation
190+
for a_file in self.multiples_vis_dir.iterdir():
191+
if a_file.is_file():
192+
f.write(a_file.name + "\n")
189193

190194
def mark_SEP_validated(self, event_date: datetime):
191195
self.ensure_meta_dir()

src/routers/projects.py

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
1+
import json
12
import tempfile
23
from pathlib import Path
34
from typing import List
45

6+
import cv2
7+
import numpy as np
58
from fastapi import APIRouter, Depends
69
from fastapi.responses import StreamingResponse
710
from sqlalchemy.orm import Session
811

9-
from Models import Project, Background, Scan
12+
from Models import Project, Background, Scan, ScanStats
1013
from ZooProcess_lib.ZooscanFolder import ZooscanDrive
14+
from ZooProcess_lib.img_tools import load_image
1115
from config_rdr import config
1216
from helpers.auth import get_current_user_from_credentials
1317
from helpers.logger import logger
@@ -16,12 +20,15 @@
1620
from legacy.backgrounds import find_final_background_file, find_raw_background_file
1721
from legacy_to_remote.importe import import_old_project
1822
from local_DB.db_dependencies import get_db
23+
from modern.filesystem import TOP_V10_DIR, ModernScanFileSystem
1924
from modern.from_legacy import (
2025
project_from_legacy,
2126
backgrounds_from_legacy_project,
2227
scans_from_legacy_project,
2328
DEPTH_ALL,
2429
)
30+
from modern.ids import subsample_name_from_scan_name
31+
from providers.ML_multiple_separator import RGB_RED_COLOR, BGR_RED_COLOR
2532
from remote.DB import DB
2633
from .utils import validate_path_components
2734

@@ -168,6 +175,69 @@ def get_scans(
168175
return scans_from_legacy_project(db, zoo_project)
169176

170177

178+
@router.get("/{project_hash}/stats")
def get_project_scanning_stats(
    project_hash: str,
    _user=Depends(get_current_user_from_credentials),
    db: Session = Depends(get_db),
) -> List[ScanStats]:
    """Return separation statistics for every fully processed scan of a project.

    A scan directory under the project's V10 work directory is reported only
    when its cut directory and its multiples visualisation directory both
    exist and the "separation validated" flag file is present. Scans whose
    scores file or ``auto_sep_job.log`` is missing are skipped with a warning
    instead of failing the whole request.

    Args:
        project_hash: Hash identifying the project, resolved through
            ``validate_path_components``.
        _user: Authenticated user; only required for access control.
        db: Database session used to resolve the project.

    Returns:
        One ``ScanStats`` per eligible scan; an empty list when the project
        has no V10 work directory.
    """
    # Scores strictly above this value count as "sent to the ML separator".
    ml_separator_min_score = 0.4

    _, zoo_project, _, _ = validate_path_components(db, project_hash)

    result: List[ScanStats] = []

    def files_in_dir(path: Path) -> List[Path]:
        """List the regular files directly inside ``path``."""
        return [a_file for a_file in path.iterdir() if a_file.is_file()]

    def has_a_separator(image_path: Path) -> bool:
        """Tell whether at least one pixel of the image equals BGR_RED_COLOR."""
        sep_img = load_image(image_path, cv2.IMREAD_COLOR_BGR)
        # np.any() yields np.bool_; convert to honour the annotated return type
        return bool(np.any(np.all(sep_img == BGR_RED_COLOR, axis=2)))

    v10_work_dir: Path = zoo_project.zooscan_scan.path / TOP_V10_DIR
    if not v10_work_dir.exists():
        return result

    for scan_dir in v10_work_dir.iterdir():
        subsample = subsample_name_from_scan_name(scan_dir.name)
        modern_fs = ModernScanFileSystem(zoo_project, "", subsample)
        # We need both output directories...
        if not (modern_fs.cut_dir.exists() and modern_fs.multiples_vis_dir.exists()):
            continue
        # ...and the flag that manual separation is finished
        if not modern_fs.SEP_validated_file_path.exists():
            continue

        # NOTE(review): ensure_meta_dir() is called from a GET handler here;
        # judging by its name it may create the directory - confirm this
        # side effect is acceptable.
        auto_sep_log = modern_fs.ensure_meta_dir() / "auto_sep_job.log"
        try:
            with open(modern_fs.scores_file_path, "r") as f:
                scores = json.load(f)
            # Reference time is the same for all images: read it only once
            auto_sep_mtime = auto_sep_log.stat().st_mtime
        except FileNotFoundError as e:
            # One incomplete scan must not break the stats of the whole project
            logger.warning(f"No stats for {subsample}, missing file: {e}")
            continue

        after_seg = len(files_in_dir(modern_fs.cut_dir))
        # presumably keys are image file names and values classifier scores;
        # verify against the writer of the scores file
        sent_to_ml_separator = {
            k: v for k, v in scores.items() if v > ml_separator_min_score
        }

        # Separation work directory. Written into by ML or user
        in_sep_dir = files_in_dir(modern_fs.multiples_vis_dir)
        # Images written after the automatic separation job log
        modified_images = [
            f for f in in_sep_dir if f.stat().st_mtime > auto_sep_mtime
        ]
        # Images modified by the user whose name is not among the ones
        # selected for the ML separator
        added_by_user = {f.name for f in modified_images}.difference(
            sent_to_ml_separator
        )
        # Writes of a separator-less image
        cleared_images = [f for f in modified_images if not has_a_separator(f)]

        result.append(
            ScanStats(
                name=subsample,
                segmented=after_seg,
                sentToSeparator=len(sent_to_ml_separator),
                untouchedByUser=len(in_sep_dir) - len(modified_images),
                addedByUser=len(added_by_user),
                separatedByUser=len(modified_images) - len(cleared_images),
                clearedByUser=len(cleared_images),
            )
        )

    return result
239+
240+
171241
@router.get("/{project_hash}/background/{background_id}")
172242
async def get_image_for_background(
173243
project_hash: str,

0 commit comments

Comments
 (0)