@@ -33,6 +33,17 @@ def _row_to_serializable_dict(row: Any, drop: list[str]) -> dict[str, Any]:
3333 return out
3434
3535
36+ def _crossmatch_metadata_to_candidates (metadata : dict [str , Any ] | None ) -> list [int ]:
37+ if metadata is None :
38+ return []
39+ candidates : list [int ] = []
40+ if "pgc" in metadata and metadata ["pgc" ] is not None :
41+ candidates .append (int (metadata ["pgc" ]))
42+ if "possible_matches" in metadata and metadata ["possible_matches" ] is not None :
43+ candidates .extend (int (p ) for p in metadata ["possible_matches" ])
44+ return candidates
45+
46+
3647@dataclass
3748class QuantityMock :
3849 values : pandas .Series
@@ -289,6 +300,7 @@ def fetch_records(
289300 order_direction : str = "asc" ,
290301 has_pgc : bool | None = None ,
291302 pgc_value : int | None = None ,
303+ triage_status : str | None = None ,
292304 ) -> list [model .TableRecord ]:
293305 where_parts : list [str ] = []
294306 if has_pgc is True :
@@ -301,39 +313,91 @@ def fetch_records(
301313 params : list [Any ] = []
302314 if pgc_value is not None :
303315 params .append (pgc_value )
304- params .append (limit )
305- params .append (row_offset )
306316
307317 id_col = sql .Identifier (INTERNAL_ID_COLUMN_NAME )
308- parts : list [sql .Composable ] = [
309- sql .SQL ("SELECT r.*, o.pgc FROM {}.{} AS r JOIN layer0.records AS o ON r.{} = o.id" ).format (
310- sql .Identifier (RAWDATA_SCHEMA ),
311- sql .Identifier (table_name ),
312- id_col ,
313- ),
314- ]
318+ direction = sql .SQL (order_direction if order_direction in ("asc" , "desc" ) else "asc" )
319+
320+ if triage_status == "unprocessed" :
321+ where_parts .append ("NOT EXISTS (SELECT 1 FROM layer0.crossmatch c WHERE c.record_id = o.id)" )
322+ params .append (limit )
323+ params .append (row_offset )
324+ parts : list [sql .Composable ] = [
325+ sql .SQL (
326+ "SELECT r.*, o.pgc "
327+ "FROM {}.{} AS r "
328+ "JOIN layer0.records AS o ON r.{} = o.id "
329+ "AND o.table_id = (SELECT id FROM layer0.tables WHERE table_name = %s)"
330+ ).format (
331+ sql .Identifier (RAWDATA_SCHEMA ),
332+ sql .Identifier (table_name ),
333+ id_col ,
334+ ),
335+ ]
336+ params .insert (0 , table_name )
337+ elif triage_status in ("pending" , "resolved" ):
338+ where_parts .append ("c.triage_status = %s" )
339+ params .append (triage_status )
340+ params .append (limit )
341+ params .append (row_offset )
342+ parts = [
343+ sql .SQL (
344+ "SELECT r.*, o.pgc, c.triage_status, c.metadata AS crossmatch_metadata "
345+ "FROM {}.{} AS r "
346+ "JOIN layer0.records AS o ON r.{} = o.id "
347+ "AND o.table_id = (SELECT id FROM layer0.tables WHERE table_name = %s) "
348+ "JOIN layer0.crossmatch AS c ON o.id = c.record_id"
349+ ).format (
350+ sql .Identifier (RAWDATA_SCHEMA ),
351+ sql .Identifier (table_name ),
352+ id_col ,
353+ ),
354+ ]
355+ params .insert (0 , table_name )
356+ else :
357+ params .append (limit )
358+ params .append (row_offset )
359+ parts = [
360+ sql .SQL (
361+ "SELECT r.*, o.pgc, c.triage_status, c.metadata AS crossmatch_metadata "
362+ "FROM {}.{} AS r "
363+ "JOIN layer0.records AS o ON r.{} = o.id "
364+ "AND o.table_id = (SELECT id FROM layer0.tables WHERE table_name = %s) "
365+ "LEFT JOIN layer0.crossmatch AS c ON o.id = c.record_id"
366+ ).format (
367+ sql .Identifier (RAWDATA_SCHEMA ),
368+ sql .Identifier (table_name ),
369+ id_col ,
370+ ),
371+ ]
372+ params .insert (0 , table_name )
373+
315374 if where_parts :
316375 parts .append (sql .SQL (" WHERE " ))
317376 parts .append (sql .SQL (" AND " ).join ([sql .SQL (w ) for w in where_parts ]))
318377 parts .append (sql .SQL (" ORDER BY r.{} " ).format (id_col ))
319- parts .append (sql . SQL ( order_direction if order_direction in ( "asc" , "desc" ) else "asc" ) )
378+ parts .append (direction )
320379 parts .append (sql .SQL (" LIMIT %s OFFSET %s" ))
321380
322381 rows = self ._storage .query (sql .Composed (parts ), params = params )
323382 id_col_name = INTERNAL_ID_COLUMN_NAME
324- drop_labels = [id_col_name , "pgc" ]
383+ drop_labels = [id_col_name , "pgc" , "triage_status" , "crossmatch_metadata" ]
325384 result : list [model .TableRecord ] = []
326385 for row in rows :
327386 record_id = str (row [id_col_name ])
328387 original_data = _row_to_serializable_dict (row , drop = drop_labels )
329388 pgc_val = row .get ("pgc" )
330389 if pgc_val is not None and (pandas .isna (pgc_val ) or (isinstance (pgc_val , float ) and np .isnan (pgc_val ))):
331390 pgc_val = None
391+ raw_triage = row .get ("triage_status" )
392+ triage_val = raw_triage if raw_triage is not None else "unprocessed"
393+ candidates = _crossmatch_metadata_to_candidates (row .get ("crossmatch_metadata" ))
332394 result .append (
333395 model .TableRecord (
334396 id = record_id ,
335397 original_data = original_data ,
336398 pgc = int (pgc_val ) if pgc_val is not None else None ,
399+ triage_status = triage_val ,
400+ crossmatch_candidates = candidates ,
337401 )
338402 )
339403 return result
0 commit comments