@@ -313,39 +313,40 @@ def format_ref_table(new_rows, sample_data, datatype):
313313 return sort_df
314314
315315
316- def marker_plots (df , output_name , kit , wd = "." ):
316+ def marker_plots (df , output_name , kit , sample_list , wd = "." ):
317317 Path (f"{ wd } /MarkerPlots" ).mkdir (parents = True , exist_ok = True )
318318 filt_df = df [df ["allele_type" ] == "Typed" ]
319- for sample_id in df ["SampleID" ].unique ():
320- if df [df ["SampleID" ] == sample_id ].empty :
321- print (f"{ sample_id } does not have any reads passing filter. Skipping to next sample." )
322- else :
323- with PdfPages (f"{ wd } /MarkerPlots/{ output_name } _{ sample_id } _marker_plots.pdf" ) as pdf :
324- if not filt_df [filt_df ["SampleID" ] == sample_id ].empty :
325- make_plot (filt_df , sample_id , output_name , kit , filters = True , at = False )
326- pdf .savefig ()
327- make_plot (df , sample_id , output_name , kit )
328- pdf .savefig ()
329- make_plot (df , sample_id , output_name , kit , sameyaxis = True )
319+ for sample_id in sample_list :
320+ # if df[df["SampleID"] == sample_id].empty:
321+ # print(f"{sample_id} does not have any reads passing filter. Skipping to next sample.")
322+ # else:
323+ with PdfPages (f"{ wd } /MarkerPlots/{ output_name } _{ sample_id } _marker_plots.pdf" ) as pdf :
324+ if not filt_df [filt_df ["SampleID" ] == sample_id ].empty :
325+ make_plot (filt_df , sample_id , output_name , kit , filters = True , at = False )
330326 pdf .savefig ()
327+ make_plot (df , sample_id , output_name , kit )
328+ pdf .savefig ()
329+ make_plot (df , sample_id , output_name , kit , sameyaxis = True )
330+ pdf .savefig ()
331331
332332
333333def make_plot (df , sample_id , output_name , kit , sameyaxis = False , filters = False , at = True ):
334334 sample_df = df [df ["SampleID" ] == sample_id ].copy ()
335- conditions = [
336- sample_df ["allele_type" ].str .contains ("Typed" ),
337- sample_df ["allele_type" ].str .contains ("BelowAT" ),
338- sample_df ["allele_type" ].str .contains ("stutter" ),
339- sample_df ["allele_type" ].str .contains ("Deleted" ),
340- ]
341- values = ["Typed" , "BelowAT" , "Stutter" , "Deleted" ]
342- sample_df .loc [:, "Type" ] = np .select (conditions , values )
343- max_reads = max (sample_df ["Reads" ])
344- n = 100 if max_reads > 1000 else 10
345- max_yvalue = (int (math .ceil (max_reads / n )) * n ) + n
346- increase_value = int (math .ceil ((max_yvalue / 5 ) / n )) * n
335+ plot_loc = 0
347336 fig = plt .figure (figsize = (30 , 30 ))
348- n = 0
337+ if not sample_df .empty :
338+ conditions = [
339+ sample_df ["allele_type" ].str .contains ("Typed" ),
340+ sample_df ["allele_type" ].str .contains ("BelowAT" ),
341+ sample_df ["allele_type" ].str .contains ("stutter" ),
342+ sample_df ["allele_type" ].str .contains ("Deleted" ),
343+ ]
344+ values = ["Typed" , "BelowAT" , "Stutter" , "Deleted" ]
345+ sample_df .loc [:, "Type" ] = np .select (conditions , values )
346+ max_reads = max (sample_df ["Reads" ])
347+ n = 100 if max_reads > 1000 else 10
348+ max_yvalue = (int (math .ceil (max_reads / n )) * n ) + n
349+ increase_value = int (math .ceil ((max_yvalue / 5 ) / n )) * n
349350 if kit == "powerseq" :
350351 str_list = (
351352 str_lists ["powerseq_ystrs" ] if "sexloci" in output_name else str_lists ["powerseq_strs" ]
@@ -355,10 +356,10 @@ def make_plot(df, sample_id, output_name, kit, sameyaxis=False, filters=False, a
355356 str_lists ["forenseq_ystrs" ] if "sexloci" in output_name else str_lists ["forenseq_strs" ]
356357 )
357358 for marker in str_list :
358- n += 1
359+ plot_loc += 1
359360 colors = {"Typed" : "green" , "Stutter" : "blue" , "BelowAT" : "red" , "Deleted" : "purple" }
360361 marker_df = sample_df [sample_df ["Locus" ] == marker ].sort_values (by = "CE_Allele" )
361- ax = fig .add_subplot (6 , 5 , n )
362+ ax = fig .add_subplot (6 , 5 , plot_loc )
362363 if not marker_df .empty :
363364 if marker == "AMELOGENIN" :
364365 for i , row in marker_df .iterrows ():
@@ -448,6 +449,7 @@ def process_input(
448449 info = True ,
449450):
450451 full_df = pd .read_csv (f"{ input_name } .txt" , sep = "\t " )
452+ sample_list = full_df ["SampleID" ].unique ()
451453 if custom :
452454 seq_col = "Custom_Range_Sequence"
453455 brack_col = "Custom_Bracketed_Notation"
@@ -460,7 +462,7 @@ def process_input(
460462 )
461463 if nofiltering :
462464 full_df ["allele_type" ] = "Typed"
463- marker_plots (full_df , input_name , kit )
465+ marker_plots (full_df , input_name , kit , sample_list )
464466 if output_type == "efm" or output_type == "mpsproto" :
465467 EFM_output (full_df , outpath , profile_type , data_type , brack_col , sex , kit , separate )
466468 else :
@@ -469,7 +471,7 @@ def process_input(
469471 dict_loc = {k : v for k , v in full_df .groupby (["SampleID" , "Locus" ])}
470472 final_df , flags_df = process_strs (dict_loc , data_type , seq_col , brack_col , kit )
471473 if final_df is not None :
472- marker_plots (final_df , input_name , kit )
474+ marker_plots (final_df , input_name , kit , sample_list )
473475 if output_type == "efm" or output_type == "mpsproto" :
474476 EFM_output (
475477 final_df , outpath , profile_type , data_type , brack_col , sex , kit , separate
0 commit comments