2323################################### LC-FTICR OBJECT AND OBJECT'S METHODS ###################################
2424@dataclass
2525class LC_FTICR_WorkflowParameters :
26+ """
27+ Data class to establish workflow parameters.
28+
29+ Parameters
30+ ----------
31+ start_time : int
32+ Start time (minutes).
33+ end_time : int
34+ End time (minutes).
35+ time_block : int
36+ Time block (seconds).
37+ refmasslist_neg : str
38+ Path to reference m/z database.
39+ full_input_file_path : str
40+ The path of file to process.
41+ output_directory : str
42+ Path to save outputs.
43+ output_file_name : str
44+ Output filename.
45+ output_file_type : str
46+ Output extension.
47+ lc_fticr_toml_path : str
48+ The path to the toml file with the lc-fticr ms workflow parameters.
49+ corems_toml_path : str
50+ The path to the toml file with the CoreMS parameters.
51+ do_plot_van_krevelen_all_ids : bool
52+ Output a van Krevelen plot for all IDs.
53+ do_plot_van_krevelen_individual : bool
54+ Output individual van Krevelen plots, one per time block.
55+ do_plot_properties : bool
56+ Output plot of properties.
57+ """
58+
2659 # Time Block Parameters:
2760 start_time : int # minutes
2861 end_time : int # minutes
@@ -79,6 +112,13 @@ def create_temp_corems_toml(self):
79112
80113 ### function that init parser and get data
81114 def init_parser_extract_data (self ) -> pd .DataFrame :
115+ """
116+ Initialize the parser and extract data from input file.
117+ This function reads the input file, extracts the Total Ion Chromatogram (TIC) data,
118+ and returns a DataFrame containing the scans, TIC values, and time.
119+
120+ """
121+
82122 # Define datafile location
83123 file_in = self .full_input_file_path
84124
@@ -103,6 +143,25 @@ def init_parser_extract_data(self) -> pd.DataFrame:
103143 ### process timeblocks
104144 # Process the time block mass spectrum
105145 def proc_time_block_inner (self , msreader , datafile , block ):
146+ """
147+ Process time blocks of mass spectra.
148+
149+ Parameters
150+ ----------
151+ msreader : ImportMassSpectraThermoMSFileReader
152+ The mass spectrum reader object.
153+ datafile : str
154+ The path to the data file.
155+ block : int
156+ The time block number.
157+
158+ Returns
159+ -------
160+ msdf : pd.DataFrame
161+ DataFrame containing the processed mass spectrum data.
162+ statdict : dict
163+ Dictionary containing statistics for the processed mass spectrum.
164+ """
106165 # scans = list(subset_df['scan'])
107166
108167 # load_and_set_toml_parameters_ms(MSParameters, self.corems_toml_path)
@@ -143,6 +202,21 @@ def proc_time_block_inner(self, msreader, datafile, block):
143202 return (msdf , statdict )
144203
145204 def process_with_time_block (self , tic_df ):
205+ """
206+ Process the mass spectra with time blocks.
207+
208+ Parameters
209+ ----------
210+ tic_df : pd.DataFrame
211+ DataFrame containing the Total Ion Chromatogram (TIC) data with time and scan information.
212+ Returns
213+ -------
214+ all_msdfs : pd.DataFrame
215+ DataFrame containing all processed mass spectra data.
216+ all_statdics : list
217+ List of dictionaries containing statistics for each time block.
218+
219+ """
146220 # Strip out the time where there's no useful data
147221 file_in = self .full_input_file_path
148222 tic_df = tic_df [(tic_df ['time' ] > self .start_time ) & (tic_df ['time' ] < self .end_time )]
@@ -178,6 +252,17 @@ def process_with_time_block(self, tic_df):
178252
179253
180254 def create_summary (self , all_statdics ):
255+ """
256+ Create a summary DataFrame from the list of dictionaries containing statistics.
257+ Parameters
258+ ----------
259+ all_statdics : list
260+ List of dictionaries containing statistics for each time block.
261+ Returns
262+ -------
263+ summary_df : pd.DataFrame
264+ DataFrame containing the summary statistics for all time blocks.
265+ """
181266 # Flatten the list of dictionaries
182267 flat_list = [inner_dict for outer_dict in all_statdics for inner_dict in outer_dict .values ()]
183268 # Create a DataFrame
@@ -191,6 +276,19 @@ def create_summary(self, all_statdics):
191276
192277## for creating plots
193278def filter_out_common_background (df ):
279+ """
280+ Filter out common background entries in the DataFrame based on 'Molecular Formula' and 'Peak Height'.
281+
282+ Parameters
283+ ----------
284+ df : pd.DataFrame
285+ DataFrame containing 'Molecular Formula', 'Peak Height', and 'block' columns.
286+ Returns
287+ -------
288+ filtered_df : pd.DataFrame
289+ DataFrame with common background entries removed.
290+ """
291+
194292 formula_block_counts = df .pivot_table (index = 'Molecular Formula' , columns = 'block' , aggfunc = 'size' , fill_value = 0 )
195293
196294 # Filter to get 'Molecular Formula' entries that appear in all blocks
@@ -214,6 +312,19 @@ def peak_height_similar(df, tolerance=0.99): # 10% tolerance
214312
215313### create plots
216314def plot_van_krevelen_all_ids (all_msdfs_path , output_dir ):
315+ """
316+ Plot a van Krevelen diagram for all IDs in the provided DataFrame or CSV file.
317+ Parameters
318+ ----------
319+ all_msdfs_path : str or pd.DataFrame
320+ Path to the CSV file containing all mass spectra data or a DataFrame.
321+ output_dir : str
322+ Directory where the plot will be saved.
323+ Returns
324+ -------
325+ None
326+ """
327+
217328 if isinstance (all_msdfs_path ,str ):
218329 all_msdfs_df = pd .read_csv (all_msdfs_path )
219330 else :
@@ -240,6 +351,18 @@ def plot_van_krevelen_all_ids(all_msdfs_path, output_dir):
240351 plt .show ()
241352
242353def plot_van_krevelen_individual (all_msdfs_path , output_dir ):
354+ """
355+ Plot individual van Krevelen diagrams for each time block in the provided DataFrame or CSV file.
356+ Parameters
357+ ----------
358+ all_msdfs_path : str or pd.DataFrame
359+ Path to the CSV file containing all mass spectra data or a DataFrame.
360+ output_dir : str
361+ Directory where the plots will be saved.
362+ Returns
363+ -------
364+ None
365+ """
243366 if isinstance (all_msdfs_path ,str ):
244367 all_msdfs_df = pd .read_csv (all_msdfs_path )
245368 else :
@@ -278,6 +401,18 @@ def plot_van_krevelen_individual(all_msdfs_path, output_dir):
278401 fig .savefig (output_dir + 'TimeBlockIDs.png' ,dpi = 300 ,bbox_inches = 'tight' )
279402
280403def plot_properties (summary_df_path ,output_dir ):
404+ """
405+ Plot trends and distributions of various properties from the summary DataFrame or CSV file.
406+ Parameters
407+ ----------
408+ summary_df_path : str or pd.DataFrame
409+ Path to the CSV file containing summary statistics or a DataFrame.
410+ output_dir : str
411+ Directory where the plots will be saved.
412+ Returns
413+ -------
414+ None
415+ """
281416 if isinstance (summary_df_path ,str ):
282417 summary_df = pd .read_csv (summary_df_path )
283418 else :
@@ -314,6 +449,19 @@ def plot_properties(summary_df_path,output_dir):
314449################################### RUN LC-FTICR WORKFLOW ###################################
315450
316451def run_LC_FTICR_workflow (lc_fticr_workflow_paramaters_toml_file ):
452+ """
453+ Run LC-FTICR metabolomics workflow.
454+
455+ Parameters
456+ ----------
457+ lc_fticr_workflow_paramaters_toml_file : str
458+ Path to workflow parameters file.
459+ Returns
460+ -------
461+ None
462+
463+ """
464+
317465 # read in LC_WorkflowParameters from toml file
318466 with open (lc_fticr_workflow_paramaters_toml_file , "r" ) as infile :
319467 lc_object = LC_FTICR_WorkflowParameters (** toml .load (infile ))
@@ -345,6 +493,41 @@ def run_LC_FTICR_workflow_wdl(
345493 do_plot_van_krevelen_individual ,
346494 do_plot_properties ,
347495):
496+ """
497+ Run LC-FTICR metabolomics workflow with parameters from WDL inputs.
498+ Parameters
499+ ----------
500+ start_time : int
501+ Start time (minutes).
502+ end_time : int
503+ End time (minutes).
504+ time_block : int
505+ Time block (seconds).
506+ refmasslist_neg : str
507+ Path to reference m/z database.
508+ full_input_file_path : str
509+ The path of file to process.
510+ output_directory : str
511+ Path to save outputs.
512+ output_file_name : str
513+ Output filename.
514+ output_file_type : str
515+ Output extension.
516+ lc_fticr_toml_path : str
517+ The path to the toml file with the lc-fticr ms workflow parameters.
518+ corems_toml_path : str
519+ The path to the toml file with the CoreMS parameters.
520+ do_plot_van_krevelen_all_ids : bool
521+ Output a van Krevelen plot for all IDs.
522+ do_plot_van_krevelen_individual : bool
523+ Output individual van Krevelen plots, one per time block.
524+ do_plot_properties : bool
525+ Output plot of properties.
526+
527+ Returns
528+ -------
529+ None
530+ """
348531 # read in LC_WorkflowParameters from wdl inputs
349532 lc_object = LC_FTICR_WorkflowParameters (start_time = start_time ,
350533 end_time = end_time ,
0 commit comments