-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtaxonomy_formatter.py
More file actions
40 lines (33 loc) · 1.41 KB
/
taxonomy_formatter.py
File metadata and controls
40 lines (33 loc) · 1.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pandas as pd
# Columns loaded from taxonomy.csv. Note that the species-level name is stored
# in 'specificEpithet'; there is no 'species' column in this list.
fields = ['catalogNumber', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'specificEpithet', 'taxonRank']
taxonomy_df = pd.read_csv('taxonomy.csv', usecols=fields)
nums = taxonomy_df["catalogNumber"].to_list()

# Derive a taxon label for each catalog number in the table.
# NOTE(review): `taxon` is reassigned on every iteration and is not stored or
# printed anywhere in the visible code -- confirm whether the result was meant
# to be collected.
for num in nums:
    # Select the row(s) for this catalog number; only the first match is used.
    row = taxonomy_df.loc[taxonomy_df.catalogNumber == num]
    kingdom = row['kingdom'].values[0]
    taxonRank = row['taxonRank'].values[0]
    if taxonRank == 'Species':
        genus = row['genus'].values[0]
        # Read the epithet from 'specificEpithet': 'species' is not one of the
        # loaded columns, so indexing it raised KeyError.
        species = row['specificEpithet'].values[0]
        taxon = f'{genus}_{species}'
    elif taxonRank == 'Kingdom' and kingdom == 'Unknown':
        # An unknown kingdom carries no usable taxon name.
        taxon = None
    else:
        # The rank name, lower-cased, is the column holding the taxon
        # (e.g. 'Family' -> 'family').
        taxon = row[taxonRank.lower()].values[0]
class Taxonomy:
    """Look up a formatted taxon name for a catalog number.

    The taxonomy table is read once from a CSV file into ``self.df`` and then
    queried by ``return_taxon``.
    """

    # Columns read from the CSV. The species-level name is stored in
    # 'specificEpithet'; there is no 'species' column.
    fields = ['catalogNumber', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'specificEpithet', 'taxonRank']

    def __init__(self, csv_path='taxonomy.csv'):
        """Load the taxonomy table.

        Args:
            csv_path: Path of the CSV file to read. Defaults to
                'taxonomy.csv', the path that was previously hard-coded.
        """
        # The column list has to be passed as ``usecols``: the second
        # positional slot of ``read_csv`` is not the column selection (older
        # pandas treats it as the separator, pandas 2 rejects it outright).
        # ``self.fields`` is used because a bare ``fields`` inside a method
        # does not see the class attribute.
        self.df = pd.read_csv(csv_path, usecols=self.fields)

    def return_taxon(self, cat_num):
        """Return the taxon label for the row whose catalogNumber is *cat_num*.

        Args:
            cat_num: Value compared against the 'catalogNumber' column.

        Returns:
            ``'<genus>_<specificEpithet>'`` when the row's taxonRank is
            'Species'; ``None`` when taxonRank is 'Kingdom' and kingdom is
            'Unknown'; otherwise the value of the column named by the
            lower-cased taxonRank.

        Raises:
            IndexError: If no row matches *cat_num*.
            KeyError: If the lower-cased taxonRank is not a loaded column.
        """
        matches = self.df.loc[self.df['catalogNumber'] == cat_num]
        if matches.empty:
            # Same exception type that ``.values[0]`` on an empty selection
            # produced, but with a message that names the missing key.
            raise IndexError(f'catalogNumber {cat_num!r} not found in taxonomy table')

        # Only the first matching row is used.
        rank = matches['taxonRank'].values[0]

        if rank == 'Species':
            genus = matches['genus'].values[0]
            # The epithet column is 'specificEpithet'; 'species' is never
            # loaded, so indexing it raised KeyError.
            epithet = matches['specificEpithet'].values[0]
            return f'{genus}_{epithet}'

        if rank == 'Kingdom' and matches['kingdom'].values[0] == 'Unknown':
            # An unknown kingdom carries no usable taxon name.
            return None

        # The rank name, lower-cased, is the column holding the taxon
        # (e.g. 'Family' -> 'family').
        return matches[rank.lower()].values[0]