Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
c965a77
experiments
acannistra Mar 1, 2017
203f4e2
experiments
acannistra Mar 1, 2017
66d0c55
temporary fix to multiple covariate prior issue
acannistra Mar 27, 2017
c5e5e19
getting merge to work
acannistra Mar 27, 2017
a1753e1
removed working directory
acannistra Mar 27, 2017
a45908f
code for parallel analysis
acannistra Apr 5, 2017
394353e
updated code for multispecies analysis in parallel (10 cores).
acannistra Apr 12, 2017
d2e6075
added species selection code
acannistra Apr 13, 2017
544d539
updates to summation report
acannistra Apr 13, 2017
b4fe9e4
check for NA occs in summary analysis
acannistra Apr 13, 2017
4b18524
removed testing phase from summation script
acannistra Apr 13, 2017
e34336a
fixing mistates
acannistra Apr 13, 2017
ec3bf8b
added analysis in python
acannistra Apr 13, 2017
a8e70f7
added some results
acannistra Apr 13, 2017
8d82dd7
updated results
acannistra Apr 14, 2017
5997699
refactored centroid scripts
acannistra Apr 14, 2017
01eba75
Delete select_species.R
acannistra Apr 14, 2017
dea9c6b
added documentation
acannistra Apr 14, 2017
3883d3e
rename + documentation
acannistra Apr 14, 2017
d14807b
Merge branch 'fixing-priors' of github.com:acannistra/SDMPriors into …
acannistra Apr 14, 2017
3d7c115
renaming python files
acannistra Apr 14, 2017
8992dd1
renaming python files v2
acannistra Apr 14, 2017
a5c1c33
put runModels in its own file
acannistra Apr 14, 2017
5247dbd
fixed small error in loop bounds
acannistra Apr 14, 2017
e42ec9a
added response plots for desert species
acannistra Apr 14, 2017
703e9db
add misc files, bad commit
acannistra Jun 6, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,378 changes: 2,378 additions & 0 deletions GP Analyses.ipynb

Large diffs are not rendered by default.

404 changes: 404 additions & 0 deletions Multispecies GP Model Performance Assessment.ipynb

Large diffs are not rendered by default.

183 changes: 183 additions & 0 deletions Multispecies+GP+Model+Performance+Assessment.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@

library(dismo) #see also zoon R package?
library(plyr)
library(rgbif)
library(GRaF) #see methods paper here: http://onlinelibrary.wiley.com/doi/10.1111/2041-210X.12523/pdf
library(pROC)
library(ROCR)
library(foreach)
library(doMC)
library(rJava)
library(optparse)

registerDoMC(cores=10)

phys = read.csv("Sundayetal_thermallimits.csv")
phys= phys[!is.na(phys$tmax) & !is.na(phys$tmin),]
phys$spec = gsub("_", " ", phys$species)
paste("We've got Tmin and TMax for", nrow(phys), "species.")

climVars = c("presence", "bio1", "bio5", "bio6")


runModels = function(physdata, speciesIdx){
#
# the purpose of this function is to run a series of
# species distribution models given physiological information
# to inform them for a single species.
#
# It should really be 4 or 5 different functions.
#
# each time it runs it produces a .csv file in the current
# directory with {speciesname}-intermed.csv as the filename
# which contains the results from the model runs
# in order.

specName = phys$spec[speciesIdx]
print(paste(specName, speciesIdx))
## get data from GBIF
occs = occ_data(scientificName = specName, limit=1000, minimal=TRUE)$data
if(is.null(occs)) {
print(paste("No occurence information for ", specName, ', SKIPPING.'))
return(list(specName, NaN, NaN, NaN, NaN))
}
occs = occs[which(!is.na(occs$"decimalLongitude") & !is.na(occs$"decimalLatitude")),]



## generate presence + absence
bufs = circles(occs[,c("decimalLongitude", "decimalLatitude")], d=50000, lonlat=TRUE)
abs = spsample(bufs@polygons, 100, type='random', iter=100)

## get climate data
BClim = getData("worldclim", var='bio', res=2.5)
specExt = extent(rbind(range(occs$decimalLongitude), range(occs$decimalLatitude)))
BClim = crop(BClim, specExt)

## assign data to each presence ...
clim_Pres = extract(BClim, occs[,c("decimalLongitude", "decimalLatitude")])
if (all(is.na(clim_Pres))) {
print(paste("No climate data for ", specName, ", SKIPPING"))
return(list(specName, NaN, NaN, NaN, NaN))
}

clim_Pres = data.frame(lon=occs$decimalLongitude,
lat=occs$decimalLatitude,
clim_Pres)
## ..and absence point.
clim_Abs = extract(BClim, abs)
clim_Abs = data.frame(lon=abs@coords[,'x'], lat=abs@coords[,'y'], clim_Abs)

## assign binary presence (1) absence (0) flags.
presence = rep(1,dim(clim_Pres)[1])
presence_temp = data.frame(presence, clim_Pres[,3:ncol(clim_Pres)])
presence = rep(0, dim(clim_Abs)[1])
absence_temp = data.frame(presence, clim_Abs[,3:ncol(clim_Abs)])


## and combine them.
clim_PresAbs = rbind(presence_temp, absence_temp)

## extract and transform relevant climate information
covs = clim_PresAbs[, climVars]
covs[,2:ncol(covs)] = covs[,2:ncol(covs)]/10 ## (BClim needs to be divided by 10)
# omit rows with NA
covs = na.omit(covs)

## split data into train and test sets.
train_size = floor(0.75*nrow(covs))
trainPres = NULL
testPres = NULL
## make sure that there are 2 classes in the test data
numTries = 0
while (!(length(unique(testPres)) > 1 && length(unique(trainPres)) > 1) && numTries < 10){
train_idxs = sample(seq_len(nrow(covs)), size=train_size)
trainData = covs[train_idxs,]
trainPres = trainData[,1]
trainCovs = trainData[,2:ncol(trainData)]
testData = covs[-train_idxs,]
testPres = testData[,1]
testCovs = testData[,2:ncol(trainData)]
numTries =+ 1
}

if (!(length(unique(testPres)) > 1 && length(unique(trainPres)) > 1)){
print(paste("Bad train/test split for", specName, ", SKIPPING"))
return(list(specName, NaN, NaN, NaN, NaN))

}
## BUILD MODELS

# simple model
simple = graf(trainPres, trainCovs, opt.l=T)

# threshold model

# define threshold functions and threshold prior.
e.max<-function(x) ifelse(x<physdata$tmax[speciesIdx]-10, 0.9, exp(-(x-physdata$tmax[speciesIdx]+10)/5)) #max
e.min<-function(x) ifelse(x<physdata$tmin[speciesIdx] , 0.1, 1- exp(-(x-(physdata$tmax[speciesIdx])/10000) ) ) #min fix
e.prior = function(x) e.max(x[,2]) * e.min(x[,3])

if (any(is.na(qnorm(e.prior(trainCovs))))){
print(paste("Error in prior for species ", specName, ", SKIPPING"))
return(list(specName, NaN, NaN, NaN, NaN))

# results = append(results, 0)
#return(0)
}

eModel = graf(trainPres, trainCovs, prior = e.prior, opt.l=T)

# normal prior
ct.mean = physdata$tmax[speciesIdx] - physdata$tmin[speciesIdx]
ct.std = ct.mean / 2
ct.thresh <- function(x) ifelse(x$bio6 > physdata$tmin[speciesIdx] & x$bio5 < physdata$tmax[speciesIdx], 1, .00001)
ct.prob = function(x) {
dnorm(x$bio1, mean=ct.mean, sd = ct.std) * ct.thresh(x)
}
ct.cumprob = function(x){
ct.thresh(x) * (pnorm(x$bio5, mean=ct.mean, sd=ct.std) - pnorm(x$bio6, mean=ct.mean, sd=ct.std))
}

ct.model = graf(trainPres, trainCovs, prior = ct.prob, opt.l=T)

# MaxEnt model
me = maxent(trainCovs , p=trainPres)

## EVALUATE MODELS
## order of eval: Simple, e.prior, normal.prior, maxEnt
intermed_results = c(specName)

for (mod in list(simple, eModel, ct.model, me)){
prob = data.frame(predict(mod, testCovs))
if(class(mod) != "MaxEnt") prob = prob$posterior.mode
pred = prediction(prob, testPres)
auc = performance(pred, measure='auc')
auc = auc@y.values[[1]]
intermed_results = c(intermed_results, auc)
}
write.table(t(matrix(unlist(intermed_results))), paste("./",gsub(" ", "-", specName),"-intermed.csv", sep=""), sep=",", col.names=FALSE)
print(paste("finished index", speciesIdx))
return(intermed_results)
}


# for most experiments this loop won't run all the way through.
# Seems to get stuck at the end. Not sure why. We push through by
# segmenting input indices (1..30, 31..60, 61..90, 91..nrow(phys))

# this parallelized for loop runs the `runmodels` function for each species in the
# physiological data file.
results = foreach(speciesIdx=seq(1, nrow(phys)), .errorhandling = 'remove') %dopar% {
return(runModels(phys, speciesIdx))
}



# # it's unlikely that the code reaches this point.
# resultsDF = as.data.frame(t(matrix(unlist(results), nrow=length(unlist(results[1])))))
# colnames(resultsDF) = c("species", "simple", "eMinMax", "normdist", "maxent")
# print(resultsDF)
# write.csv(resultsDF, "./allSpecies_results.csv")


3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
### Disclaimer:
This is in-progress research software. We provide no warranty, guarantee, or description of functionality. All credit belongs to Tony Cannistra, Lauren Buckley, and the University of Washington. If you're interested in collaboration, contact [tonycan@uw.edu](mailto:tonycan@uw.edu)

# SDMpriors
Exploring the potential to incorporate physiological priors in species distribution models
44 changes: 26 additions & 18 deletions SDMpriors.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ gbifmap(occ)

#restrict to points with lat and lon
occ<- occ[which(!is.na(occ$"decimalLongitude") & !is.na(occ$"decimalLatitude")) ,]

nrow(occ)
#http://onlinelibrary.wiley.com/doi/10.1111/ecog.02118/abstract
#errorcheck(occ)
#quickclean
Expand Down Expand Up @@ -79,6 +79,7 @@ occ= occ[check,]

# define circles with a radius of 50 km around the subsampled points
x = circles(occ[,c("decimalLongitude","decimalLatitude")], d=50000, lonlat=T)
x
# draw random points that must fall within the circles in object x
bg = spsample(x@polygons, 100, type='random', iter=100)

Expand Down Expand Up @@ -111,9 +112,10 @@ head(df,5)

covs <- df[, c("pres","bio1", "bio5","bio6")]#Pick variables
#covs <- df

head(covs, 5)
#divide var by 10
covs[,2:ncol(covs)]= covs[,2:ncol(covs)]/10
head(covs)

#remove NAs
covs= na.omit(covs)
Expand All @@ -130,9 +132,8 @@ pa_te <- test$pres
m1 <- graf(pa_tr, train[,2:ncol(train)])
pred_df<-data.frame(predict(m1,test[,2:ncol(train)]))

#plot
plot(m1)

par(mfrow = c(1, 3))
plot(m1, prior=TRUE)
#---------------------------------------------
#establish function incorporating priors

Expand All @@ -146,14 +147,19 @@ m.lin <- glm(pa_tr ~ bio1, data=train, family = binomial)
lin <- function(temp) predict(m.lin, temp, type = "response")

m3 <- graf(pa_tr, train[,2:ncol(train), drop = FALSE],opt.l = TRUE, prior = lin)
m4 <- graf(pa_tr, train[,2:ncol(train), drop = FALSE],opt.l = TRUE)
par(mfrow = c(1, 3))

plot(m3, prior=TRUE)

plot(m3)

#NEW THRESHOLD FUNCTIONS

#threshold using bio1
thresh <- function(x) ifelse(x$bio1 > dat$tmin[spec.k] & x$bio1 < dat$tmax[spec.k] ,0.6, 0.1)
thresh <- function(x) ifelse(x > dat$tmin[spec.k] & x$bio1 < dat$tmax[spec.k] ,0.6, 0.1)
m3 <- graf(pa_tr, train[,2, drop = FALSE],opt.l = TRUE, prior = thresh)
par(mfrow=c(1,3))
plot(m3, prior=T)

#normal curve using bio1
n.mean= (dat$tmin[spec.k]+dat$tmax[spec.k])/2
Expand All @@ -171,28 +177,30 @@ plot(m3, prior = TRUE)
#-------------------------------------------
#Threshold with multiple envi variables, needs fixing
#Threshold with bio5 and bio6
e.max<-function(x) ifelse(x<dat$tmax[spec.k], p<-0.8, p<- 0.1) #max
e.min<-function(x) ifelse(x<dat$tmin[spec.k], p<-0.1, p<- 0.8) #min
e.max<-function(x) ifelse(x<dat$tmax[spec.k], 0.8, 0.1) #max
e.min<-function(x) ifelse(x<dat$tmin[spec.k], 0.1, 0.8) #min

#exponential based on bio5 and bio6
#start decline ten degrees above / below
e.max<-function(x) ifelse(x<dat$tmax[spec.k]-10, p<-1, p<- exp(-(x-dat$tmax[spec.k]+10)/5)) #max
e.min<-function(x) ifelse(x<dat$tmin[spec.k], p<-0, p<- 1- exp(-(x-(dat$tmax[spec.k])/10000) ) ) #min fix
e.max<-function(x) ifelse(x<dat$tmax[spec.k]-10, 0.8, exp(-(x-dat$tmax[spec.k]+10)/5)) #max
e.min<-function(x) ifelse(x<dat$tmin[spec.k] , 0.1, 1- exp(-(x-(dat$tmax[spec.k])/10000) ) ) #min fix

#plot(-10:60, e.min(-10:60))

#bio1: mean, bio5:max, bio6:min
e.prior= function(x)cbind(1,e.max(x[,2]),e.min(x[,3]) )
m3 <- graf(pa_tr, train[,2:4, drop = FALSE],opt.l = TRUE, prior = e.prior)
### ERROR fix to run on multiple environmental variables
#bio1: mean, bio5:max, bio6:min (THIS IS A BAD PRIOR -- FOR TESTING)
e.prior = function(x) e.max(x[,2]) * e.min(x[,3])

m3 <- graf(pa_tr, train[,2:4, drop = FALSE],opt.l = TRUE, prior=e.prior)
plot(m3, prior=TRUE)


#---------------------------------------------------

pred_df<-data.frame(predict(m3,test[,2:ncol(train), drop = FALSE]))
print(paste("Area under ROC with prior knowledge of thermal niche : ",auc(pa_te, pred_df$posterior.mode)))

#Plot response curves
plot(m3)
plot(m3, prior=TRUE)
m3
#plot3d(m3)

#--------------------------------
Expand Down Expand Up @@ -264,7 +272,7 @@ points(bg, cex=0.5, col="darkorange3")
axis(1,las=1)
axis(2,las=1)

# restore the box around the map
# restore the box around the mapaxi
box()

#-----------------------------------------
Expand Down
6 changes: 6 additions & 0 deletions allSpecies_results.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"","species","simple","eMinMax","normdist","maxent"
"1","Bufo marmoreus","0.820494864612512","0.8265639589169","0.753267973856209","0.752801120448179"
"2","Bufo mazatlanensis","0.876562954347194","0.839633614422797","0.759668508287293","0.854463506833382"
"3","Eleutherodactylus fleischmanni","0.865116279069767","0.849612403100775","0.841860465116279","0.897674418604651"
"4","Dendrobates auratus","0.625454545454545","0.644545454545455","0.620909090909091","0.65"
"5","Hyla regilla","NaN","NaN","NaN","NaN"
76 changes: 76 additions & 0 deletions centroids2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"","1","2"
"c.30.6100525418367...95.3682653867347.",30.6100525418367,-95.3682653867347
"c.10.4387178261012...84.8217449702201.",10.4387178261012,-84.8217449702201
"c.10.3638909599721...84.1767005813859.",10.3638909599721,-84.1767005813859
"c.9.96868276294891...84.1348119826708.",9.96868276294891,-84.1348119826708
"c.16.5879103385629...90.9928411643135.",16.5879103385629,-90.9928411643135
"c.11.3123137888343...84.127161289388.",11.3123137888343,-84.127161289388
"c.10.1907847424075...83.7234220696816.",10.1907847424075,-83.7234220696816
"c.10.0093717958735...84.0491083130309.",10.0093717958735,-84.0491083130309
"c.30.6817034921089...110.265888723641.",30.6817034921089,-110.265888723641
"c.15.3012823411031...92.5517109098679.",15.3012823411031,-92.5517109098679
"c.27.9070271587593...103.77168907196.",27.9070271587593,-103.77168907196
"c.15.1146392600849...44.6269180636943.",15.1146392600849,-44.6269180636943
"c.18.0680076058085...99.3560532607072.",18.0680076058085,-99.3560532607072
"c.27.0471048343469...108.614598068305.",27.0471048343469,-108.614598068305
"c..16.7302184559342..30.3012873044125.",-16.7302184559342,30.3012873044125
"c.0.26543568503937..31.8409833070866.",0.26543568503937,31.8409833070866
"c.12.5362688448623...81.4283880513318.",12.5362688448623,-81.4283880513318
"c..32.77320563598..150.284648579457.",-32.77320563598,150.284648579457
"c..29.2195818236566..127.15347928125.",-29.2195818236566,127.15347928125
"c.10.2449713264692...87.7034088499705.",10.2449713264692,-87.7034088499705
"c.20.870922075419...141.259858712291.",20.870922075419,-141.259858712291
"c.9.95533566487685...83.815790913293.",9.95533566487685,-83.815790913293
"c.38.6860590638056..119.675565424769.",38.6860590638056,119.675565424769
"c.40.102018517558..89.0285841765317.",40.102018517558,89.0285841765317
"c.28.0507945619048..119.391271995238.",28.0507945619048,119.391271995238
"c.36.4873136199095...110.231106692308.",36.4873136199095,-110.231106692308
"c..8.9426019031339..34.644185031339.",-8.9426019031339,34.644185031339
"c..27.6245692557377..28.611590157377.",-27.6245692557377,28.611590157377
"c.16.6517253827113...92.5112137823413.",16.6517253827113,-92.5112137823413
"c.33.6621143324453..32.8397374167328.",33.6621143324453,32.8397374167328
"c.15.3781038363588...89.4958978699503.",15.3781038363588,-89.4958978699503
"c.33.9644498930379...107.137147444984.",33.9644498930379,-107.137147444984
"c..39.4255848412439...69.9956397184943.",-39.4255848412439,-69.9956397184943
"c..42.9933589041096...68.9180306267123.",-42.9933589041096,-68.9180306267123
"c..38.7221178221477...70.4619704010067.",-38.7221178221477,-70.4619704010067
"c..30.9822492777778...67.5584265.",-30.9822492777778,-67.5584265
"c..34.0166449887218...68.0689754398496.",-34.0166449887218,-68.0689754398496
"c..45.6871409285714...68.6060513333333.",-45.6871409285714,-68.6060513333333
"c..24.1932005...66.0472025.",-24.1932005,-66.0472025
"c..47.306496443299...67.8115863608247.",-47.306496443299,-67.8115863608247
"c..38.7203211620112...68.2508498100559.",-38.7203211620112,-68.2508498100559
"c..29.9810771666667...67.3533774333333.",-29.9810771666667,-67.3533774333333
"c..36.1006651065574...57.2852816147541.",-36.1006651065574,-57.2852816147541
"c..37.62925709375...58.872253359375.",-37.62925709375,-58.872253359375
"c..30.235869516129...68.5248281612903.",-30.235869516129,-68.5248281612903
"c..41.723690109375...67.813143921875.",-41.723690109375,-67.813143921875
"c..28.063902...66.8233463333333.",-28.063902,-66.8233463333333
"c..39.8837683444976...68.9584796315789.",-39.8837683444976,-68.9584796315789
"c..26.7977487407407...66.3199806296296.",-26.7977487407407,-66.3199806296296
"c.27.43558792...88.60575684.",27.43558792,-88.60575684
"c.34.7102273548387...112.172824666667.",34.7102273548387,-112.172824666667
"c.31.8206123228428...101.490253855478.",31.8206123228428,-101.490253855478
"c.38.9653640273163...114.061897497626.",38.9653640273163,-114.061897497626
"c..40.8185353939394...69.3418617575758.",-40.8185353939394,-69.3418617575758
"c.36.142520089232...118.842883316431.",36.142520089232,-118.842883316431
"c.21.7018966554622..106.827619647059.",21.7018966554622,106.827619647059
"c.4.45470998798396...80.1118133047774.",4.45470998798396,-80.1118133047774
"c.43.494036727208...80.3698183159876.",43.494036727208,-80.3698183159876
"c.9.80027735895738...83.586345236015.",9.80027735895738,-83.586345236015
"c.35.7348566578947...85.7463143684211.",35.7348566578947,-85.7463143684211
"c.37.0838506560191...117.460765002867.",37.0838506560191,-117.460765002867
"c.12.0249065919764...86.3404123110338.",12.0249065919764,-86.3404123110338
"c.34.7219541720322...118.938984805835.",34.7219541720322,-118.938984805835
"c.18.9260385195473...94.84229025.",18.9260385195473,-94.84229025
"c.22.174409375..120.1571748125.",22.174409375,120.1571748125
"c..24.1440616511628..115.048632011628.",-24.1440616511628,115.048632011628
"c.29.443917223563..119.595095119724.",29.443917223563,119.595095119724
"c.14.6658235798833..103.694852774372.",14.6658235798833,103.694852774372
"c.27.9088431847826...16.0441501195652.",27.9088431847826,-16.0441501195652
"c.45.3111530520182...123.617448096812.",45.3111530520182,-123.617448096812
"c.45.1335259707984...123.634770550186.",45.1335259707984,-123.634770550186
"c.34.2582430932798...116.945841086259.",34.2582430932798,-116.945841086259
"c.33.007954140074...118.60829627176.",33.007954140074,-118.60829627176
"c.34.0888366929329...115.36270116416.",34.0888366929329,-115.36270116416
"c.56.34187681..10.233252573.",56.34187681,10.233252573
Loading