# Secundary functions

## Small code examples

Here are presented, in small and unlinked examples, secundary functions (starting with `bm_[...]`) of `biomod2`. Most of these functions are internally called by some main functions (starting with `BIOMOD_[...]`) of `biomod2`, but can be easily used as such.

### Prepare data

#### Vector data

``````## Generate a binary vector -------------------------------------------------------------
vec.a <- sample(c(0, 1), 100, replace = TRUE)

## Generate a 0-1000 vector (random drawing) --------------------------------------------
vec.b <- runif(100, min = 0, max = 1000)

## Generate a 0-1000 vector (biased drawing) --------------------------------------------
BiasedDrawing <- function(x, m1 = 300, sd1 = 200, m2 = 700, sd2 = 200) {
return(ifelse(x < 0.5, rnorm(1, m1, sd1), rnorm(1, m2, sd2)))
}
vec.c <- sapply(vec.a, BiasedDrawing)
vec.c[which(vec.c < 0)] <- 0
vec.c[which(vec.c > 1000)] <- 1000

## Generate a 0-1000 vector (normal distribution) ---------------------------------------
vec.d <- rnorm(100, 500, 100)
``````

#### Observations & explanatory variables

``````library(biomod2)
library(terra)
``````
``````## Create simple simulated data ---------------------------------------------------------
myResp.sim <- sample(c(0, 1), 20, replace = TRUE)
myExpl.sim <- data.frame(var1 = sample(c(0, 1), 100, replace = TRUE),
var2 = rnorm(100),
var3 = 1:100)

## Create raster data -------------------------------------------------------------------
ras.1 <- ras.2 <- mask.out <- rast(nrows = 10, ncols = 10)
ras.1[] <- as.factor(rep(c(1, 2, 3, 4, 5), each = 20))
ras.1 <- as.factor(ras.1)
ras.2[] <- rnorm(100)
stk <- c(ras.1, ras.2)
names(stk) <- c('varFact', 'varNorm')

## define a list of masks where we want to sample in priority
mask.in[[1]][1:80] <- NA ## only level 5 should be sampled in this mask
mask.in[[1]][21:80] <- NA ## only levels 1 and 5 should be sampled in this mask

data(DataSpecies)
myResp.real <- as.numeric(DataSpecies[, 'GuloGulo'])

data(bioclim_current)
myExpl.real <- rast(bioclim_current)

myCoord.real <- DataSpecies[, c('X_WGS84', 'Y_WGS84')]
myCoord.real.1 <- DataSpecies[which(myResp.real == 1), c('X_WGS84', 'Y_WGS84')]
myResp.real.raster <- classify(subset(myExpl.real, 1),
matrix(c(-Inf, Inf, 0), ncol = 3))
myResp.real.raster[cellFromXY(myResp.real.raster, myCoord.real.1)] <- 1
``````

### Secundary functions : vector data

#### Generate calibration / evaluation datasets

``````bm_SampleBinaryVector(obs = vec.a, ratio = 0.7)
``````

#### Find optimal threshold for a specific evaluation metric

``````# varying the number of thresholds to test
bm_FindOptimStat(metric.eval = 'TSS', obs = vec.a, fit = vec.b, nb.thresh = 5)
bm_FindOptimStat(metric.eval = 'TSS', obs = vec.a, fit = vec.b, nb.thresh = 100)

# different metrics
bm_FindOptimStat(metric.eval = 'TSS', obs = vec.a, fit = vec.c)
bm_FindOptimStat(metric.eval = 'ROC', obs = vec.a, fit = vec.c)
bm_FindOptimStat(metric.eval = 'BOYCE', obs = vec.a, fit = vec.c)
``````

#### From continuous to binary / filtered vector

``````vec.d_bin <- bm_BinaryTransformation(data = vec.d, threshold = 500)
vec.d_filt <- bm_BinaryTransformation(data = vec.d, threshold = 500, do.filtering = TRUE)
cbind(vec.d, vec.d_bin, vec.d_filt)
``````

### Secundary functions : explanatory variables

#### Generate automatic formula

``````bm_MakeFormula(resp.name = 'myResp.sim',
interaction.level = 0)
``````

#### Sample all factor levels

``````samp1 <- bm_SampleFactorLevels(expl.var = stk, mask.out = mask.out)
``````

#### Compute Species Range Envelop model

``````# Compute SRE for several quantile values
sre.100 <- bm_SRE(resp.var = myResp.real.raster,
expl.var = myExpl.real,
new.env = myExpl.real,
quant = 0)
sre.095 <- bm_SRE(resp.var = myResp.real.raster,
expl.var = myExpl.real,
new.env = myExpl.real,
quant = 0.025)
sre.090 <- bm_SRE(resp.var = myResp.real.raster,
expl.var = myExpl.real,
new.env = myExpl.real,
quant = 0.05)

# Visualize results
res <- c(myResp.real.raster, sre.100, sre.095, sre.090)
names(res) <- c("Original distribution", "Full data calibration", "Over 95 percent", "Over 90 percent")
plot(res, zlim = c(0, 1))
``````

#### Compute variables importance

``````mod <- glm(var1 ~ var2 + var3, data = myExpl.sim)
bm_VariablesImportance(bm.model = mod,
expl.var = myExpl.sim[, c('var2', 'var3')],
method = 'full_rand',
nb.rep = 3)
``````

### Secundary functions : `biomod2` data

``````# Format Data with true absences
myBiomodData <- BIOMOD_FormatingData(resp.var = myResp.real,
expl.var = myExpl.real,
resp.xy = myCoord.real,
resp.name = 'Alien')

# Transform true absences into potential pseudo-absences
myResp.real.PA <- ifelse(myResp.real == 1, 1, NA)
myResp.real.PA.vect <- vect(cbind(myCoord.real, myResp.real.PA), geom = c('X_WGS84','Y_WGS84'))
``````

#### Generate pseudo-absence datasets

``````# random method
PA.r <- bm_PseudoAbsences(resp.var = myResp.real.PA.vect,
expl.var = myExpl.real,
nb.rep = 4,
nb.absences = 1000,
strategy = 'random')

# disk method
PA.d <- bm_PseudoAbsences(resp.var = myResp.real.PA.vect,
expl.var = myExpl.real,
nb.rep = 4,
nb.absences = 500,
strategy = 'disk',
dist.min = 5,
dist.max = 35)

# SRE method
PA.s <- bm_PseudoAbsences(resp.var = myResp.real.PA.vect,
expl.var = myExpl.real,
nb.rep = 4,
nb.absences = 1000,
strategy = 'sre',
sre.quant = 0.025)

# user.defined method
myPAtable <- data.frame(PA1 = ifelse(myResp.real == 1, TRUE, FALSE),
PA2 = ifelse(myResp.real == 1, TRUE, FALSE))
for (i in 1:ncol(myPAtable)) myPAtable[sample(which(myPAtable[, i] == FALSE), 500), i] = TRUE
PA.u <- bm_PseudoAbsences(resp.var = myResp.real.PA.vect,
expl.var = myExpl.real,
strategy = 'user.defined',
user.table = myPAtable)

str(PA.r)
apply(PA.r\$pa.tab, 2, table)

apply(PA.d\$pa.tab, 2, table)

apply(PA.s\$pa.tab, 2, table)

tail(PA.u\$pa.tab)
apply(PA.u\$pa.tab, 2, table)
``````
``````# random method : different number of PA
PA.r_mult <- bm_PseudoAbsences(resp.var = myResp.real.PA.vect,
expl.var = myExpl.real,
nb.rep = 4,
nb.absences = c(1000, 500, 500, 200),
strategy = 'random')

str(PA.r_mult)
apply(PA.r_mult\$pa.tab, 2, table)
``````

#### Generate calibration / evaluation datasets

``````# random selection
cv.r <- bm_CrossValidation(bm.format = myBiomodData,
strategy = 'random',
nb.rep = 3,
k = 0.8)

# k-fold selection
cv.k <- bm_CrossValidation(bm.format = myBiomodData,
strategy = 'kfold',
nb.rep = 2,
k = 3)

# block selection
cv.b <- bm_CrossValidation(bm.format = myBiomodData,
strategy = 'block')

# stratified selection (geographic)
cv.s <- bm_CrossValidation(bm.format = myBiomodData,
strategy = 'strat',
k = 2,
balance = 'presences',
strat = 'x')

# stratified selection (environmental)
cv.e <- bm_CrossValidation(bm.format = myBiomodData,
strategy = 'env',
k = 2,
balance = 'presences')

apply(cv.r, 2, table)
apply(cv.k, 2, table)
apply(cv.b, 2, table)
apply(cv.s, 2, table)
apply(cv.e, 2, table)
``````

#### Retrieve modeling options

All models currently available in `biomod2` (and their related package and function) are listed within a small dataset. Some of them can be tuned through the `train` function of the `caret` package and corresponding training function to be used is also indicated.

``````data(ModelsTable)
ModelsTable
``````
``````allModels <- c('ANN', 'CTA', 'FDA', 'GAM', 'GBM', 'GLM'
, 'MARS', 'MAXENT', 'MAXNET', 'RF', 'SRE', 'XGBOOST')

# default parameters
opt.d <- bm_ModelingOptions(data.type = 'binary',
models = allModels,
strategy = 'default')

# providing formated data
opt.df <- bm_ModelingOptions(data.type = 'binary',
models = allModels,
strategy = 'default',
bm.format = myBiomodData,
calib.lines = cv.k)

opt.d
opt.d@models
opt.d@options\$ANN.binary.nnet.nnet
names(opt.d@options\$ANN.binary.nnet.nnet@args.values)

opt.df@options\$ANN.binary.nnet.nnet
names(opt.df@options\$ANN.binary.nnet.nnet@args.values)
``````
``````# bigboss parameters
opt.b <- bm_ModelingOptions(data.type = 'binary',
models = allModels,
strategy = 'bigboss')

# user defined parameters
user.SRE <- list('_allData_allRun' = list(quant = 0.01))
user.XGBOOST <- list('_allData_allRun' = list(nrounds = 10))
user.val <- list(SRE.binary.biomod2.bm_SRE = user.SRE
, XGBOOST.binary.xgboost.xgboost = user.XGBOOST)

opt.u <- bm_ModelingOptions(data.type = 'binary',
models = c('SRE', 'XGBOOST'),
strategy = 'user.defined',
user.val = user.val)

opt.b
opt.u
``````
``````# tune parameters for Random Forest model
tuned.rf <- bm_Tuning(model = 'RF',
tuning.fun = 'rf', ## see in ModelsTable
do.formula = TRUE,
bm.options = opt.d@options\$RF.binary.randomForest.randomForest,
bm.format = myBiomodData)
tuned.rf ## list object

# tune parameters for GAM (from mgcv package) model
tuned.gam <- bm_Tuning(model = 'GAM',
tuning.fun = 'gam', ## see in ModelsTable
do.formula = TRUE,
do.stepAIC = TRUE,
bm.options = opt.d@options\$GAM.binary.mgcv.gam,
bm.format = myBiomodData)
tuned.gam ##list object

# tuned parameters with formated data
opt.t <- bm_ModelingOptions(data.type = 'binary',
models = c('SRE', 'XGBOOST'),
strategy = 'tuned',
bm.format = myBiomodData)

opt.t ## BIOMOD.modeling.options object
``````