Last active
December 4, 2020 18:10
-
-
Save MirzaCengic/fd5a5347f7237e8ad37494c4439b93f1 to your computer and use it in GitHub Desktop.
ensemble_modeling2.R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| cat("BIOMOD_EnsembleModeling2 loaded", "\n") | |
| BIOMOD_EnsembleModeling2 <- function (modeling.output, chosen.models = "all", em.by = "PA_dataset+repet", | |
| eval.metric = "all", eval.metric.quality.threshold = NULL, | |
| models.eval.meth = c("KAPPA", "TSS", "ROC"), prob.mean = TRUE, | |
| prob.cv = FALSE, prob.ci = FALSE, prob.ci.alpha = 0.05, prob.median = FALSE, | |
| committee.averaging = FALSE, prob.mean.weight = FALSE, prob.mean.weight.decay = "proportional", | |
| VarImport = 0) | |
| { | |
| biomod2:::.bmCat("Build Ensemble Models") | |
| args <- biomod2:::.BIOMOD_EnsembleModeling.check.args(modeling.output, | |
| chosen.models, eval.metric, eval.metric.quality.threshold, | |
| models.eval.meth, prob.mean, prob.cv, prob.ci, prob.ci.alpha, | |
| prob.median, committee.averaging, prob.mean.weight, prob.mean.weight.decay, | |
| em.by) | |
| modeling.output <- args$modeling.output | |
| chosen.models <- args$chosen.models | |
| eval.metric <- args$eval.metric | |
| eval.metric.quality.threshold <- args$eval.metric.quality.threshold | |
| models.eval.meth <- args$models.eval.meth | |
| prob.mean <- args$prob.mean | |
| prob.cv <- args$prob.cv | |
| prob.ci <- args$prob.ci | |
| prob.ci.alpha <- args$prob.ci.alpha | |
| prob.median <- args$prob.median | |
| committee.averaging <- args$committee.averaging | |
| prob.mean.weight <- args$prob.mean.weight | |
| prob.mean.weight.decay <- args$prob.mean.weight.decay | |
| em.by <- args$em.by | |
| rm("args") | |
| em.avail <- c("prob.mean", "prob.cv", "prob.ci.inf", "prob.ci.sup", | |
| "prob.median", "committee.averaging", "prob.mean.weight") | |
| em.algo <- em.avail[c(prob.mean, prob.cv, prob.ci, prob.ci, | |
| prob.median, committee.averaging, prob.mean.weight)] | |
| Options <- list(em.by = em.by) | |
| expl_var_type = get_var_type(get_formal_data(modeling.output, | |
| "expl.var")) | |
| expl_var_range = get_var_range(get_formal_data(modeling.output, | |
| "expl.var")) | |
| EM <- new("BIOMOD.EnsembleModeling.out", sp.name = modeling.output@sp.name, | |
| expl.var.names = modeling.output@expl.var.names, em.by = em.by, | |
| modeling.id = modeling.output@modeling.id) | |
| EM@models.out.obj@link <- file.path(modeling.output@sp.name, | |
| paste(modeling.output@sp.name, ".", modeling.output@modeling.id, | |
| ".models.out", sep = "")) | |
| em.mod.assemb <- biomod2:::.em.models.assembling(chosen.models, em.by) | |
| for (assemb in names(em.mod.assemb)) { | |
| cat("\n\n >", assemb, "ensemble modeling") | |
| models.kept <- em.mod.assemb[[assemb]] | |
| if (modeling.output@has.evaluation.data) { | |
| eval.obs <- get_formal_data(modeling.output, "eval.resp.var") | |
| eval.expl <- get_formal_data(modeling.output, "eval.expl.var") | |
| } | |
| obs <- get_formal_data(modeling.output, "resp.var") | |
| expl <- get_formal_data(modeling.output, "expl.var") | |
| if (em.by %in% c("PA_dataset", "PA_dataset+algo", "PA_dataset+repet")) { | |
| if (unlist(strsplit(assemb, "_"))[3] != "AllData") { | |
| if (inherits(get_formal_data(modeling.output), | |
| "BIOMOD.formated.data.PA")) { | |
| kept_cells <- get_formal_data(modeling.output)@PA[, | |
| unlist(strsplit(assemb, "_"))[3]] | |
| } | |
| else { | |
| kept_cells <- rep(T, length(obs)) | |
| } | |
| obs <- obs[kept_cells] | |
| expl <- expl[kept_cells, , drop = F] | |
| } | |
| } | |
| obs[is.na(obs)] <- 0 | |
| needed_predictions <- biomod2:::.get_needed_predictions(modeling.output, | |
| em.by, models.kept, eval.metric, eval.metric.quality.threshold) | |
| if (!length(needed_predictions)) | |
| next | |
| for (eval.m in eval.metric) { | |
| models.kept <- needed_predictions$models.kept[[eval.m]] | |
| models.kept.scores <- needed_predictions$models.kept.scores[[eval.m]] | |
| for (algo in em.algo) { | |
| if (algo == "prob.mean") { | |
| cat("\n > Mean of probabilities...") | |
| model_name <- paste(modeling.output@sp.name, | |
| "_", "EMmeanBy", eval.m, "_", assemb, sep = "") | |
| model.bm <- new("EMmean_biomod2_model", model = models.kept, | |
| model_name = model_name, model_class = "EMmean", | |
| model_options = Options, resp_name = modeling.output@sp.name, | |
| expl_var_names = modeling.output@expl.var.names, | |
| expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
| modeling.id = modeling.output@modeling.id) | |
| } | |
| if (algo == "prob.cv") { | |
| cat("\n > Coef of variation of probabilities...") | |
| model_name <- paste(modeling.output@sp.name, | |
| "_", "EMcvBy", eval.m, "_", assemb, sep = "") | |
| model.bm <- new("EMcv_biomod2_model", model = models.kept, | |
| model_name = model_name, model_class = "EMcv", | |
| model_options = Options, resp_name = modeling.output@sp.name, | |
| expl_var_names = modeling.output@expl.var.names, | |
| expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
| modeling.id = modeling.output@modeling.id) | |
| } | |
| if (algo == "prob.median") { | |
| cat("\n > Median of probabilities...") | |
| model_name <- paste(modeling.output@sp.name, | |
| "_", "EMmedianBy", eval.m, "_", assemb, sep = "") | |
| model.bm <- new("EMmedian_biomod2_model", model = models.kept, | |
| model_name = model_name, model_class = "EMmedian", | |
| model_options = Options, resp_name = modeling.output@sp.name, | |
| expl_var_names = modeling.output@expl.var.names, | |
| expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
| modeling.id = modeling.output@modeling.id) | |
| } | |
| if (algo == "prob.ci.inf") { | |
| cat("\n > Confidence Interval...") | |
| model_name <- paste(modeling.output@sp.name, | |
| "_", "EMciInfBy", eval.m, "_", assemb, sep = "") | |
| model.bm <- new("EMci_biomod2_model", model = models.kept, | |
| model_name = model_name, model_class = "EMci", | |
| model_options = Options, resp_name = modeling.output@sp.name, | |
| expl_var_names = modeling.output@expl.var.names, | |
| expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
| modeling.id = modeling.output@modeling.id, | |
| alpha = prob.ci.alpha, side = "inferior") | |
| } | |
| if (algo == "prob.ci.sup") { | |
| model_name <- paste(modeling.output@sp.name, | |
| "_", "EMciSupBy", eval.m, "_", assemb, sep = "") | |
| model.bm <- new("EMci_biomod2_model", model = models.kept, | |
| model_name = model_name, model_class = "EMci", | |
| model_options = Options, resp_name = modeling.output@sp.name, | |
| expl_var_names = modeling.output@expl.var.names, | |
| expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
| modeling.id = modeling.output@modeling.id, | |
| alpha = prob.ci.alpha, side = "superior") | |
| } | |
| if (algo == "committee.averaging") { | |
| cat("\n > Committee averaging...") | |
| model_name <- paste(modeling.output@sp.name, | |
| "_", "EMcaBy", eval.m, "_", assemb, sep = "") | |
| models.kept.tresh <- unlist(lapply(models.kept, | |
| function(x) { | |
| mod <- tail(unlist(strsplit(x, "_")), 3)[3] | |
| run <- tail(unlist(strsplit(x, "_")), 3)[2] | |
| dat <- tail(unlist(strsplit(x, "_")), 3)[1] | |
| return(get_evaluations(modeling.output)[eval.m, | |
| "Cutoff", mod, run, dat]) | |
| })) | |
| names(models.kept.tresh) <- models.kept | |
| to_keep <- is.finite(models.kept.tresh) | |
| model.bm <- new("EMca_biomod2_model", model = models.kept[to_keep], | |
| model_name = model_name, model_class = "EMca", | |
| model_options = Options, resp_name = modeling.output@sp.name, | |
| expl_var_names = modeling.output@expl.var.names, | |
| expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
| modeling.id = modeling.output@modeling.id, | |
| tresholds = models.kept.tresh[to_keep]) | |
| } | |
| if (algo == "prob.mean.weight") { | |
| cat("\n > Probabilities weighting mean...") | |
| model_name <- paste(modeling.output@sp.name, | |
| "_", "EMwmeanBy", eval.m, "_", assemb, sep = "") | |
| models.kept.tmp <- models.kept | |
| models.kept.scores.tmp <- models.kept.scores | |
| if (eval.m == "ROC") { | |
| sre.id <- grep("_SRE", models.kept) | |
| if (length(sre.id) > 0) { | |
| cat("\n ! SRE modeling were switched off") | |
| models.kept.tmp <- models.kept[-sre.id] | |
| models.kept.scores.tmp <- models.kept.scores[-sre.id] | |
| } | |
| } | |
| models.kept.tmp <- models.kept.tmp[is.finite(models.kept.scores.tmp)] | |
| models.kept.scores.tmp <- models.kept.scores.tmp[is.finite(models.kept.scores.tmp)] | |
| models.kept.scores.tmp <- round(models.kept.scores.tmp, | |
| 3) | |
| cat("\n\t\t", " original models scores = ", | |
| models.kept.scores.tmp) | |
| if (is.numeric(prob.mean.weight.decay)) { | |
| DecayCount <- sum(models.kept.scores.tmp > | |
| 0) | |
| WOrder <- order(models.kept.scores.tmp, decreasing = T) | |
| Dweights <- models.kept.scores.tmp | |
| for (J in 1:DecayCount) Dweights[WOrder[J]] <- I(prob.mean.weight.decay^(DecayCount - | |
| J + 1)) | |
| for (J in 1:length(models.kept.scores.tmp)) { | |
| if (sum(models.kept.scores.tmp[J] == models.kept.scores.tmp) > | |
| 1) | |
| Dweights[which(models.kept.scores.tmp[J] == | |
| models.kept.scores.tmp)] <- mean(Dweights[which(models.kept.scores.tmp[J] == | |
| models.kept.scores.tmp)]) | |
| } | |
| models.kept.scores.tmp <- round(Dweights, | |
| digits = 3) | |
| rm(list = c("Dweights", "DecayCount", "WOrder")) | |
| } | |
| else if (is.function(prob.mean.weight.decay)) { | |
| models.kept.scores.tmp <- sapply(models.kept.scores.tmp, | |
| prob.mean.weight.decay) | |
| } | |
| models.kept.scores.tmp <- round(models.kept.scores.tmp/sum(models.kept.scores.tmp, | |
| na.rm = T), digits = 3) | |
| cat("\n\t\t", " final models weights = ", models.kept.scores.tmp) | |
| model.bm <- new("EMwmean_biomod2_model", model = models.kept.tmp, | |
| model_name = model_name, model_class = "EMwmean", | |
| model_options = Options, resp_name = modeling.output@sp.name, | |
| expl_var_names = modeling.output@expl.var.names, | |
| expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
| modeling.id = modeling.output@modeling.id, | |
| penalization_scores = models.kept.scores.tmp) | |
| } | |
| pred.bm <- predict(model.bm, expl, formal_predictions = needed_predictions$predictions[, | |
| model.bm@model, drop = F], on_0_1000 = T) | |
| pred.bm.name <- paste0(model_name, ".predictions") | |
| pred.bm.outfile <- file.path(model.bm@resp_name, | |
| ".BIOMOD_DATA", model.bm@modeling.id, "ensemble.models", | |
| "ensemble.models.predictions", pred.bm.name) | |
| dir.create(dirname(pred.bm.outfile), showWarnings = FALSE, | |
| recursive = TRUE) | |
| assign(pred.bm.name, pred.bm) | |
| save(list = pred.bm.name, file = pred.bm.outfile, | |
| compress = TRUE) | |
| rm(list = pred.bm.name) | |
| if (exists("eval.obs") & exists("eval.expl")) { | |
| eval_pred.bm <- predict(model.bm, eval.expl) | |
| pred.bm.name <- paste0(model_name, ".predictionsEval") | |
| assign(pred.bm.name, eval_pred.bm) | |
| save(list = pred.bm.name, file = pred.bm.outfile, | |
| compress = TRUE) | |
| rm(list = pred.bm.name) | |
| } | |
| if (length(models.eval.meth)) { | |
| cat("\n\t\t\tEvaluating Model stuff...") | |
| if (algo == "prob.cv") { | |
| cross.validation <- matrix(NA, 4, length(models.eval.meth), | |
| dimnames = list(c("Testing.data", "Cutoff", | |
| "Sensitivity", "Specificity"), models.eval.meth)) | |
| } | |
| else { | |
| if (em.by == "PA_dataset+repet") { | |
| calib_lines <- get_calib_lines(modeling.output) | |
| pa_dataset_id <- paste("_", unlist(strsplit(assemb, | |
| "_"))[3], sep = "") | |
| repet_id <- paste("_", unlist(strsplit(assemb, | |
| "_"))[2], sep = "") | |
| if (repet_id == "_Full") { | |
| eval_lines <- rep(T, length(pred.bm)) | |
| } | |
| else { | |
| eval_lines <- !na.omit(calib_lines[, | |
| repet_id, pa_dataset_id]) | |
| if (all(!eval_lines)) { | |
| eval_lines <- !eval_lines | |
| } | |
| } | |
| } | |
| else { | |
| eval_lines <- rep(T, length(pred.bm)) | |
| } | |
| cross.validation <- sapply(models.eval.meth, | |
| Find.Optim.Stat, Fit = pred.bm[eval_lines], | |
| Obs = obs[eval_lines]) | |
| rownames(cross.validation) <- c("Testing.data", | |
| "Cutoff", "Sensitivity", "Specificity") | |
| } | |
| if (exists("eval_pred.bm")) { | |
| if (algo == "prob.cv") { | |
| cross.validation <- matrix(NA, 5, length(models.eval.meth), | |
| dimnames = list(c("Testing.data", "Evaluating.data", | |
| "Cutoff", "Sensitivity", "Specificity"), | |
| models.eval.meth)) | |
| } | |
| else { | |
| true.evaluation <- sapply(models.eval.meth, | |
| function(x) { | |
| Find.Optim.Stat(Fit = eval_pred.bm * | |
| 1000, Obs = eval.obs, Fixed.thresh = cross.validation["Cutoff", | |
| x]) | |
| }) | |
| cross.validation <- rbind(cross.validation["Testing.data", | |
| ], true.evaluation) | |
| rownames(cross.validation) <- c("Testing.data", | |
| "Evaluating.data", "Cutoff", "Sensitivity", | |
| "Specificity") | |
| } | |
| } | |
| cross.validation <- t(round(cross.validation, | |
| digits = 3)) | |
| model.bm@model_evaluation <- cross.validation | |
| rm(list = c("cross.validation")) | |
| } | |
| if (VarImport > 0) { | |
| cat("\n\t\t\tEvaluating Predictor Contributions...", | |
| "\n") | |
| variables.importance <- variables_importance(model.bm, | |
| expl, nb_rand = VarImport) | |
| model.bm@model_variables_importance <- variables.importance$mat | |
| rm(list = c("variables.importance")) | |
| } | |
| assign(model_name, model.bm) | |
| save(list = model_name, file = file.path(modeling.output@sp.name, | |
| "models", modeling.output@modeling.id, model_name)) | |
| EM@em.models <- c(EM@em.models, model.bm) | |
| EM@em.computed <- c(EM@em.computed, model_name) | |
| } | |
| } | |
| } | |
| names(EM@em.models) <- EM@em.computed | |
| model.name <- paste(EM@sp.name, ".", EM@modeling.id, "ensemble.models.out", | |
| sep = "") | |
| assign(x = model.name, value = EM) | |
| save(list = model.name, file = file.path(EM@sp.name, model.name)) | |
| biomod2:::.bmCat("Done") | |
| return(EM) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment