La función de enlace de adaboost se describe aquí. Este ejemplo proporciona una descripción detallada del cálculo:
library(gbm);
# Simulate a reproducible data set: two binary predictors and a binary
# response whose success probability depends on the (x.1, x.2) combination.
set.seed(123)
n <- 1000
sim.df <- data.frame(
  x.1 = sample(0:1, n, replace = TRUE),
  x.2 = sample(0:1, n, replace = TRUE)
)
# P(y = 1) for each predictor combination, looked up by 1 + x.1 + 2 * x.2:
#   (x.1, x.2) = (0, 0) -> 0.9, (1, 0) -> 0.7, (0, 1) -> 0.2, (1, 1) -> 0.8
prob.array <- c(0.9, 0.7, 0.2, 0.8)
# The response has to be stored in sim.df, the data frame handed to gbm().
# Assigning to `df$y` fails because `df` is the F-density function from
# the stats package, not a data frame.
cell.index <- 1 + sim.df$x.1 + 2 * sim.df$x.2
sim.df$y <- rbinom(n, size = 1, prob = prob.array[cell.index])
# Boosting settings reused by every gbm() call in this script.
n.trees <- 10
shrinkage <- 0.01

# Fit a Bernoulli (logistic loss) model of y on all other columns of sim.df.
gbmFit <- gbm(
  y ~ .,
  data = sim.df,
  distribution = "bernoulli",
  n.trees = n.trees,
  shrinkage = shrinkage,
  interaction.depth = 2,
  n.minobsinnode = 2,
  bag.fraction = 0.5,
  cv.folds = 0,
  # verbose = FALSE
  n.cores = 1
)

# Default predict() output, stored as the model's log-odds score.
sim.df$logods <- predict(gbmFit, sim.df, n.trees = n.trees)
# Probability as reported by gbm itself.
sim.df$prob <- predict(
  gbmFit, sim.df,
  n.trees = n.trees, type = "response"
)
# The same probability obtained by hand: inverse logit of the stored score.
sim.df$prob.2 <- plogis(sim.df$logods)
# Per-row Bernoulli log-likelihood (i.e. the negated log loss).
sim.df$logloss <- with(sim.df, y * log(prob) + (1 - y) * log(1 - prob))
# Refit the simulated data with the AdaBoost (exponential) loss.
# Only the two simulated predictors are used: by this point sim.df also
# holds columns derived from y and from the Bernoulli fit (logods, prob,
# prob.2, logloss), and `y ~ .` would leak them into the model.
gbmFit <- gbm(
  formula = y ~ x.1 + x.2,
  distribution = "adaboost",
  data = sim.df,
  n.trees = n.trees,
  interaction.depth = 2,
  n.minobsinnode = 2,
  shrinkage = shrinkage,
  bag.fraction = 0.5,
  cv.folds = 0,
  # verbose = FALSE
  n.cores = 1
)

# Default predict() output: the raw model score f(x).
sim.df$exp.scale <- predict(gbmFit, sim.df, n.trees = n.trees)
# Probability as reported by gbm itself.
sim.df$ada.resp <- predict(
  gbmFit, sim.df,
  n.trees = n.trees, type = "response"
)
# Hand-computed counterpart for comparison with ada.resp:
# 1 / (1 + exp(-2 * f(x))).
sim.df$ada.resp.2 <- plogis(2 * sim.df$exp.scale)
# Negated per-row exponential loss (same sign convention as logloss).
# The 0/1 response is recoded to -1/+1 via 2 * y - 1; using y directly
# would give a constant -1 for every y == 0 row.
sim.df$ada.error <- -exp(-(2 * sim.df$y - 1) * sim.df$exp.scale)

# Show the first 20 rows to compare the columns side by side.
sim.df[1:20, ]