% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bagger.R
\name{bagger}
\alias{bagger}
\alias{bagger.default}
\alias{bagger.data.frame}
\alias{bagger.matrix}
\alias{bagger.formula}
\alias{bagger.recipe}
\title{Bagging functions}
\usage{
bagger(x, ...)

\method{bagger}{default}(x, ...)

\method{bagger}{data.frame}(
  x,
  y,
  weights = NULL,
  base_model = "CART",
  times = 11L,
  control = control_bag(),
  cost = NULL,
  ...
)

\method{bagger}{matrix}(
  x,
  y,
  weights = NULL,
  base_model = "CART",
  times = 11L,
  control = control_bag(),
  cost = NULL,
  ...
)

\method{bagger}{formula}(
  formula,
  data,
  weights = NULL,
  base_model = "CART",
  times = 11L,
  control = control_bag(),
  cost = NULL,
  ...
)

\method{bagger}{recipe}(
  x,
  data,
  base_model = "CART",
  times = 11L,
  control = control_bag(),
  cost = NULL,
  ...
)
}
\arguments{
\item{x}{A data frame, matrix, or recipe (depending on the method being used).}

\item{...}{Optional arguments to pass to the base model function.}

\item{y}{A numeric or factor vector of outcomes. Categorical outcomes (i.e.,
classes) should be represented as factors, not integers.}

\item{weights}{A numeric vector of non-negative case weights. These values are
not used during bootstrap resampling.}

\item{base_model}{A single character value for the model being bagged. Possible
values are "CART", "MARS", "nnet", and "C5.0" (classification only).}

\item{times}{A single integer greater than 1 for the maximum number of bootstrap
samples/ensemble members (some model fits might fail).}

\item{control}{A list of options generated by \code{control_bag()}.}

\item{cost}{A non-negative scalar (for two class problems) or a cost matrix.}

\item{formula}{An object of class "formula" (or one that can be coerced to
that class): a symbolic description of the model to be fitted. Note that
this package does not support multivariate outcomes and that, if some
predictors are factors, dummy variables will \emph{not} be created unless the
underlying model function creates them.}

\item{data}{A data frame containing the variables used in the formula or
recipe.}
}
\description{
General suite of bagging functions for several models.
}
\details{
\code{bagger()} fits separate models to bootstrap samples. The
prediction function for each model object is encoded in an R expression and
the original model object is discarded. When making predictions, each
prediction formula is evaluated on the new data and aggregated using the
mean.

Variable importance scores are calculated using implementations in each
package. When requested, the results are in a tibble with column names
\code{term} (the predictor), \code{value} (the importance score), and \code{used} (the
percentage of times that the variable was in the prediction equation).

The models can be fit in parallel using the \pkg{future} package. To
enable parallelism, use the \code{future::plan()} function to declare \emph{how} the
computations should be distributed. Note that this will almost certainly
multiply the memory required to fit the models.

For neural networks, variable importance is calculated using the method
of Garson described in Gevrey \emph{et al.} (2003).
}
\examples{
library(recipes)
library(dplyr)

data(biomass, package = "modeldata")

# Split on the `dataset` indicator, then drop the `dataset` and `sample`
# columns from each piece.
biomass_tr <-
  biomass \%>\%
  dplyr::filter(dataset == "Training") \%>\%
  dplyr::select(-dataset, -sample)

biomass_te <-
  biomass \%>\%
  dplyr::filter(dataset == "Testing") \%>\%
  dplyr::select(-dataset, -sample)

# ------------------------------------------------------------------------------

# Request variable importance scores so that `var_imp()` has results to return
ctrl <- control_bag(var_imp = TRUE)

# ------------------------------------------------------------------------------

# `times` is low to make the examples run faster

# x/y interface. Column 6 is excluded from the predictors; presumably it is
# the `HHV` outcome that is passed separately as `y` (confirm against the
# column order of `biomass_tr`).
# The seed is reset before every fit so that each printed result is
# reproducible.
set.seed(7687)
mars_bag <- bagger(x = biomass_tr[, -6], y = biomass_tr$HHV,
                   base_model = "MARS", times = 5, control = ctrl)
mars_bag
var_imp(mars_bag)

set.seed(7687)
cart_bag <- bagger(x = biomass_tr[, -6], y = biomass_tr$HHV,
                   base_model = "CART", times = 5, control = ctrl)
cart_bag

# ------------------------------------------------------------------------------
# Other interfaces

# Recipes can be used
biomass_rec <-
  recipe(HHV ~ ., data = biomass_tr) \%>\%
  step_pca(all_predictors())

set.seed(7687)
cart_pca_bag <- bagger(biomass_rec, data = biomass_tr, base_model = "CART",
                       times = 5, control = ctrl)

cart_pca_bag

# Using formulas (this replaces the `mars_bag` object created above)
set.seed(7687)
mars_bag <- bagger(HHV ~ ., data = biomass_tr, base_model = "MARS", times = 5,
                   control = ctrl)
mars_bag
}
\references{
Gevrey, M., Dimopoulos, I., and Lek, S. (2003). Review and
comparison of methods to study the contribution of variables in artificial
neural network models. Ecological Modelling, 160(3), 249-264.
}
