% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/train_models.R
\name{train_models}
\alias{train_models}
\title{Train Specified Machine Learning Algorithms on the Training Data}
\usage{
train_models(
  train_data,
  label,
  task,
  algorithms,
  resampling_method,
  folds,
  repeats,
  group_cols = NULL,
  block_col = NULL,
  block_size = NULL,
  initial_window = NULL,
  assess_window = NULL,
  skip = 0,
  outer_folds = NULL,
  resamples = NULL,
  tune_params,
  engine_params = list(),
  metric,
  summaryFunction = NULL,
  seed = 123,
  recipe,
  use_default_tuning = FALSE,
  tuning_strategy = "grid",
  tuning_iterations = 10,
  tuning_complexity = "balanced",
  grid_levels = 3L,
  early_stopping = FALSE,
  adaptive = FALSE,
  algorithm_engines = NULL,
  use_parsnip_defaults = FALSE,
  warn_engine_defaults = TRUE,
  n_cores = 1,
  verbose = FALSE,
  event_class = "first",
  class_threshold = "auto",
  start_col = NULL,
  time_col = NULL,
  status_col = NULL,
  eval_times = NULL,
  at_risk_threshold = 0.1,
  survival_metric_convention = "fastml",
  audit_env = NULL,
  multiclass_auc = "macro",
  store_fold_models = FALSE
)
}
\arguments{
\item{train_data}{Preprocessed training data frame.}

\item{label}{Name of the target variable.}

\item{task}{Type of task: \code{"classification"}, \code{"regression"}, or \code{"survival"}.}

\item{algorithms}{Vector of algorithm names to train.}

\item{resampling_method}{Resampling method for cross-validation. Supported
options include standard \code{"cv"}, \code{"repeatedcv"}, and
\code{"boot"}, as well as grouped resampling via \code{"grouped_cv"},
blocked/rolling schemes via \code{"blocked_cv"} or \code{"rolling_origin"},
nested resampling via \code{"nested_cv"}, and the passthrough
\code{"none"} option.}

\item{folds}{Number of folds for cross-validation.}

\item{repeats}{Number of times to repeat cross-validation (only applicable for methods like \code{"repeatedcv"}).}

\item{group_cols}{Optional character vector of grouping columns used with
\code{resampling_method = "grouped_cv"}. For classification problems the outcome
column is used to request grouped stratification where supported; if class
imbalance prevents stratification, grouped folds are still created and a
warning is emitted to document the limitation.}

\item{block_col}{Optional name of the ordering column used with blocked or
rolling resampling.}

\item{block_size}{Optional integer specifying the block size for
\code{resampling_method = "blocked_cv"}.}

\item{initial_window}{Optional integer specifying the initial window size for
rolling resampling.}

\item{assess_window}{Optional integer specifying the assessment window size
for rolling resampling.}

\item{skip}{Optional integer number of resamples to skip between rolling
resamples.}

\item{outer_folds}{Optional integer specifying the number of outer folds for
\code{resampling_method = "nested_cv"}.}

\item{resamples}{Optional rsample object. If provided, custom resampling splits
will be used instead of those created internally.}

\item{tune_params}{A named list of tuning ranges. For each algorithm, supply a
list of engine-specific parameter values, e.g.
\code{list(rand_forest = list(ranger = list(mtry = c(1, 3))))}.}

\item{engine_params}{A named list of fixed engine-level arguments passed
directly to the model fitting call for each algorithm/engine combination.
Use this to control options like \code{ties = "breslow"} for Cox models or
\code{importance = "impurity"} for ranger. Unlike \code{tune_params}, these
values are not tuned over a grid.}

\item{metric}{The performance metric to optimize. For classification, options
include \code{"accuracy"}, \code{"roc_auc"}, \code{"logloss"},
\code{"brier_score"}, and \code{"ece"} (plus other class metrics).}

\item{summaryFunction}{A custom summary function for model evaluation. Default is \code{NULL}.}

\item{seed}{An integer value specifying the random seed for reproducibility.}

\item{recipe}{A recipe object for preprocessing.}

\item{use_default_tuning}{Logical; if \code{TRUE} and \code{tune_params} is \code{NULL}, tuning is performed using default grids. Tuning also occurs when custom \code{tune_params} are supplied. When \code{FALSE} and no custom parameters are given, the model is fitted once with default settings.}

\item{tuning_strategy}{A string specifying the tuning strategy. Must be one of
\code{"grid"}, \code{"bayes"}, or \code{"none"}. Adaptive methods may be
used with \code{"grid"}. If \code{"none"} is selected, the workflow is fitted
directly without tuning.
If custom \code{tune_params} are supplied with \code{tuning_strategy = "none"},
they will be ignored with a warning.}

\item{tuning_iterations}{Number of iterations for Bayesian tuning. Ignored
when \code{tuning_strategy} is not \code{"bayes"}; validation occurs only
for the Bayesian strategy.}

\item{tuning_complexity}{Character string specifying the tuning complexity preset.
One of \code{"quick"}, \code{"balanced"}, \code{"thorough"}, or
\code{"exhaustive"}. Controls both grid density and parameter range width.}

\item{grid_levels}{Integer specifying number of levels per parameter for
grid search. Higher values create denser grids but increase computation
exponentially (grid size = levels^n_params).}

\item{early_stopping}{Logical indicating whether to use early stopping in Bayesian tuning.}

\item{adaptive}{Logical indicating whether to use adaptive/racing methods.}

\item{algorithm_engines}{A named list specifying the engine to use for each algorithm.}

\item{use_parsnip_defaults}{Logical. If \code{TRUE}, use parsnip's default engines
instead of fastml's optimized defaults. Default is \code{FALSE}.}

\item{warn_engine_defaults}{Logical. If \code{TRUE} (default), warn when fastml's
default engine differs from parsnip's default.}

\item{n_cores}{Integer number of cores requested for parallel processing.
Used to decide whether tuning/resampling should run in parallel and to
configure engine thread settings when supported.}

\item{verbose}{Logical. If \code{TRUE}, print informational messages about
engine selection and parameter overrides.}

\item{event_class}{Character string identifying the positive class when computing
classification metrics (\code{"first"} or \code{"second"}).}

\item{class_threshold}{For binary classification, controls how class probabilities
are converted into hard class predictions during evaluation. Numeric values in
(0, 1) set a fixed threshold. The default \code{"auto"} tunes a threshold on the
training data to maximize F1; use \code{"model"} to keep the model's default threshold.}

\item{start_col}{Optional name of the survival start time column passed through
to downstream evaluation helpers.}

\item{time_col}{Optional name of the survival stop time column.}

\item{status_col}{Optional name of the survival status/event column.}

\item{eval_times}{Optional numeric vector of time horizons for survival metrics.}

\item{at_risk_threshold}{Numeric cutoff used to determine the evaluation window
for survival metrics within guarded resampling.}

\item{survival_metric_convention}{Character string specifying which survival
metric conventions to follow. \code{"fastml"} (default) uses fastml's internal
defaults for evaluation horizons and t_max. \code{"tidymodels"} uses
\code{eval_times} as the explicit evaluation grid and applies yardstick-style
Brier/IBS normalization; when \code{eval_times} is \code{NULL}, time-dependent Brier
metrics are omitted.}

\item{audit_env}{Internal environment that tracks security audit findings when
custom preprocessing hooks are executed. Typically supplied by
\code{fastml()} and should be left as \code{NULL} when calling
\code{train_models()} directly.}

\item{multiclass_auc}{For multiclass ROC AUC, the averaging method to use:
\code{"macro"} (default, tidymodels) or \code{"macro_weighted"}. Macro weights each
class equally, while macro_weighted weights by class prevalence and can
change model rankings on imbalanced data.}

\item{store_fold_models}{Logical. If \code{TRUE}, store the fitted fold models
during resampling for later inspection or stability analysis.}
}
\value{
A list of trained model objects.
}
\description{
Trains specified machine learning algorithms on the preprocessed training data.
}
