% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/popchar.R
\name{hfv_qc}
\alias{hfv_qc}
\title{Assess Quality of hfv Datasets}
\usage{
hfv_qc(
  data,
  stageframe = NULL,
  historical = TRUE,
  suite = "size",
  vitalrates = c("surv", "size", "fec"),
  surv = c("alive3", "alive2", "alive1"),
  obs = c("obsstatus3", "obsstatus2", "obsstatus1"),
  size = c("sizea3", "sizea2", "sizea1"),
  sizeb = c(NA, NA, NA),
  sizec = c(NA, NA, NA),
  repst = c("repstatus3", "repstatus2", "repstatus1"),
  fec = c("feca3", "feca2", "feca1"),
  stage = c("stage3", "stage2", "stage1"),
  matstat = c("matstatus3", "matstatus2", "matstatus1"),
  indiv = "individ",
  patch = NA,
  year = "year2",
  density = NA,
  patch.as.random = TRUE,
  year.as.random = TRUE,
  juvestimate = NA,
  juvsize = FALSE,
  fectime = 2,
  censor = NA,
  age = NA,
  indcova = NA,
  indcovb = NA,
  indcovc = NA,
  random.indcova = FALSE,
  random.indcovb = FALSE,
  random.indcovc = FALSE,
  test.group = FALSE,
  ...
)
}
\arguments{
\item{data}{The vertical dataset to be used for analysis. This dataset should 
be of class \code{hfvdata}, but can also be a data frame formatted similarly
to the output format provided by functions \code{\link{verticalize3}()} or
\code{\link{historicalize3}()}, as long as all needed variables are properly
designated.}

\item{stageframe}{The stageframe characterizing the life history model used.
Optional unless \code{test.group = TRUE}, in which case it is required.
Defaults to \code{NULL}.}

\item{historical}{A logical variable denoting whether to assess the effects
of state in occasion \emph{t}-1, in addition to state in occasion \emph{t}.
Defaults to \code{TRUE}.}

\item{suite}{This describes the global model for each vital rate estimation,
and has the following possible values: \code{full}, includes main effects and
all two-way interactions of size and reproductive status; \code{main},
includes main effects only of size and reproductive status; \code{size},
includes only size (also interactions between size in historical model);
\code{rep}, includes only reproductive status (also interactions between
status in historical model); \code{age}, all vital rates estimated with age
and y-intercepts only; \code{cons}, all vital rates estimated only as
y-intercepts. Defaults to \code{size}.}

\item{vitalrates}{A vector describing which vital rates will be estimated via
linear modeling, with the following options: \code{surv}, survival
probability; \code{obs}, observation probability; \code{size}, overall size;
\code{repst}, probability of reproducing; and \code{fec}, amount of
reproduction (overall fecundity). May also be set to
\code{vitalrates = "leslie"}, which is equivalent to setting
\code{c("surv", "fec")} for a Leslie MPM. This choice also determines how
internal data subsetting for vital rate model estimation will work. Defaults
to \code{c("surv", "size", "fec")}.}

\item{surv}{A vector indicating the variable names coding for status as alive
or dead in occasions \emph{t}+1, \emph{t}, and \emph{t}-1, respectively.
Defaults to \code{c("alive3", "alive2", "alive1")}.}

\item{obs}{A vector indicating the variable names coding for observation
status in occasions \emph{t}+1, \emph{t}, and \emph{t}-1, respectively.
Defaults to \code{c("obsstatus3", "obsstatus2", "obsstatus1")}.}

\item{size}{A vector indicating the variable names coding for the primary
size variable on occasions \emph{t}+1, \emph{t}, and \emph{t}-1,
respectively. Defaults to \code{c("sizea3", "sizea2", "sizea1")}.}

\item{sizeb}{A vector indicating the variable names coding for the secondary
size variable on occasions \emph{t}+1, \emph{t}, and \emph{t}-1,
respectively. Defaults to \code{c(NA, NA, NA)}, in which case \code{sizeb} is
not used.}

\item{sizec}{A vector indicating the variable names coding for the tertiary
size variable on occasions \emph{t}+1, \emph{t}, and \emph{t}-1,
respectively. Defaults to \code{c(NA, NA, NA)}, in which case \code{sizec} is
not used.}

\item{repst}{A vector indicating the variable names coding for reproductive
status in occasions \emph{t}+1, \emph{t}, and \emph{t}-1, respectively.
Defaults to \code{c("repstatus3", "repstatus2", "repstatus1")}.}

\item{fec}{A vector indicating the variable names coding for fecundity in
occasions \emph{t}+1, \emph{t}, and \emph{t}-1, respectively. Defaults to
\code{c("feca3", "feca2", "feca1")}.}

\item{stage}{A vector indicating the variable names coding for stage in
occasions \emph{t}+1, \emph{t}, and \emph{t}-1, respectively. Defaults to
\code{c("stage3", "stage2", "stage1")}.}

\item{matstat}{A vector indicating the variable names coding for maturity
status in occasions \emph{t}+1, \emph{t}, and \emph{t}-1, respectively.
Defaults to \code{c("matstatus3", "matstatus2", "matstatus1")}.}

\item{indiv}{A text value indicating the variable name coding individual
identity. Defaults to \code{"individ"}.}

\item{patch}{A text value indicating the variable name coding for patch,
where patches are defined as permanent subgroups within the study population.
Defaults to \code{NA}.}

\item{year}{A text value indicating the variable coding for observation
occasion \emph{t}. Defaults to \code{"year2"}.}

\item{density}{A text value indicating the name of the variable coding for
spatial density, should the user wish to test spatial density as a fixed
factor affecting vital rates. Defaults to \code{NA}.}

\item{patch.as.random}{If set to \code{TRUE} and \code{approach = "mixed"},
then \code{patch} is included as a random factor. If set to \code{FALSE} and
\code{approach = "glm"}, then \code{patch} is included as a fixed factor. All
other combinations of logical value and \code{approach} lead to \code{patch}
not being included in modeling. Here, \code{approach} refers to the argument
of that name in function \code{\link{modelsearch}()}. Defaults to
\code{TRUE}.}

\item{year.as.random}{If set to \code{TRUE} and \code{approach = "mixed"},
then \code{year} is included as a random factor. If set to \code{FALSE}, then
\code{year} is included as a fixed factor. All other combinations of logical
value and \code{approach} lead to \code{year} not being included in modeling.
Here, \code{approach} refers to the argument of that name in function
\code{\link{modelsearch}()}. Defaults to \code{TRUE}.}

\item{juvestimate}{An optional variable denoting the stage name of the
juvenile stage in the vertical dataset. If not \code{NA}, and \code{stage} is
also given (see above), then vital rates listed in \code{vitalrates} other
than \code{fec} will also be estimated from the juvenile stage to all adult
stages. Defaults to \code{NA}, in which case juvenile vital rates are not
estimated.}

\item{juvsize}{A logical variable denoting whether size should be used as a
term in models involving transition from the juvenile stage. Defaults to
\code{FALSE}, and is only used if \code{juvestimate} does not equal
\code{NA}.}

\item{fectime}{A variable indicating which year of fecundity to use as the
response term in fecundity models. Options include \code{2}, which refers to
occasion \emph{t}, and \code{3}, which refers to occasion \emph{t}+1.
Defaults to \code{2}.}

\item{censor}{A vector denoting the names of censoring variables in the
dataset, in order from occasion \emph{t}+1, followed by occasion \emph{t},
and lastly followed by occasion \emph{t}-1. Defaults to \code{NA}.}

\item{age}{Designates the name of the variable corresponding to age in time
\emph{t} in the vertical dataset. Defaults to \code{NA}, in which case age
is not included in linear models. Should only be used if building Leslie or
age x stage matrices.}

\item{indcova}{Vector designating the names in occasions \emph{t}+1,
\emph{t}, and \emph{t}-1 of an individual covariate. Defaults to \code{NA}.}

\item{indcovb}{Vector designating the names in occasions \emph{t}+1,
\emph{t}, and \emph{t}-1 of a second individual covariate. Defaults to
\code{NA}.}

\item{indcovc}{Vector designating the names in occasions \emph{t}+1,
\emph{t}, and \emph{t}-1 of a third individual covariate. Defaults to
\code{NA}.}

\item{random.indcova}{A logical value indicating whether \code{indcova}
should be treated as a random categorical factor, rather than as a fixed
factor. Defaults to \code{FALSE}.}

\item{random.indcovb}{A logical value indicating whether \code{indcovb}
should be treated as a random categorical factor, rather than as a fixed
factor. Defaults to \code{FALSE}.}

\item{random.indcovc}{A logical value indicating whether \code{indcovc}
should be treated as a random categorical factor, rather than as a fixed
factor. Defaults to \code{FALSE}.}

\item{test.group}{A logical value indicating whether to include the
\code{group} variable from the input \code{stageframe} as a fixed categorical
variable in linear models. Defaults to \code{FALSE}.}

\item{...}{Other parameters.}
}
\value{
This function yields text output describing the subsets to be used in
linear vital rate modeling. No value or object is returned.
}
\description{
Function \code{hfv_qc()} tests the overall quality of hfv datasets, and also
runs a series of tests to assess which statistical distributions match the
variables within these datasets. The input format is equivalent to the input
format of function \code{\link{modelsearch}()}, allowing users to assess
vital rate variable distributions assuming the same internal dataset
subsetting used by the latter function, simply by copying and pasting the
parameter options from one function to the other.
}
\section{Notes}{

This function is meant to handle input as would be supplied to function
\code{modelsearch()}. For easiest use, users may copy all input parameters
from a call to function \code{modelsearch()}, and paste them directly into a
call to this function. The exact subsets used in the \code{modelsearch()} run
will also be created here.

Tests of Gaussian normality are conducted as Shapiro-Wilk tests via base R's
\code{shapiro.test()} function. If datasets with more than 5000 rows are
supplied, function \code{hfv_qc()} will sample 5000 rows from the dataset and
conduct the Shapiro-Wilk test on the data sample.
}

\examples{
# Load the lathyrus dataset used throughout this example
data(lathyrus)

# Stage descriptors passed to sf_create() below. Each vector holds one entry
# per stage, in the order given by stagevector (21 stages in total).
sizevector <- c(0, 4.6, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8,
  9)
stagevector <- c("Sd", "Sdl", "Dorm", "Sz1nr", "Sz2nr", "Sz3nr", "Sz4nr",
  "Sz5nr", "Sz6nr", "Sz7nr", "Sz8nr", "Sz9nr", "Sz1r", "Sz2r", "Sz3r",
  "Sz4r", "Sz5r", "Sz6r", "Sz7r", "Sz8r", "Sz9r")
repvector <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
obsvector <- c(0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
matvector <- c(0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
immvector <- c(1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
propvector <- c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0)
indataset <- c(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
binvec <- c(0, 4.6, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
  0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5)

# Combine the stage descriptors with sf_create(); the result is supplied to
# verticalize3() below through its stageassign argument
lathframeln <- sf_create(sizes = sizevector, stagenames = stagevector,
  repstatus = repvector, obsstatus = obsvector, matstatus = matvector,
  immstatus = immvector, indataset = indataset, binhalfwidth = binvec,
  propstatus = propvector)

# Build the vertical dataset that hfv_qc() will assess
lathvertln <- verticalize3(lathyrus, noyears = 4, firstyear = 1988,
  patchidcol = "SUBPLOT", individcol = "GENET", blocksize = 9,
  juvcol = "Seedling1988", sizeacol = "lnVol88", repstracol = "Intactseed88",
  fecacol = "Intactseed88", deadacol = "Dead1988",
  nonobsacol = "Dormant1988", stageassign = lathframeln, stagesize = "sizea",
  censorcol = "Missing1988", censorkeep = NA, NAas0 = TRUE, censor = TRUE)

# Round the fecundity variables for all three occasions to whole numbers
lathvertln$feca2 <- round(lathvertln$feca2)
lathvertln$feca1 <- round(lathvertln$feca1)
lathvertln$feca3 <- round(lathvertln$feca3)

# Run the quality check. The arguments follow the input format of
# modelsearch(), so the same settings can be reused in a call to that function.
hfv_qc(lathvertln, historical = TRUE, suite = "main",
  vitalrates = c("surv", "obs", "size", "repst", "fec"), juvestimate = "Sdl",
  indiv = "individ", patch = "patchid", year = "year2", year.as.random = TRUE,
  patch.as.random = TRUE)

}
