% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cross-validation-KNN.R
\name{cv_knn}
\alias{cv_knn}
\title{Cross-validation for K nearest-neighbor regression}
\usage{
cv_knn(x_mat, dise_vec, veri_stat, k_list = NULL, type = "eucli", plot = FALSE)
}
\arguments{
\item{x_mat}{a numeric design matrix, which is used in \code{\link{rho_knn}} to estimate probabilities of the disease status.}

\item{dise_vec}{an n * 3 binary matrix with three columns, corresponding to the three classes of the disease status. In row i, 1 in column j indicates that the i-th subject belongs to class j, with j = 1, 2, 3. A row of \code{NA} values indicates a non-verified subject.}

\item{veri_stat}{a binary vector containing the verification status (1 verified, 0 not verified).}

\item{k_list}{a list of candidate values for K. If \code{NULL} (the default), the set \eqn{\{1, 2, ..., n.ver\}}{{1, 2, ..., n.ver}} is employed, where \eqn{n.ver} is the number of verified subjects.}

\item{type}{a type of distance, see \code{\link{rho_knn}} for more details. Default \code{"eucli"}.}

\item{plot}{if \code{TRUE}, a plot of cross-validation prediction error is produced.}
}
\value{
A suitable choice for k is returned.
}
\description{
This function calculates the estimated cross-validation prediction error for K nearest-neighbor regression and returns a suitable choice for K.
}
\details{
Data are divided into two groups: the first contains the data corresponding to \code{veri_stat = 1}, whereas the second contains the data corresponding to \code{veri_stat = 0}. In the first group, the discrepancy between the true disease status and the KNN estimates of the probabilities of the disease status is computed by varying \code{k} from 1 to the number of verified subjects; see To Duc et al. (2020). The optimal value of \code{k} is the value that corresponds to the smallest value of the discrepancy.
}
\examples{
data(EOC)
x_mat <- cbind(EOC$CA125, EOC$CA153, EOC$Age)
dise_na <- pre_data(EOC$D, EOC$CA125)
dise_vec_na <- dise_na$dise_vec
cv_knn(x_mat, dise_vec_na, EOC$V, type = "mahala", plot = TRUE)

}
\references{
To Duc, K., Chiogna, M. and Adimari, G. (2020)
Nonparametric estimation of ROC surfaces in presence of verification bias.
\emph{REVSTAT-Statistical Journal}. \bold{18}, 5, 697--720.
}
