% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/collapse_tokens.R
\name{collapse_tokens}
\alias{collapse_tokens}
\title{Collapse sequences of tokens by condition}
\usage{
collapse_tokens(tbl, condition, .collapse = "")
}
\arguments{
\item{tbl}{A tidy text dataset.}

\item{condition}{<\code{\link[rlang:args_data_masking]{data-masked}}>
A logical expression.}

\item{.collapse}{String with which tokens are concatenated.}
}
\value{
A data.frame.
}
\description{
Concatenates sequences of tokens in the tidy text dataset,
while grouping them by an expression.
}
\details{
Note that this function drops all columns except 'token'
and the columns used for grouping sequences. So, the returned data.frame
has only 'doc_id', 'sentence_id', 'token_id', and 'token' columns.
}
\examples{
\dontrun{
df <- tokenize(
  data.frame(
    doc_id = "odakyu-sen",
    text = "\u5c0f\u7530\u6025\u7dda"
  )
) |>
  prettify(col_select = "POS1")

head(collapse_tokens(
  df,
  POS1 == "\u540d\u8a5e" & stringr::str_detect(token, "^[\\\\p{Han}]+$")
))
}
}
