Extensions of base R functions for tokens objects.
# S3 method for tokens
unlist(x, recursive = FALSE, use.names = TRUE)
# S3 method for tokens
x[i, drop_docid = TRUE]
# S3 method for tokens
t1 + t2
# S3 method for tokens_xptr
c(...)
# S3 method for tokens
c(...)
unlist returns a simple character vector from a tokens object.
c(...) and + return a tokens object whose documents have been combined into a
single sequence of documents.
toks <- tokens(c(d1 = "one two three", d2 = "four five six", d3 = "seven eight"))
str(toks)
#> List of 3
#> $ d1: chr [1:3] "one" "two" "three"
#> $ d2: chr [1:3] "four" "five" "six"
#> $ d3: chr [1:2] "seven" "eight"
#> - attr(*, "class")= chr "tokens"
#> - attr(*, "types")= chr [1:8] "one" "two" "three" "four" ...
#> - attr(*, "padding")= logi TRUE
#> - attr(*, "docvars")='data.frame': 3 obs. of 3 variables:
#> ..$ docname_: chr [1:3] "d1" "d2" "d3"
#> ..$ docid_ : Factor w/ 3 levels "d1","d2","d3": 1 2 3
#> ..$ segid_ : int [1:3] 1 1 1
#> - attr(*, "meta")=List of 3
#> ..$ system:List of 5
#> .. ..$ package-version:Classes 'package_version', 'numeric_version' hidden list of 1
#> .. .. ..$ : int [1:3] 4 0 2
#> .. ..$ r-version :Classes 'R_system_version', 'package_version', 'numeric_version' hidden list of 1
#> .. .. ..$ : int [1:3] 4 4 0
#> .. ..$ system : Named chr [1:3] "Windows" "x86-64" "watan"
#> .. .. ..- attr(*, "names")= chr [1:3] "sysname" "machine" "user"
#> .. ..$ directory : chr "C:/Users/watan/Repo/quanteda/docs/reference"
#> .. ..$ created : Date[1:1], format: "2024-07-17"
#> ..$ object:List of 7
#> .. ..$ unit : chr "documents"
#> .. ..$ what : chr "word"
#> .. ..$ tokenizer : chr "tokenize_word4"
#> .. ..$ ngram : int 1
#> .. ..$ skip : int 0
#> .. ..$ concatenator: chr "_"
#> .. ..$ summary :List of 2
#> .. .. ..$ hash: chr(0)
#> .. .. ..$ data: NULL
#> ..$ user : list()
toks[c(1,3)]
#> Tokens consisting of 2 documents.
#> d1 :
#> [1] "one" "two" "three"
#>
#> d3 :
#> [1] "seven" "eight"
#>
# combining tokens
toks1 <- tokens(c(doc1 = "a b c d e", doc2 = "f g h"))
toks2 <- tokens(c(doc3 = "1 2 3"))
toks1 + toks2
#> Tokens consisting of 3 documents.
#> doc1 :
#> [1] "a" "b" "c" "d" "e"
#>
#> doc2 :
#> [1] "f" "g" "h"
#>
#> doc3 :
#> [1] "1" "2" "3"
#>
c(toks1, toks2)
#> Tokens consisting of 3 documents.
#> doc1 :
#> [1] "a" "b" "c" "d" "e"
#>
#> doc2 :
#> [1] "f" "g" "h"
#>
#> doc3 :
#> [1] "1" "2" "3"
#>