KeywordProcessor {rflashtext} | R Documentation |
Based on the Python library flashtext. For more details about the algorithm, see: FlashText
attrs
list. Stores the attributes of the KeywordProcessor
object.
new()
Initializes the KeywordProcessor
object.
KeywordProcessor$new( keys = NULL, words = NULL, trie = NULL, id = "_word_", chars = paste0(c(letters, LETTERS, 0:9, "_"), collapse = ""), ignore_case = FALSE )
keys
character vector. Strings to identify (find/replace) in the text. Must be provided if trie
is NULL
.
words
character vector. Strings to be returned (find) or used as the replacement (replace) when the respective keys
are found. Should have the same length as keys
. If not provided, words = keys
.
trie
character. JSON built character by character and needed for the search. It can be provided instead of keys
and words
.
id
character. Used to name the end nodes of the trie
dictionary.
chars
character. Used to validate if a word continues. Default paste0(c(letters, LETTERS, 0:9, "_"), collapse = ""),
equivalent to [a-zA-Z0-9_]
.
ignore_case
logical. If FALSE
the search is case sensitive. Default FALSE
.
library(rflashtext) processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles")) processor$attrs
library(rflashtext) processor <- KeywordProcessor$new(chars = paste0(letters, collapse = ""), keys = c("NY", "LA")) processor$attrs
show_trie()
Shows the trie
dictionary used to find/replace keys
.
KeywordProcessor$show_trie()
character. JSON string of the trie
structure. It can be converted to list using jsonlite::fromJSON
.
library(rflashtext) processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles")) processor$show_trie()
add_keys_words()
Adds keys
and words
to the trie
dictionary.
KeywordProcessor$add_keys_words(keys, words = NULL)
keys
character vector. Strings to identify (find/replace) in the text.
words
character vector. Strings to be returned (find) or used as the replacement (replace) when the respective keys
are found. Should have the same length as keys
. If not provided, words = keys
.
library(rflashtext) processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles")) processor$add_keys_words(keys = "CA", words = "California") processor$show_trie()
contain_keys()
Checks if keys
are in the trie
dictionary.
KeywordProcessor$contain_keys(keys)
keys
character vector. Strings to check for presence in the search trie
dictionary.
logical vector. TRUE
if the keys
are in the search trie
dictionary.
library(rflashtext) processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles")) processor$contain_keys(keys = c("NY", "LA", "TX"))
get_words()
Gets the words
for the keys
found in the trie
dictionary.
KeywordProcessor$get_words(keys)
keys
character vector. Strings to get back the respective words
.
character vector. Respective words
. If keys
are not found, returns NA_character_
.
library(rflashtext) processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles")) processor$get_words(keys = c("NY", "LA", "TX"))
find_keys()
Finds keys
in the sentences using the search trie
dictionary.
KeywordProcessor$find_keys(sentences, span_info = TRUE)
sentences
character vector. Text in which to find the keys
previously defined.
span_info
logical. TRUE
to retrieve the words
and the position of the matches. FALSE
to only retrieve the words
. Default TRUE
.
list with the words
corresponding to keys
found in the sentence
. Hint: Use data.table::rbindlist(...)
to transform the list to a data frame.
library(rflashtext) processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles")) words_found <- processor$find_keys(sentences = "I live in LA but I like NY") words_found
replace_keys()
Replaces keys
found in the sentences with the corresponding words
.
KeywordProcessor$replace_keys(sentences)
sentences
character vector. Text in which to replace the keys
found with the corresponding words
.
character vector. Text with the keys
replaced by the respective words
.
library(rflashtext) processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles")) new_sentences <- processor$replace_keys(sentences = "I live in LA but I like NY") new_sentences
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = "NY")
processor$get_words(keys = "LA")
processor$find_keys(sentences = "I live in LA but I like NY")
processor$replace_keys(sentences = "I live in LA but I like NY")
## ------------------------------------------------
## Method `KeywordProcessor$new`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$attrs
library(rflashtext)
processor <- KeywordProcessor$new(chars = paste0(letters, collapse = ""), keys = c("NY", "LA"))
processor$attrs
## ------------------------------------------------
## Method `KeywordProcessor$show_trie`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$show_trie()
## ------------------------------------------------
## Method `KeywordProcessor$add_keys_words`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$add_keys_words(keys = "CA", words = "California")
processor$show_trie()
## ------------------------------------------------
## Method `KeywordProcessor$contain_keys`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = c("NY", "LA", "TX"))
## ------------------------------------------------
## Method `KeywordProcessor$get_words`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$get_words(keys = c("NY", "LA", "TX"))
## ------------------------------------------------
## Method `KeywordProcessor$find_keys`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
words_found <- processor$find_keys(sentences = "I live in LA but I like NY")
words_found
## ------------------------------------------------
## Method `KeywordProcessor$replace_keys`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
new_sentences <- processor$replace_keys(sentences = "I live in LA but I like NY")
new_sentences