Skip to content

Commit

Permalink
applies prefilter for alphanumeric and hyphenated words ropensci#45
Browse files Browse the repository at this point in the history
This is meant to be a quick fix; issue should probably be resolved in hunspell parser instead
* remove "ignore" words from WORDLIST before parsing in hunspell
* replaces the complex if ... else if ... chain with a simpler switch()
  • Loading branch information
jmbarbone committed Feb 10, 2021
1 parent f029081 commit ab81b6a
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 52 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@ Imports:
knitr
Suggests: pdftools
Roxygen: list(markdown = TRUE)
RoxygenNote: 6.1.99.9001
RoxygenNote: 7.1.1
Language: en-GB
82 changes: 59 additions & 23 deletions R/check-files.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,30 +25,39 @@ spell_check_files <- function(path, ignore = character(), lang = "en_US"){
lang <- normalize_lang(lang)
dict <- hunspell::dictionary(lang, add_words = ignore)
path <- sort(normalizePath(path, mustWork = TRUE))
lines <- lapply(path, spell_check_file_one, dict = dict)
lines <- lapply(path, spell_check_file_one, dict = dict, ignore = ignore)
summarize_words(path, lines)
}

spell_check_file_one <- function(path, dict){
if(grepl("\\.r?md$",path, ignore.case = TRUE))
return(spell_check_file_md(path, dict = dict))
if(grepl("\\.rd$", path, ignore.case = TRUE))
return(spell_check_file_rd(path, dict = dict))
if(grepl("\\.(rnw|snw)$",path, ignore.case = TRUE))
return(spell_check_file_knitr(path = path, format = "latex", dict = dict))
if(grepl("\\.(tex)$",path, ignore.case = TRUE))
return(spell_check_file_plain(path = path, format = "latex", dict = dict))
if(grepl("\\.(html?)$", path, ignore.case = TRUE)){
try({
path <- pre_filter_html(path)
})
return(spell_check_file_plain(path = path, format = "html", dict = dict))
}
if(grepl("\\.(xml)$",path, ignore.case = TRUE))
return(spell_check_file_plain(path = path, format = "xml", dict = dict))
if(grepl("\\.(pdf)$",path, ignore.case = TRUE))
return(spell_check_file_pdf(path = path, format = "text", dict = dict))
return(spell_check_file_plain(path = path, format = "text", dict = dict))
# Dispatch a single file to the spell checker that matches its extension.
#
# `path`   path of the file to check; the extension is compared
#          case-insensitively.
# `dict`   dictionary object forwarded unchanged to the selected checker.
# `ignore` character vector of words to ignore; forwarded to the checkers
#          that accept it (Rd, tex, xml and the plain-text fallback).
#
# Returns whatever the selected checker returns.
spell_check_file_one <- function(path, dict, ignore = character()) {
  ext <- tolower(tools::file_ext(path))

  # Fold alternative extensions onto one canonical key
  ext <- switch(
    ext,
    rmd = "md",
    snw = "rnw",
    htm = "html",
    # default: keep the extension as it is
    ext
  )

  switch(
    ext,
    md = spell_check_file_md(path, dict = dict),
    # Rd files must go through the Rd checker; without this case they fall
    # through to the plain-text default below
    rd = spell_check_file_rd(path, dict = dict, ignore = ignore),
    rnw = spell_check_file_knitr(path, format = "latex", dict = dict),
    tex = spell_check_file_plain(path, format = "latex", dict = dict, ignore = ignore),
    html = {
      # Best effort: if the pre-filter fails, check the unfiltered file
      try({
        path <- pre_filter_html(path)
      })
      spell_check_file_plain(path, format = "html", dict = dict)
    },
    xml = spell_check_file_plain(path, format = "xml", dict = dict, ignore = ignore),
    pdf = spell_check_file_pdf(path, format = "text", dict = dict),
    # default: unknown or missing extension is treated as plain text
    spell_check_file_plain(path, format = "text", dict = dict, ignore = ignore)
  )
}

#' @rdname spell_check_files
Expand Down Expand Up @@ -85,13 +94,19 @@ spell_check_description_text <- function(file, dict){
spell_check_plain(lines, dict = dict)
}

spell_check_file_rd <- function(rdfile, macros = NULL, dict) {
# Spell check one Rd file.
#
# `rdfile` path to the Rd file.
# `macros` optional Rd macro definitions passed to tools::RdTextFilter();
#          when empty, the filter's own default is used.
# `dict`   dictionary object forwarded to spell_check_plain().
# `ignore` character vector of words to strip from the text before checking.
#
# Returns the result of spell_check_plain() on the filtered text.
spell_check_file_rd <- function(rdfile, macros = NULL, dict, ignore = character()) {
  text <- if (length(macros) > 0) {
    tools::RdTextFilter(rdfile, macros = macros)
  } else {
    tools::RdTextFilter(rdfile)
  }

  Encoding(text) <- "UTF-8"

  # Test the length rather than identical(ignore, character()): callers may
  # pass NULL when no word list is in use, and that must skip the pre-filter
  if (length(ignore) > 0) {
    text <- pre_filter_plain_rd(text, ignore = ignore)
  }

  spell_check_plain(text, dict = dict)
}

Expand All @@ -115,8 +130,13 @@ spell_check_file_knitr <- function(path, format, dict){
spell_check_plain(text, dict = dict)
}

spell_check_file_plain <- function(path, format, dict){
spell_check_file_plain <- function(path, format, dict, ignore = character()){
lines <- readLines(path, warn = FALSE, encoding = 'UTF-8')

if (!identical(ignore, character())) {
lines <- pre_filter_plain_rd(lines, ignore = ignore)
}

words <- hunspell::hunspell_parse(lines, format = format, dict = dict)
text <- vapply(words, paste, character(1), collapse = " ")
spell_check_plain(text, dict = dict)
Expand Down Expand Up @@ -147,3 +167,19 @@ pre_filter_html <- function(path){
# Delete everything the pattern `.*` matches in each element of `x`
# (Perl-compatible matching) and return the resulting character vector.
replace_text <- function(x) {
  blanked <- gsub(pattern = ".*", replacement = "", x = x, perl = TRUE)
  blanked
}

# Remove the `ignore` words (the WORDLIST) from each line before it is parsed.
#
# Lines are split on every character that is neither alphanumeric nor
# punctuation, so tokens such as "1st" and "one-two" stay in one piece.
# A token is dropped when it equals an ignore word either as written or after
# its leading/trailing punctuation is stripped, so "1st," and "(one-two)."
# are removed as well.
#
# `lines`  character vector of text lines.
# `ignore` character vector of words to remove.
#
# Returns a character vector with one element per input line, holding the
# remaining tokens joined by single spaces.
pre_filter_plain_rd <- function(lines, ignore = character()) {
  # "-" is already part of [:punct:], so it needs no separate mention here
  tokens_by_line <- strsplit(lines, "[^[:alnum:][:punct:]]")

  vapply(
    tokens_by_line,
    function(tokens) {
      # Token with surrounding punctuation removed; inner "-" is untouched
      bare <- gsub("^[[:punct:]]+|[[:punct:]]+$", "", tokens)
      # Exact match keeps ignore words like "e.g." working; the bare match
      # catches ignore words that sit next to punctuation
      drop <- tokens %in% ignore | (nzchar(bare) & bare %in% ignore)
      paste(tokens[!drop], collapse = " ")
    },
    character(1)
  )
}
44 changes: 23 additions & 21 deletions R/spell-check.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,10 @@ spell_check_package <- function(pkg = ".", vignettes = TRUE, use_wordlist = TRUE
lang <- normalize_lang(pkg$language)

# Add custom words to the ignore list
add_words <- if(isTRUE(use_wordlist))
add_words <- if (isTRUE(use_wordlist))
get_wordlist(pkg$path)
author <- if(length(pkg[['authors@r']])){

author <- if (length(pkg[['authors@r']])) {
parse_r_field(pkg[['authors@r']])
} else {
strsplit(pkg[['author']], " ", fixed = TRUE)[[1]]
Expand All @@ -58,7 +59,8 @@ spell_check_package <- function(pkg = ".", vignettes = TRUE, use_wordlist = TRUE
file.path(R.home("share"), "Rd", "macros", "system.Rd"),
tools::loadPkgRdMacros(pkg$path, macros = NULL)
)
rd_lines <- lapply(rd_files, spell_check_file_rd, dict = dict, macros = macros)

rd_lines <- lapply(rd_files, spell_check_file_rd, dict = dict, macros = macros, ignore = add_words)

# Check 'DESCRIPTION' fields
pkg_fields <- c("title", "description")
Expand All @@ -70,7 +72,7 @@ spell_check_package <- function(pkg = ".", vignettes = TRUE, use_wordlist = TRUE
all_sources <- c(rd_files, pkg_fields)
all_lines <- c(rd_lines, pkg_lines)

if(isTRUE(vignettes)){
if (isTRUE(vignettes)) {
# Where to check for rmd/md files
vign_files <- list.files(file.path(pkg$path, "vignettes"), pattern = "\\.r?md$",
ignore.case = TRUE, full.names = TRUE, recursive = TRUE)
Expand All @@ -93,10 +95,10 @@ spell_check_package <- function(pkg = ".", vignettes = TRUE, use_wordlist = TRUE
}

as_package <- function(pkg){
if(inherits(pkg, 'package'))
if (inherits(pkg, 'package'))
return(pkg)
path <- pkg
description <- if(file.exists(file.path(path, "DESCRIPTION.in"))){
description <- if (file.exists(file.path(path, "DESCRIPTION.in"))) {
file.path(path, "DESCRIPTION.in")
} else {
normalizePath(file.path(path, "DESCRIPTION"), mustWork = TRUE)
Expand Down Expand Up @@ -128,15 +130,15 @@ summarize_words <- function(file_names, found_line){

#' @export
print.summary_spellcheck <- function(x, ...){
if(!nrow(x)){
if (!nrow(x)) {
cat("No spelling errors found.\n")
return(invisible())
}
words <- x$word
fmt <- paste0("%-", max(nchar(words), 0) + 3, "s")
pretty_names <- sprintf(fmt, words)
cat(sprintf(fmt, " WORD"), " FOUND IN\n", sep = "")
for(i in seq_len(nrow(x))){
for (i in seq_len(nrow(x))) {
cat(pretty_names[i])
cat(paste(x$found[[i]], collapse = paste0("\n", sprintf(fmt, ""))))
cat("\n")
Expand Down Expand Up @@ -166,10 +168,10 @@ spell_check_setup <- function(pkg = ".", vignettes = TRUE, lang = "en-US", error

#' @export
spell_check_test <- function(vignettes = TRUE, error = FALSE, lang = NULL, skip_on_cran = TRUE){
if(isTRUE(skip_on_cran)){
if (isTRUE(skip_on_cran)) {
not_cran <- Sys.getenv('NOT_CRAN')
# See logic in tools:::config_val_to_logical
if(is.na(match(tolower(not_cran), c("1", "yes", "true"))))
if (is.na(match(tolower(not_cran), c("1", "yes", "true"))))
return(NULL)
}
out_save <- readLines(system.file("templates/spelling.Rout.save", package = 'spelling'))
Expand All @@ -179,27 +181,27 @@ spell_check_test <- function(vignettes = TRUE, error = FALSE, lang = NULL, skip_

# Try to find pkg source directory
pkg_dir <- list.files("../00_pkg_src", full.names = TRUE)
if(!length(pkg_dir)){
if (!length(pkg_dir)) {
# This is where it is on e.g. win builder
check_dir <- dirname(getwd())
if(grepl("\\.Rcheck$", check_dir)){
if (grepl("\\.Rcheck$", check_dir)) {
source_dir <- sub("\\.Rcheck$", "", check_dir)
if(file.exists(source_dir))
if (file.exists(source_dir))
pkg_dir <- source_dir
}
}
if(!length(pkg_dir) && identical(basename(getwd()), 'tests')){
if(file.exists('../DESCRIPTION')){
if (!length(pkg_dir) && identical(basename(getwd()), 'tests')) {
if (file.exists('../DESCRIPTION')) {
pkg_dir <- dirname(getwd())
}
}
if(!length(pkg_dir)){
if (!length(pkg_dir)) {
warning("Failed to find package source directory from: ", getwd())
return(invisible())
}
results <- spell_check_package(pkg_dir, vignettes = vignettes)
if(nrow(results)){
if(isTRUE(error)){
if (nrow(results)) {
if (isTRUE(error)) {
output <- sprintf("Potential spelling errors: %s\n", paste(results$word, collapse = ", "))
stop(output, call. = FALSE)
} else {
Expand All @@ -213,16 +215,16 @@ spell_check_test <- function(vignettes = TRUE, error = FALSE, lang = NULL, skip_
update_description <- function(pkg, lang = NULL){
desc <- normalizePath(file.path(pkg$path, "DESCRIPTION"), mustWork = TRUE)
lines <- readLines(desc, warn = FALSE)
if(!any(grepl("spelling", c(pkg$package, pkg$suggests, pkg$imports, pkg$depends)))){
lines <- if(!any(grepl("^Suggests", lines))){
if (!any(grepl("spelling", c(pkg$package, pkg$suggests, pkg$imports, pkg$depends)))) {
lines <- if (!any(grepl("^Suggests", lines))) {
c(lines, "Suggests:\n spelling")
} else {
sub("^Suggests:", "Suggests:\n spelling,", lines)
}
}
is_lang <- grepl("^Language:", lines, ignore.case = TRUE)
isolang <- gsub("_", "-", lang, fixed = TRUE)
if(any(is_lang)){
if (any(is_lang)) {
is_lang <- which(grepl("^Language:", lines))
lines[is_lang] <- paste("Language:", isolang)
} else {
Expand Down
7 changes: 4 additions & 3 deletions man/spell_check_files.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions man/spell_check_package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions man/wordlist.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit ab81b6a

Please sign in to comment.