Skip to content

Commit

Permalink
Merge pull request #126 from ModelOriented/Hubert_issues
Browse files Browse the repository at this point in the history
forester 1.6.1 - minor fixes and default options changes
  • Loading branch information
HubertR21 authored May 9, 2024
2 parents fb7833e + 1acbbd2 commit 4b8bc9b
Show file tree
Hide file tree
Showing 13 changed files with 35 additions and 34 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: forester
Type: Package
Title: Quick and Simple Tools for Training and Testing of Tree-Based Models
Version: 1.6.0
Version: 1.6.1
Authors@R:
c(person("Hubert", "Ruczyński", role = c("aut", "cre", "cph"), email = "[email protected]"),
person("Anna", "Kozak", role = c("aut", "ths"), email = "[email protected]"),
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# forester 1.6.1
- Changed default feature selection method in `custom_preprocessing()` and `preprocessing_feature_selection()` to `BORUTA`, as it is the most effective one,
- Changed default imputation method for `preprocessing()` inside `train()` to `knn`, as it is the most effective one,
- Fixed an issue for VS plots, where the color was not assigned properly for the models.

# forester 1.6.0

- Updated `.Rbuildignore`, `DESCRIPTION`, and `NAMESPACE`.
Expand Down
6 changes: 3 additions & 3 deletions R/custom_preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
#' the same column names. The parameters are described below:
#' \itemize{
#' \item \code{`feature_selection_method`} A string value indicating the feature selection method.
#' The feature selection method must be one of 'VI', 'MCFS', 'MI', 'BORUTA', or 'none' if we don't
#' The feature selection method must be one of 'VI', 'MCFS', 'MI', 'BORUTA' (default), or 'none' if we don't
#' want it.
#' \item \code{`max_features`} A positive integer value describing the desired number of
#' selected features. Initial value set as 'default' which is min(10, ncol(data) - 1)
Expand Down Expand Up @@ -117,7 +117,7 @@
#' m = 5
#' ),
#' feature_selection_parameters = list(
#' feature_selection_method = 'VI',
#' feature_selection_method = 'BORUTA',
#' max_features = 'default',
#' nperm = 1,
#' cutoffPermutations = 20,
Expand Down Expand Up @@ -152,7 +152,7 @@ custom_preprocessing <- function(data,
m = 5
),
feature_selection_parameters = list(
feature_selection_method = 'none',
feature_selection_method = 'BORUTA',
max_features = 'default',
nperm = 1,
cutoffPermutations = 20,
Expand Down
2 changes: 1 addition & 1 deletion R/plot_classification.R
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ plot.binary_clf <- function(x,

score <- cbind(train_score, test_score)

p <- ggplot(score, aes(x = .data[[paste0(metric, '_train')]], y = .data[[paste0(metric, '_test')]], color = 'engine')) +
p <- ggplot(score, aes(x = .data[[paste0(metric, '_train')]], y = .data[[paste0(metric, '_test')]], color = .data[['engine']])) +
geom_point() +
geom_abline(intercept = 0, slope = 1) +
theme_forester() +
Expand Down
2 changes: 1 addition & 1 deletion R/plot_multiclass.R
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ plot.multiclass <- function(x,

score <- cbind(train_score, test_score)

p <- ggplot(score, aes(x = .data[[paste0(metric, '_train')]], y = .data[[paste0(metric, '_test')]], color = 'engine')) +
p <- ggplot(score, aes(x = .data[[paste0(metric, '_train')]], y = .data[[paste0(metric, '_test')]], color = .data[['engine']])) +
geom_point() +
geom_abline(intercept = 0, slope = 1) +
theme_forester() +
Expand Down
2 changes: 1 addition & 1 deletion R/plot_regression.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ plot.regression <- function(x,

score <- cbind(train_score, test_score)

p <- ggplot(score, aes(x = .data[[paste0(metric, '_train')]], y = .data[[paste0(metric, '_test')]], color = 'engine')) +
p <- ggplot(score, aes(x = .data[[paste0(metric, '_train')]], y = .data[[paste0(metric, '_test')]], color = .data[['engine']])) +
geom_point() +
geom_abline(intercept = 0, slope = 1) +
theme_forester() +
Expand Down
5 changes: 2 additions & 3 deletions R/preprocessing.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' Conduct preprocessing processes
#' Conduct basic preprocessing processes
#'
#' @param data A data source, that is one of the major R formats: data.table, data.frame,
#' matrix, and so on.
Expand Down Expand Up @@ -133,8 +133,7 @@ manage_missing <- function(df, y) {
df <- df[, -col_to_rm]
}
# Input missing values via mice algorithm.
df <- mice::mice(df, seed = 123, print = FALSE, remove_collinear = FALSE)
df <- mice::complete(df)
df <- preprocessing_imputation(df, imputation_method = 'knn', verbose = FALSE)
return(df)
}

Expand Down
16 changes: 8 additions & 8 deletions R/preprocessing_feature_selection.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
#' Conducts a feature selection process with one out of four proposed methods
#' Conducts a feature selection process with one out of five proposed methods
#'
#' \itemize{
#' \item \code{`VI`} The variable importance method based on random forest - short time,
#' \item \code{`MCFS`} The Monte Carlo Feature Selection - long time,
#' \item \code{`MI`} The Varrank method based on mutual information scores - moderate time,
#' if we set too big `max_features` it can work really long,
#' \item \code{`BORUTA`} The BORUTA algorithm - short time.
#' \item \code{`VI`} The variable importance method based on random forest - long time, worst results,
#' \item \code{`MCFS`} The Monte Carlo Feature Selection - short time, reasonable results,
#' \item \code{`MI`} The Varrank method based on mutual information scores - short time,
#' if we set too big `max_features` it can work really long, bad results,
#' \item \code{`BORUTA`} The BORUTA algorithm - long time, best results.
#' }
#'
#' @param data A data source, that is one of the major R formats: data.table, data.frame,
#' matrix and so on.
#' @param y A string that indicates a target column name.
#' @param feature_selection_method A string value indicating the feature selection method.
#' The feature selection method must be one of 'VI', 'MCFS', 'MI', or 'BORUTA'.
#' The feature selection method must be one of 'VI', 'MCFS', 'MI', or 'BORUTA' (default).
#' @param max_features A positive integer value describing the desired number of
#' selected features. Initial value set as 'default' which is min(10, ncol(data) - 1)
#' for `VI` and `MI`, and NULL (number of relevant features chosen by the method)
Expand Down Expand Up @@ -43,7 +43,7 @@
#' @export
preprocessing_feature_selection <- function(data,
y,
feature_selection_method = 'VI',
feature_selection_method = 'BORUTA',
max_features = 'default',
nperm = 1,
cutoffPermutations = 20,
Expand Down
6 changes: 3 additions & 3 deletions man/custom_preprocessing.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/preprocessing.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 8 additions & 8 deletions man/preprocessing_feature_selection.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions tests/testthat/test-13-check-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,4 @@ test_that('test-check-data', {
expect_output(check_y_balance(df, y, time, status, type, verbose))
expect_output(detect_id_columns(df, verbose))
}


})
1 change: 0 additions & 1 deletion tests/testthat/test-17-predict-new.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,5 @@ test_that('test-predict-new', {
for (j in 1:(length(predictions) - 1)) {
expect_equal(length(as.vector(predictions[[j]])), length(as.vector(predictions[[j + 1]])))
}

}
})

0 comments on commit 4b8bc9b

Please sign in to comment.