I am using SuperLearner to predict an outcome from a Random Forest algorithm. However, the Random Forest predicts only 0, and I don't understand how to fix the issue.
Here is a reproducible example:
# Reproducible example: fit a SuperLearner whose library contains only
# SL.ranger on random-noise predictors, then predict on a second random frame.
library(SuperLearner)

# Training dataset
# Outcome vector.
y <- c(-4.605170, 9.181019, -4.605170, -4.605170, 5.998099, -4.605170, -4.605170, -4.605170, -4.605170, -4.605170, -4.605170, -4.605170, -4.605170, -4.605170, 8.788880, -4.605170, 7.259213, -4.605170, -4.605170, -4.605170, -4.605170, 8.851838, 8.182144, -4.605170, -4.605170, -4.605170, 8.824345, -4.605170, -4.605170, 8.824345, -4.605170, -4.605170, -4.605170, 9.195547, 8.214720, 8.374350, 6.971533)
# Observation weights, one per element of y (passed as obsWeights below).
weightML <- c(14.95239, 18.55120, 18.55120, 19.70231, 14.95239, 14.95239, 18.55120, 14.95239, 18.55120, 18.55120, 18.55120, 14.95239, 18.55120, 15.73830, 18.55120, 18.55120, 19.70231, 15.73830, 14.95239, 15.73830, 14.95239, 14.95239, 15.73830, 18.55120, 18.55120, 14.95239, 14.95239, 14.95239, 14.95239, 15.73830, 14.95239, 14.95239, 14.95239, 14.95239, 18.55120, 19.70231, 14.95239)

# Row count shared by the training and test predictors. The draws are sized
# from y rather than nrow(df): no `df` object exists in this script, so
# nrow(df) would be evaluated on the function stats::df and give NULL.
n_obs <- length(y)

# Four predictors drawn uniformly from 1..100 with replacement; no seed is
# set, so the values differ between runs.
train_x <- data.frame(
  x1 = sample(100, size = n_obs, replace = TRUE),
  x2 = sample(100, size = n_obs, replace = TRUE),
  x3 = sample(100, size = n_obs, replace = TRUE),
  x4 = sample(100, size = n_obs, replace = TRUE)
)

# Test dataset
# Same columns and row count as train_x, drawn independently.
test_x <- data.frame(
  x1 = sample(100, size = n_obs, replace = TRUE),
  x2 = sample(100, size = n_obs, replace = TRUE),
  x3 = sample(100, size = n_obs, replace = TRUE),
  x4 = sample(100, size = n_obs, replace = TRUE)
)

# RF
# Weighted fit with SL.ranger as the only candidate learner, followed by the
# ensemble-only predictions for the test predictors.
rf <- SuperLearner(Y = y, X = train_x, family = gaussian(), SL.library = "SL.ranger", obsWeights = weightML)
predict(rf, test_x, onlySL = TRUE)$pred
This code returns the following output:
> rf <- SuperLearner(Y = y, X = train_x, family = gaussian(), SL.library = "SL.ranger", obsWeights = weightML)
Warning messages:
1: All algorithms have zero weight
2: All metalearner coefficients are zero, predictions will all be equal to 0
> predict(rf, test_x, onlySL = TRUE)$pred
[1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Warning message:
All metalearner coefficients are zero, predictions will all be equal to 0
Any idea why it predicts only 0 and how I can fix the issue?
I noticed that if I replace y with y + 2, the prediction works. For instance, like this:
# Variant of the example in which the outcome is shifted by +2 before fitting;
# everything else (weights, predictors, learner) is set up the same way.
library(SuperLearner)

# Training dataset
# Outcome vector, then shifted upwards by 2.
y <- c(-4.605170, 9.181019, -4.605170, -4.605170, 5.998099, -4.605170, -4.605170, -4.605170, -4.605170, -4.605170, -4.605170, -4.605170, -4.605170, -4.605170, 8.788880, -4.605170, 7.259213, -4.605170, -4.605170, -4.605170, -4.605170, 8.851838, 8.182144, -4.605170, -4.605170, -4.605170, 8.824345, -4.605170, -4.605170, 8.824345, -4.605170, -4.605170, -4.605170, 9.195547, 8.214720, 8.374350, 6.971533)
y <- y + 2
# Observation weights, one per element of y (passed as obsWeights below).
weightML <- c(14.95239, 18.55120, 18.55120, 19.70231, 14.95239, 14.95239, 18.55120, 14.95239, 18.55120, 18.55120, 18.55120, 14.95239, 18.55120, 15.73830, 18.55120, 18.55120, 19.70231, 15.73830, 14.95239, 15.73830, 14.95239, 14.95239, 15.73830, 18.55120, 18.55120, 14.95239, 14.95239, 14.95239, 14.95239, 15.73830, 14.95239, 14.95239, 14.95239, 14.95239, 18.55120, 19.70231, 14.95239)

# Row count shared by the training and test predictors. The draws are sized
# from y rather than nrow(df): no `df` object exists in this script, so
# nrow(df) would be evaluated on the function stats::df and give NULL.
n_obs <- length(y)

# Four predictors drawn uniformly from 1..100 with replacement; no seed is
# set, so the values differ between runs.
train_x <- data.frame(
  x1 = sample(100, size = n_obs, replace = TRUE),
  x2 = sample(100, size = n_obs, replace = TRUE),
  x3 = sample(100, size = n_obs, replace = TRUE),
  x4 = sample(100, size = n_obs, replace = TRUE)
)

# Test dataset
# Same columns and row count as train_x, drawn independently.
test_x <- data.frame(
  x1 = sample(100, size = n_obs, replace = TRUE),
  x2 = sample(100, size = n_obs, replace = TRUE),
  x3 = sample(100, size = n_obs, replace = TRUE),
  x4 = sample(100, size = n_obs, replace = TRUE)
)

# RF
# Weighted fit with SL.ranger as the only candidate learner, followed by the
# ensemble-only predictions for the test predictors.
rf <- SuperLearner(Y = y, X = train_x, family = gaussian(), SL.library = "SL.ranger", obsWeights = weightML)
predict(rf, test_x, onlySL = TRUE)$pred
I am using SuperLearner to predict an outcome from a Random Forest algorithm. However, the Random Forest predicts only 0, and I don't understand how to fix the issue.
Here is a reproducible example:
This code returns the following output:
Any idea why it predicts only 0 and how I can fix the issue?
I noticed that if I replace y with y + 2, the prediction works. For instance, like this: