1 year ago

#375707

test-img

TJeff

Running random forest loop, predicting to unsampled locations and saving outputs

I would like to run a random forest 100 times, save train and test AUC and OOB scores, predict to model spatial extent, save mean, standard error and coefficient of variation per cell, then write to raster, bonus points if anyone knows how to add TSS score. Apologies there is missing code, fragmented code and errors, the first half of the code works, but from the random forest model onwards there are problems, any help would be greatly appreciated.

Packages:
require(raster);require(dismo);require(sp); 
require(pROC);require(FinCal);require(ggplot2);require(randomForest);require (ROCR) 
require(tidyverse); require(dplyr)

n.boot <- 100
length.map <- nrow(predictor_variables)
boot_mat <- array(0, c(length.map,n.boot)) # matrix where rows will store predictions for each 
bootstrap (columns)
influence_mat <- array(0,c(21,n.boot)) # for saving information on predictors 
auc_mat <- array(0,c(n.boot, 4)) #to save the RF training and test AUC and OOB. 

for (i in 1:n.boot) # bootstrap loop
{ # create training and evaluation presence/ relative absence dataframes
train_ind <- sample(seq_len(nrow(sampled)), size = floor(0.75 * nrow(sampled))) # index of 
rows for 75% of presence data
# creat training and evaluation presences 
sampled _train <- sampled [train_ind, ]
sampled _eval <- sampled [-train_ind, ]

#creation of absences
rnd <- sort(sample(seq(1,length.abs),nrow(sampled _train)*2)) 
rnd.ev <- sort(sample(seq(1,length.abs),nrow(sampled _eval)*2)) 
absence.rnd <- absence[rnd,] 
absence.rnd.ev <- absence[rnd.ev,]

# creation of presence and absence
sampled.PresAbs.B <- rbind(sampled _train,absence.rnd) # final presence/absence data
sampled_eval <- rbind(sampled_eval,absence.rnd.ev)

training<- randomForest(response~., sampled.PresAbs.B [,c(5:25)], ntree=1000, mtry = 2, 
importance = TRUE) # column number of predictor variables to be used in RF
# need to save influence/importance of predictors on response for each run.
# need to save training (training) and test AUC for each run.
# need to save OOB error rate too for training and test runs if possible.

# predict spatially to map 
pred_map <-  predict(predictor_variables, training, type = "prob", index = 2) 
boot_mat[,i] <- round(pred.map, digits = 2) 
}

save(boot_mat, file=" sampled_boot_mat.Rdata")
save(auc_mat, file=" sampled_dev_mat.Rdata")
save(influence_mat, file=" sampled_inf_mat.Rdata")

# need to calculate mean and standard error of influence of predictors on response, then write 
to csv.

# calculate mean suitability and CV spatially
sampled.boot.mean<-apply(boot_mat,1,mean)
sampled.boot.sd<-apply(boot_mat,1,sd)
sampled.boot.cv<- sampled.boot.sd/sampled.boot.mean # calculation of Coefficient of variation if needed

# export the maps
sampled.map.mean <- cbind(sna_all_preds[, c("X", "Y")], sampled.boot.mean) # mean prediciton
sampled.map.UC <- cbind(sna_all_preds[, c("X", "Y")], sampled.boot.sd) # uncertainty 
sampled.map.CV <-cbind(sna_all_preds[,c("X", "Y")], sampled.boot.cv) # CV
#can then export as rasters easily enough.

e.g of data:

dput(head(sna_all_boffffs_and_preds, 10))
structure(list(site = 1:10, X = c(708396.365970067, 708646.365970067, 
708896.365970067, 709146.365970067, 708396.365970067, 708646.365970067, 
708896.365970067, 709146.365970067, 708396.365970067, 708646.365970067
), Y = c(1243944.65711016, 1243944.65711016, 1243944.65711016, 
1243944.65711016, 1243694.65711016, 1243694.65711016, 1243694.65711016, 
1243694.65711016, 1243444.65711016, 1243444.65711016), sna_boffffs_clip = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), adet_2010_2019_250_clip = c(-14.4839498108682, 
-14.4839498108682, -14.4839498108682, -14.4839498108682, -14.4839498108682, 
-14.4839498108682, -14.4839498108682, -14.4839498108682, -14.4839498108682, 
-14.4839498108682), bathy_250_slope_clip = c(3.62498998641968, 
1.90744984149933, 4.96259355545044, 8.03348731994629, 2.80171227455139, 
2.33035254478455, 5.86305713653564, 8.246413230896, 5.99569511413574, 
6.80374336242676), bathy_250_vrm_007_clip = c(0.00215762853622437, 
0.00210392475128174, 0.00183302164077759, 0.00150519609451294, 
0.00216823816299438, 0.00220978260040283, 0.0020909309387207, 
0.00188064575195312, 0.00211155414581299, 0.00220668315887451
), btr_100km_clip = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), btr_10km_clip = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0), btr_50km_clip = c(0, 0, 0, 0, 0, 
0, 0, 0, 0, 0), carbonate_250_clip = c(30.7859439849854, 30.7859439849854, 
30.7859439849854, 30.7859439849854, 30.7859439849854, 30.7859439849854, 
30.7859439849854, 30.7859439849854, 30.7859439849854, 30.7859439849854
), Catch_kg_per_km2_AllMethods_250_clip = c(0, 0, 0, 0, 0, 0, 
0, 0, 0, 0), current_speed_250_clip = c(0.17399999499321, 0.17399999499321, 
0.17399999499321, 0.17399999499321, 0.17399999499321, 0.17399999499321, 
0.17399999499321, 0.17399999499321, 0.17399999499321, 0.17399999499321
), dist_shore_250_clip = c(67926.890625, 67982.53125, 68039.0546875, 
68096.4375, 67683.0859375, 67738.9296875, 67795.6484375, 67853.2421875, 
67439.328125, 67495.3671875), ebed_20_av_250_clip = c(0.538848269429723, 
0.538848269429723, 0.538848269429723, 0.538848269429723, 0.538848269429723, 
0.538848269429723, 0.538848269429723, 0.538848269429723, 0.538848269429723, 
0.538848269429723), gravel_250_clip = c(19.7878284454346, 19.7878284454346, 
19.7878284454346, 19.7878284454346, 19.7878284454346, 19.7878284454346, 
19.7878284454346, 19.7878284454346, 19.7878284454346, 19.7878284454346
), hvis_20_av_250_clip = c(21.4873270945488, 21.4873270945488, 
21.4873270945488, 21.4873270945488, 21.4873270945488, 21.4873270945488, 
21.4873270945488, 21.4873270945488, 21.4873270945488, 21.4873270945488
), orb_vel_250_clip = c(150L, 150L, 150L, 150L, 150L, 150L, 150L, 
150L, 150L, 150L), rocky_reef_clip = c(0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L), sal_depth_250_clip = c(35.231746673584, 35.231746673584, 
35.231746673584, 35.231746673584, 35.231746673584, 35.231746673584, 
35.231746673584, 35.231746673584, 35.231746673584, 35.231746673584
), sand_250_clip = c(19.3533153533936, 19.3533153533936, 19.3533153533936, 
19.3533153533936, 19.3533153533936, 19.3533153533936, 19.3533153533936, 
19.3533153533936, 19.3533153533936, 19.3533153533936), sst_20_av_250_clip = c(16.6609603188989, 
16.6609603188989, 16.6609603188989, 16.6609603188989, 16.6609603188989, 
16.6609603188989, 16.6609603188989, 16.6609603188989, 16.6609603188989, 
16.6609603188989), sst_annual_amp_250_clip = c(2.67400002479553, 
2.67400002479553, 2.67400002479553, 2.67400002479553, 2.67400002479553, 
2.67400002479553, 2.67400002479553, 2.67400002479553, 2.67400002479553, 
2.67400002479553), sst_grad_250_clip = c(0.00596274901181459, 
0.00596274901181459, 0.00596274901181459, 0.00596274901181459, 
0.00596274901181459, 0.00596274901181459, 0.00596274901181459, 
0.00596274901181459, 0.00596274901181459, 0.00596274901181459
), temp_res_250_clip = c(4.63328790664673, 4.63328790664673, 
4.63328790664673, 4.63328790664673, 4.63328790664673, 4.63328790664673, 
4.63328790664673, 4.63328790664673, 4.63328790664673, 4.63328790664673
)), row.names = c(NA, 10L), class = "data.frame")

r

regression

random-forest

spatial

0 Answers

Your Answer

Accepted video resources