pinferunemjmcmc {EMJMCMC} | R Documentation |
A wrapper for running the GLMM, BLR, or DBRM based inference and predictions in an expert but rather easy to use way
pinferunemjmcmc(
n.cores = 4,
mcgmj = mcgmjpse,
report.level = 0.5,
simplify = FALSE,
num.mod.best = 1000,
predict = FALSE,
test.data = 1,
link.function = function(z) z,
runemjmcmc.params
)
n.cores |
the maximal number of cores (and (R)(G)MJMCMC threads) to be addressed in the analysis |
mcgmj |
an mclapply like function for performing for performing parallel computing, do not change the default unless you are using Windows |
report.level |
a numeric value in (0,1) specifying the threshold for detections based on the marginal inclusion probabilities |
simplify |
a logical value specifying in simplification of the features is to be done after the search is completed |
num.mod.best |
the number of the best models in the thread to calculate marginal inclusion probabilities |
predict |
a logical value specifying if predictions should be done by the run of pinferunemjmcmc |
test.data |
covariates data.frame to be used for predictions |
link.function |
the link functions to be used to make predictions |
runemjmcmc.params |
a vector of parameters of runemjmcmc function, see the help of runemjmcmc for details |
a list of
detected features or logical expressions and their marginal inclusion probabilities
predicted values if they are required, NULL otherwise
all visited by (R)(G)MJMCMC features and logical expressions and their marginal inclusion probabilities
a vector of detailed outputs of individual n.cores threads of (R)(G)MJMCMC run
runemjmcmc LogrRegr DeepRegr LinRegr
# inference
X <- read.csv(system.file("extdata", "exa1.csv", package="EMJMCMC"))
data.example <- as.data.frame(X)
# specify the initial formula
formula1 <- as.formula(
paste(colnames(X)[5], "~ 1 +", paste0(colnames(X)[-5], collapse = "+"))
)
# define the number or cpus
M <- 1L
# define the size of the simulated samples
NM <- 1000
# define \k_{max} + 1 from the paper
compmax <- 16
# define treshold for preinclusion of the tree into the analysis
th <- (10)^(-5)
# define a final treshold on the posterior marginal probability for reporting a
# tree
thf <- 0.05
# specify tuning parameters of the algorithm for exploring DBRM of interest
# notice that allow_offsprings=3 corresponds to the GMJMCMC runs and
# allow_offsprings=4 -to the RGMJMCMC runs
res1 <- pinferunemjmcmc(
n.cores = M, report.level = 0.5, num.mod.best = NM, simplify = TRUE,
runemjmcmc.params = list(
formula = formula1, data = data.example, estimator = estimate.gamma.cpen_2,
estimator.args = list(data = data.example), recalc_margin = 249,
save.beta = FALSE, interact = TRUE, outgraphs = FALSE,
relations = c("to23", "expi", "logi", "to35", "sini", "troot", "sigmoid"),
relations.prob = c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1),
interact.param = list(allow_offsprings = 3, mutation_rate = 250,
last.mutation = 10000, max.tree.size = 5, Nvars.max = 15,
p.allow.replace = 0.9, p.allow.tree = 0.01, p.nor = 0.9, p.and = 0.9),
n.models = 10000, unique = TRUE, max.cpu = M, max.cpu.glob = M,
create.table = FALSE, create.hash = TRUE, pseudo.paral = TRUE,
burn.in = 100, print.freq = 1000,
advanced.param = list(
max.N.glob = as.integer(10),
min.N.glob = as.integer(5),
max.N = as.integer(3),
min.N = as.integer(1),
printable = FALSE
)
)
)
print(res1$feat.stat)
# prediction
compmax <- 21
# read in the train and test data sets
test <- read.csv(
system.file("extdata", "breast_cancer_test.csv", package="EMJMCMC"),
header = TRUE, sep = ","
)[, -1]
train <- read.csv(
system.file("extdata", "breast_cancer_train.csv", package="EMJMCMC"),
header = TRUE, sep = ","
)[, -1]
# transform the train data set to a data.example data.frame that EMJMCMC class
# will internally use
data.example <- as.data.frame(train)
# specify the link function that will be used in the prediction phase
g <- function(x) {
return((x <- 1 / (1 + exp(-x))))
}
formula1 <- as.formula(
paste(
colnames(data.example)[31], "~ 1 +",
paste0(colnames(data.example)[-31], collapse = "+")
)
)
# Defining a custom estimator function
estimate.bas.glm.cpen <- function(
formula, data, family, prior, logn, r = 0.1, yid=1,
relat =c("cosi","sigmoid","tanh","atan","erf","m(")
) {
#only poisson and binomial families are currently adopted
X <- model.matrix(object = formula,data = data)
capture.output({out <- BAS::bayesglm.fit(x = X, y = data[,yid], family=family,coefprior=prior)})
fmla.proc<-as.character(formula)[2:3]
fobserved <- fmla.proc[1]
fmla.proc[2]<- stringi::stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "")
fmla.proc[2]<- stringi::stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "")
sj<-2*(stringi::stri_count_fixed(str = fmla.proc[2], pattern = "*"))
sj<-sj+1*(stringi::stri_count_fixed(str = fmla.proc[2], pattern = "+"))
for(rel in relat) {
sj<-sj+2*(stringi::stri_count_fixed(str = fmla.proc[2], pattern = rel))
}
mlik = ((-out$deviance +2*log(r)*sum(sj)))/2
return(
list(
mlik = mlik, waic = -(out$deviance + 2*out$rank),
dic = -(out$deviance + logn*out$rank),
summary.fixed = list(mean = coefficients(out))
)
)
}
res <- pinferunemjmcmc(
n.cores = M, report.level = 0.5, num.mod.best = NM, simplify = TRUE,
predict = TRUE, test.data = as.data.frame(test), link.function = g,
runemjmcmc.params = list(
formula = formula1, data = data.example, gen.prob = c(1, 1, 1, 1, 0),
estimator = estimate.bas.glm.cpen,
estimator.args = list(
data = data.example, prior = BAS::aic.prior(), family = binomial(),
yid = 31, logn = log(143), r = exp(-0.5)
), recalc_margin = 95, save.beta = TRUE, interact = TRUE,
relations = c("gauss", "tanh", "atan", "sin"),
relations.prob = c(0.1, 0.1, 0.1, 0.1),
interact.param = list(
allow_offsprings = 4, mutation_rate = 100, last.mutation = 1000,
max.tree.size = 6, Nvars.max = 20, p.allow.replace = 0.5,
p.allow.tree = 0.4, p.nor = 0.3, p.and = 0.9
), n.models = 7000, unique = TRUE, max.cpu = M, max.cpu.glob = M,
create.table = FALSE, create.hash = TRUE, pseudo.paral = TRUE,
burn.in = 100, print.freq = 1000,
advanced.param = list(
max.N.glob = as.integer(10), min.N.glob = as.integer(5),
max.N = as.integer(3), min.N = as.integer(1), printable = FALSE
)
)
)
for (jjjj in 1:10)
{
resw <- as.integer(res$predictions >= 0.1 * jjjj)
prec <- (1 - sum(abs(resw - test$X), na.rm = TRUE) / length(resw))
print(prec)
# FNR
ps <- which(test$X == 1)
fnr <- sum(abs(resw[ps] - test$X[ps])) / (sum(abs(resw[ps] - test$X[ps])) + length(ps))
# FPR
ns <- which(test$X == 0)
fpr <- sum(abs(resw[ns] - test$X[ns])) / (sum(abs(resw[ns] - test$X[ns])) + length(ns))
}