The following code chunk tries to install the MPHIACausal
package.
if (!requireNamespace("MPHIACausal", quietly = TRUE)) {
message("MPHIACausal package not found. Try to install it automatically.")
if (!requireNamespace("remotes", quietly = TRUE)) {
install.packages("remotes")
}
remotes::install_local("MPHIACausal_0.1.0.tar.gz")
}
The following code chunk loads the package and do the parallel computing setup.
library(MPHIACausal)
options(CausalLearnWorkders = 10)
The following code chunk checks whether the saved graphs in the package can be reproduced.
data("Tri90Data")
data("Tri90Results")
methods <- c("GS", "PC.stable", "IAMB", "MMPC", "New")
learnGraphs <- function(data){
r <- lapply(methods,
function(method) {
f <- getAlgo(method)
suppressWarnings(f(data,
alpha = 0.05, max.sx = 3,
undirected = FALSE))
})
names(r) <- methods
r
}
system.time(Tri90ResultsCheck <- lapply(Tri90Data, learnGraphs))
## user system elapsed
## 65.399 5.709 1120.066
checkgraph <- function(g, g0){
comp <- bnlearn::compare(g0, g)
if(!identical(comp$fp, 0)) {
cat("check failed.\n")
return()
}
if(!identical(comp$fn, 0)) {
cat("check failed.\n")
return()
}
cat("check passed.\n")
}
datasets <- names(Tri90Data)
for (d in datasets) {
cat(paste0(d, "\n"))
for (method in methods) {
cat(paste0(method, ": "))
checkgraph(Tri90ResultsCheck[[d]][[method]],
Tri90Results[[d]][[method]])
}
}
## Tri90AwareMale
## GS: check passed.
## PC.stable: check passed.
## IAMB: check passed.
## MMPC: check passed.
## New: check passed.
## Tri90AwareFemale
## GS: check passed.
## PC.stable: check passed.
## IAMB: check passed.
## MMPC: check passed.
## New: check passed.
## Tri90ARTMale
## GS: check passed.
## PC.stable: check passed.
## IAMB: check passed.
## MMPC: check passed.
## New: check passed.
## Tri90ARTFemale
## GS: check passed.
## PC.stable: check passed.
## IAMB: check passed.
## MMPC: check passed.
## New: check passed.
## Tri90VLSMale
## GS: check passed.
## PC.stable: check passed.
## IAMB: check passed.
## MMPC: check passed.
## New: check passed.
## Tri90VLSFemale
## GS: check passed.
## PC.stable: check passed.
## IAMB: check passed.
## MMPC: check passed.
## New: check passed.
The following code chunk reproduces the results in Table 1 from the saved datasets in the package.
getNandP <- function(){
datasets <- names(Tri90Data)
resultTable <- as.data.frame(matrix(nrow = 2, ncol = length(datasets)))
colnames(resultTable) <- datasets
for (d in datasets) {
resultTable[[d]] <- dim(Tri90Data[[d]])
}
rownames(resultTable) <- c("n", "N(V)")
resultTable
}
knitr::kable(getNandP())
Tri90AwareMale | Tri90AwareFemale | Tri90ARTMale | Tri90ARTFemale | Tri90VLSMale | Tri90VLSFemale | |
---|---|---|---|---|---|---|
n | 712 | 1505 | 510 | 1210 | 454 | 1110 |
N(V) | 66 | 93 | 66 | 92 | 66 | 92 |
The following code chunk reproduces the results in Table 2 from the saved graphs in the package.
getGraphBasic <- function(method){
datasets <- names(Tri90Data)
resultTable <- as.data.frame(matrix(nrow = 3, ncol = length(datasets)+2))
colnames(resultTable) <- c("Method", "Type", datasets)
for (d in datasets) {
g <- Tri90Results[[d]][[method]]
nv <- bnlearn::nnodes(g)
ne <- bnlearn::narcs(g)
nde <- nrow(bnlearn::directed.arcs(g))
resultTable[[d]] <- c(nv, ne, nde)
}
resultTable$Method <- method
resultTable$Type <- c("NV", "NE", "NDE")
resultTable
}
methods <- c("PC.stable", "MMPC", "IAMB", "GS", "New")
knitr::kable(do.call(rbind, lapply(methods, getGraphBasic)))
Method | Type | Tri90AwareMale | Tri90AwareFemale | Tri90ARTMale | Tri90ARTFemale | Tri90VLSMale | Tri90VLSFemale |
---|---|---|---|---|---|---|---|
PC.stable | NV | 66 | 93 | 66 | 92 | 66 | 92 |
PC.stable | NE | 10 | 19 | 3 | 19 | 1 | 20 |
PC.stable | NDE | 0 | 2 | 0 | 2 | 0 | 4 |
MMPC | NV | 66 | 93 | 66 | 92 | 66 | 92 |
MMPC | NE | 6 | 18 | 3 | 15 | 4 | 15 |
MMPC | NDE | 0 | 2 | 0 | 0 | 0 | 0 |
IAMB | NV | 66 | 93 | 66 | 92 | 66 | 92 |
IAMB | NE | 8 | 17 | 4 | 17 | 4 | 15 |
IAMB | NDE | 0 | 0 | 0 | 0 | 0 | 0 |
GS | NV | 66 | 93 | 66 | 92 | 66 | 92 |
GS | NE | 10 | 10 | 11 | 12 | 10 | 13 |
GS | NDE | 0 | 0 | 0 | 0 | 0 | 0 |
New | NV | 66 | 93 | 66 | 92 | 66 | 92 |
New | NE | 113 | 156 | 103 | 158 | 104 | 153 |
New | NDE | 99 | 142 | 86 | 149 | 90 | 146 |
The following code chunk reproduces the results in Table 3 from the saved graphs in the package.
getGraphIC <- function(method){
datasets <- names(Tri90Data)
resultTable <- as.data.frame(matrix(nrow = 3, ncol = length(datasets)+2))
colnames(resultTable) <- c("Method", "Type", datasets)
for (d in datasets) {
g <- Tri90Results[[d]][[method]]
r <- fitNetwork(net = g, data = Tri90Data[[d]])
df <- r$df
ll <- r$score
## AIC <- -2*ll+2*df
BIC <- -2*ll+log(nrow(Tri90Data[[d]]))*df
resultTable[[d]] <- c(df, ll, BIC)
}
resultTable$Method <- method
resultTable$Type <- c("DF", "Log-likelihood", "BIC")
resultTable
}
methods <- c("PC.stable", "MMPC", "IAMB", "GS", "New")
knitr::kable(do.call(rbind, lapply(methods, getGraphIC)), digits = 1)
Method | Type | Tri90AwareMale | Tri90AwareFemale | Tri90ARTMale | Tri90ARTFemale | Tri90VLSMale | Tri90VLSFemale |
---|---|---|---|---|---|---|---|
PC.stable | DF | 50.0 | 142.0 | 12.0 | 147.0 | 6.0 | 144.0 |
PC.stable | Log-likelihood | 3076.7 | 12051.1 | 534.1 | 10172.2 | 162.4 | 9448.7 |
PC.stable | BIC | -5825.0 | -23063.3 | -993.4 | -19300.9 | -288.1 | -17887.6 |
MMPC | DF | 44.0 | 140.0 | 26.0 | 128.0 | 30.0 | 140.0 |
MMPC | Log-likelihood | 2218.7 | 12853.6 | 1146.7 | 9833.0 | 1238.2 | 10344.0 |
MMPC | BIC | -4148.5 | -24683.0 | -2131.3 | -18757.5 | -2292.8 | -19706.3 |
IAMB | DF | 71.0 | 134.0 | 32.0 | 132.0 | 32.0 | 124.0 |
IAMB | Log-likelihood | 2650.4 | 14391.3 | 1454.8 | 11160.4 | 1293.2 | 10352.2 |
IAMB | BIC | -4834.5 | -27802.2 | -2710.1 | -21383.9 | -2390.7 | -19834.9 |
GS | DF | 81.0 | 69.0 | 62.0 | 96.0 | 63.0 | 104.0 |
GS | Log-likelihood | 2348.3 | 6027.7 | 1573.6 | 6712.0 | 1888.7 | 6074.3 |
GS | BIC | -4164.7 | -11550.6 | -2760.7 | -12742.5 | -3392.0 | -11419.3 |
New | DF | 948.0 | 1012.0 | 970.0 | 2117.0 | 1184.0 | 979.0 |
New | Log-likelihood | 17755.5 | 52807.1 | 10274.1 | 48597.2 | 9222.8 | 41991.0 |
New | BIC | -29284.5 | -98209.8 | -14500.7 | -82167.2 | -11201.8 | -77117.2 |
The following code chunk reproduces the results in Table S.1 from the saved graphs in the package.
getGraphNnbr <- function(method){
datasets <- names(Tri90Data)
resultTable <- as.data.frame(matrix(nrow = 3, ncol = length(datasets)+2))
colnames(resultTable) <- c("Method", "Type", datasets)
for (d in datasets) {
target <- d %>% gsub(pattern = "Female", replacement = "") %>%
gsub(pattern = "Male", replacement = "") %>% tolower()
g <- Tri90Results[[d]][[method]]
ig <- MPHIACausal:::bn2igraph(g)
NAMES <- igraph::vertex_attr(ig)$name
TARGET <- which(NAMES == target)
dists <- igraph::distances(ig, to = TARGET)
Nd1 <- sum(dists <= 1) - 1 ## -1 because we need to exclude the target itself
Nd2 <- sum(dists <= 2) - 1
Nd3 <- sum(dists <= 3) - 1
resultTable[[d]] <- c(Nd1, Nd2, Nd3)
}
resultTable$Method <- method
resultTable$Type <- c("Nd1", "Nd2", "Nd3")
resultTable
}
methods <- c("PC.stable", "MMPC", "IAMB", "GS", "New")
knitr::kable(do.call(rbind, lapply(methods, getGraphNnbr)))
Method | Type | Tri90AwareMale | Tri90AwareFemale | Tri90ARTMale | Tri90ARTFemale | Tri90VLSMale | Tri90VLSFemale |
---|---|---|---|---|---|---|---|
PC.stable | Nd1 | 0 | 0 | 0 | 0 | 0 | 0 |
PC.stable | Nd2 | 0 | 0 | 0 | 0 | 0 | 0 |
PC.stable | Nd3 | 0 | 0 | 0 | 0 | 0 | 0 |
MMPC | Nd1 | 1 | 0 | 0 | 1 | 0 | 0 |
MMPC | Nd2 | 1 | 0 | 0 | 1 | 0 | 0 |
MMPC | Nd3 | 1 | 0 | 0 | 1 | 0 | 0 |
IAMB | Nd1 | 0 | 0 | 0 | 0 | 0 | 0 |
IAMB | Nd2 | 0 | 0 | 0 | 0 | 0 | 0 |
IAMB | Nd3 | 0 | 0 | 0 | 0 | 0 | 0 |
GS | Nd1 | 0 | 0 | 0 | 0 | 0 | 0 |
GS | Nd2 | 0 | 0 | 0 | 0 | 0 | 0 |
GS | Nd3 | 0 | 0 | 0 | 0 | 0 | 0 |
New | Nd1 | 6 | 5 | 4 | 4 | 2 | 3 |
New | Nd2 | 19 | 18 | 13 | 11 | 7 | 8 |
New | Nd3 | 49 | 51 | 38 | 28 | 19 | 28 |
The complete simulations take a very long time, and are carried out using a high performance computing cluster. Here we provide some sample codes to illustrate how to reproduce the simulation results.
Each row in Table 5 is the average of 500 times simulation of the following code.
## code for one simulation for n = 250, Goal = Aware, Gender = Male
suppressWarnings(SimulationI(n = 250, dn = "Tri90AwareMale"))
## GS PC.stable IAMB MMPC New
## TPR 0.1081081 0.0990991 0.1171171 0.1261261 0.3153153
## TNR 1.0000000 1.0000000 0.9985486 0.9985486 0.9791969
Each row in Table S.2 is the average of 500 times simulation of the following code.
## code for one simulation for Goal = Aware, Gender = Male
suppressWarnings(SimulationS51(n = 500, dn = "Tri90AwareMale"))
## MCI=2 MCI=3 MCI=4 MCI=5 MCI=Inf
## TPR 0.4594595 0.4504505 0.4594595 0.4594595 0.4594595
## TNR 0.9820997 0.9816159 0.9816159 0.9816159 0.9816159
The complete simulation study in S.5.2 also takes a long time. Here we reproduce the tables and figures from the saved simulation results in the MPHIACausal package. The way to reproduce the results from beginning is documented in the “README.rmd”.
The following code chunk reproduces the results in Table S.3 from the saved simulation results in the package.
library(ROCR)
data("SimulationS52ROCRs")
GoalAndGender <- c("Tri90AwareMale", "Tri90AwareFemale",
"Tri90ARTMale", "Tri90ARTFemale",
"Tri90VLSMale", "Tri90VLSFemale")
results <- do.call(rbind, lapply(GoalAndGender, function(dn) SimulationS52AUC(SimulationS52ROCRs[[dn]])))
rownames(results) <- GoalAndGender
knitr::kable(results)
GS | PC.stable | IAMB | MMPC | New | |
---|---|---|---|---|---|
Tri90AwareMale | 0.7080620 | 0.7262506 | 0.7189025 | 0.7643728 | 0.9223383 |
Tri90AwareFemale | 0.6996269 | 0.7344495 | 0.7081491 | 0.7506213 | 0.9402191 |
Tri90ARTMale | 0.7296676 | 0.6658440 | 0.6943069 | 0.7105777 | 0.8854048 |
Tri90ARTFemale | 0.6553563 | 0.6996232 | 0.6659216 | 0.7465644 | 0.9246077 |
Tri90VLSMale | 0.6882114 | 0.6616242 | 0.7062644 | 0.6974734 | 0.9102250 |
Tri90VLSFemale | 0.6531434 | 0.6950925 | 0.6858441 | 0.7486009 | 0.9180418 |
The following code chunk reproduces subplots of Figures S.6 and S.7 from the saved simulation results in the package.
library(ROCR)
data("SimulationS52ROCRs")
## the plots for Goal = Aware, Gender = Male
SimulationS52PlotROC(SimulationS52ROCRs$Tri90AwareMale)
SimulationS52PlotROC(SimulationS52ROCRs$Tri90AwareMale, xlim = c(0, 0.05), legendx = "topleft")
Each row in Table S.4 is the average of 1,000 times simulation of the following code.
## code for one simulation for \rho = prob = 0.01 and \theta = 0.125
suppressWarnings(SimulationS53(n = 500, p = 100, prob = 0.01, theta = 0.125))
## GS PC.stable IAMB MMPC New
## TPR 0.3488372 0.3953488 0.3953488 0.3953488 0.3953488
## TNR 0.9937462 0.9929393 0.9935445 0.9931410 0.9915271