Installation and Setup

The following code chunk tries to install the MPHIACausal package.

if (!requireNamespace("MPHIACausal", quietly = TRUE)) {
  message("MPHIACausal package not found. Try to install it automatically.")
  if (!requireNamespace("remotes", quietly = TRUE)) {
    install.packages("remotes")
  }
  remotes::install_local("MPHIACausal_0.1.0.tar.gz")
}

The following code chunk loads the package and do the parallel computing setup.

library(MPHIACausal)
options(CausalLearnWorkders = 10)

Application to MPHIA

The following code chunk checks whether the saved graphs in the package can be reproduced.

data("Tri90Data")
data("Tri90Results")

methods <- c("GS", "PC.stable", "IAMB", "MMPC", "New")
learnGraphs <- function(data){
  r <- lapply(methods,
         function(method) {
           f <- getAlgo(method)
           suppressWarnings(f(data,
                              alpha = 0.05, max.sx = 3,
                              undirected = FALSE))
           })
  names(r) <- methods
  r
}
system.time(Tri90ResultsCheck <- lapply(Tri90Data, learnGraphs))
##     user   system  elapsed 
##   65.399    5.709 1120.066
checkgraph <- function(g, g0){
  comp <- bnlearn::compare(g0, g)
  if(!identical(comp$fp, 0)) {
    cat("check failed.\n")
    return()
  }
  if(!identical(comp$fn, 0)) {
    cat("check failed.\n")
    return()
  }
  cat("check passed.\n")
}

datasets <- names(Tri90Data)

for (d in datasets) {
  cat(paste0(d, "\n"))
  for (method in methods) {
    cat(paste0(method, ": "))
    checkgraph(Tri90ResultsCheck[[d]][[method]],
               Tri90Results[[d]][[method]])
  }
}
## Tri90AwareMale
## GS: check passed.
## PC.stable: check passed.
## IAMB: check passed.
## MMPC: check passed.
## New: check passed.
## Tri90AwareFemale
## GS: check passed.
## PC.stable: check passed.
## IAMB: check passed.
## MMPC: check passed.
## New: check passed.
## Tri90ARTMale
## GS: check passed.
## PC.stable: check passed.
## IAMB: check passed.
## MMPC: check passed.
## New: check passed.
## Tri90ARTFemale
## GS: check passed.
## PC.stable: check passed.
## IAMB: check passed.
## MMPC: check passed.
## New: check passed.
## Tri90VLSMale
## GS: check passed.
## PC.stable: check passed.
## IAMB: check passed.
## MMPC: check passed.
## New: check passed.
## Tri90VLSFemale
## GS: check passed.
## PC.stable: check passed.
## IAMB: check passed.
## MMPC: check passed.
## New: check passed.

The following code chunk reproduces the results in Table 1 from the saved datasets in the package.

getNandP <- function(){
  datasets <- names(Tri90Data)
  resultTable <- as.data.frame(matrix(nrow = 2, ncol = length(datasets)))
  colnames(resultTable) <- datasets
  for (d in datasets) {
    resultTable[[d]] <- dim(Tri90Data[[d]])
  }
  rownames(resultTable) <- c("n", "N(V)")
  resultTable
}

knitr::kable(getNandP())
Tri90AwareMale Tri90AwareFemale Tri90ARTMale Tri90ARTFemale Tri90VLSMale Tri90VLSFemale
n 712 1505 510 1210 454 1110
N(V) 66 93 66 92 66 92

The following code chunk reproduces the results in Table 2 from the saved graphs in the package.

getGraphBasic <- function(method){
  datasets <- names(Tri90Data)
  resultTable <- as.data.frame(matrix(nrow = 3, ncol = length(datasets)+2))
  colnames(resultTable) <- c("Method", "Type", datasets)
  for (d in datasets) {
    g <- Tri90Results[[d]][[method]]
    nv <- bnlearn::nnodes(g)
    ne <- bnlearn::narcs(g)
    nde <- nrow(bnlearn::directed.arcs(g))
    resultTable[[d]] <- c(nv, ne, nde)
  }
  resultTable$Method <- method
  resultTable$Type <- c("NV", "NE", "NDE")
  resultTable
}

methods <- c("PC.stable", "MMPC", "IAMB", "GS", "New")

knitr::kable(do.call(rbind, lapply(methods, getGraphBasic)))
Method Type Tri90AwareMale Tri90AwareFemale Tri90ARTMale Tri90ARTFemale Tri90VLSMale Tri90VLSFemale
PC.stable NV 66 93 66 92 66 92
PC.stable NE 10 19 3 19 1 20
PC.stable NDE 0 2 0 2 0 4
MMPC NV 66 93 66 92 66 92
MMPC NE 6 18 3 15 4 15
MMPC NDE 0 2 0 0 0 0
IAMB NV 66 93 66 92 66 92
IAMB NE 8 17 4 17 4 15
IAMB NDE 0 0 0 0 0 0
GS NV 66 93 66 92 66 92
GS NE 10 10 11 12 10 13
GS NDE 0 0 0 0 0 0
New NV 66 93 66 92 66 92
New NE 113 156 103 158 104 153
New NDE 99 142 86 149 90 146

The following code chunk reproduces the results in Table 3 from the saved graphs in the package.

getGraphIC <- function(method){
  datasets <- names(Tri90Data)
  resultTable <- as.data.frame(matrix(nrow = 3, ncol = length(datasets)+2))
  colnames(resultTable) <- c("Method", "Type", datasets)
  for (d in datasets) {
    g <- Tri90Results[[d]][[method]]
    r <- fitNetwork(net = g, data = Tri90Data[[d]])
    df <- r$df
    ll <- r$score
    ## AIC <- -2*ll+2*df
    BIC <- -2*ll+log(nrow(Tri90Data[[d]]))*df
    resultTable[[d]] <- c(df, ll, BIC)
  }
  resultTable$Method <- method
  resultTable$Type <- c("DF", "Log-likelihood", "BIC")
  resultTable
}

methods <- c("PC.stable", "MMPC", "IAMB", "GS", "New")

knitr::kable(do.call(rbind, lapply(methods, getGraphIC)), digits = 1)
Method Type Tri90AwareMale Tri90AwareFemale Tri90ARTMale Tri90ARTFemale Tri90VLSMale Tri90VLSFemale
PC.stable DF 50.0 142.0 12.0 147.0 6.0 144.0
PC.stable Log-likelihood 3076.7 12051.1 534.1 10172.2 162.4 9448.7
PC.stable BIC -5825.0 -23063.3 -993.4 -19300.9 -288.1 -17887.6
MMPC DF 44.0 140.0 26.0 128.0 30.0 140.0
MMPC Log-likelihood 2218.7 12853.6 1146.7 9833.0 1238.2 10344.0
MMPC BIC -4148.5 -24683.0 -2131.3 -18757.5 -2292.8 -19706.3
IAMB DF 71.0 134.0 32.0 132.0 32.0 124.0
IAMB Log-likelihood 2650.4 14391.3 1454.8 11160.4 1293.2 10352.2
IAMB BIC -4834.5 -27802.2 -2710.1 -21383.9 -2390.7 -19834.9
GS DF 81.0 69.0 62.0 96.0 63.0 104.0
GS Log-likelihood 2348.3 6027.7 1573.6 6712.0 1888.7 6074.3
GS BIC -4164.7 -11550.6 -2760.7 -12742.5 -3392.0 -11419.3
New DF 948.0 1012.0 970.0 2117.0 1184.0 979.0
New Log-likelihood 17755.5 52807.1 10274.1 48597.2 9222.8 41991.0
New BIC -29284.5 -98209.8 -14500.7 -82167.2 -11201.8 -77117.2

The following code chunk reproduces the results in Table S.1 from the saved graphs in the package.

getGraphNnbr <- function(method){
  datasets <- names(Tri90Data)
  resultTable <- as.data.frame(matrix(nrow = 3, ncol = length(datasets)+2))
  colnames(resultTable) <- c("Method", "Type", datasets)
  for (d in datasets) {
    target <- d %>% gsub(pattern = "Female", replacement = "") %>%
      gsub(pattern = "Male", replacement = "") %>% tolower()
    g <- Tri90Results[[d]][[method]]
    ig <- MPHIACausal:::bn2igraph(g)
    NAMES <- igraph::vertex_attr(ig)$name
    TARGET <- which(NAMES == target)
    dists <- igraph::distances(ig, to = TARGET)
    Nd1 <- sum(dists <= 1) - 1 ## -1 because we need to exclude the target itself
    Nd2 <- sum(dists <= 2) - 1
    Nd3 <- sum(dists <= 3) - 1
    resultTable[[d]] <- c(Nd1, Nd2, Nd3)
  }
  resultTable$Method <- method
  resultTable$Type <- c("Nd1", "Nd2", "Nd3")
  resultTable
}

methods <- c("PC.stable", "MMPC", "IAMB", "GS", "New")

knitr::kable(do.call(rbind, lapply(methods, getGraphNnbr)))
Method Type Tri90AwareMale Tri90AwareFemale Tri90ARTMale Tri90ARTFemale Tri90VLSMale Tri90VLSFemale
PC.stable Nd1 0 0 0 0 0 0
PC.stable Nd2 0 0 0 0 0 0
PC.stable Nd3 0 0 0 0 0 0
MMPC Nd1 1 0 0 1 0 0
MMPC Nd2 1 0 0 1 0 0
MMPC Nd3 1 0 0 1 0 0
IAMB Nd1 0 0 0 0 0 0
IAMB Nd2 0 0 0 0 0 0
IAMB Nd3 0 0 0 0 0 0
GS Nd1 0 0 0 0 0 0
GS Nd2 0 0 0 0 0 0
GS Nd3 0 0 0 0 0 0
New Nd1 6 5 4 4 2 3
New Nd2 19 18 13 11 7 8
New Nd3 49 51 38 28 19 28

Simulation Studies

The complete simulations take a very long time, and are carried out using a high performance computing cluster. Here we provide some sample codes to illustrate how to reproduce the simulation results.

Simulation Study in Section 4

Each row in Table 5 is the average of 500 times simulation of the following code.

## code for one simulation for n = 250, Goal = Aware, Gender = Male
suppressWarnings(SimulationI(n = 250, dn = "Tri90AwareMale"))
##            GS PC.stable      IAMB      MMPC       New
## TPR 0.1081081 0.0990991 0.1171171 0.1261261 0.3153153
## TNR 1.0000000 1.0000000 0.9985486 0.9985486 0.9791969

Simulation Study in Supplement S.5.1

Each row in Table S.2 is the average of 500 times simulation of the following code.

## code for one simulation for Goal = Aware, Gender = Male
suppressWarnings(SimulationS51(n = 500, dn = "Tri90AwareMale"))
##         MCI=2     MCI=3     MCI=4     MCI=5   MCI=Inf
## TPR 0.4594595 0.4504505 0.4594595 0.4594595 0.4594595
## TNR 0.9820997 0.9816159 0.9816159 0.9816159 0.9816159

Simulation Study in Supplement S.5.2

The complete simulation study in S.5.2 also takes a long time. Here we reproduce the tables and figures from the saved simulation results in the MPHIACausal package. The way to reproduce the results from beginning is documented in the “README.rmd”.

The following code chunk reproduces the results in Table S.3 from the saved simulation results in the package.

library(ROCR)
data("SimulationS52ROCRs")
GoalAndGender <- c("Tri90AwareMale", "Tri90AwareFemale", 
                   "Tri90ARTMale", "Tri90ARTFemale", 
                   "Tri90VLSMale", "Tri90VLSFemale")
results <- do.call(rbind, lapply(GoalAndGender, function(dn) SimulationS52AUC(SimulationS52ROCRs[[dn]])))
rownames(results) <- GoalAndGender
knitr::kable(results)
GS PC.stable IAMB MMPC New
Tri90AwareMale 0.7080620 0.7262506 0.7189025 0.7643728 0.9223383
Tri90AwareFemale 0.6996269 0.7344495 0.7081491 0.7506213 0.9402191
Tri90ARTMale 0.7296676 0.6658440 0.6943069 0.7105777 0.8854048
Tri90ARTFemale 0.6553563 0.6996232 0.6659216 0.7465644 0.9246077
Tri90VLSMale 0.6882114 0.6616242 0.7062644 0.6974734 0.9102250
Tri90VLSFemale 0.6531434 0.6950925 0.6858441 0.7486009 0.9180418

The following code chunk reproduces subplots of Figures S.6 and S.7 from the saved simulation results in the package.

library(ROCR)
data("SimulationS52ROCRs")
## the plots for Goal = Aware, Gender = Male
SimulationS52PlotROC(SimulationS52ROCRs$Tri90AwareMale)

SimulationS52PlotROC(SimulationS52ROCRs$Tri90AwareMale, xlim = c(0, 0.05), legendx = "topleft")

Simulation Study in Supplement S.5.3

Each row in Table S.4 is the average of 1,000 times simulation of the following code.

## code for one simulation for \rho = prob = 0.01 and \theta = 0.125
suppressWarnings(SimulationS53(n = 500, p = 100, prob = 0.01, theta = 0.125))
##            GS PC.stable      IAMB      MMPC       New
## TPR 0.3488372 0.3953488 0.3953488 0.3953488 0.3953488
## TNR 0.9937462 0.9929393 0.9935445 0.9931410 0.9915271