-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathRun_DSCC.R
More file actions
127 lines (107 loc) · 4.21 KB
/
Run_DSCC.R
File metadata and controls
127 lines (107 loc) · 4.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
source("./DSCC_helper.R")
processedDataPath <- "./Data/DSCC_Main/"
ptoPath <- "./Data/DSCC_Relevant"
resPath <- "./Subtyping_Results/"
datasets <- list.files(processedDataPath)
datasets <- strsplit(datasets, ".rds") %>% do.call(what = c)
rmTypes <- c("clinical", "survival", "clinicalImputedV2", "clinicalMCA", "clinical_Clean", "clinicalImputedV3", "clinicalMCAV2")
ptoTypes <- c("PTO_Sum_SpectralCounts", "PTO_Sum_DistinctPeptides", "PTO_Sum_UnsharedPeptides", "PTO_Peptides_SpectralCount",
"PTO_Sum_SpectralCounts_JHU", "PTO_Sum_DistinctPeptides_JHU", "PTO_Sum_UnsharedPeptides_JHU", "PTO_Peptides_SpectralCount_JHU")
method <- "DSCC"
if (!file.exists(resPath)) {
dir.create(resPath)
}
for (dataset in datasets) {
# mclapply(datasets, mc.cores=length(datasets), function(dataset){
(function() {
message("Subtyping on: ", dataset)
rds <- readRDS(file.path(processedDataPath, paste0(dataset, ".rds")))
survival <- rds$survival
survival <- survival[rowSums(is.na(survival)) == 0,]
survival <- survival[survival$os >= 0,]
survival$os[survival$os == 0] <- 1
toUseDataList <- rds[setdiff(names(rds), c("survival"))]
toUseDataList <- toUseDataList[!names(toUseDataList) %in% rmTypes]
toUseDataList <- toUseDataList[!sapply(toUseDataList, is.null)]
if (file.exists(file.path(ptoPath, paste0(dataset, ".rds")))) {
ptodata <- readRDS(file.path(ptoPath, paste0(dataset, ".rds")))
ptodata <- ptodata[names(ptodata) %in% ptoTypes]
toUseDataList <- c(toUseDataList, ptodata)
}
rm(rds)
toUseDataList <- toUseDataList[!sapply(toUseDataList, is.null)]
unionSamples <- Reduce(c, lapply(toUseDataList, rownames)) %>% unique()
toUseSamples <- intersect(unionSamples, rownames(survival))
toUseDataList <- lapply(toUseDataList, function(data) {
sampkeep <- rownames(data)[rownames(data) %in% toUseSamples]
if (length(sampkeep) < 2) {
data <- NULL
}else {
data <- data[sampkeep,]
}
data
}) %>% `names<-`(names(toUseDataList))
toUseDataList <- toUseDataList[!sapply(toUseDataList, is.null)]
unionSamples <- Reduce(c, lapply(toUseDataList, rownames)) %>% unique()
toUseSamples <- intersect(unionSamples, rownames(survival))
toUseDataList <- lapply(names(toUseDataList), function(dataType) {
data <- toUseDataList[[dataType]]
data <- as.matrix(data)
data[is.na(data)] <- 0
if (min(data) >= 0) {
if (max(data) > 100) {
data <- log2(data + 1)
}
}
data
}) %>% `names<-`(names(toUseDataList))
noDeath <- survival[toUseSamples, "OSstatus"]
noDeath <- sum(noDeath, na.rm = T)
cat("Finished loading data\n")
runningTime <- Sys.time()
resFile <- paste0(resPath, method, "-", dataset, ".rds")
cluster <- try({ runDSCC(dataList = toUseDataList, nSamples = length(toUseSamples)) })
runningTime <- Sys.time() - runningTime
if (!is(cluster, "try-error")) {
ncluster <- max(cluster[toUseSamples])
coxFit <- try({ summary(coxph(Surv(time = os, event = isDead) ~ as.factor(cluster[toUseSamples]), data = survival[toUseSamples,], ties = "exact")) })
if (!is(coxFit, "try-error")) {
pval <- round(coxFit$sctest[3], digits = 40)
coxFit <- summary(coxFit)
} else {
pval <- NA
coxFit <- NA
}
message(method, "\t", dataset, "\t", pval)
saveRDS(
list(
cluster = cluster[toUseSamples],
nclusters = ncluster,
pval_cox = pval,
noSamples = length(toUseSamples),
nDeaths = sum(survival[toUseSamples, "isDead"]),
noDataTypes = length(toUseDataList),
coxFit = coxFit,
dataset = dataset,
method = method,
runningTime = runningTime
), file = resFile)
} else {
saveRDS(
list(
cluster = NA,
nclusters = NA,
pval_cox = NA,
noSamples = length(toUseSamples),
nDeaths = sum(survival[toUseSamples, "isDead"]),
noDataTypes = length(toUseDataList),
coxFit = NA,
dataset = dataset,
method = method,
runningTime = runningTime
), file = resFile)
}
})()
gc()
# })
}