-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2-download_drug_data.R
127 lines (101 loc) · 5.49 KB
/
2-download_drug_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# This script takes a list of signatures and downloads their data.
# It then processes that data: it generates the up and down signatures for the given threshold and finds the perturbagens concordant with them.
library(tidyverse)
library(glue)
source("pipeline.R")
# Return the table stored at `path`, building and saving it first when absent.
#
# Arguments:
#   path   Location of the cached TSV file.
#   build  Zero-argument function that produces the table. It is only called
#          when `path` does not exist yet, so a cache hit does no extra work.
#
# Returns the table: freshly built on a cache miss, read back from disk on a
# cache hit.
cached_tsv <- function(path, build) {
  if (file.exists(path)) {
    print(glue("{path} already exists"))
    return(read_tsv(path))
  }

  result <- build()
  # Make sure the target directory exists so the write cannot fail on a fresh
  # checkout; this is a no-op when the directory is already there.
  dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE)
  write_tsv(result, path)
  result
}

# Process one perturbagen signature from start to finish.
#
# Steps (each one is skipped when its output file already exists):
#   1. Download the L1000 signature for `pertid`.
#   2. Generate separate up and down filtered signatures for `threshold`.
#   3. Get the concordant signatures for each filtered signature.
#   4. Combine both sets of results into one consensus file.
#
# Every step writes its result to a TSV file, so an interrupted run can be
# resumed without repeating finished work.
#
# Arguments:
#   pertid     Signature id to process; also the prefix of every file name.
#   threshold  Passed to generate_filtered_signature() and embedded in the
#              names of the filtered, connected and consensus files.
#   library    Passed to get_concordant_signatures().
#   cell_line  Sub-directory of "data" that receives the filtered, connected
#              and consensus outputs. The raw signature is stored under
#              data/signatures/drug regardless of this value.
#
# Called for its side effects; do not rely on the return value.
process_perturbagen <- function(pertid, threshold = 0.85, library = "LIB_5", cell_line) {
  basepath <- file.path("data", cell_line)

  name_filtered <- paste(pertid, threshold, "filtered", sep = "-")
  name_connected <- paste(pertid, threshold, "connected", sep = "-")

  # Output locations, resolved in pipeline order.
  file_sig <- generate_name(
    file.path("data", "signatures", "drug"),
    paste(pertid, "signature", sep = "-"),
    "tsv"
  )
  file_filtered_up <- generate_name(
    file.path(basepath, "filtered", "drug", "up"), name_filtered, "tsv"
  )
  file_filtered_down <- generate_name(
    file.path(basepath, "filtered", "drug", "down"), name_filtered, "tsv"
  )
  file_connected_up <- generate_name(
    file.path(basepath, "connected", "drug", "up"), name_connected, "tsv"
  )
  file_connected_down <- generate_name(
    file.path(basepath, "connected", "drug", "down"), name_connected, "tsv"
  )
  file_consensus <- generate_name(
    file.path(basepath, "consensus", "drug"),
    paste(pertid, threshold, "consensus", sep = "-"),
    "tsv"
  )

  print(glue("Processing {pertid} for threshold {threshold}"))
  sig <- cached_tsv(file_sig, function() get_l1000_signature(pertid))

  print(glue("Generating filtered signatures for {pertid}"))
  filtered_up <- cached_tsv(file_filtered_up, function() {
    generate_filtered_signature(sig, direction = "up", threshold = threshold)
  })
  filtered_down <- cached_tsv(file_filtered_down, function() {
    generate_filtered_signature(sig, direction = "down", threshold = threshold)
  })

  print(glue("Getting connected signatures for {pertid}"))
  connected_up <- cached_tsv(file_connected_up, function() {
    get_concordant_signatures(filtered_up, library = library)
  })
  connected_down <- cached_tsv(file_connected_down, function() {
    get_concordant_signatures(filtered_down, library = library)
  })

  print(glue("Generating consensus perturbagen list for {pertid}"))
  # The consensus is the final product, so an existing file is not read back.
  if (!file.exists(file_consensus)) {
    # NOTE(review): the consensus is always requested with cell_line = "all";
    # the `cell_line` argument only selects the output directory. Confirm
    # that this is intended.
    consensus <- generate_consensus_signature(connected_up, connected_down, cell_line = "all")
    dir.create(dirname(file_consensus), recursive = TRUE, showWarnings = FALSE)
    write_tsv(consensus, file_consensus)
  } else {
    print(glue("{file_consensus} already exists"))
  }
}
# Run process_perturbagen() for every signature listed in a signature map.
#
# Arguments:
#   datafile   TSV file with a `SignatureId` column; one signature per row.
#   threshold  Forwarded unchanged to process_perturbagen().
#   library    Forwarded unchanged to process_perturbagen().
#   cell_line  Forwarded unchanged to process_perturbagen().
#
# Signatures are processed one after another in file order. The function is
# called for its side effects and returns NULL invisibly.
process_seed_drugs <- function(datafile, threshold = 0.85, library = "LIB_5", cell_line) {
  signature_map <- read_tsv(datafile)
  signature_ids <- pull(signature_map, SignatureId)

  for (idx in seq_along(signature_ids)) {
    process_perturbagen(
      signature_ids[[idx]],
      threshold = threshold,
      library = library,
      cell_line = cell_line
    )
  }

  invisible(NULL)
}
# Driver: process every signature map at every cutoff ----

# Thresholds at which each signature is filtered.
cutoffs <- c(0, 0.26, 0.5, 0.85, 1)
map_prefix <- "maps"

# All files under `maps/` except those whose name matches "Drug".
sig_map_lists <- setdiff(list.files(map_prefix), list.files(map_prefix, "Drug"))

# The text before "-Signature" in a file name is used as the cell-line label.
# NOTE(review): the trailing `e*` in the pattern makes the final "e" optional;
# it is kept unchanged so exactly the same file names match.
map_labels <- str_match(sig_map_lists, "(.*)\\-Signature*")[, 2]

# Select labels and paths with ONE mask so that each path is always paired with
# the label derived from its own file name. Files without a label and csv files
# are skipped.
is_signature_map <- !is.na(map_labels) &
  str_detect(sig_map_lists, "csv", negate = TRUE)
sig_map_names <- map_labels[is_signature_map]
maps <- file.path(map_prefix, sig_map_lists[is_signature_map])
names(maps) <- sig_map_names

if (length(maps) == 0) {
  warning(
    "No signature maps found under '", map_prefix, "'; nothing to process.",
    call. = FALSE
  )
}

# Cell lines form the outer loop and cutoffs the inner loop. Every discovered
# map is processed, however many there are. process_seed_drugs() is called for
# its side effects, so no results are collected.
for (map_idx in seq_along(maps)) {
  for (cutoff in cutoffs) {
    process_seed_drugs(
      maps[[map_idx]],
      threshold = cutoff,
      library = "LIB_5",
      cell_line = sig_map_names[[map_idx]]
    )
  }
}