-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_default_VIPERA.sh
51 lines (38 loc) · 1.19 KB
/
run_default_VIPERA.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env bash
# run_default_VIPERA.sh
#
# This script is used to download and analyze intra-patient SARS-CoV-2 data
# using VIPERA, a tool for the automated detection of serially sampled infections
# and the identification of evolutionary patterns within the same viral infection.
#
# Access the full data record via DOI: 10.20350/digitalCSIC/15648
# Strict mode: abort on any failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail), so that a
# failed step cannot be silently ignored.
set -euo pipefail
#######################################
# Print a timestamped log line of the form "<date> | <message>".
# Arguments: the message; multiple arguments are joined with single spaces.
# Outputs:   one line on stdout.
#######################################
logthis() {
  # Force a space as the "$*" join character regardless of the caller's IFS.
  local IFS=' '
  # Both expansions are quoted so the message is printed verbatim: unquoted,
  # the shell would collapse runs of whitespace and expand glob characters
  # such as '*' against the current directory.
  printf '%s | %s\n' "$(date)" "$*"
}
# Number of cores passed to snakemake. Defaults to 1; may be overridden from
# the environment, e.g. `NCPU=4 ./run_default_VIPERA.sh`.
NCPU="${NCPU:-1}"
readonly DATA_URL="https://digital.csic.es/bitstream/10261/337461/1/data.zip"
readonly MD5_SUM="ff59d513309a3af47f2c9248f7f3518d"

tmpdir=$(mktemp -d)

# Remove the scratch directory. Registered as an EXIT trap so it also runs
# when the script aborts early (failed download, checksum mismatch, ...),
# instead of leaving the downloaded data behind.
cleanup() {
  # ':?' aborts rather than running 'rm -rf' on an empty path.
  rm -rf -- "${tmpdir:?}"
}
trap cleanup EXIT

logthis "Downloading compressed data from '$DATA_URL'"
# --fail:     exit non-zero on HTTP errors instead of saving the error page.
# --location: follow redirects issued by the server.
curl --fail --location -o "${tmpdir}/data.zip" "${DATA_URL}"

logthis "Validating file"
# Feed the file on stdin and trim the hash with parameter expansion: there is
# no pipeline, so a failing md5sum aborts the script instead of being masked.
md5_sum_dwld=$(md5sum < "${tmpdir}/data.zip")
md5_sum_dwld=${md5_sum_dwld%% *}
if [[ "$md5_sum_dwld" != "$MD5_SUM" ]]; then
  logthis "ERROR: MD5 checksum does not match" >&2
  exit 1
fi

logthis "Creating data directories"
mkdir -p data/bam data/fasta

logthis "Decompressing"
unzip -d "${tmpdir}" "${tmpdir}/data.zip"
rm -- "${tmpdir}/data.zip"

logthis "Organizing files"
# The files are picked up from exactly one directory level below ${tmpdir}.
# (This was previously spelled '**', but without 'shopt -s globstar' bash
# treats '**' like '*', so the matching behaviour is unchanged.)
mv -- "${tmpdir}"/*/*.bam data/bam/
mv -- "${tmpdir}"/*/*.fa data/fasta/
mv -- "${tmpdir}"/*/*.csv data/

logthis "Running VIPERA"
snakemake --use-conda -c "${NCPU}"

logthis "Cleaning up"
cleanup
# The directory is already gone; drop the trap so it does not run again.
trap - EXIT

logthis "Done!"