-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathalign_families.sh
executable file
·84 lines (74 loc) · 2.07 KB
/
align_families.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/bin/bash -e
# Re-assert errexit inside the script: the "-e" on the shebang line is ignored
# when the script is started as "bash align_families.sh ...".
set -e
# Export the kernel's PID limit so that child processes can read it.
# NOTE(review): judging by the name this is consumed by T-Coffee; confirm in
# align_family.sh.
export MAX_N_PID_4_TCOFFEE=$(cat /proc/sys/kernel/pid_max)
# Runs all tools then evaluates LDDT and SoP/TC/CS
#
# ./align_families.sh families/ scores.tsv
#
# Where families is a directory of directories for Homstrad families/AFDB clusters
# Each family directory should be of structure:
# folder/
# pdbs/
# structureA.pdb
# structureB.pdb
# folder_msa.fa
# folder_aa.fa
#
# Will generate alignment files such as the following (the check_msas calls below list every file that is verified):
# folder/matt/result.fasta
# folder/caretta_results/result.fasta
# folder/mTM_result/result.fasta
# folder/mustang/mustang.afasta
# folder/foldmason_aa.fa
# folder/clustalo.fa
# folder/famsa.fa
# folder/mafft.fa
# folder/muscle.fa
#
# and HTML reports:
# folder/<tool>.html
#
# scores.tsv will contain LDDT, SoP (fwd and rev), TC and CS scores and time as a 4-column TSV (family, tool, scoreType, score)
# e.g.
# family1 foldmason lddt 0.6
# family1 muscle sp_fwd 0.3
# family1 mafft tc 0.7
# Validate the command line: exactly two arguments are required and the first
# must be an existing directory. Diagnostics go to stderr.
if [ "$#" -ne 2 ]; then
  echo "Error: 2 arguments are required." >&2
  echo "Usage: $0 dataDir/ scores.tsv" >&2
  exit 1
fi
# Without this check a mistyped data directory yields zero families, every
# "0/0" MSA check passes, and the previous scores file is replaced by an empty
# one. Fail before anything is deleted.
if [ ! -d "$1" ]; then
  echo "Error: '$1' is not a directory." >&2
  echo "Usage: $0 dataDir/ scores.tsv" >&2
  exit 1
fi
# Remove any previous scores file; "--" protects names starting with "-".
if [ -e "$2" ]; then rm -- "$2"; fi
# Parallelism knobs, each defaulting to 1 unless set in the environment:
#   TOOL_THREADS  - passed as THREADS to every ./align_family.sh invocation
#   RUN_THREADS   - number of ./align_family.sh processes run in parallel
#   SCORE_THREADS - number of ./compute_scores.sh processes run in parallel
: "${TOOL_THREADS:=1}"
: "${RUN_THREADS:=1}"
: "${SCORE_THREADS:=1}"
# Run all aligners on families in $1
echo "Running aligners"
# DIR is the data directory; check_msas reads it later.
DIR="$1"
# Each immediate sub-directory of DIR is one family. Paths are passed
# NUL-delimited so that spaces, quotes or backslashes in directory names reach
# ./align_family.sh unchanged. RUN_THREADS families are processed in parallel,
# and each invocation sees THREADS set to TOOL_THREADS.
find "$DIR" -mindepth 1 -maxdepth 1 -type d -print0 |
  THREADS="$TOOL_THREADS" xargs -0 -I{} -P"$RUN_THREADS" ./align_family.sh {}
# N is the number of family directories, i.e. the count check_msas expects for
# each MSA file. Counting NUL terminators keeps it correct for any name.
N=$(find "$DIR" -mindepth 1 -maxdepth 1 -type d -print0 | tr -cd '\0' | wc -c)
#######################################
# Abort the script unless the number of files matching an MSA name equals the
# number of families.
# Counts regular files anywhere under DIR whose path ends in "/<$1>" and
# compares that count with N. This is a total count, not a per-family check.
# Globals:   DIR (read) - data directory to search
#            N   (read) - expected number of matching files
# Arguments: $1 - file name or relative path suffix, e.g. "famsa.fa" or
#                 "caretta_results/result.fasta"
# Outputs:   "Missing <name>, <found>/<N>" on stderr when the counts differ
# Returns:   0 when the counts match; otherwise exits the shell with status 1
#######################################
check_msas() {
  local found
  found=$(find "$DIR" -type f -path "*/${1}" | wc -l)
  # Some wc implementations pad the number with blanks; strip them so the
  # comparison and the message are clean.
  found=${found//[[:space:]]/}
  if [ "$found" -ne "$N" ]; then
    echo "Missing ${1}, ${found}/${N}" >&2
    exit 1
  fi
}
# Run check_msas once for every expected alignment output, in a fixed order;
# the first name whose file count does not match stops the script.
echo "Checking all MSAs have been generated"
for msa_name in \
  "foldmason_aa.fa" \
  "foldmason_refine100_aa.fa" \
  "clustalo.fa" \
  "famsa.fa" \
  "muscle.fa" \
  "mafft.fa" \
  "caretta_results/result.fasta" \
  "mTM_result/result.fasta" \
  "usalign.fa" \
  "matt.fasta" \
  "mustang.afasta" \
  "3dcoffee.fa"
do
  check_msas "$msa_name"
done
# Get scores per tool
echo "Computing scores"
# Without pipefail the status of this pipeline is that of "sort", so a failing
# ./compute_scores.sh (or find) would go unnoticed. With it the scores file is
# still written, but the script exits non-zero if any stage failed.
set -o pipefail
# One ./compute_scores.sh run per family directory, SCORE_THREADS at a time.
# Paths are NUL-delimited so unusual directory names are passed through intact.
# The combined output of all runs is sorted into the scores file ($2).
find "$1" -mindepth 1 -maxdepth 1 -type d -print0 |
  xargs -0 -I{} -P"$SCORE_THREADS" ./compute_scores.sh {} |
  sort > "$2"