Skip to content

Commit

Permalink
Merge pull request #542 from genomic-medicine-sweden/develop
Browse files Browse the repository at this point in the history
chore: dev to master
  • Loading branch information
monikaBrandt authored Dec 16, 2024
2 parents db19a71 + 2567a84 commit f838aa7
Show file tree
Hide file tree
Showing 24 changed files with 308 additions and 36 deletions.
3 changes: 2 additions & 1 deletion .tests/integration/config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
output: "../../config/output_files.yaml"
singularity_schema: "../../config/singularity.schema.yaml"
general_report: "../../config/reports/general_report.yaml"

annotate_cnv:
cnv_amp_genes: "reference/cnv_amp_genes.bed"
Expand Down Expand Up @@ -129,7 +130,7 @@ manta_run_workflow_t:
mosdepth_bed:
container: "docker://hydragenetics/mosdepth:0.3.2"

msisensor_pro:
msisensor_pro_filter_sites:
PoN: "DATA/msisensor_pro_reference_26_normal.list_baseline"

multiqc:
Expand Down
11 changes: 6 additions & 5 deletions .tests/jenkins/test_input_develop.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ results/dna/VAL2022-2-5M_T/additional_files/biomarker/VAL2022-2-5M_T.purecn.scar
results/dna/VAL2022-2-5M_T/additional_files/cnv/VAL2022-2-5M_T.amplifications.tsv f397f92f69100d1f20ca65ffac830dd0
results/dna/VAL2022-2-5M_T/additional_files/cnv/VAL2022-2-5M_T.deletions.tsv e441b862e6d6d573d18fe40f1eaa7103
results/dna/VAL2022-2-5M_T/additional_files/cnv/VAL2022-2-5M_T.manta_tumorSV.vcf.gz 1c4f0a2b346d1355dc76fcd126222729
results/dna/VAL2022-2-5M_T/additional_files/cnv/VAL2022-2-5M_T.pathology.amp_all_del_all.cnv_report.tsv aba15a5adbe875fe2c20af10ce0ede84
results/dna/VAL2022-2-5M_T/additional_files/cnv/VAL2022-2-5M_T.pathology.amp_all_del_all.cnv_report.tsv c28cfb14367c5a677c94ce710680640e
results/dna/VAL2022-2-5M_T/additional_files/cnv/VAL2022-2-5M_T.pathology.amp_all_del_validated.cnv_report.tsv 26f9c02ca97ee3d5c6f2e017d2be55a2
results/dna/VAL2022-2-5M_T/additional_files/cnv/VAL2022-2-5M_T.pathology_purecn.amp_all_del_all.cnv_report.tsv a092cf19bfbcfaae69531ab66ddc902a
results/dna/VAL2022-2-5M_T/additional_files/cnv/VAL2022-2-5M_T.pathology_purecn.amp_all_del_all.cnv_report.tsv 6bbddbd811de1d6c1b1d1cb57a1da9b5
results/dna/VAL2022-2-5M_T/additional_files/cnv/VAL2022-2-5M_T.pathology_purecn.svdb_query.vcf a6b65b19684af6b13f458dda07fb9c1f
results/dna/VAL2022-2-5M_T/additional_files/cnv/VAL2022-2-5M_T.pathology.svdb_query.vcf 80f9174ffbfd9b00bc10ab48476956c0
results/dna/VAL2022-2-5M_T/additional_files/cnv/VAL2022-2-5M_T.purecn.amp_all_del_all.cnv_report.tsv a092cf19bfbcfaae69531ab66ddc902a
results/dna/VAL2022-2-5M_T/additional_files/cnv/VAL2022-2-5M_T.purecn.amp_all_del_all.cnv_report.tsv 6bbddbd811de1d6c1b1d1cb57a1da9b5
results/dna/VAL2022-2-5M_T/additional_files/cnv/VAL2022-2-5M_T.purecn.amp_all_del_validated.cnv_report.tsv 43d39ccc1cd0363f1dfbd7c6b192aaf5
results/dna/VAL2022-2-5M_T/additional_files/cnv/VAL2022-2-5M_T.purecn.svdb_query.vcf a6b65b19684af6b13f458dda07fb9c1f
results/dna/VAL2022-2-5M_T/additional_files/fusion/VAL2022-2-5M_T.fuseq_wes.unfiltered.results.csv b8960baa4356001311f4de40c97f1598
results/dna/VAL2022-2-5M_T/additional_files/fusion/VAL2022-2-5M_T.fuseq_wes.unfiltered.results.csv b8960baa4356001311f4de40c97f1598
results/dna/VAL2022-2-5M_T/additional_files/qc/VAL2022-2-5M_T.alignment_summary_metrics.txt fa88f8adc1d6146cf399c6d17a37d098
results/dna/VAL2022-2-5M_T/additional_files/qc/VAL2022-2-5M_T.contamination.table a4ee6cd2e1cfe029229f5b218ac318e9
results/dna/VAL2022-2-5M_T/additional_files/qc/VAL2022-2-5M_T.duplication_metrics.txt 1c476addf89d82bf68aea08eba98aacf
Expand All @@ -26,7 +26,8 @@ results/dna/VAL2022-2-5M_T/additional_files/vcf/VAL2022-2-5M_T.annotated.exon_on
results/dna/VAL2022-2-5M_T/additional_files/vcf/VAL2022-2-5M_T.annotated.exon_only.filter.soft_filter.vcf bbde2c4df3aaf4c65251685d91548f4d
results/dna/VAL2022-2-5M_T/additional_files/vcf/VAL2022-2-5M_T.annotated.vcf.gz 6b514125458fc362f7a065a5740d74db
results/dna/VAL2022-2-5M_T/additional_files/vcf/vardict_VAL2022-2-5M_T.vcf.gz 22c587c4f250d6d800a5f022324da458
results/dna/VAL2022-2-5M_T/biomarker/VAL2022-2-5M_T.msisensor_pro.score.tsv e47f1fb00f0220a20335f972e1af8d39
results/dna/VAL2022-2-5M_T/biomarker/VAL2022-2-5M_T.msisensor_pro.unfiltered.score.tsv e47f1fb00f0220a20335f972e1af8d39
results/dna/VAL2022-2-5M_T/biomarker/VAL2022-2-5M_T.msisensor_pro.filtered.score.tsv 19631874989ce0297693948a4b5c2edd
results/dna/VAL2022-2-5M_T/biomarker/VAL2022-2-5M_T.pathology_purecn.scarhrd_cnvkit_score.txt d4ad660ca5fcece75edede50490156d7
results/dna/VAL2022-2-5M_T/biomarker/VAL2022-2-5M_T.TMB.txt a2fa195be32925c1135188a6cc6215b8
results/dna/VAL2022-2-5M_T/cnv/VAL2022-2-5M_T.pathology_purecn.amp_all_del_validated.cnv_report.tsv 43d39ccc1cd0363f1dfbd7c6b192aaf5
Expand Down
13 changes: 12 additions & 1 deletion config/config.data.hg19.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,16 @@ hotspot_report:
all: "{{PROJECT_DESIGN_DATA}}/GMS560/reports/Hotspots_combined_regions_nodups.231011.csv"
ENC: "{{PROJECT_DESIGN_DATA}}/GMS560/reports/ENC_hotspots_240919.csv"

juli_annotate:
cosmic: "{{PROJECT_REF_DATA}}/ref_data/juli/CosmicFusionExport_V76.tsv"
pfam: "{{PROJECT_REF_DATA}}/ref_data/juli/Pfam-A.full.human"
ref_genes: "{{PROJECT_REF_DATA}}/ref_data/juli/refGene_hg19.txt"
uniprot: "{{PROJECT_REF_DATA}}/ref_data/juli/HGNC_GeneName_UniProtID_160524.txt"

juli_call:
gap_file: "{{PROJECT_REF_DATA}}/ref_data/juli/gap_hg19.txt"
ref_genes: "{{PROJECT_REF_DATA}}/ref_data/juli/refGene_hg19.txt"

jumble_run:
normal_reference: "{{PROJECT_DESIGN_DATA}}/GMS560/PoN/jumble.combined.filtered.50.PoN.hg19.RDS"

Expand All @@ -133,8 +143,9 @@ merge_cnv_json:
mosdepth:
extra: "--no-per-base --fast-mode"

msisensor_pro:
msisensor_pro_filter_sites:
PoN: "{{PROJECT_PON_DATA}}/GMS560/PoN/Msisensor_pro_reference_nextseq_36.list_baseline"
msi_sites_bed: "{{PROJECT_PON_DATA}}/GMS560/design/SelectedMSIloci2019Sorted.bed"

purecn:
extra: "--model betabin --mapping-bias-file {{PROJECT_PON_DATA}}/GMS560/PoN/purecn/mapping_bias_nextseq_27_hg19.rds"
Expand Down
3 changes: 2 additions & 1 deletion config/config.data.hg38.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,9 @@ merge_cnv_json:
mosdepth:
extra: "--no-per-base --fast-mode"

msisensor_pro:
msisensor_pro_filter_sites:
PoN: "{{PROJECT_PON_DATA}}/GMS560/PoN/Msisensor_pro_reference_nextseq_noUmea_27_hg38.list_baseline"
#msi_sites_bed: "{{PROJECT_PON_DATA}}/GMS560/design/SelectedMSIloci2019Sorted.bed"  # hg19 coordinates only, so left disabled in this hg38 config

purecn:
extra: "--model betabin --mapping-bias-file {{PROJECT_PON_DATA}}/GMS560/PoN/purecn/mapping_bias_nextseq_hg38.rds"
Expand Down
13 changes: 12 additions & 1 deletion config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ units: "units.tsv"

output: "config/output_files.yaml"
default_container: "docker://hydragenetics/common:3.0.0"
general_report: "config/reports/general_report.yaml"

trimmer_software: "fastp_pe"

Expand Down Expand Up @@ -189,6 +190,16 @@ gatk_mutect2_merge_stats:
gene_fuse:
container: "docker://hydragenetics/genefuse:0.6.1"

general_html_report:
final_directory_depth: 4
multiqc_config: "config/reports/multiqc_config_dna.yaml"

juli_annotate:
container: "docker://hydragenetics/juli:0.1.6.2"

juli_call:
container: "docker://hydragenetics/juli:0.1.6.2"

jumble_cnvkit_call:
container: "docker://hydragenetics/cnvkit:0.9.9"

Expand Down Expand Up @@ -285,7 +296,7 @@ multiqc:

optitype:
#container: "docker://hydragenetics/optitype:1.3.5"
container: "docker://fred2/optitype"
container: "docker://fred2/optitype:release-v1.3.1"
sample_type: "-d"
enumeration: 4

Expand Down
2 changes: 1 addition & 1 deletion config/filters/config_hard_filter_cnv_loh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ filters:
soft_filter: "False"
copy_number:
description: "Hard filter cnvs with over 1.4 copies if the BAF is close to 0.5 as well as all amplifications"
expression: "(INFO:CORR_CN > 1.4 and INFO:BAF > 0.3 and INFO:BAF < 0.7) or (INFO:CORR_CN > 2.5)"
expression: "(INFO:CORR_CN > 1.4 and INFO:NA_TRUE:BAF > 0.3 and INFO:NA_TRUE:BAF < 0.7) or (INFO:CORR_CN > 2.5)"
soft_filter: "False"
loh_gene:
description: "Only keep variants with gene annotations"
Expand Down
46 changes: 44 additions & 2 deletions config/output_files.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,21 @@ files:
types:
- T
- N
- name: MSI Sensor Pro TSV
- name: Results report DNA HTML
input: reports/general_html_report/{sample}_{type}.general_report.html
output: results/dna/{sample}_{type}/general_report/{sample}_{type}.general_report.html
types:
- T
- N
- name: MSI Sensor Pro filtered TSV
input: biomarker/msisensor_pro/{sample}_{type}
output: results/dna/{sample}_{type}/biomarker/{sample}_{type}.msisensor_pro.score.tsv
output: results/dna/{sample}_{type}/biomarker/{sample}_{type}.msisensor_pro.filtered.score.tsv
types:
- T
- N
- name: MSI Sensor Pro unfiltered TSV
input: biomarker/msisensor_pro_unfiltered/{sample}_{type}
output: results/dna/{sample}_{type}/biomarker/{sample}_{type}.msisensor_pro.unfiltered.score.tsv
types:
- T
- N
Expand Down Expand Up @@ -283,6 +295,18 @@ files:
- N
deduplication:
- umi
- name: JuLI fusions
input: fusions/juli_call/{sample}_{type}.annotated.filtered.txt
output: results/dna/{sample}_{type}/fusion/{sample}_{type}.juli.filtered.fusions.txt
types:
- T
- N
- name: JuLI unfiltered fusions
input: fusions/juli_call/{sample}_{type}.annotated.txt
output: results/dna/{sample}_{type}/additional_files/fusion/{sample}_{type}.juli.fusions.txt
types:
- T
- N
- name: ID-SNP VCF RNA
input: snv_indels/bcftools_id_snps/{sample}_{type}.id_snps.vcf
output: results/rna/{sample}_{type}/id_snps/{sample}_{type}.id_snps.vcf
Expand Down Expand Up @@ -420,8 +444,26 @@ files:
types:
- T
- N
- name: Germline vcf used in CNV analysis
input: snv_indels/bcbio_variation_recall_ensemble/{sample}_{type}.ensembled.vep_annotated.filter.germline.exclude.blacklist.vcf.gz
output: results/dna/{sample}_{type}/additional_files/cnv/{sample}_{type}.germline.vcf.gz
types:
- T
- N
- name: Manta VCF
input: cnv_sv/manta_run_workflow_t/{sample}/results/variants/tumorSV.vcf.gz
output: results/dna/{sample}_{type}/additional_files/cnv/{sample}_{type}.manta_tumorSV.vcf.gz
types:
- T
- name: HLA optitype
input: biomarker/optitype/{sample}_{type}/{sample}_{type}_hla_type_result.tsv
output: results/dna/hla/{sample}_{type}_hla_type_result.tsv
types:
- T
- N
- name: HLA coverage optitype
input: biomarker/optitype/{sample}_{type}/{sample}_{type}_hla_type_coverage_plot.pdf
output: results/dna/hla/{sample}_{type}_hla_type_coverage_plot.pdf
types:
- T
- N
2 changes: 1 addition & 1 deletion config/output_reference_files.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ files:
types:
- N
- name: msisensor_pro_reference_list_baseline
input: references/msisensor_pro_scan/Msisensor_pro_reference.list
input: references/msisensor_pro_baseline/Msisensor_pro_reference.list_baseline
output: result/Msisensor_pro_reference.list_baseline
types:
- N
Expand Down
7 changes: 7 additions & 0 deletions config/references/design_files.hg19.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,13 @@
type: file
url: https://github.com/genomic-medicine-sweden/Twist_Solid_pipeline_files/raw/v0.9.0/cnv/cytoBand.hg19.txt

msisensor_pro_filter_sites:
msi_sites_bed:
checksum: 74c4fddf7928902c7b107673551b2c8b
path: GMS560/design/SelectedMSIloci2019Sorted.bed
type: file
url: https://github.com/genomic-medicine-sweden/Twist_Solid_pipeline_files/raw/v0.12.0/design/SelectedMSIloci2019Sorted.bed

purecn:
intervals:
checksum: 0857e05962696cd3c2e5a4ea94d0fb2c
Expand Down
2 changes: 1 addition & 1 deletion config/references/nextseq.hg19.pon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
type: file
url: https://figshare.com/ndownloader/files/49365301?private_link=88202740beb6fcbac09d

msisensor_pro:
msisensor_pro_filter_sites:
PoN:
checksum: 23c9553c7043fff1a8428fcd3b231c60
path: GMS560/PoN/Msisensor_pro_reference_nextseq_36.list_baseline
Expand Down
2 changes: 1 addition & 1 deletion config/references/nextseq.hg38.pon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
# type: file
# url: https://figshare.com/ndownloader/files/49365301?private_link=88202740beb6fcbac09d

msisensor_pro:
msisensor_pro_filter_sites:
PoN:
checksum: 62f5e1ce5c242013af8398ba427314d7
path: GMS560/PoN/Msisensor_pro_reference_nextseq_noUmea_27_hg38.list_baseline
Expand Down
2 changes: 1 addition & 1 deletion config/references/novaseq.hg19.pon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
type: file
url: https://figshare.com/ndownloader/files/49365301?private_link=88202740beb6fcbac09d

msisensor_pro:
msisensor_pro_filter_sites:
PoN:
checksum: 40bda778c9208a366b39ec0e6cefde11
path: GMS560/PoN/Msisensor_pro_reference_novaseq_13.list_baseline
Expand Down
2 changes: 1 addition & 1 deletion config/references/novaseq.hg38.pon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
# type: file
# url: https://figshare.com/ndownloader/files/49365301?private_link=88202740beb6fcbac09d

# msisensor_pro:
# msisensor_pro_filter_sites:
# PoN:
# checksum: 8b5ea989a5c617aaa1ab1f81ec525e8b
# path: GMS560/PoN/Msisensor_pro_reference_novaseq_13_hg38.list_baseline
Expand Down
32 changes: 32 additions & 0 deletions config/references/references.hg19.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,38 @@
path: ref_data/GNOMAD/small_exac_common_3.hg19.vcf.gz.tbi
type: file
url: https://figshare.scilifelab.se/ndownloader/files/42061254

juli_annotate:
cosmic:
checksum: b61743a8cd0d0c3513492c717fcfa51a
path: ref_data/juli/CosmicFusionExport_V76.tsv
type: file
url: https://github.com/genomic-medicine-sweden/Twist_Solid_pipeline_files/raw/refs/tags/v0.13.0/juli/CosmicFusionExport_V76.tsv

pfam:
checksum: 81cf37cce3d34adb8e0d5df1edebaaeb
path: ref_data/juli/Pfam-A.full.human
type: file
url: https://github.com/genomic-medicine-sweden/Twist_Solid_pipeline_files/raw/refs/tags/v0.13.0/juli/Pfam-A.full.human

ref_genes:
checksum: 225faf771242f46e49e0cb2cbaed4132
path: ref_data/juli/refGene_hg19.txt
type: file
url: https://github.com/genomic-medicine-sweden/Twist_Solid_pipeline_files/raw/refs/tags/v0.13.0/juli/refGene_hg19.txt

uniprot:
checksum: 930a8899a7b3451adb7f553340febef6
path: ref_data/juli/HGNC_GeneName_UniProtID_160524.txt
type: file
url: https://github.com/genomic-medicine-sweden/Twist_Solid_pipeline_files/raw/refs/tags/v0.13.0/juli/HGNC_GeneName_UniProtID_160524.txt

juli_call:
gap_file:
checksum: cb6448d52153b6add870cf26921aea44
path: ref_data/juli/gap_hg19.txt
type: file
url: https://github.com/genomic-medicine-sweden/Twist_Solid_pipeline_files/raw/refs/tags/v0.13.0/juli/gap_hg19.txt

references:
fasta:
Expand Down
66 changes: 66 additions & 0 deletions config/reports/general_report.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Content definition for the general HTML report.
#
# Every entry under `files` carries:
#   name        - title of the entry
#   type        - one of the kinds used below: file_link, plain_text,
#                 file_table, multiqc
#   input       - path of the file the entry is built from; {sample} and
#                 {type} are placeholders
#   description - free-text description of the entry
#   nav_header  - group the entry is listed under (Small variants,
#                 Biomarkers, CNV, Fusions, MultiQC)
#   sections    - only present on the multiqc entry
#
# NOTE(review): how each `type` is rendered and how the placeholders are
# expanded is decided by the report rule that reads this file - confirm there.
files:
  # Small variants
  - name: Small variants
    type: file_link
    input: results/dna/{sample}_{type}/vcf/{sample}_{type}.annotated.exon_only.filter.hard_filter.codon_snv.qci.vcf
    description: File link to vcf with SNVs and INDELs
    nav_header: Small variants
  - name: Coverage and mutations
    type: file_link
    input: results/dna/{sample}_{type}/qc/{sample}_{type}.coverage_and_mutations.tsv
    description: File link to coverage and mutations
    nav_header: Small variants
  - name: Coverage and mutations ENC
    type: file_link
    input: results/dna/{sample}_{type}/qc/{sample}_{type}.coverage_and_mutations.ENC.tsv
    description: File link to coverage and mutations for ENC
    nav_header: Small variants

  # Biomarkers
  - name: TMB
    type: plain_text
    description: TMB (cut-off 10)
    input: results/dna/{sample}_{type}/biomarker/{sample}_{type}.TMB.txt
    nav_header: Biomarkers
  # NOTE(review): "X%" in the two MSI descriptions looks like an unfilled
  # placeholder for the cut-off value - confirm the intended number.
  - name: MSI unfiltered
    type: file_table
    description: MSI score (cut-off X%)
    input: results/dna/{sample}_{type}/biomarker/{sample}_{type}.msisensor_pro.unfiltered.score.tsv
    nav_header: Biomarkers
  - name: MSI filtered
    type: file_table
    description: MSI score (cut-off X%). Filtering based on ~400 known msi-sites.
    input: results/dna/{sample}_{type}/biomarker/{sample}_{type}.msisensor_pro.filtered.score.tsv
    nav_header: Biomarkers

  # CNV
  - name: PureCN TC
    type: file_table
    input: results/dna/{sample}_{type}/cnv/{sample}_{type}.purecn_purity_ploidity.csv
    description: TC estimated by purecn
    nav_header: CNV
  - name: CNV html
    type: file_link
    input: results/dna/{sample}_{type}/cnv/{sample}_{type}.pathology_purecn.cnv.html
    description: File link to CNV html report
    nav_header: CNV
  - name: CNV tsv report
    type: file_table
    input: results/dna/{sample}_{type}/additional_files/cnv/{sample}_{type}.pathology_purecn.amp_all_del_all.cnv_report.tsv
    description: CNV tsv report (All amplification and deletion genes)
    nav_header: CNV

  # Fusions
  - name: FuseqWes
    type: file_table
    input: results/dna/{sample}_{type}/fusion/{sample}_{type}.fuseq_wes.report.csv
    description: FuseqWES report
    nav_header: Fusions
  - name: JuLI
    type: file_table
    input: results/dna/{sample}_{type}/fusion/{sample}_{type}.juli.filtered.fusions.txt
    description: JuLI report
    nav_header: Fusions

  # MultiQC
  - name: MultiQC
    type: multiqc
    input: qc/multiqc/multiqc_DNA_data/multiqc_data.json
    sections: ["table"]
    description: multiqc general stats for this sample
    nav_header: MultiQC
7 changes: 5 additions & 2 deletions config/reports/multiqc_config_dna.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ show_analysis_paths: True
#decimalPoint_format: ','
extra_fn_clean_exts:  # everything from the matched string to the end of the sample name is removed
- '.duplication_metrics'
- type: regex
pattern: '_fastq[12]'
- '.HsMetrics'
- '.alignment_summary_metrics'
- type: regex_keep
pattern: '[0-9A-Z-]+'
#extra_fn_clean_trim:  # removed only when found at the very beginning or end of the sample name
#fn_ignore_dirs:
#fn_ignore_files:
Expand Down Expand Up @@ -63,6 +65,7 @@ table_columns_visible:
avg_sequence_length: False
percent_fails: False
total_sequences: False
median_sequence_length: False

multiqc_cgs:
"Samtools: stats":
Expand Down
6 changes: 6 additions & 0 deletions config/resources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ fastqc:
mem_mb: 12288
mem_per_cpu: 6144

juli_call:
threads: 10
mem_mb: 61440
mem_per_cpu: 6144
time: "12:00:00"

jumble_run:
threads: 10

Expand Down
Loading

0 comments on commit f838aa7

Please sign in to comment.