Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove to-dos, handle lint warnings and run notebooks #20

Merged
merged 3 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions gnomad_toolbox/analysis/general.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Set of general functions for gnomAD analysis."""

from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, List, Optional

import hail as hl
from gnomad.assessment.summary_stats import freq_bin_expr
Expand All @@ -9,7 +9,7 @@


def get_variant_count_by_freq_bin(
af_cutoffs: List[float] = [0.001, 0.01],
af_cutoffs: Optional[List[float]] = None,
singletons: bool = False,
doubletons: bool = False,
pass_only: bool = True,
Expand Down Expand Up @@ -37,6 +37,10 @@ def get_variant_count_by_freq_bin(
'data_type', and 'version'.
:return: Dictionary with counts.
"""
# Initialize af_cutoffs if not provided
if af_cutoffs is None:
af_cutoffs = [0.001, 0.01]

# Load the Hail Table if not provided
ht = _get_dataset(dataset="variant", **kwargs)

Expand Down
39 changes: 10 additions & 29 deletions gnomad_toolbox/filtering/vep.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
from gnomad_toolbox.load_data import _get_dataset, get_compatible_dataset_versions


# TODO: Check these csq sets, the ones in the code don't match what is listed on the
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you confirm these csq sets? I forgot about this TODO, but I think we need to make sure they are consistent. I copied the doc string csqs from the browser hover over, but I just used the csqs that you had in your original code. If the code is correct, can you just change the doc string?

Copy link
Contributor Author

@KoalaQin KoalaQin Jan 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I asked the browser people if it's hard coded or the list includes all the csqs for different VEP versions (there are some new csq 105 csq not shown there), but for now I changed a bit the code and the docstring, so it's doing what it's saying.

Copy link
Contributor Author

@KoalaQin KoalaQin Jan 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like it's hardcoded for 'Other' and they didn't update the list by VEP version, so we might have a different number for that category but it should be rare (I assume those deprecated/added are not so common), for now, our code is aligned to their LoF, missense and synonymous.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good, thank you

# browser. We should make sure they are consistent.
def filter_by_consequence_category(
plof: bool = False,
missense: bool = False,
Expand Down Expand Up @@ -52,31 +50,7 @@ def filter_by_consequence_category(

Other:

- protein_altering_variant
- incomplete_terminal_codon_variant
- stop_retained_variant
- coding_sequence_variant
- mature_miRNA_variant
- 5_prime_UTR_variant
- 3_prime_UTR_variant
- non_coding_transcript_exon_variant
- non_coding_exon_variant
- NMD_transcript_variant
- non_coding_transcript_variant
- nc_transcript_variant
- downstream_gene_variant
- TFBS_ablation
- TFBS_amplification
- TF_binding_site_variant
- regulatory_region_ablation
- regulatory_region_amplification
- feature_elongation
- regulatory_region_variant
- feature_truncation
- intergenic_variant
- intron_variant
- splice_region_variant
- upstream_gene_variant
- All other consequences not included in the above categories.

:param plof: Whether to include pLoF variants.
:param missense: Whether to include missense variants.
Expand All @@ -94,8 +68,14 @@ def filter_by_consequence_category(
# Load the Hail Table if not provided
ht = _get_dataset(dataset="variant", **kwargs)

lof_csqs = list(LOF_CSQ_SET)
missense_csqs = ["missense_variant", "inframe_insertion", "inframe_deletion"]
lof_csqs = list(LOF_CSQ_SET + ["transcript_ablation"])
missense_csqs = [
"missense_variant",
"inframe_insertion",
"inframe_deletion",
"stop_lost",
"start_lost",
]
synonymous_csqs = ["synonymous_variant"]
other_csqs = lof_csqs + missense_csqs + synonymous_csqs

Expand Down Expand Up @@ -165,6 +145,7 @@ def filter_to_high_confidence_loftee(
is False.
:param canonical_only: Whether to include only canonical transcripts. Default is
False.
:param version: Optional version of the dataset to use.
:param kwargs: Additional arguments to pass to `_get_dataset`.
:return: Table with high-confidence LOFTEE variants.
"""
Expand Down
2 changes: 0 additions & 2 deletions gnomad_toolbox/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,6 @@
}


# TODO: Are there other things we want to store in a session? Like a PASS variants only
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add this to a ticket so we don't forget?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thank you

# variable?
class GnomADSession:
"""Class to manage the default data type and version for a gnomAD session."""

Expand Down
8 changes: 0 additions & 8 deletions gnomad_toolbox/notebooks/dive_into_secondary_analyses.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1872,14 +1872,6 @@
"source": [
"get_observed_plofs_for_gene_constraint('drd2').show(-1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c2212f4-a7cf-45bc-930d-aa18cebff7a3",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
17 changes: 12 additions & 5 deletions gnomad_toolbox/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def load_config(config_file: str = CONFIG_FILE) -> dict:
:return: The configuration dictionary.
"""
if os.path.exists(config_file):
with open(config_file, "r") as f:
with open(config_file, "r", encoding="utf-8") as f:
return json.load(f)
return {}

Expand All @@ -40,7 +40,7 @@ def save_config(config: dict, config_file: str = CONFIG_FILE) -> None:
:param config_file: Path to the configuration file.
:return: None.
"""
with open(config_file, "w") as f:
with open(config_file, "w", encoding="utf-8") as f:
json.dump(config, f, indent=4)


Expand Down Expand Up @@ -159,8 +159,12 @@ def copy_notebooks_cli(destination: str, overwrite: bool) -> None:
try:
copy_notebooks(destination, overwrite)
logger.info("Notebooks successfully copied to %s.", destination)
except Exception as e:
logger.error("Error during notebook copy: %s", e)
except FileNotFoundError:
logger.error("The destination directory %s does not exist.", destination)
except PermissionError:
logger.error("Permission denied while accessing %s.", destination)
except OSError as e:
logger.error("OS error during notebook copy: %s", e)


def run_jupyter_cli() -> None:
Expand All @@ -185,4 +189,7 @@ def run_jupyter_cli() -> None:

# Launch Jupyter.
command = sys.argv[1] if len(sys.argv) > 1 else "lab"
subprocess.run(["jupyter", command])
try:
subprocess.run(["jupyter", command], check=True)
except subprocess.CalledProcessError as e:
logger.error("Failed to launch Jupyter: %s", e)
Loading