Formatting

saketkc · saketkc · commit aa47879a83a2 · 2026-02-13T19:48:39.000+05:30
diff --git a/notebooks/Plotting_ribotricer_profile.ipynb b/notebooks/Plotting_ribotricer_profile.ipynb
diff --git a/ribotricer/bam.py b/ribotricer/bam.py
@@ -1,4 +1,5 @@
 """Utilities for spliting bam file"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
diff --git a/ribotricer/cli.py b/ribotricer/cli.py
@@ -1,5 +1,5 @@
-"""Command line interface for ribotricer
-"""
+"""Command line interface for ribotricer"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
@@ -238,9 +238,7 @@ def detect_orfs_cmd(
         sys.exit("Error: psite_offsets only allowed when read_lengths is provided")
     if read_lengths is not None and psite_offsets is not None:
         try:
-            psite_offsets = [
-                int(x.strip()) for x in psite_offsets.strip().split(",")
-            ]
+            psite_offsets = [int(x.strip()) for x in psite_offsets.strip().split(",")]
         except Exception:
             sys.exit("Error: cannot convert psite_offsets into integers")
         if len(read_lengths) != len(psite_offsets):
diff --git a/ribotricer/common.py b/ribotricer/common.py
@@ -1,4 +1,5 @@
 """Utilities for common usage"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
@@ -78,9 +79,7 @@ def merge_intervals(intervals):
             intervals[i].end,
             intervals[i].strand,
         )
-        while (
-            i + 1 < len(intervals) and intervals[i + 1].start <= to_merge.end
-        ):
+        while i + 1 < len(intervals) and intervals[i + 1].start <= to_merge.end:
             to_merge.end = max(to_merge.end, intervals[i + 1].end)
             i += 1
         merged_intervals.append(to_merge)
@@ -128,7 +127,6 @@ def collapse_coverage_to_codon(coverage):
                     Coverage collapsed to codon level
     """
     codon_coverage = [
-        sum(coverage[current: current + 3])
-        for current in range(0, len(coverage), 3)
+        sum(coverage[current : current + 3]) for current in range(0, len(coverage), 3)
     ]
     return codon_coverage
diff --git a/ribotricer/const.py b/ribotricer/const.py
@@ -1,4 +1,5 @@
 """Constants used in ribotricer"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
diff --git a/ribotricer/count_orfs.py b/ribotricer/count_orfs.py
@@ -1,4 +1,5 @@
 """Utilities for translating ORF detection"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
@@ -21,9 +22,7 @@
 import pandas as pd
 
 
-def count_orfs(
-    ribotricer_index, detected_orfs, features, outfile, report_all=False
-):
+def count_orfs(ribotricer_index, detected_orfs, features, outfile, report_all=False):
     """
     Parameters
     ----------
@@ -59,11 +58,7 @@ def count_orfs(
                 # do not output 'nontranslating' events unless report_all is set
                 if status != "nontranslating" or report_all:
                     intervals = orf_index[oid].intervals
-                    coor = [
-                        x
-                        for iv in intervals
-                        for x in range(iv.start, iv.end + 1)
-                    ]
+                    coor = [x for iv in intervals for x in range(iv.start, iv.end + 1)]
                     if strand == "-":
                         coor = coor[::-1]
                     profile_stripped = profile.strip()[1:-1].split(", ")
@@ -111,9 +106,7 @@ def count_orfs_codon(
                 if True, all coverages will be exported
     """
     orf_index = {}
-    fasta_df = pd.read_csv(ribotricer_index_fasta, sep="\t").set_index(
-        "ORF_ID"
-    )
+    fasta_df = pd.read_csv(ribotricer_index_fasta, sep="\t").set_index("ORF_ID")
     read_counts = defaultdict(dict)
     with open(ribotricer_index, "r") as fin:
         # Skip header
@@ -134,15 +127,9 @@ def count_orfs_codon(
                 # do not output 'nontranslating' events unless report_all is set
                 if status != "nontranslating" or report_all:
                     intervals = orf_index[oid].intervals
-                    coor = [
-                        x
-                        for iv in intervals
-                        for x in range(iv.start, iv.end + 1)
-                    ]
+                    coor = [x for iv in intervals for x in range(iv.start, iv.end + 1)]
                     codon_coor = [
-                        x
-                        for iv in intervals
-                        for x in range(iv.start, iv.end + 1, 3)
+                        x for iv in intervals for x in range(iv.start, iv.end + 1, 3)
                     ]
                     if strand == "-":
                         coor = coor[::-1]
@@ -207,16 +194,12 @@ def count_orfs_codon(
     fout_df["per_codon_enrichment(total/n_occur)"] = (
         fout_df["total_codon_coverage"] / fout_df["codon_occurences"]
     )
-    fout_df[
-        "-log10_relative_enrichment(per_codon/total_gene_coverage)"
-    ] = -np.log10(
+    fout_df["-log10_relative_enrichment(per_codon/total_gene_coverage)"] = -np.log10(
         fout_df["per_codon_enrichment(total/n_occur)"]
         / fout_df.groupby("gene_id")["total_codon_coverage"].transform("sum")
     )
     # Overwrite
-    fout_df.to_csv(
-        "{}_genewise.tsv".format(prefix), sep="\t", index=False, header=True
-    )
+    fout_df.to_csv("{}_genewise.tsv".format(prefix), sep="\t", index=False, header=True)
     # Remove infs
     fout_df = fout_df.replace([np.inf, -np.inf], np.nan)
     fout_df = fout_df.dropna()
diff --git a/ribotricer/detect_orfs.py b/ribotricer/detect_orfs.py
@@ -1,4 +1,5 @@
 """Utilities for translating ORF detection"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
@@ -174,10 +175,7 @@ def orf_coverage(orf, alignments, offset_5p=0, offset_3p=0):
                 except KeyError:
                     coverage.append(0)
             else:
-                if (
-                    strand in alignments
-                    and (chrom, pos) in alignments[strand]
-                ):
+                if strand in alignments and (chrom, pos) in alignments[strand]:
                     coverage.append(alignments[strand][(chrom, pos)])
                 else:
                     coverage.append(0)
@@ -269,9 +267,7 @@ def export_orf_coverages(
                 valid_codons_ratio = valid_codons / n_codons
                 # total reads in the ORF divided by the length
                 orf_density = np.sum(codon_coverage) / n_codons
-                codon_coverage_exceeds_min = (
-                    codon_coverage >= min_reads_per_codon
-                )
+                codon_coverage_exceeds_min = codon_coverage >= min_reads_per_codon
                 status = (
                     "translating"
                     if (
@@ -327,9 +323,7 @@ def export_wig(merged_alignments, prefix):
             if chrom != cur_chrom:
                 cur_chrom = chrom
                 to_write += "variableStep chrom={}\n".format(chrom)
-            to_write += "{}\t{}\n".format(
-                pos, merged_alignments[strand][(chrom, pos)]
-            )
+            to_write += "{}\t{}\n".format(pos, merged_alignments[strand][(chrom, pos)])
         if strand == "+":
             fname = "{}_pos.wig".format(prefix)
         else:
@@ -387,11 +381,7 @@ def detect_orfs(
 
     # parse the index file
     now = datetime.datetime.now()
-    print(
-        now.strftime(
-            "%b %d %H:%M:%S ... started parsing ribotricer index file"
-        )
-    )
+    print(now.strftime("%b %d %H:%M:%S ... started parsing ribotricer index file"))
     annotated, refseq = parse_ribotricer_index(ribotricer_index)
 
     # create directory
@@ -412,9 +402,7 @@ def detect_orfs(
     # split bam file into strand and read length
     now = datetime.datetime.now()
     print(now.strftime("%b %d %H:%M:%S ... started reading bam file"))
-    alignments, read_length_counts = split_bam(
-        bam, protocol, prefix, read_lengths
-    )
+    alignments, read_length_counts = split_bam(bam, protocol, prefix, read_lengths)
 
     # plot read length distribution
     now = datetime.datetime.now()
diff --git a/ribotricer/fasta.py b/ribotricer/fasta.py
@@ -1,4 +1,5 @@
 """process fasta files"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
diff --git a/ribotricer/gtf.py b/ribotricer/gtf.py
@@ -1,4 +1,5 @@
 """Utilities for reading GTF file"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
diff --git a/ribotricer/infer_protocol.py b/ribotricer/infer_protocol.py
@@ -1,4 +1,5 @@
 """infer experimental protocol"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
diff --git a/ribotricer/interval.py b/ribotricer/interval.py
@@ -1,4 +1,5 @@
 """Utility for handling chromosome intervals"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
diff --git a/ribotricer/learn_cutoff.py b/ribotricer/learn_cutoff.py
@@ -1,4 +1,5 @@
 """Utilities for translating learning phase-score cutoffs"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
diff --git a/ribotricer/metagene.py b/ribotricer/metagene.py
@@ -1,4 +1,5 @@
 """Metagene profile related functions"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
@@ -281,7 +282,7 @@ def align_metagenes(
         xcorr = np.correlate(reference, cov, "full")
         origin = len(xcorr) // 2
         bound = min(base, length)
-        xcorr = xcorr[(origin - bound):(origin + bound)]
+        xcorr = xcorr[(origin - bound) : (origin + bound)]
         lag = np.argmax(xcorr) - len(xcorr) // 2
         psite_offsets[length] = lag + TYPICAL_OFFSET
         to_write += "\tlag of {}: {}\n".format(length, lag)
diff --git a/ribotricer/orf.py b/ribotricer/orf.py
@@ -1,5 +1,5 @@
-"""Utilities for translating ORF detection
-"""
+"""Utilities for translating ORF detection"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
diff --git a/ribotricer/orf_seq.py b/ribotricer/orf_seq.py
@@ -1,4 +1,5 @@
 """Generate sequences for ribotricer annotation"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
@@ -92,7 +93,7 @@ def translate_nt_to_aa(seq):
     protein = ""
     if len(seq) % 3 == 0:
         for i in range(0, len(seq), 3):
-            codon = seq[i: i + 3]
+            codon = seq[i : i + 3]
             if "N" in codon:
                 protein += "X"
             elif codon not in codon_table:
@@ -146,6 +147,6 @@ def orf_seq(ribotricer_index, genome_fasta, saveto, translate=False):
                         "a multiple of three. Output sequence might be "
                         "truncated.\n"
                     )
-                    seq = seq[0: (len(seq) // 3) * 3]
+                    seq = seq[0 : (len(seq) // 3) * 3]
                 seq = translate_nt_to_aa(seq)
             fh.write("{}\t{}\n".format(orf_id, seq))
diff --git a/ribotricer/plotting.py b/ribotricer/plotting.py
@@ -1,4 +1,5 @@
 """Plotting functions."""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
@@ -15,6 +16,7 @@
 
 import numpy as np
 import matplotlib
+
 matplotlib.use("Agg")
 
 # ADS: verify that matplotlib.use("Agg") must precede imports below
diff --git a/ribotricer/prepare_orfs.py b/ribotricer/prepare_orfs.py
@@ -1,4 +1,5 @@
 """Functions for finding all candidate ORFs"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
@@ -205,7 +206,7 @@ def search_orfs(fasta, intervals, min_orf_length, start_codons, stop_codons, lon
                         )
                         seq = merged_seq[start:idx]
                         leader = merged_seq[:start]
-                        trailer = merged_seq[idx + 3:]
+                        trailer = merged_seq[idx + 3 :]
                         if ivs:
                             orfs.append((ivs, seq, leader, trailer))
                     if longest:
@@ -365,18 +366,18 @@ def prepare_orfs(
         )
         if orf.start_codon in start_codons:
             to_write += formatter.format(
-                    orf.oid,
-                    orf.category,
-                    orf.tid,
-                    orf.ttype,
-                    orf.gid,
-                    orf.gname,
-                    orf.gtype,
-                    orf.chrom,
-                    orf.strand,
-                    orf.start_codon,
-                    coordinate,
-                    )
+                orf.oid,
+                orf.category,
+                orf.tid,
+                orf.ttype,
+                orf.gid,
+                orf.gname,
+                orf.gtype,
+                orf.chrom,
+                orf.strand,
+                orf.start_codon,
+                coordinate,
+            )
 
     with open("{}_candidate_orfs.tsv".format(prefix), "w") as output:
         output.write(to_write)
diff --git a/ribotricer/utils.py b/ribotricer/utils.py
@@ -1,4 +1,5 @@
 """Utilities for analysis"""
+
 # Part of ribotricer software
 #
 # Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
@@ -400,7 +401,7 @@ def translate(seq):
     protein = ""
     if len(seq) % 3 == 0:
         for i in range(0, len(seq), 3):
-            codon = seq[i: i + 3]
+            codon = seq[i : i + 3]
             protein += CODON_TO_AA[codon]
     return protein
 

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`"""Utilities for spliting bam file"""`
	`2`	`+`
`2`	`3`	`# Part of ribotricer software`
`3`	`4`	`#`
`4`	`5`	`# Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`"""Constants used in ribotricer"""`
	`2`	`+`
`2`	`3`	`# Part of ribotricer software`
`3`	`4`	`#`
`4`	`5`	`# Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`"""process fasta files"""`
	`2`	`+`
`2`	`3`	`# Part of ribotricer software`
`3`	`4`	`#`
`4`	`5`	`# Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`"""Utilities for reading GTF file"""`
	`2`	`+`
`2`	`3`	`# Part of ribotricer software`
`3`	`4`	`#`
`4`	`5`	`# Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`"""infer experimental protocol"""`
	`2`	`+`
`2`	`3`	`# Part of ribotricer software`
`3`	`4`	`#`
`4`	`5`	`# Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith`