11"""Utilities for translating ORF detection"""
2+
23# Part of ribotricer software
34#
45# Copyright (C) 2020 Saket Choudhary, Wenzheng Li, and Andrew D Smith
2122import pandas as pd
2223
2324
24- def count_orfs (
25- ribotricer_index , detected_orfs , features , outfile , report_all = False
26- ):
25+ def count_orfs (ribotricer_index , detected_orfs , features , outfile , report_all = False ):
2726 """
2827 Parameters
2928 ----------
@@ -59,11 +58,7 @@ def count_orfs(
5958 # do not output 'nontranslating' events unless report_all is set
6059 if status != "nontranslating" or report_all :
6160 intervals = orf_index [oid ].intervals
62- coor = [
63- x
64- for iv in intervals
65- for x in range (iv .start , iv .end + 1 )
66- ]
61+ coor = [x for iv in intervals for x in range (iv .start , iv .end + 1 )]
6762 if strand == "-" :
6863 coor = coor [::- 1 ]
6964 profile_stripped = profile .strip ()[1 :- 1 ].split (", " )
@@ -111,9 +106,7 @@ def count_orfs_codon(
111106 if True, all coverages will be exported
112107 """
113108 orf_index = {}
114- fasta_df = pd .read_csv (ribotricer_index_fasta , sep = "\t " ).set_index (
115- "ORF_ID"
116- )
109+ fasta_df = pd .read_csv (ribotricer_index_fasta , sep = "\t " ).set_index ("ORF_ID" )
117110 read_counts = defaultdict (dict )
118111 with open (ribotricer_index , "r" ) as fin :
119112 # Skip header
@@ -134,15 +127,9 @@ def count_orfs_codon(
134127 # do not output 'nontranslating' events unless report_all is set
135128 if status != "nontranslating" or report_all :
136129 intervals = orf_index [oid ].intervals
137- coor = [
138- x
139- for iv in intervals
140- for x in range (iv .start , iv .end + 1 )
141- ]
130+ coor = [x for iv in intervals for x in range (iv .start , iv .end + 1 )]
142131 codon_coor = [
143- x
144- for iv in intervals
145- for x in range (iv .start , iv .end + 1 , 3 )
132+ x for iv in intervals for x in range (iv .start , iv .end + 1 , 3 )
146133 ]
147134 if strand == "-" :
148135 coor = coor [::- 1 ]
@@ -207,16 +194,12 @@ def count_orfs_codon(
207194 fout_df ["per_codon_enrichment(total/n_occur)" ] = (
208195 fout_df ["total_codon_coverage" ] / fout_df ["codon_occurences" ]
209196 )
210- fout_df [
211- "-log10_relative_enrichment(per_codon/total_gene_coverage)"
212- ] = - np .log10 (
197+ fout_df ["-log10_relative_enrichment(per_codon/total_gene_coverage)" ] = - np .log10 (
213198 fout_df ["per_codon_enrichment(total/n_occur)" ]
214199 / fout_df .groupby ("gene_id" )["total_codon_coverage" ].transform ("sum" )
215200 )
216201 # Overwrite
217- fout_df .to_csv (
218- "{}_genewise.tsv" .format (prefix ), sep = "\t " , index = False , header = True
219- )
202+ fout_df .to_csv ("{}_genewise.tsv" .format (prefix ), sep = "\t " , index = False , header = True )
220203 # Remove infs
221204 fout_df = fout_df .replace ([np .inf , - np .inf ], np .nan )
222205 fout_df = fout_df .dropna ()
0 commit comments