forked from alexcwsmith/singleCellTools
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcountDEGs.py
More file actions
executable file
·63 lines (57 loc) · 1.93 KB
/
countDEGs.py
File metadata and controls
executable file
·63 lines (57 loc) · 1.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 22 15:17:11 2021
@author: smith
"""
import numpy as np
import pandas as pd
import os
def countDEGs(file, directory, n_genes=1000, pcutoff=.05, plot=True, save=False):
    """Count number of differentially expressed genes in scanpy result file.

    Every column whose name ends in '_p' is treated as a column of p-values
    for one cluster. The cluster number is the integer at the end of the last
    space-separated token of the column name (ignoring the '_p' ending and a
    trailing ')').

    Parameters
    ----------
    file : string
        Path to saved .xlsx or .csv file containing differential expression
        data. The first column of the file is read as the index. The
        extension is matched case-insensitively.
    directory : string
        Directory to save results (the bar plot and/or the .xlsx table).
    n_genes : int, (optional, default 1000)
        Only the first n_genes rows of the file are counted.
    pcutoff : float (optional, default .05)
        Alpha value for significance. P-values strictly below it are counted.
    plot : bool (optional, default True)
        Whether to draw a bar plot of the counts. The plot is written to
        directory as '<file name>_DEG_Counts.png' regardless of save. It is
        skipped when the file has no '_p' columns.
    save : bool (optional, default False)
        Whether to also write the counts to directory as
        '<file name>_DEG_Counts.xlsx', or only return the result.

    Returns
    -------
    Pandas DataFrame with # of DEGs for each cluster, indexed by 'Cluster'
    with a single 'DEGs' column. Empty if the file has no '_p' columns.

    Raises
    ------
    ValueError
        If file is neither .xlsx nor .csv, or if a '_p' column name does not
        end in an integer cluster number.
    """
    path = os.fspath(file)
    fname, _ = os.path.splitext(os.path.basename(path))
    lowered = path.lower()
    if lowered.endswith('.xlsx'):
        df = pd.read_excel(path, index_col=0, engine='openpyxl')
    elif lowered.endswith('.csv'):
        df = pd.read_csv(path, index_col=0)
    else:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError(
            "Unsupported file type for %r: expected a .xlsx or .csv file." % path
        )
    df = df.iloc[:n_genes]

    rows = []
    for col in df.columns:
        if not col.endswith('_p'):
            continue
        # Number of rows whose p-value is below the cutoff (NaN compares False).
        n_sig = int((df[col] < pcutoff).sum())
        # NOTE: str.strip removes the *characters* '_' and 'p' from both ends,
        # not the '_p' suffix; that is harmless here because only the trailing
        # integer token of the name is kept.
        cluster = int(col.strip('_p').split(' ')[-1].strip(')'))
        rows.append((cluster, n_sig))

    # Passing columns= up front keeps the layout valid even when rows is empty.
    res = pd.DataFrame(rows, columns=['Cluster', 'DEGs'])
    res.set_index('Cluster', inplace=True, drop=True)

    if plot and not res.empty:
        ax = res.plot(kind='bar', grid=False)
        ax.get_figure().savefig(os.path.join(directory, fname + '_DEG_Counts.png'))
    if save:
        res.to_excel(os.path.join(directory, fname + '_DEG_Counts.xlsx'))
    return res