-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathextend_circle.py
More file actions
58 lines (41 loc) · 1.66 KB
/
extend_circle.py
File metadata and controls
58 lines (41 loc) · 1.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python3
##########################
# Author: B.M. Anderson
# Date: Aug 2024
# Description: extend a linear representation of a circular contig (fasta) by a specified number of basepairs
# Note: This might be used to map to a longer representation of a plastome (to extend the overlaps)
# Will output a longer fasta in the current directory named `new_contig.fasta`
##########################
import argparse
from Bio import SeqIO
import sys
def extend_sequence(seq, bps, pad_with_ns=False):
    """Return `seq` lengthened by `bps` positions at each end.

    seq: a sliceable sequence that can be concatenated with `str`
        (a plain `str`, or the `.seq` of the record read in `main`).
    bps: number of positions to add to EACH end; must be >= 0.
    pad_with_ns: when True, pad both ends with `bps` 'N' characters
        (linear contig); when False, treat `seq` as circular and copy
        the last `bps` positions in front of the start and the first
        `bps` positions after the end.

    Raises ValueError if `bps` is negative, or if `bps` is longer than
    `seq` when extending circularly.
    """
    if bps < 0:
        raise ValueError('the extension length (-b) cannot be negative')

    if pad_with_ns:
        padding = 'N' * bps
        return padding + seq + padding

    length = len(seq)
    if bps > length:
        # a start index of `length - bps` would be negative here and would
        # silently select the wrong (too short) slice for the front
        raise ValueError(
            'the extension length (-b = ' + str(bps) + ') is longer than '
            'the contig (' + str(length) + ' bp)')

    add_front = seq[length - bps:]    # the end of the circle goes before the start
    add_back = seq[:bps]              # the start of the circle goes after the end
    return add_front + seq + add_back


def main():
    """Parse the command line and write the extended contig to `new_contig.fasta`."""
    # instantiate the parser
    parser = argparse.ArgumentParser(
        description = 'A script to extend a linear representation of a circular sequence (fasta file) by a specified amount')

    # add arguments to parse
    parser.add_argument('contig', type=str, help='The circular contig to extend')
    parser.add_argument('-b', type=int, dest='bps', help='How much to extend each end by (default 150)')
    parser.add_argument('-n', type=str, dest='ns', help='Whether to extend as Ns to pad a linear contig (yes or no [default])')

    # parse the command line (show the help when run without any arguments)
    if not sys.argv[1:]:
        parser.print_help(sys.stderr)
        sys.exit(1)
    args = parser.parse_args()

    # assign variables
    # compare against None so that an explicit `-b 0` is not replaced by the default
    bps = 150 if args.bps is None else args.bps
    ns = args.ns or 'no'

    # read the input first, so that a bad input cannot leave behind an
    # empty (or truncated) `new_contig.fasta`
    with open(args.contig, 'r') as contig_file:
        fasta = SeqIO.read(contig_file, 'fasta')

    # create new extended fasta
    try:
        fasta.seq = extend_sequence(fasta.seq, bps, ns.lower() == 'yes')
    except ValueError as err:
        parser.error(str(err))

    with open('new_contig.fasta', 'w') as out_file:
        SeqIO.write(fasta, out_file, 'fasta')


if __name__ == '__main__':
    main()