-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathProtein.cpp
More file actions
106 lines (95 loc) · 2.65 KB
/
Copy pathProtein.cpp
File metadata and controls
106 lines (95 loc) · 2.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#include "Protein.hpp"
/* Construct a FASTA record.
 *   id          - stored as the record identifier
 *   full_header - stored as the full header text
 *   seq         - stored as the sequence
 *   cnt         - stored as the sequential counter
 */
Fasta::Fasta (string id, string full_header, string seq, int cnt) {
    this->_sequentialCnt = cnt;
    this->_sequence = seq;
    this->_fullHeader = full_header;
    this->_id = id;
}
/* Replace the stored identifier with `id`. */
void Fasta::setId (string id) {
    this->_id = id;
}
/* Replace the stored full header with `fullHeader`. */
void Fasta::setFullHeader (string fullHeader) {
    this->_fullHeader = fullHeader;
}
/* Replace the stored sequence with `sequence`. */
void Fasta::setSequence (string sequence) {
    this->_sequence = sequence;
}
/* Return a copy of the stored identifier. */
string Fasta::getId() const {
    return this->_id;
}
/* Return a copy of the stored full header. */
string Fasta::getFullHeader() const {
    return this->_fullHeader;
}
/* Return a copy of the stored sequence. */
string Fasta::getSequence() const {
    return this->_sequence;
}
int Fasta::getCnt() const {
return _sequentialCnt;
}
/* Report whether this record's identifier starts with
 * DigestOptions::reverseTag. Returns true on an exact prefix match,
 * false otherwise (including when the identifier is shorter than the tag).
 */
bool Fasta::isReverse() const {
    // Positional compare checks the leading tag-length characters of _id
    // against the tag, just like substr(0, n).compare(tag).
    return _id.compare(0, DigestOptions::reverseTag.size(), DigestOptions::reverseTag) == 0;
}
/* Make sure REV proteins have fake init Mets.
 * When the sequence ends in 'M', that residue is moved to the front and the
 * remaining residues keep their order; otherwise the sequence is unchanged.
 * An empty sequence is left untouched: dereferencing rbegin() on an empty
 * string is undefined behaviour, so it is checked for first.
 */
void Fasta::swapRevInitMet() {
    if (_sequence.empty()) {
        return;
    }
    if (*_sequence.rbegin() == 'M') {
        _sequence = "M" + _sequence.substr(0, _sequence.length() - 1);
    }
}
/* Swap cut sites for reversed proteins to avoid the issue of reverse peptides having the
   same exact mass as the forward. Following MaxQuant paper 2008
 * Turns out to be important to target all eg [KR] sites, including those before a P
 * If you don't, you introduce a small FWD bias for KP/PK cases
 *
 * Procedure:
 *   1. Take the residue list from DigestOptions::enzymeAfterSites if it is
 *      enabled (.second), otherwise from DigestOptions::enzymeBeforeSites
 *      if that one is enabled.
 *   2. Strip every non-alphabetic character from the list.
 *   3. Wrap the list in a zero-width look-behind (after-sites) or look-ahead
 *      (before-sites) character class and collect every match position in
 *      the current sequence.
 *   4. For each position p > 1, in order, exchange the residues at p-2 and
 *      p-1. Positions are collected before any swap is applied.
 *
 * If no residues remain after step 2 (neither site set enabled, or the list
 * held no letters) the sequence is left unchanged.
 */
void Fasta::swapCutSites() {
    string s;
    if (DigestOptions::enzymeAfterSites.second) {
        s = DigestOptions::enzymeAfterSites.first;
    } else if (DigestOptions::enzymeBeforeSites.second) {
        s = DigestOptions::enzymeBeforeSites.first;
    }

    // Keep letters only. erase() invalidates the iterator it is given, so
    // continue from the iterator it returns instead of stepping back.
    for (string::iterator i = s.begin(); i != s.end();) {
        // isalpha requires a value representable as unsigned char.
        if (isalpha(static_cast<unsigned char>(*i))) {
            ++i;
        } else {
            i = s.erase(i);
        }
    }

    // Nothing to target: an empty list would give an empty character class
    // or an empty pattern that matches at every position.
    if (s.empty()) {
        return;
    }

    if (DigestOptions::enzymeAfterSites.second) {
        s = "(?<=[" + s + "])";
    } else if (DigestOptions::enzymeBeforeSites.second) {
        s = "(?=[" + s + "])";
    }

    const boost::regex re(s);
    boost::sregex_iterator reIt((_sequence).begin(), (_sequence).end(), re);
    boost::sregex_iterator end;

    // Record all match positions first so the swaps below do not affect matching.
    vector<int> sites;
    sites.reserve(_sequence.size());
    while (reIt != end) {
        sites.push_back((*reIt++).position());
    }

    // With the look-behind pattern, p is the index just past a cut residue,
    // so this exchanges the cut residue with the residue before it.
    // NOTE(review): with the look-ahead pattern, p is the index of the cut
    // residue itself, so the two residues preceding it are exchanged --
    // confirm this is the intended behaviour for before-sites.
    for (vector<int>::iterator vecIt = sites.begin(); vecIt != sites.end(); ++vecIt) {
        if (*vecIt > 1) {
            swap(_sequence[*vecIt - 2], _sequence[*vecIt - 1]);
        }
    }
}
void Fasta::outputDigest() const {
cout << _fullHeader << endl;
DigestIterator dIt(*this);
while (dIt != DigestIterator()) {
Peptide pep = *dIt;
pep.output();
dIt++;
}
return;
}