-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCard.py
More file actions
149 lines (127 loc) · 5.52 KB
/
Copy pathCard.py
File metadata and controls
149 lines (127 loc) · 5.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
from bs4 import BeautifulSoup
import urllib2
import _mysql
class Card:
    """One card scraped from a Gatherer card-details page.

    Constructing an instance downloads the details page for the given
    multiverse id and extracts every field into a plain attribute:
    ``name``, ``mana``, ``cmc``, ``types``, ``text``, ``flavor``, ``power``,
    ``toughness``, ``expansion``, ``rarity``, ``card_id`` and ``artist``.
    A field whose row is absent from the page is stored as ``''``.

    ``save_card`` inserts the card into a ``cards`` table through the
    connection handed to the constructor; ``print_card`` dumps it to stdout.
    """

    # Base for URL to fetch cards from
    __BASE_URL = 'http://gatherer.wizards.com/Pages/Card/Details.aspx?multiverseid='

    # HTML object IDs of the rows holding each field on the details page
    __name_id = 'ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_nameRow'
    __mana_id = 'ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_manaRow'
    __cmc_id = 'ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_cmcRow'
    __types_id = 'ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_typeRow'
    __text_id = 'ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_textRow'
    __flavor_id = 'ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_flavorRow'
    __p_t_id = 'ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_ptRow'
    __set_id = 'ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_setRow'
    __rarity_id = 'ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_rarityRow'
    __artist_id = 'ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_artistRow'
    __card_id = 'ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_numberRow'

    def __init__(self, gatherer_id, db_connection=None):
        """Fetch and parse the details page for ``gatherer_id``.

        gatherer_id   -- multiverse id appended to the base URL; also stored
                         as ``unique_id``.
        db_connection -- optional object exposing ``query(sql)``; only used
                         by ``save_card``.
        """
        self.db = db_connection
        self.unique_id = gatherer_id

        url = self.__BASE_URL + str(gatherer_id)
        response = urllib2.urlopen(url)
        try:
            # NOTE(review): no parser is named, so bs4 picks whichever one is
            # installed; pass one explicitly if results must be reproducible.
            self.soup = BeautifulSoup(response)
        finally:
            # The soup is fully built at this point; release the connection.
            response.close()

        self.name = self.__extract_content(self.__name_id)
        self.mana = self.__extract_mana(self.__mana_id)
        self.cmc = self.__extract_content(self.__cmc_id)
        self.types = self.__extract_content(self.__types_id)
        self.text = self.__extract_content(self.__text_id, '\n')
        self.flavor = self.__extract_content(self.__flavor_id)
        self.power = self.__extract_power(self.__p_t_id)
        self.toughness = self.__extract_toughness(self.__p_t_id)
        self.expansion = self.__extract_text(self.__set_id)
        self.rarity = self.__extract_content(self.__rarity_id)
        self.card_id = self.__extract_content(self.__card_id)
        self.artist = self.__extract_content(self.__artist_id)

    def __str__(self):
        """Return the card name."""
        return self.name

    def __extract_items(self, content_id):
        """Return the ``<div class="value">`` inside the row ``content_id``.

        Returns None when the row (or its value div) is not on the page.
        """
        bs4content = self.soup.find(id=content_id)
        if bs4content is None:
            return None
        return bs4content.find('div', class_='value')

    # extracts and formats regular content with or without images
    def __extract_content(self, content_id, delim=' '):
        """Flatten a row into text, rendering images as ``[alt]``.

        Text fragments are joined with ``delim``; a single space is used
        instead on either side of an image so symbols stay inline with the
        surrounding text. Returns ``''`` when the row is missing.
        """
        bs4content = self.__extract_items(content_id)
        if bs4content is None:
            return ''

        content = ''
        last_item = ''
        for item in bs4content.descendants:
            item_type = item.name
            # Stripping before appending collapses any run of separators
            # left behind by empty strings or non-image tags.
            if last_item == 'img' or item_type == 'img':
                content = content.strip() + ' '
            else:
                content = content.strip() + delim

            if item_type == 'img':
                # An image without alt text renders as "[]" rather than
                # raising KeyError.
                content = content + '[' + item.get('alt', '') + ']'
                last_item = 'img'
            elif item_type is None:
                # Text nodes are the descendants that have no tag name.
                content = content + item.strip()
                last_item = 'text'
        return self.__normalize_string(content)

    def __normalize_string(self, string):
        """Return ``string`` without leading/trailing whitespace.

        The previous ASCII / non-ASCII branches performed the same strip,
        so they are collapsed into one.
        """
        return string.strip()

    # extracts raw text from content ignoring images
    def __extract_text(self, content_id):
        """Return the row's text fragments joined by single spaces."""
        bs4content = self.__extract_items(content_id)
        if bs4content is None:
            return ''
        return self.__normalize_string(' '.join(bs4content.stripped_strings))

    # extracts mana values from contents
    def __extract_mana(self, content_id):
        """Return the row's images as ``[alt],[alt],...`` (``''`` if none)."""
        bs4content = self.__extract_items(content_id)
        if bs4content is None:
            return ''
        symbols = ['[' + image.get('alt', '') + ']'
                   for image in bs4content.find_all('img')]
        return self.__normalize_string(','.join(symbols))

    def __split_power_toughness(self, content_id):
        """Return the '/'-separated parts of the row's single text node.

        Returns an empty list when the row is missing or its value is not a
        single string (``.string`` is None in that case).
        """
        bs4content = self.__extract_items(content_id)
        if bs4content is None or bs4content.string is None:
            return []
        return bs4content.string.strip().split('/')

    def __extract_power(self, content_id):
        """Return the part before the first '/', or ``''`` if unavailable."""
        parts = self.__split_power_toughness(content_id)
        if not parts:
            return ''
        return self.__normalize_string(parts[0])

    def __extract_toughness(self, content_id):
        """Return the part after the first '/', or ``''`` if unavailable.

        A value without a slash (a lone number) has no toughness; it yields
        ``''`` instead of raising IndexError.
        """
        parts = self.__split_power_toughness(content_id)
        if len(parts) < 2:
            return ''
        return self.__normalize_string(parts[1])

    def __escape_sql_value(self, value):
        """Escape ``value`` for use inside a single-quoted SQL literal.

        Backslashes are doubled *before* quotes are escaped; escaping only
        the quote lets input such as ``\\'`` terminate the literal early.
        Values without ``replace`` (e.g. integers) are returned unchanged.
        """
        try:
            return value.replace('\\', '\\\\').replace("'", "\\'")
        except AttributeError:
            return value

    def save_card(self):
        """Insert this card into the ``cards`` table.

        Requires the ``db_connection`` given at construction to be an object
        with a ``query(sql)`` method; the statement is passed UTF-8 encoded.
        """
        raw_values = [self.unique_id, self.name, self.mana, self.cmc,
                      self.types, self.text, self.flavor, self.power,
                      self.toughness, self.expansion, self.rarity,
                      self.card_id, self.artist]
        values = [self.__escape_sql_value(value) for value in raw_values]
        create_card_query = u"""
            INSERT INTO cards (
                ID, name, mana, cmc, types, text, flavor,
                power, toughness, expansion, rarity, card_id, artist
            ) VALUES (
                '%s', '%s', '%s', '%s', '%s', '%s', '%s',
                '%s', '%s', '%s', '%s', '%s', '%s'
            )""" % tuple(values)
        create_card_query = create_card_query.encode('utf-8')
        self.db.query(create_card_query)

    def print_card(self):
        """Print every field of the card to stdout, one per line."""
        # Single-argument print(...) emits the same text whether print is a
        # statement or a function.
        print("Unique : %s" % self.unique_id)
        print("Name : %s" % self.name)
        print("Mana : %s" % self.mana)
        print("CMC : %s" % self.cmc)
        print("Types : %s" % self.types)
        print("Text : %s" % self.text)
        print("Flavor : %s" % self.flavor)
        print("P/T : %s / %s" % (self.power, self.toughness))
        print("Expansion : %s" % self.expansion)
        print("Rarity : %s" % self.rarity)
        print("Card ID : %s" % self.card_id)
        print("Artist : %s" % self.artist)