-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathepisode.py
More file actions
149 lines (133 loc) · 5.15 KB
/
episode.py
File metadata and controls
149 lines (133 loc) · 5.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# Episode = namedtuple('Episode', ['no', 'img_url', 'title', 'rating', 'created_date'])
import os
from urllib.parse import urlencode
import requests
from bs4 import BeautifulSoup
class Episode:
    """One webtoon episode together with the local files mirrored for it.

    Takes over the role of the earlier ``Episode`` namedtuple. Creating an
    instance immediately downloads the thumbnail and the episode images and
    renders a local HTML viewer page (see ``__init__``).
    """

    def __init__(self, webtoon, no, url_thumbnail, title, rating, created_date):
        """Store the episode metadata and mirror its files to disk.

        :param webtoon: owning webtoon object; its ``title`` and ``title_id``
            attributes are used to build the output directories and the
            episode URL.
        :param no: episode number (used in file names and the ``no`` query
            parameter).
        :param url_thumbnail: URL of the episode thumbnail image.
        :param title: episode title.
        :param rating: episode rating (stored as given).
        :param created_date: episode creation date (stored as given).
        """
        self.webtoon = webtoon
        self.no = no
        self.url_thumbnail = url_thumbnail
        self.title = title
        self.rating = rating
        self.created_date = created_date

        # Directory layout, relative to the current working directory:
        #   webtoon/<title>/<title_id>_thumbnail/<no>.jpg
        #   webtoon/<title>/<title_id>_images/<no>/<index>.jpg
        #   webtoon/<title>/<title_id>_main/<no>.html
        base_dir = f'webtoon/{self.webtoon.title}'
        self.thumbnail_dir = f'{base_dir}/{self.webtoon.title_id}_thumbnail'
        self.image_dir = f'{base_dir}/{self.webtoon.title_id}_images/{self.no}'
        self.episode_dir = f'{base_dir}/{self.webtoon.title_id}_main'

        self.save_thumbnail()
        self.save_contents()

    @property
    def has_thumbnail(self):
        """Return True if ``<thumbnail_dir>/<no>.jpg`` already exists on disk."""
        path = f'{self.thumbnail_dir}/{self.no}.jpg'
        return os.path.exists(path)

    @staticmethod
    def _image_sort_key(filename):
        """Sort key placing numerically named files (``2.jpg`` < ``10.jpg``) first.

        Files whose stem is not a plain number sort after the numeric ones,
        ordered by name.
        """
        stem = os.path.splitext(filename)[0]
        if stem.isdigit():
            return (0, int(stem), filename)
        return (1, 0, filename)

    def save_thumbnail(self, force_update=False):
        """Download the thumbnail at ``url_thumbnail`` into ``thumbnail_dir``.

        :param force_update: when True, download and overwrite the file even
            if it already exists.
        :return: None
        """
        if self.has_thumbnail and not force_update:
            return

        os.makedirs(self.thumbnail_dir, exist_ok=True)
        response = requests.get(self.url_thumbnail)
        if not response.ok:
            # Writing an error body as <no>.jpg would make has_thumbnail True
            # and block every later retry, so leave the file absent instead.
            return

        filepath = f'{self.thumbnail_dir}/{self.no}.jpg'
        with open(filepath, 'wb') as f:
            f.write(response.content)

    def save_contents(self, force_update=False):
        """Download the episode images, then render the local HTML page.

        :param force_update: forwarded to ``_save_images`` and ``_make_html``.
        :return: None
        """
        self._save_images(force_update=force_update)
        self._make_html(force_update=force_update)

    def _save_images(self, force_update=False):
        """Download every image of this episode's page into ``image_dir``.

        Files are written as ``<image_dir>/<index>.jpg`` where ``index`` is
        the 1-based position of the ``img`` tag inside ``.wt_viewer``.

        :param force_update: when True, re-download images that already exist.
        :return: None
        """
        os.makedirs(self.image_dir, exist_ok=True)

        # URL of the episode's content page.
        params = {
            'titleId': self.webtoon.title_id,
            'no': self.no,
        }
        url_contents = 'http://comic.naver.com/webtoon/detail.nhn?' \
                       + urlencode(params)

        response = requests.get(url_contents)
        # Name the parser explicitly: omitting it makes bs4 warn and pick
        # whichever parser happens to be installed.
        soup = BeautifulSoup(response.text, 'html.parser')

        viewer = soup.select_one('.wt_viewer')
        if viewer is None:
            # Page has no viewer container; nothing to download.
            return

        # Each image request needs the content page as its Referer header.
        headers = {
            'Referer': url_contents,
        }
        # src=True keeps only <img> tags that actually carry a src attribute.
        for index, img in enumerate(viewer.find_all('img', src=True), start=1):
            filepath = f'{self.image_dir}/{index}.jpg'
            if os.path.exists(filepath) and not force_update:
                continue

            image_response = requests.get(img['src'], headers=headers)
            if not image_response.ok:
                # Skip failed downloads so a later run can retry them instead
                # of finding a corrupt file that looks complete.
                continue

            with open(filepath, 'wb') as f:
                f.write(image_response.content)

    def _make_html(self, force_update=False):
        """Render ``<episode_dir>/<no>.html`` from ``html/detail_html.html``.

        The template's ``*title*`` placeholder is replaced with
        ``"<webtoon title> - <episode title>"`` and ``*contents*`` with one
        ``<img>`` tag per file found in ``image_dir``, in page order.

        :param force_update: when True, regenerate the page even if it exists.
        :return: None
        """
        html_path = f'{self.episode_dir}/{self.no}.html'
        if os.path.exists(html_path) and not force_update:
            return

        os.makedirs(self.episode_dir, exist_ok=True)

        # NOTE(review): assumes the template file is UTF-8 encoded - confirm.
        with open('html/detail_html.html', 'rt', encoding='utf-8') as f:
            detail_html = f.read()

        detail_html = detail_html.replace(
            '*title*', f'{self.webtoon.title} - {self.title}'
        )

        # os.listdir returns entries in arbitrary order; sort numerically so
        # the page shows 1, 2, ..., 10 rather than 1, 10, 2.
        image_files = sorted(os.listdir(self.image_dir), key=self._image_sort_key)
        # The page lives three directories below the working directory
        # (webtoon/<title>/<title_id>_main/), hence the '../../../' prefix.
        img_list_html = ''.join(
            f'<img src="../../../{self.image_dir}/{file}">' for file in image_files
        )
        detail_html = detail_html.replace('*contents*', img_list_html)

        with open(html_path, 'wt', encoding='utf-8') as f:
            f.write(detail_html)
# if __name__ == '__main__':
# el = pickle.load(open('db/697680.txt', 'rb'))
# e = el[0]
# e._save_images()