# Source: pystatic/pystatic.py at master · Zedelghem/pystatic · GitHub

#        ██████╗ ██╗   ██╗███████╗████████╗ █████╗ ████████╗██╗ ██████╗
#        ██╔══██╗╚██╗ ██╔╝██╔════╝╚══██╔══╝██╔══██╗╚══██╔══╝██║██╔════╝
#        ██████╔╝ ╚████╔╝ ███████╗   ██║   ███████║   ██║   ██║██║
#        ██╔═══╝   ╚██╔╝  ╚════██║   ██║   ██╔══██║   ██║   ██║██║
#        ██║        ██║   ███████║   ██║   ██║  ██║   ██║   ██║╚██████╗
#        ╚═╝        ╚═╝   ╚══════╝   ╚═╝   ╚═╝  ╚═╝   ╚═╝   ╚═╝ ╚═════╝
#
#┌─┐┌┬┐┌─┐┌┬┐┬┌─┐  ┌┐ ┬  ┌─┐┌─┐┌─┐┬┌┐┌┌─┐  ┌┬┐┬ ┬┌─┐  ┌─┐┬ ┬┌┬┐┬ ┬┌─┐┌┐┌  ┬ ┬┌─┐┬ ┬
#└─┐ │ ├─┤ │ ││    ├┴┐│  │ ││ ┬│ ┬│││││ ┬   │ ├─┤├┤   ├─┘└┬┘ │ ├─┤│ ││││  │││├─┤└┬┘
#└─┘ ┴ ┴ ┴ ┴ ┴└─┘  └─┘┴─┘└─┘└─┘└─┘┴┘└┘└─┘   ┴ ┴ ┴└─┘  ┴   ┴  ┴ ┴ ┴└─┘┘└┘  └┴┘┴ ┴ ┴
#
##################################################################################
##################################################################################
###                         This is the build file for                         ###
###                                                                            ###
###                              Pystatic, v 1.1                               ###
###                                                                            ###
###                            as of July 23, 2018                             ###
###                  https://github.com/Zedelghem/pystatic/                    ###
###                                                                            ###
###                                      by                                    ###
###                                                                            ###
###                              Borys Jastrzębski                             ###
###                                                                            ###
###                          Licensed under GNU GPL 3.0                        ###
###                                                                            ###
###     Before attempting any changes in this file, please make sure you've    ###
###     read the documentation on the GitHub page of the project. I kept it    ###
###     concise and it can save you a lot of trouble.                          ###
###                                                                            ###
###     Also, take a look at the discussion on the GitHub forums before        ###
###     writing an extension to Pystatic. I might already be working on        ###
###     a similar feature.                                                     ###
###                                                                            ###
##################################################################################
##################################################################################

import glob
from dateutil import parser
import locale
from os import makedirs, listdir, path
import codecs
import markdown
import shutil
import math
import rfeed
import datetime

# Adopt the user's default locale (taken from the environment) for all
# categories. Locale-dependent output in this file relies on it: the "%b"
# month names produced by strftime() in Post.build_pretty_date and the
# language reported through locale.getlocale() for the RSS feed.
locale.setlocale(locale.LC_ALL, '')

# Setting up the post class to make things clearer and easier
class Post (object):
    """One blog post stored as ``<path>/<filename>.<extension>``.

    The file name is expected to start with a date, followed by the title
    separator (for example ``2018-07-23_my-post``).  Attributes such as
    ``title``, ``author``, ``tags``, ``content``, ``date`` and ``excerpt`` are
    filled in by the methods below, not by the constructor.
    """

    def __init__(self, path, filename, author='Author', extension="md"):
        # NOTE: ``author`` is accepted for backward compatibility but is not
        # stored; the author of a post comes from its header (see get_content).
        self.path = path
        self.filename = filename
        self.extension = extension

    def build_pretty_date(self, titleseparator="_", numberseparator="-", date_format="%b %d"):
        """Set ``self.date`` (formatted) and ``self.original_ugly_date`` (parsed).

        When the post has a ``timestamp`` attribute, its first space-separated
        token is used as the date and the second one (if any) is stored in
        ``self.time``; otherwise the date is taken from the start of the file
        name.  ``numberseparator`` is unused and kept for compatibility.
        """
        if hasattr(self, 'timestamp'):
            stamp_parts = self.timestamp.split(" ")
            raw_date = stamp_parts[0].split(titleseparator)[0]
            # A date-only timestamp is valid; do not fail on the missing time.
            self.time = stamp_parts[1] if len(stamp_parts) > 1 else ""
        else:
            raw_date = self.filename.split(titleseparator)[0]

        # Always keep the parsed date: it is needed to generate RSS items,
        # also for posts that carry an explicit timestamp.
        self.original_ugly_date = parser.parse(raw_date)
        self.date = self.original_ugly_date.strftime(date_format).capitalize()

    def get_content(self, headerseparator="---", obligatory=['title'], optional=['author', 'timestamp', "tags", "excerpt"]):
        """Read the post file and populate header attributes and ``content``.

        Structure of the post file: a header of ``key: value`` lines, then a
        line containing ``headerseparator``, then the Markdown body.
        Recognised header keys are those listed in ``obligatory`` and
        ``optional``:

        1. [Obligatory] title
        2. [Optional] author (``self.author`` is ``None`` when absent)
        3. [Optional] timestamp, to enforce order for multiple posts a day
        4. [Optional] tags (``self.tags`` is ``""`` when absent)
        5. [Optional] excerpt (rendered through markdown when present)

        Raises:
            ValueError: if ``headerseparator`` does not occur in the file.
        """
        # Load post file; the context manager closes it even if read() fails.
        with open(self.path + "/" + self.filename + "." + self.extension, 'r') as post_file:
            raw_post = post_file.read()

        # Split at the FIRST separator only, so that later occurrences of the
        # separator (e.g. a Markdown horizontal rule) stay part of the body.
        header_text, separator_found, body = raw_post.partition(headerseparator)
        if not separator_found:
            raise ValueError("Post " + self.filename + " has no header separator '" + headerseparator + "'")

        # Extract header details and store them in a dictionary.
        allowed_keys = set(obligatory) | set(optional)
        header = {}
        for entry in header_text.rstrip().split("\n"):
            # Partition on the first ": " so values may themselves contain it.
            key, has_value, value = entry.partition(": ")
            if has_value and key in allowed_keys:
                header[key] = value

        # If title was not set, report it (best effort: keep going).
        try:
            self.title = header['title']
        except KeyError as e:
            print("You need to set title in the header of the post", self.filename, "!")
            print(str(e))

        # Check for and assign optional header declarations.
        self.author = header.get('author')
        self.tags = header.get('tags', "")

        if 'timestamp' in header:
            self.timestamp = header['timestamp']

        if 'excerpt' in header:
            self.excerpt = markdown.markdown(header['excerpt'])

        # Everything after the header separator is the content.
        self.content = body.rstrip()

    # I left an option for sentences as a unit of excerpt_len
    # Not working properly now – doesn't crash but tuned only to fullstops.
    def get_excerpt(self, len_type="chars", excerpt_len="500"):
        """Generate ``self.excerpt`` from the content unless one is already set.

        ``len_type`` is one of ``"chars"``, ``"words"`` or ``"sentences"`` and
        ``excerpt_len`` the number of such units to keep (applied to the HTML
        produced by markdown).  An unparsable ``excerpt_len`` is reported and
        replaced by the default of 500.

        Raises:
            ValueError: if ``len_type`` is not one of the supported units.
        """
        if hasattr(self, "excerpt"):
            return

        if len_type not in ("chars", "words", "sentences"):
            raise ValueError("Unknown excerpt length type: " + str(len_type))

        try:
            length = int(excerpt_len)
        except (TypeError, ValueError) as e:
            print("Could not change the type of excerpt_len to int")
            print(str(e))
            length = 500

        parsed_content = markdown.markdown(self.content)

        if len_type == "chars":
            excerpt_ready = parsed_content[:length]
        elif len_type == "words":
            excerpt_ready = " ".join(parsed_content.split(" ")[:length])
        else:
            excerpt_ready = ". ".join(parsed_content.split(". ")[:length])

        self.excerpt = excerpt_ready.rstrip() + "..."

    # Function to generate an RSS Item object for rfeed to generate the RSS Feed
    def make_rss_item(self):
        """Placeholder; not implemented (returns ``None``)."""
        pass

# Function to generate post objects for every file with a specific extension in a given path
def generate_posts(directory, extension="md"):
    """Return a list of Post objects, one per ``*.<extension>`` file in ``directory``.

    ``extension`` may be given with or without a leading dot.  Only the final
    extension is removed from the file name, so names that contain the
    extension elsewhere are preserved.
    """
    suffix = "." + extension.lstrip(".")

    # Loading filenames in the posts directory. The dot is part of the
    # pattern so that e.g. "notes.bmd" is not mistaken for an "md" post.
    posts_paths = glob.glob(path.join(directory, "*" + suffix))

    # Strip directory (portable across path separators) and the extension.
    posts_fnames = [path.basename(post_path)[:-len(suffix)] for post_path in posts_paths]

    # Generate the post objects.
    return [Post(directory, fname, extension=suffix[1:]) for fname in posts_fnames]

# Function to filter out files with badly formatted dates in the filenames
def filter_bad_dates(posts_list, titleseparator="_", numberseparator="-"):
    """Return the posts whose file name starts with a well-formed date.

    The part of ``post.filename`` before ``titleseparator`` is split on
    ``numberseparator`` and must look like YYYY-MM-DD: integer components,
    a 4-digit year, a 2-digit month in 1..12 and a 2-digit day in 1..31.
    Rejected posts are reported on stdout and left out of the result; the
    order of the remaining posts is preserved.
    """
    posts_to_return = []

    for post in posts_list:
        datecheck = post.filename.split(titleseparator)[0].split(numberseparator)

        # Check if parts of the datestamp are in fact integers
        try:
            numbers = [int(num) for num in datecheck]
        except ValueError as e:
            print(str(e))
            continue

        # Check that year, month and day are all present and that
        # YYYY has 4 nums, MM 2 and DD 2
        if len(datecheck) < 3 or len(datecheck[0]) != 4 or len(datecheck[1]) != 2 or len(datecheck[2]) != 2:
            print("Date in the filename badly formatted. Looking for isoformat (YYYY-MM-DD).")
            continue

        # Month and day must be in range (zero is not a valid month or day).
        if not (1 <= numbers[1] <= 12 and 1 <= numbers[2] <= 31):
            print("Date in the filename badly formatted. There are only 12 months and max 31 days a month.")
            continue

        posts_to_return.append(post)

    return posts_to_return

def order(posts_list, reversed_true_or_not=True):
    """Return a new list of the posts sorted by ``filename``.

    With ``reversed_true_or_not`` left at True the sort is descending.
    """
    def by_filename(post):
        return post.filename

    ranked = list(posts_list)
    ranked.sort(key=by_filename, reverse=reversed_true_or_not)
    return ranked

# Copytree from https://stackoverflow.com/questions/1868714/how-do-i-copy-an-entire-directory-of-files-into-an-existing-directory-using-pyth
def copytree(src, dst, symlinks=False, ignore=None):
    """Copy every entry of directory ``src`` into the directory ``dst``.

    Sub-directories are copied with ``shutil.copytree`` (receiving
    ``symlinks`` and ``ignore``); every other entry is copied with
    ``shutil.copy2``.
    """
    for entry in listdir(src):
        source_path = path.join(src, entry)
        target_path = path.join(dst, entry)
        if not path.isdir(source_path):
            shutil.copy2(source_path, target_path)
            continue
        shutil.copytree(source_path, target_path, symlinks, ignore)

def build_site_folders():
    """Create the ``site`` output folder and its sub-folders if missing.

    Paths are relative to the current working directory; folders that
    already exist are left untouched.
    """
    for folder in ("site", "site/posts", "site/assets", "site/css", "site/lib"):
        makedirs(folder, exist_ok=True)

def inject_markdowned_content(post_object, paste_where, paste_where_title, wrapper_class, template):
    """Render one post into ``template`` and write ``site/posts/<filename>.html``.

    ``paste_where`` in the template is replaced by the post title, the
    optional author paragraph and the markdown-rendered content wrapped in a
    ``<section>`` carrying ``wrapper_class`` and the post's tags;
    ``paste_where_title`` is replaced by the post title.  The file is written
    as UTF-8.
    """
    try:
        # Trying to use markdown library with the footnotes extension
        content_html = markdown.markdown(post_object.content, extensions=['footnotes'])
    except Exception as e:
        # Fall back to plain markdown if the extension cannot be used.
        print(str(e))
        content_html = markdown.markdown(post_object.content)

    # Checking for Author and adding information
    if post_object.author is not None:
        post_author_par = '\n <p class="post_author">' + post_object.author + '</p>'
    else:
        post_author_par = ""

    post_html = (
        '<h1 class="post_title">' + post_object.title + '</h1>'
        + post_author_par
        + '\n <section class="' + wrapper_class + " " + post_object.tags + '">'
        + content_html
        + '</section>'
    )
    target = template.replace(paste_where, post_html).replace(paste_where_title, post_object.title)

    # The context manager closes the file even when the write fails.
    with codecs.open("site/posts/" + post_object.filename + ".html", "w", encoding="utf-8", errors="xmlcharrefreplace") as output_file:
        output_file.write(target)

def build_posts_folder(posts_list, template_file, in_path="posts", ignore_empty=True, extension="md", paste_where="<!--###POST_CONTENT###-->", paste_where_title="<!--###POSTPAGE_TITLE###-->", wrapper_class="postcontent"):
    """Write one HTML page per post, based on the template in ``template_file``.

    When ``ignore_empty`` is true, posts whose ``content`` is the empty
    string are reported and skipped.  ``in_path`` and ``extension`` are not
    used here; they are kept so existing callers keep working.
    """
    # Load template file
    with open(template_file) as template_f:
        template = template_f.read()

    for post in posts_list:
        if ignore_empty and post.content == "":
            print("Not adding", post.filename, "to the posts folder. It is empty! Write something first.")
            continue

        inject_markdowned_content(post, paste_where, paste_where_title, wrapper_class, template)

# Build main page of the blog
def build_index_page(posts_list, template_file, paste_where="<!--###POSTS_LIST###-->", ul_class="postlist", ignore_empty=True, excerpts_on=False, posts_per_page=0, pages_in_multiple_files=False, readmore="Read more >>"):
    """Build the blog index from ``template_file`` and write it under ``site/``.

    ``paste_where`` in the template is replaced with the generated post list.

    * ``posts_per_page == 0``: a single ``<ul>`` with every post is written
      to ``site/index.html``.
    * ``posts_per_page > 0``: posts are split into pages.  With
      ``pages_in_multiple_files`` each page goes to its own file
      (``index.html``, ``index2.html``, ...); otherwise all pages are written
      as separate ``<ul id="pageN">`` elements into ``site/index.html``.
      Either way a ``page_navigation`` block of links is put before the list.
    * ``posts_per_page < 0``: an error is printed and nothing is written.

    When ``ignore_empty`` is true, posts with empty content are skipped
    before pagination, so they never produce empty trailing pages.
    """
    if posts_per_page < 0:
        print("Invalid posts_per_page value. Should be an integer >= 0")
        return

    # Generate an <li> for every post that should be listed.
    items = _render_index_items(posts_list, ignore_empty, excerpts_on, readmore)

    # Take the .html blueprint in; the list is injected in the space provided.
    with open(template_file) as template_f:
        template = template_f.read()

    if posts_per_page == 0:
        listing = '<ul class="' + ul_class + '">' + "".join(items) + "</ul>"
        _write_index_file("site/index.html", template.replace(paste_where, listing))
        return

    # Populate pages from the posts that are actually listed.
    pages = [items[start:start + posts_per_page] for start in range(0, len(items), posts_per_page)]
    pages_parsed = [
        '<ul id="page' + str(number) + '" class="' + ul_class + '">' + "".join(page) + "</ul>"
        for number, page in enumerate(pages, 1)
    ]
    page_numbers = range(1, len(pages) + 1)

    # Interpret pages as either multiple .html files...
    if pages_in_multiple_files:
        # The first page is index.html, the following ones index2.html, ...
        suffixes = ["" if number == 1 else str(number) for number in page_numbers]

        # Add navigation between pagefiles
        pagenav = ['<div id="page_navigation">']
        for number, suffix in zip(page_numbers, suffixes):
            pagenav.append('<a href="index' + suffix + '.html">' + str(number) + '</a>')
        pagenav.append("</div>")
        navigation = " ".join(pagenav)

        for suffix, pagefile in zip(suffixes, pages_parsed):
            _write_index_file("site/index" + suffix + ".html", template.replace(paste_where, navigation + pagefile))

    # ...or as multiple <ul>s within one index.html file, for example for the purpose of CSS tabs-based pagination
    else:
        # Add navigation between <ul>s with different ids
        pagenav = ['<div id="page_navigation">']
        for number in page_numbers:
            pagenav.append('<a href="#page' + str(number) + '">' + str(number) + '</a>')
        pagenav.append("</div>")

        _write_index_file("site/index.html", template.replace(paste_where, " ".join(pagenav) + "".join(pages_parsed)))


def _render_index_items(posts_list, ignore_empty, excerpts_on, readmore):
    """Return the ``<li>`` markup for every post to be shown in the index."""
    items = []
    for post in posts_list:
        # Ignore posts with empty content attribute
        if ignore_empty and post.content == "":
            print("Not adding", post.filename, "to the post index. It is empty! Write something before publishing. ;)")
            continue

        post_link = '<a href="posts/' + post.filename + ".html" + '">'
        if excerpts_on:
            excerpt = '<div class="excerpt">' + post.excerpt + '<span class="readmore">' + post_link + readmore + '</a>' + '</span>' + '</div>'
        else:
            excerpt = ""

        items.append('<li class= "' + post.tags + '"><span class="date">' + post.date + '</span><span class="title">' + post_link + post.title + '</a></span>' + excerpt + '</li>')
    return items


def _write_index_file(target_path, html):
    """Write ``html`` to ``target_path``, closing the file even on failure."""
    with open(target_path, 'w') as output_file:
        output_file.write(html)

def strRepresentsInt(s):
    """Return True if ``int(s)`` succeeds, False otherwise.

    Values that ``int()`` rejects with a TypeError (such as ``None`` or a
    list) are reported as False instead of raising.
    """
    try:
        int(s)
    except (TypeError, ValueError):
        return False
    return True

def parse_config(filename):
    """Parse a Pystatic config file into ``[in_path, options_dict]``.

    Only lines starting with ``$`` are options, written as ``$name: value``.
    The value is everything after the first ``:``, so it may itself contain
    colons (for example a URL).  Values ``True``/``False`` become booleans,
    integer-like values become ints, ``obligatory_header`` and
    ``optional_header`` (when present) become lists split on ``", "``, and
    everything else stays a string.

    Raises:
        KeyError: if the mandatory ``in_path`` option is missing.
    """
    # Load in the config file; the context manager closes it.
    with open(filename, "r") as cfg_file:
        cfg_lines = cfg_file.readlines()

    # Extract options, i.e. lines with "$", drop "$" and build a dict
    options = {}
    for line in cfg_lines:
        if not line.startswith("$"):
            continue
        key, separator, value = line[1:].partition(":")
        if not separator:
            print("Ignoring malformed config line (no ':' found):", line.strip())
            continue
        options[key.strip()] = value.strip()

    # Extract positional arguments
    # For now it means only: in_path
    positional_args = [options.pop("in_path")]

    # Extract arguments coming in lists
    # For now it means: obligatory_header and optional_header
    # If more list-valued arguments pop up, add them to list_args
    list_args = ["obligatory_header", "optional_header"]
    for arg in list_args:
        if arg in options:
            options[arg] = options[arg].split(", ")

    # Look for potential booleans and ints to convert
    for key, val in list(options.items()):
        if isinstance(val, list):
            continue
        if val == "True":
            options[key] = True
        elif val == "False":
            options[key] = False
        else:
            try:
                options[key] = int(val)
            except ValueError:
                # Not integer-like: keep the value as a string.
                pass

    # Combine all the arguments into the final list of options
    return positional_args + [options]

def build_website(in_path, ignore_empty_posts=True, index_template="templates/index.html", post_template="templates/post.html", css_and_assets_path="templates", extension="md", index_paste_where="<!--###POSTS_LIST###-->", post_paste_where="<!--###POST_CONTENT###-->", title_paste_where="<!--###POSTPAGE_TITLE###-->",ul_class="postlist", post_wrapper="postcontent", headerseparator="---", obligatory_header=['title'], optional_header=['author', 'timestamp', 'tags', 'excerpt'], excerpt_type="chars", excerpt_len="500", excerpts_on=False, readmore="Read more >>", posts_per_page=0, pages_in_multiple_files=False, postlist_date_format="%d %b '%y", rss_feed_on=True, rss_feed_url="rss", blurb_is_manual_excerpt=False, rss_max_posts_number=10, blog_domain="", rss_feed_description='', rss_feed_title="My blog's RSS feed"):
    """Build the whole site into the ``site`` folder.

    Steps: load the posts found in ``in_path``, drop those with badly dated
    file names, sort them, read and prepare every post, recreate ``site``,
    write the index and post pages, optionally write the RSS feed, and copy
    the ``css``, ``assets`` and ``lib`` folders from ``css_and_assets_path``.

    Every step reports its own failure on stdout.  If the posts cannot be
    loaded, filtered or ordered the build stops before the existing ``site``
    folder is deleted.  A post that cannot be prepared is reported and left
    out instead of aborting the remaining posts.
    ``blurb_is_manual_excerpt`` is currently unused.
    """
    # Load, filter and order the posts. Nothing downstream can work without
    # the post list, so stop early and keep the previous build intact.
    try:
        fresh_posts = generate_posts(in_path, extension)
    except Exception as e:
        print("Could not generate posts. Did you provide correct path to the post folder?")
        print(str(e))
        return

    try:
        filtered_posts = filter_bad_dates(fresh_posts)
    except Exception as e:
        print("Could not filter posts. Dunno why.")
        print(str(e))
        return

    try:
        ordered_posts = order(filtered_posts)
    except Exception as e:
        print("Could not order posts. It's impossible.")
        print(str(e))
        return

    # Prepare every post on its own so one broken file does not leave all
    # the following posts without content, date and excerpt.
    prepared_posts = []
    for post in ordered_posts:
        try:
            post.get_content(headerseparator=headerseparator, obligatory=obligatory_header, optional=optional_header)
            post.build_pretty_date(date_format=postlist_date_format)
            post.get_excerpt(len_type=excerpt_type, excerpt_len=excerpt_len)
        except Exception as e:
            print("Something went wrong with generating content and prettyfying dates. WHY?")
            print(str(e))
            print("Skipping post", post.filename)
            continue
        prepared_posts.append(post)
    ordered_posts = prepared_posts

    # Delete target folder so it can be rebuilt without conflicts
    try:
        shutil.rmtree("site", ignore_errors=True)
    except Exception as e:
        print("Could not delete previous site folder. Check file permissions for the script.")
        print(str(e))

    try:
        build_site_folders()
    except Exception as e:
        print("Folders could not be built. Check file permissions.")
        print(str(e))

    try:
        build_index_page(ordered_posts, index_template, ignore_empty=ignore_empty_posts, paste_where=index_paste_where, ul_class=ul_class, excerpts_on=excerpts_on, readmore=readmore, posts_per_page=posts_per_page, pages_in_multiple_files=pages_in_multiple_files)
    except Exception as e:
        print("Could not build index page. Did you provide a template?")
        print(str(e))

    try:
        build_posts_folder(ordered_posts, post_template, ignore_empty=ignore_empty_posts, in_path=in_path, extension=extension, paste_where=post_paste_where, paste_where_title=title_paste_where, wrapper_class=post_wrapper)
    except Exception as e:
        print("Could not build post pages. Did you provide a template?")
        print(str(e))

    try:
        # Build RSS Feed
        # Compared with == on purpose: values coming from a config file may
        # be strings, and only a real True should switch the feed on.
        if rss_feed_on == True:

            # Loop to make items for the rfeed feed object.
            # Slicing already copes with fewer posts than the maximum, so the
            # newest rss_max_posts_number posts (or all of them) are included.
            rss_item_list = []
            for post in ordered_posts[:rss_max_posts_number]:
                post_url = blog_domain + "/posts/" + post.filename + ".html"
                rss_item_list.append(rfeed.Item(
                    title=post.title,
                    link=post_url,
                    description=post.excerpt,
                    author=post.author,
                    guid=rfeed.Guid(post_url),
                    # Not every post carries the parsed date; omit pubDate
                    # rather than lose the whole feed.
                    pubDate=getattr(post, "original_ugly_date", None)
                ))

            rss_feed = rfeed.Feed(
                title = rss_feed_title,
                link = blog_domain + "/" + rss_feed_url,
                description = rss_feed_description,
                language = locale.getlocale()[0],
                lastBuildDate = datetime.datetime.now(),
                items=rss_item_list
            )

            # Writing the RSS feed to a file at the specified location
            with open("site/" + rss_feed_url, 'w+') as rss_target:
                rss_target.write(rss_feed.rss())

    except Exception as e:
        print("Could not generate the RSS feed or decide whether it should be generated at all")
        print(str(e))

    # Copy all css, assets and lib
    for folder in ("css", "assets", "lib"):
        try:
            copytree(css_and_assets_path + "/" + folder, "site/" + folder)
        except Exception as e:
            print("Tried to copy contents of", css_and_assets_path, "/" + folder + " folder but the folder does not exist! Make one, even empty!")
            print(str(e))