story_plot_generator/custom_markov.py at master · criticallycode/story_plot_generator · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import random
from nltk.corpus import stopwords
import unidecode
import re

class Markov(object):
    """Word-level Markov chain text generator.

    A state is a tuple of ``order`` consecutive words; the chain records
    every word observed directly after each state in the training text and
    generates new text by repeatedly sampling one of those successors.
    """

    def __init__(self, order):
        """Create an untrained chain.

        Args:
            order: how many preceding words make up one state, i.e. how far
                back the process will look or remember.
        """
        self.order = order

        # controls the actual size of the word groups to be analyzed
        # (the state words plus the word that follows them)
        self.group_size = self.order + 1

        # the training text as a list of words; None until train() is called
        self.text = None

        # graph maps each state (tuple of `order` words) to the list of words
        # seen after it; duplicates are kept so frequent successors are
        # proportionally more likely to be sampled
        self.graph = {}

    def train(self, filename):
        """Read a corpus and add its word transitions to the graph.

        Args:
            filename: despite the name, an open file-like object; its
                ``read()`` result is split on whitespace into words.

        The text is treated as circular: its beginning is appended to its end
        so that every state, including the one that ends the corpus, has at
        least one successor. Calling train() again replaces ``self.text`` but
        keeps accumulating transitions in ``self.graph``.
        """
        words = filename.read().split()

        if not words:
            # nothing to learn from; generate() reports this case explicitly
            self.text = []
            return

        # Append the first `order` words to the end so the chain always has
        # something to generate. The modulo keeps the wrap-around complete
        # even when the corpus has fewer words than `order`.
        count = len(words)
        self.text = words + [words[i % count] for i in range(self.order)]

        # One transition per start position in the original text. The final
        # position must be included: skipping it leaves the state that ends
        # the corpus without a key, and generate() then fails with KeyError.
        for start in range(count):
            # key is the few words that came before the value
            key = tuple(self.text[start:start + self.order])
            # value is the word that follows them (word 3 for an order 2 chain)
            value = self.text[start + self.order]
            self.graph.setdefault(key, []).append(value)

    def generate(self, length):
        """Generate, print and return ``length`` words of text.

        Args:
            length: number of words to produce.

        Returns:
            The generated words joined by single spaces (the same string
            that is printed).

        Raises:
            ValueError: if the chain has not been trained, or was trained on
                text containing no words.
        """
        if not self.text:
            raise ValueError(
                "cannot generate text: the chain has not been trained on any words"
            )

        # Pick a random start window. randrange excludes the upper bound, so
        # the wrapped-around copy of the first window is not counted twice.
        index = random.randrange(len(self.text) - self.order)

        # the seed state; it is stripped from the output below
        result = self.text[index:index + self.order]

        for _ in range(length):
            # current state is the last `order` words of the result so far
            # (sliced from an explicit start index so order == 0 yields an
            # empty state instead of the whole list)
            state = tuple(result[len(result) - self.order:])
            # next word is randomly chosen from the successors of that state
            result.append(random.choice(self.graph[state]))

        generated = " ".join(result[self.order:])
        print(generated)
        return generated