forked from timothybeal/kjvbot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
markovbot.py
75 lines (55 loc) · 1.99 KB
/
markovbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
'''
Created on Mar 10, 2016
@author: timothybeal and textpotential
Functions for generating a Markov-chain utterance and, optionally, tweeting it.
'''
from collections import defaultdict
from itertools import tee
from random import choice
import re
from nltk.tokenize import sent_tokenize
import tweepy
def nwise(iterable, n=2):
    """Return a sliding window of ``n`` consecutive items over ``iterable``.

    ``iterable`` must support ``len()``.  If it holds fewer than ``n`` items
    the function returns ``None``.  Otherwise it returns a ``zip`` iterator
    whose k-th tuple contains items ``k`` through ``k + n - 1``.
    """
    if n > len(iterable):
        return None
    shifted = []
    for offset, stream in enumerate(tee(iterable, n)):
        # Drop the first ``offset`` items so that stream k starts at item k.
        skipped = 0
        while skipped < offset:
            next(stream)
            skipped += 1
        shifted.append(stream)
    return zip(*shifted)
def build_sentence(seed, sent_tokens):
    """Extend ``seed`` with random follower tokens until a sentence ends.

    At each step the last three tokens of ``seed`` (fewer if ``seed`` is
    shorter) are used as a tuple key into ``sent_tokens`` and one of the
    tokens stored under that key is chosen at random.  Generation stops once
    the chosen token is ``.``, ``?`` or ``!``.

    Args:
        seed: List of starting tokens.  It is extended in place with every
            generated token.
        sent_tokens: Mapping from a tuple of tokens to the list of tokens
            that may follow it.

    Returns:
        The tokens joined with single spaces, with any whitespace directly
        before ``.``, ``,``, ``?`` or ``!`` removed.

    Raises:
        IndexError: If the current trailing tokens have no recorded follower
            (for example, the seed words never occur together in the model).
    """
    # Built once instead of on every loop iteration.
    terminators = frozenset('.?!')
    token = ''
    while token not in terminators:
        last_tokens = tuple(seed[-3:])
        # .get() keeps a defaultdict model from growing an empty entry for
        # every unseen state that is queried.
        candidates = sent_tokens.get(last_tokens)
        if not candidates:
            raise IndexError(
                'no continuation recorded for tokens {!r}'.format(last_tokens))
        token = choice(candidates)
        seed.append(token)
    sentence = ' '.join(seed)
    # Re-attach punctuation to the preceding word ("word ," -> "word,").
    return re.sub(r'\s+([.,?!])', r'\1', sentence)
def markovize(word1, word2, word3, fileid, char_limit=None, max_attempts=None):
    """Generate a Markov-chain sentence that starts with three given words.

    The text in ``fileid`` is split into sentences, each sentence is
    tokenised into words and the punctuation marks ``. , ? !``, and every
    run of four consecutive tokens is recorded as "three-token state ->
    possible next token".  Sentences are then generated from the seed
    ``word1 word2 word3`` until one satisfies ``char_limit``.

    Args:
        word1: First seed token.
        word2: Second seed token.
        word3: Third seed token.
        fileid: Path of the text file to learn from; read as UTF-8.
        char_limit: Maximum allowed length of the result in characters, or
            ``None`` for no limit.
        max_attempts: Maximum number of sentences to generate while looking
            for one within ``char_limit``.  ``None`` (the default) retries
            without bound, which never returns if no reachable sentence is
            short enough.

    Returns:
        The generated sentence as a string.

    Raises:
        ValueError: If ``max_attempts`` sentences were generated and none
            fit within ``char_limit``.
        IndexError: Propagated from ``build_sentence`` when the chain reaches
            a state with no recorded follower (e.g. the seed words do not
            occur together in the text).
    """
    with open(fileid, encoding='utf-8') as f:
        text = f.read()

    # Build the model: (token1, token2, token3) -> [token4, ...]
    sent_tokens = defaultdict(list)
    for sentence in sent_tokenize(text):
        tokens = re.findall(r"[\w']+|[.,?!]", sentence)
        four_grams = nwise(tokens, n=4)
        # nwise returns None for sentences shorter than four tokens.
        if four_grams:
            for token1, token2, token3, token4 in four_grams:
                sent_tokens[token1, token2, token3].append(token4)

    attempts = 0
    while max_attempts is None or attempts < max_attempts:
        attempts += 1
        # A fresh seed list each time: build_sentence extends it in place.
        utterance = build_sentence([word1, word2, word3], sent_tokens)
        if char_limit is None or len(utterance) <= char_limit:
            return utterance
    raise ValueError(
        'no sentence within {} characters after {} attempts'.format(
            char_limit, max_attempts))
def post_tweet(consumer_key, consumer_secret, access_key, access_secret, tweet):
    """Post ``tweet`` as a status update through tweepy.

    An OAuth handler is created from the consumer key/secret, given the
    access key/secret, and used to construct the API client whose
    ``update_status`` is called with ``tweet``.  Nothing is returned.
    """
    handler = tweepy.OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_key, access_secret)
    client = tweepy.API(handler)
    client.update_status(tweet)