-
Notifications
You must be signed in to change notification settings - Fork 0
/
tsuTEAv1.4.py
205 lines (149 loc) · 5.91 KB
/
tsuTEAv1.4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# -*- coding: utf-8 -*-
"""tsunamifis_twittr_sentiment_analysis_botv1.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1c3F8klCW9uaJllbkFLyeQipfOu2rDRPr
# Tsunamifi's Twitter Sentiment Analysis Bot
This notebook will allow you to plug in a user OR topic from twitter and determine if their tweets are Positive, Negative or Neutral.
Why? for fun, probably."""
#core setup.
##importing things here!
### streamlit is a python web app framework, we're using it for its widgets and cute gui.
import streamlit as st
### pandas helps us sort data, we'll need it if you want to see results.
import pandas as pd
### matplot helps us visualize data too.
import matplotlib.pyplot as plt
### ntlk is a word processing library, we can use it to parse our tweets for our goal.
import nltk
from wordcloud import WordCloud,STOPWORDS
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
### tweepy is the official twitter python library/api; this is how we'll be able to source our tweets.
### textblob is a text processing library.
import tweepy
from tweepy import OAuthHandler
from textblob import TextBlob
### MISC
import re
import time
# --- Streamlit page scaffolding --------------------------------------------
# Wide layout gives the dataframe + wordcloud columns room to breathe.
st.set_page_config(layout="wide")
st.title("Twitter Sentiment Analysis Bot")
st.write("This WebAPP will allow you to plug in a topic from twitter and determine if the general discussion is positive, negative or neutral.")
st.title("Choose Topic on twitter to analyze")
# Collect the topic and sample size inside a form so the script only reruns
# the expensive fetch when the user presses Submit, not on every keystroke.
# (FIX: the `with` body had lost its indentation and would not parse.)
with st.form(key='vars'):
    texti = st.text_input(label='Choose topic')
    numberi = st.number_input(label= 'How many tweets should we source?', step=1, value=15)
    submit = st.form_submit_button(label='Submit')
# inner workings
## defining twitter auth setup here!
def auth():
    """Authenticate against the Twitter API and return a tweepy client.

    Returns:
        tweepy.API: an authenticated client ready for search calls.

    On failure, shows a Streamlit error and terminates the process.
    """
    import os

    # SECURITY NOTE(review): these credentials were committed in plain text
    # and must be considered compromised — revoke and rotate them. They now
    # fall back to the original values (behavior unchanged) but should be
    # supplied via environment variables instead.
    consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "KLhloeEIOr2de1Cnz7ddcxcmT")
    consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "WPQeRE5skCsCfBK8inJSPFTOEFMCUrPGMUyKsi1kjo8xKDaoxQ")
    access_token = os.environ.get("TWITTER_ACCESS_TOKEN", "1450493640132923397-NQ1fupgKuJZKZbPsi3gYw9EnngxapO")
    access_token_secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "eH3IvjkUatJuU7EdYtoSMZckBbIWtPpvYNFUW078VvMK4")
    # Attempt OAuth 1.0a authentication; narrowed from a bare `except:` so
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        auth = OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        api = tweepy.API(auth)
        return api
    except Exception:
        st.error("Error: Authentification Failed, try again?")
        exit(1)
## this will clean unnecessary and maybe complicated things out of a tweet
## like links or #'s
def cleanup(text):
    """Strip Twitter noise (mentions, URLs, punctuation) and stopwords.

    Args:
        text: raw tweet text.

    Returns:
        str: space-joined surviving tokens.
    """
    # One pass blanks out @mentions, any non-alphanumeric character, and
    # URLs. Raw string avoids invalid-escape-sequence warnings for `\w`/`\S`.
    text = re.sub(r'(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)', ' ', text)
    text_tokens = word_tokenize(text)
    # FIX: `stopwords.words()` used to be re-evaluated for every token and
    # linearly scanned (O(n) list membership). Build the set once instead.
    # NOTE(review): with no language argument this removes stopwords from
    # every downloaded corpus language — probably intends 'english'; confirm.
    stop_words = set(stopwords.words())
    text = ' '.join(word for word in text_tokens if word not in stop_words)
    return text
## here we're getting rid of parts of words that dont mean anything
## in sentiment analysis so we'll end up with just scoring rootwords
def root(text):
    """Return *text* with every token reduced to its Porter stem."""
    stemmer = PorterStemmer()
    # Stem token-by-token, then reassemble into a single space-joined string.
    stems = [stemmer.stem(token) for token in word_tokenize(text)]
    return " ".join(stems)
## lets find out the cleaned tweets' general emotion!
def get_tweet_score(analysis):
    """Label a TextBlob analysis as 'positive', 'negative' or 'neutral'.

    Args:
        analysis: object exposing `sentiment.polarity` (a float).

    Returns:
        str: the sentiment label derived from the polarity sign.
    """
    polarity = analysis.sentiment.polarity
    if polarity == 0:
        return 'neutral'
    return 'positive' if polarity > 0 else 'negative'
## we're gonna grab this x amount of tweets to parse
def fetch_tweets(query, count = 50):
    """Fetch up to *count* tweets matching *query* and score each one.

    Args:
        query: search topic to look up on Twitter.
        count: maximum number of tweets to request (default 50).

    Returns:
        list[tuple]: (raw_text, cleaned_text, sentiment_label) per tweet.
    """
    api = auth()
    results = []
    # Retweets are filtered server-side so near-duplicates don't dominate.
    raw_tweets = api.search(q = query + ' -filter:retweets', count = count)
    # Pipeline per tweet: clean -> stem -> TextBlob sentiment -> label.
    for status in raw_tweets:
        raw_text = status.text
        cleaned = cleanup(raw_text)
        label = get_tweet_score(TextBlob(root(cleaned)))
        results.append((raw_text, cleaned, label))
    return results
# Take off...
## this is how everything comes into play
def run():
    """End-to-end pipeline: fetch, score, summarize and visualize tweets."""
    tweets = fetch_tweets(query = texti, count = numberi)
    ### stuff our collected tweets in to a pandas dataframe
    ### and also specifying which columns.
    df = pd.DataFrame(tweets, columns= ['Tweets', 'Scrubbed Tweets', 'Result'])
    ### dropping duplicate tweets too..
    df = df.drop_duplicates(subset='Scrubbed Tweets')
    df.to_csv('tweetbank.csv', index= False)
    ### calculate and display total percentages
    # BUG FIX: percentages previously divided by len(tweets) (pre-dedup),
    # so positive + negative + neutral did not describe the deduplicated
    # data actually shown. Use the deduplicated frame as the denominator,
    # and guard against an empty result to avoid ZeroDivisionError.
    total = len(df)
    if total == 0:
        st.warning("No tweets found for " + texti)
        return
    ptweets = df[df['Result'] == 'positive']
    posper = (100*len(ptweets)/total)
    ntweets = df[df['Result'] == 'negative']
    negper = (100*len(ntweets)/total)
    nuper = (100 - posper - negper)
    st.write("Here's the overall climate concerning " + texti)
    col1, col2, col3 = st.columns(3)
    # One decimal place instead of raw float repr (e.g. 33.3% not 33.3333...%).
    col1.metric('Positive Tweets', f'{posper:.1f}%')
    col2.metric('Negative Tweets', f'{negper:.1f}%')
    col3.metric('Neutral Tweets', f'{nuper:.1f}%')
    ### generate wordcloud
    twt = " ".join(df['Scrubbed Tweets'])
    wordcloud = WordCloud(stopwords=STOPWORDS, background_color='white', width=2000, height=2000).generate(twt)
    # Render into an explicit figure handed to st.pyplot. (FIX: a stray
    # plt.show() before the figure was even created did nothing under
    # Streamlit and was removed.)
    fig = plt.figure(1,figsize=(8, 8))
    plt.axis('off')
    plt.imshow(wordcloud)
    col4, col5 = st.columns(2)
    with col4:
        st.caption('Here is our data for reference')
        st.dataframe(df)
    with col5:
        st.caption(f' Here are the words most commonly associated with {texti}')
        st.pyplot(fig)
    st.success('Done!')
## loading spinner, why because its cute.
def spin():
    """Display a three-second 'collecting' spinner.

    Purely cosmetic: the real work happens in run(); this just gives the
    user some visual feedback before results appear.
    """
    spinner = st.spinner('Collecting tweets...')
    with spinner:
        time.sleep(3)
# Entry point: run the pipeline once the form has been submitted.
# (FIX: indentation reconstructed; a dead `else: pass` branch removed.)
if submit:
    spin()
    try:
        run()
    except tweepy.TweepError as e:
        # Surface API failures (rate limits, bad credentials, ...) in the UI.
        st.error("There's something afoot... looks like " f'{e}')