-
Notifications
You must be signed in to change notification settings - Fork 1
/
Reddit_API.py
109 lines (82 loc) · 3.56 KB
/
Reddit_API.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# -*- coding: utf-8 -*-
"""
This file was mostly used for debugging
"""
import time
from class_architecture import SubmissionCollection
from wordcount import calculateJacquard
from wordcount import pearsonCorrelation
from histograms import separateOverlapSubCommentHists
from histograms import mixedOverlapSubCommentHists
from negative_sent import negative_entities
from LDA import performLDA
import os
def main():
    """Run one interactive search-and-display session on the console.

    Asks whether to start a new search:

    * ``'y'``: reads the query, the subreddit, the submission limit and the
      comment limit, builds a ``SubmissionCollection`` from them, prints the
      elapsed time and the average comment lengths, then asks which single
      analysis to show (``'hist'``, ``'jacc'``, ``'lda'``, ``'pears'`` or
      ``'stop'``).
    * ``'n'``: prints a goodbye message.
    * anything else: prints a "wrong input" message.

    The outer and inner ``while True`` loops are currently disabled, so every
    prompt is asked at most once per call.

    Any ``Exception`` raised along the way (for example a non-numeric limit)
    is caught and reported on the console instead of propagating.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught, so
    Ctrl-C still aborts the session.
    """
    # nltk.download('stopwords')
    # nltk.download('punkt')
    # nltk.download('wordnet')
    print("Climate Change News Analysis. Discovering Arguments")
    try:
        continueBool = input("Do you want to start a new search ? (y,n): ")

        # Stopping: nothing else to do.
        if continueBool == 'n':
            print("Goodbye !")
            return

        # Wrong input: this must be the fallback of the y/n decision, not the
        # ``else`` clause of the ``try`` (which would run after every
        # successful session, valid answers included).
        if continueBool != 'y':
            print("Wrong input, please retry !")
            return

        # Search parameters (query, subReddit, submission limit, comment limit).
        query = input("Enter a query : ")
        subReddit = input("What subreddit do you want to browse (type 'all' for browsing everything): ")
        subLimit = int(input("How much submissions do you want to process: "))
        comLimit = int(input("How much comments per submissions do you want to process: "))

        print("Processing request... \n")
        start_time = time.time()
        subColl = SubmissionCollection(subLimit, comLimit, query, subReddit)
        print("Execution time for " + str(subLimit) + " submissions, with " + str(comLimit) + " comments each : " + str(
            (time.time() - start_time)) + " seconds\n")
        print("List of averge comment's length per submission : " + str(subColl.getCommentLengthAverage()) + "\n")

        # What to display (asked once; the display loop is disabled).
        print("Now, what to display ?\n")
        mode = input(
            "Histogram (type 'hist'), Jaccard indexes (type 'jacc'), others?, or to stop the display loop, type 'stop'")
        if mode == "hist":
            sepMix = input("Seperate or Mixed Word Overlap (s/m)")
            if sepMix == "s":
                print("Drawing (comments words are separeted) articles/comments histograms ...\n")
                separateOverlapSubCommentHists(subColl)
            elif sepMix == "m":
                print("Drawing (comments words are mixed) articles/comments histograms ...\n")
                mixedOverlapSubCommentHists(subColl)
            # Short pause after drawing; presumably gives the plots time to
            # show up before returning -- TODO confirm.
            time.sleep(1.0)
        # Jaccard index
        elif mode == "jacc":
            print("Jaccard index :\n")
            print(calculateJacquard(subColl))
        # LDA model
        elif mode == 'lda':
            print(performLDA(subColl, 3))
        # Pearson correlation
        elif mode == "pears":
            print("Pearson correlation :\n")
            print(pearsonCorrelation(subColl))
        elif mode == "stop":
            # Explicit no-op: would break out of the display loop if enabled.
            pass
    except Exception as exc:
        # Report the cause instead of hiding it behind a generic message.
        print("Unknown error: " + repr(exc))
# main()
# Debug run executed at module level with fixed parameters instead of the
# interactive prompts.

# NOTE(review): presumably works around the "duplicate OpenMP runtime" abort
# raised by some numeric libraries -- confirm it is still needed.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Fixed search parameters: 4 submissions, 20 comments each.
subLimit, comLimit = 4, 20
query, subReddit = "industrial farming", "news"

print("Processing submission collection... \n\n")
subColl = SubmissionCollection(subLimit, comLimit, query, subReddit)

# NOTE(review): the banner says 'negative entities' but the call that follows
# is performLDA, and its return value is not printed -- confirm the intent.
print('negative entities')
performLDA(subColl)