-
Notifications
You must be signed in to change notification settings - Fork 0
/
reviewRecipe.py
122 lines (103 loc) · 4.03 KB
/
reviewRecipe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import requests
import sys
import json
from alchemyapi import AlchemyAPI
# Module-level AlchemyAPI client, created once at import time; main() uses it
# for the sentiment, keyword, entity and concept calls made on every review.
alchemyapi = AlchemyAPI()
def main(sku, maxReviews):
    """Fetch Best Buy reviews for a product, run NLP on each, and write CSVs.

    Reviews are downloaded page by page (100 per page) from the Best Buy
    reviews endpoint, each review's ``comment`` text is sent to the
    module-level ``alchemyapi`` client for sentiment, keywords, entities and
    concepts, and the collected results are handed to ``toFile``.

    Args:
        sku: Product SKU, interpolated into the request URL.
        maxReviews: Maximum number of reviews to analyze. Values <= 0 result
            in no requests and header-only output files.

    Returns:
        None. If any page download fails, the error is printed and the
        function returns early without writing any file.
    """
    print("Searching Best Buy for product reviews and performing NLP analysis...")

    # Read the API key with a context manager so the handle is always closed.
    with open('bestbuy_key.txt', 'r') as keyFile:
        apiKey = keyFile.read().strip()

    # Call review API for single product.
    # Request only as many 100-review pages as are needed to reach maxReviews
    # (ceiling division); `//` keeps the result an int on Python 2 and 3.
    pageSize = 100
    lastPage = (maxReviews + pageSize - 1) // pageSize
    reviews = []
    for page in range(1, lastPage + 1):
        try:
            url = ('http://api.remix.bestbuy.com/v1/reviews(sku=%s)?format=json&pageSize=100&page=%s&apiKey=' % (sku, page)) + apiKey
            response = json.loads(requests.get(url).text)['reviews']
            reviews.extend(response)
        except Exception as e:
            # A failed download aborts the whole run rather than analysing a
            # partial review set.
            print(e)
            return

    # Process reviews
    # Each list field is filled from the AlchemyAPI call of the same name; the
    # response is keyed by that same name and every item carries a 'text'.
    listFields = (
        ('keywords', alchemyapi.keywords),
        ('entities', alchemyapi.entities),
        ('concepts', alchemyapi.concepts),
    )
    eReviews = []
    # Slicing enforces the cap up front, including the maxReviews <= 0 case.
    for review in reviews[:max(maxReviews, 0)]:
        fullReview = review['comment']
        entry = {'fullReview': fullReview, 'sentiment': "", 'keywords': [], 'entities': [], 'concepts': []}
        # Append before analysing so a review whose NLP calls fail still gets
        # a row (with whatever fields were filled in before the failure).
        eReviews.append(entry)
        try:
            # Sentiment
            response = alchemyapi.sentiment("text", fullReview)
            entry['sentiment'] = response["docSentiment"]["type"]
            # Keywords, entities, concepts
            for field, analyze in listFields:
                response = analyze("text", fullReview)
                for item in response[field]:
                    entry[field].append(item['text'])
        except Exception as e:
            # Best effort: report the problem and move on to the next review.
            print(e)
        # progress indicator
        sys.stdout.write(".")
        sys.stdout.flush()

    # Send to file
    toFile(eReviews)
def toFile(eReviews):
    """Write the analysed reviews to five CSV files in the current directory.

    ``reviewFullText.csv`` holds one combined row per review; ``sentiment.csv``,
    ``keywords.csv``, ``entities.csv`` and ``concepts.csv`` hold one row per
    (review, value) pair. Fields are wrapped in single quotes and are not
    escaped, so the files are only as well-formed as the review text allows.

    Args:
        eReviews: List of dicts with the keys ``'fullReview'`` and
            ``'sentiment'`` (strings) and ``'keywords'``, ``'entities'`` and
            ``'concepts'`` (lists of strings). The list index is used as the
            review ID.

    Returns:
        None.
    """
    # Open all files, one with all results and one each for individual call.
    # A single `with` guarantees every handle (including keywords.csv) is
    # flushed and closed, even if a write fails part-way through.
    with open('reviewFullText.csv', 'w') as fullFile, \
            open('sentiment.csv', 'w') as sentimentFile, \
            open('keywords.csv', 'w') as keywordFile, \
            open('entities.csv', 'w') as entityFile, \
            open('concepts.csv', 'w') as conceptFile:
        # add headers
        fullFile.write("'Review ID','Review Text','Sentiment','Keywords','Entities','Concepts'\n")
        sentimentFile.write("'Review ID','Sentiment'\n")
        keywordFile.write("'Review ID','Keywords'\n")
        entityFile.write("'Review ID','Entities'\n")
        conceptFile.write("'Review ID','Concepts'\n")

        # (field, per-field file, text that closes the field's column in the
        # combined file): "','" moves on to the next column, "'\n" ends the row.
        listColumns = (
            ('keywords', keywordFile, "','"),
            ('entities', entityFile, "','"),
            ('concepts', conceptFile, "'\n"),
        )

        # loop through all reviews
        for rId, review in enumerate(eReviews):
            index = str(rId)
            # add ID, full review, and sentiment
            fullFile.write(index + ",'" + review['fullReview'] + "','" + review['sentiment'] + "','")
            sentimentFile.write(index + ",'" + review['sentiment'] + "'\n")
            # add keywords, entities, and concepts
            for field, fieldFile, columnEnd in listColumns:
                for value in review[field]:
                    # In the combined file the values of one column are
                    # newline-separated inside a single quoted cell.
                    fullFile.write(value + "\n")
                    fieldFile.write(index + ",'" + value + "'\n")
                fullFile.write(columnEnd)

    print("\nAnalysis successful. Results in reviewFullText.csv, sentiment.csv, keywords.csv, entities.csv, and concepts.csv")
if __name__ == "__main__":
    # Script entry point: expects exactly <SKU> and <MAX_RESULTS>.
    # check for invalid command line input
    if len(sys.argv) != 3:
        print("ERROR: invalid number of command line arguments")
        print("SYNTAX: python reviewRecipe.py <SKU> <MAX_RESULTS>")
        print("\t<SKU> : SKU for product on Best Buy")
        print("\t<MAX_RESULTS> : Maximum number of reviews to analyze")
        # Exit non-zero so calling shells/scripts can detect the usage error.
        sys.exit(1)

    # Reject a non-numeric limit with a readable message instead of a
    # ValueError traceback.
    try:
        maxResults = int(sys.argv[2])
    except ValueError:
        print("ERROR: <MAX_RESULTS> must be an integer")
        sys.exit(1)

    main(sys.argv[1], maxResults)