-
Notifications
You must be signed in to change notification settings - Fork 0
/
festival.py
executable file
·139 lines (104 loc) · 4.95 KB
/
festival.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Festival
import math
import StringIO
import time
import urllib
import urllib2
from lxml import etree
from operator import itemgetter
# Retrieve festival's lineup, need eventid
def festival(eventid=3182649):
    """Return the artist names listed for a last.fm event.

    Requests the event via the last.fm ``event.getinfo`` web service,
    collects the text of every ``artist`` element in the XML response and
    makes sure each artist's similar-artists XML is cached (see ``check``).

    eventid -- last.fm event id, as an int or a string.  The default,
               3182649, is the Sasquatch event id.

    Returns the list of artist names in document order.
    """
    # str() lets the integer default (and any int passed by a caller) be
    # concatenated into the URL; string ids pass through unchanged.
    url = ('http://ws.audioscrobbler.com/2.0/?method=event.getinfo&event='
           + str(eventid)
           + '&api_key=b25b959554ed76058ac220b7b2e0a026')
    response = urllib2.urlopen(url).read()
    tree = etree.parse(StringIO.StringIO(response))
    # Extract artists from XML
    artists = [node.text for node in tree.xpath('//artist')]
    # Check if artist is cached, if not, download XML
    for name in artists:
        check(name)
    return artists
# Download an artist's similar artists
def download(artist):
    """Fetch an artist's similar-artists XML from last.fm into the cache.

    The response is written to 'cache/<artist>_similar.xml', where <artist>
    is the name with spaces replaced by '+'.  Afterwards a progress message
    is printed and the function sleeps for five seconds (printing a dot per
    second) so that consecutive downloads are spaced out.

    artist -- artist name; its spaces may already have been replaced by '+'
              (check() passes the name in that form).
    """
    artist = artist.replace(' ', '+')  # URLs have '+' instead of spaces in artist name
    # Percent-encode the UTF-8 bytes of the name so reserved characters such
    # as '&', '#', '?' or '%' cannot break the query string.  '+' is kept
    # as-is because at this point it stands for a space.
    quoted_artist = urllib.quote(artist.encode('utf8'), safe='+')
    # URL for similar artists XML
    url = ('http://ws.audioscrobbler.com/2.0/?method=artist.getsimilar&artist='
           + quoted_artist
           + '&api_key=b25b959554ed76058ac220b7b2e0a026')
    # The cache file name keeps the unquoted '+' form used by the rest of the file
    urllib.urlretrieve(url, 'cache/' + artist + '_similar.xml')  # caches XML
    print('Downloading ' + artist)
    # Wait 5 seconds between downloads
    for _ in range(5):
        time.sleep(1.0)
        print('.')
# Check if an artist's similar artists XML has been cached or not
def check(artist):
    """Make sure the similar-artists XML for an artist is in the cache.

    Tries to open 'cache/<artist>_similar.xml' (spaces in the name replaced
    by '+').  If the file cannot be opened, download() is called with the
    '+'-form of the name; otherwise the file is simply closed again.
    """
    cache_name = artist.replace(' ', '+')  # same '+' convention as the URLs
    cache_path = 'cache/' + cache_name + '_' + 'similar' + '.xml'
    try:
        handle = open(cache_path)
    except IOError:
        # Not cached yet (or unreadable): fetch it now
        download(cache_name)
        return
    handle.close()
# Download a last.fm profile's top artists
# Option argument for chart period
def profile(username, period='overall'):
    """Return a last.fm user's top artists with their play counts.

    Requests up to 300 entries from the ``user.gettopartists`` web service
    for the given chart period and parses the XML response.

    username -- last.fm user name, inserted into the request URL as-is.
    period   -- chart period passed to the service (default 'overall').

    Returns a list of dicts, one per ``artist`` element, each with the keys
    'name' (text of the element's ``name`` child) and 'plays' (its
    ``playcount`` child converted to int).
    """
    request_url = ('http://ws.audioscrobbler.com/2.0/?method=user.gettopartists&user='
                   + username
                   + '&period=' + period
                   + '&limit=300&api_key=b25b959554ed76058ac220b7b2e0a026')
    xml_body = urllib2.urlopen(request_url).read()
    document = etree.parse(StringIO.StringIO(xml_body))
    # Artist element has name and total playcount tags
    return [
        {
            'name': node.xpath('name')[0].text,
            'plays': int(node.xpath('playcount')[0].text),
        }
        for node in document.xpath('//artist')
    ]
#####
# Load one lineup artist's cached similar-artists XML as a name -> match lookup
def _load_similar_matches(artist, self_match):
    """Return a dict mapping similar-artist names to their match numbers.

    Reads 'cache/<artist>_similar.xml' (spaces in the name replaced by '+').
    The artist itself is entered first with ``self_match``; after that every
    ``artist`` element contributes its ``name`` text and its ``match`` text
    converted to float.  When a name occurs more than once, the first value
    seen is kept.
    """
    file_artist = artist.replace(' ', '+')  # cache files use '+' instead of spaces
    # 'with' closes the cache file even if reading it fails
    with open('cache/' + file_artist + '_' + 'similar' + '.xml') as cache_file:
        xml_text = cache_file.read()
    tree = etree.parse(StringIO.StringIO(xml_text))
    # Give the artist a match number for themselves
    matches = {artist: self_match}
    for node in tree.xpath('//artist'):
        name = node.xpath('name')[0].text
        match = float(node.xpath('match')[0].text)
        # setdefault keeps the earliest entry for a repeated name, so the
        # self-match is never overwritten by the XML contents
        matches.setdefault(name, match)
    return matches


# Create custom playlist for a given lineup and profile
def create_playlist(lineup, my_artists, max_match=2.5):
    """Rank the artists of a lineup by how well they fit a listening profile.

    For every artist in the lineup, the cached similar-artists XML is loaded
    and compared against the profile.  Each profile artist that is either
    the lineup artist itself or one of its similar artists contributes
    ``exp(-(max_match - match)) * plays`` to the lineup artist's score;
    profile artists that are not similar contribute 0.

    The exponential distance metric gives more weight to bands that are very
    similar to bands in the profile.  This prevents a band that is only
    slightly similar to many bands from collecting a large score.

    lineup     -- list of artist names; each must already have a cached
                  'cache/<artist>_similar.xml' file.
    my_artists -- list of dicts with the keys 'name' and 'plays', as built
                  by profile().
    max_match  -- controls how similar artists must be to have substantial
                  influence; the lineup artist itself is given a match of
                  ``max_match - 0.01``.  Defaults to 2.5.

    Returns a list of (artist, score) tuples sorted by score, highest first.
    Raises IOError if a lineup artist's cache file cannot be opened.
    """
    # Loop-invariant: the match number an artist gets for themselves
    self_match = max_match - 0.01
    scores = []
    for artist in lineup:
        # One dict lookup per profile artist instead of rebuilding and
        # scanning the list of similar-artist names every time
        matches = _load_similar_matches(artist, self_match)
        weights = []
        for ma in my_artists:
            if ma['name'] in matches:
                # Match number is subject to the exponential distance metric
                weights.append(math.exp(-1 * (max_match - matches[ma['name']])))
            else:
                weights.append(0)
        # Total score calculated by multiplying weight by playcount
        scores.append(sum(w * ma['plays'] for w, ma in zip(weights, my_artists)))
    # Associate scores with lineup artists, sort by score and return
    return sorted(zip(lineup, scores), key=itemgetter(1), reverse=True)