forked from hbunke/BibsOnGitHub
-
Notifications
You must be signed in to change notification settings - Fork 1
/
csv-to-json.py
52 lines (44 loc) · 1.55 KB
/
csv-to-json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import csv
import json
import re
import sys

import requests
# Fill in username and token to get 5000 instead of 60 requests per hour.
username = ""
token = ""

LIBRARIES_CSV = "libraries.csv"
OUTPUT_JSON = "all-libs.json"
API_URL_TEMPLATE = "https://api.github.com/orgs/{org}/repos?per_page=100&page=1"


def parse_libraries(lines):
    """Yield ``(country, city, name, link)`` for every usable CSV row.

    *lines* is any iterable of text lines (an open file works). Rows are
    parsed with the ``csv`` module so quoted fields may contain commas.
    Blank rows are skipped silently; rows with fewer than four columns are
    skipped with a warning on stderr. Columns after the fourth are ignored.
    """
    for row in csv.reader(lines):
        if not row:
            continue
        if len(row) < 4:
            print("warning: skipping malformed row: {!r}".format(row),
                  file=sys.stderr)
            continue
        country, city, name, link = row[:4]
        # Stray whitespace around the URL would end up in the API request.
        yield country, city, name, link.strip()


def org_from_url(link):
    """Return the last path segment of *link*, ignoring trailing slashes."""
    return link.strip().rstrip("/").rsplit("/", 1)[-1]


def fetch_repositories(session, org):
    """Return the repository records of GitHub organisation *org*.

    Every page is requested through *session*, so credentials configured on
    it are sent for follow-up pages as well as for the first one. Paging
    follows the ``next`` relation of the response's ``Link`` header.

    A response with a status other than 200 is reported on stderr and ends
    the paging; whatever was collected up to that point is returned, so the
    result is always a list.
    """
    repositories = []
    url = API_URL_TEMPLATE.format(org=org)
    while url:
        response = session.get(url)
        if response.status_code != 200:
            print(
                "warning: GET {} returned HTTP {}; repository list for {} "
                "may be incomplete".format(url, response.status_code, org),
                file=sys.stderr,
            )
            break
        repositories.extend(response.json())
        url = response.links.get("next", {}).get("url")
    return repositories


def render_catalogue(organisations):
    """Serialise *organisations* as the ``{"organisations": [...]}`` document.

    Serialising the whole structure at once guarantees valid JSON: every
    string is escaped and an empty list renders as ``[]``.
    """
    document = {"organisations": organisations}
    return json.dumps(document, indent=4, ensure_ascii=False) + "\n"


def main():
    """Read ``libraries.csv``, query GitHub per row, write ``all-libs.json``."""
    session = requests.Session()
    # Only attach credentials when both are set, so an anonymous run does not
    # send a Basic-Auth header built from empty strings.
    if username and token:
        session.auth = (username, token)

    # newline="" is what the csv module asks for when reading from a file.
    with open(LIBRARIES_CSV, "rt", encoding="utf-8", newline="") as libraries:
        rows = list(parse_libraries(libraries))

    organisations = []
    for country, city, name, link in rows:
        github_orga = org_from_url(link)
        print(country + " " + city + " " + github_orga)
        organisations.append({
            "name": name,
            "country": country,
            "city": city,
            "url": link,
            "repositories": fetch_repositories(session, github_orga),
        })

    with open(OUTPUT_JSON, "w", encoding="utf-8") as json_file:
        json_file.write(render_catalogue(organisations))


if __name__ == "__main__":
    main()