-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Sourcery Starbot ⭐ refactored wkunzhi/Python3-Spider #28
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -108,11 +108,11 @@ def is_pixel_similar(self, image1, image2, x, y): | |
# 阈值 允许误差 | ||
threshold = 10 | ||
# 对比 | ||
if abs(c_pixel[0] - ic_pixel[0]) < threshold and \ | ||
abs(c_pixel[1] - ic_pixel[1]) < threshold and \ | ||
abs(c_pixel[2] - ic_pixel[2]) < threshold: | ||
return True | ||
return False | ||
return ( | ||
abs(c_pixel[0] - ic_pixel[0]) < threshold | ||
and abs(c_pixel[1] - ic_pixel[1]) < threshold | ||
and abs(c_pixel[2] - ic_pixel[2]) < threshold | ||
) | ||
|
||
def get_slice_gap(self, image1, image2): | ||
"""获取缺口的偏移量 | ||
|
@@ -168,12 +168,7 @@ def get_track(self, distance): | |
v = 0 | ||
|
||
while current < distance: | ||
if current < mid: | ||
# 加速度为正2 | ||
a = 20 | ||
else: | ||
# 加速度为负3 | ||
a = -30 | ||
a = 20 if current < mid else -30 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
This removes the following comments ( why? ):
|
||
# 初速度v0 | ||
v0 = v | ||
# 当前速度v = v0 + at | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,9 +48,9 @@ def get_cid_list(self): | |
if not title: | ||
title = video_title | ||
title = re.sub(r'[\/\\:*?"<>|]', '', title) # 替换为空的 | ||
print('标题:' + title, 'ID', cid) | ||
print(f'标题:{title}', 'ID', cid) | ||
page = str(item['page']) | ||
start_url = start_url + "/?p=" + page | ||
start_url = f"{start_url}/?p={page}" | ||
Comment on lines
-51
to
+53
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
video_list = down.get_play_list(start_url, cid, quality) | ||
self.start_time = time.time() | ||
down.down_video(video_list, title, start_url, page) | ||
|
@@ -65,20 +65,19 @@ def get_play_list(start_url, cid, quality): | |
""" | ||
entropy = 'rbMCKn@KuamXWlPMoJGsKcbiJKUfkPF_8dABscJntvqhRSETg' | ||
appkey, sec = ''.join([chr(ord(i) + 2) for i in entropy[::-1]]).split(':') | ||
params = 'appkey=%s&cid=%s&otype=json&qn=%s&quality=%s&type=' % (appkey, cid, quality, quality) | ||
params = f'appkey={appkey}&cid={cid}&otype=json&qn={quality}&quality={quality}&type=' | ||
chksum = hashlib.md5(bytes(params + sec, 'utf8')).hexdigest() | ||
url_api = 'https://interface.bilibili.com/v2/playurl?%s&sign=%s' % (params, chksum) | ||
url_api = f'https://interface.bilibili.com/v2/playurl?{params}&sign={chksum}' | ||
headers = { | ||
'Referer': start_url, | ||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36' | ||
} | ||
html = requests.get(url_api, headers=headers).json() | ||
video_list = [html['durl'][0]['url']] | ||
return video_list | ||
return [html['durl'][0]['url']] | ||
Comment on lines
-68
to
+76
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
def schedule_cmd(self, blocknum, blocksize, totalsize): | ||
speed = (blocknum * blocksize) / (time.time() - self.start_time) | ||
speed_str = " Speed: %s" % self.format_size(speed) | ||
speed_str = f" Speed: {self.format_size(speed)}" | ||
Comment on lines
-81
to
+80
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
recv_size = blocknum * blocksize | ||
|
||
# 设置下载进度条 | ||
|
@@ -95,7 +94,7 @@ def schedule(self, blocknum, blocksize, totalsize): | |
"""时间表 | ||
""" | ||
speed = (blocknum * blocksize) / (time.time() - self.start_time) | ||
speed_str = " Speed: %s" % self.format_size(speed) | ||
speed_str = f" Speed: {self.format_size(speed)}" | ||
Comment on lines
-98
to
+97
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
recv_size = blocknum * blocksize | ||
|
||
# 设置下载进度条 | ||
|
@@ -118,23 +117,17 @@ def format_size(bytes): | |
except: | ||
print("传入的字节格式不对") | ||
return "Error" | ||
if kb >= 1024: | ||
M = kb / 1024 | ||
if M >= 1024: | ||
G = M / 1024 | ||
return "%.3fG" % (G) | ||
else: | ||
return "%.3fM" % (M) | ||
else: | ||
if kb < 1024: | ||
return "%.3fK" % (kb) | ||
M = kb / 1024 | ||
return "%.3fG" % (M / 1024) if M >= 1024 else "%.3fM" % (M) | ||
Comment on lines
-121
to
+123
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
def down_video(self, video_list, title, start_url, page): | ||
"""下载视频 | ||
""" | ||
num = 1 | ||
print('正在下载请稍等...'.format(page)) | ||
current_video_path = os.path.join(sys.path[0], 'bilibili下载目录', title) # 当前目录作为下载目录 | ||
for i in video_list: | ||
for num, i in enumerate(video_list, start=1): | ||
Comment on lines
-134
to
+130
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
opener = urllib.request.build_opener() | ||
# 请求头 | ||
opener.addheaders = [ | ||
|
@@ -154,13 +147,19 @@ def down_video(self, video_list, title, start_url, page): | |
os.makedirs(current_video_path) | ||
# 开始下载 | ||
if len(video_list) > 1: | ||
urllib.request.urlretrieve(url=i, | ||
filename=os.path.join(current_video_path, r'{}-{}.mp4'.format(title, num)), | ||
reporthook=self.schedule_cmd) | ||
urllib.request.urlretrieve( | ||
url=i, | ||
filename=os.path.join( | ||
current_video_path, f'{title}-{num}.mp4' | ||
), | ||
reporthook=self.schedule_cmd, | ||
) | ||
else: | ||
urllib.request.urlretrieve(url=i, filename=os.path.join(current_video_path, r'{}.mp4'.format(title)), | ||
reporthook=self.schedule_cmd) | ||
num += 1 | ||
urllib.request.urlretrieve( | ||
url=i, | ||
filename=os.path.join(current_video_path, f'{title}.mp4'), | ||
reporthook=self.schedule_cmd, | ||
) | ||
|
||
@staticmethod | ||
def combine_video(video_list, title): | ||
|
@@ -169,7 +168,7 @@ def combine_video(video_list, title): | |
current_video_path = os.path.join(sys.path[0], 'bilibili_video', title) # 当前目录作为下载目录 | ||
if len(video_list) >= 2: | ||
# 视频大于一段才要合并 | ||
print('下载完成,正在合并视频...' + title) | ||
print(f'下载完成,正在合并视频...{title}') | ||
Comment on lines
-172
to
+171
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
# 定义一个数组 | ||
L = [] | ||
# 访问 video 文件夹 (假设视频都放在这里面) | ||
|
@@ -187,12 +186,14 @@ def combine_video(video_list, title): | |
# 拼接视频 | ||
final_clip = concatenate_videoclips(L) | ||
# 生成目标视频文件 | ||
final_clip.to_videofile(os.path.join(root_dir, r'{}.mp4'.format(title)), fps=24, remove_temp=False) | ||
print('视频合并完成' + title) | ||
final_clip.to_videofile( | ||
os.path.join(root_dir, f'{title}.mp4'), fps=24, remove_temp=False | ||
) | ||
print(f'视频合并完成{title}') | ||
|
||
else: | ||
# 视频只有一段则直接打印下载完成 | ||
print('视频合并完成:' + title) | ||
print(f'视频合并完成:{title}') | ||
|
||
|
||
if __name__ == '__main__': | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,10 +38,8 @@ def clean_data(self, data): | |
清洗数据 | ||
:return: | ||
""" | ||
columns = [] | ||
|
||
for item in data.get('result'): | ||
columns.append([ | ||
columns = [ | ||
[ | ||
Comment on lines
-41
to
+42
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
item.get('code'), | ||
item.get('date'), | ||
item.get('week'), | ||
|
@@ -56,8 +54,9 @@ def clean_data(self, data): | |
item.get('prizegrades')[1].get('typenum'), | ||
item.get('prizegrades')[2].get('typemoney'), | ||
item.get('prizegrades')[2].get('typenum'), | ||
]) | ||
|
||
] | ||
for item in data.get('result') | ||
] | ||
df = pd.DataFrame( | ||
columns, | ||
columns=["期数", "开奖日期", "星期数", "红球", "蓝球", "销售金额", "奖池", "中奖地区", "一等奖金", "一等奖人数", "二等奖金", "二等奖人数", "三等奖金", "三等奖人数"], # 指定列 | ||
|
@@ -79,8 +78,7 @@ def set_data(self, df): | |
for i in df['中奖地区']: | ||
for addr in i.split(',')[:-1]: | ||
name, num = jieba.cut(addr[:-1]) | ||
for n in range(int(num)): | ||
cut_text.append(name) | ||
cut_text.extend(name for _ in range(int(num))) | ||
Comment on lines
-82
to
+81
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
print(" ".join(cut_text)) | ||
|
||
w = wordcloud.WordCloud(font_path=self.font, background_color="white", scale=4) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,7 +38,7 @@ def down_load(self, file_url, file_full_name, now_photo_count, all_photo_count): | |
all_photo_count), end=" ") | ||
# 下载完图片后获取图片扩展名,并为其增加扩展名 | ||
file_type = guess(file_full_name) | ||
rename(file_full_name, file_full_name + '.' + file_type.extension) | ||
rename(file_full_name, f'{file_full_name}.{file_type.extension}') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
def crawler_photo(self, type_id, photo_count): | ||
""" | ||
|
@@ -70,8 +70,8 @@ def crawler_photo(self, type_id, photo_count): | |
for photo in photo_data: | ||
|
||
# 创建一个文件夹存放我们下载的图片 | ||
if not exists('./' + str(type_id)): | ||
makedirs('./' + str(type_id)) | ||
if not exists(f'./{str(type_id)}'): | ||
makedirs(f'./{str(type_id)}') | ||
Comment on lines
-73
to
+74
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
# 准备下载的图片链接 | ||
file_url = photo['urls']['raw'] | ||
|
@@ -81,7 +81,7 @@ def crawler_photo(self, type_id, photo_count): | |
file_name_only = file_name_only[len(file_name_only) - 1] | ||
|
||
# 准备保存到本地的完整路径 | ||
file_full_name = './' + str(type_id) + '/' + file_name_only | ||
file_full_name = f'./{str(type_id)}/{file_name_only}' | ||
|
||
# 开始下载图片 | ||
self.down_load(file_url, file_full_name, now_photo_count, all_photo_count) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,11 +35,9 @@ def decode(C): | |
I = -1 | ||
H = 0 | ||
B = '' | ||
J = len(C) | ||
G = ord(C[-1]) | ||
J = len(C) - 1 | ||
C = C[:-1] | ||
J -= 1 | ||
|
||
Comment on lines
-38
to
-42
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
for E in range(J): | ||
D = int(C[E], cha) - add | ||
if D >= add: | ||
|
@@ -51,7 +49,7 @@ def decode(C): | |
|
||
A = int(B[:I], digi) | ||
F = int(B[I + 1:], digi) | ||
L = (A + F - int(G)) / 2 | ||
L = (A + F - G) / 2 | ||
K = float(F - L) / 100000 | ||
L = float(L) / 100000 | ||
return {'lng': L, 'lat': K} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,8 +25,7 @@ def get_css(self, html): | |
if not svg_text_css: | ||
raise Exception("未找到链接") | ||
css_url = svg_text_css.group(1) | ||
content = self.parse_url('https:' + css_url) | ||
return content | ||
return self.parse_url(f'https:{css_url}') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
# 获取定义偏移量的css文件后将结果以字典形式存储 | ||
@ staticmethod | ||
|
@@ -36,21 +35,15 @@ def get_css_offset(content_css): | |
:return: {'xxx': ['192', '1550']} | ||
""" | ||
offset_item = re.findall(r'(\.[a-zA-Z0-9-]+)\{background:-(\d+).0px -(\d+).0px', content_css) | ||
result = {} | ||
for item in offset_item: | ||
css_class = item[0][1:] | ||
x_offset = item[1] | ||
y_offset = item[2] | ||
result[css_class] = [x_offset, y_offset] | ||
return result | ||
return {item[0][1:]: [item[1], item[2]] for item in offset_item} | ||
Comment on lines
-39
to
+38
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
# 获取svg url组 | ||
@staticmethod | ||
def get_svg_url_dict(content_css): | ||
items = re.findall(r'span\[class\^="(.*?)"\].*?width: (\d+)px;.*?background-image: url\((.*?)\);', content_css) | ||
result = {} | ||
for code, size, url in items: | ||
svg_list = [int(size), 'https:' + url] | ||
svg_list = [int(size), f'https:{url}'] | ||
Comment on lines
-53
to
+46
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
result[code] = svg_list | ||
return result | ||
|
||
|
@@ -100,7 +93,7 @@ def get_comment_num(self): | |
svg_url = svg[1] | ||
new_num = self.parse_comment_css(svg_url, size, x_offset, y_offset) | ||
num = num * 10 + int(new_num) | ||
print("餐馆: {}, 点评数: {}".format(shop_name, num)) | ||
print(f"餐馆: {shop_name}, 点评数: {num}") | ||
Comment on lines
-103
to
+96
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
|
||
if __name__ == '__main__': | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,8 +50,8 @@ def parse_ttf(self, code): | |
result_list = self.r.hmget(HASH_TABLE, self.name_list) # 取出对应字库表(已修复bug) | ||
for result in result_list: | ||
json_data = json.loads(result) | ||
if 'uni' + clean_code in json_data: | ||
return json_data['uni' + clean_code] | ||
if f'uni{clean_code}' in json_data: | ||
return json_data[f'uni{clean_code}'] | ||
Comment on lines
-53
to
+54
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
return False | ||
|
||
def add_hash(self, name, json_data): | ||
|
@@ -86,13 +86,13 @@ def install_ttf(self, ttf_list): | |
# 已存在无需安装 | ||
continue | ||
# 安装字体 | ||
with open(name + '.woff', 'wb+') as f: | ||
f.write(requests.get('http://' + ttf_list[index]).content) # 下载写入 | ||
font = TTFont(name + '.woff') | ||
with open(f'{name}.woff', 'wb+') as f: | ||
f.write(requests.get(f'http://{ttf_list[index]}').content) | ||
font = TTFont(f'{name}.woff') | ||
uni_list = font['cmap'].tables[0].ttFont.getGlyphOrder() # 取出字形保存到uniList中 | ||
json_data = json.dumps(dict(zip(uni_list, self.FONT_LIST)), ensure_ascii=False) | ||
self.add_hash(name, json_data) | ||
os.remove(name + '.woff') # 用完了删掉,节省资源占用 | ||
os.remove(f'{name}.woff') | ||
Comment on lines
-89
to
+95
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
This removes the following comments ( why? ):
|
||
|
||
@staticmethod | ||
def get_ttf_urls(text): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
从网页下载一个字体文件获取对应推导式,动态获取请自行拓展 | ||
""" | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lines
This removes the following comments ( why? ):
|
||
|
||
from fontTools.ttLib import TTFont | ||
import re | ||
|
||
|
@@ -15,7 +16,7 @@ | |
xml = f.read() # 读取tyc-num.xml赋值给xml | ||
GlyphID = re.findall(r'<GlyphID id="(.*?)" name="(\d+)"/>', xml) # 获得对应关系 | ||
print(GlyphID) | ||
GlyphIDNameLists = list(set([int(Gname) for Gid, Gname in GlyphID])) # 对应关系数量转换 | ||
GlyphIDNameLists = list({int(Gname) for Gid, Gname in GlyphID}) | ||
print(GlyphIDNameLists) | ||
DigitalDicts = {str(i): str(GlyphIDNameLists[i - 2]) for i in range(2, len(GlyphIDNameLists)+2)} # 数字对应关系的字典推导式 | ||
print(DigitalDicts) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,7 +11,7 @@ class ParseVideo: | |
|
||
def __init__(self, share): | ||
path = self.get_url(share) | ||
self.url = 'https://v.douyin.com/' + path + '/' | ||
self.url = f'https://v.douyin.com/{path}/' | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
self.headers = { | ||
'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1', | ||
} | ||
|
@@ -25,12 +25,13 @@ def get_url(share_url): | |
def go_location(self): | ||
response = self.session.get(self.url, headers=self.headers) | ||
self.first_url = response.url | ||
result = re.search(r'itemId: "(.*?)",[\s\S]*?uid: "(.*?)",[\s\S]*?authorName: "(.*?)",[\s\S]*?dytk: "(.*?)"', | ||
response.text) | ||
return result | ||
return re.search( | ||
r'itemId: "(.*?)",[\s\S]*?uid: "(.*?)",[\s\S]*?authorName: "(.*?)",[\s\S]*?dytk: "(.*?)"', | ||
response.text, | ||
) | ||
Comment on lines
-28
to
+31
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
def go_message(self, ret): | ||
url = 'https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids=' + ret.group(1) + '&dytk=' + ret.group(4) | ||
url = f'https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids={ret.group(1)}&dytk={ret.group(4)}' | ||
Comment on lines
-33
to
+34
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
response = self.session.get(url, headers=self.headers) | ||
json_data = json.loads(response.text) | ||
user_id = ret.group(2) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Function LoginBli.is_pixel_similar refactored with the following changes:
- reintroduce-else
- assign-if-exp
- boolean-if-exp-identity
- remove-unnecessary-cast