Skip to content

Commit

Permalink
Fix "Data too long for column 'tags' at row 1" error (239 characters truncated)
Browse files (browse the repository at this point in the history)
  • Loading branch information
thekingofcity committed May 26, 2018
1 parent 9a250df commit 3b396ea
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions page_parse/user/person.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,21 +92,21 @@ def get_detail(html, uid):
user.description = description.encode('gbk', 'ignore').decode('gbk')
elif '注册时间:' in each_str:
user.register_time = each.find(attrs={'class': 'pt_detail'}).get_text().replace('\t', '').replace(
'\r\n', '')
'\r\n', '').replace(' ', '')

if '标签信息' in basic_str:
basic_info = each_module.find_all(attrs={'class': 'li_1 clearfix'})
for each in basic_info:
if '标签:' in each.get_text():
user.tags = each.find(attrs={'class': 'pt_detail'}).get_text().replace('\t', '').replace(
'\n\n\n', '') .strip().replace('\r\n', ';')
'\n\n\n', '') .strip().replace('\r\n', ';').replace(' ', '')

if '教育信息' in basic_str:
basic_info = each_module.find_all(attrs={'class': 'li_1 clearfix'})
for each in basic_info:
if '大学:' in each.get_text():
user.education_info = each.find(attrs={'class': 'pt_detail'}).get_text().replace('\r\n', ',') \
.replace('\t', '').replace('\n', ';').lstrip(';').rstrip(';')
.replace('\t', '').replace('\n', ';').lstrip(';').rstrip(';').replace(' ', '')

if '工作信息' in basic_str:
basic_info = each_module.find_all(attrs={'class': 'li_1 clearfix'})
Expand All @@ -116,7 +116,7 @@ def get_detail(html, uid):
jobs = each.find_all(attrs={'class': 'pt_detail'})
for job in jobs:
jobs_info.append(job.get_text().replace('\r\n', '').replace('\t', '').replace('\n', ''))
user.work_info = ';'.join(jobs_info)
user.work_info = ';'.join(jobs_info).replace(' ', '')

if '联系信息' in basic_str:
basic_info = each_module.find_all(attrs={'class': 'li_1 clearfix'})
Expand All @@ -129,7 +129,7 @@ def get_detail(html, uid):
contact_info.append('email:' + each.find(attrs={'class': 'pt_detail'}).get_text())
if 'MSN:' in each.get_text():
contact_info.append('msn:' + each.find(attrs={'class': 'pt_detail'}).get_text())
user.contact_info = ';'.join(contact_info)
user.contact_info = ';'.join(contact_info).replace(' ', '')
except Exception as why:
print('解析出错,具体原因为{why}'.format(why=why))

Expand Down

0 comments on commit 3b396ea

Please sign in to comment.