Skip to content

Commit

Permalink
fix bug in GoogleImageCrawler
Browse files (browse the repository at this point in the history)
Signed-off-by: Zhiyuan Chen <[email protected]>
  • Loading branch information
ZhiyuanChen committed May 31, 2024
1 parent e062fd0 commit f5c0acd
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions icrawler/builtin/google.py
Original file line number | Diff line number | Diff line change
[Expand Up] @@ -161,13 +161,12 @@ def parse(self, response):
# uris = [img[1][3][0] for img in data if img[0] == 1]

uris = re.findall(r"http[^\[]*?.(?:jpg|png|bmp)", txt)
if not uris:
uris = re.findall(r"http[^\[]*?\.(?:jpg|png|bmp)", txt)
uris = [bytes(uri, "utf-8").decode("unicode-escape") for uri in uris]
if uris:
return [{"file_url": uri} for uri in uris]

uris = re.findall(r"http[^\[]*?\.(?:jpg|png|bmp)", txt)
return [{"file_url": uri} for uri in uris]


class GoogleImageCrawler(Crawler):
def __init__(
Expand Down

0 comments on commit f5c0acd

Please sign in to comment.