-
Notifications
You must be signed in to change notification settings - Fork 0
/
bot.py
51 lines (41 loc) · 1.6 KB
/
bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from selenium import webdriver
# import PyPDF2
from selenium.webdriver.common.keys import Keys #allows us to type in the text bar
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import os
from general import *
# chrome driver path
PATH = "E:\Projects\selenium_web_bot\chromedriver\chromedriver.exe"
driver = webdriver.Chrome(PATH)
# change this keyword for different searches
search_keyword = "apple"
driver.get("https://ulm.louislibraries.org/uhtbin/ezproxylogin_louis_ulm.x?url=http://search.ebscohost.com/login.aspx?direct=true&site=eds-live&scope=site&type=0&mode=and&cli0=FT1&clv0=Y&authtype=ip&bquery="+search_keyword)
# first page automated authentication
search = driver.find_element_by_name("id")
search.send_keys("30098693")
search = driver.find_element_by_name("pin")
search.send_keys("0797")
search.send_keys(Keys.RETURN)
# wait till the results are completed
try:
pdf = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "resultListInnerDiv"))
)
articles = pdf.find_elements_by_link_text("PDF Full Text") #creates an array
create_project_file(search_keyword)
count = 1
for article in articles:
data = str(count)+'. ' + article.get_attribute("href")
append_to_file(search_keyword+'.txt', data)
count+=1
# if there is exception in scraping
except:
print("Exception!!!! QUITTING!")
driver.quit()
# closes the window screen after all the tasks complete
finally:
print('QUITTING!')
driver.quit()