forked from ThePornDatabase/scrapy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tester.py
119 lines (89 loc) · 3.74 KB
/
tester.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import sys
from pathlib import Path
from scrapy.http import TextResponse
from scrapy.utils import project
from PySide6.QtUiTools import QUiLoader
from PySide6.QtWidgets import QApplication, QStyleFactory, QTreeWidgetItem
from PySide6.QtCore import QFile, QIODevice, QCoreApplication, Qt
from tpdb.helpers.http import Http
from tpdb.helpers.scrapy_dpath import DPathResponse
from tpdb.BaseScraper import BaseScraper
class GUI:
    """Qt window for loading a page and trying a selector against it.

    The page body comes either from fetching the URL typed into the window
    or, when the fetch yields nothing, from the text pasted into the text
    box. The selector is evaluated with ``BaseScraper.process_xpath`` and
    the matches are shown in a line edit and a tree widget.

    Constructing the class builds the window, runs the Qt event loop and
    exits the process when the loop ends.
    """

    # Fallback User-Agent used when the project settings do not define one.
    DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'

    # Value returned by the last ``Http.get`` call (``None`` if it failed).
    request = None
    # Response wrapper built from the last loaded page or pasted text.
    response = None
    # Class-level default only; ``setting`` rebinds a per-instance dict.
    headers = {}

    def __init__(self):
        """Load the ``.ui`` file next to this script and run the application.

        This call does not return: it ends with ``sys.exit`` carrying the
        event loop's exit code, or ``-1`` if the UI file cannot be opened
        or loaded.
        """
        QCoreApplication.setAttribute(Qt.AA_ShareOpenGLContexts)
        app = QApplication(sys.argv)
        app.setStyle(QStyleFactory.create('Fusion'))

        # Resolve the UI file beside this script instead of relying on the
        # current working directory, so the tool can be started from anywhere.
        ui_file_name = str(Path(__file__).resolve().with_suffix('.ui'))
        ui_file = QFile(ui_file_name)
        if not ui_file.open(QIODevice.ReadOnly):
            print('Cannot open %s: %s' % (ui_file_name, ui_file.errorString()))
            sys.exit(-1)

        loader = QUiLoader()
        self.window = loader.load(ui_file)
        ui_file.close()
        if not self.window:
            print(loader.errorString())
            sys.exit(-1)

        self.connect()
        self.setting()
        self.window.show()
        sys.exit(app.exec())

    def connect(self):
        """Wire the load button and the selector field to their handlers."""
        self.window.pushButton.pressed.connect(self.load)
        self.window.lineEdit_2.editingFinished.connect(self.get)

    def setting(self):
        """Take the User-Agent header from the scrapy project settings."""
        settings = project.get_project_settings()
        user_agent = settings.get('USER_AGENT', default=self.DEFAULT_USER_AGENT)
        # Rebind instead of mutating in place: ``headers`` starts out as a
        # class attribute, and writing into it would leak across instances.
        self.headers = {**self.headers, 'User-Agent': user_agent}

    def get_response(self, content, request=None):
        """Wrap raw page content in a ``DPathResponse``.

        :param content: page body passed to ``TextResponse`` as ``body``.
        :param request: optional object exposing ``.url``; when omitted the
            response URL is the empty string.
        :return: ``DPathResponse`` built from ``request`` and the new
            ``TextResponse``.
        """
        # Same ``is not None`` test as ``load`` uses, so a request object
        # that happens to be falsy still contributes its URL.
        # NOTE(review): presumably an HTTP response object that can be falsy
        # for error statuses - confirm against ``Http.get``.
        url = request.url if request is not None else ''
        response = TextResponse(url=url, headers=self.headers, body=content)
        return DPathResponse(request, response)

    def load(self):
        """Fetch the typed URL, falling back to the text box contents.

        On a successful fetch the label shows a link to the URL and the text
        box is filled with the fetched text. Otherwise any text already in
        the text box is used as the page body.
        """
        self.response = None
        url = self.window.lineEdit.text()
        self.request = Http.get(url, headers=self.headers)

        if self.request is not None:
            self.response = self.get_response(self.request.content, self.request)
            self.window.label.setText(self._format_link(url))
            self.window.plainTextEdit.setPlainText(self.request.text)
            return

        text = self.window.plainTextEdit.toPlainText().encode('UTF-8')
        if text:
            self.response = self.get_response(text)
            self.window.label.setText('From TextBox')

    def get(self):
        """Evaluate the selector field against the current response.

        The first match goes into the result line edit and every match is
        listed in the tree widget, keyed by its index.
        """
        self.window.treeWidget.clear()
        # Clear the single-result field together with the tree so a stale
        # value from an earlier selector is never left on display.
        self.window.lineEdit_3.setText('')

        selector = self.window.lineEdit_2.text().strip()
        # ``editingFinished`` also fires when the field is left empty; do not
        # hand an empty selector to the scraper.
        if not selector or not self.response:
            return

        result = BaseScraper.process_xpath(self.response, selector)
        if not result:
            return

        self.window.lineEdit_3.setText(result.get().strip())
        data = dict(enumerate(match.strip() for match in result.getall()))
        tree = QTreeWidgetItem()
        items = self.fill_item(tree, data)
        self.window.treeWidget.addTopLevelItems(items)
        self.window.treeWidget.expandAll()

    def fill_item(self, item, value):
        """Recursively add ``value`` as children of the tree item ``item``.

        :param item: parent item that receives the children.
        :param value: ``None``, a dict, a list/tuple, or any other value
            (rendered with ``str``).
        :return: ``None`` when ``value`` is ``None``, otherwise ``[item]``.
        """
        def new_item(parent, text, val=None):
            # Build the child's own subtree before attaching it to the parent.
            child = QTreeWidgetItem([text])
            self.fill_item(child, val)
            parent.addChild(child)

        if value is None:
            return None

        if isinstance(value, dict):
            for key, val in sorted(value.items()):
                new_item(item, str(key), val)
        elif isinstance(value, (list, tuple)):
            for val in value:
                # Containers are labelled by type name; their contents are
                # expanded one level down by the recursive call.
                if isinstance(val, (dict, list, tuple)):
                    text = '[%s]' % type(val).__name__
                else:
                    text = str(val)
                new_item(item, text, val)
        else:
            new_item(item, str(value))

        return [item]

    @staticmethod
    def _format_link(url):
        """Return rich-text anchor markup for ``url`` with HTML escaping."""
        from html import escape

        # Without escaping, query strings such as ``?a=1&copy=2`` are parsed
        # as HTML entities and the label shows a mangled address.
        safe_url = escape(url, quote=True)
        return '<a href="{0}">{0}</a>'.format(safe_url)
# Script entry point: constructing GUI builds the window and runs the Qt
# event loop, so nothing else is needed here.
if __name__ == '__main__':
    GUI()