Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include URL in BrowserThread name, so it will be part of every log line #72

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions umbra/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ def callback(body, message):
if self._consumer_stop.is_set() or time.time() - start >= timeout or self._reconnect_requested:
browser.stop()
self._browser_pool.release(browser)
self.logger.debug("Released browser on port %s", browser.chrome.port)
break

except brozzler.browser.NoBrowsersAvailable:
Expand All @@ -153,6 +154,7 @@ def callback(body, message):
self.logger.critical("problem with browser initialization", exc_info=True)
time.sleep(0.5)
finally:
self.logger.info("Consumer is done")
consumer.callbacks = None

def _wait_for_active_browsers(self):
Expand Down Expand Up @@ -219,7 +221,7 @@ def on_response(chrome_msg):
req = chrome_msg['params']['response']['requestHeadersText']
payload['method'] = req[:req.index(' ')]
else:
self.logger.warn('unable to identify http method (assuming GET) chrome_msg=%s',
self.logger.warning('unable to identify http method (assuming GET) chrome_msg=%s',
chrome_msg)
payload['method'] = 'GET'

Expand Down Expand Up @@ -280,8 +282,8 @@ def prune_outlinks(dirty_links, block_list=None):

def browse_page_sync():
self.logger.info(
'browser=%s client_id=%s url=%s behavior_parameters=%s',
browser, client_id, url, behavior_parameters)
'browser=%s client_id=%s behavior_parameters=%s',
browser, client_id, behavior_parameters)
try:
browser.start()
final_page_url, outlinks = browser.browse_page(
Expand All @@ -297,13 +299,13 @@ def browse_page_sync():
self.logger.info("page interstitial shown, likely unsupported http auth, for url {} - {}".format(url, e))
message.reject()
except brozzler.ShutdownRequested as e:
self.logger.info("browsing did not complete normally, requeuing url {} - {}".format(url, e))
self.logger.info("browsing did not complete normally, requeuing - {}".format(e))
message.requeue() # republish?
except BrowsingException as e:
self.logger.warn("browsing did not complete normally, republishing url {} - {}".format(url, e))
self.logger.warning("browsing did not complete normally, republishing - {}".format(e))
republish_amqp(self, message)
except:
self.logger.critical("problem browsing page, republishing url {}, may have lost browser process".format(url), exc_info=True)
self.logger.critical("problem browsing page, republishing, may have lost browser process {}".format(browser), exc_info=True)
republish_amqp(self, message)
finally:
browser.stop()
Expand Down Expand Up @@ -344,10 +346,13 @@ def browse_thread_run_then_cleanup():
with self._browsing_threads_lock:
self._browsing_threads.remove(threading.current_thread())

thread_name = "BrowsingThread:%s" % browser.chrome.port

thread_name = "BrowsingThread:%s-%s" % (browser.chrome.port,url)
th = threading.Thread(target=browse_thread_run_then_cleanup, name=thread_name)

self.logger.info('adding thread %s to self._browsing_threads', th)
with self._browsing_threads_lock:
self._browsing_threads.add(th)

th.start()