From 9c8791fe442f7b0719210791219acaafd94fb003 Mon Sep 17 00:00:00 2001 From: Asger Askov Blekinge Date: Mon, 8 Apr 2019 10:37:40 +0200 Subject: [PATCH] Include URL in BrowserThread name, so it will be part of every log line from Browser thread. Thus, do not log it explicitly --- umbra/controller.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/umbra/controller.py b/umbra/controller.py index 68f362e..a813d57 100755 --- a/umbra/controller.py +++ b/umbra/controller.py @@ -144,6 +144,7 @@ def callback(body, message): if self._consumer_stop.is_set() or time.time() - start >= timeout or self._reconnect_requested: browser.stop() self._browser_pool.release(browser) + self.logger.debug("Released browser on port %s", browser.chrome.port) break except brozzler.browser.NoBrowsersAvailable: @@ -153,6 +154,7 @@ def callback(body, message): self.logger.critical("problem with browser initialization", exc_info=True) time.sleep(0.5) finally: + self.logger.info("Consumer is done") consumer.callbacks = None def _wait_for_active_browsers(self): @@ -219,7 +221,7 @@ def on_response(chrome_msg): req = chrome_msg['params']['response']['requestHeadersText'] payload['method'] = req[:req.index(' ')] else: - self.logger.warn('unable to identify http method (assuming GET) chrome_msg=%s', + self.logger.warning('unable to identify http method (assuming GET) chrome_msg=%s', chrome_msg) payload['method'] = 'GET' @@ -280,8 +282,8 @@ def prune_outlinks(dirty_links, block_list=None): def browse_page_sync(): self.logger.info( - 'browser=%s client_id=%s url=%s behavior_parameters=%s', - browser, client_id, url, behavior_parameters) + 'browser=%s client_id=%s behavior_parameters=%s', + browser, client_id, behavior_parameters) try: browser.start() final_page_url, outlinks = browser.browse_page( @@ -297,13 +299,13 @@ def browse_page_sync(): self.logger.info("page interstitial shown, likely unsupported http auth, for url {} - {}".format(url, e)) message.reject() except brozzler.ShutdownRequested as e: - self.logger.info("browsing did not complete normally, requeuing url {} - {}".format(url, e)) + self.logger.info("browsing did not complete normally, requeuing - {}".format(e)) message.requeue() # republish? except BrowsingException as e: - self.logger.warn("browsing did not complete normally, republishing url {} - {}".format(url, e)) + self.logger.warning("browsing did not complete normally, republishing - {}".format(e)) republish_amqp(self, message) except: - self.logger.critical("problem browsing page, republishing url {}, may have lost browser process".format(url), exc_info=True) + self.logger.critical("problem browsing page, republishing, may have lost browser process {}".format(browser), exc_info=True) republish_amqp(self, message) finally: browser.stop() @@ -344,10 +346,13 @@ def browse_thread_run_then_cleanup(): with self._browsing_threads_lock: self._browsing_threads.remove(threading.current_thread()) - thread_name = "BrowsingThread:%s" % browser.chrome.port + + thread_name = "BrowsingThread:%s-%s" % (browser.chrome.port,url) th = threading.Thread(target=browse_thread_run_then_cleanup, name=thread_name) + self.logger.info('adding thread %s to self._browsing_threads', th) with self._browsing_threads_lock: self._browsing_threads.add(th) + th.start()