Clean up tests from unneeded log messages
Trying to reduce the number of log messages we get in tests during
normal Crawly operations, by using capture_log in the places where
it's applicable.
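
The pattern applied throughout the commit looks roughly like this (a minimal sketch; the module name and log message here are illustrative, not from the commit): wrap the noisy call in ExUnit.CaptureLog.capture_log/1, which intercepts Logger output instead of letting it print to the console, then assert on the returned string.

defmodule CaptureLogPatternTest do
  use ExUnit.Case

  require Logger

  test "noisy operations are silenced and their output is checked" do
    # capture_log/1 runs the function, intercepts Logger output,
    # and returns everything that was logged as a single string.
    log =
      ExUnit.CaptureLog.capture_log(fn ->
        Logger.warning("Dropping item: reason: missing required fields")
      end)

    # Assert on the captured output instead of letting it hit the console.
    assert log =~ "Dropping item:"
  end
end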
oltarasenko committed Aug 29, 2024
1 parent 0f7a583 commit 50998dc
Showing 9 changed files with 100 additions and 40 deletions.
6 changes: 6 additions & 0 deletions config/test.exs
@@ -1,5 +1,11 @@
 import Config
 
+config :logger,
+  :console,
+  backends: [:console, {LoggerFileBackend, :info_log}],
+  metadata: [:spider_name, :crawl_id],
+  level: :warning
+
 config :crawly,
   start_http_api?: true,
   manager_operations_timeout: 500,
39 changes: 25 additions & 14 deletions test/data_storage_test.exs
@@ -43,24 +43,35 @@ defmodule DataStorageTest do
   end
 
   test "Items without all required fields are dropped", context do
-    Crawly.DataStorage.store(context.crawler, %{
-      author: "me",
-      time: "Now",
-      url: "http://example.com"
-    })
-
-    {:stored_items, 0} = Crawly.DataStorage.stats(context.crawler)
+    log =
+      ExUnit.CaptureLog.capture_log(fn ->
+        Crawly.DataStorage.store(context.crawler, %{
+          author: "me",
+          time: "Now",
+          url: "http://example.com"
+        })
+
+        {:stored_items, 0} = Crawly.DataStorage.stats(context.crawler)
+      end)
+
+    assert log =~ "Dropping item:"
+    assert log =~ "Reason: missing required fields"
   end
 
   test "Items without all required fields are dropped nils", context do
-    Crawly.DataStorage.store(context.crawler, %{
-      title: "title",
-      author: nil,
-      time: "Now",
-      url: "http://example.com"
-    })
-
-    {:stored_items, 0} = Crawly.DataStorage.stats(context.crawler)
+    log =
+      ExUnit.CaptureLog.capture_log(fn ->
+        Crawly.DataStorage.store(context.crawler, %{
+          title: "title",
+          author: nil,
+          time: "Now",
+          url: "http://example.com"
+        })
+
+        assert {:stored_items, 0} == Crawly.DataStorage.stats(context.crawler)
+      end)
+
+    assert log =~ "Dropping item:"
+    assert log =~ "Reason: missing required fields"
   end
 
   test "Starting child worker twice", context do
17 changes: 12 additions & 5 deletions test/fetchers/crawly_render_server_test.exs
@@ -1,6 +1,7 @@
 defmodule Crawly.Fetchers.CrawlyRenderServerTest do
   use ExUnit.Case
-  import Crawly.Fetchers.CrawlyRenderServer
+
+  alias Crawly.Fetchers.CrawlyRenderServer
 
   test "throws an error when base_url is not set" do
     request = %{
@@ -10,9 +11,15 @@ defmodule Crawly.Fetchers.CrawlyRenderServerTest do
 
     client_options = []
 
-    assert_raise RuntimeError, fn ->
-      fetch(request, client_options)
-    end
+    log =
+      ExUnit.CaptureLog.capture_log(fn ->
+        assert_raise RuntimeError, fn ->
+          CrawlyRenderServer.fetch(request, client_options)
+        end
+      end)
+
+    assert log =~
+             "The base_url is not set. CrawlyRenderServer can't be used! Please set :base_url"
   end
 
   test "composes correct request to render server" do
@@ -33,6 +40,6 @@ defmodule Crawly.Fetchers.CrawlyRenderServerTest do
       assert %{:"User-Agent" => "Custom User Agent"} == body.headers
     end)
 
-    fetch(request, client_options)
+    CrawlyRenderServer.fetch(request, client_options)
   end
 end
2 changes: 1 addition & 1 deletion test/mix/tasks/gen_spider_test.exs
@@ -1,5 +1,5 @@
 defmodule GenSpiderTest do
-  use ExUnit.Case, async: true
+  use ExUnit.Case, async: false
   import ExUnit.CaptureIO
 
   test "when path is incorrect it sends an error message to console" do
20 changes: 15 additions & 5 deletions test/pipelines/csv_encoder_test.exs
@@ -1,5 +1,5 @@
 defmodule Pipelines.CSVEncoderTest do
-  use ExUnit.Case, async: false
+  use ExUnit.Case
 
   @item %{first: "some", second: "data"}
   @state %{spider_name: Test, crawl_id: "test"}
@@ -16,16 +16,26 @@ defmodule Pipelines.CSVEncoderTest do
   test "Drops an item if fields are empty" do
     pipelines = [{Crawly.Pipelines.CSVEncoder, fields: []}]
 
-    {item, _state} = Crawly.Utils.pipe(pipelines, @item, @state)
+    log =
+      ExUnit.CaptureLog.capture_log(fn ->
+        {item, _state} = Crawly.Utils.pipe(pipelines, @item, @state)
+        assert item == false
+      end)
 
-    assert item == false
+    assert log =~
+             "Dropping item: %{first: \"some\", second: \"data\"}. Reason: No fields declared for CSVEncoder"
   end
 
   test "Drops an item if fields are not declared" do
     pipelines = [{Crawly.Pipelines.CSVEncoder}]
 
-    {item, _state} = Crawly.Utils.pipe(pipelines, @item, @state)
+    log =
+      ExUnit.CaptureLog.capture_log(fn ->
+        {item, _state} = Crawly.Utils.pipe(pipelines, @item, @state)
+        assert item == false
+      end)
 
-    assert item == false
+    assert log =~
+             "Dropping item: %{first: \"some\", second: \"data\"}. Reason: No fields declared for CSVEncoder"
   end
 end
27 changes: 16 additions & 11 deletions test/pipelines/duplicates_filter_test.exs
@@ -40,16 +40,21 @@ defmodule Pipelines.DuplicatesFilterTest do
     item = @valid
     state = %{spider_name: Test, crawl_id: "test"}
 
-    {item, state} = Crawly.Utils.pipe(pipelines, item, state)
-
-    # filter state is not updated
-    assert Map.has_key?(state, :duplicates_filter) == false
-
-    # run with same item and updated state should not drop the item
-    assert {%{} = item, state} = Crawly.Utils.pipe(pipelines, item, state)
-    assert Map.has_key?(state, :duplicates_filter) == false
-
-    # unchanged
-    assert item == @valid
+    log =
+      ExUnit.CaptureLog.capture_log(fn ->
+        {item, state} = Crawly.Utils.pipe(pipelines, item, state)
+        # filter state is not updated
+        assert Map.has_key?(state, :duplicates_filter) == false
+
+        # run with same item and updated state should not drop the item
+        assert {%{} = item, state} = Crawly.Utils.pipe(pipelines, item, state)
+        assert Map.has_key?(state, :duplicates_filter) == false
+
+        # unchanged
+        assert item == @valid
+      end)
+
+    assert log =~
+             "Duplicates filter pipeline is inactive, item_id option is required"
   end
 end
22 changes: 19 additions & 3 deletions test/pipelines/validate_test.exs
@@ -28,22 +28,38 @@ defmodule Pipelines.ValidateTest do
     item = @invalid_missing
     state = %{spider_name: Test, crawl_id: "test"}
 
-    {false, _state} = Crawly.Utils.pipe(pipelines, item, state)
+    log =
+      ExUnit.CaptureLog.capture_log(fn ->
+        {false, _state} = Crawly.Utils.pipe(pipelines, item, state)
+      end)
+
+    assert log =~ "missing required fields"
   end
 
   test "Drops items when missing required fields with tuple config" do
     pipelines = [{Crawly.Pipelines.Validate, fields: [:title, :author]}]
     item = @invalid_missing
     state = %{spider_name: Test, crawl_id: "test"}
 
-    {false, _state} = Crawly.Utils.pipe(pipelines, item, state)
+    log =
+      ExUnit.CaptureLog.capture_log(fn ->
+        {false, _state} = Crawly.Utils.pipe(pipelines, item, state)
+      end)
+
+    assert log =~ "missing required fields"
  end
 
  test "Drops items when required fields are equal to nil" do
    pipelines = [{Crawly.Pipelines.Validate, fields: [:title, :author]}]
    item = @invalid_nil
    state = %{spider_name: Test, crawl_id: "test"}
 
-    {false, _state} = Crawly.Utils.pipe(pipelines, item, state)
+    log =
+      ExUnit.CaptureLog.capture_log(fn ->
+        {result, _state} = Crawly.Utils.pipe(pipelines, item, state)
+        assert result == false
+      end)
+
+    assert log =~ "missing required fields"
   end
 end
4 changes: 3 additions & 1 deletion test/utils_test.exs
@@ -221,6 +221,7 @@ defmodule UtilsTest do
     assert "Enables search using CSS selectors" == Map.get(item, "body")
   end
 
+  @compile {:no_warn_undefined, BooksSpiderForTest}
   test "Can load a spider from a YML format" do
     spider_yml = """
     name: BooksSpiderForTest
@@ -240,7 +241,8 @@ defmodule UtilsTest do
 
     Crawly.Models.YMLSpider.load(spider_yml)
 
-    assert "https://books.toscrape.com/" == BooksSpiderForTest.base_url()
+    assert "https://books.toscrape.com/" == Elixir.BooksSpiderForTest.base_url()
+
 
     assert [
              start_urls: [
3 changes: 3 additions & 0 deletions test/worker_test.exs
@@ -130,7 +130,10 @@ defmodule WorkerTest do
 
     assert response != false
     assert response.retries == 1
+    Process.sleep(1000)
+    assert ExUnit.CaptureLog.capture_log(fn -> nil end)
 
+    assert response.retries == 1
     assert Process.alive?(context.crawler)
   end
 
