From 7c55b38981d5ed97147c74166730a2d7a8f04f69 Mon Sep 17 00:00:00 2001 From: Oleg Date: Thu, 29 Aug 2024 10:06:31 +0200 Subject: [PATCH] Fix very basic credo errors --- .credo.exs | 18 +++++++ config/config.exs | 3 +++ lib/crawly/api.ex | 4 +- lib/crawly/loggers/send_to_ui_backend.ex | 4 +- lib/crawly/models/yml_spider.ex | 3 +- .../pipelines/experimental/send_to_ui.ex | 3 +- .../requests_storage/requests_storage.ex | 2 +- lib/crawly/simple_storage.ex | 3 +- lib/crawly/utils.ex | 50 ++++++++++--------- lib/mix/tasks/crawly.gen.spider.ex | 10 ++-- mix.exs | 3 +- mix.lock | 10 ++-- test/engine_test.exs | 9 +--- test/middlewares/request_options_test.exs | 4 +- test/test_utils.ex | 4 ++ 15 files changed, 74 insertions(+), 56 deletions(-) create mode 100644 .credo.exs diff --git a/.credo.exs b/.credo.exs new file mode 100644 index 00000000..ccc83058 --- /dev/null +++ b/.credo.exs @@ -0,0 +1,18 @@ +%{ + configs: [ + %{ + name: "default", + plugins: [], + requires: [], + strict: false, + parse_timeout: 5000, + color: true, + checks: %{ + disabled: [ + # TagTODO is disabled so that TODO comments are not reported as issues + {Credo.Check.Design.TagTODO, []} + ] + } + } + ] +} diff --git a/config/config.exs b/config/config.exs index 87bba751..4b2ff617 100644 --- a/config/config.exs +++ b/config/config.exs @@ -3,7 +3,10 @@ import Config config :logger, backends: [:console, {LoggerFileBackend, :info_log}] + +# Console options get their own key so :backends stays at the :logger root. +config :logger, :console, metadata: [:spider_name, :crawl_id] config :crawly, start_http_api?: true, diff --git a/lib/crawly/api.ex b/lib/crawly/api.ex index 234bc6e1..2f2295d2 100644 --- a/lib/crawly/api.ex +++ b/lib/crawly/api.ex @@ -172,7 +172,7 @@ defmodule Crawly.API.Router do %{"name" => spider_name} = yml -> # Check if spider already registered, but allow editing spiders - case {is_spider_registered(spider_name), + case {spider_registered?(spider_name), spider_name == name_from_query_params} do {true, false} -> {:error, @@ -400,7 +400,7 @@ defmodule 
Crawly.API.Router do end end - defp is_spider_registered(name) do + defp spider_registered?(name) do module_name_str = "Elixir." <> name module_name = String.to_atom(module_name_str) Enum.member?(Crawly.Utils.list_spiders(), module_name) diff --git a/lib/crawly/loggers/send_to_ui_backend.ex b/lib/crawly/loggers/send_to_ui_backend.ex index 9721a784..9c5a2f8d 100644 --- a/lib/crawly/loggers/send_to_ui_backend.ex +++ b/lib/crawly/loggers/send_to_ui_backend.ex @@ -1,6 +1,6 @@ defmodule Crawly.Loggers.SendToUiBackend do - # TODO: Write doc - # Initialize the configuration + @moduledoc false + def init({__MODULE__, name}) do {:ok, configure(name, [])} end diff --git a/lib/crawly/models/yml_spider.ex b/lib/crawly/models/yml_spider.ex index b4c935c2..4efe75ea 100644 --- a/lib/crawly/models/yml_spider.ex +++ b/lib/crawly/models/yml_spider.ex @@ -1,6 +1,5 @@ defmodule Crawly.Models.YMLSpider do - @moduledoc """ - """ + @moduledoc false @table_name __MODULE__ diff --git a/lib/crawly/pipelines/experimental/send_to_ui.ex b/lib/crawly/pipelines/experimental/send_to_ui.ex index 02bd8afc..916a7d1e 100644 --- a/lib/crawly/pipelines/experimental/send_to_ui.ex +++ b/lib/crawly/pipelines/experimental/send_to_ui.ex @@ -1,6 +1,5 @@ defmodule Crawly.Pipelines.Experimental.SendToUI do - @moduledoc """ - """ + @moduledoc false @behaviour Crawly.Pipeline require Logger diff --git a/lib/crawly/requests_storage/requests_storage.ex b/lib/crawly/requests_storage/requests_storage.ex index 9026929f..f200ce1a 100644 --- a/lib/crawly/requests_storage/requests_storage.ex +++ b/lib/crawly/requests_storage/requests_storage.ex @@ -110,7 +110,7 @@ defmodule Crawly.RequestsStorage do {:reply, msg, state} end - def handle_call({:pop, spider_name}, _from, state = %{workers: workers}) do + def handle_call({:pop, spider_name}, _from, %{workers: workers} = state) do resp = case Map.get(workers, spider_name) do nil -> diff --git a/lib/crawly/simple_storage.ex b/lib/crawly/simple_storage.ex index 
fbb0c729..02c3d966 100644 --- a/lib/crawly/simple_storage.ex +++ b/lib/crawly/simple_storage.ex @@ -1,7 +1,6 @@ defmodule Crawly.SimpleStorage do - @moduledoc """ + @moduledoc false - """ @dets_table :dets_simple_storage require Logger diff --git a/lib/crawly/utils.ex b/lib/crawly/utils.ex index e6832f4a..0d43d7a0 100644 --- a/lib/crawly/utils.ex +++ b/lib/crawly/utils.ex @@ -329,29 +329,7 @@ defmodule Crawly.Utils do # Work only with 5 first URLs, so we don't timeout Enum.take(start_urls, 5), fn url -> - case HTTPoison.get(url) do - {:error, reason} -> - %{ - url: url, - error: "#{inspect(reason)}" - } - - {:ok, response} -> - {:ok, document} = Floki.parse_document(response.body) - - extracted_urls = - document - |> Crawly.Utils.extract_requests(links, base_url) - |> Enum.map(fn req -> req.url end) - # restrict number of shown urls, so output is not too big - |> Enum.take(10) - - %{ - url: url, - items: Crawly.Utils.extract_items(document, fields), - requests: extracted_urls - } - end + fetch(url, base_url, fields, links) end ) @@ -360,6 +338,32 @@ defmodule Crawly.Utils do end end + defp fetch(url, base_url, fields, links) do + case HTTPoison.get(url) do + {:error, reason} -> + %{ + url: url, + error: "#{inspect(reason)}" + } + + {:ok, response} -> + {:ok, document} = Floki.parse_document(response.body) + + extracted_urls = + document + |> Crawly.Utils.extract_requests(links, base_url) + |> Enum.map(fn req -> req.url end) + # restrict number of shown urls, so output is not too big + |> Enum.take(10) + + %{ + url: url, + items: Crawly.Utils.extract_items(document, fields), + requests: extracted_urls + } + end + end + @doc """ Composes the log file path for a given spider and crawl ID. 
diff --git a/lib/mix/tasks/crawly.gen.spider.ex b/lib/mix/tasks/crawly.gen.spider.ex index 9270a500..27b979ac 100644 --- a/lib/mix/tasks/crawly.gen.spider.ex +++ b/lib/mix/tasks/crawly.gen.spider.ex @@ -28,12 +28,10 @@ defmodule Mix.Tasks.Crawly.Gen.Spider do end defp response({opts, _word}) do - cond do - opts[:help] != nil -> - help() - - true -> - Map.new(opts) |> generate_spider() + if opts[:help] != nil do + help() + else + Map.new(opts) |> generate_spider() end end diff --git a/mix.exs b/mix.exs index d362ed69..8ddcf862 100644 --- a/mix.exs +++ b/mix.exs @@ -50,14 +50,13 @@ defmodule Crawly.Mixfile do {:poison, "~> 3.1"}, {:gollum, "~> 0.5.0", hex: :new_gollum}, {:plug_cowboy, "~> 2.0"}, - {:credo, "~> 1.5.0", only: [:dev, :test], runtime: false}, + {:credo, "~> 1.7", only: [:dev, :test], runtime: false}, {:ex_doc, ">= 0.0.0", only: :dev, runtime: false}, {:earmark, "~> 1.2", only: :dev}, {:meck, "~> 0.9", only: :test}, {:excoveralls, "~> 0.14.6", only: :test}, {:yaml_elixir, "~> 2.9"}, {:ex_json_schema, "~> 0.9.2"}, - # Add floki only for crawly standalone release {:floki, "~> 0.33.0", only: [:dev, :test, :standalone_crawly]}, {:logger_file_backend, "~> 0.0.11", diff --git a/mix.lock b/mix.lock index 3a7b0e96..7ce2e1b1 100644 --- a/mix.lock +++ b/mix.lock @@ -1,26 +1,26 @@ %{ - "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm", "7af5c7e09fe1d40f76c8e4f9dd2be7cebd83909f31fee7cd0e9eadc567da8353"}, + "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, "castore": {:hex, :castore, "1.0.1", "240b9edb4e9e94f8f56ab39d8d2d0a57f49e46c56aced8f873892df8ff64ff5a", [:mix], [], "hexpm", "b4951de93c224d44fac71614beabd88b71932d0b1dea80d2f80fb9044e01bbb3"}, "certifi": {:hex, :certifi, "2.12.0", "2d1cca2ec95f59643862af91f001478c9863c2ac9cb6e2f89780bfd8de987329", [:rebar3], 
[], "hexpm", "ee68d85df22e554040cdb4be100f33873ac6051387baf6a8f6ce82272340ff1c"}, "cowboy": {:hex, :cowboy, "2.9.0", "865dd8b6607e14cf03282e10e934023a1bd8be6f6bacf921a7e2a96d800cd452", [:make, :rebar3], [{:cowlib, "2.11.0", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, "1.8.0", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "2c729f934b4e1aa149aff882f57c6372c15399a20d54f65c8d67bef583021bde"}, "cowboy_telemetry": {:hex, :cowboy_telemetry, "0.3.1", "ebd1a1d7aff97f27c66654e78ece187abdc646992714164380d8a041eda16754", [:rebar3], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "3a6efd3366130eab84ca372cbd4a7d3c3a97bdfcfb4911233b035d117063f0af"}, "cowlib": {:hex, :cowlib, "2.11.0", "0b9ff9c346629256c42ebe1eeb769a83c6cb771a6ee5960bd110ab0b9b872063", [:make, :rebar3], [], "hexpm", "2b3e9da0b21c4565751a6d4901c20d1b4cc25cbb7fd50d91d2ab6dd287bc86a9"}, - "credo": {:hex, :credo, "1.5.6", "e04cc0fdc236fefbb578e0c04bd01a471081616e741d386909e527ac146016c6", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2.8", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "4b52a3e558bd64e30de62a648518a5ea2b6e3e5d2b164ef5296244753fc7eb17"}, - "decimal": {:hex, :decimal, "2.0.0", "a78296e617b0f5dd4c6caf57c714431347912ffb1d0842e998e9792b5642d697", [:mix], [], "hexpm", "34666e9c55dea81013e77d9d87370fe6cb6291d1ef32f46a1600230b1d44f577"}, + "credo": {:hex, :credo, "1.7.7", "771445037228f763f9b2afd612b6aa2fd8e28432a95dbbc60d8e03ce71ba4446", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", 
"8bc87496c9aaacdc3f90f01b7b0582467b69b4bd2441fe8aae3109d843cc2f2e"}, + "decimal": {:hex, :decimal, "2.1.1", "5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"}, "earmark": {:hex, :earmark, "1.4.15", "2c7f924bf495ec1f65bd144b355d0949a05a254d0ec561740308a54946a67888", [:mix], [{:earmark_parser, ">= 1.4.13", [hex: :earmark_parser, repo: "hexpm", optional: false]}], "hexpm", "3b1209b85bc9f3586f370f7c363f6533788fb4e51db23aa79565875e7f9999ee"}, "earmark_parser": {:hex, :earmark_parser, "1.4.41", "ab34711c9dc6212dda44fcd20ecb87ac3f3fce6f0ca2f28d4a00e4154f8cd599", [:mix], [], "hexpm", "a81a04c7e34b6617c2792e291b5a2e57ab316365c2644ddc553bb9ed863ebefa"}, "elixir_uuid": {:hex, :elixir_uuid, "1.2.1", "dce506597acb7e6b0daeaff52ff6a9043f5919a4c3315abb4143f0b00378c097", [:mix], [], "hexpm", "f7eba2ea6c3555cea09706492716b0d87397b88946e6380898c2889d68585752"}, "ex_doc": {:hex, :ex_doc, "0.34.2", "13eedf3844ccdce25cfd837b99bea9ad92c4e511233199440488d217c92571e8", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "5ce5f16b41208a50106afed3de6a2ed34f4acfd65715b82a0b84b49d995f95c1"}, "ex_json_schema": {:hex, :ex_json_schema, "0.9.2", "c9a42e04e70cd70eb11a8903a22e8ec344df16edef4cb8e6ec84ed0caffc9f0f", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}], "hexpm", "4854329cb352b6c01c4c4b8dbfb3be14dc5bea19ea13e0eafade4ff22ba55224"}, "excoveralls": {:hex, :excoveralls, "0.14.6", "610e921e25b180a8538229ef547957f7e04bd3d3e9a55c7c5b7d24354abbba70", [:mix], [{:hackney, "~> 
1.16", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "0eceddaa9785cfcefbf3cd37812705f9d8ad34a758e513bb975b081dce4eb11e"}, - "file_system": {:hex, :file_system, "0.2.10", "fb082005a9cd1711c05b5248710f8826b02d7d1784e7c3451f9c1231d4fc162d", [:mix], [], "hexpm", "41195edbfb562a593726eda3b3e8b103a309b733ad25f3d642ba49696bf715dc"}, + "file_system": {:hex, :file_system, "1.0.1", "79e8ceaddb0416f8b8cd02a0127bdbababe7bf4a23d2a395b983c1f8b3f73edd", [:mix], [], "hexpm", "4414d1f38863ddf9120720cd976fce5bdde8e91d8283353f0e31850fa89feb9e"}, "floki": {:hex, :floki, "0.33.1", "f20f1eb471e726342b45ccb68edb9486729e7df94da403936ea94a794f072781", [:mix], [{:html_entities, "~> 0.5.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm", "461035fd125f13fdf30f243c85a0b1e50afbec876cbf1ceefe6fddd2e6d712c6"}, "gollum": {:hex, :new_gollum, "0.5.0", "871dd0ee15c65b38932da5b6eac1413c2be96545d6cf5d6419081ce85a9a883a", [:mix], [{:httpoison, "~> 2.2", [hex: :httpoison, repo: "hexpm", optional: false]}], "hexpm", "81722a31ef162270432fbfc3dbf1f57d08530a9e572a57bc528748942d020f84"}, "hackney": {:hex, :hackney, "1.20.1", "8d97aec62ddddd757d128bfd1df6c5861093419f8f7a4223823537bad5d064e2", [:rebar3], [{:certifi, "~> 2.12.0", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~> 6.1.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "~> 1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~> 1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.4.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~> 1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "fe9094e5f1a2a2c0a7d10918fee36bfec0ec2a979994cff8cfe8058cd9af38e3"}, "html_entities": {:hex, :html_entities, "0.5.2", 
"9e47e70598da7de2a9ff6af8758399251db6dbb7eebe2b013f2bbd2515895c3c", [:mix], [], "hexpm", "c53ba390403485615623b9531e97696f076ed415e8d8058b1dbaa28181f4fdcc"}, "httpoison": {:hex, :httpoison, "2.2.1", "87b7ed6d95db0389f7df02779644171d7319d319178f6680438167d7b69b1f3d", [:mix], [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "51364e6d2f429d80e14fe4b5f8e39719cacd03eb3f9a9286e61e216feac2d2df"}, "idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~>0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"}, - "jason": {:hex, :jason, "1.4.0", "e855647bc964a44e2f67df589ccf49105ae039d4179db7f6271dfd3843dc27e6", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "79a3791085b2a0f743ca04cec0f7be26443738779d09302e01318f97bdb82121"}, + "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, "logger_file_backend": {:hex, :logger_file_backend, "0.0.12", "5afaa76a0cb6123cd19900c0f414044cfc46c24c6a1b80842a9b0e7f6c755e57", [:mix], [], "hexpm", "7335cc4e186a3804f9d3651f2fb42243a11748f1e384421bdd17623ed53fed79"}, "makeup": {:hex, :makeup, "1.1.2", "9ba8837913bdf757787e71c1581c21f9d2455f4dd04cfca785c70bbfff1a76a3", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cce1566b81fbcbd21eca8ffe808f33b221f9eee2cbc7a1706fc3da9ff18e6cac"}, "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", 
[hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, diff --git a/test/engine_test.exs b/test/engine_test.exs index 4babb16e..d898baee 100644 --- a/test/engine_test.exs +++ b/test/engine_test.exs @@ -19,10 +19,7 @@ defmodule EngineTest do spiders = Crawly.Engine.list_known_spiders() assert [_ | _] = spiders - assert status = - Enum.find(spiders, fn s -> s.name == TestSpider end) - |> IO.inspect(label: __ENV__.line) - + status = Enum.find(spiders, fn s -> s.name == TestSpider end) assert status.status == :stopped # test a started spider @@ -44,9 +41,7 @@ defmodule EngineTest do test "get_spider_info/1 return the spider currently status in the engine" do Crawly.Engine.refresh_spider_list() - spider_info = - Crawly.Engine.get_spider_info(TestSpider) - |> IO.inspect(label: __ENV__.line) + spider_info = Crawly.Engine.get_spider_info(TestSpider) assert :stopped == spider_info.status diff --git a/test/middlewares/request_options_test.exs b/test/middlewares/request_options_test.exs index 2af13fb6..b163affd 100644 --- a/test/middlewares/request_options_test.exs +++ b/test/middlewares/request_options_test.exs @@ -7,12 +7,12 @@ defmodule Middlewares.RequestOptionsTest do middlewares = [ { Crawly.Middlewares.RequestOptions, - [timeout: 30_000, recv_timeout: 15000] + [timeout: 30_000, recv_timeout: 15_000] } ] {new_request, _state} = Crawly.Utils.pipe(middlewares, req, %{}) - assert [timeout: 30000, recv_timeout: 15000] == new_request.options + assert [timeout: 30_000, recv_timeout: 15_000] == new_request.options end end diff --git a/test/test_utils.ex b/test/test_utils.ex index e4dff48e..b5703e42 100644 --- a/test/test_utils.ex +++ b/test/test_utils.ex @@ -1,4 +1,5 @@ defmodule TestUtils do + @moduledoc false def stop_process(pid) do :erlang.exit(pid, :shutdown) wait_pid(pid) @@ -20,6 +21,7 @@ defmodule TestUtils do end defmodule TestSpider do + @moduledoc false use Crawly.Spider def base_url() 
do @@ -47,6 +49,7 @@ defmodule TestSpider do end defmodule UtilsTestSpider do + @moduledoc false use Crawly.Spider @impl true @@ -73,6 +76,7 @@ defmodule UtilsTestSpider do end defmodule PipelineTestSpider do + @moduledoc false use Crawly.Spider @impl true