diff --git a/lib/gscraper/google/http_client.ex b/lib/gscraper/google/http_client.ex new file mode 100644 index 0000000..cfee479 --- /dev/null +++ b/lib/gscraper/google/http_client.ex @@ -0,0 +1,27 @@ +defmodule Gscraper.Google.HttpClient do + @google_search_base_url "https://www.google.com" + + def get(path, query_params \\ %{}, headers \\ []) do + client() + |> Tesla.get(path, query: Map.to_list(query_params), header: headers) + |> handle_response() + end + + defp client do + middleware = [ + {Tesla.Middleware.BaseUrl, @google_search_base_url} + ] + + adapter = {Tesla.Adapter.Hackney, []} + + Tesla.client(middleware, adapter) + end + + defp handle_response({:ok, %{status: status, body: body}}) when status in 200..299, + do: {:ok, body} + + defp handle_response({:ok, %{status: status, body: body}}), + do: {:error, %{status: status, body: body}} + + defp handle_response({:error, reason}), do: {:error, reason} +end diff --git a/lib/gscraper/search/schemas/keyword.ex b/lib/gscraper/search/schemas/keyword.ex index 426d07d..7102fe2 100644 --- a/lib/gscraper/search/schemas/keyword.ex +++ b/lib/gscraper/search/schemas/keyword.ex @@ -2,13 +2,14 @@ defmodule Gscraper.Search.Schemas.Keyword do use Ecto.Schema import Ecto.Changeset - alias Gscraper.Account.Schemas.User + alias Gscraper.Account.Schemas.{Report, User} schema "keywords" do field :keyword, :string field :status, Ecto.Enum, values: [:pending, :failed, :completed] belongs_to :user, User + has_one :report, Report timestamps() end diff --git a/lib/gscraper/search/schemas/report.ex b/lib/gscraper/search/schemas/report.ex new file mode 100644 index 0000000..29bf181 --- /dev/null +++ b/lib/gscraper/search/schemas/report.ex @@ -0,0 +1,30 @@ +defmodule Gscraper.Search.Schemas.Report do + use Ecto.Schema + import Ecto.Changeset + + alias Gscraper.Search.Schemas.Keyword + + # credo:disable-for-next-line Credo.Check.Readability.MaxLineLength + @fields ~w(ads_count top_ads_count top_ads_urls organic_result_count organic_urls links_count raw_html) + + schema "keywords" do + field :ads_count, :integer + field :top_ads_count, :integer + field :top_ads_urls, {:array, :string} + field :organic_result_count, :integer + field :organic_urls, {:array, :string} + field :links_count, :integer + field :raw_html, :string + + belongs_to :keyword, Keyword + + timestamps() + end + + def create_changeset(report \\ %__MODULE__{}, attrs) do + report + |> cast(attrs, @fields) + |> validate_required(@fields) + |> assoc_constraint(:keyword) + end +end diff --git a/lib/gscraper/search/scraper.ex b/lib/gscraper/search/scraper.ex new file mode 100644 index 0000000..d55945e --- /dev/null +++ b/lib/gscraper/search/scraper.ex @@ -0,0 +1,36 @@ +defmodule Gscraper.Search.Scraper do + alias Gscraper.Google.HttpClient, as: GoogleClient + alias Gscraper.Search.SearchResultParser + + # credo:disable-for-lines:7 Credo.Check.Readability.MaxLineLength + @user_agents [ + 'Mozilla/5.0 (Windows NT 5.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1', + 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36', + 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.112 Safari/535.1', + 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.85 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:61.0) Gecko/20100101 Firefox/98.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0 Safari/605.1.15' + ] + + def scrape(keyword) do + with {:ok, raw_html} <- search_keyword(keyword), + {:ok, parsed_result} <- SearchResultParser.parse(raw_html) do + {:ok, parsed_result} + else + {:error, :failed_to_parse, reason} -> + {:error, :failed_to_parse, "Failed to parse search result: #{inspect(reason)}"} + + {:error, reason} -> + {:error, :http_client_error, "Search page cannot be fetched: #{inspect(reason)}"} + end + end + + defp search_keyword(keyword) do + query_params = %{q: "#{keyword}"} + headers = ["User-Agent": random_user_agent()] + + GoogleClient.get("search", query_params, headers) + end + + defp random_user_agent, do: @user_agents |> Enum.shuffle() |> Enum.take(1) +end diff --git a/lib/gscraper/search/search_result_parser.ex b/lib/gscraper/search/search_result_parser.ex new file mode 100644 index 0000000..bd8edc5 --- /dev/null +++ b/lib/gscraper/search/search_result_parser.ex @@ -0,0 +1,67 @@ +defmodule Gscraper.Search.SearchResultParser do + @top_ads_selector "#tads .uEierd" + @top_ads_link_selector "#tads a" + @sidebar_ads_selector ".Yi78Pd .pla-unit" + @organic_link_selector "#search .g" + @organic_result_selector "#search .g a" + @href_attribute "a[href]" + + def parse(raw_html) do + case Floki.parse_document(raw_html) do + {:ok, parsed_html} -> + results = %{ + ads_count: get_ads_count(parsed_html), + top_ads_count: get_top_ads_count(parsed_html), + top_ads_urls: get_top_ads_urls(parsed_html), + organic_result_count: get_organic_result_count(parsed_html), + organic_urls: get_organic_urls(parsed_html), + link_count: get_link_count(parsed_html), + raw_html: raw_html + } + + {:ok, results} + + {:error, reason} -> + {:error, :failed_to_parse, reason} + end + end + + defp get_ads_count(parsed_html), + do: get_sidebar_ads_count(parsed_html) + get_top_ads_count(parsed_html) + + defp get_sidebar_ads_count(parsed_html) do + parsed_html + |> Floki.find(@sidebar_ads_selector) + |> Enum.count() + end + + defp get_top_ads_count(parsed_html) do + parsed_html + |> Floki.find(@top_ads_selector) + |> Enum.count() + end + + defp get_top_ads_urls(parsed_html) do + parsed_html + |> Floki.find(@top_ads_link_selector) + |> Floki.attribute(@href_attribute) + end + + defp get_organic_result_count(parsed_html) do + parsed_html + |> Floki.find(@organic_link_selector) + |> Enum.count() + end + + defp get_organic_urls(parsed_html) do + parsed_html + |> Floki.find(@organic_result_selector) + |> Floki.attribute(@href_attribute) + end + + defp get_link_count(parsed_html) do + parsed_html + |> Floki.find(@organic_link_selector) + |> Enum.count() + end +end diff --git a/lib/gscraper/search/searches.ex b/lib/gscraper/search/searches.ex index 9a05445..1c8ef44 100644 --- a/lib/gscraper/search/searches.ex +++ b/lib/gscraper/search/searches.ex @@ -2,7 +2,10 @@ defmodule Gscraper.Search.Searches do alias Gscraper.Account.Schemas.User alias Gscraper.Repo alias Gscraper.Search.Queries.KeywordQuery - alias Gscraper.Search.Schemas.Keyword + alias Gscraper.Search.Schemas.{Keyword, Report} + alias Gscraper.Search.ScraperWorker + + def find_keyword_by_id(id), do: Repo.get(Keyword, id) def list_keywords_by_user(user) do user @@ -18,15 +21,27 @@ defmodule Gscraper.Search.Searches do def process_keyword_list(keyword_list, %User{id: user_id}) do Enum.each(keyword_list, fn keyword -> - create_params = %{ - user_id: user_id, - keyword: keyword - } + create_params = %{user_id: user_id, keyword: keyword} Ecto.Multi.new() - |> Ecto.Multi.run(:keyword, fn _, _ -> create_keyword(create_params) end) - # Todo: enqueue a new search job for the given keyword + |> Ecto.Multi.run(:create_keyword, fn _, _ -> create_keyword(create_params) end) + |> Ecto.Multi.run(:enqueue_search_job, fn _, %{keyword: keyword} -> + enqueue_search_job(keyword) + end) |> Repo.transaction() end) end + + def create_report(%Keyword{} = keyword, attrs \\ %{}) do + keyword + |> Ecto.build_assoc(:report) + |> Report.create_changeset(attrs) + |> Repo.insert() + end + + defp enqueue_search_job(%Keyword{id: keyword_id}) do + %{keyword_id: keyword_id} + |> ScraperWorker.new() + |> Oban.insert() + end end diff --git a/lib/gscraper/search/worker/scraper_worker.ex b/lib/gscraper/search/worker/scraper_worker.ex new file mode 100644 index 0000000..fa27c8d --- /dev/null +++ b/lib/gscraper/search/worker/scraper_worker.ex @@ -0,0 +1,24 @@ +defmodule Gscraper.Search.ScraperWorker do + use Oban.Worker, max_attempts: 10, unique: [period: 30] + + alias Gscraper.Search.Schemas.Keyword + alias Gscraper.Search.{Scraper, Searches} + + @impl Oban.Worker + def perform(%Oban.Job{args: %{"keyword_id" => keyword_id}}) do + with {%Keyword{} = keyword} <- Searches.find_keyword_by_id(keyword_id), + {:ok, parsed_result} <- Scraper.scrape(keyword), + {:ok, _} <- Searches.create_report(keyword, parsed_result) do + :ok + else + {:error, :not_found} -> + {:error, "Keyword not found for ID: #{keyword_id}"} + + {:error, :failed_to_parse, reason} -> + {:error, reason} + + {:error, :http_client_error, reason} -> + {:error, reason} + end + end +end diff --git a/mix.exs b/mix.exs index 0b8c0d3..9d9264d 100644 --- a/mix.exs +++ b/mix.exs @@ -41,13 +41,14 @@ defmodule Gscraper.MixProject do defp deps do [ {:argon2_elixir, "~> 2.0"}, - {:wallaby, "~> 0.28.0", [only: :test, runtime: false]}, + {:wallaby, "~> 0.29.1", [only: :test, runtime: false]}, {:sobelow, "~> 0.11.1", [only: [:dev, :test], runtime: false]}, {:oban, "~> 2.6.1"}, {:mimic, "~> 1.4.0", [only: :test]}, {:ex_machina, "~> 2.7.0", [only: :test]}, {:excoveralls, "~> 0.14.0", [only: :test]}, {:faker, "~> 0.16", only: :test}, + {:floki, "~> 0.33.0"}, {:dialyxir, "~> 1.1.0", [only: [:dev], runtime: false]}, {:credo, "~> 1.5.6", [only: [:dev, :test], runtime: false]}, {:phoenix, "~> 1.5.12"}, @@ -63,7 +64,9 @@ defmodule Gscraper.MixProject do {:jason, "~> 1.0"}, {:plug_cowboy, "~> 2.5"}, {:guardian, "~> 2.0"}, - {:nimble_csv, "~> 1.1"} + {:nimble_csv, "~> 1.1"}, + {:tesla, "~> 1.4"}, + {:hackney, "~> 1.17"} ] end diff --git a/mix.lock b/mix.lock index ed96f63..c0cbbc0 100644 --- a/mix.lock +++ b/mix.lock @@ -1,33 +1,35 @@ %{ "argon2_elixir": {:hex, :argon2_elixir, "2.4.0", "2a22ea06e979f524c53b42b598fc6ba38cdcbc977a155e33e057732cfb1fb311", [:make, :mix], [{:comeonin, "~> 5.3", [hex: :comeonin, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.6", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "4ea82e183cf8e7f66dab1f767fedcfe6a195e140357ef2b0423146b72e0a551d"}, "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm", "7af5c7e09fe1d40f76c8e4f9dd2be7cebd83909f31fee7cd0e9eadc567da8353"}, - "certifi": {:hex, :certifi, "2.6.1", "dbab8e5e155a0763eea978c913ca280a6b544bfa115633fa20249c3d396d9493", [:rebar3], [], "hexpm", "524c97b4991b3849dd5c17a631223896272c6b0af446778ba4675a1dff53bb7e"}, + "certifi": {:hex, :certifi, "2.9.0", "6f2a475689dd47f19fb74334859d460a2dc4e3252a3324bd2111b8f0429e7e21", [:rebar3], [], "hexpm", "266da46bdb06d6c6d35fde799bcb28d36d985d424ad7c08b5bb48f5b5cdd4641"}, "comeonin": {:hex, :comeonin, "5.3.2", "5c2f893d05c56ae3f5e24c1b983c2d5dfb88c6d979c9287a76a7feb1e1d8d646", [:mix], [], "hexpm", "d0993402844c49539aeadb3fe46a3c9bd190f1ecf86b6f9ebd71957534c95f04"}, "connection": {:hex, :connection, "1.1.0", "ff2a49c4b75b6fb3e674bfc5536451607270aac754ffd1bdfe175abe4a6d7a68", [:mix], [], "hexpm", "722c1eb0a418fbe91ba7bd59a47e28008a189d47e37e0e7bb85585a016b2869c"}, "cowboy": {:hex, :cowboy, "2.9.0", "865dd8b6607e14cf03282e10e934023a1bd8be6f6bacf921a7e2a96d800cd452", [:make, :rebar3], [{:cowlib, "2.11.0", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, "1.8.0", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "2c729f934b4e1aa149aff882f57c6372c15399a20d54f65c8d67bef583021bde"}, "cowboy_telemetry": {:hex, :cowboy_telemetry, "0.3.1", "ebd1a1d7aff97f27c66654e78ece187abdc646992714164380d8a041eda16754", [:rebar3], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "3a6efd3366130eab84ca372cbd4a7d3c3a97bdfcfb4911233b035d117063f0af"}, "cowlib": {:hex, :cowlib, "2.11.0", "0b9ff9c346629256c42ebe1eeb769a83c6cb771a6ee5960bd110ab0b9b872063", [:make, :rebar3], [], "hexpm", "2b3e9da0b21c4565751a6d4901c20d1b4cc25cbb7fd50d91d2ab6dd287bc86a9"}, "credo": {:hex, :credo, "1.5.6", "e04cc0fdc236fefbb578e0c04bd01a471081616e741d386909e527ac146016c6", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2.8", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "4b52a3e558bd64e30de62a648518a5ea2b6e3e5d2b164ef5296244753fc7eb17"}, - "db_connection": {:hex, :db_connection, "2.4.0", "d04b1b73795dae60cead94189f1b8a51cc9e1f911c234cc23074017c43c031e5", [:mix], [{:connection, "~> 1.0", [hex: :connection, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "ad416c21ad9f61b3103d254a71b63696ecadb6a917b36f563921e0de00d7d7c8"}, + "db_connection": {:hex, :db_connection, "2.4.2", "f92e79aff2375299a16bcb069a14ee8615c3414863a6fef93156aee8e86c2ff3", [:mix], [{:connection, "~> 1.0", [hex: :connection, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "4fe53ca91b99f55ea249693a0229356a08f4d1a7931d8ffa79289b145fe83668"}, "decimal": {:hex, :decimal, "2.0.0", "a78296e617b0f5dd4c6caf57c714431347912ffb1d0842e998e9792b5642d697", [:mix], [], "hexpm", "34666e9c55dea81013e77d9d87370fe6cb6291d1ef32f46a1600230b1d44f577"}, "dialyxir": {:hex, :dialyxir, "1.1.0", "c5aab0d6e71e5522e77beff7ba9e08f8e02bad90dfbeffae60eaf0cb47e29488", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "07ea8e49c45f15264ebe6d5b93799d4dd56a44036cf42d0ad9c960bc266c0b9a"}, "ecto": {:hex, :ecto, "3.7.1", "a20598862351b29f80f285b21ec5297da1181c0442687f9b8329f0445d228892", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "d36e5b39fc479e654cffd4dbe1865d9716e4a9b6311faff799b6f90ab81b8638"}, - "ecto_sql": {:hex, :ecto_sql, "3.7.0", "2fcaad4ab0c8d76a5afbef078162806adbe709c04160aca58400d5cbbe8eeac6", [:mix], [{:db_connection, "~> 2.2", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.7.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.4.0 or ~> 0.5.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.15.0 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "a26135dfa1d99bf87a928c464cfa25bba6535a4fe761eefa56077a4febc60f70"}, + "ecto_sql": {:hex, :ecto_sql, "3.7.2", "55c60aa3a06168912abf145c6df38b0295c34118c3624cf7a6977cd6ce043081", [:mix], [{:db_connection, "~> 2.2", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.7.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.4.0 or ~> 0.5.0 or ~> 0.6.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.15.0 or ~> 0.16.0 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "3c218ea62f305dcaef0b915fb56583195e7b91c91dcfb006ba1f669bfacbff2a"}, "elixir_make": {:hex, :elixir_make, "0.6.2", "7dffacd77dec4c37b39af867cedaabb0b59f6a871f89722c25b28fcd4bd70530", [:mix], [], "hexpm", "03e49eadda22526a7e5279d53321d1cced6552f344ba4e03e619063de75348d9"}, "erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"}, "ex_machina": {:hex, :ex_machina, "2.7.0", "b792cc3127fd0680fecdb6299235b4727a4944a09ff0fa904cc639272cd92dc7", [:mix], [{:ecto, "~> 2.2 or ~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}, {:ecto_sql, "~> 3.0", [hex: :ecto_sql, repo: "hexpm", optional: true]}], "hexpm", "419aa7a39bde11894c87a615c4ecaa52d8f107bbdd81d810465186f783245bf8"}, "excoveralls": {:hex, :excoveralls, "0.14.0", "4b562d2acd87def01a3d1621e40037fdbf99f495ed3a8570dfcf1ab24e15f76d", [:mix], [{:hackney, "~> 1.16", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "94f17478b0cca020bcd85ce7eafea82d2856f7ed022be777734a2f864d36091a"}, "faker": {:hex, :faker, "0.16.0", "1e2cf3e8d60d44a30741fb98118fcac18b2020379c7e00d18f1a005841b2f647", [:mix], [], "hexpm", "fbcb9bf1299dff3c9dd7e50f41802bbc472ffbb84e7656394c8aa913ec315141"}, "file_system": {:hex, :file_system, "0.2.10", "fb082005a9cd1711c05b5248710f8826b02d7d1784e7c3451f9c1231d4fc162d", [:mix], [], "hexpm", "41195edbfb562a593726eda3b3e8b103a309b733ad25f3d642ba49696bf715dc"}, + "floki": {:hex, :floki, "0.33.1", "f20f1eb471e726342b45ccb68edb9486729e7df94da403936ea94a794f072781", [:mix], [{:html_entities, "~> 0.5.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm", "461035fd125f13fdf30f243c85a0b1e50afbec876cbf1ceefe6fddd2e6d712c6"}, "gettext": {:hex, :gettext, "0.18.2", "7df3ea191bb56c0309c00a783334b288d08a879f53a7014341284635850a6e55", [:mix], [], "hexpm", "f9f537b13d4fdd30f3039d33cb80144c3aa1f8d9698e47d7bcbcc8df93b1f5c5"}, "guardian": {:hex, :guardian, "2.1.1", "1f02b349f6ba765647cc834036a8d76fa4bd65605342fe3a031df3c99d0d411a", [:mix], [{:jose, "~> 1.8", [hex: :jose, repo: "hexpm", optional: false]}, {:plug, "~> 1.3.3 or ~> 1.4", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "189b87ba7ce6b40d6ba029138098b96ffc4ae78f229f5b39539b9141af8bf0f8"}, - "hackney": {:hex, :hackney, "1.17.4", "99da4674592504d3fb0cfef0db84c3ba02b4508bae2dff8c0108baa0d6e0977c", [:rebar3], [{:certifi, "~>2.6.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~>6.1.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "~>1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.3.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~>1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~>0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "de16ff4996556c8548d512f4dbe22dd58a587bf3332e7fd362430a7ef3986b16"}, - "httpoison": {:hex, :httpoison, "1.7.0", "abba7d086233c2d8574726227b6c2c4f6e53c4deae7fe5f6de531162ce9929a0", [:mix], [{:hackney, "~> 1.16", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "975cc87c845a103d3d1ea1ccfd68a2700c211a434d8428b10c323dc95dc5b980"}, + "hackney": {:hex, :hackney, "1.18.1", "f48bf88f521f2a229fc7bae88cf4f85adc9cd9bcf23b5dc8eb6a1788c662c4f6", [:rebar3], [{:certifi, "~>2.9.0", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~>6.1.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "~>1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.3.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~>1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~>0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "a4ecdaff44297e9b5894ae499e9a070ea1888c84afdd1fd9b7b2bc384950128e"}, + "html_entities": {:hex, :html_entities, "0.5.2", "9e47e70598da7de2a9ff6af8758399251db6dbb7eebe2b013f2bbd2515895c3c", [:mix], [], "hexpm", "c53ba390403485615623b9531e97696f076ed415e8d8058b1dbaa28181f4fdcc"}, + "httpoison": {:hex, :httpoison, "1.8.0", "6b85dea15820b7804ef607ff78406ab449dd78bed923a49c7160e1886e987a3d", [:mix], [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "28089eaa98cf90c66265b6b5ad87c59a3729bea2e74e9d08f9b51eb9729b3c3a"}, "idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~>0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"}, - "jason": {:hex, :jason, "1.2.2", "ba43e3f2709fd1aa1dce90aaabfd039d000469c05c56f0b8e31978e03fa39052", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "18a228f5f0058ee183f29f9eae0805c6e59d61c3b006760668d8d18ff0d12179"}, + "jason": {:hex, :jason, "1.3.0", "fa6b82a934feb176263ad2df0dbd91bf633d4a46ebfdffea0c8ae82953714946", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "53fc1f51255390e0ec7e50f9cb41e751c260d065dcba2bf0d08dc51a4002c2ac"}, "jose": {:hex, :jose, "1.11.1", "59da64010c69aad6cde2f5b9248b896b84472e99bd18f246085b7b9fe435dcdb", [:mix, :rebar3], [], "hexpm", "078f6c9fb3cd2f4cfafc972c814261a7d1e8d2b3685c0a76eb87e158efff1ac5"}, "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"}, - "mime": {:hex, :mime, "1.6.0", "dabde576a497cef4bbdd60aceee8160e02a6c89250d6c0b29e56c0dfb00db3d2", [:mix], [], "hexpm", "31a1a8613f8321143dde1dafc36006a17d28d02bdfecb9e95a880fa7aabd19a7"}, + "mime": {:hex, :mime, "2.0.2", "0b9e1a4c840eafb68d820b0e2158ef5c49385d17fb36855ac6e7e087d4b1dcc5", [:mix], [], "hexpm", "e6a3f76b4c277739e36c2e21a2c640778ba4c3846189d5ab19f97f126df5f9b7"}, "mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm", "f278585650aa581986264638ebf698f8bb19df297f66ad91b18910dfc6e19323"}, "mimic": {:hex, :mimic, "1.4.0", "2712fa9f26df858414cb95ba2d6f0343b0bd0f36c6a26f529e58570727d5f142", [:mix], [], "hexpm", "2c744d50bae5dac0772d624507b5a72bdd1413a73ae11291cae9264de9d0865f"}, "nimble_csv": {:hex, :nimble_csv, "1.1.0", "b1dba4a86be9e03065c9de829050468e591f569100332db949e7ce71be0afc25", [:mix], [], "hexpm", "e986755bc302832cac429be6deda0fc9d82d3c82b47abefb68b3c17c9d949a3f"}, @@ -35,23 +37,23 @@ "parse_trans": {:hex, :parse_trans, "3.3.1", "16328ab840cc09919bd10dab29e431da3af9e9e7e7e6f0089dd5a2d2820011d8", [:rebar3], [], "hexpm", "07cd9577885f56362d414e8c4c4e6bdf10d43a8767abb92d24cbe8b24c54888b"}, "phoenix": {:hex, :phoenix, "1.5.12", "75fddb14c720388eea93d33886166a690416a7ff8633fbd93f364355b6fe1166", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_html, "~> 2.13 or ~> 3.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 2.0", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:plug, "~> 1.10", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 1.0 or ~> 2.2", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:plug_crypto, "~> 1.1.2 or ~> 1.2", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "8f0ae6734fcc18bbaa646c161e2febc46fb899eae43f82679b92530983324113"}, "phoenix_ecto": {:hex, :phoenix_ecto, "4.4.0", "0672ed4e4808b3fbed494dded89958e22fb882de47a97634c0b13e7b0b5f7720", [:mix], [{:ecto, "~> 3.3", [hex: :ecto, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.14.2 or ~> 3.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:plug, "~> 1.9", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "09864e558ed31ee00bd48fcc1d4fc58ae9678c9e81649075431e69dbabb43cc1"}, - "phoenix_html": {:hex, :phoenix_html, "3.0.3", "32812d70841c7e975e01edb591989b2b002b69797db1005b8d0adc1fe717be30", [:mix], [{:plug, "~> 1.5", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "e8152ae9e8c60705659761edb8d8c4bb7e29130a9b0803ec1854fe137ec62dde"}, + "phoenix_html": {:hex, :phoenix_html, "3.2.0", "1c1219d4b6cb22ac72f12f73dc5fad6c7563104d083f711c3fcd8551a1f4ae11", [:mix], [{:plug, "~> 1.5", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "36ec97ba56d25c0136ef1992c37957e4246b649d620958a1f9fa86165f8bc54f"}, "phoenix_live_dashboard": {:hex, :phoenix_live_dashboard, "0.5.0", "3282d8646e1bfc1ef1218f508d9fcefd48cf47f9081b7667bd9b281b688a49cf", [:mix], [{:ecto, "~> 3.6.2 or ~> 3.7", [hex: :ecto, repo: "hexpm", optional: true]}, {:ecto_psql_extras, "~> 0.6", [hex: :ecto_psql_extras, repo: "hexpm", optional: true]}, {:phoenix_live_view, "~> 0.16.0", [hex: :phoenix_live_view, repo: "hexpm", optional: false]}, {:telemetry_metrics, "~> 0.6.0", [hex: :telemetry_metrics, repo: "hexpm", optional: false]}], "hexpm", "609740be43de94ae0abd2c4300ff0356a6e8a9487bf340e69967643a59fa7ec8"}, "phoenix_live_reload": {:hex, :phoenix_live_reload, "1.3.0", "f35f61c3f959c9a01b36defaa1f0624edd55b87e236b606664a556d6f72fd2e7", [:mix], [{:file_system, "~> 0.2.1 or ~> 0.3", [hex: :file_system, repo: "hexpm", optional: false]}, {:phoenix, "~> 1.4", [hex: :phoenix, repo: "hexpm", optional: false]}], "hexpm", "02c1007ae393f2b76ec61c1a869b1e617179877984678babde131d716f95b582"}, "phoenix_live_view": {:hex, :phoenix_live_view, "0.16.3", "f6f597c74cfc8b00919eb717852b9b750fc326f815ef6b8d6ae503c7d9d09871", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix, "~> 1.5.9 or ~> 1.6.0", [hex: :phoenix, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 3.0", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.2 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "42bac6b82fd182f10b0b3b39d57aaf2fbe5eab423f8216afeeea9c6b1bd14554"}, "phoenix_pubsub": {:hex, :phoenix_pubsub, "2.0.0", "a1ae76717bb168cdeb10ec9d92d1480fec99e3080f011402c0a2d68d47395ffb", [:mix], [], "hexpm", "c52d948c4f261577b9c6fa804be91884b381a7f8f18450c5045975435350f771"}, - "plug": {:hex, :plug, "1.12.1", "645678c800601d8d9f27ad1aebba1fdb9ce5b2623ddb961a074da0b96c35187d", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "d57e799a777bc20494b784966dc5fbda91eb4a09f571f76545b72a634ce0d30b"}, + "plug": {:hex, :plug, "1.13.4", "addb6e125347226e3b11489e23d22a60f7ab74786befb86c14f94fb5f23ca9a4", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "06114c1f2a334212fe3ae567dbb3b1d29fd492c1a09783d52f3d489c1a6f4cf2"}, "plug_cowboy": {:hex, :plug_cowboy, "2.5.1", "7cc96ff645158a94cf3ec9744464414f02287f832d6847079adfe0b58761cbd0", [:mix], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:cowboy_telemetry, "~> 0.3", [hex: :cowboy_telemetry, repo: "hexpm", optional: false]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "107d0a5865fa92bcb48e631cc0729ae9ccfa0a9f9a1bd8f01acb513abf1c2d64"}, "plug_crypto": {:hex, :plug_crypto, "1.2.2", "05654514ac717ff3a1843204b424477d9e60c143406aa94daf2274fdd280794d", [:mix], [], "hexpm", "87631c7ad914a5a445f0a3809f99b079113ae4ed4b867348dd9eec288cecb6db"}, - "postgrex": {:hex, :postgrex, "0.15.10", "2809dee1b1d76f7cbabe570b2a9285c2e7b41be60cf792f5f2804a54b838a067", [:mix], [{:connection, "~> 1.0", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "1560ca427542f6b213f8e281633ae1a3b31cdbcd84ebd7f50628765b8f6132be"}, + "postgrex": {:hex, :postgrex, "0.16.2", "0f83198d0e73a36e8d716b90f45f3bde75b5eebf4ade4f43fa1f88c90a812f74", [:mix], [{:connection, "~> 1.1", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "a9ea589754d9d4d076121090662b7afe155b374897a6550eb288f11d755acfa0"}, "ranch": {:hex, :ranch, "1.8.0", "8c7a100a139fd57f17327b6413e4167ac559fbc04ca7448e9be9057311597a1d", [:make, :rebar3], [], "hexpm", "49fbcfd3682fab1f5d109351b61257676da1a2fdbe295904176d5e521a2ddfe5"}, "sobelow": {:hex, :sobelow, "0.11.1", "23438964486f8112b41e743bbfd402da3e5b296fdc9eacab29914b79c48916dd", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "9897363a7eff96f4809304a90aad819e2ad5e5d24db547af502885146746a53c"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.6", "cf344f5692c82d2cd7554f5ec8fd961548d4fd09e7d22f5b62482e5aeaebd4b0", [:make, :mix, :rebar3], [], "hexpm", "bdb0d2471f453c88ff3908e7686f86f9be327d065cc1ec16fa4540197ea04680"}, "telemetry": {:hex, :telemetry, "0.4.3", "a06428a514bdbc63293cd9a6263aad00ddeb66f608163bdec7c8995784080818", [:rebar3], [], "hexpm", "eb72b8365ffda5bed68a620d1da88525e326cb82a75ee61354fc24b844768041"}, "telemetry_metrics": {:hex, :telemetry_metrics, "0.6.1", "315d9163a1d4660aedc3fee73f33f1d355dcc76c5c3ab3d59e76e3edf80eef1f", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "7be9e0871c41732c233be71e4be11b96e56177bf15dde64a8ac9ce72ac9834c6"}, "telemetry_poller": {:hex, :telemetry_poller, "0.5.1", "21071cc2e536810bac5628b935521ff3e28f0303e770951158c73eaaa01e962a", [:rebar3], [{:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "4cab72069210bc6e7a080cec9afffad1b33370149ed5d379b81c7c5f0c663fd4"}, - "tesla": {:hex, :tesla, "1.3.3", "26ae98627af5c406584aa6755ab5fc96315d70d69a24dd7f8369cfcb75094a45", [:mix], [{:castore, "~> 0.1", [hex: :castore, repo: "hexpm", optional: true]}, {:exjsx, ">= 3.0.0", [hex: :exjsx, repo: "hexpm", optional: true]}, {:fuse, "~> 2.4", [hex: :fuse, repo: "hexpm", optional: true]}, {:gun, "~> 1.3", [hex: :gun, repo: "hexpm", optional: true]}, {:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: true]}, {:ibrowse, "~> 4.4.0", [hex: :ibrowse, repo: "hexpm", optional: true]}, {:jason, ">= 1.0.0", [hex: :jason, repo: "hexpm", optional: true]}, {:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.0", [hex: :mint, repo: "hexpm", optional: true]}, {:poison, ">= 1.0.0", [hex: :poison, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm", "2648f1c276102f9250299e0b7b57f3071c67827349d9173f34c281756a1b124c"}, + "tesla": {:hex, :tesla, "1.4.4", "bb89aa0c9745190930366f6a2ac612cdf2d0e4d7fff449861baa7875afd797b2", [:mix], [{:castore, "~> 0.1", [hex: :castore, repo: "hexpm", optional: true]}, {:exjsx, ">= 3.0.0", [hex: :exjsx, repo: "hexpm", optional: true]}, {:finch, "~> 0.3", [hex: :finch, repo: "hexpm", optional: true]}, {:fuse, "~> 2.4", [hex: :fuse, repo: "hexpm", optional: true]}, {:gun, "~> 1.3", [hex: :gun, repo: "hexpm", optional: true]}, {:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: true]}, {:ibrowse, "4.4.0", [hex: :ibrowse, repo: "hexpm", optional: true]}, {:jason, ">= 1.0.0", [hex: :jason, repo: "hexpm", optional: true]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.0", [hex: :mint, repo: "hexpm", optional: true]}, {:poison, ">= 1.0.0", [hex: :poison, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm", "d5503a49f9dec1b287567ea8712d085947e247cb11b06bc54adb05bfde466457"}, "unicode_util_compat": {:hex, :unicode_util_compat, "0.7.0", "bc84380c9ab48177092f43ac89e4dfa2c6d62b40b8bd132b1059ecc7232f9a78", [:rebar3], [], "hexpm", "25eee6d67df61960cf6a794239566599b09e17e668d3700247bc498638152521"}, - "wallaby": {:hex, :wallaby, "0.28.0", "2ff217c0f245cadb3e5d91748ebcf0102873ceb9ef8a3507717c8bdd73915668", [:mix], [{:ecto_sql, ">= 3.0.0", [hex: :ecto_sql, repo: "hexpm", optional: true]}, {:httpoison, "~> 0.12 or ~> 1.0", [hex: :httpoison, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:phoenix_ecto, ">= 3.0.0", [hex: :phoenix_ecto, repo: "hexpm", optional: true]}, {:web_driver_client, "~> 0.1.0", [hex: :web_driver_client, repo: "hexpm", optional: false]}], "hexpm", "e58112650d0b51e81714a626eab7d486d7a77342c9bbc2ba262b6653f9b22558"}, - "web_driver_client": {:hex, :web_driver_client, "0.1.0", "19466a989c76b7ec803c796cec0fec4611a64f445fd5120ce50c9e3817e09c2c", [:mix], [{:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:tesla, "~> 1.3.0", [hex: :tesla, repo: "hexpm", optional: false]}], "hexpm", "c9c031ca915e8fc75b5e24ac93503244f3cc406dd7f53047087a45aa62d60e9e"}, + "wallaby": {:hex, :wallaby, "0.29.1", "7ad61c98a1425db291a392e45504e897b1271c746b40fe9f19f87a86729d2fac", [:mix], [{:ecto_sql, ">= 3.0.0", [hex: :ecto_sql, repo: "hexpm", optional: true]}, {:httpoison, "~> 0.12 or ~> 1.0", [hex: :httpoison, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:phoenix_ecto, ">= 3.0.0", [hex: :phoenix_ecto, repo: "hexpm", optional: true]}, {:web_driver_client, "~> 0.2.0", [hex: :web_driver_client, repo: "hexpm", optional: false]}], "hexpm", "21b1360c4b13adbb0a1ff5c1053204cc4489ac81e9579c6c5011a27915cb7415"}, + "web_driver_client": {:hex, :web_driver_client, "0.2.0", "63b76cd9eb3b0716ec5467a0f8bead73d3d9612e63f7560d21357f03ad86e31a", [:mix], [{:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:tesla, "~> 1.3", [hex: :tesla, repo: "hexpm", optional: false]}], "hexpm", "83cc6092bc3e74926d1c8455f0ce927d5d1d36707b74d9a65e38c084aab0350f"}, } diff --git a/priv/repo/migrations/20211129154200_create_reports_table.exs b/priv/repo/migrations/20211129154200_create_reports_table.exs new file mode 100644 index 0000000..ffc7126 --- /dev/null +++ b/priv/repo/migrations/20211129154200_create_reports_table.exs @@ -0,0 +1,20 @@ +defmodule Gscraper.Repo.Migrations.CreateReportsTable do + use Ecto.Migration + + def change do + create table(:reports) do + add :keyword_id, references(:keywords, on_delete: :delete_all) + add :ads_count, :integer + add :top_ads_count, :integer + add :top_ads_urls, {:array, :string} + add :organic_result_count, :integer + add :organic_urls, {:array, :string} + add :links_count, :integer + add :raw_html, :string + + timestamps() + end + + create index(:reports, [:keyword_id]) + end +end diff --git a/xyz.do b/xyz.do new file mode 100644 index 0000000..e69de29 diff --git a/xyz.ok b/xyz.ok new file mode 100644 index 0000000..e69de29