From a2e9eac6f1fcf217751d78bcdea9b5cbc88cc63e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 18 May 2026 17:00:55 +0000 Subject: [PATCH] chore(elixir-sdk): regenerate from OpenAPI spec (v1.0.2) --- apps/elixir-sdk/lib/firecrawl.ex | 63 ++++++++++++++++++++++++++++---- apps/elixir-sdk/mix.exs | 2 +- 2 files changed, 57 insertions(+), 8 deletions(-) diff --git a/apps/elixir-sdk/lib/firecrawl.ex b/apps/elixir-sdk/lib/firecrawl.ex index 392ad2ab1e..e2a4184404 100644 --- a/apps/elixir-sdk/lib/firecrawl.ex +++ b/apps/elixir-sdk/lib/firecrawl.ex @@ -265,12 +265,14 @@ defmodule Firecrawl do delay: [type: {:or, [:integer, :float]}, doc: "Delay in seconds between scrapes. This helps respect website rate limits. Setting this forces concurrency to 1."], exclude_paths: [type: {:list, :string}, doc: "URL pathname regex patterns that exclude matching URLs from the crawl. For example, if you set \"excludePaths\": [\"blog/.*\"] for the base URL firecrawl.dev, any results matching that pattern will be excluded, such as https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap."], ignore_query_parameters: [type: :boolean, doc: "Do not re-scrape the same path with different (or none) query parameters"], + ignore_robots_txt: [type: :boolean, doc: "Ignore the website's robots.txt rules. Enterprise only — contact support@firecrawl.com to enable."], include_paths: [type: {:list, :string}, doc: "URL pathname regex patterns that include matching URLs in the crawl. Only the paths that match the specified patterns will be included in the response. Note: the starting URL is also checked against these patterns — if it does not match, the crawl may return 0 pages. For example, if you set \"includePaths\": [\"blog/.*\"] for the base URL firecrawl.dev/blog, only pages under /blog/ will be included in the results, such as https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap."], limit: [type: :integer, doc: "Maximum number of pages to crawl. Default limit is 10000."], max_concurrency: [type: :integer, doc: "Maximum number of concurrent scrapes. This parameter allows you to set a concurrency limit for this crawl. If not specified, the crawl adheres to your team's concurrency limit."], max_discovery_depth: [type: :integer, doc: "Maximum depth to crawl based on discovery order. The root site and sitemapped pages has a discovery depth of 0. For example, if you set it to 1, and you set `sitemap: 'skip'`, you will only crawl the entered URL and all URLs that are linked on that page."], prompt: [type: :string, doc: "A prompt to use to generate the crawler options (all the parameters below) from natural language. Explicitly set parameters will override the generated equivalents."], regex_on_full_url: [type: :boolean, doc: "When true, includePaths and excludePaths regex patterns are matched against the full URL (including query parameters) instead of just the URL pathname. Useful when you need to filter URLs based on query strings."], + robots_user_agent: [type: :string, doc: "Custom User-Agent string for robots.txt evaluation. When set, robots.txt is fetched with this User-Agent and allow/disallow rules are matched against it instead of the default. Enterprise only — contact support@firecrawl.com to enable."], scrape_options: [type: :keyword_list], sitemap: [type: {:in, [:skip, :include, :only]}, doc: "Sitemap mode when crawling. If you set it to 'skip', the crawler will ignore the website sitemap and only crawl the entered URL and discover pages from there onwards. If you set it to 'only', the crawler will only crawl URLs from the sitemap (plus the start URL) and will not discover links from HTML."], url: [type: :string, required: true, doc: "The base URL to start crawling from"], @@ -278,7 +280,7 @@ defmodule Firecrawl do zero_data_retention: [type: :boolean, doc: "If true, this will enable zero data retention for this crawl. To enable this feature, please contact help@firecrawl.dev"] ]) - @crawl_urls_key_mapping %{allow_external_links: "allowExternalLinks", allow_subdomains: "allowSubdomains", crawl_entire_domain: "crawlEntireDomain", delay: "delay", exclude_paths: "excludePaths", ignore_query_parameters: "ignoreQueryParameters", include_paths: "includePaths", limit: "limit", max_concurrency: "maxConcurrency", max_discovery_depth: "maxDiscoveryDepth", prompt: "prompt", regex_on_full_url: "regexOnFullURL", scrape_options: "scrapeOptions", sitemap: "sitemap", url: "url", webhook: "webhook", zero_data_retention: "zeroDataRetention"} + @crawl_urls_key_mapping %{allow_external_links: "allowExternalLinks", allow_subdomains: "allowSubdomains", crawl_entire_domain: "crawlEntireDomain", delay: "delay", exclude_paths: "excludePaths", ignore_query_parameters: "ignoreQueryParameters", ignore_robots_txt: "ignoreRobotsTxt", include_paths: "includePaths", limit: "limit", max_concurrency: "maxConcurrency", max_discovery_depth: "maxDiscoveryDepth", prompt: "prompt", regex_on_full_url: "regexOnFullURL", robots_user_agent: "robotsUserAgent", scrape_options: "scrapeOptions", sitemap: "sitemap", url: "url", webhook: "webhook", zero_data_retention: "zeroDataRetention"} @doc """ Crawl multiple URLs based on options @@ -894,6 +896,47 @@ defmodule Firecrawl do end + @parse_file_schema NimbleOptions.new!([ + + ]) + + @parse_file_key_mapping %{} + + @doc """ + Upload and parse a file + + `POST /parse` + + Tag: Scraping + + ## Parameters + + Validated by `NimbleOptions`. Pass params as a keyword list with snake_case keys. + See `@parse_file_schema` for the full schema. + + ## Returns + + * `{:ok, %Req.Response{}}` on success + * `{:error, exception}` on HTTP or validation failure + """ + @spec parse_file(keyword(), keyword()) :: response() + def parse_file(params \\ [], opts \\ []) do + with {:ok, params} <- NimbleOptions.validate(params, @parse_file_schema) do + Req.post(client(opts), url: "/parse", json: to_body(params, @parse_file_key_mapping)) + end + end + + + @doc """ + Bang variant of `parse_file`. Raises on error. + """ + @spec parse_file!(keyword(), keyword()) :: Req.Response.t() + def parse_file!(params \\ [], opts \\ []) do + params = NimbleOptions.validate!(params, @parse_file_schema) + Req.post!(client(opts), url: "/parse", json: to_body(params, @parse_file_key_mapping)) + end + + @scrape_and_extract_from_url_schema NimbleOptions.new!([ url: [type: :string, required: true, doc: "The URL to scrape"], actions: [type: {:list, :any}, doc: "Actions to perform on the page before grabbing the content"], @@ -903,10 +946,12 @@ defmodule Firecrawl do headers: [type: :any, doc: "Headers to send with the request. Can be used to send cookies, user-agent, etc."], include_tags: [type: {:list, :string}, doc: "Tags to include in the output."], location: [type: :keyword_list, doc: "Location settings for the request. When specified, this will use an appropriate proxy if available and emulate the corresponding language and timezone settings. Defaults to 'US' if not specified."], + lockdown: [type: :boolean, doc: "If true, serves the request from Firecrawl's cache only and never makes an outbound request to the target URL. Designed for compliance-constrained or air-gapped environments where the scrape request itself could leak sensitive information. On cache miss, returns a 404 with error code SCRAPE_LOCKDOWN_CACHE_MISS (the URL is never logged on miss). Lockdown requests are treated as zero data retention. Default maxAge is extended to 2 years so existing cached pages remain eligible. Billed at 5 credits on hit, 1 credit on cache miss."], max_age: [type: :integer, doc: "Returns a cached version of the page if it is younger than this age in milliseconds. If a cached version of the page is older than this value, the page will be scraped. If you do not need extremely fresh data, enabling this can speed up your scrapes by 500%. Defaults to 2 days."], min_age: [type: :integer, doc: "When set, the request only checks the cache and never triggers a fresh scrape. The value is in milliseconds and specifies the minimum age the cached data must be. If matching cached data exists, it is returned instantly. If no cached data is found, a 404 with error code SCRAPE_NO_CACHED_DATA is returned. Set to 1 to accept any cached data regardless of age."], mobile: [type: :boolean, doc: "Set to true if you want to emulate scraping from a mobile device. Useful for testing responsive pages and taking mobile screenshots."], - only_main_content: [type: :boolean, doc: "Only return the main content of the page excluding headers, navs, footers, etc."], + only_clean_content: [type: :boolean, doc: "Beta. Run an additional LLM-based pass over the generated markdown to remove residual boilerplate that `onlyMainContent` can miss (cookie banners, ad blocks, social share widgets, breadcrumbs, newsletter signups, comment sections, related-article lists). Headings, lists, tables, code blocks, image references, and inline links are preserved. Can be combined with `onlyMainContent` (the most common setup) or used on its own. Skipped with a warning when the markdown exceeds the cleaning model's output token limit (the original markdown is preserved). Not supported on zero-data-retention requests."], + only_main_content: [type: :boolean, doc: "Only return the main content of the page excluding headers, navs, footers, etc. This is a deterministic HTML-level filter applied before markdown is generated; no LLM is involved."], parsers: [type: {:list, :any}, doc: "Controls how files are processed during scraping. When \"pdf\" is included (default), the PDF content is extracted and converted to markdown format, with billing based on the number of pages (1 credit per page). When an empty array is passed, the PDF file is returned in base64 encoding with a flat rate of 1 credit for the entire PDF."], profile: [type: :keyword_list, doc: "Enable persistent browser storage across scrape and interact sessions. Pass a profile when scraping to preserve cookies, localStorage, and session data. Sessions with the same profile name share browser state."], proxy: [type: {:in, [:basic, :enhanced, :auto]}, doc: "Specifies the type of proxy to use.\n\n - **basic**: Proxies for scraping sites with none to basic anti-bot solutions. Fast and usually works.\n - **enhanced**: Enhanced proxies for scraping sites with advanced anti-bot solutions. Slower, but more reliable on certain sites. Costs up to 5 credits per request.\n - **auto**: Firecrawl will automatically retry scraping with enhanced proxies if the basic proxy fails. If the retry with enhanced is successful, 5 credits will be billed for the scrape. If the first attempt with basic is successful, only the regular cost will be billed."], @@ -918,7 +963,7 @@ defmodule Firecrawl do zero_data_retention: [type: :boolean, doc: "If true, this will enable zero data retention for this scrape. To enable this feature, please contact help@firecrawl.dev"] ]) - @scrape_and_extract_from_url_key_mapping %{url: "url", actions: "actions", block_ads: "blockAds", exclude_tags: "excludeTags", formats: "formats", headers: "headers", include_tags: "includeTags", location: "location", max_age: "maxAge", min_age: "minAge", mobile: "mobile", only_main_content: "onlyMainContent", parsers: "parsers", profile: "profile", proxy: "proxy", remove_base64_images: "removeBase64Images", skip_tls_verification: "skipTlsVerification", store_in_cache: "storeInCache", timeout: "timeout", wait_for: "waitFor", zero_data_retention: "zeroDataRetention"} + @scrape_and_extract_from_url_key_mapping %{url: "url", actions: "actions", block_ads: "blockAds", exclude_tags: "excludeTags", formats: "formats", headers: "headers", include_tags: "includeTags", location: "location", lockdown: "lockdown", max_age: "maxAge", min_age: "minAge", mobile: "mobile", only_clean_content: "onlyCleanContent", only_main_content: "onlyMainContent", parsers: "parsers", profile: "profile", proxy: "proxy", remove_base64_images: "removeBase64Images", skip_tls_verification: "skipTlsVerification", store_in_cache: "storeInCache", timeout: "timeout", wait_for: "waitFor", zero_data_retention: "zeroDataRetention"} @doc """ Scrape a single URL and optionally extract information using an LLM @@ -967,10 +1012,12 @@ defmodule Firecrawl do headers: [type: :any, doc: "Headers to send with the request. Can be used to send cookies, user-agent, etc."], include_tags: [type: {:list, :string}, doc: "Tags to include in the output."], location: [type: :keyword_list, doc: "Location settings for the request. When specified, this will use an appropriate proxy if available and emulate the corresponding language and timezone settings. Defaults to 'US' if not specified."], + lockdown: [type: :boolean, doc: "If true, serves the request from Firecrawl's cache only and never makes an outbound request to the target URL. Designed for compliance-constrained or air-gapped environments where the scrape request itself could leak sensitive information. On cache miss, returns a 404 with error code SCRAPE_LOCKDOWN_CACHE_MISS (the URL is never logged on miss). Lockdown requests are treated as zero data retention. Default maxAge is extended to 2 years so existing cached pages remain eligible. Billed at 5 credits on hit, 1 credit on cache miss."], max_age: [type: :integer, doc: "Returns a cached version of the page if it is younger than this age in milliseconds. If a cached version of the page is older than this value, the page will be scraped. If you do not need extremely fresh data, enabling this can speed up your scrapes by 500%. Defaults to 2 days."], min_age: [type: :integer, doc: "When set, the request only checks the cache and never triggers a fresh scrape. The value is in milliseconds and specifies the minimum age the cached data must be. If matching cached data exists, it is returned instantly. If no cached data is found, a 404 with error code SCRAPE_NO_CACHED_DATA is returned. Set to 1 to accept any cached data regardless of age."], mobile: [type: :boolean, doc: "Set to true if you want to emulate scraping from a mobile device. Useful for testing responsive pages and taking mobile screenshots."], - only_main_content: [type: :boolean, doc: "Only return the main content of the page excluding headers, navs, footers, etc."], + only_clean_content: [type: :boolean, doc: "Beta. Run an additional LLM-based pass over the generated markdown to remove residual boilerplate that `onlyMainContent` can miss (cookie banners, ad blocks, social share widgets, breadcrumbs, newsletter signups, comment sections, related-article lists). Headings, lists, tables, code blocks, image references, and inline links are preserved. Can be combined with `onlyMainContent` (the most common setup) or used on its own. Skipped with a warning when the markdown exceeds the cleaning model's output token limit (the original markdown is preserved). Not supported on zero-data-retention requests."], + only_main_content: [type: :boolean, doc: "Only return the main content of the page excluding headers, navs, footers, etc. This is a deterministic HTML-level filter applied before markdown is generated; no LLM is involved."], parsers: [type: {:list, :any}, doc: "Controls how files are processed during scraping. When \"pdf\" is included (default), the PDF content is extracted and converted to markdown format, with billing based on the number of pages (1 credit per page). When an empty array is passed, the PDF file is returned in base64 encoding with a flat rate of 1 credit for the entire PDF."], profile: [type: :keyword_list, doc: "Enable persistent browser storage across scrape and interact sessions. Pass a profile when scraping to preserve cookies, localStorage, and session data. Sessions with the same profile name share browser state."], proxy: [type: {:in, [:basic, :enhanced, :auto]}, doc: "Specifies the type of proxy to use.\n\n - **basic**: Proxies for scraping sites with none to basic anti-bot solutions. Fast and usually works.\n - **enhanced**: Enhanced proxies for scraping sites with advanced anti-bot solutions. Slower, but more reliable on certain sites. Costs up to 5 credits per request.\n - **auto**: Firecrawl will automatically retry scraping with enhanced proxies if the basic proxy fails. If the retry with enhanced is successful, 5 credits will be billed for the scrape. If the first attempt with basic is successful, only the regular cost will be billed."], @@ -982,7 +1029,7 @@ defmodule Firecrawl do zero_data_retention: [type: :boolean, doc: "If true, this will enable zero data retention for this batch scrape. To enable this feature, please contact help@firecrawl.dev"] ]) - @scrape_and_extract_from_urls_key_mapping %{ignore_invalid_urls: "ignoreInvalidURLs", max_concurrency: "maxConcurrency", urls: "urls", webhook: "webhook", actions: "actions", block_ads: "blockAds", exclude_tags: "excludeTags", formats: "formats", headers: "headers", include_tags: "includeTags", location: "location", max_age: "maxAge", min_age: "minAge", mobile: "mobile", only_main_content: "onlyMainContent", parsers: "parsers", profile: "profile", proxy: "proxy", remove_base64_images: "removeBase64Images", skip_tls_verification: "skipTlsVerification", store_in_cache: "storeInCache", timeout: "timeout", wait_for: "waitFor", zero_data_retention: "zeroDataRetention"} + @scrape_and_extract_from_urls_key_mapping %{ignore_invalid_urls: "ignoreInvalidURLs", max_concurrency: "maxConcurrency", urls: "urls", webhook: "webhook", actions: "actions", block_ads: "blockAds", exclude_tags: "excludeTags", formats: "formats", headers: "headers", include_tags: "includeTags", location: "location", lockdown: "lockdown", max_age: "maxAge", min_age: "minAge", mobile: "mobile", only_clean_content: "onlyCleanContent", only_main_content: "onlyMainContent", parsers: "parsers", profile: "profile", proxy: "proxy", remove_base64_images: "removeBase64Images", skip_tls_verification: "skipTlsVerification", store_in_cache: "storeInCache", timeout: "timeout", wait_for: "waitFor", zero_data_retention: "zeroDataRetention"} @doc """ Scrape multiple URLs and optionally extract information using an LLM @@ -1023,8 +1070,10 @@ defmodule Firecrawl do categories: [type: {:list, :any}, doc: "Categories to filter results by. Defaults to [], which means results will not be filtered by any categories."], country: [type: :string, doc: "ISO country code for geo-targeting search results (e.g. `US`). For best results, set both this and the `location` parameter."], enterprise: [type: {:list, :string}, doc: "Enterprise search options for Zero Data Retention (ZDR). Use `[\"zdr\"]` for end-to-end ZDR (10 credits / 10 results) or `[\"anon\"]` for anonymized ZDR (2 credits / 10 results). Must be enabled for your team."], + exclude_domains: [type: {:list, :string}, doc: "Excludes search results from the specified domains. Domains should be hostnames only, without protocol or path. Cannot be used with includeDomains."], ignore_invalid_urls: [type: :boolean, doc: "Excludes URLs from the search results that are invalid for other Firecrawl endpoints. This helps reduce errors if you are piping data from search into other Firecrawl API endpoints."], - limit: [type: :integer, doc: "Maximum number of results to return"], + include_domains: [type: {:list, :string}, doc: "Restricts search results to the specified domains. Domains should be hostnames only, without protocol or path. Cannot be used with excludeDomains."], + limit: [type: :integer, doc: "Maximum number of results to return (per source type when using multiple sources)"], location: [type: :string, doc: "Location parameter for search results (e.g. `San Francisco,California,United States`). For best results, set both this and the `country` parameter."], query: [type: :string, required: true, doc: "The search query"], scrape_options: [type: :keyword_list, doc: "Options for scraping search results"], @@ -1033,7 +1082,7 @@ defmodule Firecrawl do timeout: [type: :integer, doc: "Timeout in milliseconds"] ]) - @search_and_scrape_key_mapping %{categories: "categories", country: "country", enterprise: "enterprise", ignore_invalid_urls: "ignoreInvalidURLs", limit: "limit", location: "location", query: "query", scrape_options: "scrapeOptions", sources: "sources", tbs: "tbs", timeout: "timeout"} + @search_and_scrape_key_mapping %{categories: "categories", country: "country", enterprise: "enterprise", exclude_domains: "excludeDomains", ignore_invalid_urls: "ignoreInvalidURLs", include_domains: "includeDomains", limit: "limit", location: "location", query: "query", scrape_options: "scrapeOptions", sources: "sources", tbs: "tbs", timeout: "timeout"} @doc """ Search and optionally scrape search results diff --git a/apps/elixir-sdk/mix.exs b/apps/elixir-sdk/mix.exs index 174f62fe87..e04c3bcb78 100644 --- a/apps/elixir-sdk/mix.exs +++ b/apps/elixir-sdk/mix.exs @@ -1,7 +1,7 @@ defmodule Firecrawl.MixProject do use Mix.Project - @version "1.0.1" + @version "1.0.2" @source_url "https://github.com/firecrawl/firecrawl/tree/main/apps/elixir-sdk" def project do