From b07d32129d46116e083ca7847d15ff30c3d2159b Mon Sep 17 00:00:00 2001 From: Martin Splitt Date: Tue, 31 Mar 2026 12:42:04 +0000 Subject: [PATCH] Adds WASM build --- BUILD | 22 ++++++++++++++++++++++ MODULE.bazel | 5 +++++ robots_wasm.cc | 19 +++++++++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 robots_wasm.cc diff --git a/BUILD b/BUILD index 9b8d5eb..90d523c 100644 --- a/BUILD +++ b/BUILD @@ -1,4 +1,5 @@ load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test") +load("@emsdk//emscripten_toolchain:wasm_rules.bzl", "wasm_cc_binary") package(default_visibility = ["//visibility:public"]) @@ -60,3 +61,24 @@ cc_binary( ":robots", ], ) + +cc_binary( + name = "robots_js", + srcs = ["robots_wasm.cc"], + deps = [ + ":robots", + ], + copts = [ + "--bind" + ], + linkopts = [ + "-l", "embind", + "-s", "EXPORTED_FUNCTIONS=_IsAllowed", + "-s", "EXPORTED_RUNTIME_METHODS=ccall,cwrap" + ], +) + +wasm_cc_binary( + name = "robots_wasm", + cc_target = ":robots_js", +) diff --git a/MODULE.bazel b/MODULE.bazel index 79981ab..735b506 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -23,3 +23,8 @@ bazel_dep( name = "rules_cc", version = "0.2.17", ) + +bazel_dep( + name = "emsdk", + version = "5.0.4" +) diff --git a/robots_wasm.cc b/robots_wasm.cc new file mode 100644 index 0000000..38854d5 --- /dev/null +++ b/robots_wasm.cc @@ -0,0 +1,19 @@ +#include <emscripten/bind.h> +#include "robots.h" + +// Returns true if the given user agent is allowed to crawl the URL given the robots content +// Otherwise, returns false. 
+// The asm label on the declaration below pins the linker symbol to the plain name "IsAllowed" instead of a mangled C++ name.
+bool IsAllowed(std::string user_agent, std::string url, std::string robots_content) asm("IsAllowed");
+bool IsAllowed(std::string user_agent, std::string url, std::string robots_content) {
+  googlebot::RobotsMatcher matcher;
+  std::vector<std::string> user_agents(1, user_agent);  // one-element agent list, passed by pointer below
+  return matcher.AllowedByRobots(robots_content, &user_agents, url);
+}
+
+// Register IsAllowed with embind so JavaScript can call it as Module.IsAllowed(user_agent, url, robots_content).
+// Example: Module.IsAllowed('googlebot', 'https://example.com/test', 'user-agent: *\ndisallow: /') // will return false
+EMSCRIPTEN_BINDINGS(my_module) {
+  emscripten::function("IsAllowed", &IsAllowed);
+}
+