From d94091cfb26252b3af4017e643db41559995bca1 Mon Sep 17 00:00:00 2001 From: Alexander von Gluck IV Date: Fri, 8 May 2026 09:07:44 -0500 Subject: [PATCH] BugReference: Add new BugReference type; adds #92 * Add a str as well as an href output, this allows printing the matching bug number, or the resulting url based on the configured prefix. --- README.md | 23 +++++++++++-- src/bug.rs | 51 ++++++++++++++++++++++++++++ src/finder.rs | 75 +++++++++++++++++++++++++++++++++++------- src/lib.rs | 24 ++++++++++++-- tests/bug_reference.rs | 73 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 231 insertions(+), 15 deletions(-) create mode 100644 src/bug.rs create mode 100644 tests/bug_reference.rs diff --git a/README.md b/README.md index 6a720fa..955aecd 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ Linkify ======= -Linkify is a Rust library to find links such as URLs and email addresses in +Linkify is a Rust library to find links such as URLs, email addresses, and bug references in plain text. It's smart about where a link ends, such as with trailing punctuation. @@ -29,7 +29,7 @@ Seems simple enough. But then we also have these cases: This library behaves as you'd expect in the above cases and many more. It uses a simple scan with linear runtime. -In addition to URLs, it can also find email addresses. +In addition to URLs, it can also find email addresses and bug references like `#12345`. ## Demo 🧑‍🔬 @@ -62,6 +62,7 @@ assert_eq!(1, links.len()); let link = &links[0]; assert_eq!("http://example.com", link.as_str()); +assert_eq!("http://example.com", link.href()); assert_eq!(14, link.start()); assert_eq!(32, link.end()); assert_eq!(&LinkKind::Url, link.kind()); @@ -98,6 +99,24 @@ assert_eq!("foo@example.com", link.as_str()); assert_eq!(&LinkKind::Email, link.kind()); ``` +Configure a bug reference prefix: + +```rust +use linkify::{LinkFinder, LinkKind}; + +let input = "Fixed in #12345"; +let mut finder = LinkFinder::new(); +finder.kinds(&[LinkKind::BugReference]); +finder.bug_reference_prefix("https://example.org/bugs/"); +let links: Vec<_> = finder.links(input).collect(); + +assert_eq!(1, links.len()); +let link = &links[0]; +assert_eq!("#12345", link.as_str()); +assert_eq!("https://example.org/bugs/12345", link.href()); +assert_eq!(&LinkKind::BugReference, link.kind()); +``` + See full documentation on [docs.rs](https://docs.rs/linkify). ## Conformance diff --git a/src/bug.rs b/src/bug.rs new file mode 100644 index 0000000..5f26ea4 --- /dev/null +++ b/src/bug.rs @@ -0,0 +1,51 @@ +use std::ops::Range; + +use crate::scanner::Scanner; + +/// Scan for bug references such as `#12345`. +pub struct BugReferenceScanner; + +impl Scanner for BugReferenceScanner { + fn scan(&self, s: &str, hash: usize) -> Option> { + if !self.find_start(&s[..hash]) { + return None; + } + + let after_hash = hash + 1; + let digits = s[after_hash..] + .bytes() + .take_while(|byte| byte.is_ascii_digit()) + .count(); + + if digits == 0 { + return None; + } + + let end = after_hash + digits; + if !self.find_end(&s[end..]) { + return None; + } + + Some(Range { start: hash, end }) + } +} + +impl BugReferenceScanner { + fn find_start(&self, s: &str) -> bool { + match s.chars().next_back() { + Some(c) => !Self::identifier_char(c) && c != '#', + None => true, + } + } + + fn find_end(&self, s: &str) -> bool { + match s.chars().next() { + Some(c) => !Self::identifier_char(c), + None => true, + } + } + + fn identifier_char(c: char) -> bool { + c.is_alphanumeric() || c == '_' + } +} diff --git a/src/finder.rs b/src/finder.rs index e4bb7e2..45414ff 100644 --- a/src/finder.rs +++ b/src/finder.rs @@ -1,8 +1,7 @@ use std::fmt; use std::iter::Peekable; -use memchr::{memchr, memchr2, memchr3}; - +use crate::bug::BugReferenceScanner; use crate::email::EmailScanner; use crate::scanner::Scanner; use crate::url::{DomainScanner, UrlScanner}; @@ -13,6 +12,7 @@ pub struct Link<'t> { text: &'t str, start: usize, end: usize, + href: Option, kind: LinkKind, } @@ -35,6 +35,16 @@ impl<'t> Link<'t> { &self.text[self.start..self.end] } + /// Get the link destination. + /// + /// For most links this is the same as `as_str()`. Bug references return the + /// configured prefix plus the bug number when a bug reference prefix was set + /// on the `LinkFinder`. + #[inline] + pub fn href(&self) -> &str { + self.href.as_deref().unwrap_or_else(|| self.as_str()) + } + /// The type of the link. #[inline] pub fn kind(&self) -> &LinkKind { @@ -50,6 +60,8 @@ pub enum LinkKind { Url, /// E-mail links like "foo@example.org" Email, + /// Bug references like "#12345". + BugReference, } /// Span within the input text. @@ -95,6 +107,8 @@ impl<'t> Span<'t> { /// A configured link finder. #[derive(Debug)] pub struct LinkFinder { + bug_reference: bool, + bug_reference_prefix: Option, email: bool, email_domain_must_have_dot: bool, url: bool, @@ -108,8 +122,10 @@ type TriggerFinder = dyn Fn(&[u8]) -> Option; pub struct Links<'t> { text: &'t str, rewind: usize, + bug_reference_prefix: Option, trigger_finder: Box, + bug_reference_scanner: BugReferenceScanner, email_scanner: EmailScanner, url_scanner: UrlScanner, domain_scanner: DomainScanner, @@ -129,6 +145,8 @@ impl LinkFinder { /// If you only want to find a certain kind of links, use the `kinds` method. pub fn new() -> LinkFinder { LinkFinder { + bug_reference: true, + bug_reference_prefix: None, email: true, email_domain_must_have_dot: true, url: true, @@ -163,12 +181,24 @@ impl LinkFinder { self } + /// Set a prefix used to rewrite bug reference links. + /// + /// For example, with a prefix of `https://example.org/bugs/`, a matched + /// bug reference `#12345` will keep `as_str()` as `#12345` and return + /// `https://example.org/bugs/12345` from `href()`. + pub fn bug_reference_prefix(&mut self, prefix: &str) -> &mut LinkFinder { + self.bug_reference_prefix = Some(prefix.to_owned()); + self + } + /// Restrict the kinds of links that should be found to the specified ones. pub fn kinds(&mut self, kinds: &[LinkKind]) -> &mut LinkFinder { + self.bug_reference = false; self.email = false; self.url = false; for kind in kinds { match *kind { + LinkKind::BugReference => self.bug_reference = true, LinkKind::Email => self.email = true, LinkKind::Url => self.url = true, } @@ -182,6 +212,8 @@ impl LinkFinder { pub fn links<'t>(&self, text: &'t str) -> Links<'t> { Links::new( text, + self.bug_reference, + self.bug_reference_prefix.clone(), self.url, self.url_must_have_scheme, self.email, @@ -217,12 +249,15 @@ impl Default for LinkFinder { impl<'t> Links<'t> { fn new( text: &'t str, + bug_reference: bool, + bug_reference_prefix: Option, url: bool, url_must_have_scheme: bool, email: bool, email_domain_must_have_dot: bool, iri_parsing_enabled: bool, ) -> Links<'t> { + let bug_reference_scanner = BugReferenceScanner; let url_scanner = UrlScanner { iri_parsing_enabled, }; @@ -233,19 +268,28 @@ impl<'t> Links<'t> { domain_must_have_dot: email_domain_must_have_dot, }; - // With optional schemes URLs don't have unique `:`, then search for `.` as well - let trigger_finder: Box = match (url, email) { - (true, true) if url_must_have_scheme => Box::new(|s| memchr2(b':', b'@', s)), - (true, true) => Box::new(|s| memchr3(b':', b'@', b'.', s)), - (true, false) if url_must_have_scheme => Box::new(|s| memchr(b':', s)), - (true, false) => Box::new(|s| memchr2(b':', b'.', s)), - (false, true) => Box::new(|s| memchr(b'@', s)), - (false, false) => Box::new(|_| None), - }; + // With optional schemes URLs don't have unique `:`, then search for `.` as well. + let mut triggers = Vec::new(); + if url { + triggers.push(b':'); + if !url_must_have_scheme { + triggers.push(b'.'); + } + } + if email { + triggers.push(b'@'); + } + if bug_reference { + triggers.push(b'#'); + } + let trigger_finder: Box = + Box::new(move |s| s.iter().position(|byte| triggers.contains(byte))); Links { text, rewind: 0, + bug_reference_prefix, trigger_finder, + bug_reference_scanner, email_scanner, url_scanner, domain_scanner, @@ -263,6 +307,7 @@ impl<'t> Iterator for Links<'t> { while let Some(i) = (self.trigger_finder)(slice[find_from..].as_bytes()) { let trigger = slice.as_bytes()[find_from + i]; let (scanner, kind): (&dyn Scanner, LinkKind) = match trigger { + b'#' => (&self.bug_reference_scanner, LinkKind::BugReference), b':' => (&self.url_scanner, LinkKind::Url), b'.' => (&self.domain_scanner, LinkKind::Url), b'@' => (&self.email_scanner, LinkKind::Email), @@ -272,10 +317,18 @@ impl<'t> Iterator for Links<'t> { let start = self.rewind + range.start; let end = self.rewind + range.end; self.rewind = end; + let href = if kind == LinkKind::BugReference { + self.bug_reference_prefix + .as_ref() + .map(|prefix| format!("{}{}", prefix, &self.text[start + 1..end])) + } else { + None + }; let link = Link { text: self.text, start, end, + href, kind, }; return Some(link); diff --git a/src/lib.rs b/src/lib.rs index 24f8cf2..453fb0c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,4 @@ -//! Linkify finds links such as URLs and email addresses in plain text. +//! Linkify finds links such as URLs, email addresses, and bug references in plain text. //! It's smart about where a link ends, such as with trailing punctuation. //! //! Your reaction might be: "Do I need a library for this? Why not a regex?". @@ -17,7 +17,7 @@ //! This library behaves as you'd expect in the above cases and many more. //! It uses a simple scan with linear runtime. //! -//! In addition to URLs, it can also find emails. +//! In addition to URLs, it can also find emails and bug references like `#12345`. //! //! ### Usage //! @@ -34,6 +34,7 @@ //! let link = &links[0]; //! //! assert_eq!("http://example.com", link.as_str()); +//! assert_eq!("http://example.com", link.href()); //! assert_eq!(14, link.start()); //! assert_eq!(32, link.end()); //! assert_eq!(&LinkKind::Url, link.kind()); @@ -70,6 +71,24 @@ //! assert_eq!(&LinkKind::Email, link.kind()); //! ``` //! +//! Configure a bug reference prefix: +//! +//! ``` +//! use linkify::{LinkFinder, LinkKind}; +//! +//! let input = "Fixed in #12345"; +//! let mut finder = LinkFinder::new(); +//! finder.kinds(&[LinkKind::BugReference]); +//! finder.bug_reference_prefix("https://example.org/bugs/"); +//! let links: Vec<_> = finder.links(input).collect(); +//! +//! assert_eq!(1, links.len()); +//! let link = &links[0]; +//! assert_eq!("#12345", link.as_str()); +//! assert_eq!("https://example.org/bugs/12345", link.href()); +//! assert_eq!(&LinkKind::BugReference, link.kind()); +//! ``` +//! //! Split the text into consecutive spans (mixed links and plain text). //! //! ``` @@ -119,6 +138,7 @@ #![deny(missing_docs)] #![deny(missing_debug_implementations)] +mod bug; mod domains; mod email; mod finder; diff --git a/tests/bug_reference.rs b/tests/bug_reference.rs new file mode 100644 index 0000000..a857965 --- /dev/null +++ b/tests/bug_reference.rs @@ -0,0 +1,73 @@ +mod common; + +use crate::common::assert_linked_with; +use linkify::{LinkFinder, LinkKind}; + +#[test] +fn no_links() { + assert_not_linked(""); + assert_not_linked("#"); + assert_not_linked("#abc"); + assert_not_linked("foo#123"); + assert_not_linked("#123abc"); + assert_not_linked("##123"); +} + +#[test] +fn simple() { + assert_linked("#12345", "|#12345|"); + assert_linked("See #12345.", "See |#12345|."); + assert_linked("(#12345)", "(|#12345|)"); +} + +#[test] +fn multiple() { + assert_linked("#1 and #22", "|#1| and |#22|"); +} + +#[test] +fn kind_filtering() { + let mut finder = LinkFinder::new(); + finder.kinds(&[LinkKind::BugReference]); + + assert_linked_with( + &finder, + "#12 foo@example.com http://example.com", + "|#12| foo@example.com http://example.com", + ); +} + +#[test] +fn default_finder_includes_bug_references() { + let finder = LinkFinder::new(); + let link = finder.links("fixed in #12345").next().unwrap(); + + assert_eq!(link.kind(), &LinkKind::BugReference); + assert_eq!(link.as_str(), "#12345"); + assert_eq!(link.href(), "#12345"); +} + +#[test] +fn href_uses_configured_prefix() { + let mut finder = LinkFinder::new(); + finder.bug_reference_prefix("https://example.org/bugs/"); + + let link = finder.links("fixed in #12345").next().unwrap(); + + assert_eq!(link.kind(), &LinkKind::BugReference); + assert_eq!(link.as_str(), "#12345"); + assert_eq!(link.href(), "https://example.org/bugs/12345"); +} + +fn assert_not_linked(s: &str) { + let mut finder = LinkFinder::new(); + finder.kinds(&[LinkKind::BugReference]); + let result = finder.links(s); + assert_eq!(result.count(), 0, "expected no links in {:?}", s); +} + +fn assert_linked(input: &str, expected: &str) { + let mut finder = LinkFinder::new(); + finder.kinds(&[LinkKind::BugReference]); + assert_linked_with(&finder, input, expected); +}