Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions docs/v2/filters/select.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,22 @@ assets = ["asset17jd78wukhtrnmjh3fngzasxm8rck0l2r4hhyyt"]
datum = "datum1httkxyxp8x0dlpdt3k6cwng5pxj3j"
```

## Metadata Filtering

Match any transaction that carries a particular metadata label:

```toml
predicate = "#674"
```

Match transactions whose metadata contains text matching a regex pattern. The search recurses into arrays and maps (both map keys and map values), and only text metadatum values are tested against the regex:

```toml
[filters.predicate.match.metadata]
label = 674

[filters.predicate.match.metadata.value.text]
regex = "(?i)hello.*world" # Case-insensitive
```


42 changes: 42 additions & 0 deletions examples/metadata_regex_filter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Metadata Regex Filter Example

Filter transactions by metadata content using regex patterns.

## Configuration

```toml
[[filters]]
type = "Select"
skip_uncertain = false

[filters.predicate.match.metadata]
label = 674

[filters.predicate.match.metadata.value.text]
regex = "Hello World"
```

## Running

```bash
oura daemon --config ./daemon.toml
```

## Features

- **Recursive search**: Automatically searches through nested arrays and maps
- **Flexible patterns**: Use standard regex syntax
- **Optional label**: Omit the `label` field to search the metadata under every label

## Common Patterns

```toml
regex = "(?i)keyword" # Case-insensitive
regex = "^MyApp:" # Starts with
regex = "payment|donation" # Multiple keywords
```

## See Also

- [Select Filter Documentation](../../docs/v2/filters/select.mdx)
- [CIP-20 Specification](https://cips.cardano.org/cips/cip20/)
28 changes: 28 additions & 0 deletions examples/metadata_regex_filter/daemon.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
[chain]
type = "preprod"

[source]
type = "N2N"
peers = ["preprod-node.world.dev.cardano.org:30000"]

[intersect]
type = "Tip"

[[filters]]
type = "SplitBlock"

[[filters]]
type = "ParseCbor"

[[filters]]
type = "Select"
skip_uncertain = false

[filters.predicate.match.metadata]
label = 674

[filters.predicate.match.metadata.value.text]
regex = "Hello World"

[sink]
type = "Stdout"
64 changes: 64 additions & 0 deletions src/filters/select/eval/metadata.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use super::*;

#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum MetadatumPattern {
Text(TextPattern),
Int(NumericPattern<i64>),
Expand Down Expand Up @@ -120,4 +121,67 @@ mod tests {
));
assert_eq!(positives, Vec::<usize>::new());
}

/// Builds a `MetadataPattern` holding a regex text pattern and checks that
/// the embedded regex accepts and rejects the expected strings.
#[test]
fn regex_text_value_match() {
    use regex::Regex;

    let compiled = Regex::new(r"testing regex").unwrap();
    let pattern = MetadataPattern {
        label: Some(674),
        value: Some(MetadatumPattern::Text(TextPattern::Regex(compiled))),
    };

    assert!(pattern.label.is_some());
    assert!(pattern.value.is_some());

    // Any shape other than Text(Regex) means the pattern was built wrong.
    match &pattern.value {
        Some(MetadatumPattern::Text(TextPattern::Regex(regex))) => {
            assert!(regex.is_match("testing regex"));
            assert!(regex.is_match("this contains testing regex inside"));
            assert!(!regex.is_match("no match here"));
        }
        _ => panic!("Expected Text(Regex) pattern"),
    }
}

/// Runs a regex `TextPattern` against several metadatum kinds: matching text
/// is positive, while non-matching text, ints and bytes are negative.
#[test]
fn regex_text_value_matches_metadatum() {
    use pallas::interop::utxorpc::spec::cardano::metadatum::Metadatum as Inner;
    use regex::Regex;

    // Wraps a raw metadatum variant in the outer `Metadatum` message.
    let wrap = |inner: Inner| Metadatum {
        metadatum: inner.into(),
    };

    let text_pattern = TextPattern::Regex(Regex::new(r"Hello World").unwrap());

    let cases = [
        (
            "matching text",
            wrap(Inner::Text("Hello World".to_string())),
            MatchOutcome::Positive,
        ),
        (
            "non-matching text",
            wrap(Inner::Text("Goodbye".to_string())),
            MatchOutcome::Negative,
        ),
        ("int", wrap(Inner::Int(42)), MatchOutcome::Negative),
        (
            "bytes",
            wrap(Inner::Bytes(vec![0xFF, 0xFE, 0xFD].into())),
            MatchOutcome::Negative,
        ),
    ];

    for (name, subject, expected) in cases.iter() {
        assert_eq!(&text_pattern.is_match(subject), expected, "case: {}", name);
    }
}

}
73 changes: 68 additions & 5 deletions src/filters/select/eval/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,16 +194,29 @@ impl PatternOf<u64> for CoinPattern {
}
}

#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
#[derive(Serialize, Deserialize, Clone, Debug)]
#[serde(rename_all = "lowercase")]
pub enum TextPattern {
Exact(String),
// TODO: Regex
#[serde(with = "serde_ext::regex_pattern")]
Regex(regex::Regex),
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

impl PartialEq for TextPattern {
    /// Two patterns are equal when they are the same variant and hold the
    /// same string; regexes are compared by their source pattern text.
    fn eq(&self, other: &Self) -> bool {
        match self {
            TextPattern::Exact(lhs) => {
                matches!(other, TextPattern::Exact(rhs) if lhs == rhs)
            }
            TextPattern::Regex(lhs) => {
                matches!(other, TextPattern::Regex(rhs) if lhs.as_str() == rhs.as_str())
            }
        }
    }
}

impl PatternOf<&str> for TextPattern {
    /// Evaluates this pattern against a string subject.
    ///
    /// `Exact` requires the whole subject to equal the stored string, while
    /// `Regex` succeeds if the regex matches anywhere in the subject.
    fn is_match(&self, subject: &str) -> MatchOutcome {
        match self {
            TextPattern::Exact(x) => MatchOutcome::if_true(x.as_str() == subject),
            TextPattern::Regex(x) => MatchOutcome::if_true(x.is_match(subject)),
        }
    }
}
Expand All @@ -221,9 +234,17 @@ impl PatternOf<&[u8]> for TextPattern {

impl PatternOf<&Metadatum> for TextPattern {
fn is_match(&self, subject: &Metadatum) -> MatchOutcome {
use pallas::interop::utxorpc::spec::cardano::metadatum::Metadatum as M;

match subject.metadatum.as_ref() {
Some(pallas::interop::utxorpc::spec::cardano::metadatum::Metadatum::Text(subject)) => {
self.is_match(subject.as_str())
Some(M::Text(text)) => self.is_match(text.as_str()),
Some(M::Array(array)) => self.is_any_match(array.items.iter()),
Some(M::Map(map)) => {
let key_matches =
self.is_any_match(map.pairs.iter().filter_map(|p| p.key.as_ref()));
let value_matches =
self.is_any_match(map.pairs.iter().filter_map(|p| p.value.as_ref()));
key_matches + value_matches
}
_ => MatchOutcome::Negative,
}
Expand Down Expand Up @@ -642,6 +663,48 @@ mod tests {
assert!(matches!(pattern, Pattern::Metadata(..)));
}

/// Exercises the hand-written `PartialEq` for `TextPattern`: same variant and
/// same text compare equal, anything else compares unequal.
#[test]
fn text_pattern_equality() {
    use regex::Regex;

    let regex_of = |source: &str| TextPattern::Regex(Regex::new(source).unwrap());
    let exact_of = |source: &str| TextPattern::Exact(source.to_string());

    let pattern1 = regex_of(r"test");
    let pattern2 = regex_of(r"test");
    let pattern3 = regex_of(r"different");
    let pattern4 = exact_of("test");
    let pattern5 = exact_of("test");

    assert_eq!(pattern1, pattern2);
    assert_ne!(pattern1, pattern3);
    assert_eq!(pattern4, pattern5);
    // Same text but different variants must not compare equal.
    assert_ne!(pattern1, pattern4);
}

/// An `Exact` pattern accepts only a subject identical to its string.
#[test]
fn text_pattern_exact_match() {
    let pattern = TextPattern::Exact(String::from("hello"));

    let identical = pattern.is_match("hello");
    assert_eq!(identical, MatchOutcome::Positive);

    let longer = pattern.is_match("hello world");
    assert_eq!(longer, MatchOutcome::Negative);
}

/// Matches a regex `TextPattern` against byte slices: valid UTF-8 is matched
/// as text, while bytes that are not valid UTF-8 give an uncertain outcome.
#[test]
fn text_pattern_matches_utf8_bytes() {
    use regex::Regex;

    let pattern = TextPattern::Regex(Regex::new(r"hello").unwrap());

    let utf8_bytes: &[u8] = b"hello world";
    assert_eq!(pattern.is_match(utf8_bytes), MatchOutcome::Positive);

    let utf8_no_match: &[u8] = b"goodbye";
    assert_eq!(pattern.is_match(utf8_no_match), MatchOutcome::Negative);

    let invalid_utf8: [u8; 3] = [0xFF, 0xFE, 0xFD];
    assert_eq!(pattern.is_match(&invalid_utf8[..]), MatchOutcome::Uncertain);
}

#[test]
fn deser_predicate() {
serde_json::from_str::<StringOrStruct<Predicate>>("\"addr1qx2fxv2umyhttkxyxp8x0dlpdt3k6cwng5pxj3jhsydzer3n0d3vllmyqwsx5wktcd8cc3sq835lu7drv2xwl2wywfgse35a3x\"").unwrap();
Expand Down
23 changes: 23 additions & 0 deletions src/filters/select/eval/serde_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,26 @@ pub trait FromBech32: Sized {
.ok_or_else(|| anyhow::anyhow!("bech32 hrp '{}' is not compatible for this type", hrp))
}
}

/// Serde serialization/deserialization helpers for regex patterns.
pub mod regex_pattern {
use regex::Regex;
use serde::{Deserialize, Deserializer, Serializer};

/// Serializes a Regex as its string representation.
pub fn serialize<S>(regex: &Regex, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(regex.as_str())
}

/// Deserializes a string into a Regex.
pub fn deserialize<'de, D>(deserializer: D) -> Result<Regex, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Regex::new(&s).map_err(serde::de::Error::custom)
}
}