// Patch summary (derived from the hunks below):
// - src/filters.h: NostrFilter accepts "&x" keys next to "#x" keys and stores them in a new
//   tagsAnd map; doesMatch() requires every tagsAnd value to appear in the event's tags.
//   Values listed under "&x" are dropped from the "#x" set of the same tag letter, and
//   indexOnlyScans is forced off whenever tagsAnd is non-empty. FilterSetBytes gains a
//   constructor that takes already-decoded values.
// - src/DBQuery.h: the tag scan picks the cheapest tag set, counting an AND set as cost 1 and
//   preferring it on ties, and opens a single cursor when an AND set is chosen.
// - src/ActiveMonitors.h: monitors are added to / removed from allTags for both f.tags and
//   f.tagsAnd.
// - test/dumbFilter.pl, test/filterFuzzTest.pl: the reference matcher handles "&x" filters and
//   the fuzz generator emits "&t", "&e" and "&p".
// NOTE(review): this copy of the patch has lost its line breaks and its template argument
// lists (e.g. "std::vector searchVals"), so it presumably cannot be applied as-is — confirm
// against the original commit before use.
diff --git a/src/ActiveMonitors.h b/src/ActiveMonitors.h index 633728aa..7345c05f 100644 --- a/src/ActiveMonitors.h +++ b/src/ActiveMonitors.h @@ -176,14 +176,19 @@ struct ActiveMonitors : NonCopyable { auto res = allAuthors.try_emplace(Bytes32(f.authors->at(i))); res.first->second.try_emplace(&f, MonitorItem{m, currEventId}); } - } else if (f.tags.size()) { - for (const auto &[tagName, filterSet] : f.tags) { - for (size_t i = 0; i < filterSet.size(); i++) { - auto &tagSpec = getTagSpec(tagName, filterSet.at(i)); - auto res = allTags.try_emplace(tagSpec); - res.first->second.try_emplace(&f, MonitorItem{m, currEventId}); + } else if (f.tags.size() || f.tagsAnd.size()) { + auto addTags = [&](const auto &map){ + for (const auto &[tagName, filterSet] : map) { + for (size_t i = 0; i < filterSet.size(); i++) { + auto &tagSpec = getTagSpec(tagName, filterSet.at(i)); + auto res = allTags.try_emplace(tagSpec); + res.first->second.try_emplace(&f, MonitorItem{m, currEventId}); + } } - } + }; + + addTags(f.tags); + addTags(f.tagsAnd); } else if (f.kinds) { for (size_t i = 0; i < f.kinds->size(); i++) { auto res = allKinds.try_emplace(f.kinds->at(i)); @@ -211,15 +216,20 @@ struct ActiveMonitors : NonCopyable { monSet.erase(&f); if (monSet.empty()) allAuthors.erase(author); } - } else if (f.tags.size()) { - for (const auto &[tagName, filterSet] : f.tags) { - for (size_t i = 0; i < filterSet.size(); i++) { - auto &tagSpec = getTagSpec(tagName, filterSet.at(i)); - auto &monSet = allTags.at(tagSpec); - monSet.erase(&f); - if (monSet.empty()) allTags.erase(tagSpec); + } else if (f.tags.size() || f.tagsAnd.size()) { + auto removeTags = [&](const auto &map){ + for (const auto &[tagName, filterSet] : map) { + for (size_t i = 0; i < filterSet.size(); i++) { + auto &tagSpec = getTagSpec(tagName, filterSet.at(i)); + auto &monSet = allTags.at(tagSpec); + monSet.erase(&f); + if (monSet.empty()) allTags.erase(tagSpec); + } } - } + }; + + removeTags(f.tags); + removeTags(f.tagsAnd); } else if 
(f.kinds) { for (size_t i = 0; i < f.kinds->size(); i++) { uint64_t kind = f.kinds->at(i); diff --git a/src/DBQuery.h b/src/DBQuery.h index 7e91f4bd..338b23d9 100644 --- a/src/DBQuery.h +++ b/src/DBQuery.h @@ -120,28 +120,52 @@ struct DBScan : NonCopyable { } ); } - } else if (f.tags.size()) { + } else if (f.tags.size() || f.tagsAnd.size()) { indexDbi = env.dbi_Event__tag; desc = "Tag"; char tagName = '\0'; + bool fromAnd = false; { uint64_t numTags = MAX_U64; - for (const auto &[tn, filterSet] : f.tags) { - if (filterSet.size() < numTags) { - numTags = filterSet.size(); - tagName = tn; + auto consider = [&](const auto &map, bool isAnd){ + for (const auto &[tn, filterSet] : map) { + size_t filterSize = filterSet.size(); + if (filterSize == 0) continue; + uint64_t cost = isAnd ? 1 : filterSize; + if (cost < numTags || (cost == numTags && isAnd && !fromAnd)) { + numTags = cost; + tagName = tn; + fromAnd = isAnd; + } } - } + }; + + consider(f.tags, false); + consider(f.tagsAnd, true); } - const auto &filterSet = f.tags.at(tagName); + const auto &filterSet = fromAnd ? f.tagsAnd.at(tagName) : f.tags.at(tagName); + if (fromAnd) indexOnly = false; + + std::vector searchVals; + if (fromAnd) { + // For AND filters, every matching event must carry all of the required values, so + // scanning the index for just one of them already yields a superset of the matches. + // Events lacking the remaining values are rejected later, so one cursor suffices. 
+ searchVals.emplace_back(filterSet.at(0)); + } else { + searchVals.reserve(filterSet.size()); + for (uint64_t i = 0; i < filterSet.size(); i++) { + searchVals.emplace_back(filterSet.at(i)); + } + } - cursors.reserve(filterSet.size()); - for (uint64_t i = 0; i < filterSet.size(); i++) { + cursors.reserve(searchVals.size()); + for (const auto &val : searchVals) { std::string search; search += tagName; - search += filterSet.at(i); + search += val; cursors.emplace_back( search + std::string(8, '\xFF'), diff --git a/src/filters.h b/src/filters.h index 8bcb8e25..b1c92088 100644 --- a/src/filters.h +++ b/src/filters.h @@ -39,6 +39,25 @@ struct FilterSetBytes { if (buf.size() > 65535) throw herr("total filter items too large"); } + // Direct constructor from already-decoded values + FilterSetBytes(const std::vector &arrBytes, size_t minSize, size_t maxSize) { + if (maxSize > MAX_INDEXED_TAG_VAL_SIZE) throw herr("maxSize bigger than max indexed tag size"); + + std::vector arr = arrBytes; + std::sort(arr.begin(), arr.end()); + + for (size_t i = 0; i < arr.size(); i++) { + const auto &item = arr[i]; + if (item.size() < minSize) throw herr("filter item too small"); + if (item.size() > maxSize) throw herr("filter item too large"); + if (i > 0 && item == arr[i - 1]) continue; // remove duplicates + items.emplace_back(Item{ (uint16_t)buf.size(), (uint8_t)item.size(), (uint8_t)item[0] }); + buf += item; + } + + if (buf.size() > 65535) throw herr("total filter items too large"); + } + std::string at(size_t n) const { if (n >= items.size()) throw herr("FilterSetBytes access out of bounds"); auto &item = items[n]; @@ -111,6 +130,7 @@ struct NostrFilter { std::optional authors; std::optional kinds; flat_hash_map tags; + flat_hash_map tagsAnd; uint64_t since = 0; uint64_t until = MAX_U64; @@ -119,7 +139,10 @@ struct NostrFilter { bool indexOnlyScans = false; explicit NostrFilter(const tao::json::value &filterObj, uint64_t maxFilterLimit) { - uint64_t numMajorFields = 0; + uint64_t 
numMajorFieldsNonTag = 0; + flat_hash_set tagKeySet; + flat_hash_map> rawTagsOr; + flat_hash_map> rawTagsAnd; if (!filterObj.is_object()) throw herr("provided filter is not an object"); @@ -131,25 +154,27 @@ struct NostrFilter { if (k == "ids") { ids.emplace(v, true, 32, 32); - numMajorFields++; + numMajorFieldsNonTag++; } else if (k == "authors") { authors.emplace(v, true, 32, 32); - numMajorFields++; + numMajorFieldsNonTag++; } else if (k == "kinds") { kinds.emplace(v); - numMajorFields++; - } else if (k.starts_with('#')) { - numMajorFields++; - if (k.size() == 2) { - char tag = k[1]; + numMajorFieldsNonTag++; + } else if (k.starts_with('#') || k.starts_with('&')) { + bool isAnd = k.starts_with('&'); + if (k.size() != 2) throw herr(isAnd ? "unindexed AND tag filter" : "unindexed tag filter"); + char tag = k[1]; + tagKeySet.insert(tag); + + auto &vec = isAnd ? rawTagsAnd[tag] : rawTagsOr[tag]; + for (const auto &elem : v.get_array()) { if (tag == 'p' || tag == 'e') { - tags.emplace(tag, FilterSetBytes(v, true, 32, 32)); + vec.emplace_back(from_hex(elem.get_string(), false)); } else { - tags.emplace(tag, FilterSetBytes(v, false, 0, MAX_INDEXED_TAG_VAL_SIZE)); + vec.emplace_back(elem.get_string()); } - } else { - throw herr("unindexed tag filter"); } } else if (k == "since") { since = v.get_unsigned(); @@ -162,11 +187,49 @@ struct NostrFilter { } } - if (tags.size() > 3) throw herr("too many tags in filter"); // O(N^2) in matching, just prohibit it + // Build AND sets first + for (const auto &[tagName, vals] : rawTagsAnd) { + if (tagName == 'p' || tagName == 'e') { + tagsAnd.emplace(tagName, FilterSetBytes(vals, 32, 32)); + } else { + tagsAnd.emplace(tagName, FilterSetBytes(vals, 0, MAX_INDEXED_TAG_VAL_SIZE)); + } + } + + // Build OR sets, skipping any values present in AND for the same tag + for (const auto &[tagName, vals] : rawTagsOr) { + const auto andIt = tagsAnd.find(tagName); + std::vector filtered; + filtered.reserve(vals.size()); + + for (const auto &v : 
vals) { + if (andIt != tagsAnd.end() && andIt->second.doesMatch(v)) continue; + filtered.emplace_back(v); + } + + if (filtered.empty()) continue; + + if (tagName == 'p' || tagName == 'e') { + tags.emplace(tagName, FilterSetBytes(filtered, 32, 32)); + } else { + tags.emplace(tagName, FilterSetBytes(filtered, 0, MAX_INDEXED_TAG_VAL_SIZE)); + } + } + + size_t tagKeyCount = 0; + { + // tagKeySet already contains the union of keys seen in # and & + tagKeyCount = tagKeySet.size(); + } + + if (tagKeyCount > 3) throw herr("too many tags in filter"); // O(N^2) in matching, just prohibit it if (limit > maxFilterLimit) limit = maxFilterLimit; + uint64_t numMajorFields = numMajorFieldsNonTag + tagKeyCount; + indexOnlyScans = (numMajorFields <= 1) || (numMajorFields == 2 && authors && kinds); + if (tagsAnd.size()) indexOnlyScans = false; // AND semantics require reading full events } bool doesMatchTimes(uint64_t created) const { @@ -184,6 +247,24 @@ struct NostrFilter { if (authors && !authors->doesMatch(ev.pubkey())) return false; if (kinds && !kinds->doesMatch(ev.kind())) return false; + // AND tags: every value in tagsAnd[tag] must be present in the event + for (const auto &[tag, filt] : tagsAnd) { + for (size_t i = 0; i < filt.size(); i++) { + auto requiredVal = filt.at(i); + bool foundMatch = false; + + ev.foreachTag([&](char tagName, std::string_view tagVal){ + if (tagName == tag && tagVal == requiredVal) { + foundMatch = true; + return false; + } + return true; + }); + + if (!foundMatch) return false; + } + } + for (const auto &[tag, filt] : tags) { bool foundMatch = false; diff --git a/test/dumbFilter.pl b/test/dumbFilter.pl index 1aefbec1..0fa9bb34 100755 --- a/test/dumbFilter.pl +++ b/test/dumbFilter.pl @@ -76,41 +76,56 @@ sub doesMatchSingle { return 0 if !$found; } - if ($filter->{'#e'}) { - my $found; - foreach my $search (@{ $filter->{'#e'} }) { - foreach my $tag (@{ $ev->{tags} }) { - if ($tag->[0] eq 'e' && $tag->[1] eq $search) { - $found = 1; - last; - } - } + 
# AND / OR tag handling (including NIP-119 AND filters) + my %tagAnd; + my %tagOr; + for my $k (keys %$filter) { + if ($k =~ /^#(.)$/) { + $tagOr{$1} = $filter->{$k}; + } elsif ($k =~ /^&(.)$/) { + $tagAnd{$1} = $filter->{$k}; } - return 0 if !$found; } - if ($filter->{'#p'}) { - my $found; - foreach my $search (@{ $filter->{'#p'} }) { - foreach my $tag (@{ $ev->{tags} }) { - if ($tag->[0] eq 'p' && $tag->[1] eq $search) { + # Remove overlaps: AND values are ignored in OR sets for the same tag + for my $tag (keys %tagAnd) { + next unless $tagOr{$tag}; + my %andVals = map { $_ => 1 } @{ $tagAnd{$tag} }; + my @remaining = grep { !exists $andVals{$_} } @{ $tagOr{$tag} }; + if (@remaining) { + $tagOr{$tag} = \@remaining; + } else { + delete $tagOr{$tag}; + } + } + + # AND: every required value must be present + for my $tag (keys %tagAnd) { + for my $required (@{ $tagAnd{$tag} }) { + my $found; + foreach my $evTag (@{ $ev->{tags} }) { + next if @$evTag < 2; + if ($evTag->[0] eq $tag && $evTag->[1] eq $required) { $found = 1; last; } } + return 0 if !$found; } - return 0 if !$found; } - if ($filter->{'#t'}) { + # OR: at least one value must be present per tag key + for my $tag (keys %tagOr) { my $found; - foreach my $search (@{ $filter->{'#t'} }) { - foreach my $tag (@{ $ev->{tags} }) { - if ($tag->[0] eq 't' && $tag->[1] eq $search) { + foreach my $search (@{ $tagOr{$tag} }) { + foreach my $evTag (@{ $ev->{tags} }) { + next if @$evTag < 2; + if ($evTag->[0] eq $tag && $evTag->[1] eq $search) { $found = 1; last; } } + last if $found; } return 0 if !$found; } diff --git a/test/filterFuzzTest.pl b/test/filterFuzzTest.pl index a49bf630..14899518 100755 --- a/test/filterFuzzTest.pl +++ b/test/filterFuzzTest.pl @@ -151,6 +151,27 @@ sub genRandomFilterGroup { push @{$f->{'#t'}}, $topics->[int(rand() * @$topics)]; } } + + if (rand() < .12) { + $f->{'&t'} = []; + for (1..(rand()*3)+1) { + push @{$f->{'&t'}}, $topics->[int(rand() * @$topics)]; + } + } + + if (rand() < .08) { + 
$f->{'&e'} = []; + for (1..(rand()*4)+1) { + push @{$f->{'&e'}}, $ids->[int(rand() * @$ids)]; + } + } + + if (rand() < .08) { + $f->{'&p'} = []; + for (1..(rand()*3)+1) { + push @{$f->{'&p'}}, $pubkeys->[int(rand() * @$pubkeys)]; + } + } } if (rand() < .2) {