Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 25 additions & 15 deletions src/ActiveMonitors.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,14 +176,19 @@ struct ActiveMonitors : NonCopyable {
auto res = allAuthors.try_emplace(Bytes32(f.authors->at(i)));
res.first->second.try_emplace(&f, MonitorItem{m, currEventId});
}
} else if (f.tags.size()) {
for (const auto &[tagName, filterSet] : f.tags) {
for (size_t i = 0; i < filterSet.size(); i++) {
auto &tagSpec = getTagSpec(tagName, filterSet.at(i));
auto res = allTags.try_emplace(tagSpec);
res.first->second.try_emplace(&f, MonitorItem{m, currEventId});
} else if (f.tags.size() || f.tagsAnd.size()) {
auto addTags = [&](const auto &map){
for (const auto &[tagName, filterSet] : map) {
for (size_t i = 0; i < filterSet.size(); i++) {
auto &tagSpec = getTagSpec(tagName, filterSet.at(i));
auto res = allTags.try_emplace(tagSpec);
res.first->second.try_emplace(&f, MonitorItem{m, currEventId});
}
}
}
};

addTags(f.tags);
addTags(f.tagsAnd);
} else if (f.kinds) {
for (size_t i = 0; i < f.kinds->size(); i++) {
auto res = allKinds.try_emplace(f.kinds->at(i));
Expand Down Expand Up @@ -211,15 +216,20 @@ struct ActiveMonitors : NonCopyable {
monSet.erase(&f);
if (monSet.empty()) allAuthors.erase(author);
}
} else if (f.tags.size()) {
for (const auto &[tagName, filterSet] : f.tags) {
for (size_t i = 0; i < filterSet.size(); i++) {
auto &tagSpec = getTagSpec(tagName, filterSet.at(i));
auto &monSet = allTags.at(tagSpec);
monSet.erase(&f);
if (monSet.empty()) allTags.erase(tagSpec);
} else if (f.tags.size() || f.tagsAnd.size()) {
auto removeTags = [&](const auto &map){
for (const auto &[tagName, filterSet] : map) {
for (size_t i = 0; i < filterSet.size(); i++) {
auto &tagSpec = getTagSpec(tagName, filterSet.at(i));
auto &monSet = allTags.at(tagSpec);
monSet.erase(&f);
if (monSet.empty()) allTags.erase(tagSpec);
}
}
}
};

removeTags(f.tags);
removeTags(f.tagsAnd);
} else if (f.kinds) {
for (size_t i = 0; i < f.kinds->size(); i++) {
uint64_t kind = f.kinds->at(i);
Expand Down
44 changes: 34 additions & 10 deletions src/DBQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,28 +120,52 @@ struct DBScan : NonCopyable {
}
);
}
} else if (f.tags.size()) {
} else if (f.tags.size() || f.tagsAnd.size()) {
indexDbi = env.dbi_Event__tag;
desc = "Tag";

char tagName = '\0';
bool fromAnd = false;
{
uint64_t numTags = MAX_U64;
for (const auto &[tn, filterSet] : f.tags) {
if (filterSet.size() < numTags) {
numTags = filterSet.size();
tagName = tn;
auto consider = [&](const auto &map, bool isAnd){
for (const auto &[tn, filterSet] : map) {
size_t filterSize = filterSet.size();
if (filterSize == 0) continue;
uint64_t cost = isAnd ? 1 : filterSize;
if (cost < numTags || (cost == numTags && isAnd && !fromAnd)) {
numTags = cost;
tagName = tn;
fromAnd = isAnd;
}
}
}
};

consider(f.tags, false);
consider(f.tagsAnd, true);
}

const auto &filterSet = f.tags.at(tagName);
const auto &filterSet = fromAnd ? f.tagsAnd.at(tagName) : f.tags.at(tagName);
if (fromAnd) indexOnly = false;

std::vector<std::string> searchVals;
if (fromAnd) {
// For AND filters, matching any single required value implies the event also contains
// every other AND value (otherwise it will be rejected later), so only scan using one
// value to avoid redundant cursor work.
searchVals.emplace_back(filterSet.at(0));
} else {
searchVals.reserve(filterSet.size());
for (uint64_t i = 0; i < filterSet.size(); i++) {
searchVals.emplace_back(filterSet.at(i));
}
}

cursors.reserve(filterSet.size());
for (uint64_t i = 0; i < filterSet.size(); i++) {
cursors.reserve(searchVals.size());
for (const auto &val : searchVals) {
std::string search;
search += tagName;
search += filterSet.at(i);
search += val;

cursors.emplace_back(
search + std::string(8, '\xFF'),
Expand Down
107 changes: 94 additions & 13 deletions src/filters.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,25 @@ struct FilterSetBytes {
if (buf.size() > 65535) throw herr("total filter items too large");
}

// Direct constructor from already-decoded values
FilterSetBytes(const std::vector<std::string> &arrBytes, size_t minSize, size_t maxSize) {
if (maxSize > MAX_INDEXED_TAG_VAL_SIZE) throw herr("maxSize bigger than max indexed tag size");

std::vector<std::string> arr = arrBytes;
std::sort(arr.begin(), arr.end());

for (size_t i = 0; i < arr.size(); i++) {
const auto &item = arr[i];
if (item.size() < minSize) throw herr("filter item too small");
if (item.size() > maxSize) throw herr("filter item too large");
if (i > 0 && item == arr[i - 1]) continue; // remove duplicates
items.emplace_back(Item{ (uint16_t)buf.size(), (uint8_t)item.size(), (uint8_t)item[0] });
buf += item;
}

if (buf.size() > 65535) throw herr("total filter items too large");
}

std::string at(size_t n) const {
if (n >= items.size()) throw herr("FilterSetBytes access out of bounds");
auto &item = items[n];
Expand Down Expand Up @@ -111,6 +130,7 @@ struct NostrFilter {
std::optional<FilterSetBytes> authors;
std::optional<FilterSetUint> kinds;
flat_hash_map<char, FilterSetBytes> tags;
flat_hash_map<char, FilterSetBytes> tagsAnd;

uint64_t since = 0;
uint64_t until = MAX_U64;
Expand All @@ -119,7 +139,10 @@ struct NostrFilter {
bool indexOnlyScans = false;

explicit NostrFilter(const tao::json::value &filterObj, uint64_t maxFilterLimit) {
uint64_t numMajorFields = 0;
uint64_t numMajorFieldsNonTag = 0;
flat_hash_set<char> tagKeySet;
flat_hash_map<char, std::vector<std::string>> rawTagsOr;
flat_hash_map<char, std::vector<std::string>> rawTagsAnd;

if (!filterObj.is_object()) throw herr("provided filter is not an object");

Expand All @@ -131,25 +154,27 @@ struct NostrFilter {

if (k == "ids") {
ids.emplace(v, true, 32, 32);
numMajorFields++;
numMajorFieldsNonTag++;
} else if (k == "authors") {
authors.emplace(v, true, 32, 32);
numMajorFields++;
numMajorFieldsNonTag++;
} else if (k == "kinds") {
kinds.emplace(v);
numMajorFields++;
} else if (k.starts_with('#')) {
numMajorFields++;
if (k.size() == 2) {
char tag = k[1];
numMajorFieldsNonTag++;
} else if (k.starts_with('#') || k.starts_with('&')) {
bool isAnd = k.starts_with('&');
if (k.size() != 2) throw herr(isAnd ? "unindexed AND tag filter" : "unindexed tag filter");

char tag = k[1];
tagKeySet.insert(tag);

auto &vec = isAnd ? rawTagsAnd[tag] : rawTagsOr[tag];
for (const auto &elem : v.get_array()) {
if (tag == 'p' || tag == 'e') {
tags.emplace(tag, FilterSetBytes(v, true, 32, 32));
vec.emplace_back(from_hex(elem.get_string(), false));
} else {
tags.emplace(tag, FilterSetBytes(v, false, 0, MAX_INDEXED_TAG_VAL_SIZE));
vec.emplace_back(elem.get_string());
}
} else {
throw herr("unindexed tag filter");
}
} else if (k == "since") {
since = v.get_unsigned();
Expand All @@ -162,11 +187,49 @@ struct NostrFilter {
}
}

if (tags.size() > 3) throw herr("too many tags in filter"); // O(N^2) in matching, just prohibit it
// Build AND sets first
for (const auto &[tagName, vals] : rawTagsAnd) {
if (tagName == 'p' || tagName == 'e') {
tagsAnd.emplace(tagName, FilterSetBytes(vals, 32, 32));
} else {
tagsAnd.emplace(tagName, FilterSetBytes(vals, 0, MAX_INDEXED_TAG_VAL_SIZE));
}
}

// Build OR sets, skipping any values present in AND for the same tag
for (const auto &[tagName, vals] : rawTagsOr) {
const auto andIt = tagsAnd.find(tagName);
std::vector<std::string> filtered;
filtered.reserve(vals.size());

for (const auto &v : vals) {
if (andIt != tagsAnd.end() && andIt->second.doesMatch(v)) continue;
filtered.emplace_back(v);
}

if (filtered.empty()) continue;

if (tagName == 'p' || tagName == 'e') {
tags.emplace(tagName, FilterSetBytes(filtered, 32, 32));
} else {
tags.emplace(tagName, FilterSetBytes(filtered, 0, MAX_INDEXED_TAG_VAL_SIZE));
}
}

size_t tagKeyCount = 0;
{
// tagKeySet already contains the union of keys seen in # and &
tagKeyCount = tagKeySet.size();
}

if (tagKeyCount > 3) throw herr("too many tags in filter"); // O(N^2) in matching, just prohibit it

if (limit > maxFilterLimit) limit = maxFilterLimit;

uint64_t numMajorFields = numMajorFieldsNonTag + tagKeyCount;

indexOnlyScans = (numMajorFields <= 1) || (numMajorFields == 2 && authors && kinds);
if (tagsAnd.size()) indexOnlyScans = false; // AND semantics require reading full events
}

bool doesMatchTimes(uint64_t created) const {
Expand All @@ -184,6 +247,24 @@ struct NostrFilter {
if (authors && !authors->doesMatch(ev.pubkey())) return false;
if (kinds && !kinds->doesMatch(ev.kind())) return false;

// AND tags: every value in tagsAnd[tag] must be present in the event
for (const auto &[tag, filt] : tagsAnd) {
for (size_t i = 0; i < filt.size(); i++) {
auto requiredVal = filt.at(i);
bool foundMatch = false;

ev.foreachTag([&](char tagName, std::string_view tagVal){
if (tagName == tag && tagVal == requiredVal) {
foundMatch = true;
return false;
}
return true;
});

if (!foundMatch) return false;
}
}

for (const auto &[tag, filt] : tags) {
bool foundMatch = false;

Expand Down
55 changes: 35 additions & 20 deletions test/dumbFilter.pl
Original file line number Diff line number Diff line change
Expand Up @@ -76,41 +76,56 @@ sub doesMatchSingle {
return 0 if !$found;
}

if ($filter->{'#e'}) {
my $found;
foreach my $search (@{ $filter->{'#e'} }) {
foreach my $tag (@{ $ev->{tags} }) {
if ($tag->[0] eq 'e' && $tag->[1] eq $search) {
$found = 1;
last;
}
}
# AND / OR tag handling (including NIP-119 AND filters)
my %tagAnd;
my %tagOr;
for my $k (keys %$filter) {
if ($k =~ /^#(.)$/) {
$tagOr{$1} = $filter->{$k};
} elsif ($k =~ /^&(.)$/) {
$tagAnd{$1} = $filter->{$k};
}
return 0 if !$found;
}

if ($filter->{'#p'}) {
my $found;
foreach my $search (@{ $filter->{'#p'} }) {
foreach my $tag (@{ $ev->{tags} }) {
if ($tag->[0] eq 'p' && $tag->[1] eq $search) {
# Remove overlaps: AND values are ignored in OR sets for the same tag
for my $tag (keys %tagAnd) {
next unless $tagOr{$tag};
my %andVals = map { $_ => 1 } @{ $tagAnd{$tag} };
my @remaining = grep { !exists $andVals{$_} } @{ $tagOr{$tag} };
if (@remaining) {
$tagOr{$tag} = \@remaining;
} else {
delete $tagOr{$tag};
}
}

# AND: every required value must be present
for my $tag (keys %tagAnd) {
for my $required (@{ $tagAnd{$tag} }) {
my $found;
foreach my $evTag (@{ $ev->{tags} }) {
next if @$evTag < 2;
if ($evTag->[0] eq $tag && $evTag->[1] eq $required) {
$found = 1;
last;
}
}
return 0 if !$found;
}
return 0 if !$found;
}

if ($filter->{'#t'}) {
# OR: at least one value must be present per tag key
for my $tag (keys %tagOr) {
my $found;
foreach my $search (@{ $filter->{'#t'} }) {
foreach my $tag (@{ $ev->{tags} }) {
if ($tag->[0] eq 't' && $tag->[1] eq $search) {
foreach my $search (@{ $tagOr{$tag} }) {
foreach my $evTag (@{ $ev->{tags} }) {
next if @$evTag < 2;
if ($evTag->[0] eq $tag && $evTag->[1] eq $search) {
$found = 1;
last;
}
}
last if $found;
}
return 0 if !$found;
}
Expand Down
21 changes: 21 additions & 0 deletions test/filterFuzzTest.pl
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,27 @@ sub genRandomFilterGroup {
push @{$f->{'#t'}}, $topics->[int(rand() * @$topics)];
}
}

if (rand() < .12) {
$f->{'&t'} = [];
for (1..(rand()*3)+1) {
push @{$f->{'&t'}}, $topics->[int(rand() * @$topics)];
}
}

if (rand() < .08) {
$f->{'&e'} = [];
for (1..(rand()*4)+1) {
push @{$f->{'&e'}}, $ids->[int(rand() * @$ids)];
}
}

if (rand() < .08) {
$f->{'&p'} = [];
for (1..(rand()*3)+1) {
push @{$f->{'&p'}}, $pubkeys->[int(rand() * @$pubkeys)];
}
}
}

if (rand() < .2) {
Expand Down