Skip to content

Commit c8cd387

Browse files
committed
Don't continuously re-retrieve documents that have an incorrect size in the database
1 parent 3ef4b8c commit c8cd387

1 file changed

Lines changed: 8 additions & 3 deletions

File tree

tkpull.cc

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ int main(int argc, char** argv)
113113
toRetrieve.insert({get<string>(d["id"]), get<string>(d["enclosure"]),
114114
contentLength ? *contentLength : 0});
115115
if(isPresentNonEmpty(get<string>(d["id"]), prefix))
116-
fmt::print("Re-retrieving {}, has wrong size on disk\n", get<string>(d["id"]));
116+
fmt::print("Re-retrieving {}, has wrong size on disk, should be {}\n", get<string>(d["id"]), get<int64_t>(d["contentLength"]));
117117
}
118118
}
119119
fmt::print("We have {} files to retrieve, {} are already present\n", toRetrieve.size(), present);
@@ -149,8 +149,13 @@ int main(int argc, char** argv)
149149
}
150150

151151
fmt::print("Got {} bytes\n", res->body.size());
152-
storeDocument(need.id, res->body, prefix);
153-
retrieved++;
152+
if(res->body.size() == (unsigned int)need.contentLength) {
153+
storeDocument(need.id, res->body, prefix);
154+
retrieved++;
155+
}
156+
else {
157+
fmt::print("Unexpected size received, not storing document\n");
158+
}
154159
}
155160
fmt::print("Retrieved {} documents, {} were too large, {} errors\n", retrieved, toolarge, error);
156161
}

0 commit comments

Comments
 (0)