Skip to content

Commit 9756d08

Browse files
committed
Update to the latest crawl and disable throttling, which currently seems unnecessary
1 parent 73b626b commit 9756d08

1 file changed

Lines changed: 2 additions & 2 deletions

File tree

src/main/java/org/dstadler/commoncrawl/index/DownloadURLIndex.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ public class DownloadURLIndex {
3030
private static final Logger log = LoggerFactory.make();
3131

3232
// https://commoncrawl.org/blog/
33-
public static final String CURRENT_CRAWL = "CC-MAIN-2024-42";
33+
public static final String CURRENT_CRAWL = "CC-MAIN-2025-43";
3434
public static final File COMMON_CRAWL_FILE = new File("commoncrawl-" + CURRENT_CRAWL + ".txt");
3535

3636
private static final int START_INDEX = 0;
@@ -144,7 +144,7 @@ protected static void handleInputStream(String url, InputStream stream, int inde
144144
StringUtils.abbreviate(FOUND_MIME_TYPES.sortedMap().toString(), 95));
145145
lastLog = System.currentTimeMillis();
146146

147-
Utils.throttleDownloads();
147+
//Utils.throttleDownloads();
148148
}
149149
}
150150
}

0 commit comments

Comments
 (0)