From 984dac3d95f0151969160cdc6b0f7cfc581459cf Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Wed, 13 May 2026 22:56:43 +0200 Subject: [PATCH] NUTCH-3177 Fetcher to report idle threads not as hung threads - reset reprUrl in FetcherThread after fetch is finished - report idle threads properly --- src/java/org/apache/nutch/fetcher/Fetcher.java | 13 +++++++++---- .../org/apache/nutch/fetcher/FetcherThread.java | 3 +++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/java/org/apache/nutch/fetcher/Fetcher.java b/src/java/org/apache/nutch/fetcher/Fetcher.java index 029d95ff7c..d8f83a96ba 100644 --- a/src/java/org/apache/nutch/fetcher/Fetcher.java +++ b/src/java/org/apache/nutch/fetcher/Fetcher.java @@ -448,16 +448,21 @@ else if (bandwidthTargetCheckCounter == bandwidthTargetCheckEveryNSecs) { * fetcher.threads.timeout.divisor. */ if ((System.currentTimeMillis() - lastRequestStart.get()) > timeout) { - LOG.warn("Timeout reached with no new requests since {} seconds.", + LOG.warn( + "Timeout reached with no new requests since {} milliseconds.", timeout); - LOG.warn("Aborting with {} hung threads{}.", activeThreads, + LOG.warn("Aborting with {} hung or idle threads{}.", activeThreads, feeder.isAlive() ? " (queue feeder still alive)" : ""); hungThreadsCounter.increment(activeThreads.get()); for (int i = 0; i < fetcherThreads.size(); i++) { FetcherThread thread = fetcherThreads.get(i); if (thread.isAlive()) { - LOG.warn("Thread #{} hung while processing {}", i, - thread.getReprUrl()); + if (thread.getReprUrl() != null) { + LOG.warn("Thread #{} hung while processing {}", i, + thread.getReprUrl()); + } else { + LOG.warn("Thread #{} idle", i); + } StackTraceElement[] stack = thread.getStackTrace(); StringBuilder sb = new StringBuilder(); sb.append("Stack of thread #").append(i).append(":\n"); diff --git a/src/java/org/apache/nutch/fetcher/FetcherThread.java b/src/java/org/apache/nutch/fetcher/FetcherThread.java index e3ee092b44..e894f01f16 100644 --- a/src/java/org/apache/nutch/fetcher/FetcherThread.java +++ b/src/java/org/apache/nutch/fetcher/FetcherThread.java @@ -537,6 +537,9 @@ public void run() { output(fit.url, fit.datum, null, ProtocolStatus.STATUS_FAILED, CrawlDatum.STATUS_FETCH_RETRY); } + + // done: unset reprUrl for reporting + setReprUrl(null); } } catch (Throwable e) {