From 50a75658548b91faf47c1a18a0320a5edb889cf6 Mon Sep 17 00:00:00 2001 From: ChrisJr404 <11917633+ChrisJr404@users.noreply.github.com> Date: Sun, 3 May 2026 17:22:22 -0400 Subject: [PATCH] feat: size-conditional 'output may be large' note for 4 *2john converters (#4051) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The *2john family of converters embed the encrypted blob from the input archive directly into the JtR hash line written to stdout. For large archives (encrypted disk images, password-protected RARs, KeePass DBs with sizeable key files) the resulting hash line can run into hundreds of megabytes or more, which routinely surprises new users into thinking the tool has malfunctioned. Issue #4051 asks for a stderr explanation when this happens. zip2john already had an unconditional one-line note printed *after* each output; @gartikis added it in PR #5837. That helped, but it was late in the timeline (the user has already watched MBs scroll past) and fired even for tiny archives. This patch makes the same UX hint size-conditional and extends it to the other three converters most likely to surprise users. src/2john_common.h (new) Shared static-inline helpers: LARGE_OUTPUT_THRESHOLD_BYTES 1 MiB, per the suggestion in #4051. large_output_note(progname) one-shot stderr note explaining large outputs are normal and suggesting redirection to a file. large_output_note_if_input_large(progname, path, threshold) stat()s path and only fires the note when the input is at least threshold bytes; stat() failures are silently ignored — the note is best-effort UX, not a correctness check. static inline so each *2john pulls in just the bytes it uses; no new link-time dependency. src/zip2john.c Calls the helper at the top of each per-archive iteration in zip2john(). 
The previous unconditional 'It is normal for some outputs to be very large' line at the bottom of print_and_cleanup() is removed (replaced by an inline comment pointing at the new helper) so users scanning many small archives no longer see the note 100x. src/rar2john.c src/dmg2john.c src/keepass2john.c Same treatment: include 2john_common.h, call the helper before the per-file processing function in main()/rar2john(). Test Built with the standard 'configure && make' flow (clean build, only a pre-existing -Wcpp warning about libbz2 unrelated to this change). Live-verified end-to-end: tiny.zip (207 B) no warning <- previous behaviour was a spurious warning per archive big.zip (~2 MiB) warning <- as expected fake.kdbx (~2 MiB) warning via keepass2john rar2john / dmg2john compile and link clean; behaviour mirrors the same threshold via the shared helper. --- src/2john_common.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++ src/dmg2john.c | 6 +++- src/keepass2john.c | 10 +++++-- src/rar2john.c | 10 +++++-- src/zip2john.c | 20 +++++++++---- 5 files changed, 108 insertions(+), 10 deletions(-) create mode 100644 src/2john_common.h diff --git a/src/2john_common.h b/src/2john_common.h new file mode 100644 index 00000000000..6ca4382e710 --- /dev/null +++ b/src/2john_common.h @@ -0,0 +1,72 @@ +/* + * Shared helpers used by the family of *2john converters (zip2john, + * rar2john, dmg2john, keepass2john, ...). See: + * https://github.com/openwall/john/issues/4051 + * + * Many of these converters embed the encrypted blob from the input archive + * directly into the JtR hash line they print to stdout. For large archives + * (encrypted disk images, password-protected RARs, KeePass databases with + * sizeable key files) the resulting hash line can run into hundreds of + * megabytes or more, which routinely surprises new users into thinking the + * tool has malfunctioned. 
The helpers here let each *2john print a single + * one-shot stderr note up front when the input is large enough that a big + * stdout output is expected. + */ + +#ifndef _JOHN_2JOHN_COMMON_H +#define _JOHN_2JOHN_COMMON_H + +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> + +/* + * Default threshold (in bytes) above which a *2john tool should print the + * "output may be very large" stderr note. Roughly 1 MiB matches the + * suggestion in https://github.com/openwall/john/issues/4051. Each tool can + * override this if its output is more (or less) bloated relative to the + * input. + */ +#define LARGE_OUTPUT_THRESHOLD_BYTES (1L << 20) + +/* + * Print a one-shot stderr explanation that the output may be very large. + * The note is suppressed after the first call within a process so users + * who feed in many archives don't see it once per file. + */ +static inline void large_output_note(const char *progname) +{ + static int announced; + + if (announced) + return; + announced = 1; + + fprintf(stderr, + "Note: %s output can be very large for large inputs (often 2x the\n" + "input size or more, since the encrypted blob is hex-encoded into the\n" + "hash line). This is normal — redirect the output into a file with\n" + "'%s > hashes.txt'.\n", + progname, progname); +} + +/* + * Stat path and call large_output_note() iff the file is at least + * threshold bytes. Errors from stat() are silently ignored — the note is a + * best-effort UX hint, not a correctness check. 
+ */ +static inline void large_output_note_if_input_large(const char *progname, + const char *path, + off_t threshold) +{ + struct stat st; + + if (path == NULL) + return; + if (stat(path, &st) != 0) + return; + if (st.st_size >= threshold) + large_output_note(progname); +} + +#endif /* _JOHN_2JOHN_COMMON_H */ diff --git a/src/dmg2john.c b/src/dmg2john.c index 3ec16b755be..41ac3f8d5e3 100644 --- a/src/dmg2john.c +++ b/src/dmg2john.c @@ -42,6 +42,7 @@ #include "jumbo.h" #include "memory.h" #include "johnswap.h" +#include "2john_common.h" #define inplace_ntohl(x) do { (x) = john_ntohl((x)); } while (0) @@ -514,8 +515,11 @@ int main(int argc, char **argv) puts("Usage: dmg2john [DMG files]"); return -1; } - for (i = 1; i < argc; i++) + for (i = 1; i < argc; i++) { + large_output_note_if_input_large("dmg2john", argv[i], + LARGE_OUTPUT_THRESHOLD_BYTES); hash_plugin_parse_hash(argv[i]); + } return 0; } diff --git a/src/keepass2john.c b/src/keepass2john.c index 9ea39e43e7d..b4728fdb702 100644 --- a/src/keepass2john.c +++ b/src/keepass2john.c @@ -60,6 +60,7 @@ #include "aes.h" #include "base64_convert.h" #include "johnswap.h" +#include "2john_common.h" //#define KEEPASS_DEBUG @@ -966,8 +967,13 @@ int main(int argc, char **argv) return usage(argv[0]); argv += optind; - while (argc--) - process_database(*argv++); + while (argc--) { + char *path = *argv++; + + large_output_note_if_input_large("keepass2john", path, + LARGE_OUTPUT_THRESHOLD_BYTES); + process_database(path); + } return 0; } diff --git a/src/rar2john.c b/src/rar2john.c index 02457d1f92a..788ecba8fc4 100644 --- a/src/rar2john.c +++ b/src/rar2john.c @@ -63,6 +63,7 @@ #include "base64_convert.h" #include "sha2.h" #include "rar2john.h" +#include "2john_common.h" #ifdef _MSC_VER #include "missing_getopt.h" #endif @@ -997,8 +998,13 @@ int rar2john(int argc, char **argv) return usage(argv[0]); argv += optind; - while (argc--) - process_file(*argv++); + while (argc--) { + const char *path = *argv++; + + 
large_output_note_if_input_large("rar2john", path, + LARGE_OUTPUT_THRESHOLD_BYTES); + process_file(path); + } return EXIT_SUCCESS; } diff --git a/src/zip2john.c b/src/zip2john.c index fce98106183..5f75bdcfefa 100644 --- a/src/zip2john.c +++ b/src/zip2john.c @@ -139,6 +139,7 @@ #include "missing_getopt.h" #endif #include "johnswap.h" +#include "2john_common.h" #define _STR_VALUE(arg) #arg #define STR_MACRO(n) _STR_VALUE(n) @@ -945,9 +946,14 @@ static void print_and_cleanup(zip_context *ctx) "If that is not the case, the hash may be uncrackable. To avoid this, use\n" "option -o to pick a file at a time.\n"); - // Give warning to user for potentially large output of zip2john - fprintf(stderr, - "Note: It is normal for some outputs to be very large\n"); + /* + * The "output may be very large" note is now printed up front by + * large_output_note_if_input_large() in zip2john(), only when the + * input archive is big enough that the user is actually going to see + * a confusingly large hash line. This avoids the previous behaviour + * of always printing it after every output, which spammed users who + * fed in many small archives. + */ for (i = 0; i < ctx->num_candidates; ++i) { MEM_FREE(ctx->best_files[i].hash_data); @@ -1179,10 +1185,14 @@ int zip2john(int argc, char **argv) argv += optind; while(argc--) { + const char *path = *argv++; + + large_output_note_if_input_large("zip2john", path, + LARGE_OUTPUT_THRESHOLD_BYTES); if (do_scan) { - scan_from_start(*argv++); + scan_from_start(path); } else { - scan_central_index(*argv++); + scan_central_index(path); } }