diff --git a/src/2john_common.h b/src/2john_common.h
new file mode 100644
--- /dev/null
+++ b/src/2john_common.h
@@ -0,0 +1,95 @@
+/*
+ * Shared helpers used by the family of *2john converters (zip2john,
+ * rar2john, dmg2john, keepass2john, ...). See:
+ * https://github.com/openwall/john/issues/4051
+ *
+ * Many of these converters embed the encrypted blob from the input archive
+ * directly into the JtR hash line they print to stdout. For large archives
+ * (encrypted disk images, password-protected RARs, KeePass databases with
+ * sizeable key files) the resulting hash line can run into hundreds of
+ * megabytes or more, which routinely surprises new users into thinking the
+ * tool has malfunctioned. The helpers here let each *2john print a single
+ * one-shot stderr note up front when the input is large enough that a big
+ * stdout output is expected.
+ */
+
+#ifndef _JOHN_2JOHN_COMMON_H
+#define _JOHN_2JOHN_COMMON_H
+
+#include <errno.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+/*
+ * Default threshold (in bytes) at or above which a *2john tool should print
+ * the "output may be very large" stderr note. Roughly 1 MiB matches the
+ * suggestion in https://github.com/openwall/john/issues/4051. Each tool can
+ * pass a different threshold if its output is more (or less) bloated
+ * relative to the input.
+ */
+#define LARGE_OUTPUT_THRESHOLD_BYTES (1L << 20)
+
+/*
+ * Print a one-shot stderr explanation that the output may be very large.
+ *
+ * progname: tool name interpolated into the message (must not be NULL).
+ *
+ * The note is suppressed after the first call so users who feed in many
+ * archives don't see it once per file. The "already announced" flag is a
+ * function-local static of a static inline function, so it is tracked
+ * separately in each translation unit that includes this header.
+ */
+static inline void large_output_note(const char *progname)
+{
+	static int announced;
+
+	if (announced)
+		return;
+	announced = 1;
+
+	/* Plain ASCII only: stderr may not be a UTF-8 terminal. */
+	fprintf(stderr,
+	    "Note: %s output can be very large for large inputs (often 2x the\n"
+	    "input size or more, since the encrypted blob is hex-encoded into the\n"
+	    "hash line). This is normal - redirect the output into a file with\n"
+	    "'%s <file> > hashes.txt'.\n",
+	    progname, progname);
+}
+
+/*
+ * Stat path and call large_output_note() iff the file is at least
+ * threshold bytes.
+ *
+ * progname:  tool name forwarded to large_output_note().
+ * path:      input file to size; NULL is accepted and ignored.
+ * threshold: minimum st_size, in bytes, that triggers the note.
+ *
+ * The note is a best-effort UX hint, not a correctness check, so stat()
+ * failures are ignored - except EOVERFLOW, which stat() reports when a
+ * field such as the file size cannot be represented in struct stat (for
+ * instance a file over 2 GiB with a 32-bit off_t). Such inputs are the ones
+ * most in need of the note, so it is printed for them.
+ */
+static inline void large_output_note_if_input_large(const char *progname,
+                                                    const char *path,
+                                                    off_t threshold)
+{
+	struct stat st;
+
+	if (path == NULL)
+		return;
+
+	if (stat(path, &st) != 0) {
+#ifdef EOVERFLOW
+		if (errno == EOVERFLOW)
+			large_output_note(progname);
+#endif
+		return;
+	}
+
+	if (st.st_size >= threshold)
+		large_output_note(progname);
+}
+
+#endif /* _JOHN_2JOHN_COMMON_H */
diff --git a/src/dmg2john.c b/src/dmg2john.c
index 3ec16b755be..41ac3f8d5e3 100644
--- a/src/dmg2john.c
+++ b/src/dmg2john.c
@@ -42,6 +42,7 @@
 #include "jumbo.h"
 #include "memory.h"
 #include "johnswap.h"
+#include "2john_common.h"
 
 #define inplace_ntohl(x) do { (x) = john_ntohl((x)); } while (0)
 
@@ -514,8 +515,11 @@ int main(int argc, char **argv)
 		puts("Usage: dmg2john [DMG files]");
 		return -1;
 	}
-	for (i = 1; i < argc; i++)
+	for (i = 1; i < argc; i++) {
+		large_output_note_if_input_large("dmg2john", argv[i],
+		                                 LARGE_OUTPUT_THRESHOLD_BYTES);
 		hash_plugin_parse_hash(argv[i]);
+	}
 
 	return 0;
 }
diff --git a/src/keepass2john.c b/src/keepass2john.c
index 9ea39e43e7d..b4728fdb702 100644
--- a/src/keepass2john.c
+++ b/src/keepass2john.c
@@ -60,6 +60,7 @@
 #include "aes.h"
 #include "base64_convert.h"
 #include "johnswap.h"
+#include "2john_common.h"
 
 //#define KEEPASS_DEBUG
 
@@ -966,8 +967,13 @@ int main(int argc, char **argv)
 		return usage(argv[0]);
 	argv += optind;
 
-	while (argc--)
-		process_database(*argv++);
+	while (argc--) {
+		char *path = *argv++;
+
+		large_output_note_if_input_large("keepass2john", path,
+		                                 LARGE_OUTPUT_THRESHOLD_BYTES);
+		process_database(path);
+	}
 
 	return 0;
 }
diff --git a/src/rar2john.c b/src/rar2john.c
index 02457d1f92a..788ecba8fc4 100644
--- a/src/rar2john.c
+++ b/src/rar2john.c
@@ -63,6 +63,7 @@
 #include "base64_convert.h"
 #include "sha2.h"
 #include "rar2john.h"
+#include "2john_common.h"
 #ifdef _MSC_VER
 #include "missing_getopt.h"
 #endif
@@ -997,8 +998,13 @@ int rar2john(int argc, char **argv)
 		return usage(argv[0]);
 	argv += optind;
 
-	while (argc--)
-		process_file(*argv++);
+	while (argc--) {
+		const char *path = *argv++;
+
+		large_output_note_if_input_large("rar2john", path,
+		                                 LARGE_OUTPUT_THRESHOLD_BYTES);
+		process_file(path);
+	}
 
 	return EXIT_SUCCESS;
 }
diff --git a/src/zip2john.c b/src/zip2john.c
index fce98106183..5f75bdcfefa 100644
--- a/src/zip2john.c
+++ b/src/zip2john.c
@@ -139,6 +139,7 @@
 #include "missing_getopt.h"
 #endif
 #include "johnswap.h"
+#include "2john_common.h"
 
 #define _STR_VALUE(arg) #arg
 #define STR_MACRO(n) _STR_VALUE(n)
@@ -945,9 +946,14 @@ static void print_and_cleanup(zip_context *ctx)
 		        "If that is not the case, the hash may be uncrackable. To avoid this, use\n"
 		        "option -o to pick a file at a time.\n");
 
-	// Give warning to user for potentially large output of zip2john
-	fprintf(stderr,
-	        "Note: It is normal for some outputs to be very large\n");
+	/*
+	 * The "output may be very large" note is now printed up front by
+	 * large_output_note_if_input_large() in zip2john(), only when the
+	 * input archive is big enough that the user is actually going to see
+	 * a confusingly large hash line. This avoids the previous behaviour
+	 * of always printing it after every output, which spammed users who
+	 * fed in many small archives.
+	 */
 
 	for (i = 0; i < ctx->num_candidates; ++i) {
 		MEM_FREE(ctx->best_files[i].hash_data);
@@ -1179,10 +1185,14 @@ int zip2john(int argc, char **argv)
 	argv += optind;
 
 	while(argc--) {
+		const char *path = *argv++;
+
+		large_output_note_if_input_large("zip2john", path,
+		                                 LARGE_OUTPUT_THRESHOLD_BYTES);
 		if (do_scan) {
-			scan_from_start(*argv++);
+			scan_from_start(path);
 		} else {
-			scan_central_index(*argv++);
+			scan_central_index(path);
 		}
 	}
 