diff --git a/CMakeLists.txt b/CMakeLists.txt index 3fdabb0d7..121f4ea23 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,7 @@ option(ENABLE_SVG "Enable SVG support, for generating SVG background images and include_directories(${CMAKE_SOURCE_DIR}/src) -set(PDF2HTMLEX_VERSION "0.14.6") +set(PDF2HTMLEX_VERSION "0.15.0") set(ARCHIVE_NAME pdf2htmlex-${PDF2HTMLEX_VERSION}) add_custom_target(dist COMMAND git archive --prefix=${ARCHIVE_NAME}/ HEAD diff --git a/src/HTMLRenderer/font.cc b/src/HTMLRenderer/font.cc index 0f4680abb..385e29efa 100644 --- a/src/HTMLRenderer/font.cc +++ b/src/HTMLRenderer/font.cc @@ -373,6 +373,14 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info) #endif } +namespace { + +void output_map_file_header(std::ostream& out) { + out << "glyph_code mapped_code unicode" << std::endl; +} + +} // namespace + void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only) { if(param.debug) @@ -528,6 +536,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo ffw_reencode_glyph_order(); GfxCIDFont * _font = dynamic_cast(font); + assert(_font != nullptr); // To locate CID2GID for the font // as in CairoFontEngine.cc @@ -574,6 +583,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo map_filename = (char*)str_fmt("%s/f%llx.map", param.tmp_dir.c_str(), info.id); tmp_files.add(map_filename); map_outf.open(map_filename); + output_map_file_header(map_outf); } unordered_set codeset; @@ -650,6 +660,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo { map_outf.close(); map_outf.open(map_filename); + output_map_file_header(map_outf); } continue; } diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 6a54194e5..2e14eeec8 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -120,11 +120,13 @@ void HTMLRenderer::process(PDFDoc *doc) for(int i = 
param.first_page; i <= param.last_page ; ++i) { if (param.tmp_file_size_limit != -1 && tmp_files.get_total_size() > param.tmp_file_size_limit * 1024) { - cerr << "Stop processing, reach max size\n"; + if(param.quiet == 0) + cerr << "Stop processing, reach max size\n"; break; } - cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush; + if (param.quiet == 0) + cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush; if(param.split_pages) { @@ -153,9 +155,11 @@ void HTMLRenderer::process(PDFDoc *doc) f_curpage = nullptr; } } - if(page_count >= 0) + if(page_count >= 0 && param.quiet == 0) cerr << "Working: " << page_count << "/" << page_count; - cerr << endl; + + if(param.quiet == 0) + cerr << endl; //////////////////////// // Process Outline @@ -167,7 +171,8 @@ void HTMLRenderer::process(PDFDoc *doc) bg_renderer = nullptr; fallback_bg_renderer = nullptr; - cerr << endl; + if(param.quiet == 0) + cerr << endl; } void HTMLRenderer::setDefaultCTM(double *ctm) diff --git a/src/HTMLRenderer/link.cc b/src/HTMLRenderer/link.cc index 3c90ab5c3..b8b412935 100644 --- a/src/HTMLRenderer/link.cc +++ b/src/HTMLRenderer/link.cc @@ -56,73 +56,70 @@ static string get_linkdest_detail_str(LinkDest * dest, Catalog * catalog, int & // dec sout << "[" << pageno; - if(dest) + switch(dest->getKind()) { - switch(dest->getKind()) - { - case destXYZ: - { - sout << ",\"XYZ\","; - if(dest->getChangeLeft()) - sout << (dest->getLeft()); - else - sout << "null"; - sout << ","; - if(dest->getChangeTop()) - sout << (dest->getTop()); - else - sout << "null"; - sout << ","; - if(dest->getChangeZoom()) - sout << (dest->getZoom()); - else - sout << "null"; - } - break; - case destFit: - sout << ",\"Fit\""; - break; - case destFitH: - sout << ",\"FitH\","; - if(dest->getChangeTop()) - sout << (dest->getTop()); - else - sout << "null"; - break; - case destFitV: - sout << ",\"FitV\","; + case destXYZ: + { + sout << ",\"XYZ\","; if(dest->getChangeLeft()) 
sout << (dest->getLeft()); else sout << "null"; - break; - case destFitR: - sout << ",\"FitR\"," - << (dest->getLeft()) << "," - << (dest->getBottom()) << "," - << (dest->getRight()) << "," - << (dest->getTop()); - break; - case destFitB: - sout << ",\"FitB\""; - break; - case destFitBH: - sout << ",\"FitBH\","; + sout << ","; if(dest->getChangeTop()) sout << (dest->getTop()); else sout << "null"; - break; - case destFitBV: - sout << ",\"FitBV\","; - if(dest->getChangeLeft()) - sout << (dest->getLeft()); + sout << ","; + if(dest->getChangeZoom()) + sout << (dest->getZoom()); else sout << "null"; - break; - default: - break; - } + } + break; + case destFit: + sout << ",\"Fit\""; + break; + case destFitH: + sout << ",\"FitH\","; + if(dest->getChangeTop()) + sout << (dest->getTop()); + else + sout << "null"; + break; + case destFitV: + sout << ",\"FitV\","; + if(dest->getChangeLeft()) + sout << (dest->getLeft()); + else + sout << "null"; + break; + case destFitR: + sout << ",\"FitR\"," + << (dest->getLeft()) << "," + << (dest->getBottom()) << "," + << (dest->getRight()) << "," + << (dest->getTop()); + break; + case destFitB: + sout << ",\"FitB\""; + break; + case destFitBH: + sout << ",\"FitBH\","; + if(dest->getChangeTop()) + sout << (dest->getTop()); + else + sout << "null"; + break; + case destFitBV: + sout << ",\"FitBV\","; + if(dest->getChangeLeft()) + sout << (dest->getLeft()); + else + sout << "null"; + break; + default: + break; } sout << "]"; @@ -166,6 +163,7 @@ string HTMLRenderer::get_linkaction_str(LinkAction * action, string & detail) case actionURI: { auto * real_action = dynamic_cast(action); + assert(real_action != nullptr); dest_str = real_action->getURI()->getCString(); } break; diff --git a/src/HTMLTextLine.cc b/src/HTMLTextLine.cc index a0be2865d..e74b663bb 100644 --- a/src/HTMLTextLine.cc +++ b/src/HTMLTextLine.cc @@ -378,13 +378,12 @@ void HTMLTextLine::optimize_normal(std::vector & lines) new_offsets.reserve(offsets.size()); auto offset_iter1 = 
offsets.begin(); - for(auto state_iter2 = states.begin(), state_iter1 = state_iter2++; - state_iter1 != states.end(); - ++state_iter1, ++state_iter2) + for(auto state_iter1 = states.begin(); state_iter1 != states.end(); ++state_iter1) { + const auto state_iter2 = std::next(state_iter1); const size_t text_idx1 = state_iter1->start_idx; const size_t text_idx2 = (state_iter2 == states.end()) ? text.size() : state_iter2->start_idx; - size_t text_count = text_idx2 - text_idx1; + const size_t text_count = text_idx2 - text_idx1; // there might be some offsets before the first state while((offset_iter1 != offsets.end()) diff --git a/src/Param.h b/src/Param.h index 571fa28d6..7ea852b29 100644 --- a/src/Param.h +++ b/src/Param.h @@ -79,6 +79,7 @@ struct Param std::string tmp_dir; int debug; int proof; + int quiet; std::string input_filename, output_filename; }; diff --git a/src/Preprocessor.cc b/src/Preprocessor.cc index a8859ad52..42318b20e 100644 --- a/src/Preprocessor.cc +++ b/src/Preprocessor.cc @@ -45,7 +45,8 @@ void Preprocessor::process(PDFDoc * doc) int page_count = (param.last_page - param.first_page + 1); for(int i = param.first_page; i <= param.last_page ; ++i) { - cerr << "Preprocessing: " << (i-param.first_page) << "/" << page_count << '\r' << flush; + if(param.quiet == 0) + cerr << "Preprocessing: " << (i - param.first_page) << "/" << page_count << '\r' << flush; doc->displayPage(this, i, DEFAULT_DPI, DEFAULT_DPI, 0, @@ -54,9 +55,11 @@ void Preprocessor::process(PDFDoc * doc) false, // printing nullptr, nullptr, nullptr, nullptr); } - if(page_count >= 0) + if(page_count >= 0 && param.quiet == 0) cerr << "Preprocessing: " << page_count << "/" << page_count; - cerr << endl; + + if(param.quiet == 0) + cerr << endl; } void Preprocessor::drawChar(GfxState *state, double x, double y, diff --git a/src/StateManager.h b/src/StateManager.h index 0a19df085..ed1361c78 100644 --- a/src/StateManager.h +++ b/src/StateManager.h @@ -84,7 +84,7 @@ class StateManager // Be 
careful about the mixed usage of Matrix and const double * // the input is usually double *, which might be changed, so we have to copy the content out -// in the map we use Matrix instead of double * such that the array may be automatically release when deconstructing +// in the map we use Matrix instead of double * such that the array may be automatically release when destructing template class StateManager { @@ -96,7 +96,7 @@ class StateManager // return id long long install(const double * new_value) { Matrix m; - memcpy(m.m, new_value, sizeof(m.m)); + memcpy(m.m, new_value, 4 * sizeof(double)); auto iter = value_map.lower_bound(m); if((iter != value_map.end()) && (tm_equal(m.m, iter->first.m, 4))) { diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index cf568b210..f474a823e 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -111,7 +111,7 @@ void prepare_directories() errno = 0; - unique_ptr pBuf(new char[tmp_dir.size() + 1]); + unique_ptr pBuf(new char[tmp_dir.size() + 1]); strcpy(pBuf.get(), tmp_dir.c_str()); auto p = mkdtemp(pBuf.get()); if(p == nullptr) @@ -160,7 +160,7 @@ void parse_options (int argc, char **argv) .add("process-form", &param.process_form, 0, "include text fields and radio buttons") .add("printing", &param.printing, 1, "enable printing support") .add("fallback", &param.fallback, 0, "output in fallback mode") - .add("tmp-file-size-limit", &param.tmp_file_size_limit, -1, "Maximum size (in KB) used by temporary files, -1 for no limit.") + .add("tmp-file-size-limit", &param.tmp_file_size_limit, -1, "Maximum size (in KB) used by temporary files, -1 for no limit") // fonts .add("embed-external-font", &param.embed_external_font, 1, "embed local match for external fonts") @@ -186,8 +186,8 @@ void parse_options (int argc, char **argv) // background image .add("bg-format", &param.bg_format, "png", "specify background image format") .add("svg-node-count-limit", &param.svg_node_count_limit, -1, "if node count in a svg background image exceeds this limit," - " fall back this 
page to bitmap background; negative value means no limit.") - .add("svg-embed-bitmap", &param.svg_embed_bitmap, 1, "1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible.") + " fall back this page to bitmap background; negative value means no limit") + .add("svg-embed-bitmap", &param.svg_embed_bitmap, 1, "1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible") // encryption .add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", true) @@ -196,11 +196,12 @@ void parse_options (int argc, char **argv) // misc. .add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion") - .add("tmp-dir", &param.tmp_dir, param.tmp_dir, "specify the location of temporary directory.") + .add("tmp-dir", &param.tmp_dir, param.tmp_dir, "specify the location of temporary directory") .add("data-dir", &param.data_dir, param.data_dir, "specify data directory") .add("poppler-data-dir", &param.poppler_data_dir, param.poppler_data_dir, "specify poppler data directory") .add("debug", &param.debug, 0, "print debugging information") - .add("proof", &param.proof, 0, "texts are drawn on both text layer and background for proof.") + .add("proof", &param.proof, 0, "texts are drawn on both text layer and background for proof") + .add("quiet", &param.quiet, 0, "perform operations quietly") // meta .add("version,v", "print copyright and version info", &show_version_and_exit) diff --git a/src/util/unicode.cc b/src/util/unicode.cc index 4a2a03433..5611aa6d5 100644 --- a/src/util/unicode.cc +++ b/src/util/unicode.cc @@ -40,7 +40,9 @@ Unicode unicode_from_font (CharCode code, GfxFont * font) { if(!font->isCIDFont()) { - char * cname = dynamic_cast(font)->getCharName(code); + auto * font2 = dynamic_cast(font); + assert(font2 != nullptr); + char * cname = font2->getCharName(code); // may be untranslated ligature if(cname) {