From b0d2f0e21ff6440543e605b43511bfcc94f0f13e Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 13 Apr 2016 03:08:33 +0800 Subject: [PATCH 1/6] Show header in font map files --- src/HTMLRenderer/font.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/HTMLRenderer/font.cc b/src/HTMLRenderer/font.cc index 0f4680abb..73b8f3029 100644 --- a/src/HTMLRenderer/font.cc +++ b/src/HTMLRenderer/font.cc @@ -373,6 +373,14 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info) #endif } +namespace { + +void output_map_file_header(std::ostream& out) { + out << "glyph_code mapped_code unicode" << std::endl; +} + +} // namespace + void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only) { if(param.debug) @@ -574,6 +582,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo map_filename = (char*)str_fmt("%s/f%llx.map", param.tmp_dir.c_str(), info.id); tmp_files.add(map_filename); map_outf.open(map_filename); + output_map_file_header(map_outf); } unordered_set codeset; @@ -650,6 +659,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo { map_outf.close(); map_outf.open(map_filename); + output_map_file_header(map_outf); } continue; } From fac0808d659e4426f9d736f517c6889e214864e8 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 13 Apr 2016 08:50:55 +0800 Subject: [PATCH 2/6] fix a usage of unique_ptr with array --- src/pdf2htmlEX.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index cf568b210..96e58ea59 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -111,7 +111,7 @@ void prepare_directories() errno = 0; - unique_ptr pBuf(new char[tmp_dir.size() + 1]); + unique_ptr pBuf(new char[tmp_dir.size() + 1]); strcpy(pBuf.get(), tmp_dir.c_str()); auto p = mkdtemp(pBuf.get()); if(p == nullptr) From f934c0a8990ab68160508263dfd1a9b531e4b99c Mon Sep 17 00:00:00 2001 From: Daniel 
Nagy Date: Fri, 11 Mar 2016 09:59:14 +0100 Subject: [PATCH 3/6] Added '--quiet' argument to hide progress messages (resolves #503) --- src/HTMLRenderer/general.cc | 18 ++++++++++++------ src/Param.h | 1 + src/Preprocessor.cc | 13 ++++++++----- src/pdf2htmlEX.cc | 11 ++++++----- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 6a54194e5..0e882e4cf 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -38,6 +38,7 @@ using std::max; using std::min_element; using std::vector; using std::abs; +using std::cout; using std::cerr; using std::endl; @@ -120,11 +121,13 @@ void HTMLRenderer::process(PDFDoc *doc) for(int i = param.first_page; i <= param.last_page ; ++i) { if (param.tmp_file_size_limit != -1 && tmp_files.get_total_size() > param.tmp_file_size_limit * 1024) { - cerr << "Stop processing, reach max size\n"; + if(param.quiet == 0) + cout << "Stop processing, reach max size\n"; break; } - cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush; + if (param.quiet == 0) + cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush; if(param.split_pages) { @@ -153,9 +156,11 @@ void HTMLRenderer::process(PDFDoc *doc) f_curpage = nullptr; } } - if(page_count >= 0) - cerr << "Working: " << page_count << "/" << page_count; - cerr << endl; + if(page_count >= 0 && param.quiet == 0) + cout << "Working: " << page_count << "/" << page_count; + + if(param.quiet == 0) + cout << endl; //////////////////////// // Process Outline @@ -167,7 +172,8 @@ void HTMLRenderer::process(PDFDoc *doc) bg_renderer = nullptr; fallback_bg_renderer = nullptr; - cerr << endl; + if(param.quiet == 0) + cout << endl; } void HTMLRenderer::setDefaultCTM(double *ctm) diff --git a/src/Param.h b/src/Param.h index 571fa28d6..7ea852b29 100644 --- a/src/Param.h +++ b/src/Param.h @@ -79,6 +79,7 @@ struct Param std::string tmp_dir; int debug; int proof; + int quiet; 
std::string input_filename, output_filename; }; diff --git a/src/Preprocessor.cc b/src/Preprocessor.cc index a8859ad52..546bc2271 100644 --- a/src/Preprocessor.cc +++ b/src/Preprocessor.cc @@ -20,7 +20,7 @@ namespace pdf2htmlEX { -using std::cerr; +using std::cout; using std::endl; using std::flush; using std::max; @@ -45,7 +45,8 @@ void Preprocessor::process(PDFDoc * doc) int page_count = (param.last_page - param.first_page + 1); for(int i = param.first_page; i <= param.last_page ; ++i) { - cerr << "Preprocessing: " << (i-param.first_page) << "/" << page_count << '\r' << flush; + if(param.quiet == 0) + cout << "Preprocessing: " << (i - param.first_page) << "/" << page_count << '\r' << flush; doc->displayPage(this, i, DEFAULT_DPI, DEFAULT_DPI, 0, @@ -54,9 +55,11 @@ void Preprocessor::process(PDFDoc * doc) false, // printing nullptr, nullptr, nullptr, nullptr); } - if(page_count >= 0) - cerr << "Preprocessing: " << page_count << "/" << page_count; - cerr << endl; + if(page_count >= 0 && param.quiet == 0) + cout << "Preprocessing: " << page_count << "/" << page_count; + + if(param.quiet == 0) + cout << endl; } void Preprocessor::drawChar(GfxState *state, double x, double y, diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 96e58ea59..f474a823e 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -160,7 +160,7 @@ void parse_options (int argc, char **argv) .add("process-form", &param.process_form, 0, "include text fields and radio buttons") .add("printing", &param.printing, 1, "enable printing support") .add("fallback", &param.fallback, 0, "output in fallback mode") - .add("tmp-file-size-limit", &param.tmp_file_size_limit, -1, "Maximum size (in KB) used by temporary files, -1 for no limit.") + .add("tmp-file-size-limit", &param.tmp_file_size_limit, -1, "Maximum size (in KB) used by temporary files, -1 for no limit") // fonts .add("embed-external-font", &param.embed_external_font, 1, "embed local match for external fonts") @@ -186,8 +186,8 @@ void parse_options (int argc, char
**argv) // background image .add("bg-format", &param.bg_format, "png", "specify background image format") .add("svg-node-count-limit", &param.svg_node_count_limit, -1, "if node count in a svg background image exceeds this limit," - " fall back this page to bitmap background; negative value means no limit.") - .add("svg-embed-bitmap", &param.svg_embed_bitmap, 1, "1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible.") + " fall back this page to bitmap background; negative value means no limit") + .add("svg-embed-bitmap", &param.svg_embed_bitmap, 1, "1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible") // encryption .add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", true) @@ -196,11 +196,12 @@ void parse_options (int argc, char **argv) // misc. .add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion") - .add("tmp-dir", &param.tmp_dir, param.tmp_dir, "specify the location of temporary directory.") + .add("tmp-dir", &param.tmp_dir, param.tmp_dir, "specify the location of temporary directory") .add("data-dir", &param.data_dir, param.data_dir, "specify data directory") .add("poppler-data-dir", &param.poppler_data_dir, param.poppler_data_dir, "specify poppler data directory") .add("debug", &param.debug, 0, "print debugging information") - .add("proof", &param.proof, 0, "texts are drawn on both text layer and background for proof.") + .add("proof", &param.proof, 0, "texts are drawn on both text layer and background for proof") + .add("quiet", &param.quiet, 0, "perform operations quietly") // meta .add("version,v", "print copyright and version info", &show_version_and_exit) From 4ee82f5b3223e4e32c5c079c35fcb78e358ce3c6 Mon Sep 17 00:00:00 2001 From: Daniel Nagy Date: Thu, 14 Apr 2016 09:55:10 +0200 Subject: [PATCH 4/6] Revert cout messages to cerr (see #622) --- src/HTMLRenderer/general.cc | 9 ++++----- src/Preprocessor.cc | 7 +++---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git
a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 0e882e4cf..2e14eeec8 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -38,7 +38,6 @@ using std::max; using std::min_element; using std::vector; using std::abs; -using std::cout; using std::cerr; using std::endl; @@ -122,7 +121,7 @@ void HTMLRenderer::process(PDFDoc *doc) { if (param.tmp_file_size_limit != -1 && tmp_files.get_total_size() > param.tmp_file_size_limit * 1024) { if(param.quiet == 0) - cout << "Stop processing, reach max size\n"; + cerr << "Stop processing, reach max size\n"; break; } @@ -157,10 +156,10 @@ void HTMLRenderer::process(PDFDoc *doc) } } if(page_count >= 0 && param.quiet == 0) - cout << "Working: " << page_count << "/" << page_count; + cerr << "Working: " << page_count << "/" << page_count; if(param.quiet == 0) - cout << endl; + cerr << endl; //////////////////////// // Process Outline @@ -173,7 +172,7 @@ void HTMLRenderer::process(PDFDoc *doc) fallback_bg_renderer = nullptr; if(param.quiet == 0) - cout << endl; + cerr << endl; } void HTMLRenderer::setDefaultCTM(double *ctm) diff --git a/src/Preprocessor.cc b/src/Preprocessor.cc index 546bc2271..78c2301d4 100644 --- a/src/Preprocessor.cc +++ b/src/Preprocessor.cc @@ -20,7 +20,6 @@ namespace pdf2htmlEX { -using std::cout; using std::endl; using std::flush; using std::max; @@ -46,7 +45,7 @@ void Preprocessor::process(PDFDoc * doc) for(int i = param.first_page; i <= param.last_page ; ++i) { if(param.quiet == 0) - cout << "Preprocessing: " << (i - param.first_page) << "/" << page_count << '\r' << flush; + cerr << "Preprocessing: " << (i - param.first_page) << "/" << page_count << '\r' << flush; doc->displayPage(this, i, DEFAULT_DPI, DEFAULT_DPI, 0, @@ -56,10 +55,10 @@ void Preprocessor::process(PDFDoc * doc) nullptr, nullptr, nullptr, nullptr); } if(page_count >= 0 && param.quiet == 0) - cout << "Preprocessing: " << page_count << "/" << page_count; + cerr << "Preprocessing: " << page_count << "/" << 
page_count; if(param.quiet == 0) - cout << endl; + cerr << endl; } void Preprocessor::drawChar(GfxState *state, double x, double y, From 0198047d91daf382887d88842c2d5841c9a16ac1 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Thu, 14 Apr 2016 21:12:26 +0200 Subject: [PATCH 5/6] bump version --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3fdabb0d7..121f4ea23 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,7 @@ option(ENABLE_SVG "Enable SVG support, for generating SVG background images and include_directories(${CMAKE_SOURCE_DIR}/src) -set(PDF2HTMLEX_VERSION "0.14.6") +set(PDF2HTMLEX_VERSION "0.15.0") set(ARCHIVE_NAME pdf2htmlex-${PDF2HTMLEX_VERSION}) add_custom_target(dist COMMAND git archive --prefix=${ARCHIVE_NAME}/ HEAD From 4a6a31f08439a0cd335dc241dfb75e931f8b447a Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Thu, 14 Apr 2016 22:15:14 +0200 Subject: [PATCH 6/6] fix build; fix some coverity warnings --- src/HTMLRenderer/font.cc | 1 + src/HTMLRenderer/link.cc | 110 +++++++++++++++++++-------------------- src/HTMLTextLine.cc | 7 ++- src/Preprocessor.cc | 1 + src/StateManager.h | 4 +- src/util/unicode.cc | 4 +- 6 files changed, 64 insertions(+), 63 deletions(-) diff --git a/src/HTMLRenderer/font.cc b/src/HTMLRenderer/font.cc index 73b8f3029..385e29efa 100644 --- a/src/HTMLRenderer/font.cc +++ b/src/HTMLRenderer/font.cc @@ -536,6 +536,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo ffw_reencode_glyph_order(); GfxCIDFont * _font = dynamic_cast(font); + assert(_font != nullptr); // To locate CID2GID for the font // as in CairoFontEngine.cc diff --git a/src/HTMLRenderer/link.cc b/src/HTMLRenderer/link.cc index 3c90ab5c3..b8b412935 100644 --- a/src/HTMLRenderer/link.cc +++ b/src/HTMLRenderer/link.cc @@ -56,73 +56,70 @@ static string get_linkdest_detail_str(LinkDest * dest, Catalog * catalog, int & // dec sout << "[" << pageno; - if(dest) + 
switch(dest->getKind()) { - switch(dest->getKind()) - { - case destXYZ: - { - sout << ",\"XYZ\","; - if(dest->getChangeLeft()) - sout << (dest->getLeft()); - else - sout << "null"; - sout << ","; - if(dest->getChangeTop()) - sout << (dest->getTop()); - else - sout << "null"; - sout << ","; - if(dest->getChangeZoom()) - sout << (dest->getZoom()); - else - sout << "null"; - } - break; - case destFit: - sout << ",\"Fit\""; - break; - case destFitH: - sout << ",\"FitH\","; - if(dest->getChangeTop()) - sout << (dest->getTop()); - else - sout << "null"; - break; - case destFitV: - sout << ",\"FitV\","; + case destXYZ: + { + sout << ",\"XYZ\","; if(dest->getChangeLeft()) sout << (dest->getLeft()); else sout << "null"; - break; - case destFitR: - sout << ",\"FitR\"," - << (dest->getLeft()) << "," - << (dest->getBottom()) << "," - << (dest->getRight()) << "," - << (dest->getTop()); - break; - case destFitB: - sout << ",\"FitB\""; - break; - case destFitBH: - sout << ",\"FitBH\","; + sout << ","; if(dest->getChangeTop()) sout << (dest->getTop()); else sout << "null"; - break; - case destFitBV: - sout << ",\"FitBV\","; - if(dest->getChangeLeft()) - sout << (dest->getLeft()); + sout << ","; + if(dest->getChangeZoom()) + sout << (dest->getZoom()); else sout << "null"; - break; - default: - break; - } + } + break; + case destFit: + sout << ",\"Fit\""; + break; + case destFitH: + sout << ",\"FitH\","; + if(dest->getChangeTop()) + sout << (dest->getTop()); + else + sout << "null"; + break; + case destFitV: + sout << ",\"FitV\","; + if(dest->getChangeLeft()) + sout << (dest->getLeft()); + else + sout << "null"; + break; + case destFitR: + sout << ",\"FitR\"," + << (dest->getLeft()) << "," + << (dest->getBottom()) << "," + << (dest->getRight()) << "," + << (dest->getTop()); + break; + case destFitB: + sout << ",\"FitB\""; + break; + case destFitBH: + sout << ",\"FitBH\","; + if(dest->getChangeTop()) + sout << (dest->getTop()); + else + sout << "null"; + break; + case destFitBV: + 
sout << ",\"FitBV\","; + if(dest->getChangeLeft()) + sout << (dest->getLeft()); + else + sout << "null"; + break; + default: + break; } sout << "]"; @@ -166,6 +163,7 @@ string HTMLRenderer::get_linkaction_str(LinkAction * action, string & detail) case actionURI: { auto * real_action = dynamic_cast(action); + assert(real_action != nullptr); dest_str = real_action->getURI()->getCString(); } break; diff --git a/src/HTMLTextLine.cc b/src/HTMLTextLine.cc index a0be2865d..e74b663bb 100644 --- a/src/HTMLTextLine.cc +++ b/src/HTMLTextLine.cc @@ -378,13 +378,12 @@ void HTMLTextLine::optimize_normal(std::vector & lines) new_offsets.reserve(offsets.size()); auto offset_iter1 = offsets.begin(); - for(auto state_iter2 = states.begin(), state_iter1 = state_iter2++; - state_iter1 != states.end(); - ++state_iter1, ++state_iter2) + for(auto state_iter1 = states.begin(); state_iter1 != states.end(); ++state_iter1) { + const auto state_iter2 = std::next(state_iter1); const size_t text_idx1 = state_iter1->start_idx; const size_t text_idx2 = (state_iter2 == states.end()) ? 
text.size() : state_iter2->start_idx; - size_t text_count = text_idx2 - text_idx1; + const size_t text_count = text_idx2 - text_idx1; // there might be some offsets before the first state while((offset_iter1 != offsets.end()) diff --git a/src/Preprocessor.cc b/src/Preprocessor.cc index 78c2301d4..42318b20e 100644 --- a/src/Preprocessor.cc +++ b/src/Preprocessor.cc @@ -20,6 +20,7 @@ namespace pdf2htmlEX { +using std::cerr; using std::endl; using std::flush; using std::max; diff --git a/src/StateManager.h b/src/StateManager.h index 0a19df085..ed1361c78 100644 --- a/src/StateManager.h +++ b/src/StateManager.h @@ -84,7 +84,7 @@ class StateManager // Be careful about the mixed usage of Matrix and const double * // the input is usually double *, which might be changed, so we have to copy the content out -// in the map we use Matrix instead of double * such that the array may be automatically release when deconstructing +// in the map we use Matrix instead of double * such that the array may be automatically release when destructing template class StateManager { @@ -96,7 +96,7 @@ class StateManager // return id long long install(const double * new_value) { Matrix m; - memcpy(m.m, new_value, sizeof(m.m)); + memcpy(m.m, new_value, 4 * sizeof(double)); auto iter = value_map.lower_bound(m); if((iter != value_map.end()) && (tm_equal(m.m, iter->first.m, 4))) { diff --git a/src/util/unicode.cc b/src/util/unicode.cc index 4a2a03433..5611aa6d5 100644 --- a/src/util/unicode.cc +++ b/src/util/unicode.cc @@ -40,7 +40,9 @@ Unicode unicode_from_font (CharCode code, GfxFont * font) { if(!font->isCIDFont()) { - char * cname = dynamic_cast(font)->getCharName(code); + auto * font2 = dynamic_cast(font); + assert(font2 != nullptr); + char * cname = font2->getCharName(code); // may be untranslated ligature if(cname) {