|
@@ -1,6 +1,12 @@
|
|
|
package com.yaoyicloud.render.test;
|
|
|
|
|
|
+import com.lowagie.text.Image;
|
|
|
+import com.lowagie.text.PageSize;
|
|
|
import com.lowagie.text.pdf.BaseFont;
|
|
|
+import com.lowagie.text.pdf.PdfContentByte;
|
|
|
+import com.lowagie.text.pdf.PdfReader;
|
|
|
+import com.lowagie.text.pdf.PdfStamper;
|
|
|
+import com.lowagie.text.pdf.parser.PdfTextExtractor;
|
|
|
import fr.opensagres.poi.xwpf.converter.core.FileImageExtractor;
|
|
|
import fr.opensagres.poi.xwpf.converter.core.FileURIResolver;
|
|
|
import fr.opensagres.poi.xwpf.converter.core.ImageManager;
|
|
@@ -24,6 +30,11 @@ import java.io.FileWriter;
|
|
|
import java.io.IOException;
|
|
|
import java.io.InputStream;
|
|
|
import java.io.OutputStream;
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.Arrays;
|
|
|
+import java.util.HashMap;
|
|
|
+import java.util.LinkedHashMap;
|
|
|
+import java.util.Map;
|
|
|
import java.util.regex.Matcher;
|
|
|
import java.util.regex.Pattern;
|
|
|
|
|
@@ -53,19 +64,21 @@ public class TestPdf {
|
|
|
System.err.println("写入 2.html 文件时发生错误: " + e.getMessage());
|
|
|
e.printStackTrace();
|
|
|
}
|
|
|
- String docxHtml1 = formatHtml(docxHtml);
|
|
|
+
|
|
|
+ String docxHtml1 = formatHtml(docxHtml,true);
|
|
|
try (BufferedWriter writer1 = new BufferedWriter(new FileWriter("1.html"))) {
|
|
|
writer1.write(docxHtml1);
|
|
|
} catch (IOException e) {
|
|
|
System.err.println("写入 1.html 文件时发生错误: " + e.getMessage());
|
|
|
e.printStackTrace();
|
|
|
}
|
|
|
-
|
|
|
-
|
|
|
convertHtmlToPdf(docxHtml1, "./output.pdf");
|
|
|
|
|
|
+ convertHtmlToPdf2(docxHtml, "./output2.pdf");
|
|
|
+
|
|
|
}
|
|
|
|
|
|
// Shared lookup table filled by addBackgroundToPdf and read while building the HTML.
// addBackgroundToPdf stores, for every heading line it recognises in the rendered PDF,
// the page number relative to the detected start page (key = the extracted heading line),
// and additionally stores the detected start page under the key "startPage".
// addTableOfContents looks entries up by heading text and falls back to 0 when absent.
// NOTE(review): formatHtml reads the key "startpage" (lower-case p), which does not match
// the "startPage" key written by addBackgroundToPdf - confirm which spelling is intended.
// LinkedHashMap keeps the entries in insertion order.
+ private static Map<String, Integer> pageNumberMap = new LinkedHashMap<>();
|
|
|
|
|
|
/**
|
|
|
* 将docx转为html
|
|
@@ -131,55 +144,224 @@ public class TestPdf {
|
|
|
* @return 规范化后的html
|
|
|
*/
|
|
|
|
|
|
- public static String formatHtml(String html) {
|
|
|
+// public static String formatHtml(String html,boolean flag) {
|
|
|
+// Document doc = Jsoup.parse(html);
|
|
|
+//
|
|
|
+// removeEmptyParagraphs(doc);
|
|
|
+// Elements images = doc.select("img");
|
|
|
+// Element firstImg = images.first();
|
|
|
+// // 4. 删除第一个img元素
|
|
|
+// firstImg.parent().remove();
|
|
|
+// String baseCss = "@page {"
|
|
|
+// + " size: A4;"
|
|
|
+// + "}"
|
|
|
+// + "body { font-family: 思源黑体; }"
|
|
|
+// + "table {"
|
|
|
+// + " width: 100%;"
|
|
|
+// + " border-collapse: collapse;"
|
|
|
+// + " page-break-inside: auto;"
|
|
|
+// + " -fs-table-paginate: paginate;"
|
|
|
+// + "}"
|
|
|
+// + "thead {"
|
|
|
+// + " display: table-header-group;"
|
|
|
+// + "}"
|
|
|
+// + "td, th {"
|
|
|
+// + " -fs-table-paginate: paginate;"
|
|
|
+// + " background-clip: padding-box;"
|
|
|
+// + " -webkit-print-color-adjust: exact;"
|
|
|
+// + " page-break-inside: avoid; /* 防止单元格跨页 */"
|
|
|
+// + "}"
|
|
|
+// + ".avoid-break {"
|
|
|
+// + " break-inside: avoid;"
|
|
|
+// + " page-break-inside: avoid;"
|
|
|
+// + "}"
|
|
|
+// + "p.X1.X2 {"
|
|
|
+// + " -fs-pdf-bookmark-level: 1;"
|
|
|
+// + " -fs-pdf-bookmark-open: true;"
|
|
|
+// + "}"
|
|
|
+// + "p.X1.X3 {"
|
|
|
+// + " -fs-pdf-bookmark-level: 2;"
|
|
|
+// + "}";
|
|
|
+//
|
|
|
+// doc.head().appendElement("style").text(baseCss);
|
|
|
+//
|
|
|
+// Elements trs = doc.select("tr");
|
|
|
+// for (Element tr : trs) {
|
|
|
+// String trStyle = tr.attr("style");
|
|
|
+// trStyle = (trStyle == null) ? "" : trStyle;
|
|
|
+// trStyle += " page-break-inside: avoid !important;"; // 强制不分页
|
|
|
+// tr.attr("style", trStyle);
|
|
|
+// }
|
|
|
+//
|
|
|
+// processTableCells(doc);
|
|
|
+// Elements divs = doc.select("div");
|
|
|
+// divs.attr("style", "");
|
|
|
+// addTableOfContents(doc,flag);
|
|
|
+//
|
|
|
+// // 7. 处理特殊span元素
|
|
|
+// Elements spans = doc.select("span.X1.X2");
|
|
|
+// for (Element span : spans) {
|
|
|
+// String style = span.attr("style");
|
|
|
+// style = style.replaceAll("margin-left:\\s*[^;]+;?", "");
|
|
|
+// if (!span.text().contains("重要声明")) {
|
|
|
+// style += "color:#1677ff; ";
|
|
|
+// }
|
|
|
+// span.attr("style", style);
|
|
|
+// }
|
|
|
+// // 8. 一级标题前分页样式
|
|
|
+// Elements paragraphs = doc.select("p.X1.X2");
|
|
|
+// for (Element p : paragraphs) {
|
|
|
+// p.attr("style", p.attr("style") + "page-break-before:always;");
|
|
|
+// }
|
|
|
+//
|
|
|
+// processTables(doc);
|
|
|
+// doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
|
|
|
+// doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
|
|
|
+// doc.head().prepend("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">");
|
|
|
+//
|
|
|
+// return doc.html();
|
|
|
+// }
|
|
|
+
|
|
|
+ public static String formatHtml(String html,boolean flag) {
|
|
|
Document doc = Jsoup.parse(html);
|
|
|
+ Elements pTags = doc.select("p");
|
|
|
+ for (Element p : pTags) {
|
|
|
+ boolean isValidEmpty = true;
|
|
|
|
|
|
- removeEmptyParagraphs(doc);
|
|
|
- String baseCss = "@page {"
|
|
|
- + " size: A4;"
|
|
|
- + " @bottom-center {"
|
|
|
- + " content: \"第 \" counter(page) \" 页,共 \" counter(pages) \" 页\";"
|
|
|
- + " font-family: 思源黑体;"
|
|
|
- + " font-size: 10pt;"
|
|
|
- + " color: #000000;"
|
|
|
- + " }"
|
|
|
- + "}"
|
|
|
- + "body { font-family: 思源黑体; }"
|
|
|
- + "table {"
|
|
|
- + " width: 100%;"
|
|
|
- + " border-collapse: collapse;"
|
|
|
- + " page-break-inside: auto;"
|
|
|
- + " -fs-table-paginate: paginate;"
|
|
|
- + "}"
|
|
|
- + "thead {"
|
|
|
- + " display: table-header-group;"
|
|
|
- + "}"
|
|
|
- + "td, th {"
|
|
|
- + " -fs-table-paginate: paginate;"
|
|
|
- + " background-clip: padding-box;"
|
|
|
- + " -webkit-print-color-adjust: exact;"
|
|
|
- + "}"
|
|
|
- + ".avoid-break {"
|
|
|
- + " break-inside: avoid;"
|
|
|
- + " page-break-inside: avoid;"
|
|
|
- + "}"
|
|
|
- + "p.X1.X2 {"
|
|
|
- + " -fs-pdf-bookmark-level: 1;"
|
|
|
- + " -fs-pdf-bookmark-open: true;"
|
|
|
- + "}"
|
|
|
- + "p.X1.X3 {"
|
|
|
- + " -fs-pdf-bookmark-level: 2;"
|
|
|
+ for (org.jsoup.nodes.Node child : p.childNodes()) {
|
|
|
+ if (child instanceof Element) {
|
|
|
+ if (!((Element) child).tagName().equalsIgnoreCase("br")) {
|
|
|
+ isValidEmpty = false;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if (!child.outerHtml().trim().isEmpty()) {
|
|
|
+ isValidEmpty = false;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (isValidEmpty) {
|
|
|
+ p.attr("style", "line-height: 25px;");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ Integer startpage = pageNumberMap.getOrDefault("startpage", 0);
|
|
|
+
|
|
|
+ String baseCss =
|
|
|
+ "@page {" +
|
|
|
+ " size: A4;" +
|
|
|
+ " @bottom-center {" +
|
|
|
+ " content: none;" + // 只显示数字页码
|
|
|
+ " font-family: 思源黑体;" +
|
|
|
+ " font-size: 10pt;" +
|
|
|
+ " color: #000000;" +
|
|
|
+ " }" +
|
|
|
+ "}" +
|
|
|
+ "@page show-page-number {" +
|
|
|
+ " @bottom-center {" +
|
|
|
+ " content: counter(page);" +
|
|
|
+ " font-family: 思源黑体;" +
|
|
|
+ " font-size: 10pt;" +
|
|
|
+ " color: #000000;" +
|
|
|
+ " }" +
|
|
|
+ " counter-reset: page 1;" + // 关键!在这里重置页码
|
|
|
+ "}" +
|
|
|
+
|
|
|
+ // 为最后一个div设置页码显示并重置计数器
|
|
|
+ ".start-counting {" +
|
|
|
+ " page: show-page-number;" +
|
|
|
+ " counter-reset: page 1;" + // 重置页码
|
|
|
+ " page-break-before: always;" + // 强制新页开始
|
|
|
+ "}"
|
|
|
+// // 隐藏 startpage 之前的所有页码
|
|
|
+// "@page :nth(1-5) {" +
|
|
|
+// " @bottom-center { content: none; }" +
|
|
|
+// "}" +
|
|
|
+// // 重置计数器,使 startpage 页显示为1
|
|
|
+// "@page :nth(8) {" +
|
|
|
+// " -fs-counter-reset: page 0;" + // 从0开始计数,下一页就是1
|
|
|
+// "}"
|
|
|
+
|
|
|
+
|
|
|
+ + "td, th { "
|
|
|
+ + " page-break-inside: avoid; " // 尽量保持单元格不分页
|
|
|
+ + " -fs-table-paginate: paginate; " // 允许分页
|
|
|
+ + " background-clip: padding-box; " // 确保背景色覆盖
|
|
|
+ + " -webkit-print-color-adjust: exact; " // 确保打印时颜色准确
|
|
|
+ "}";
|
|
|
-
|
|
|
+ Elements table = doc.select("table");
|
|
|
+ String tbaleStyle = table.attr("style");
|
|
|
+ tbaleStyle += "width:100%;";
|
|
|
+ table.attr("style", tbaleStyle);
|
|
|
+
|
|
|
+ Elements trs = doc.select("tr");
|
|
|
+ for (Element tr : trs) {
|
|
|
+ String trStyle = tr.attr("style");
|
|
|
+ trStyle = (trStyle == null) ? "" : trStyle;
|
|
|
+ trStyle += " page-break-inside: avoid !important;"; // 强制不分页
|
|
|
+ tr.attr("style", trStyle);
|
|
|
+ }
|
|
|
doc.head().appendElement("style").text(baseCss);
|
|
|
- processFirstImageAsBackground(doc);
|
|
|
+ Elements tds = doc.select("td");
|
|
|
+ for (Element td : tds) {
|
|
|
+ Elements ps = td.select("p");
|
|
|
+ if (ps.size() > 1) {
|
|
|
+ for (int i = 1; i < ps.size(); i++) {
|
|
|
+ ps.get(i).remove();
|
|
|
+ }
|
|
|
+ Element p = ps.first();
|
|
|
+ String pStyle = p.attr("style");
|
|
|
+ pStyle = removeWhiteSpacePreWrap(pStyle);
|
|
|
+ pStyle += " display: table-cell; vertical-align: middle;";
|
|
|
+ p.attr("style", pStyle);
|
|
|
+ }
|
|
|
|
|
|
+ if (ps.size() > 0) {
|
|
|
+ Element p = ps.first();
|
|
|
+ String pStyle = p.attr("style");
|
|
|
+ pStyle = removeWhiteSpacePreWrap(pStyle);
|
|
|
+ p.attr("style", pStyle);
|
|
|
+
|
|
|
+ Elements spans = p.select("span");
|
|
|
+ if (!spans.isEmpty()) {
|
|
|
+ for (Element span : spans) {
|
|
|
+ String spanStyle = span.attr("style");
|
|
|
+ spanStyle = removeWhiteSpacePreWrap(spanStyle);
|
|
|
+ spanStyle = (spanStyle == null) ? "" : spanStyle;
|
|
|
+ spanStyle += " margin-left: 0.5em;";
|
|
|
+ span.attr("style", spanStyle);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ String oriPstyle = p.attr("style");
|
|
|
+ oriPstyle = removeWhiteSpacePreWrap(oriPstyle);
|
|
|
+ p.attr("style", oriPstyle + " margin-left: 0.5em;");
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- processTableCells(doc);
|
|
|
+ String oristyle = td.attr("style");
|
|
|
+ oristyle = (oristyle == null) ? "" : oristyle;
|
|
|
+ oristyle += " border-collapse: collapse; border: 0.75pt solid #E3EDFB;";
|
|
|
+ oristyle += " background-clip: padding-box; break-inside: avoid !important; page-break-inside: avoid";
|
|
|
+ td.attr("style", oristyle);
|
|
|
+ }
|
|
|
Elements divs = doc.select("div");
|
|
|
divs.attr("style", "");
|
|
|
-
|
|
|
- addTableOfContents(doc);
|
|
|
+divs.last().addClass("start-counting");
|
|
|
+ divs.last().attr("style", "-fs-page-sequence:start");
|
|
|
+ Elements images = doc.select("img");
|
|
|
+ Element firstImg = images.first();
|
|
|
+ // 4. 删除第一个img元素
|
|
|
+ firstImg.parent().remove();
|
|
|
+ // 方法二:将所有 white-space:pre-wrap 改为 normal去除转换时的奇怪空白
|
|
|
+ Elements allElements = doc.getAllElements();
|
|
|
+
|
|
|
+ for (Element element : allElements) {
|
|
|
+ String style = element.attr("style");
|
|
|
+ if (style.contains("white-space:pre-wrap")) {
|
|
|
+ style = style.replaceAll("white-space\\s*:\\s*[^;]+;", "");
|
|
|
+ element.attr("style", style);
|
|
|
+ }
|
|
|
+ }
|
|
|
// 7. 处理特殊span元素
|
|
|
Elements spans = doc.select("span.X1.X2");
|
|
|
for (Element span : spans) {
|
|
@@ -196,14 +378,14 @@ public class TestPdf {
|
|
|
p.attr("style", p.attr("style") + "page-break-before:always;");
|
|
|
}
|
|
|
|
|
|
- processTables(doc);
|
|
|
+ addTableOfContents(doc,flag);
|
|
|
+
|
|
|
doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
|
|
|
doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
|
|
|
doc.head().prepend("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">");
|
|
|
|
|
|
return doc.html();
|
|
|
}
|
|
|
-
|
|
|
/**
|
|
|
* 删除空白段落 清除因转换出的空白段落造成的空白页
|
|
|
* @param doc
|
|
@@ -228,145 +410,109 @@ public class TestPdf {
|
|
|
}
|
|
|
|
|
|
if (isValidEmpty) {
|
|
|
- p.remove();
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- /**
|
|
|
- * 处理第一张图片作为背景
|
|
|
- * @param doc
|
|
|
- */
|
|
|
- private static void processFirstImageAsBackground(Document doc) {
|
|
|
- Element firstDiv = doc.select("div").first();
|
|
|
- if (firstDiv != null) {
|
|
|
- Element firstImg = firstDiv.select("img").first();
|
|
|
- if (firstImg != null) {
|
|
|
- String imgSrc = firstImg.absUrl("src");
|
|
|
-
|
|
|
- Element pageContainer = new Element("div")
|
|
|
- .attr("style",
|
|
|
- "position: relative;"
|
|
|
- + "width: 100%;"
|
|
|
- + "height: 100vh;"
|
|
|
- + "page-break-after: always;"
|
|
|
- + "overflow: hidden;");
|
|
|
-
|
|
|
- Element backgroundLayer = new Element("div")
|
|
|
- .attr("style",
|
|
|
- "position: absolute;"
|
|
|
- + "top: 0;"
|
|
|
- + "left: 0;"
|
|
|
- + "width: 100%;"
|
|
|
- + "height: 100%;"
|
|
|
- + "background-image: url('" + imgSrc + "');"
|
|
|
- + "background-size: cover;"
|
|
|
- + "background-position: center;"
|
|
|
- + "background-repeat: no-repeat;"
|
|
|
- + "z-index: 0;");
|
|
|
-
|
|
|
- Element contentContainer = new Element("div")
|
|
|
- .attr("style",
|
|
|
- "position: relative;"
|
|
|
- + "z-index: 1;"
|
|
|
- + "height: 100%;"
|
|
|
- + "width: 100%;")
|
|
|
- .html(firstDiv.html());
|
|
|
-
|
|
|
- firstImg.remove();
|
|
|
- pageContainer.appendChild(backgroundLayer);
|
|
|
- pageContainer.appendChild(contentContainer);
|
|
|
- firstDiv.replaceWith(pageContainer);
|
|
|
- }
|
|
|
+ p.attr("style", "line-height: 25px;"); }
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
/**
|
|
|
* 添加目录
|
|
|
* @param doc
|
|
|
*/
|
|
|
- private static void addTableOfContents(Document doc) {
|
|
|
- // 目录样式
|
|
|
- String tocCss = ".toc-container { margin: 20px 0; }"
|
|
|
- + ".toc-title { "
|
|
|
- + " text-align: center; "
|
|
|
- + " font-size: 18pt; "
|
|
|
- + " margin-bottom: 15px; "
|
|
|
- + " color: black;"
|
|
|
- + "}"
|
|
|
- + ".toc-list { "
|
|
|
- + " list-style-type: none; "
|
|
|
- + " padding: 0; "
|
|
|
- + " width: 100%; "
|
|
|
- + "}"
|
|
|
- + ".toc-item { "
|
|
|
- + " margin: 5px 0; "
|
|
|
- + "}"
|
|
|
- + ".toc-level-1 { padding-left: 0; }"
|
|
|
- + ".toc-level-2 { padding-left: 2em; }"
|
|
|
- + ".toc-link { "
|
|
|
- + " display: block; "
|
|
|
- + " position: relative; "
|
|
|
- + " color: black !important; "
|
|
|
- + " text-decoration: none !important; "
|
|
|
- + "}"
|
|
|
- + ".toc-content { "
|
|
|
- + " display: flex; "
|
|
|
- + "}"
|
|
|
- + ".toc-text { "
|
|
|
- + " white-space: normal; "
|
|
|
- + "}"
|
|
|
- + ".toc-dots { "
|
|
|
- + " vertical-align: bottom; "
|
|
|
- + " min-width: 20px; "
|
|
|
- + " border-bottom: 1px dotted #000000; "
|
|
|
- + " margin: 0 5px; "
|
|
|
- + " height: 1em; "
|
|
|
- + " flex-grow: 1; "
|
|
|
- + "}"
|
|
|
- + ".toc-page { "
|
|
|
- + " position: absolute; "
|
|
|
- + " right: 0; "
|
|
|
- + " bottom: 0; "
|
|
|
- + "}";
|
|
|
- doc.head().appendElement("style").text(tocCss);
|
|
|
-
|
|
|
- // 构建目录内容
|
|
|
- Element tocList = new Element("ul").addClass("toc-list");
|
|
|
- doc.select("p.X1.X2, p.X1.X3").forEach(el -> {
|
|
|
- boolean isLevel1 = el.hasClass("X2");
|
|
|
- String id = "sec_" + el.text().hashCode();
|
|
|
-
|
|
|
- el.attr("id", id);
|
|
|
-
|
|
|
- Element li = tocList.appendElement("li")
|
|
|
- .addClass("toc-item " + (isLevel1 ? "toc-level-1" : "toc-level-2"));
|
|
|
-
|
|
|
- Element link = li.appendElement("a")
|
|
|
- .attr("href", "#" + id)
|
|
|
- .addClass("toc-link");
|
|
|
-
|
|
|
- Element content = link.appendElement("span").addClass("toc-content");
|
|
|
- content.appendElement("span").addClass("toc-text").text(el.text());
|
|
|
- content.appendElement("span").addClass("toc-dots");
|
|
|
- content.appendElement("span").addClass("toc-page").text("1");
|
|
|
- });
|
|
|
-
|
|
|
- // 插入目录
|
|
|
- Element firstDiv = doc.select("div").first();
|
|
|
- if (firstDiv != null) {
|
|
|
- firstDiv.after(
|
|
|
- "<div class='toc-container' style='page-break-before: always;'>"
|
|
|
- + "<h1 class='toc-title'>目录</h1>"
|
|
|
- + tocList.outerHtml()
|
|
|
- + "</div>"
|
|
|
- );
|
|
|
- } else {
|
|
|
- doc.body().prepend(
|
|
|
- "<div class='toc-container' style='page-break-before: always;'>"
|
|
|
- + "<h1 class='toc-title'>目录</h1>"
|
|
|
- + tocList.outerHtml()
|
|
|
- + "</div>"
|
|
|
- );
|
|
|
+ private static void addTableOfContents(Document doc,boolean flag) {
|
|
|
+ if (flag) {
|
|
|
+ // 目录样式
|
|
|
+ String tocCss = ".toc-container { margin: 20px 0; font-family: 思源黑体; }"
|
|
|
+ + ".toc-title { text-align: center; font-size: 18pt; margin-bottom: 15px; color: black; }"
|
|
|
+ + ".toc-list { list-style-type: none; padding: 0; width: 100%; }"
|
|
|
+ + ".toc-item { margin: 5px 0; padding-top: 2px; padding-bottom: 2px; line-height: 2; }"
|
|
|
+ + ".toc-level-1 { padding-left: 0; }"
|
|
|
+ + ".toc-level-2 { padding-left: 2em; }"
|
|
|
+ + ".toc-link { "
|
|
|
+ + " display: block; "
|
|
|
+ + " position: relative; "
|
|
|
+ + " color: black !important; "
|
|
|
+ + " text-decoration: none !important; "
|
|
|
+ + " line-height: 1.5; " // 新增:控制整体行高
|
|
|
+ + "}"
|
|
|
+ + ".toc-line-container { "
|
|
|
+ + " display: table; "
|
|
|
+ + " width: 100%; "
|
|
|
+ + " vertical-align: middle; " // 关键:控制容器内垂直对齐
|
|
|
+ + "}"
|
|
|
+ + ".toc-text { "
|
|
|
+ + " display: table-cell; "
|
|
|
+ + " font-size: 9pt; "
|
|
|
+ + " white-space: nowrap; "
|
|
|
+ + " padding-right: 5px; "
|
|
|
+ + " vertical-align: middle; " // 改为middle对齐
|
|
|
+ + "}"
|
|
|
+ + ".toc-dots { "
|
|
|
+ + " display: table-cell; "
|
|
|
+ + " width: 100%; "
|
|
|
+ + " vertical-align: middle; " // 关键:改为middle对齐
|
|
|
+ + " border-bottom: 1px dotted #000000; "
|
|
|
+ + " height: 1em; " // 固定高度
|
|
|
+ + " margin-top: 2px; " // 关键:正值下移,负值上移(按需调整)
|
|
|
+ + "}"
|
|
|
+ + "p.X1.X2 { -fs-pdf-bookmark: level 1; }"
|
|
|
+ + "p.X1.X3 { -fs-pdf-bookmark: level 2; }"
|
|
|
+ + ".toc-page { "
|
|
|
+ + " display: table-cell; "
|
|
|
+ + " font-size: 9pt; "
|
|
|
+ + " white-space: nowrap; "
|
|
|
+ + " padding-left: 5px; "
|
|
|
+ + " vertical-align: middle; " // 改为middle对齐
|
|
|
+ + "}";
|
|
|
+ doc.head().appendElement("style").text(tocCss);
|
|
|
+
|
|
|
+ // 构建目录内容
|
|
|
+ Element tocList = new Element("ul").addClass("toc-list");
|
|
|
+ doc.select("p.X1.X2, p.X1.X3").forEach(el -> {
|
|
|
+ boolean isLevel1 = el.hasClass("X2");
|
|
|
+ String id = "sec_" + el.text().hashCode();
|
|
|
+
|
|
|
+ el.attr("id", id);
|
|
|
+ System.out.println(el.text());
|
|
|
+ Integer pageNumber = pageNumberMap.getOrDefault(el.text(), 0);
|
|
|
+
|
|
|
+ Element li = tocList.appendElement("li")
|
|
|
+ .addClass("toc-item " + (isLevel1 ? "toc-level-1" : "toc-level-2"));
|
|
|
+
|
|
|
+ Element link = li.appendElement("a")
|
|
|
+ .attr("href", "#" + id)
|
|
|
+ .addClass("toc-link");
|
|
|
+
|
|
|
+ // 使用表格布局替代flex
|
|
|
+ Element lineContainer = link.appendElement("div").addClass("toc-line-container");
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ lineContainer.appendElement("span").addClass("toc-text").text(el.text());
|
|
|
+
|
|
|
+
|
|
|
+ lineContainer.appendElement("span").addClass("toc-dots");
|
|
|
+ lineContainer.appendElement("span").addClass("toc-page").text(String.valueOf(pageNumber));
|
|
|
+ });
|
|
|
+
|
|
|
+ // 插入目录
|
|
|
+ Element firstDiv = doc.select("div").first();
|
|
|
+ if (firstDiv != null) {
|
|
|
+ firstDiv.after(
|
|
|
+ "<div class='toc-container' style='page-break-before: always;'>"
|
|
|
+ + "<h1 class='toc-title'>目录</h1>"
|
|
|
+ + tocList.outerHtml()
|
|
|
+ + "</div>"
|
|
|
+ );
|
|
|
+ } else {
|
|
|
+ doc.body().prepend(
|
|
|
+ "<div class='toc-container' style='page-break-before: always;'>"
|
|
|
+ + "<h1 class='toc-title'>目录</h1>"
|
|
|
+ + tocList.outerHtml()
|
|
|
+ + "</div>"
|
|
|
+ );
|
|
|
+ }
|
|
|
}
|
|
|
+
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -413,7 +559,7 @@ public class TestPdf {
|
|
|
String oristyle = td.attr("style");
|
|
|
oristyle = (oristyle == null) ? "" : oristyle;
|
|
|
oristyle += " border-collapse: collapse; border: 0.75pt solid #E3EDFB;";
|
|
|
- oristyle += " background-clip: padding-box;";
|
|
|
+ oristyle += " background-clip: padding-box; break-inside: avoid !important; page-break-inside: avoid";
|
|
|
td.attr("style", oristyle);
|
|
|
}
|
|
|
}
|
|
@@ -556,11 +702,126 @@ public class TestPdf {
|
|
|
.replace("\\", "/");
|
|
|
// 设置HTML(确保CSS中使用相同的font-family)
|
|
|
renderer.setDocumentFromString(html, "file:///");
|
|
|
+ // 渲染PDF
|
|
|
+ renderer.layout();
|
|
|
+ renderer.createPDF(os);
|
|
|
+ }
|
|
|
+ addBackgroundToPdf(outputPdfPath,"./ou.pdf","C:\\Users\\yyy\\dev\\yyc3\\easier-be\\file\\image\\image1.jpeg");
|
|
|
+
|
|
|
+ }
|
|
|
|
|
|
+ public static void convertHtmlToPdf2(String html, String outputPdfPath) throws Exception {
|
|
|
+ String s = formatHtml(html, true);
|
|
|
+
|
|
|
+ try (OutputStream os = new FileOutputStream(outputPdfPath)) {
|
|
|
+ ITextRenderer renderer = new ITextRenderer();
|
|
|
+ ITextFontResolver fontResolver = renderer.getFontResolver();
|
|
|
+
|
|
|
+ // 字体路径
|
|
|
+ String mediumFont = "C:/Users/yyy/AppData/Local/Microsoft/Windows/Fonts/SourceHanSansSC-Medium-2.otf";
|
|
|
+ String boldFont = "C:/Users/yyy/AppData/Local/Microsoft/Windows/Fonts/SourceHanSansSC-Bold-2.otf";
|
|
|
+
|
|
|
+ // 注册字体并强制指定别名为 "思源黑体"
|
|
|
+ fontResolver.addFont(
|
|
|
+ mediumFont, // 字体文件路径
|
|
|
+ "思源黑体", // fontFamilyNameOverride:覆盖默认字体名
|
|
|
+ BaseFont.IDENTITY_H, // 编码(必须用于中文)
|
|
|
+ true, // 是否嵌入PDF
|
|
|
+ null // PFB路径(仅AFM/PFM字体需要)
|
|
|
+ );
|
|
|
+
|
|
|
+ fontResolver.addFont(
|
|
|
+ boldFont,
|
|
|
+ "思源黑体 Medium",
|
|
|
+ BaseFont.IDENTITY_H,
|
|
|
+ true,
|
|
|
+ null
|
|
|
+ );
|
|
|
+ html = html.replace("C:\\", "file:///C:/")
|
|
|
+ .replace("\\", "/");
|
|
|
+
|
|
|
+ // 设置HTML(确保CSS中使用相同的font-family)
|
|
|
+ renderer.setDocumentFromString(s, "file:///");
|
|
|
|
|
|
// 渲染PDF
|
|
|
renderer.layout();
|
|
|
renderer.createPDF(os);
|
|
|
}
|
|
|
+
|
|
|
+ addBackgroundToPdf(outputPdfPath,"./ou.pdf","C:\\Users\\yyy\\dev\\yyc3\\easier-be\\file\\image\\image1.jpeg");
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ private static void addBackgroundToPdf(String inputPdfPath, String outputPdfPath, String backgroundImagePath)
|
|
|
+ throws Exception {
|
|
|
+ PdfReader reader = new PdfReader(inputPdfPath);
|
|
|
+ PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(outputPdfPath));
|
|
|
+ // 获取第一页
|
|
|
+ PdfContentByte background = stamper.getUnderContent(1); // 在现有内容下方绘制
|
|
|
+ int startPage = 0;
|
|
|
+ // 加载背景图片
|
|
|
+ Image image = Image.getInstance(backgroundImagePath);
|
|
|
+ image.scaleAbsolute(PageSize.A4.getWidth(), PageSize.A4.getHeight()); // 撑满 A4
|
|
|
+ image.setAbsolutePosition(0, 0); // 从 (0,0) 开始
|
|
|
+
|
|
|
+ // 添加到第一页
|
|
|
+ background.addImage(image);
|
|
|
+
|
|
|
+ Pattern startPattern = Pattern.compile("^1\\.\\s+报告概述$");
|
|
|
+ for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) {
|
|
|
+ String pageText = new PdfTextExtractor(reader).getTextFromPage(pageNum);
|
|
|
+ String[] lines = pageText.split("\\r?\\n");
|
|
|
+ for (String line : lines) {
|
|
|
+ if (startPattern.matcher(line.trim()).matches()) {
|
|
|
+ startPage = pageNum;
|
|
|
+ pageNumberMap.put("startPage", startPage);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ Pattern titlePattern = Pattern.compile(
|
|
|
+ "^((\\d+)\\.\\s+|(\\d+\\.\\d+)\\s+)([\\u4e00-\\u9fa5a-zA-Z].*)$",
|
|
|
+ Pattern.MULTILINE);
|
|
|
+ Pattern specialPattern = Pattern.compile("^重要声明\\s*[::]?\\s*(.*)$");
|
|
|
+ for (int pageNum = startPage; pageNum <= reader.getNumberOfPages(); pageNum++) {
|
|
|
+ String pageText = new PdfTextExtractor(reader).getTextFromPage(pageNum);
|
|
|
+ String[] lines = pageText.split("\\r?\\n");
|
|
|
+ for (int i = 0; i < lines.length; i++) {
|
|
|
+ String line = lines[i].trim();
|
|
|
+ if (line.isEmpty())
|
|
|
+ continue;
|
|
|
+ if (line.startsWith("6.3 ISO体系认证证书或行业经营许可证")) {
|
|
|
+ String nextLine = (i + 1 < lines.length) ? lines[i + 1].trim() : "";
|
|
|
+ // 拼接当前行和下一行
|
|
|
+ line = line + (nextLine.isEmpty() ? "" : nextLine);
|
|
|
+ }
|
|
|
+ Matcher matcher = titlePattern.matcher(line);
|
|
|
+ if (matcher.matches()) {
|
|
|
+ pageNumberMap.put(line, pageNum - startPage + 1);
|
|
|
+ System.out.println("收集标题: " + line + " | 页码: " + pageNum);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (line.startsWith("重要声明")) {
|
|
|
+ Matcher specialMatcher = specialPattern.matcher(line);
|
|
|
+ if (specialMatcher.matches()) {
|
|
|
+ pageNumberMap.put(line, pageNum - startPage + 1);
|
|
|
+ System.out.println("收集特殊标题: " + line + " | 页码: " + pageNum);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ int lastPageIndex = reader.getNumberOfPages();
|
|
|
+ String lastPageText = new PdfTextExtractor(reader).getTextFromPage(lastPageIndex);
|
|
|
+ // 使用 selectPages 方法删除最后一页
|
|
|
+ ArrayList<Integer> pagesToKeep = new ArrayList<>();
|
|
|
+ for (int i = 1; i < lastPageIndex; i++) {
|
|
|
+ pagesToKeep.add(i);
|
|
|
+ }
|
|
|
+ reader.selectPages(pagesToKeep);
|
|
|
+
|
|
|
+ stamper.close();
|
|
|
+ reader.close();
|
|
|
}
|
|
|
}
|