|
@@ -0,0 +1,524 @@
|
|
|
+package com.yaoyicloud.tools;
|
|
|
+
|
|
|
+import com.lowagie.text.Image;
|
|
|
+import com.lowagie.text.PageSize;
|
|
|
+import com.lowagie.text.pdf.BaseFont;
|
|
|
+import com.lowagie.text.pdf.PdfContentByte;
|
|
|
+import com.lowagie.text.pdf.PdfReader;
|
|
|
+import com.lowagie.text.pdf.PdfStamper;
|
|
|
+import com.lowagie.text.pdf.parser.PdfTextExtractor;
|
|
|
+import fr.opensagres.poi.xwpf.converter.core.FileImageExtractor;
|
|
|
+import fr.opensagres.poi.xwpf.converter.core.FileURIResolver;
|
|
|
+import fr.opensagres.poi.xwpf.converter.core.ImageManager;
|
|
|
+import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
|
|
|
+import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
|
|
|
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
|
|
+import org.jsoup.Jsoup;
|
|
|
+import org.jsoup.nodes.Document;
|
|
|
+import org.jsoup.nodes.Element;
|
|
|
+import org.jsoup.nodes.Entities;
|
|
|
+import org.jsoup.select.Elements;
|
|
|
+import org.xhtmlrenderer.pdf.ITextFontResolver;
|
|
|
+import org.xhtmlrenderer.pdf.ITextRenderer;
|
|
|
+
|
|
|
+import java.io.ByteArrayOutputStream;
|
|
|
+import java.io.File;
|
|
|
+import java.io.FileInputStream;
|
|
|
+import java.io.FileOutputStream;
|
|
|
+import java.io.IOException;
|
|
|
+import java.io.InputStream;
|
|
|
+import java.io.OutputStream;
|
|
|
+import java.nio.file.Path;
|
|
|
+import java.nio.file.Paths;
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.LinkedHashMap;
|
|
|
+import java.util.List;
|
|
|
+import java.util.Map;
|
|
|
+import java.util.regex.Matcher;
|
|
|
+import java.util.regex.Pattern;
|
|
|
+
|
|
|
+public class OfficeUtil1 {
|
|
|
+ private static final org.slf4j.Logger OFFICE_UTIL_LOGGER = org.slf4j.LoggerFactory.getLogger(OfficeUtil1.class);
|
|
|
+ private static Map<String, Integer> pageNumberMap = new LinkedHashMap<>();
|
|
|
+
|
|
|
+ public static String convert(String docxPath, String imageDir) throws IOException {
|
|
|
+ File imageDirFile = new File(imageDir);
|
|
|
+ if (!imageDirFile.exists() && !imageDirFile.mkdirs()) {
|
|
|
+ throw new IOException("无法创建图片目录: " + imageDir);
|
|
|
+ }
|
|
|
+
|
|
|
+ try (InputStream docxIn = new FileInputStream(docxPath);
|
|
|
+ XWPFDocument document = new XWPFDocument(docxIn);
|
|
|
+ ByteArrayOutputStream htmlOut = new ByteArrayOutputStream()) {
|
|
|
+
|
|
|
+ // 执行转换
|
|
|
+ XHTMLOptions options = createHtmlOptions(imageDirFile);
|
|
|
+ XHTMLConverter.getInstance().convert(document, htmlOut, options);
|
|
|
+
|
|
|
+ return htmlOut.toString("UTF-8");
|
|
|
+ } catch (Exception e) {
|
|
|
+ OFFICE_UTIL_LOGGER.error("转换失败: {}", e.getMessage(), e);
|
|
|
+ throw new IOException("DOCX转换失败", e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 创建HTML转换选项
|
|
|
+ */
|
|
|
+ private static XHTMLOptions createHtmlOptions(File imageDirFile) {
|
|
|
+ @SuppressWarnings("deprecation")
|
|
|
+ XHTMLOptions options = XHTMLOptions.create()
|
|
|
+ .setImageManager(new ImageManager(imageDirFile, "") {
|
|
|
+ @Override
|
|
|
+ public String resolve(String uri) {
|
|
|
+ return new File(imageDirFile, uri).getAbsolutePath().replace("/", "\\");
|
|
|
+ }
|
|
|
+ })
|
|
|
+ .URIResolver(new FileURIResolver(imageDirFile) {
|
|
|
+ @Override
|
|
|
+ public String resolve(String uri) {
|
|
|
+ String filename = uri.replace("word/media/", "");
|
|
|
+ return new File(imageDirFile, filename).getAbsolutePath().replace("/", "\\");
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ options.setIgnoreStylesIfUnused(false);
|
|
|
+ options.setExtractor(new FileImageExtractor(imageDirFile));
|
|
|
+ return options;
|
|
|
+ }
|
|
|
+
|
|
|
+ public static String formatHtml(String html) {
|
|
|
+ Document doc = Jsoup.parse(html);
|
|
|
+ Elements pTags = doc.select("p");
|
|
|
+ for (Element p : pTags) {
|
|
|
+ boolean isValidEmpty = true;
|
|
|
+
|
|
|
+ for (org.jsoup.nodes.Node child : p.childNodes()) {
|
|
|
+ if (child instanceof Element) {
|
|
|
+ if (!((Element) child).tagName().equalsIgnoreCase("br")) {
|
|
|
+ isValidEmpty = false;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if (!child.outerHtml().trim().isEmpty()) {
|
|
|
+ isValidEmpty = false;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (isValidEmpty) {
|
|
|
+ p.attr("style", "line-height: 25px;");
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ String baseCss =
|
|
|
+ "@page {"
|
|
|
+ + " size: A4;"
|
|
|
+ + " @bottom-center {"
|
|
|
+ + " content: none;" // 只显示数字页码
|
|
|
+ + " }"
|
|
|
+ + "}"
|
|
|
+ + "@page show-page-number {"
|
|
|
+ + " @bottom-center {"
|
|
|
+ + " content: counter(page);"
|
|
|
+ + " font-family: 思源黑体;"
|
|
|
+ + " font-size: 9pt;"
|
|
|
+ + " color: #000000;"
|
|
|
+ + " }"
|
|
|
+ + "}"
|
|
|
+ + // 为最后一个div设置页码显示并重置计数器
|
|
|
+ ".start-counting {"
|
|
|
+ + " page: show-page-number;"
|
|
|
+ + "}"
|
|
|
+ + "td, th { "
|
|
|
+ + " page-break-inside: avoid; " // 尽量保持单元格不分页
|
|
|
+ + " -fs-table-paginate: paginate; " // 允许分页
|
|
|
+ + " background-clip: padding-box; " // 确保背景色覆盖
|
|
|
+ + " -webkit-print-color-adjust: exact; " // 确保打印时颜色准确
|
|
|
+ + "}";
|
|
|
+ Elements table = doc.select("table");
|
|
|
+ String tbaleStyle = table.attr("style");
|
|
|
+ tbaleStyle += "width:100%;";
|
|
|
+ table.attr("style", tbaleStyle);
|
|
|
+
|
|
|
+ Elements trs = doc.select("tr");
|
|
|
+ for (Element tr : trs) {
|
|
|
+ String trStyle = tr.attr("style");
|
|
|
+ trStyle = (trStyle == null) ? "" : trStyle;
|
|
|
+ trStyle += " page-break-inside: avoid !important;"; // 强制不分页
|
|
|
+ tr.attr("style", trStyle);
|
|
|
+ }
|
|
|
+ doc.head().appendElement("style").text(baseCss);
|
|
|
+ Elements tds = doc.select("td");
|
|
|
+ for (Element td : tds) {
|
|
|
+ Elements ps = td.select("p");
|
|
|
+ if (ps.size() > 1) {
|
|
|
+ for (int i = 1; i < ps.size(); i++) {
|
|
|
+ ps.get(i).remove();
|
|
|
+ }
|
|
|
+ Element p = ps.first();
|
|
|
+ String pStyle = p.attr("style");
|
|
|
+ pStyle = removeWhiteSpacePreWrap(pStyle);
|
|
|
+ pStyle += " display: table-cell; vertical-align: middle;";
|
|
|
+ p.attr("style", pStyle);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (ps.size() > 0) {
|
|
|
+ Element p = ps.first();
|
|
|
+ String pStyle = p.attr("style");
|
|
|
+ pStyle = removeWhiteSpacePreWrap(pStyle);
|
|
|
+ p.attr("style", pStyle);
|
|
|
+
|
|
|
+ Elements spans = p.select("span");
|
|
|
+ if (!spans.isEmpty()) {
|
|
|
+ for (Element span : spans) {
|
|
|
+ String spanStyle = span.attr("style");
|
|
|
+ spanStyle = removeWhiteSpacePreWrap(spanStyle);
|
|
|
+ spanStyle = (spanStyle == null) ? "" : spanStyle;
|
|
|
+ spanStyle += " margin-left: 0.5em;";
|
|
|
+ span.attr("style", spanStyle);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ String oriPstyle = p.attr("style");
|
|
|
+ oriPstyle = removeWhiteSpacePreWrap(oriPstyle);
|
|
|
+ p.attr("style", oriPstyle + " margin-left: 0.5em;");
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ String oristyle = td.attr("style");
|
|
|
+ oristyle = (oristyle == null) ? "" : oristyle;
|
|
|
+ oristyle += " border-collapse: collapse; border: 0.75pt solid #E3EDFB;";
|
|
|
+ oristyle += " background-clip: padding-box; break-inside: avoid !important; page-break-inside: avoid";
|
|
|
+ td.attr("style", oristyle);
|
|
|
+ }
|
|
|
+ Elements divs = doc.select("div");
|
|
|
+ divs.attr("style", "");
|
|
|
+ divs.last().addClass("start-counting");
|
|
|
+ divs.last().attr("style", "-fs-page-sequence:start");
|
|
|
+ Elements images = doc.select("img");
|
|
|
+ Element firstImg = images.first();
|
|
|
+ // 4. 删除第一个img元素
|
|
|
+ firstImg.parent().remove();
|
|
|
+ // 方法二:将所有 white-space:pre-wrap 改为 normal去除转换时的奇怪空白
|
|
|
+ Elements allElements = doc.getAllElements();
|
|
|
+
|
|
|
+ for (Element element : allElements) {
|
|
|
+ String style = element.attr("style");
|
|
|
+ if (style.contains("white-space:pre-wrap")) {
|
|
|
+ style = style.replaceAll("white-space\\s*:\\s*[^;]+;", "");
|
|
|
+ element.attr("style", style);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 7. 处理特殊span元素
|
|
|
+ Elements spans = doc.select("span.X1.X2");
|
|
|
+ for (Element span : spans) {
|
|
|
+ String style = span.attr("style");
|
|
|
+ style = style.replaceAll("margin-left:\\s*[^;]+;?", "");
|
|
|
+ if (!span.text().contains("重要声明")) {
|
|
|
+ style += "color:#1677ff; ";
|
|
|
+ }
|
|
|
+ span.attr("style", style);
|
|
|
+ }
|
|
|
+ // 8. 一级标题前分页样式
|
|
|
+ Elements paragraphs = doc.select("p.X1.X2");
|
|
|
+ for (Element p : paragraphs) {
|
|
|
+ p.attr("style", p.attr("style") + "page-break-before:always;");
|
|
|
+ }
|
|
|
+
|
|
|
+ addTableOfContents(doc);
|
|
|
+
|
|
|
+ doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
|
|
|
+ doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
|
|
|
+ doc.head().prepend("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">");
|
|
|
+
|
|
|
+ return doc.html();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 移除 white-space:pre-wrap 并替换为 normal
|
|
|
+ */
|
|
|
+ private static String removeWhiteSpacePreWrap(String style) {
|
|
|
+ if (style == null) {
|
|
|
+ return "";
|
|
|
+ }
|
|
|
+ // 替换 pre-wrap 为 normal,并去除多余的分号
|
|
|
+ style = style.replaceAll("white-space\\s*:\\s*pre-wrap\\s*;?", "");
|
|
|
+ style = style.replaceAll(";\\s*;", ";"); // 清理多余分号
|
|
|
+ if (!style.contains("white-space")) {
|
|
|
+ style += " white-space: normal;";
|
|
|
+ }
|
|
|
+ return style.trim();
|
|
|
+ }
|
|
|
+ /**
|
|
|
+ * 添加目录
|
|
|
+ * @param doc
|
|
|
+ */
|
|
|
+ private static void addTableOfContents(Document doc) {
|
|
|
+
|
|
|
+ // 目录样式
|
|
|
+ String tocCss = ".toc-container { margin: 20px 0; font-family: 思源黑体; }"
|
|
|
+ + ".toc-title { text-align: center; font-size: 18pt; margin-bottom: 15px; color: black; }"
|
|
|
+ + ".toc-list { list-style-type: none; padding: 0; width: 100%; }"
|
|
|
+ + ".toc-item { margin: 5px 0; padding-top: 2px; padding-bottom: 2px; line-height: 2; }"
|
|
|
+ + ".toc-level-1 { padding-left: 0; }"
|
|
|
+ + ".toc-level-2 { padding-left: 2em; }"
|
|
|
+ + ".toc-link { "
|
|
|
+ + " display: block; "
|
|
|
+ + " position: relative; "
|
|
|
+ + " color: black !important; "
|
|
|
+ + " text-decoration: none !important; "
|
|
|
+ + " line-height: 1.5; " // 新增:控制整体行高
|
|
|
+ + "}"
|
|
|
+ + ".toc-line-container { "
|
|
|
+ + " display: table; "
|
|
|
+ + " width: 100%; "
|
|
|
+ + " vertical-align: middle; " // 关键:控制容器内垂直对齐
|
|
|
+ + "}"
|
|
|
+ + ".toc-text { "
|
|
|
+ + " display: table-cell; "
|
|
|
+ + " font-size: 9pt; "
|
|
|
+ + " white-space: nowrap; "
|
|
|
+ + " padding-right: 5px; "
|
|
|
+ + " vertical-align: middle; " // 改为middle对齐
|
|
|
+ + "}"
|
|
|
+ + ".toc-dots { "
|
|
|
+ + " display: table-cell; "
|
|
|
+ + " width: 100%; "
|
|
|
+ + " vertical-align: middle; " // 关键:改为middle对齐
|
|
|
+ + " border-bottom: 1px dotted #000000; "
|
|
|
+ + " height: 1em; " // 固定高度
|
|
|
+ + " margin-top: 2px; " // 关键:正值下移,负值上移(按需调整)
|
|
|
+ + "}"
|
|
|
+ + "p.X1.X2 { -fs-pdf-bookmark: level 1; }"
|
|
|
+ + "p.X1.X3 { -fs-pdf-bookmark: level 2; }"
|
|
|
+ + ".toc-page { "
|
|
|
+ + " display: table-cell; "
|
|
|
+ + " font-size: 9pt; "
|
|
|
+ + " white-space: nowrap; "
|
|
|
+ + " padding-left: 5px; "
|
|
|
+ + " vertical-align: middle; " // 改为middle对齐
|
|
|
+ + "}";
|
|
|
+ doc.head().appendElement("style").text(tocCss);
|
|
|
+
|
|
|
+ // 构建目录内容
|
|
|
+ Element tocList = new Element("ul").addClass("toc-list");
|
|
|
+ doc.select("p.X1.X2, p.X1.X3").forEach(el -> {
|
|
|
+ boolean isLevel1 = el.hasClass("X2");
|
|
|
+ String id = "sec_" + el.text().hashCode();
|
|
|
+ el.attr("id", id);
|
|
|
+ Integer pageNumber = pageNumberMap.getOrDefault(el.text(), 0);
|
|
|
+
|
|
|
+ Element li = tocList.appendElement("li")
|
|
|
+ .addClass("toc-item " + (isLevel1 ? "toc-level-1" : "toc-level-2"));
|
|
|
+
|
|
|
+ Element link = li.appendElement("a")
|
|
|
+ .attr("href", "#" + id)
|
|
|
+ .addClass("toc-link");
|
|
|
+ Element lineContainer = link.appendElement("div").addClass("toc-line-container");
|
|
|
+ lineContainer.appendElement("span").addClass("toc-text").text(el.text());
|
|
|
+ lineContainer.appendElement("span").addClass("toc-dots");
|
|
|
+ lineContainer.appendElement("span").addClass("toc-page").text(String.valueOf(pageNumber));
|
|
|
+ });
|
|
|
+
|
|
|
+ // 插入目录
|
|
|
+ Element firstDiv = doc.select("div").first();
|
|
|
+ if (firstDiv != null) {
|
|
|
+ firstDiv.after(
|
|
|
+ "<div class='toc-container' style='page-break-before: always;'>"
|
|
|
+ + "<h1 class='toc-title'>目录</h1>"
|
|
|
+ + tocList.outerHtml()
|
|
|
+ + "</div>"
|
|
|
+ );
|
|
|
+ } else {
|
|
|
+ doc.body().prepend(
|
|
|
+ "<div class='toc-container' style='page-break-before: always;'>"
|
|
|
+ + "<h1 class='toc-title'>目录</h1>"
|
|
|
+ + tocList.outerHtml()
|
|
|
+ + "</div>"
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ public static String convertHtmlToPdf(String html, String outputPdfPath, List<String> fontPaths, boolean flag) throws Exception {
|
|
|
+ String formattedHtml = formatHtml(html);
|
|
|
+ try (OutputStream os = new FileOutputStream(outputPdfPath)) {
|
|
|
+ ITextRenderer renderer = new ITextRenderer();
|
|
|
+ ITextFontResolver fontResolver = renderer.getFontResolver();
|
|
|
+ String boldFont = null;
|
|
|
+ String regularFont = null;
|
|
|
+ for (String path : fontPaths) {
|
|
|
+ if (path.contains("bold")) {
|
|
|
+ boldFont = path;
|
|
|
+ } else if (path.contains("regular")) {
|
|
|
+ regularFont = path;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 字体路径
|
|
|
+
|
|
|
+ // 注册字体并强制指定别名为 "思源黑体"
|
|
|
+ fontResolver.addFont(
|
|
|
+ regularFont, // 字体文件路径
|
|
|
+ "思源黑体", // fontFamilyNameOverride:覆盖默认字体名
|
|
|
+ BaseFont.IDENTITY_H, // 编码(必须用于中文)
|
|
|
+ true, // 是否嵌入PDF
|
|
|
+ null // PFB路径(仅AFM/PFM字体需要)
|
|
|
+ );
|
|
|
+
|
|
|
+ fontResolver.addFont(
|
|
|
+ boldFont,
|
|
|
+ "思源黑体 Medium",
|
|
|
+ BaseFont.IDENTITY_H,
|
|
|
+ true,
|
|
|
+ null
|
|
|
+ );
|
|
|
+ formattedHtml = formattedHtml.replace("C:\\", "file:///C:/")
|
|
|
+ .replace("\\", "/");
|
|
|
+ // 设置HTML(确保CSS中使用相同的font-family)
|
|
|
+ renderer.setDocumentFromString(formattedHtml, "file:///");
|
|
|
+ // 渲染PDF
|
|
|
+ renderer.layout();
|
|
|
+ renderer.createPDF(os);
|
|
|
+ }
|
|
|
+ Path path = Paths.get(outputPdfPath);
|
|
|
+ // 获取文件名和扩展名
|
|
|
+ String fileName = path.getFileName().toString();
|
|
|
+ int dotIndex = fileName.lastIndexOf('.');
|
|
|
+ String baseName = (dotIndex == -1) ? fileName : fileName.substring(0, dotIndex);
|
|
|
+ String extension = (dotIndex == -1) ? "" : fileName.substring(dotIndex);
|
|
|
+ // 构建新文件名
|
|
|
+ String newFileName = baseName + "1" + extension;
|
|
|
+ // 构建完整新路径
|
|
|
+ String newFilePath = path.resolveSibling(newFileName).toString();
|
|
|
+ pdfReader(outputPdfPath, newFilePath, "C:\\Users\\yyy\\dev\\yyc3\\easier-be\\file\\image\\image1.jpeg", flag);
|
|
|
+
|
|
|
+ return newFilePath;
|
|
|
+ }
|
|
|
+
|
|
|
+// public static void convertHtmlToPdf2(String html, String outputPdfPath) throws Exception {
|
|
|
+// String s = formatHtml(html);
|
|
|
+//
|
|
|
+// try (OutputStream os = new FileOutputStream(outputPdfPath)) {
|
|
|
+// ITextRenderer renderer = new ITextRenderer();
|
|
|
+// ITextFontResolver fontResolver = renderer.getFontResolver();
|
|
|
+//
|
|
|
+// // 字体路径
|
|
|
+// String mediumFont = "C:/Users/yyy/AppData/Local/Microsoft/Windows/Fonts/SourceHanSansSC-Medium-2.otf";
|
|
|
+// String boldFont = "C:/Users/yyy/AppData/Local/Microsoft/Windows/Fonts/SourceHanSansSC-Bold-2.otf";
|
|
|
+//
|
|
|
+// // 注册字体并强制指定别名为 "思源黑体"
|
|
|
+// fontResolver.addFont(
|
|
|
+// mediumFont, // 字体文件路径
|
|
|
+// "思源黑体", // fontFamilyNameOverride:覆盖默认字体名
|
|
|
+// BaseFont.IDENTITY_H, // 编码(必须用于中文)
|
|
|
+// true, // 是否嵌入PDF
|
|
|
+// null // PFB路径(仅AFM/PFM字体需要)
|
|
|
+// );
|
|
|
+//
|
|
|
+// fontResolver.addFont(
|
|
|
+// boldFont,
|
|
|
+// "思源黑体 Medium",
|
|
|
+// BaseFont.IDENTITY_H,
|
|
|
+// true,
|
|
|
+// null
|
|
|
+// );
|
|
|
+// html = html.replace("C:\\", "file:///C:/")
|
|
|
+// .replace("\\", "/");
|
|
|
+//
|
|
|
+// // 设置HTML(确保CSS中使用相同的font-family)
|
|
|
+// renderer.setDocumentFromString(s, "file:///");
|
|
|
+//
|
|
|
+// // 渲染PDF
|
|
|
+// renderer.layout();
|
|
|
+// renderer.createPDF(os);
|
|
|
+// }
|
|
|
+// Path path = Paths.get(outputPdfPath);
|
|
|
+// // 获取文件名和扩展名
|
|
|
+// String fileName = path.getFileName().toString();
|
|
|
+// int dotIndex = fileName.lastIndexOf('.');
|
|
|
+// String baseName = (dotIndex == -1) ? fileName : fileName.substring(0, dotIndex);
|
|
|
+// String extension = (dotIndex == -1) ? "" : fileName.substring(dotIndex);
|
|
|
+// // 构建新文件名
|
|
|
+// String newFileName = baseName + "1" + extension;
|
|
|
+// // 构建完整新路径
|
|
|
+// String newFilePath = path.resolveSibling(newFileName).toString();
|
|
|
+//
|
|
|
+// }
|
|
|
+
|
|
|
+ private static void pdfReader(String inputPdfPath, String outputPdfPath,
|
|
|
+ String backgroundImagePath, boolean onlyCollectPageNumbers)
|
|
|
+ throws Exception {
|
|
|
+ PdfReader reader = new PdfReader(inputPdfPath);
|
|
|
+ PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(outputPdfPath));
|
|
|
+ if (onlyCollectPageNumbers) {
|
|
|
+ int startPage = 0;
|
|
|
+ Pattern startPattern = Pattern.compile("^1\\.\\s+报告概述$");
|
|
|
+
|
|
|
+ // 查找起始页
|
|
|
+ for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) {
|
|
|
+ String pageText = new PdfTextExtractor(reader).getTextFromPage(pageNum);
|
|
|
+ String[] lines = pageText.split("\\r?\\n");
|
|
|
+ for (String line : lines) {
|
|
|
+ if (startPattern.matcher(line.trim()).matches()) {
|
|
|
+ startPage = pageNum;
|
|
|
+ pageNumberMap.put("startPage", startPage);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 收集标题和页码
|
|
|
+ Pattern titlePattern = Pattern.compile(
|
|
|
+ "^((\\d+)\\.\\s+|(\\d+\\.\\d+)\\s+)([\\u4e00-\\u9fa5a-zA-Z].*)$",
|
|
|
+ Pattern.MULTILINE);
|
|
|
+ Pattern specialPattern = Pattern.compile("^重要声明\\s*[::]?\\s*(.*)$");
|
|
|
+
|
|
|
+ for (int pageNum = startPage; pageNum <= reader.getNumberOfPages(); pageNum++) {
|
|
|
+ String pageText = new PdfTextExtractor(reader).getTextFromPage(pageNum);
|
|
|
+ String[] lines = pageText.split("\\r?\\n");
|
|
|
+ for (int i = 0; i < lines.length; i++) {
|
|
|
+ String line = lines[i].trim();
|
|
|
+ if (line.isEmpty()) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if (line.startsWith("6.3 ISO体系认证证书或行业经营许可证")) {
|
|
|
+ String nextLine = (i + 1 < lines.length) ? lines[i + 1].trim() : "";
|
|
|
+ line = line + (nextLine.isEmpty() ? "" : nextLine);
|
|
|
+ }
|
|
|
+
|
|
|
+ Matcher matcher = titlePattern.matcher(line);
|
|
|
+ if (matcher.matches()) {
|
|
|
+ pageNumberMap.put(line, pageNum - startPage + 1);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (line.startsWith("重要声明")) {
|
|
|
+ Matcher specialMatcher = specialPattern.matcher(line);
|
|
|
+ if (specialMatcher.matches()) {
|
|
|
+ pageNumberMap.put(line, pageNum - startPage + 1);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ //添加背景
|
|
|
+ PdfContentByte background = stamper.getUnderContent(1);
|
|
|
+ Image image = Image.getInstance(backgroundImagePath);
|
|
|
+ image.scaleAbsolute(PageSize.A4.getWidth(), PageSize.A4.getHeight());
|
|
|
+ image.setAbsolutePosition(0, 0);
|
|
|
+ background.addImage(image);
|
|
|
+ int lastPageIndex = reader.getNumberOfPages();
|
|
|
+ //删除空白页
|
|
|
+ String lastPageText = new PdfTextExtractor(reader).getTextFromPage(lastPageIndex);
|
|
|
+ // 使用 selectPages 方法删除最后一页
|
|
|
+ ArrayList<Integer> pagesToKeep = new ArrayList<>();
|
|
|
+ for (int i = 1; i < lastPageIndex; i++) {
|
|
|
+ pagesToKeep.add(i);
|
|
|
+ }
|
|
|
+ reader.selectPages(pagesToKeep);
|
|
|
+
|
|
|
+ stamper.close();
|
|
|
+ reader.close();
|
|
|
+ }
|
|
|
+
|
|
|
+}
|