package com.yaoyicloud.tools; import com.lowagie.text.Image; import com.lowagie.text.PageSize; import com.lowagie.text.pdf.BaseFont; import com.lowagie.text.pdf.PdfContentByte; import com.lowagie.text.pdf.PdfReader; import com.lowagie.text.pdf.PdfStamper; import com.lowagie.text.pdf.parser.PdfTextExtractor; import fr.opensagres.poi.xwpf.converter.core.FileURIResolver; import fr.opensagres.poi.xwpf.converter.core.ImageManager; import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter; import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.apache.poi.xwpf.usermodel.XWPFParagraph; import org.apache.poi.xwpf.usermodel.XWPFRun; import org.apache.poi.xwpf.usermodel.XWPFTable; import org.apache.poi.xwpf.usermodel.XWPFTableCell; import org.apache.poi.xwpf.usermodel.XWPFTableRow; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Entities; import org.jsoup.select.Elements; import org.xhtmlrenderer.pdf.ITextFontResolver; import org.xhtmlrenderer.pdf.ITextRenderer; import java.awt.Color; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; public class OfficeUtil1 { private static final org.slf4j.Logger OFFICE_UTIL_LOGGER = org.slf4j.LoggerFactory.getLogger(OfficeUtil1.class); private static Map pageNumberMap = new LinkedHashMap<>(); public static String convert(String docxPath, String imageDir) throws IOException { File imageDirFile = new File(imageDir); if (!imageDirFile.exists() && !imageDirFile.mkdirs()) { throw new IOException("无法创建图片目录: " + imageDir); } try (InputStream docxIn = new FileInputStream(docxPath); XWPFDocument document = new XWPFDocument(docxIn); ByteArrayOutputStream htmlOut = new ByteArrayOutputStream()) { // 1. 遍历所有表格,检查是否需要删除 List tablesToRemove = new ArrayList<>(); for (XWPFTable table : document.getTables()) { if (shouldRemoveTable(table)) { tablesToRemove.add(table); } } // 2. 删除符合条件的表格(从后往前删,避免索引问题) for (int i = tablesToRemove.size() - 1; i >= 0; i--) { XWPFTable table = tablesToRemove.get(i); document.removeBodyElement(document.getPosOfTable(table)); } // 3. 转换剩余的文档为 HTML ImageManager imageManager = new ImageManager(imageDirFile, "") { @Override public String resolve(String uri) { return new File(imageDir, uri).getAbsolutePath().replace("/", "\\"); } }; XHTMLOptions options = XHTMLOptions.create() .setImageManager(imageManager) .URIResolver(new FileURIResolver(imageDirFile) { @Override public String resolve(String uri) { String filename = uri.replace("word/media/", ""); return new File(imageDirFile, filename) .getAbsolutePath() .replace("/", "\\"); } }); options.setIgnoreStylesIfUnused(false); XHTMLConverter.getInstance().convert(document, htmlOut, options); return htmlOut.toString("UTF-8"); } } /** * 检查表格是否需要删除(第一行第一个单元格是否包含 "删除") */ @SuppressWarnings("checkstyle:ReturnCount") private static boolean shouldRemoveTable(XWPFTable table) { if (table.getRows().isEmpty()) { return false; } XWPFTableRow firstRow = table.getRow(0); if (firstRow.getTableCells().isEmpty()) { return false; } // 只检查第一个单元格 XWPFTableCell firstCell = firstRow.getCell(0); for (XWPFParagraph para : firstCell.getParagraphs()) { for (XWPFRun run : para.getRuns()) { String text = run.text().trim(); if (text.contains("删除")) { return true; } } } return false; } @SuppressWarnings("checkstyle:MethodLength") public static String formatHtml(String html) { Document doc = Jsoup.parse(html); Elements pTags = doc.select("p"); for (Element p : pTags) { boolean isValidEmpty = true; for (org.jsoup.nodes.Node child : p.childNodes()) { if (child instanceof Element) { if (!((Element) child).tagName().equalsIgnoreCase("br")) { isValidEmpty = false; break; } } else { if (!child.outerHtml().trim().isEmpty()) { isValidEmpty = false; break; } } } if (isValidEmpty) { p.attr("style", "line-height: 30px;"); } } String baseCss = "@page {" + " size: A4;" + " @bottom-center {" + " content: none;" // 只显示数字页码 + " }" + "}" + "@page show-page-number {" + " @bottom-center {" + " content: counter(page);" + " font-family: 思源黑体 Medium;" + " font-size: 9pt;" + " color: #000000;" + " }" + "}" + // 为最后一个div设置页码显示并重置计数器 ".start-counting {" + " page: show-page-number;" + "}" + "td, th { " + " page-break-inside: avoid; " // 尽量保持单元格不分页 + " -fs-table-paginate: paginate; " // 允许分页 + " background-clip: padding-box; " // 确保背景色覆盖 + " -webkit-print-color-adjust: exact; " // 确保打印时颜色准确 + "}"; Elements table = doc.select("table"); String tbaleStyle = table.attr("style"); tbaleStyle += "width:100%;"; table.attr("style", tbaleStyle); Elements trs = doc.select("tr"); for (Element tr : trs) { String trStyle = tr.attr("style"); trStyle = (trStyle == null) ? "" : trStyle; trStyle += " page-break-inside: avoid !important;"; // 强制不分页 tr.attr("style", trStyle); } doc.head().appendElement("style").text(baseCss); // int groupId = 0; // for (Element row : doc.select("tr:has(td[rowspan], th[rowspan])")) { // int rowspan = Integer.parseInt(row.select("[rowspan]").first().attr("rowspan")); // row.attr("data-group-id", String.valueOf(groupId++)); // // // 标记被rowspan覆盖的行 // Element nextRow = row.nextElementSibling(); // for (int i = 1; i < rowspan && nextRow != null; i++) { // nextRow.attr("data-group-child", "true"); // nextRow = nextRow.nextElementSibling(); // } // } // // doc.head().appendElement("style") // .text("tr[data-group-id], tr[data-group-child] { " // + "page-break-inside: avoid !important; " // + "}"); // //合并单元格的处理 // Elements rowsWithRowspan = doc.select("tr:has(td[rowspan], th[rowspan])"); // // // 遍历所有包含合并单元格的行 // for (Element row : rowsWithRowspan) { // // 找到合并单元格的跨行数 // int rowspan = Integer.parseInt(row.select("td[rowspan], th[rowspan]").first().attr("rowspan")); // // // 创建新的 tbody 包裹当前行及后续受影响的行 // Element tbody = new Element("tbody").attr("style", "page-break-inside: avoid; width: 100%;"); // for (int i = 0; i < rowspan; i++) { // Element nextRow = row.nextElementSibling(); // row.before(tbody); // tbody.appendChild(row); // if (nextRow != null) { // row = nextRow; // 处理后续行 // } // } // } Elements tds = doc.select("td"); for (Element td : tds) { Elements ps = td.select("p"); for (Element p : ps) { String originalStyle = p.attr("style"); // 添加新样式,保留原有样式但覆盖冲突属性 String newStyle = "margin-left: 0.5em; margin-right: 0.5em; " + "line-height: 1.2; margin-top: 6px!important; margin-bottom: 6px!important; " + originalStyle; p.attr("style", newStyle); } if (ps.size() > 1) { for (int i = 1; i < ps.size(); i++) { ps.get(i).remove(); } Element p = ps.first(); String pStyle = p.attr("style"); pStyle = removeWhiteSpacePreWrap(pStyle); pStyle += " vertical-align: middle;"; p.attr("style", pStyle); } if (ps.size() > 0) { Element p = ps.first(); String pStyle = p.attr("style"); pStyle = removeWhiteSpacePreWrap(pStyle); p.attr("style", pStyle); Elements spans = p.select("span"); if (!spans.isEmpty()) { for (Element span : spans) { String spanStyle = span.attr("style"); spanStyle = removeWhiteSpacePreWrap(spanStyle); spanStyle = (spanStyle == null) ? "" : spanStyle; span.attr("style", spanStyle); } } else { String oriPstyle = p.attr("style"); oriPstyle = removeWhiteSpacePreWrap(oriPstyle); p.attr("style", oriPstyle); } } String oristyle = td.attr("style"); oristyle = (oristyle == null) ? "" : oristyle; oristyle += " border-collapse: collapse; border: 0.75pt solid #E3EDFB;"; oristyle += " background-clip: padding-box; break-inside: avoid !important; page-break-inside: avoid"; td.attr("style", oristyle); } Elements divs = doc.select("div"); divs.attr("style", ""); divs.last().addClass("start-counting"); divs.last().attr("style", "-fs-page-sequence:start"); Elements images = doc.select("img"); Element firstImg = images.first(); // 4. 删除第一个img元素 firstImg.parent().remove(); // 将所有 white-space:pre-wrap 改为 normal去除转换时的奇怪空白 Elements allElements = doc.getAllElements(); for (Element element : allElements) { String style = element.attr("style"); if (style.contains("white-space:pre-wrap")) { style = style.replaceAll("white-space\\s*:\\s*[^;]+;", ""); element.attr("style", style); } } // 7. 处理特殊span元素 Elements spans = doc.select("span.X1.X2"); for (Element span : spans) { String style = span.attr("style"); style = style.replaceAll("margin-left:\\s*[^;]+;?", ""); if (!span.text().contains("重要声明")) { style += "color:#1677ff; "; } span.attr("style", style); } // 8. 一级标题前分页样式 Elements paragraphs = doc.select("p.X1.X2"); for (Element p : paragraphs) { p.attr("style", "page-break-before:always;"); } //二级标题上边距调整 Elements secondaryHeadingStyle = doc.select("p.X1.X3"); for (Element element : secondaryHeadingStyle) { String text = element.text().trim(); // 检查标题文本是否匹配 if (text.equals("4.2 财务指标(一)") || text.equals("4.1 重要财务数据") || text.equals("2.2 业务主管单位情况")) { // 获取下一个同级元素 Element nextSibling = element.nextElementSibling(); // 如果存在下一个元素,则删除它 if (nextSibling != null) { nextSibling.remove(); } } String secondarycurrentStyle = element.attr("style"); if (secondarycurrentStyle.contains("margin-top:")) { secondarycurrentStyle = secondarycurrentStyle.replaceAll("margin-top:0.0pt", "margin-top: 13pt"); } else { secondarycurrentStyle += " margin-top: 13pt;"; } element.attr("style", secondarycurrentStyle + "line-height: 1.5; margin-bottom: 4pt; margin-left: 0.5em"); } //三级标题样式 Elements otherElements = doc.select("p.X1.X4"); for (Element element : otherElements) { element.attr("style", element + "line-height: 1.5;margin-top: 3pt; margin-bottom: 2pt !important; margin-left: 0.5em"); } //六级标题样式 Elements select1 = doc.select("p.X1.X6"); for (Element element : select1) { element.attr("style", element.attr("style") + "line-height: 1.5; margin-top: 5pt; margin-bottom: 5pt;"); } Elements select5 = doc.select("p.X1.X5"); for (Element element : select5) { element.attr("style", element.attr("style") + "line-height: 1.5; margin-top: 5pt; margin-bottom: 5pt;"); } //超链接 Elements select9 = doc.select("p.X1.X9"); for (Element element : select9) { element.attr("style", element.attr("style") + "word-break: break-all; overflow-wrap: anywhere; max-width: 100%;"); } Elements select8 = doc.select("p.X1.X8"); for (Element element : select8) { element.attr("style", element.attr("style") + "word-break: break-all; overflow-wrap: anywhere; max-width: 100%;"); } //1.3合并的单元格 不分页 Elements select11 = doc.select("td.X10.X11"); for (Element element : select11) { element.attr("style", element.attr("style") + "page-break-inside: avoid;"); } addTableOfContents(doc); doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml); doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml); doc.head().prepend(""); return doc.html(); } /** * 合并表格中相同内容的单元格 * * @param doc HTML文档对象 */ public static void mergeSameContentCells(Document doc) { Elements tables = doc.select("table"); for (Element table : tables) { Elements rows = table.select("tr"); for (int colIndex = 0; colIndex < rows.first().select("td").size(); colIndex++) { int rowspan = 1; String currentCellText = ""; for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) { Element currentCell = rows.get(rowIndex).select("td").get(colIndex); String cellText = currentCell.text(); if (rowIndex == 0) { currentCellText = cellText; } else { if (cellText.equals(currentCellText)) { rowspan++; currentCell.remove(); } else { if (rowspan > 1) { Element prevCell = rows.get(rowIndex - rowspan).select("td").get(colIndex); prevCell.attr("rowspan", String.valueOf(rowspan)); } rowspan = 1; currentCellText = cellText; } } } if (rowspan > 1) { Element lastCell = rows.get(rows.size() - rowspan).select("td").get(colIndex); lastCell.attr("rowspan", String.valueOf(rowspan)); } } } } /** * 移除 white-space:pre-wrap 并替换为 normal */ private static String removeWhiteSpacePreWrap(String style) { if (style == null) { return ""; } // 替换 pre-wrap 为 normal,并去除多余的分号 style = style.replaceAll("white-space\\s*:\\s*pre-wrap\\s*;?", ""); style = style.replaceAll(";\\s*;", ";"); // 清理多余分号 if (!style.contains("white-space")) { style += " white-space: normal;"; } return style.trim(); } /** * 添加目录 * @param doc */ private static void addTableOfContents(Document doc) { // 目录样式 String tocCss = ".toc-container { margin: 20px 0; font-family: 思源黑体 Medium; }" + ".toc-title { text-align: center; font-size: 12pt; margin-bottom: 15px; color: black; }" + ".toc-list { list-style-type: none; padding: 0; width: 100%; }" + ".toc-item { margin: 5px 0; padding-top: 2px; padding-bottom: 2px; line-height: 2; }" + ".toc-level-1 { padding-left: 0; }" + ".toc-level-2 { padding-left: 2em; }" + ".toc-link { " + " display: block; " + " position: relative; " + " color: black !important; " + " text-decoration: none !important; " + " line-height: 1.5; " // 新增:控制整体行高 + "}" + ".toc-line-container { " + " display: table; " + " width: 100%; " + " vertical-align: middle; " // 关键:控制容器内垂直对齐 + "}" + ".toc-text { " + " display: table-cell; " + " font-size: 9pt; " + " white-space: nowrap; " + " padding-right: 5px; " + " vertical-align: middle; " // 改为middle对齐 + "}" + ".toc-dots { " + " display: table-cell; " + " width: 100%; " + " vertical-align: middle; " // 关键:改为middle对齐 + " border-bottom: 1px dotted #000000; " + " height: 1em; " // 固定高度 + " margin-top: 2px; " // 关键:正值下移,负值上移(按需调整) + "}" + "p.X1.X2 { -fs-pdf-bookmark: level 1; }" + "p.X1.X3 { -fs-pdf-bookmark: level 2; }" + ".toc-page { " + " display: table-cell; " + " font-size: 9pt; " + " white-space: nowrap; " + " padding-left: 5px; " + " vertical-align: middle; " // 改为middle对齐 + "}"; doc.head().appendElement("style").text(tocCss); // 构建目录内容 Element tocList = new Element("ul").addClass("toc-list"); doc.select("p.X1.X2, p.X1.X3").forEach(el -> { boolean isLevel1 = el.hasClass("X2"); String id = "sec_" + el.text().hashCode(); el.attr("id", id); Integer pageNumber = pageNumberMap.getOrDefault(el.text(), 1); Element li = tocList.appendElement("li") .addClass("toc-item " + (isLevel1 ? "toc-level-1" : "toc-level-2")); Element link = li.appendElement("a") .attr("href", "#" + id) .addClass("toc-link"); Element lineContainer = link.appendElement("div").addClass("toc-line-container"); lineContainer.appendElement("span").addClass("toc-text").text(el.text()); lineContainer.appendElement("span").addClass("toc-dots"); lineContainer.appendElement("span").addClass("toc-page").text(String.valueOf(pageNumber)); }); // 插入目录 Element firstDiv = doc.select("div").first(); if (firstDiv != null) { firstDiv.after( "
" + "

目录

" + tocList.outerHtml() + "
" ); } else { doc.body().prepend( "
" + "

目录

" + tocList.outerHtml() + "
" ); } } public static String convertHtmlToPdf(String html, String outputPdfPath, List fontPaths, String imagePath, boolean flag) throws Exception { try (OutputStream os = new FileOutputStream(outputPdfPath)) { ITextRenderer renderer = new ITextRenderer(); ITextFontResolver fontResolver = renderer.getFontResolver(); String boldFont = null; String regularFont = null; String mediumFont = null; for (String path : fontPaths) { if (path.contains("bold")) { boldFont = path; } else if (path.contains("medium")) { mediumFont = path; } else if (path.contains("regular")) { regularFont = path; } } // 字体路径 fontResolver.addFont( mediumFont, "思源黑体 Medium", BaseFont.IDENTITY_H, true, null ); html = html.replace("C:\\", "file:///C:/") .replace("\\", "/"); // 设置HTML(确保CSS中使用相同的font-family) renderer.setDocumentFromString(html, "file:///"); // 渲染PDF renderer.layout(); renderer.createPDF(os); } Path path = Paths.get(outputPdfPath); // 获取文件名和扩展名 String fileName = path.getFileName().toString(); int dotIndex = fileName.lastIndexOf('.'); String baseName = (dotIndex == -1) ? fileName : fileName.substring(0, dotIndex); String extension = (dotIndex == -1) ? "" : fileName.substring(dotIndex); // 构建新文件名 String newFileName = baseName + "1" + extension; // 构建完整新路径 String newFilePath = path.resolveSibling(newFileName).toString(); pdfReader(outputPdfPath, newFilePath, imagePath + File.separator + "image1.jpeg", flag); return newFilePath; } /** * 操作已生成的pdf * @param inputPdfPath 输入pdf * @param outputPdfPath 输出pdf * @param backgroundImagePath 图片文件夹位置 * @param onlyCollectPageNumbers 是否是遍历目录获取标题位置 * @throws Exception */ private static void pdfReader(String inputPdfPath, String outputPdfPath, String backgroundImagePath, boolean onlyCollectPageNumbers) throws Exception { PdfReader reader = new PdfReader(inputPdfPath); PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(outputPdfPath)); int startPage = 1; if (onlyCollectPageNumbers) { pageNumberMap.clear(); Pattern startPattern = Pattern.compile("^1\\.\\s+报告概述$"); // 查找起始页 for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) { String pageText = new PdfTextExtractor(reader).getTextFromPage(pageNum); String[] lines = pageText.split("\\r?\\n"); for (String line : lines) { if (line.equals("1. 报告概述")) { startPage = pageNum; pageNumberMap.put("startPage", startPage); } if (startPattern.matcher(line.trim()).matches()) { startPage = pageNum; pageNumberMap.put("startPage", startPage); } } } // 收集标题和页码 Pattern titlePattern = Pattern.compile( "^((\\d+)\\.\\s+|(\\d+\\.\\d+)\\s+)([\\u4e00-\\u9fa5a-zA-Z0-9].*)$", Pattern.MULTILINE ); Pattern specialPattern = Pattern.compile("^重要声明\\s*[::]?\\s*(.*)$"); for (int pageNum = startPage; pageNum <= reader.getNumberOfPages(); pageNum++) { String pageText = new PdfTextExtractor(reader).getTextFromPage(pageNum); String[] lines = pageText.split("\\r?\\n"); for (int i = 0; i < lines.length; i++) { String line = lines[i].trim(); if (line.isEmpty()) { continue; } if (line.startsWith("6.3 ISO体系认证证书或行业经营许可证")) { String nextLine = (i + 1 < lines.length) ? lines[i + 1].trim() : ""; line = line + (nextLine.isEmpty() ? "" : nextLine); } if (line.startsWith("7.8 机构管理层(法定代表人,理事长,秘书长)")) { String nextLine = (i + 1 < lines.length) ? lines[i + 1].trim() : ""; line = line + (nextLine.isEmpty() ? "" : nextLine); } Matcher matcher = titlePattern.matcher(line); if (matcher.matches()) { pageNumberMap.put(line, pageNum - startPage + 1); } if (line.startsWith("重要声明")) { Matcher specialMatcher = specialPattern.matcher(line); if (specialMatcher.matches()) { pageNumberMap.put(line, pageNum - startPage + 1); } } } } } //一级标题图形背景 Pattern firstLevelTitlePattern = Pattern.compile("^(\\d+)\\.\\s+([\\u4e00-\\u9fa5a-zA-Z].*)$"); Set styledPages = new HashSet<>(); startPage = pageNumberMap.get("startPage"); for (Map.Entry stringIntegerEntry : pageNumberMap.entrySet()) { String key = stringIntegerEntry.getKey(); int value = stringIntegerEntry.getValue(); if (firstLevelTitlePattern.matcher(key).find()) { styledPages.add(value + startPage - 1); } } // 在识别出的页面添加标题样式 for (Integer pageNum : styledPages) { if (pageNum < 1 || pageNum > reader.getNumberOfPages()) { continue; } PdfContentByte underContent = stamper.getUnderContent(pageNum); // 固定位置参数(可根据需要调整) float pageWidth = reader.getPageSize(pageNum).getWidth(); float pageHeight = reader.getPageSize(pageNum).getHeight(); float xPos = 50; // 左侧边距 float yPos = pageHeight - 50; // 距离顶部50单位 // 1. 绘制圆形背景 underContent.saveState(); underContent.setColorFill(new Color(210, 235, 255)); // 浅蓝色填充 underContent.circle( xPos, yPos - 8, // 圆形中心Y 10 // 半径 ); underContent.fill(); underContent.restoreState(); // 2. 绘制横线 underContent.saveState(); underContent.setColorStroke(new Color(0x16, 0x77, 0xFF)); // 浅蓝色线条 underContent.setLineWidth(1.5f); // 线宽 underContent.moveTo(xPos - 10, yPos - 20); underContent.lineTo(pageWidth - xPos + 10, yPos - 20); underContent.stroke(); underContent.restoreState(); } //封面背景 PdfContentByte background = stamper.getUnderContent(1); Image image = Image.getInstance(backgroundImagePath); image.scaleAbsolute(PageSize.A4.getWidth(), PageSize.A4.getHeight()); image.setAbsolutePosition(0, 0); background.addImage(image); stamper.close(); reader.close(); } // private static boolean isTableNearBottom(PdfWriter writer, PdfPTable table, float bottom) { // try { // // 获取当前页面的剩余高度 // float remainingHeight = writer.getVerticalPosition(true) - bottom; // // // 估算当前行高度 // float estimatedRowHeight = 30f; // float estimatedTableHeight = table.getRows().size() * estimatedRowHeight; // // // 如果剩余空间不足以容纳整个表格,则换页 // return remainingHeight < estimatedTableHeight; // } catch (Exception e) { // e.printStackTrace(); // return false; // } // } }