OfficeUtil1.java 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712
  1. package com.yaoyicloud.tools;
  2. import com.lowagie.text.Image;
  3. import com.lowagie.text.PageSize;
  4. import com.lowagie.text.pdf.BaseFont;
  5. import com.lowagie.text.pdf.PdfContentByte;
  6. import com.lowagie.text.pdf.PdfReader;
  7. import com.lowagie.text.pdf.PdfStamper;
  8. import com.lowagie.text.pdf.parser.PdfTextExtractor;
  9. import fr.opensagres.poi.xwpf.converter.core.FileURIResolver;
  10. import fr.opensagres.poi.xwpf.converter.core.ImageManager;
  11. import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
  12. import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
  13. import org.apache.poi.xwpf.usermodel.XWPFDocument;
  14. import org.apache.poi.xwpf.usermodel.XWPFParagraph;
  15. import org.apache.poi.xwpf.usermodel.XWPFRun;
  16. import org.apache.poi.xwpf.usermodel.XWPFTable;
  17. import org.apache.poi.xwpf.usermodel.XWPFTableCell;
  18. import org.apache.poi.xwpf.usermodel.XWPFTableRow;
  19. import org.jsoup.Jsoup;
  20. import org.jsoup.nodes.Document;
  21. import org.jsoup.nodes.Element;
  22. import org.jsoup.nodes.Entities;
  23. import org.jsoup.select.Elements;
  24. import org.xhtmlrenderer.pdf.ITextFontResolver;
  25. import org.xhtmlrenderer.pdf.ITextRenderer;
  26. import java.awt.Color;
  27. import java.io.ByteArrayOutputStream;
  28. import java.io.File;
  29. import java.io.FileInputStream;
  30. import java.io.FileOutputStream;
  31. import java.io.IOException;
  32. import java.io.InputStream;
  33. import java.io.OutputStream;
  34. import java.nio.file.Path;
  35. import java.nio.file.Paths;
  36. import java.util.ArrayList;
  37. import java.util.HashSet;
  38. import java.util.LinkedHashMap;
  39. import java.util.List;
  40. import java.util.Map;
  41. import java.util.Set;
  42. import java.util.regex.Matcher;
  43. import java.util.regex.Pattern;
  44. public class OfficeUtil1 {
  45. private static final org.slf4j.Logger OFFICE_UTIL_LOGGER = org.slf4j.LoggerFactory.getLogger(OfficeUtil1.class);
  46. private static Map<String, Integer> pageNumberMap = new LinkedHashMap<>();
  47. public static String convert(String docxPath, String imageDir) throws IOException {
  48. File imageDirFile = new File(imageDir);
  49. if (!imageDirFile.exists() && !imageDirFile.mkdirs()) {
  50. throw new IOException("无法创建图片目录: " + imageDir);
  51. }
  52. try (InputStream docxIn = new FileInputStream(docxPath);
  53. XWPFDocument document = new XWPFDocument(docxIn);
  54. ByteArrayOutputStream htmlOut = new ByteArrayOutputStream()) {
  55. // 1. 遍历所有表格,检查是否需要删除
  56. List<XWPFTable> tablesToRemove = new ArrayList<>();
  57. for (XWPFTable table : document.getTables()) {
  58. if (shouldRemoveTable(table)) {
  59. tablesToRemove.add(table);
  60. }
  61. }
  62. // 2. 删除符合条件的表格(从后往前删,避免索引问题)
  63. for (int i = tablesToRemove.size() - 1; i >= 0; i--) {
  64. XWPFTable table = tablesToRemove.get(i);
  65. document.removeBodyElement(document.getPosOfTable(table));
  66. }
  67. // 3. 转换剩余的文档为 HTML
  68. ImageManager imageManager = new ImageManager(imageDirFile, "") {
  69. @Override
  70. public String resolve(String uri) {
  71. return new File(imageDir, uri).getAbsolutePath().replace("/", "\\");
  72. }
  73. };
  74. XHTMLOptions options = XHTMLOptions.create()
  75. .setImageManager(imageManager)
  76. .URIResolver(new FileURIResolver(imageDirFile) {
  77. @Override
  78. public String resolve(String uri) {
  79. String filename = uri.replace("word/media/", "");
  80. return new File(imageDirFile, filename)
  81. .getAbsolutePath()
  82. .replace("/", "\\");
  83. }
  84. });
  85. options.setIgnoreStylesIfUnused(false);
  86. XHTMLConverter.getInstance().convert(document, htmlOut, options);
  87. return htmlOut.toString("UTF-8");
  88. }
  89. }
  90. /**
  91. * 检查表格是否需要删除(第一行第一个单元格是否包含 "删除")
  92. */
  93. @SuppressWarnings("checkstyle:ReturnCount")
  94. private static boolean shouldRemoveTable(XWPFTable table) {
  95. if (table.getRows().isEmpty()) {
  96. return false;
  97. }
  98. XWPFTableRow firstRow = table.getRow(0);
  99. if (firstRow.getTableCells().isEmpty()) {
  100. return false;
  101. }
  102. // 只检查第一个单元格
  103. XWPFTableCell firstCell = firstRow.getCell(0);
  104. for (XWPFParagraph para : firstCell.getParagraphs()) {
  105. for (XWPFRun run : para.getRuns()) {
  106. String text = run.text().trim();
  107. if (text.contains("删除")) {
  108. return true;
  109. }
  110. }
  111. }
  112. return false;
  113. }
  114. @SuppressWarnings("checkstyle:MethodLength")
  115. public static String formatHtml(String html) {
  116. Document doc = Jsoup.parse(html);
  117. Elements pTags = doc.select("p");
  118. for (Element p : pTags) {
  119. boolean isValidEmpty = true;
  120. for (org.jsoup.nodes.Node child : p.childNodes()) {
  121. if (child instanceof Element) {
  122. if (!((Element) child).tagName().equalsIgnoreCase("br")) {
  123. isValidEmpty = false;
  124. break;
  125. }
  126. } else {
  127. if (!child.outerHtml().trim().isEmpty()) {
  128. isValidEmpty = false;
  129. break;
  130. }
  131. }
  132. }
  133. if (isValidEmpty) {
  134. p.attr("style", "line-height: 30px;");
  135. }
  136. }
  137. String baseCss =
  138. "@page {"
  139. + " size: A4;"
  140. + " @bottom-center {"
  141. + " content: none;" // 只显示数字页码
  142. + " }"
  143. + "}"
  144. + "@page show-page-number {"
  145. + " @bottom-center {"
  146. + " content: counter(page);"
  147. + " font-family: 思源黑体 Medium;"
  148. + " font-size: 9pt;"
  149. + " color: #000000;"
  150. + " }"
  151. + "}"
  152. + // 为最后一个div设置页码显示并重置计数器
  153. ".start-counting {"
  154. + " page: show-page-number;"
  155. + "}"
  156. + "td, th { "
  157. + " page-break-inside: avoid; " // 尽量保持单元格不分页
  158. + " -fs-table-paginate: paginate; " // 允许分页
  159. + " background-clip: padding-box; " // 确保背景色覆盖
  160. + " -webkit-print-color-adjust: exact; " // 确保打印时颜色准确
  161. + "}";
  162. Elements table = doc.select("table");
  163. String tbaleStyle = table.attr("style");
  164. tbaleStyle += "width:100%;";
  165. table.attr("style", tbaleStyle);
  166. Elements trs = doc.select("tr");
  167. for (Element tr : trs) {
  168. String trStyle = tr.attr("style");
  169. trStyle = (trStyle == null) ? "" : trStyle;
  170. trStyle += " page-break-inside: avoid !important;"; // 强制不分页
  171. tr.attr("style", trStyle);
  172. }
  173. doc.head().appendElement("style").text(baseCss);
  174. // int groupId = 0;
  175. // for (Element row : doc.select("tr:has(td[rowspan], th[rowspan])")) {
  176. // int rowspan = Integer.parseInt(row.select("[rowspan]").first().attr("rowspan"));
  177. // row.attr("data-group-id", String.valueOf(groupId++));
  178. //
  179. // // 标记被rowspan覆盖的行
  180. // Element nextRow = row.nextElementSibling();
  181. // for (int i = 1; i < rowspan && nextRow != null; i++) {
  182. // nextRow.attr("data-group-child", "true");
  183. // nextRow = nextRow.nextElementSibling();
  184. // }
  185. // }
  186. //
  187. // doc.head().appendElement("style")
  188. // .text("tr[data-group-id], tr[data-group-child] { "
  189. // + "page-break-inside: avoid !important; "
  190. // + "}");
  191. // //合并单元格的处理
  192. // Elements rowsWithRowspan = doc.select("tr:has(td[rowspan], th[rowspan])");
  193. //
  194. // // 遍历所有包含合并单元格的行
  195. // for (Element row : rowsWithRowspan) {
  196. // // 找到合并单元格的跨行数
  197. // int rowspan = Integer.parseInt(row.select("td[rowspan], th[rowspan]").first().attr("rowspan"));
  198. //
  199. // // 创建新的 tbody 包裹当前行及后续受影响的行
  200. // Element tbody = new Element("tbody").attr("style", "page-break-inside: avoid; width: 100%;");
  201. // for (int i = 0; i < rowspan; i++) {
  202. // Element nextRow = row.nextElementSibling();
  203. // row.before(tbody);
  204. // tbody.appendChild(row);
  205. // if (nextRow != null) {
  206. // row = nextRow; // 处理后续行
  207. // }
  208. // }
  209. // }
  210. Elements tds = doc.select("td");
  211. for (Element td : tds) {
  212. Elements ps = td.select("p");
  213. for (Element p : ps) {
  214. String originalStyle = p.attr("style");
  215. // 添加新样式,保留原有样式但覆盖冲突属性
  216. String newStyle = "margin-left: 0.5em; margin-right: 0.5em; "
  217. + "line-height: 1.2; margin-top: 6px!important; margin-bottom: 6px!important; " + originalStyle;
  218. p.attr("style", newStyle);
  219. }
  220. if (ps.size() > 1) {
  221. for (int i = 1; i < ps.size(); i++) {
  222. ps.get(i).remove();
  223. }
  224. Element p = ps.first();
  225. String pStyle = p.attr("style");
  226. pStyle = removeWhiteSpacePreWrap(pStyle);
  227. pStyle += " vertical-align: middle;";
  228. p.attr("style", pStyle);
  229. }
  230. if (ps.size() > 0) {
  231. Element p = ps.first();
  232. String pStyle = p.attr("style");
  233. pStyle = removeWhiteSpacePreWrap(pStyle);
  234. p.attr("style", pStyle);
  235. Elements spans = p.select("span");
  236. if (!spans.isEmpty()) {
  237. for (Element span : spans) {
  238. String spanStyle = span.attr("style");
  239. spanStyle = removeWhiteSpacePreWrap(spanStyle);
  240. spanStyle = (spanStyle == null) ? "" : spanStyle;
  241. span.attr("style", spanStyle);
  242. }
  243. } else {
  244. String oriPstyle = p.attr("style");
  245. oriPstyle = removeWhiteSpacePreWrap(oriPstyle);
  246. p.attr("style", oriPstyle);
  247. }
  248. }
  249. String oristyle = td.attr("style");
  250. oristyle = (oristyle == null) ? "" : oristyle;
  251. oristyle += " border-collapse: collapse; border: 0.75pt solid #E3EDFB;";
  252. oristyle += " background-clip: padding-box; break-inside: avoid !important; page-break-inside: avoid";
  253. td.attr("style", oristyle);
  254. }
  255. Elements divs = doc.select("div");
  256. divs.attr("style", "");
  257. divs.last().addClass("start-counting");
  258. divs.last().attr("style", "-fs-page-sequence:start");
  259. Elements images = doc.select("img");
  260. Element firstImg = images.first();
  261. // 4. 删除第一个img元素
  262. firstImg.parent().remove();
  263. // 将所有 white-space:pre-wrap 改为 normal去除转换时的奇怪空白
  264. Elements allElements = doc.getAllElements();
  265. for (Element element : allElements) {
  266. String style = element.attr("style");
  267. if (style.contains("white-space:pre-wrap")) {
  268. style = style.replaceAll("white-space\\s*:\\s*[^;]+;", "");
  269. element.attr("style", style);
  270. }
  271. }
  272. // 7. 处理特殊span元素
  273. Elements spans = doc.select("span.X1.X2");
  274. for (Element span : spans) {
  275. String style = span.attr("style");
  276. style = style.replaceAll("margin-left:\\s*[^;]+;?", "");
  277. if (!span.text().contains("重要声明")) {
  278. style += "color:#1677ff; ";
  279. }
  280. span.attr("style", style);
  281. }
  282. // 8. 一级标题前分页样式
  283. Elements paragraphs = doc.select("p.X1.X2");
  284. for (Element p : paragraphs) {
  285. p.attr("style", "page-break-before:always;");
  286. }
  287. //二级标题上边距调整
  288. Elements secondaryHeadingStyle = doc.select("p.X1.X3");
  289. for (Element element : secondaryHeadingStyle) {
  290. String text = element.text().trim();
  291. // 检查标题文本是否匹配
  292. if (text.equals("4.2 财务指标(一)") || text.equals("4.1 重要财务数据") || text.equals("2.2 业务主管单位情况")) {
  293. // 获取下一个同级元素
  294. Element nextSibling = element.nextElementSibling();
  295. // 如果存在下一个元素,则删除它
  296. if (nextSibling != null) {
  297. nextSibling.remove();
  298. }
  299. }
  300. String secondarycurrentStyle = element.attr("style");
  301. if (secondarycurrentStyle.contains("margin-top:")) {
  302. secondarycurrentStyle = secondarycurrentStyle.replaceAll("margin-top:0.0pt", "margin-top: 13pt");
  303. } else {
  304. secondarycurrentStyle += " margin-top: 13pt;";
  305. }
  306. element.attr("style", secondarycurrentStyle + "line-height: 1.5; margin-bottom: 4pt; margin-left: 0.5em");
  307. }
  308. //三级标题样式
  309. Elements otherElements = doc.select("p.X1.X4");
  310. for (Element element : otherElements) {
  311. element.attr("style", element + "line-height: 1.5;margin-top: 3pt; margin-bottom: 2pt !important; margin-left: 0.5em");
  312. }
  313. //六级标题样式
  314. Elements select1 = doc.select("p.X1.X6");
  315. for (Element element : select1) {
  316. element.attr("style", element.attr("style") + "line-height: 1.5; margin-top: 5pt; margin-bottom: 5pt;");
  317. }
  318. Elements select5 = doc.select("p.X1.X5");
  319. for (Element element : select5) {
  320. element.attr("style", element.attr("style") + "line-height: 1.5; margin-top: 5pt; margin-bottom: 5pt;");
  321. }
  322. //超链接
  323. Elements select9 = doc.select("p.X1.X9");
  324. for (Element element : select9) {
  325. element.attr("style", element.attr("style") + "word-break: break-all; overflow-wrap: anywhere; max-width: 100%;");
  326. }
  327. Elements select8 = doc.select("p.X1.X8");
  328. for (Element element : select8) {
  329. element.attr("style", element.attr("style") + "word-break: break-all; overflow-wrap: anywhere; max-width: 100%;");
  330. }
  331. //1.3合并的单元格 不分页
  332. Elements select11 = doc.select("td.X10.X11");
  333. for (Element element : select11) {
  334. element.attr("style", element.attr("style") + "page-break-inside: avoid;");
  335. }
  336. addTableOfContents(doc);
  337. doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
  338. doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
  339. doc.head().prepend("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">");
  340. return doc.html();
  341. }
  342. /**
  343. * 合并表格中相同内容的单元格
  344. *
  345. * @param doc HTML文档对象
  346. */
  347. public static void mergeSameContentCells(Document doc) {
  348. Elements tables = doc.select("table");
  349. for (Element table : tables) {
  350. Elements rows = table.select("tr");
  351. for (int colIndex = 0; colIndex < rows.first().select("td").size(); colIndex++) {
  352. int rowspan = 1;
  353. String currentCellText = "";
  354. for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) {
  355. Element currentCell = rows.get(rowIndex).select("td").get(colIndex);
  356. String cellText = currentCell.text();
  357. if (rowIndex == 0) {
  358. currentCellText = cellText;
  359. } else {
  360. if (cellText.equals(currentCellText)) {
  361. rowspan++;
  362. currentCell.remove();
  363. } else {
  364. if (rowspan > 1) {
  365. Element prevCell = rows.get(rowIndex - rowspan).select("td").get(colIndex);
  366. prevCell.attr("rowspan", String.valueOf(rowspan));
  367. }
  368. rowspan = 1;
  369. currentCellText = cellText;
  370. }
  371. }
  372. }
  373. if (rowspan > 1) {
  374. Element lastCell = rows.get(rows.size() - rowspan).select("td").get(colIndex);
  375. lastCell.attr("rowspan", String.valueOf(rowspan));
  376. }
  377. }
  378. }
  379. }
  380. /**
  381. * 移除 white-space:pre-wrap 并替换为 normal
  382. */
  383. private static String removeWhiteSpacePreWrap(String style) {
  384. if (style == null) {
  385. return "";
  386. }
  387. // 替换 pre-wrap 为 normal,并去除多余的分号
  388. style = style.replaceAll("white-space\\s*:\\s*pre-wrap\\s*;?", "");
  389. style = style.replaceAll(";\\s*;", ";"); // 清理多余分号
  390. if (!style.contains("white-space")) {
  391. style += " white-space: normal;";
  392. }
  393. return style.trim();
  394. }
  395. /**
  396. * 添加目录
  397. * @param doc
  398. */
  399. private static void addTableOfContents(Document doc) {
  400. // 目录样式
  401. String tocCss = ".toc-container { margin: 20px 0; font-family: 思源黑体 Medium; }"
  402. + ".toc-title { text-align: center; font-size: 12pt; margin-bottom: 15px; color: black; }"
  403. + ".toc-list { list-style-type: none; padding: 0; width: 100%; }"
  404. + ".toc-item { margin: 5px 0; padding-top: 2px; padding-bottom: 2px; line-height: 2; }"
  405. + ".toc-level-1 { padding-left: 0; }"
  406. + ".toc-level-2 { padding-left: 2em; }"
  407. + ".toc-link { "
  408. + " display: block; "
  409. + " position: relative; "
  410. + " color: black !important; "
  411. + " text-decoration: none !important; "
  412. + " line-height: 1.5; " // 新增:控制整体行高
  413. + "}"
  414. + ".toc-line-container { "
  415. + " display: table; "
  416. + " width: 100%; "
  417. + " vertical-align: middle; " // 关键:控制容器内垂直对齐
  418. + "}"
  419. + ".toc-text { "
  420. + " display: table-cell; "
  421. + " font-size: 9pt; "
  422. + " white-space: nowrap; "
  423. + " padding-right: 5px; "
  424. + " vertical-align: middle; " // 改为middle对齐
  425. + "}"
  426. + ".toc-dots { "
  427. + " display: table-cell; "
  428. + " width: 100%; "
  429. + " vertical-align: middle; " // 关键:改为middle对齐
  430. + " border-bottom: 1px dotted #000000; "
  431. + " height: 1em; " // 固定高度
  432. + " margin-top: 2px; " // 关键:正值下移,负值上移(按需调整)
  433. + "}"
  434. + "p.X1.X2 { -fs-pdf-bookmark: level 1; }"
  435. + "p.X1.X3 { -fs-pdf-bookmark: level 2; }"
  436. + ".toc-page { "
  437. + " display: table-cell; "
  438. + " font-size: 9pt; "
  439. + " white-space: nowrap; "
  440. + " padding-left: 5px; "
  441. + " vertical-align: middle; " // 改为middle对齐
  442. + "}";
  443. doc.head().appendElement("style").text(tocCss);
  444. // 构建目录内容
  445. Element tocList = new Element("ul").addClass("toc-list");
  446. doc.select("p.X1.X2, p.X1.X3").forEach(el -> {
  447. boolean isLevel1 = el.hasClass("X2");
  448. String id = "sec_" + el.text().hashCode();
  449. el.attr("id", id);
  450. Integer pageNumber = pageNumberMap.getOrDefault(el.text(), 1);
  451. Element li = tocList.appendElement("li")
  452. .addClass("toc-item " + (isLevel1 ? "toc-level-1" : "toc-level-2"));
  453. Element link = li.appendElement("a")
  454. .attr("href", "#" + id)
  455. .addClass("toc-link");
  456. Element lineContainer = link.appendElement("div").addClass("toc-line-container");
  457. lineContainer.appendElement("span").addClass("toc-text").text(el.text());
  458. lineContainer.appendElement("span").addClass("toc-dots");
  459. lineContainer.appendElement("span").addClass("toc-page").text(String.valueOf(pageNumber));
  460. });
  461. // 插入目录
  462. Element firstDiv = doc.select("div").first();
  463. if (firstDiv != null) {
  464. firstDiv.after(
  465. "<div class='toc-container' style='page-break-before: always;'>"
  466. + "<h1 class='toc-title'>目录</h1>"
  467. + tocList.outerHtml()
  468. + "</div>"
  469. );
  470. } else {
  471. doc.body().prepend(
  472. "<div class='toc-container' style='page-break-before: always;'>"
  473. + "<h1 class='toc-title'>目录</h1>"
  474. + tocList.outerHtml()
  475. + "</div>"
  476. );
  477. }
  478. }
  479. public static String convertHtmlToPdf(String html, String outputPdfPath, List<String> fontPaths, String imagePath, boolean flag) throws Exception {
  480. try (OutputStream os = new FileOutputStream(outputPdfPath)) {
  481. ITextRenderer renderer = new ITextRenderer();
  482. ITextFontResolver fontResolver = renderer.getFontResolver();
  483. String boldFont = null;
  484. String regularFont = null;
  485. String mediumFont = null;
  486. for (String path : fontPaths) {
  487. if (path.contains("bold")) {
  488. boldFont = path;
  489. } else if (path.contains("medium")) {
  490. mediumFont = path;
  491. } else if (path.contains("regular")) {
  492. regularFont = path;
  493. }
  494. }
  495. // 字体路径
  496. fontResolver.addFont(
  497. mediumFont,
  498. "思源黑体 Medium",
  499. BaseFont.IDENTITY_H,
  500. true,
  501. null
  502. );
  503. html = html.replace("C:\\", "file:///C:/")
  504. .replace("\\", "/");
  505. // 设置HTML(确保CSS中使用相同的font-family)
  506. renderer.setDocumentFromString(html, "file:///");
  507. // 渲染PDF
  508. renderer.layout();
  509. renderer.createPDF(os);
  510. }
  511. Path path = Paths.get(outputPdfPath);
  512. // 获取文件名和扩展名
  513. String fileName = path.getFileName().toString();
  514. int dotIndex = fileName.lastIndexOf('.');
  515. String baseName = (dotIndex == -1) ? fileName : fileName.substring(0, dotIndex);
  516. String extension = (dotIndex == -1) ? "" : fileName.substring(dotIndex);
  517. // 构建新文件名
  518. String newFileName = baseName + "1" + extension;
  519. // 构建完整新路径
  520. String newFilePath = path.resolveSibling(newFileName).toString();
  521. pdfReader(outputPdfPath, newFilePath, imagePath + File.separator + "image1.jpeg", flag);
  522. return newFilePath;
  523. }
  524. /**
  525. * 操作已生成的pdf
  526. * @param inputPdfPath 输入pdf
  527. * @param outputPdfPath 输出pdf
  528. * @param backgroundImagePath 图片文件夹位置
  529. * @param onlyCollectPageNumbers 是否是遍历目录获取标题位置
  530. * @throws Exception
  531. */
  532. private static void pdfReader(String inputPdfPath, String outputPdfPath,
  533. String backgroundImagePath, boolean onlyCollectPageNumbers)
  534. throws Exception {
  535. PdfReader reader = new PdfReader(inputPdfPath);
  536. PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(outputPdfPath));
  537. int startPage = 1;
  538. if (onlyCollectPageNumbers) {
  539. pageNumberMap.clear();
  540. Pattern startPattern = Pattern.compile("^1\\.\\s+报告概述$");
  541. // 查找起始页
  542. for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) {
  543. String pageText = new PdfTextExtractor(reader).getTextFromPage(pageNum);
  544. String[] lines = pageText.split("\\r?\\n");
  545. for (String line : lines) {
  546. if (line.equals("1. 报告概述")) {
  547. startPage = pageNum;
  548. pageNumberMap.put("startPage", startPage);
  549. }
  550. if (startPattern.matcher(line.trim()).matches()) {
  551. startPage = pageNum;
  552. pageNumberMap.put("startPage", startPage);
  553. }
  554. }
  555. }
  556. // 收集标题和页码
  557. Pattern titlePattern = Pattern.compile(
  558. "^((\\d+)\\.\\s+|(\\d+\\.\\d+)\\s+)([\\u4e00-\\u9fa5a-zA-Z0-9].*)$",
  559. Pattern.MULTILINE
  560. );
  561. Pattern specialPattern = Pattern.compile("^重要声明\\s*[::]?\\s*(.*)$");
  562. for (int pageNum = startPage; pageNum <= reader.getNumberOfPages(); pageNum++) {
  563. String pageText = new PdfTextExtractor(reader).getTextFromPage(pageNum);
  564. String[] lines = pageText.split("\\r?\\n");
  565. for (int i = 0; i < lines.length; i++) {
  566. String line = lines[i].trim();
  567. if (line.isEmpty()) {
  568. continue;
  569. }
  570. if (line.startsWith("6.3 ISO体系认证证书或行业经营许可证")) {
  571. String nextLine = (i + 1 < lines.length) ? lines[i + 1].trim() : "";
  572. line = line + (nextLine.isEmpty() ? "" : nextLine);
  573. }
  574. if (line.startsWith("7.8 机构管理层(法定代表人,理事长,秘书长)")) {
  575. String nextLine = (i + 1 < lines.length) ? lines[i + 1].trim() : "";
  576. line = line + (nextLine.isEmpty() ? "" : nextLine);
  577. }
  578. Matcher matcher = titlePattern.matcher(line);
  579. if (matcher.matches()) {
  580. pageNumberMap.put(line, pageNum - startPage + 1);
  581. }
  582. if (line.startsWith("重要声明")) {
  583. Matcher specialMatcher = specialPattern.matcher(line);
  584. if (specialMatcher.matches()) {
  585. pageNumberMap.put(line, pageNum - startPage + 1);
  586. }
  587. }
  588. }
  589. }
  590. }
  591. //一级标题图形背景
  592. Pattern firstLevelTitlePattern = Pattern.compile("^(\\d+)\\.\\s+([\\u4e00-\\u9fa5a-zA-Z].*)$");
  593. Set<Integer> styledPages = new HashSet<>();
  594. startPage = pageNumberMap.get("startPage");
  595. for (Map.Entry<String, Integer> stringIntegerEntry : pageNumberMap.entrySet()) {
  596. String key = stringIntegerEntry.getKey();
  597. int value = stringIntegerEntry.getValue();
  598. if (firstLevelTitlePattern.matcher(key).find()) {
  599. styledPages.add(value + startPage - 1);
  600. }
  601. }
  602. // 在识别出的页面添加标题样式
  603. for (Integer pageNum : styledPages) {
  604. if (pageNum < 1 || pageNum > reader.getNumberOfPages()) {
  605. continue;
  606. }
  607. PdfContentByte underContent = stamper.getUnderContent(pageNum);
  608. // 固定位置参数(可根据需要调整)
  609. float pageWidth = reader.getPageSize(pageNum).getWidth();
  610. float pageHeight = reader.getPageSize(pageNum).getHeight();
  611. float xPos = 50; // 左侧边距
  612. float yPos = pageHeight - 50; // 距离顶部50单位
  613. // 1. 绘制圆形背景
  614. underContent.saveState();
  615. underContent.setColorFill(new Color(210, 235, 255)); // 浅蓝色填充
  616. underContent.circle(
  617. xPos,
  618. yPos - 8, // 圆形中心Y
  619. 10 // 半径
  620. );
  621. underContent.fill();
  622. underContent.restoreState();
  623. // 2. 绘制横线
  624. underContent.saveState();
  625. underContent.setColorStroke(new Color(0x16, 0x77, 0xFF)); // 浅蓝色线条
  626. underContent.setLineWidth(1.5f); // 线宽
  627. underContent.moveTo(xPos - 10, yPos - 20);
  628. underContent.lineTo(pageWidth - xPos + 10, yPos - 20);
  629. underContent.stroke();
  630. underContent.restoreState();
  631. }
  632. //封面背景
  633. PdfContentByte background = stamper.getUnderContent(1);
  634. Image image = Image.getInstance(backgroundImagePath);
  635. image.scaleAbsolute(PageSize.A4.getWidth(), PageSize.A4.getHeight());
  636. image.setAbsolutePosition(0, 0);
  637. background.addImage(image);
  638. stamper.close();
  639. reader.close();
  640. }
  641. // private static boolean isTableNearBottom(PdfWriter writer, PdfPTable table, float bottom) {
  642. // try {
  643. // // 获取当前页面的剩余高度
  644. // float remainingHeight = writer.getVerticalPosition(true) - bottom;
  645. //
  646. // // 估算当前行高度
  647. // float estimatedRowHeight = 30f;
  648. // float estimatedTableHeight = table.getRows().size() * estimatedRowHeight;
  649. //
  650. // // 如果剩余空间不足以容纳整个表格,则换页
  651. // return remainingHeight < estimatedTableHeight;
  652. // } catch (Exception e) {
  653. // e.printStackTrace();
  654. // return false;
  655. // }
  656. // }
  657. }