|
@@ -1,426 +0,0 @@
|
|
|
-package com.yaoyicloud.tools;
|
|
|
-import java.io.IOException;
|
|
|
-import java.nio.file.Path;
|
|
|
-import java.nio.file.Paths;
|
|
|
-import java.util.ArrayList;
|
|
|
-import java.util.Collections;
|
|
|
-import java.util.HashMap;
|
|
|
-import java.util.List;
|
|
|
-import java.util.Map;
|
|
|
-import java.util.stream.Collectors;
|
|
|
-
|
|
|
-import com.itextpdf.io.font.FontProgramFactory;
|
|
|
-import com.itextpdf.io.image.ImageData;
|
|
|
-import com.itextpdf.io.image.ImageDataFactory;
|
|
|
-import com.itextpdf.kernel.colors.DeviceRgb;
|
|
|
-import com.itextpdf.kernel.font.PdfFont;
|
|
|
-import com.itextpdf.kernel.font.PdfFontFactory;
|
|
|
-import com.itextpdf.kernel.geom.LineSegment;
|
|
|
-import com.itextpdf.kernel.geom.Rectangle;
|
|
|
-import com.itextpdf.kernel.pdf.PdfDocument;
|
|
|
-import com.itextpdf.kernel.pdf.PdfOutline;
|
|
|
-import com.itextpdf.kernel.pdf.PdfPage;
|
|
|
-import com.itextpdf.kernel.pdf.PdfReader;
|
|
|
-import com.itextpdf.kernel.pdf.PdfWriter;
|
|
|
-import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
|
|
|
-import com.itextpdf.kernel.pdf.annot.PdfLinkAnnotation;
|
|
|
-import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
|
|
|
-import com.itextpdf.kernel.pdf.canvas.draw.DottedLine;
|
|
|
-import com.itextpdf.kernel.pdf.canvas.parser.EventType;
|
|
|
-import com.itextpdf.kernel.pdf.canvas.parser.PdfCanvasProcessor;
|
|
|
-import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor;
|
|
|
-import com.itextpdf.kernel.pdf.canvas.parser.data.IEventData;
|
|
|
-import com.itextpdf.kernel.pdf.canvas.parser.data.TextRenderInfo;
|
|
|
-import com.itextpdf.kernel.pdf.canvas.parser.listener.SimpleTextExtractionStrategy;
|
|
|
-import com.itextpdf.kernel.pdf.navigation.PdfDestination;
|
|
|
-import com.itextpdf.kernel.pdf.navigation.PdfExplicitDestination;
|
|
|
-import com.itextpdf.layout.Canvas;
|
|
|
-import com.itextpdf.layout.element.Image;
|
|
|
-import com.itextpdf.layout.element.Paragraph;
|
|
|
-import com.itextpdf.layout.element.Tab;
|
|
|
-import com.itextpdf.layout.element.TabStop;
|
|
|
-import com.itextpdf.layout.layout.LayoutArea;
|
|
|
-import com.itextpdf.layout.layout.LayoutContext;
|
|
|
-import com.itextpdf.layout.properties.Property;
|
|
|
-import com.itextpdf.layout.properties.TabAlignment;
|
|
|
-import com.itextpdf.layout.properties.TextAlignment;
|
|
|
-
|
|
|
-
|
|
|
-public class PdfProcessor {
|
|
|
- public static String processPdf(String inputPath, String backgroundImagePath, String fontPath, Integer coverPage)
|
|
|
- throws IOException {
|
|
|
- // 标题列表
|
|
|
- String outputPath = getNewPdfPath(inputPath);
|
|
|
-
|
|
|
- ArrayList<TocEntity> headings = extractHeadings(inputPath, coverPage);
|
|
|
- ArrayList<TocEntity> tocEntities = processHeadings(headings);
|
|
|
- try (PdfDocument pdfDoc = new PdfDocument(new PdfReader(inputPath), new PdfWriter(outputPath))) {
|
|
|
- // List<Integer> deletedPages = removeBlankPages(pdfDoc);
|
|
|
-
|
|
|
-
|
|
|
-// if (!deletedPages.isEmpty()) {
|
|
|
-// updateHeadingPageNumbers(tocEntities, deletedPages);
|
|
|
-// }
|
|
|
- if (backgroundImagePath != null && !backgroundImagePath.isEmpty()) {
|
|
|
- // 封面背景图
|
|
|
- addBackgroundToFirstPage(pdfDoc, backgroundImagePath);
|
|
|
- }
|
|
|
- // 标题背景样式
|
|
|
- modifyHeadings(pdfDoc, tocEntities, coverPage);
|
|
|
- // 目录
|
|
|
- if (tocEntities != null && !tocEntities.isEmpty()) {
|
|
|
- generateTableOfContents(pdfDoc, tocEntities, fontPath, coverPage);
|
|
|
- }
|
|
|
- }
|
|
|
- return outputPath;
|
|
|
- }
|
|
|
-
|
|
|
- protected static void updateHeadingPageNumbers(List<TocEntity> headings, List<Integer> deletedPages) {
|
|
|
- // 确保被删除的页码是升序排列(方便计算)
|
|
|
- Collections.sort(deletedPages);
|
|
|
-
|
|
|
- for (TocEntity heading : headings) {
|
|
|
- int originalPage = heading.getPageNumber();
|
|
|
- int offset = 0; // 计算页码需要减少的偏移量
|
|
|
-
|
|
|
- // 遍历所有被删除的页,计算当前标题的页码应该减多少
|
|
|
- for (int deletedPage : deletedPages) {
|
|
|
- if (deletedPage < originalPage) {
|
|
|
- offset++; // 如果被删除的页在当前页之前,当前页的页码要减1
|
|
|
- } else {
|
|
|
- break; // 后面的被删除页不影响当前页
|
|
|
- }
|
|
|
- }
|
|
|
- // 更新标题的页码
|
|
|
- heading.setPageNumber(originalPage - offset);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- public static String getNewPdfPath(String reportBastPath) {
|
|
|
- Path path = Paths.get(reportBastPath);
|
|
|
- // 获取文件名和扩展名
|
|
|
- String fileName = path.getFileName().toString();
|
|
|
- int dotIndex = fileName.lastIndexOf('.');
|
|
|
- String baseName = (dotIndex == -1) ? fileName : fileName.substring(0, dotIndex);
|
|
|
- String extension = (dotIndex == -1) ? "" : fileName.substring(dotIndex);
|
|
|
- // 构建新文件名
|
|
|
- String newFileName = baseName + "1" + extension;
|
|
|
- // 构建完整新路径
|
|
|
- String newPdfPath = path.resolveSibling(newFileName).toString();
|
|
|
- return newPdfPath;
|
|
|
- }
|
|
|
-
|
|
|
- protected static List<Integer> removeBlankPages(PdfDocument pdfDoc) {
|
|
|
- int totalPages = pdfDoc.getNumberOfPages();
|
|
|
- List<Integer> deletedPages = new ArrayList<>(); // 记录被删除的页
|
|
|
-
|
|
|
- for (int i = totalPages; i > 0; i--) {
|
|
|
- PdfPage page = pdfDoc.getPage(i);
|
|
|
- String pageText = PdfTextExtractor.getTextFromPage(page);
|
|
|
-
|
|
|
- if (pageText.trim().isEmpty() || pageText.trim().matches("\\d+")) {
|
|
|
- pdfDoc.removePage(i);
|
|
|
- deletedPages.add(i);
|
|
|
-
|
|
|
- }
|
|
|
- }
|
|
|
- return deletedPages;
|
|
|
- }
|
|
|
-
|
|
|
- private static void addBackgroundToFirstPage(PdfDocument pdfDoc, String backgroundImagePath) throws IOException {
|
|
|
- PdfPage firstPage = pdfDoc.getFirstPage();
|
|
|
- ImageData imageData = ImageDataFactory.create(backgroundImagePath);
|
|
|
- Image backgroundImage = new Image(imageData);
|
|
|
-
|
|
|
- PdfCanvas pdfCanvas = new PdfCanvas(firstPage.newContentStreamBefore(), firstPage.getResources(), pdfDoc);
|
|
|
- Canvas canvas = new Canvas(pdfCanvas, firstPage.getPageSize());
|
|
|
-
|
|
|
- backgroundImage.setAutoScale(true);
|
|
|
- backgroundImage.setProperty(Property.UNDERLINE, true);
|
|
|
- canvas.add(backgroundImage);
|
|
|
- canvas.close();
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 标题添加圆圈背景
|
|
|
- *
|
|
|
- * @param pdfDoc
|
|
|
- * @param tocEntities
|
|
|
- *
|
|
|
- * @return TOC前面的页数
|
|
|
- */
|
|
|
- private static int modifyHeadings(PdfDocument pdfDoc, List<TocEntity> tocEntities, Integer coverPage) {
|
|
|
- Map<Integer, List<TocEntity>> tocByPage =
|
|
|
- tocEntities.stream().collect(Collectors.groupingBy(TocEntity::getPageNumber));
|
|
|
-
|
|
|
- int ret = 0;
|
|
|
- for (int pageNum = 1; pageNum <= pdfDoc.getNumberOfPages(); pageNum++) {
|
|
|
- if (!tocByPage.containsKey(pageNum)) {
|
|
|
- if (pageNum - ret == 1) {
|
|
|
- ret++;
|
|
|
- }
|
|
|
- continue; // 跳过没有标题的页面
|
|
|
- }
|
|
|
-
|
|
|
- PdfPage page = pdfDoc.getPage(pageNum + coverPage);
|
|
|
- List<TocEntity> pageTocEntities = tocByPage.get(pageNum);
|
|
|
-
|
|
|
- // 1. 背景层(圆圈)
|
|
|
- PdfCanvas backgroundCanvas = new PdfCanvas(page.newContentStreamBefore(), page.getResources(), pdfDoc);
|
|
|
- for (TocEntity info : pageTocEntities) {
|
|
|
- if (shouldStyleHeading(info)) { // 判断是否需要样式
|
|
|
- drawCircleBackground(backgroundCanvas, info);
|
|
|
- }
|
|
|
- }
|
|
|
- backgroundCanvas.release();
|
|
|
-
|
|
|
- // 2. 文本层
|
|
|
- PdfCanvas textCanvas = new PdfCanvas(page.newContentStreamAfter(), page.getResources(), pdfDoc);
|
|
|
- Canvas canvas = new Canvas(textCanvas, page.getPageSize());
|
|
|
- for (TocEntity info : pageTocEntities) {
|
|
|
- if (shouldStyleHeading(info)) {
|
|
|
- Paragraph paragraph = createStyledParagraph(info, pageNum);
|
|
|
- paragraph.setNextRenderer(new FancyParagraphRenderer(paragraph, pdfDoc));
|
|
|
- canvas.add(paragraph);
|
|
|
- }
|
|
|
- }
|
|
|
- canvas.close();
|
|
|
- }
|
|
|
- return ret;
|
|
|
- }
|
|
|
-
|
|
|
- // 判断是否是应该应用样式的标题
|
|
|
- private static boolean shouldStyleHeading(TocEntity info) {
|
|
|
- return info.getFontSize() == 16 && info.getTitle().matches("^\\d+\\..*");
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 绘制圆圈背景
|
|
|
- *
|
|
|
- * @param canvas
|
|
|
- * @param info
|
|
|
- */
|
|
|
- private static void drawCircleBackground(PdfCanvas canvas, TocEntity info) {
|
|
|
- Rectangle rect = info.getRect();
|
|
|
- canvas.saveState().setFillColor(new DeviceRgb(210, 235, 255))
|
|
|
- .circle(rect.getX() + info.getParagraphHeight() / 2 - 1, rect.getY() + info.getParagraphHeight() / 2 - 1,
|
|
|
- info.getParagraphHeight() / 2 + 2)
|
|
|
- .fill().restoreState();
|
|
|
- }
|
|
|
-
|
|
|
- private static Paragraph createStyledParagraph(TocEntity info, int pageNum) {
|
|
|
- return new Paragraph().setFontSize(info.getFontSize()).setFixedPosition(pageNum, info.getRect().getX(),
|
|
|
- info.getRect().getY(), info.getRect().getWidth());
|
|
|
- }
|
|
|
-
|
|
|
- private static void generateTableOfContents(PdfDocument pdfDoc, List<TocEntity> headings, String fontPath,
|
|
|
- int coverPageCount) throws IOException {
|
|
|
- PdfFont font = PdfFontFactory.createFont(FontProgramFactory.createFont(fontPath));
|
|
|
- float pageHeight = pdfDoc.getDefaultPageSize().getHeight();
|
|
|
- float startY = pageHeight - 50;
|
|
|
- float endY = 50;
|
|
|
- float lineHeight = 15;
|
|
|
-
|
|
|
- List<List<TocEntity>> pagesHeadings = new ArrayList<>();
|
|
|
- List<TocEntity> currentPageHeadings = new ArrayList<>();
|
|
|
- float currentHeight = 0;
|
|
|
-
|
|
|
- for (TocEntity heading : headings) {
|
|
|
- // 计算当前目录项所需高度(考虑多行情况)
|
|
|
- float entryHeight = lineHeight * (1 + (float) Math.ceil(heading.getTitle().length() / 50f));
|
|
|
-
|
|
|
- if (currentHeight - entryHeight < endY) { // 如果剩余空间不足
|
|
|
- if (!currentPageHeadings.isEmpty()) {
|
|
|
- pagesHeadings.add(currentPageHeadings);
|
|
|
- }
|
|
|
- // 创建新页,并将当前heading作为新页的第一项
|
|
|
- currentPageHeadings = new ArrayList<>();
|
|
|
- currentPageHeadings.add(heading);
|
|
|
- currentHeight = startY - entryHeight; // 从顶部开始,减去当前项高度
|
|
|
- } else {
|
|
|
- // 当前页有足够空间,直接添加
|
|
|
- currentPageHeadings.add(heading);
|
|
|
- currentHeight -= entryHeight; // 更新当前Y位置
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // 添加最后一页的目录项
|
|
|
- if (!currentPageHeadings.isEmpty()) {
|
|
|
- pagesHeadings.add(currentPageHeadings);
|
|
|
- }
|
|
|
-
|
|
|
- Map<TocEntity, PdfDestination> destMap = new HashMap<>();
|
|
|
- float pageWidth = pdfDoc.getDefaultPageSize().getWidth();
|
|
|
- float leftMargin = 10;
|
|
|
- float rightMargin = 25;
|
|
|
- float tabStopPosition = pageWidth - rightMargin;
|
|
|
-
|
|
|
- for (int i = 0; i < pagesHeadings.size(); i++) {
|
|
|
- PdfPage tocPage = pdfDoc.addNewPage(coverPageCount + i + 1);
|
|
|
- Canvas canvas = new Canvas(tocPage, pdfDoc.getDefaultPageSize());
|
|
|
-
|
|
|
- float currentY;
|
|
|
- if (i == 0) {
|
|
|
- Paragraph title = new Paragraph("目录")
|
|
|
- .setFont(font)
|
|
|
- .setFontSize(12)
|
|
|
- .setBold()
|
|
|
- .setTextAlignment(TextAlignment.CENTER)
|
|
|
- .setMarginTop(50)
|
|
|
- .setMarginBottom(20);
|
|
|
- canvas.add(title);
|
|
|
- currentY = startY - 12 - title.getMarginBottom().getValue() - 15; // 标题高度 + 间距
|
|
|
- } else {
|
|
|
- currentY = startY;
|
|
|
- }
|
|
|
-
|
|
|
- for (TocEntity heading : pagesHeadings.get(i)) {
|
|
|
- float indent = leftMargin + heading.getLevel() * 15;
|
|
|
-
|
|
|
- Paragraph p = new Paragraph().setFont(font).setFontSize(10)
|
|
|
- .addTabStops(new TabStop(tabStopPosition, TabAlignment.RIGHT, new DottedLine()))
|
|
|
- .setFixedPosition(indent, currentY - lineHeight, pageWidth - indent - rightMargin);
|
|
|
-
|
|
|
- p.add(heading.getTitle()).add(new Tab()).add(String.valueOf(heading.getPageNumber()));
|
|
|
-
|
|
|
- float paragraphHeight = p.createRendererSubTree().setParent(canvas.getRenderer())
|
|
|
- .layout(new LayoutContext(new LayoutArea(1, new Rectangle(pageWidth - indent - rightMargin, 1000))))
|
|
|
- .getOccupiedArea().getBBox().getHeight();
|
|
|
- if (currentY - paragraphHeight < endY) {
|
|
|
- canvas.close();
|
|
|
- tocPage = pdfDoc.addNewPage(2 + ++i);
|
|
|
- canvas = new Canvas(tocPage, pdfDoc.getDefaultPageSize());
|
|
|
- currentY = startY;
|
|
|
- }
|
|
|
- canvas.add(p);
|
|
|
- PdfPage targetPage = pdfDoc.getPage(heading.getPageNumber() + i + 1 + coverPageCount);
|
|
|
- Rectangle targetRect = heading.getRect();
|
|
|
- PdfDestination dest =
|
|
|
- PdfExplicitDestination.createXYZ(targetPage, targetRect.getLeft(), targetRect.getTop(), 0);
|
|
|
- destMap.put(heading, dest);
|
|
|
-
|
|
|
- Rectangle clickRect = new Rectangle(indent, currentY - paragraphHeight,
|
|
|
- pageWidth - indent - rightMargin, paragraphHeight);
|
|
|
-
|
|
|
- PdfLinkAnnotation link = new PdfLinkAnnotation(clickRect).setDestination(dest)
|
|
|
- .setBorderStyle(PdfAnnotation.STYLE_UNDERLINE).setHighlightMode(PdfLinkAnnotation.HIGHLIGHT_INVERT);
|
|
|
-
|
|
|
- tocPage.addAnnotation(link);
|
|
|
- currentY -= paragraphHeight;
|
|
|
- }
|
|
|
- canvas.close();
|
|
|
- }
|
|
|
-
|
|
|
- PdfOutline root = pdfDoc.getOutlines(true);
|
|
|
- for (TocEntity heading : headings) {
|
|
|
- PdfOutline outline = root.addOutline(heading.getTitle());
|
|
|
- outline.addDestination(destMap.get(heading));
|
|
|
- }
|
|
|
- // 删除最后一页
|
|
|
- int lastPageIndex = pdfDoc.getNumberOfPages();
|
|
|
- PdfPage page = pdfDoc.getPage(lastPageIndex);
|
|
|
- String pageText = PdfTextExtractor.getTextFromPage(page);
|
|
|
-
|
|
|
- if (pageText.trim().isEmpty() || pageText.trim().matches("\\d+")) {
|
|
|
- pdfDoc.removePage(lastPageIndex);
|
|
|
- }
|
|
|
-
|
|
|
- }
|
|
|
-
|
|
|
- public static ArrayList<TocEntity> extractHeadings(String pdfPath, Integer coverPage) throws IOException {
|
|
|
- ArrayList<TocEntity> headings = new ArrayList<>();
|
|
|
- try (PdfDocument pdfDoc = new PdfDocument(new PdfReader(pdfPath))) {
|
|
|
- for (int i = 1; i <= pdfDoc.getNumberOfPages(); i++) {
|
|
|
- PdfPage page = pdfDoc.getPage(i);
|
|
|
- final int currentPage = i;
|
|
|
- new PdfCanvasProcessor(new HeadingDetectionStrategy(headings, pdfDoc, currentPage - coverPage))
|
|
|
- .processPageContent(page);
|
|
|
- }
|
|
|
- }
|
|
|
- return headings;
|
|
|
- }
|
|
|
-
|
|
|
- public static ArrayList<TocEntity> processHeadings(ArrayList<TocEntity> headings) {
|
|
|
- ArrayList<TocEntity> result = new ArrayList<>();
|
|
|
- if (headings == null || headings.isEmpty()) {
|
|
|
- return result;
|
|
|
- }
|
|
|
- for (int i = 0; i < headings.size(); i++) {
|
|
|
- TocEntity current = headings.get(i);
|
|
|
-
|
|
|
- if (!current.getTitle().matches("^\\d+\\..*")) {
|
|
|
- if (!result.isEmpty()) {
|
|
|
- TocEntity last = result.get(result.size() - 1);
|
|
|
- if (last.getPageNumber() == current.getPageNumber()) {
|
|
|
- TocEntity combined = new TocEntity(last.getTitle() + current.getTitle(), last.getPageNumber(),
|
|
|
- last.getLevel(), last.getRect(), last.getFontSize(), last.getParagraphHeight());
|
|
|
- result.set(result.size() - 1, combined);
|
|
|
- continue;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- if (current.getTitle().matches("^\\d+\\.")) {
|
|
|
- if (i + 1 < headings.size()) {
|
|
|
- TocEntity next = headings.get(i + 1);
|
|
|
- TocEntity combined = new TocEntity(current.getTitle() + next.getTitle(), current.getPageNumber(),
|
|
|
- current.getLevel(), current.getRect(), current.getFontSize(), current.getParagraphHeight());
|
|
|
- result.add(combined);
|
|
|
- i++;
|
|
|
- continue;
|
|
|
- }
|
|
|
- }
|
|
|
- result.add(current);
|
|
|
- }
|
|
|
-
|
|
|
- return result;
|
|
|
- }
|
|
|
-
|
|
|
- private static class HeadingDetectionStrategy extends SimpleTextExtractionStrategy {
|
|
|
- private static final float LEVEL_1_FONT_SIZE = 16;
|
|
|
- private static final float LEVEL_2_FONT_SIZE = 12;
|
|
|
- private final ArrayList<TocEntity> headings;
|
|
|
- private final PdfDocument pdfDoc;
|
|
|
- private final int currentPage;
|
|
|
-
|
|
|
- HeadingDetectionStrategy(ArrayList<TocEntity> headings, PdfDocument pdfDoc, int currentPage) {
|
|
|
- this.headings = headings;
|
|
|
- this.pdfDoc = pdfDoc;
|
|
|
- this.currentPage = currentPage;
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public void eventOccurred(IEventData data, EventType type) {
|
|
|
- if (type == EventType.RENDER_TEXT) {
|
|
|
- TextRenderInfo info = (TextRenderInfo) data;
|
|
|
- float fontSize = info.getFontSize();
|
|
|
-
|
|
|
- if (fontSize == LEVEL_1_FONT_SIZE) {
|
|
|
- addHeading(info, 1, pdfDoc, currentPage, headings);
|
|
|
- } else if (fontSize == LEVEL_2_FONT_SIZE) {
|
|
|
- addHeading(info, 2, pdfDoc, currentPage, headings);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- super.eventOccurred(data, type);
|
|
|
- }
|
|
|
-
|
|
|
- private void addHeading(TextRenderInfo info, int level, PdfDocument pdfDoc, int i,
|
|
|
- ArrayList<TocEntity> headings) {
|
|
|
- String text = info.getText().trim();
|
|
|
- LineSegment ascentLine = info.getAscentLine();
|
|
|
- LineSegment descentLine = info.getDescentLine();
|
|
|
- float ascentY = ascentLine.getStartPoint().get(1);
|
|
|
- float descentY = descentLine.getStartPoint().get(1);
|
|
|
- float paragraphHeight = ascentY - descentY;
|
|
|
-
|
|
|
- Rectangle rect = info.getBaseline().getBoundingRectangle();
|
|
|
- if (!text.isEmpty()) {
|
|
|
- headings.add(new TocEntity(text, i, level, rect, paragraphHeight, info.getFontSize()
|
|
|
-
|
|
|
- ));
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- }
|
|
|
-
|
|
|
-}
|