新增招投标图片相似性

5 months ago · 4350264ab7
3 changed files with 145 additions and 61 deletions
--- a/ruoyi-admin/src/main/resources/application-dev.yml
+++ b/ruoyi-admin/src/main/resources/application-dev.yml
@ -52,7 +52,7 @@ spring:
 #          url: jdbc:mysql://localhost:3306/zaojia?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&autoReconnect=true&rewriteBatchedStatements=true&allowPublicKeyRetrieval=true&nullCatalogMeansCurrent=true
 #          username: root
 #          password: root
-          url: jdbc:mysql://10.1.21.250:3306/aitable?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&autoReconnect=true&rewriteBatchedStatements=true&allowPublicKeyRetrieval=true&nullCatalogMeansCurrent=true
+          url: jdbc:mysql://10.1.21.250:3306/sjjtable?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&autoReconnect=true&rewriteBatchedStatements=true&allowPublicKeyRetrieval=true&nullCatalogMeansCurrent=true
 #          url: jdbc:mysql://218.0.1.42:53306/sjjtable?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&autoReconnect=true&rewriteBatchedStatements=true&allowPublicKeyRetrieval=true&nullCatalogMeansCurrent=true
          username: root
          password: 'HXj-6nR|D8xy*h#!I&:('
--- a/zaojiaManagement/zaojia-productManagement/src/main/java/org/dromara/productManagement/service/impl/DocumentTaskResultsServiceImpl.java
+++ b/zaojiaManagement/zaojia-productManagement/src/main/java/org/dromara/productManagement/service/impl/DocumentTaskResultsServiceImpl.java
@ -42,6 +42,7 @@ import javax.xml.parsers.SAXParser;
 import java.io.*;
 import java.net.URLEncoder;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
@ -243,22 +244,22 @@ public class DocumentTaskResultsServiceImpl implements IDocumentTaskResultsServi
        if (!"isRead".equals(field) && !"isAdopted".equals(field)) {
            throw new RuntimeException("不支持更新的字段: " + field);
        }
-        
+
        // 验证值是否合法
        if (!"0".equals(value) && !"1".equals(value)) {
            throw new RuntimeException("无效的值: " + value);
        }
-        
+
        try {
            LambdaUpdateWrapper<DocumentTaskResultDetail> updateWrapper = Wrappers.lambdaUpdate(DocumentTaskResultDetail.class);
            updateWrapper.eq(DocumentTaskResultDetail::getId, id);
-            
+
            if ("isRead".equals(field)) {
                updateWrapper.set(DocumentTaskResultDetail::getIsRead, value);
            } else {
                updateWrapper.set(DocumentTaskResultDetail::getIsAdopted, value);
            }
-            
+
            return documentTaskResultDetailMapper.update(null, updateWrapper) > 0;
        } catch (Exception e) {
            throw new RuntimeException("更新状态失败: " + e.getMessage(), e);
@ -351,12 +352,6 @@ public class DocumentTaskResultsServiceImpl implements IDocumentTaskResultsServi

    // Markdown转PDF方法
    private void convertMarkdownToPdf(String markdown, String outputPath) throws Exception {
-        // 使用flexmark-java解析Markdown
-//        MutableDataSet options = new MutableDataSet();
-//        Parser parser = Parser.builder(options).build();
-//        HtmlRenderer renderer = HtmlRenderer.builder(options).build();
-//        Node document = parser.parse(markdown);
-//        String htmlContent = renderer.render(document);
        MutableDataSet options = new MutableDataSet();
        // 启用表格解析
        options.set(Parser.EXTENSIONS, Arrays.asList(
@ -371,7 +366,7 @@ public class DocumentTaskResultsServiceImpl implements IDocumentTaskResultsServi
                StrikethroughExtension.create()
            ))
            .build();
-
+        markdown=markdown.replaceAll("!\\[","!{}");
        Node document = parser.parse(markdown);
        String htmlContent = renderer.render(document);
        // 使用 JSoup 清理
@ -380,21 +375,12 @@ public class DocumentTaskResultsServiceImpl implements IDocumentTaskResultsServi

        // 配置允许的标签
        Safelist safelist = Safelist.relaxed()
-//            .addTags("em", "strong", "div", "span","br")
-//            .addAttributes(":all", "style", "class");
-            .addTags("em", "strong", "div", "span", "br", "table", "thead", "tbody", "tr", "th", "td")
+            .addTags("em", "strong", "div", "span", "br", "table", "thead", "tbody", "tr", "th", "td", "img")
            .addAttributes(":all", "style", "class")
-            .addAttributes("table", "border", "cellpadding", "cellspacing");
+            .addAttributes("table", "border", "cellpadding", "cellspacing")
+            .addAttributes("img", "src", "alt", "width", "height"); // 允许img标签及其属性
        htmlContent = Jsoup.clean(htmlContent, "", safelist, settings);
-        // 处理代码块 - 将code标签替换为带内联样式的div (不使用class)
-        htmlContent = htmlContent.replaceAll("(?s)<pre><code>(.*?)</code></pre>",
-            "<div style='background-color:#f6f8fa;border:1px solid #ddd;'>$1</div>");
-
-        // 处理行内代码 - 将行内code替换为带内联样式的span (不使用class)
-        htmlContent = htmlContent.replaceAll("<code>(.*?)</code>",
-            "<span style='background-color:#f6f8fa;border:1px solid #ddd;'>$1</span>");

-        // 使用 NekoHTML 进一步处理
        // 创建完整的HTML文档，使用微软雅黑
        String html = String.format("""
                <!DOCTYPE html>
@ -437,6 +423,14 @@ public class DocumentTaskResultsServiceImpl implements IDocumentTaskResultsServi
                         tr {
                             page-break-inside: avoid;
                         }
+                         img {
+                             width: 600px;
+                             max-width: 100%%;
+                             height: auto;
+                             display: block;
+                             margin: 20px auto;
+                             page-break-inside: avoid;
+                         }
                    </style>
                </head>
                <body>
@ -444,44 +438,130 @@ public class DocumentTaskResultsServiceImpl implements IDocumentTaskResultsServi
                </body>
                </html>
                """,
-            // 特殊处理code标签，确保其内容不被换行破坏
            htmlContent.replace("<br>", "\n"));
+        // 处理图片标签，将markdown的图片语法转换为HTML的img标签
+        html=html.replaceAll("!\\{}","!\\[");
+        html = handleImageTags(html);
+        // 配置Flying Saucer
+        ITextRenderer pdfRenderer = new ITextRenderer();
+
+        // 主要使用微软雅黑，但也加载其他字体作为备选
+        // 从classpath加载字体
+        try (InputStream is = getClass().getResourceAsStream("/fonts/msyh.ttc")) {
+            File tempFont = File.createTempFile("msyh", ".ttc");
+            FileUtils.copyInputStreamToFile(is, tempFont);
+            pdfRenderer.getFontResolver().addFont(tempFont.getAbsolutePath(), BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
+            tempFont.deleteOnExit();
+        }

+        // 设置文档
+        pdfRenderer.setDocumentFromString(html);

+        // 设置输出编码和基础URL
+        pdfRenderer.getSharedContext().setBaseURL("file:///");

-         // 配置Flying Saucer
-         ITextRenderer pdfRenderer = new ITextRenderer();
-
-         // 主要使用微软雅黑，但也加载其他字体作为备选
-         // 从classpath加载字体
-         try (InputStream is = getClass().getResourceAsStream("/fonts/msyh.ttc")) {
-             File tempFont = File.createTempFile("msyh", ".ttc");
-             FileUtils.copyInputStreamToFile(is, tempFont);
-             pdfRenderer.getFontResolver().addFont(tempFont.getAbsolutePath(), BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
-             tempFont.deleteOnExit();
-         }
-         try (InputStream consolas = getClass().getResourceAsStream("/fonts/consolas.ttf")){
-             if (consolas != null) {
-                 File tempConsolas = File.createTempFile("consolas", ".ttf");
-                 FileUtils.copyInputStreamToFile(consolas, tempConsolas);
-                 pdfRenderer.getFontResolver().addFont(tempConsolas.getAbsolutePath(), BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
-                 tempConsolas.deleteOnExit();
-             }
-         }
+        // 布局和创建PDF
+        pdfRenderer.layout();

-         // 设置文档
-         pdfRenderer.setDocumentFromString(html);
+        try (OutputStream os = new FileOutputStream(outputPath)) {
+            pdfRenderer.createPDF(os, true);
+        }
+    }

-         // 设置输出编码和基础URL
-         pdfRenderer.getSharedContext().setBaseURL("file:///");
+    /**
+     * Replaces Markdown image references ({@code ![alt](path)}) whose file exists on disk with
+     * inline {@code <img>} tags that embed the image as a Base64 data URL. References whose
+     * file is missing or unreadable are left exactly as written.
+     * @param content text containing Markdown image syntax
+     * @return the text with every resolvable image inlined
+     */
+    private String handleImageTags(String content) {
+        // Alt text is matched lazily; the path may contain one level of nested parentheses.
+        java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("!\\[(.*?)\\]\\(([^()]*(?:\\([^()]*\\)[^()]*)*?)\\)");
+        java.util.regex.Matcher matcher = pattern.matcher(content);
+        StringBuilder sb = new StringBuilder();
+
+        while (matcher.find()) {
+            String alt = matcher.group(1);
+            String imagePath = matcher.group(2);
+
+            // If the path as written does not exist and contains parentheses, retry with the corrected path.
+            File imageFile = new File(imagePath);
+            if (!imageFile.exists() && imagePath.contains("(")) {
+                String correctedPath = correctImagePath(imagePath);
+                File correctedFile = new File(correctedPath);
+                if (correctedFile.exists()) {
+                    imagePath = correctedPath;
+                    imageFile = correctedFile;
+                }
+            }

-         // 布局和创建PDF
-         pdfRenderer.layout();
+            // Keep the original Markdown unless the image can actually be read.
+            String replacement = matcher.group(0);
+            if (imageFile.exists()) {
+                try {
+                    byte[] imageBytes = Files.readAllBytes(imageFile.toPath());
+                    String base64Image = java.util.Base64.getEncoder().encodeToString(imageBytes);
+                    String imageType = getImageMimeType(imagePath);
+                    // Escape double quotes so the alt text cannot end the attribute early.
+                    String safeAlt = alt.replace("\"", "&quot;");
+                    // The fixed width keeps the picture inside the page.
+                    replacement = String.format("<img src=\"data:%s;base64,%s\" alt=\"%s\" style=\"width: 600px; max-width: 100%%; height: auto; display: block; margin: 10px auto;\"/>",
+                        imageType, base64Image, safeAlt);
+                } catch (IOException ignored) {
+                    // Best effort: an unreadable image leaves the Markdown text in place.
+                }
+            }
+            // quoteReplacement escapes both '$' and '\'; escaping '$' alone would drop the
+            // backslashes of Windows-style paths and throw on a trailing backslash.
+            matcher.appendReplacement(sb, java.util.regex.Matcher.quoteReplacement(replacement));
+        }
+        matcher.appendTail(sb);
+        return sb.toString();
+    }

-         try (OutputStream os = new FileOutputStream(outputPath)) {
-             pdfRenderer.createPDF(os, true);
-         }
+    /**
+     * 修正包含括号的图片路径
+     * @param originalPath 原始路径
+     * @return 修正后的路径
+     */
+    private String correctImagePath(String originalPath) {
+        // 如果路径中包含多个括号，取最后一个右括号之前的内容
+        int lastRightBracket = originalPath.lastIndexOf(")");
+        if (lastRightBracket != -1) {
+            // 找到对应的左括号
+            int count = 1;
+            int leftBracket = lastRightBracket - 1;
+            while (leftBracket >= 0 && count > 0) {
+                if (originalPath.charAt(leftBracket) == ')') {
+                    count++;
+                } else if (originalPath.charAt(leftBracket) == '(') {
+                    count--;
+                }
+                leftBracket--;
+            }
+            if (count == 0) {
+                // 找到匹配的括号对，返回不包含最外层括号的路径
+                return originalPath.substring(0, leftBracket + 1) + originalPath.substring(lastRightBracket + 1);
+            }
+        }
+        return originalPath;
+    }

+    /**
+     * Maps an image file's extension to a MIME type; anything unrecognised is treated as image/jpeg.
+     * @param filePath image path; the text after its last '.' is lower-cased with Locale.ROOT so the default locale (e.g. Turkish) cannot break the match
+     * @return image/png, image/jpeg, image/gif or image/bmp
+     */
+    private String getImageMimeType(String filePath) {
+        String extension = filePath.substring(filePath.lastIndexOf(".") + 1).toLowerCase(java.util.Locale.ROOT);
+        return switch (extension) {
+            case "png" -> "image/png";
+            case "jpg", "jpeg" -> "image/jpeg";
+            case "gif" -> "image/gif";
+            case "bmp" -> "image/bmp";
+            default -> "image/jpeg";
+        };
     }

    // 添加文件到ZIP
--- a/zaojiaManagement/zaojia-productManagement/src/main/java/org/dromara/productManagement/service/impl/SjjDocumentTasksServiceImpl.java
+++ b/zaojiaManagement/zaojia-productManagement/src/main/java/org/dromara/productManagement/service/impl/SjjDocumentTasksServiceImpl.java
@ -159,7 +159,7 @@ public class SjjDocumentTasksServiceImpl implements ISjjDocumentTasksService {
        new File(tenderOriginalDir).mkdirs();
        new File(tenderTxtDir).mkdirs();
        // 处理投标文件压缩包
-        processZipFile(bidZipPath, bidOriginalDir, bidTxtDir);
+        processZipFile(add.getTaskName(),bidZipPath, bidOriginalDir, bidTxtDir);

        // 复制招标文件到任务文件夹
        File tenderDoc = new File(tenderDocPath);
@ -177,7 +177,9 @@ public class SjjDocumentTasksServiceImpl implements ISjjDocumentTasksService {

            // 如果是PDF文件，解析其内容到招标文件TXT目录
            if (tenderDocName.toLowerCase().endsWith(".pdf") && PdfParserUtils.isValidPdf(tenderDocCopy.getAbsolutePath())) {
-                processAndSavePdfContent(tenderDocCopy, tenderTxtDir, getSystemCharset());
+                if (add.getTaskName().equals("ssjjbidAnalysis")){
+                    processAndSavePdfContent(tenderDocCopy, tenderTxtDir, getSystemCharset());
+                }
            }
        }

@ -231,7 +233,7 @@ public class SjjDocumentTasksServiceImpl implements ISjjDocumentTasksService {
     * @throws IOException 解压或解析过程中发生IO错误
     * @throws ZipException ZIP文件处理错误
     */
-    private void processZipFile(String zipFilePath, String originalDir, String txtDir) throws IOException, ZipException {
+    private void processZipFile(String taskName,String zipFilePath, String originalDir, String txtDir) throws IOException, ZipException {
        // 创建解压目标目录（如果不存在）
        File extractDirFile = new File(originalDir);
        if (!extractDirFile.exists()) {
@ -272,14 +274,14 @@ public class SjjDocumentTasksServiceImpl implements ISjjDocumentTasksService {
            }

            // 递归处理所有PDF文件
-            processAllPdfFiles(extractDirFile, txtDir, bestCharset);
+            processAllPdfFiles(taskName,extractDirFile, txtDir, bestCharset);
        } catch (Exception e) {
            // 如果使用检测的编码失败，尝试直接整体解压
            try {
                ZipFile zipFile = new ZipFile(zipFilePath);
                zipFile.setCharset(getSystemCharset());
                zipFile.extractAll(originalDir);
-                processAllPdfFiles(extractDirFile, txtDir, getSystemCharset());
+                processAllPdfFiles(taskName,extractDirFile, txtDir, getSystemCharset());
            } catch (Exception e2) {
                System.err.println("解压失败: " + e2.getMessage());
                throw new IOException("解压失败", e2);
@ -453,7 +455,7 @@ public class SjjDocumentTasksServiceImpl implements ISjjDocumentTasksService {
     * @param txtOutputDir PDF解析后的TXT文件输出目录
     * @param charset 字符集
     */
-    private void processAllPdfFiles(File directory, String txtOutputDir, Charset charset) {
+    private void processAllPdfFiles(String taskName,File directory, String txtOutputDir, Charset charset) {
        if (!directory.isDirectory()) {
            return;
        }
@ -466,10 +468,12 @@ public class SjjDocumentTasksServiceImpl implements ISjjDocumentTasksService {
        for (File file : files) {
            if (file.isDirectory()) {
                // 递归处理子目录
-                processAllPdfFiles(file, txtOutputDir, charset);
+                processAllPdfFiles(taskName,file, txtOutputDir, charset);
            } else if (file.getName().toLowerCase().endsWith(".pdf") && PdfParserUtils.isValidPdf(file.getAbsolutePath())) {
                // 处理PDF文件
-                processAndSavePdfContent(file, txtOutputDir, charset);
+                if (taskName.equals("sjjbidAnalysis")){
+                    processAndSavePdfContent(file, txtOutputDir, charset);
+                }
            }
        }
    }