Browse Source

新增招投标图片相似性

sjj_dev
zhouhaibin 2 weeks ago
parent
commit
4350264ab7
  1. 2
      ruoyi-admin/src/main/resources/application-dev.yml
  2. 184
      zaojiaManagement/zaojia-productManagement/src/main/java/org/dromara/productManagement/service/impl/DocumentTaskResultsServiceImpl.java
  3. 20
      zaojiaManagement/zaojia-productManagement/src/main/java/org/dromara/productManagement/service/impl/SjjDocumentTasksServiceImpl.java

2
ruoyi-admin/src/main/resources/application-dev.yml

@ -52,7 +52,7 @@ spring:
# url: jdbc:mysql://localhost:3306/zaojia?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&autoReconnect=true&rewriteBatchedStatements=true&allowPublicKeyRetrieval=true&nullCatalogMeansCurrent=true
# username: root
# password: root
url: jdbc:mysql://10.1.21.250:3306/aitable?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&autoReconnect=true&rewriteBatchedStatements=true&allowPublicKeyRetrieval=true&nullCatalogMeansCurrent=true
url: jdbc:mysql://10.1.21.250:3306/sjjtable?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&autoReconnect=true&rewriteBatchedStatements=true&allowPublicKeyRetrieval=true&nullCatalogMeansCurrent=true
# url: jdbc:mysql://218.0.1.42:53306/sjjtable?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&autoReconnect=true&rewriteBatchedStatements=true&allowPublicKeyRetrieval=true&nullCatalogMeansCurrent=true
username: root
password: 'HXj-6nR|D8xy*h#!I&:('

184
zaojiaManagement/zaojia-productManagement/src/main/java/org/dromara/productManagement/service/impl/DocumentTaskResultsServiceImpl.java

@ -42,6 +42,7 @@ import javax.xml.parsers.SAXParser;
import java.io.*;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
@ -243,22 +244,22 @@ public class DocumentTaskResultsServiceImpl implements IDocumentTaskResultsServi
if (!"isRead".equals(field) && !"isAdopted".equals(field)) {
throw new RuntimeException("不支持更新的字段: " + field);
}
// 验证值是否合法
if (!"0".equals(value) && !"1".equals(value)) {
throw new RuntimeException("无效的值: " + value);
}
try {
LambdaUpdateWrapper<DocumentTaskResultDetail> updateWrapper = Wrappers.lambdaUpdate(DocumentTaskResultDetail.class);
updateWrapper.eq(DocumentTaskResultDetail::getId, id);
if ("isRead".equals(field)) {
updateWrapper.set(DocumentTaskResultDetail::getIsRead, value);
} else {
updateWrapper.set(DocumentTaskResultDetail::getIsAdopted, value);
}
return documentTaskResultDetailMapper.update(null, updateWrapper) > 0;
} catch (Exception e) {
throw new RuntimeException("更新状态失败: " + e.getMessage(), e);
@ -351,12 +352,6 @@ public class DocumentTaskResultsServiceImpl implements IDocumentTaskResultsServi
// Markdown转PDF方法
private void convertMarkdownToPdf(String markdown, String outputPath) throws Exception {
// 使用flexmark-java解析Markdown
// MutableDataSet options = new MutableDataSet();
// Parser parser = Parser.builder(options).build();
// HtmlRenderer renderer = HtmlRenderer.builder(options).build();
// Node document = parser.parse(markdown);
// String htmlContent = renderer.render(document);
MutableDataSet options = new MutableDataSet();
// 启用表格解析
options.set(Parser.EXTENSIONS, Arrays.asList(
@ -371,7 +366,7 @@ public class DocumentTaskResultsServiceImpl implements IDocumentTaskResultsServi
StrikethroughExtension.create()
))
.build();
markdown=markdown.replaceAll("!\\[","!{}");
Node document = parser.parse(markdown);
String htmlContent = renderer.render(document);
// 使用 JSoup 清理
@ -380,21 +375,12 @@ public class DocumentTaskResultsServiceImpl implements IDocumentTaskResultsServi
// 配置允许的标签
Safelist safelist = Safelist.relaxed()
// .addTags("em", "strong", "div", "span","br")
// .addAttributes(":all", "style", "class");
.addTags("em", "strong", "div", "span", "br", "table", "thead", "tbody", "tr", "th", "td")
.addTags("em", "strong", "div", "span", "br", "table", "thead", "tbody", "tr", "th", "td", "img")
.addAttributes(":all", "style", "class")
.addAttributes("table", "border", "cellpadding", "cellspacing");
.addAttributes("table", "border", "cellpadding", "cellspacing")
.addAttributes("img", "src", "alt", "width", "height"); // 允许img标签及其属性
htmlContent = Jsoup.clean(htmlContent, "", safelist, settings);
// 处理代码块 - 将code标签替换为带内联样式的div (不使用class)
htmlContent = htmlContent.replaceAll("(?s)<pre><code>(.*?)</code></pre>",
"<div style='background-color:#f6f8fa;border:1px solid #ddd;'>$1</div>");
// 处理行内代码 - 将行内code替换为带内联样式的span (不使用class)
htmlContent = htmlContent.replaceAll("<code>(.*?)</code>",
"<span style='background-color:#f6f8fa;border:1px solid #ddd;'>$1</span>");
// 使用 NekoHTML 进一步处理
// 创建完整的HTML文档,使用微软雅黑
String html = String.format("""
<!DOCTYPE html>
@ -437,6 +423,14 @@ public class DocumentTaskResultsServiceImpl implements IDocumentTaskResultsServi
tr {
page-break-inside: avoid;
}
img {
width: 600px;
max-width: 100%%;
height: auto;
display: block;
margin: 20px auto;
page-break-inside: avoid;
}
</style>
</head>
<body>
@ -444,44 +438,130 @@ public class DocumentTaskResultsServiceImpl implements IDocumentTaskResultsServi
</body>
</html>
""",
// 特殊处理code标签,确保其内容不被换行破坏
htmlContent.replace("<br>", "\n"));
// 处理图片标签,将markdown的图片语法转换为HTML的img标签
html=html.replaceAll("!\\{}","!\\[");
html = handleImageTags(html);
// 配置Flying Saucer
ITextRenderer pdfRenderer = new ITextRenderer();
// 主要使用微软雅黑,但也加载其他字体作为备选
// 从classpath加载字体
try (InputStream is = getClass().getResourceAsStream("/fonts/msyh.ttc")) {
File tempFont = File.createTempFile("msyh", ".ttc");
FileUtils.copyInputStreamToFile(is, tempFont);
pdfRenderer.getFontResolver().addFont(tempFont.getAbsolutePath(), BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
tempFont.deleteOnExit();
}
// 设置文档
pdfRenderer.setDocumentFromString(html);
// 设置输出编码和基础URL
pdfRenderer.getSharedContext().setBaseURL("file:///");
// 配置Flying Saucer
ITextRenderer pdfRenderer = new ITextRenderer();
// 主要使用微软雅黑,但也加载其他字体作为备选
// 从classpath加载字体
try (InputStream is = getClass().getResourceAsStream("/fonts/msyh.ttc")) {
File tempFont = File.createTempFile("msyh", ".ttc");
FileUtils.copyInputStreamToFile(is, tempFont);
pdfRenderer.getFontResolver().addFont(tempFont.getAbsolutePath(), BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
tempFont.deleteOnExit();
}
try (InputStream consolas = getClass().getResourceAsStream("/fonts/consolas.ttf")){
if (consolas != null) {
File tempConsolas = File.createTempFile("consolas", ".ttf");
FileUtils.copyInputStreamToFile(consolas, tempConsolas);
pdfRenderer.getFontResolver().addFont(tempConsolas.getAbsolutePath(), BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
tempConsolas.deleteOnExit();
}
}
// 布局和创建PDF
pdfRenderer.layout();
// 设置文档
pdfRenderer.setDocumentFromString(html);
try (OutputStream os = new FileOutputStream(outputPath)) {
pdfRenderer.createPDF(os, true);
}
}
// 设置输出编码和基础URL
pdfRenderer.getSharedContext().setBaseURL("file:///");
/**
 * Matches Markdown image syntax {@code ![alt](path)}.
 * Group 1 is the alt text (non-greedy); group 2 is the path, which may contain
 * parenthesised segments nested one level deep (Java regex has no balancing groups).
 */
private static final java.util.regex.Pattern MARKDOWN_IMAGE_PATTERN =
        java.util.regex.Pattern.compile("!\\[(.*?)\\]\\(([^()]*(?:\\([^()]*\\)[^()]*)*?)\\)");

/**
 * Replaces Markdown image references in the given content with inline HTML {@code <img>} tags.
 *
 * <p>For every {@code ![alt](path)} occurrence the referenced file is looked up on the local
 * file system. If it exists, its bytes are embedded as a Base64 {@code data:} URL; otherwise
 * (or if reading fails) the original Markdown text is left untouched.
 *
 * <p>NOTE(review): the path is taken verbatim from the document and read from disk, so any
 * file readable by the process can be embedded — confirm the Markdown source is trusted or
 * restrict the lookup to a known base directory.
 *
 * @param content text containing Markdown image references
 * @return the content with resolvable image references replaced by {@code <img>} tags
 */
private String handleImageTags(String content) {
    java.util.regex.Matcher matcher = MARKDOWN_IMAGE_PATTERN.matcher(content);
    StringBuilder result = new StringBuilder();

    while (matcher.find()) {
        // Default: keep the matched Markdown text exactly as it was.
        String replacement = matcher.group(0);
        String alt = matcher.group(1);
        String imagePath = matcher.group(2);

        File imageFile = new File(imagePath);
        if (!imageFile.exists() && imagePath.contains("(")) {
            // The path contains parentheses and does not resolve; retry with the corrected path.
            String correctedPath = correctImagePath(imagePath);
            File correctedFile = new File(correctedPath);
            if (correctedFile.exists()) {
                imagePath = correctedPath;
                imageFile = correctedFile;
            }
        }

        if (imageFile.exists()) {
            try {
                byte[] imageBytes = Files.readAllBytes(imageFile.toPath());
                String base64Image = java.util.Base64.getEncoder().encodeToString(imageBytes);
                String imageType = getImageMimeType(imagePath);
                // A raw double quote in the alt text would terminate the attribute early and
                // produce malformed markup, so it is written as an entity.
                String safeAlt = alt.replace("\"", "&quot;");
                // Fixed width keeps the image inside the page.
                replacement = String.format("<img src=\"data:%s;base64,%s\" alt=\"%s\" style=\"width: 600px; max-width: 100%%; height: auto; display: block; margin: 10px auto;\"/>",
                        imageType, base64Image, safeAlt);
            } catch (IOException ignored) {
                // Best effort: if the image cannot be read, the original text is kept.
            }
        }

        // quoteReplacement escapes both '$' and '\'. Escaping only '$' lets appendReplacement
        // swallow backslashes (e.g. Windows paths) and throw on a trailing backslash.
        matcher.appendReplacement(result, java.util.regex.Matcher.quoteReplacement(replacement));
    }
    matcher.appendTail(result);
    return result.toString();
}
try (OutputStream os = new FileOutputStream(outputPath)) {
pdfRenderer.createPDF(os, true);
}
/**
 * Attempts to repair an image path that contains parentheses.
 *
 * <p>Locates the last {@code ')'} in the path, scans backwards for the {@code '('} that
 * balances it, and returns the path with that whole parenthesised group (brackets and
 * contents) removed. If there is no {@code ')'} or no balancing {@code '('}, the path is
 * returned unchanged.
 *
 * @param originalPath the path as extracted from the Markdown image reference
 * @return the path without its last balanced parenthesised group, or the original path
 */
private String correctImagePath(String originalPath) {
    int closeIndex = originalPath.lastIndexOf(")");
    if (closeIndex == -1) {
        return originalPath;
    }

    // Walk left from the closing bracket, tracking nesting depth until it is balanced.
    int depth = 1;
    for (int i = closeIndex - 1; i >= 0; i--) {
        char current = originalPath.charAt(i);
        if (current == ')') {
            depth++;
        } else if (current == '(') {
            depth--;
        }
        if (depth == 0) {
            // i is the matching '(' — cut out everything from it through the closing ')'.
            return originalPath.substring(0, i) + originalPath.substring(closeIndex + 1);
        }
    }
    return originalPath;
}
/**
 * Derives an image MIME type from a file path's extension.
 *
 * <p>The extension is everything after the last {@code '.'} (the whole path if there is no
 * dot), compared case-insensitively. Unrecognised extensions fall back to
 * {@code image/jpeg}.
 *
 * @param filePath path or file name of the image
 * @return the MIME type for png, jpg/jpeg, gif or bmp; {@code image/jpeg} otherwise
 */
private String getImageMimeType(String filePath) {
    // Locale.ROOT keeps lower-casing independent of the JVM default locale; under a Turkish
    // locale "GIF" would otherwise become "gıf" (dotless i) and miss the "gif" case.
    String extension = filePath.substring(filePath.lastIndexOf(".") + 1)
            .toLowerCase(java.util.Locale.ROOT);
    return switch (extension) {
        case "png" -> "image/png";
        case "jpg", "jpeg" -> "image/jpeg";
        case "gif" -> "image/gif";
        case "bmp" -> "image/bmp";
        default -> "image/jpeg";
    };
}
// 添加文件到ZIP

20
zaojiaManagement/zaojia-productManagement/src/main/java/org/dromara/productManagement/service/impl/SjjDocumentTasksServiceImpl.java

@ -159,7 +159,7 @@ public class SjjDocumentTasksServiceImpl implements ISjjDocumentTasksService {
new File(tenderOriginalDir).mkdirs();
new File(tenderTxtDir).mkdirs();
// 处理投标文件压缩包
processZipFile(bidZipPath, bidOriginalDir, bidTxtDir);
processZipFile(add.getTaskName(),bidZipPath, bidOriginalDir, bidTxtDir);
// 复制招标文件到任务文件夹
File tenderDoc = new File(tenderDocPath);
@ -177,7 +177,9 @@ public class SjjDocumentTasksServiceImpl implements ISjjDocumentTasksService {
// 如果是PDF文件,解析其内容到招标文件TXT目录
if (tenderDocName.toLowerCase().endsWith(".pdf") && PdfParserUtils.isValidPdf(tenderDocCopy.getAbsolutePath())) {
processAndSavePdfContent(tenderDocCopy, tenderTxtDir, getSystemCharset());
// NOTE(review): the literal compared here is "ssjjbidAnalysis" (double "s"), whereas
// processAllPdfFiles compares taskName against "sjjbidAnalysis" — this looks like a typo;
// confirm the intended task name. getTaskName() is also dereferenced directly, so a null
// task name throws NullPointerException at this line.
if (add.getTaskName().equals("ssjjbidAnalysis")){
processAndSavePdfContent(tenderDocCopy, tenderTxtDir, getSystemCharset());
}
}
}
@ -231,7 +233,7 @@ public class SjjDocumentTasksServiceImpl implements ISjjDocumentTasksService {
* @throws IOException 解压或解析过程中发生IO错误
* @throws ZipException ZIP文件处理错误
*/
private void processZipFile(String zipFilePath, String originalDir, String txtDir) throws IOException, ZipException {
private void processZipFile(String taskName,String zipFilePath, String originalDir, String txtDir) throws IOException, ZipException {
// 创建解压目标目录(如果不存在)
File extractDirFile = new File(originalDir);
if (!extractDirFile.exists()) {
@ -272,14 +274,14 @@ public class SjjDocumentTasksServiceImpl implements ISjjDocumentTasksService {
}
// 递归处理所有PDF文件
processAllPdfFiles(extractDirFile, txtDir, bestCharset);
processAllPdfFiles(taskName,extractDirFile, txtDir, bestCharset);
} catch (Exception e) {
// 如果使用检测的编码失败,尝试直接整体解压
try {
ZipFile zipFile = new ZipFile(zipFilePath);
zipFile.setCharset(getSystemCharset());
zipFile.extractAll(originalDir);
processAllPdfFiles(extractDirFile, txtDir, getSystemCharset());
processAllPdfFiles(taskName,extractDirFile, txtDir, getSystemCharset());
} catch (Exception e2) {
System.err.println("解压失败: " + e2.getMessage());
throw new IOException("解压失败", e2);
@ -453,7 +455,7 @@ public class SjjDocumentTasksServiceImpl implements ISjjDocumentTasksService {
* @param txtOutputDir PDF解析后的TXT文件输出目录
* @param charset 字符集
*/
private void processAllPdfFiles(File directory, String txtOutputDir, Charset charset) {
private void processAllPdfFiles(String taskName,File directory, String txtOutputDir, Charset charset) {
if (!directory.isDirectory()) {
return;
}
@ -466,10 +468,12 @@ public class SjjDocumentTasksServiceImpl implements ISjjDocumentTasksService {
for (File file : files) {
if (file.isDirectory()) {
// 递归处理子目录
processAllPdfFiles(file, txtOutputDir, charset);
processAllPdfFiles(taskName,file, txtOutputDir, charset);
} else if (file.getName().toLowerCase().endsWith(".pdf") && PdfParserUtils.isValidPdf(file.getAbsolutePath())) {
// 处理PDF文件
processAndSavePdfContent(file, txtOutputDir, charset);
if (taskName.equals("sjjbidAnalysis")){
processAndSavePdfContent(file, txtOutputDir, charset);
}
}
}
}

Loading…
Cancel
Save