在 Java 中将 DOCX 文档转换为 PDF 有多种方法,这里介绍两种常见的做法:一种是使用 Apache POI 库读取 DOCX 文档的内容,再使用 iText 库生成 PDF;另一种是使用 docx4j 库直接将 DOCX 转换为 PDF。
方法一(Apache POI + iText)的步骤流程:
Maven 和 Gradle 依赖:
Maven 依赖:
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.5.13</version>
</dependency>
Gradle 依赖:
implementation 'org.apache.poi:poi:4.1.2'
implementation 'org.apache.poi:poi-ooxml:4.1.2'
implementation 'com.itextpdf:itextpdf:5.5.13'
示例代码:
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.apache.poi.xwpf.usermodel.XWPFPicture;
import org.apache.poi.xwpf.usermodel.XWPFPictureType;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import com.itextpdf.text.Document;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.PdfWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
public class DocxToPdfConverter {
public static void main(String[] args) {
try {
// 1. 读取DOCX文档
FileInputStream docxFile = new FileInputStream("input.docx");
XWPFDocument document = new XWPFDocument(docxFile);
XWPFWordExtractor extractor = new XWPFWordExtractor(document);
// 2. 创建PDF文档
Document pdfDocument = new Document();
PdfWriter.getInstance(pdfDocument, new FileOutputStream("output.pdf"));
pdfDocument.open();
// 3. 将DOCX内容写入PDF文档
String text = extractor.getText();
pdfDocument.add(new Paragraph(text));
// 4. 关闭文档
pdfDocument.close();
docxFile.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
方法二(docx4j)的步骤流程:
Maven 和 Gradle 依赖:
Maven 依赖:
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j</artifactId>
<version>11.3.1</version>
</dependency>
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-export-fo</artifactId>
<version>8.2.0</version>
</dependency>
<dependency>
<groupId>org.apache.xmlgraphics</groupId>
<artifactId>fop</artifactId>
<version>2.5</version>
</dependency>
Gradle 依赖:
implementation 'org.docx4j:docx4j:11.3.1'
implementation 'org.docx4j:docx4j-export-fo:8.2.0'
implementation 'org.apache.xmlgraphics:fop:2.5'
示例代码:
import org.docx4j.Docx4J;
import org.docx4j.convert.out.FOSettings;
import org.docx4j.model.structure.PageSizePaper;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.apache.fop.apps.Fop;
import org.apache.fop.apps.FopFactory;
import org.apache.fop.apps.MimeConstants;
import org.apache.xmlgraphics.util.MimeConstants;
import org.apache.xmlgraphics.util.MimeConstants;
import org.apache.commons.io.IOUtils;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.io.*;
/**
 * Example converter that renders {@code input.docx} to {@code output.pdf} with docx4j's
 * XSL-FO export.
 */
public class DocxToPdfConverter {

    /**
     * Loads {@code input.docx} from the working directory and writes the rendered PDF to
     * {@code output.pdf}. Failures are reported by printing the stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            // 1. Load the DOCX document.
            WordprocessingMLPackage wordMLPackage = Docx4J.load(new File("input.docx"));

            // 2. Configure the FO export; the requested output MIME type is PDF.
            FOSettings foSettings = Docx4J.createFOSettings();
            foSettings.setWmlPackage(wordMLPackage);
            foSettings.setApacheFopMime(MimeConstants.MIME_PDF);

            // 3. Run the export straight into the target file.
            // Because the settings request MIME_PDF, the bytes written by toFO are the
            // final PDF, so no second FOP/XSLT pass is applied afterwards (re-parsing
            // that output as XML would fail). The output file is opened only after the
            // DOCX has loaded and is closed even when the export throws.
            try (OutputStream pdfOut = new FileOutputStream("output.pdf")) {
                Docx4J.toFO(foSettings, pdfOut, Docx4J.FLAG_EXPORT_PREFER_XSL);
            }
        } catch (Exception e) {
            // Top-level boundary of the example: report any load/convert/IO failure.
            e.printStackTrace();
        }
    }
}
这两种方法都可以将 DOCX 文档转换为 PDF,你可以根据项目需求和偏好选择其中一种。需要注意的是,方法一的示例只提取文档中的纯文本并写入 PDF,不会保留原文档的排版;如果需要尽量保留版式,建议使用方法二。请确保添加正确的依赖项,并将示例代码中的输入和输出文件路径替换为你项目中的实际路径。