Java删除Word文档中水印的实现方法

在Java中删除Word文档中的水印,主要有以下几种实现方式,分别基于不同的技术栈和库:

图片[1]_Java删除Word文档中水印的实现方法_知途无界

一、使用Apache POI库(适用于.docx格式)

Apache POI是处理Microsoft Office文档的Java API,可以操作.docx格式的Word文档。

1. 添加依赖

<!--
  Apache POI dependencies for .docx (XWPF) processing.
  All POI artifacts must share the same version. Since POI 5.0 the schema jar
  formerly published as poi-ooxml-schemas / ooxml-schemas is published as
  poi-ooxml-lite / poi-ooxml-full; mixing a 4.x schema jar with poi-ooxml 5.x
  puts two incompatible sets of schema classes on the classpath.
  poi-ooxml-full is used here because the code below touches low-level CT*
  classes that the reduced "lite" jar is not guaranteed to contain.
-->
<dependencies>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml</artifactId>
        <version>5.2.4</version>
        <exclusions>
            <!-- Replaced by poi-ooxml-full below; keep only one schema jar. -->
            <exclusion>
                <groupId>org.apache.poi</groupId>
                <artifactId>poi-ooxml-lite</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml-full</artifactId>
        <version>5.2.4</version>
    </dependency>
</dependencies>

2. 删除水印的实现代码

import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

/**
 * Removes watermarks from {@code .docx} documents using Apache POI (XWPF).
 *
 * <p>Three complementary strategies are applied:
 * <ol>
 *   <li>runs in headers/footers whose text contains a watermark keyword are blanked;</li>
 *   <li>the document-level {@code w:background} element is removed;</li>
 *   <li>VML {@code v:shape} elements that look like watermarks are deleted from the
 *       main document part and from every header/footer part.</li>
 * </ol>
 * All detection is heuristic; legitimate content matching the heuristics is removed too.
 */
public class WordWatermarkRemover {

    /** Namespace URI of VML elements such as {@code v:shape}. */
    private static final String VML_NAMESPACE = "urn:schemas-microsoft-com:vml";

    /** Keywords that mark a run as watermark text (matched case-insensitively). */
    private static final String[] WATERMARK_KEYWORDS =
            {"水印", "CONFIDENTIAL", "DRAFT", "内部使用", "保密"};

    /**
     * Loads a document, strips watermarks and writes the result.
     *
     * <p>I/O failures are reported on standard error via the stack trace and are not
     * propagated to the caller.
     *
     * @param inputPath  path of the document to read
     * @param outputPath path of the cleaned document to write
     */
    public static void removeWatermark(String inputPath, String outputPath) {
        try (FileInputStream fis = new FileInputStream(inputPath);
             XWPFDocument document = new XWPFDocument(fis)) {

            removeHeaderFooterWatermarks(document);
            removeBackgroundWatermark(document);

            // Open the output only once processing has succeeded, so a failure
            // above does not leave an empty output file behind.
            try (FileOutputStream fos = new FileOutputStream(outputPath)) {
                document.write(fos);
            }
            System.out.println("水印删除成功!输出文件:" + outputPath);

        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Blanks watermark text in every header and footer of the document.
     *
     * <p>The header/footer lists already contain the default header and footer,
     * so they need no separate pass.
     */
    private static void removeHeaderFooterWatermarks(XWPFDocument document) {
        for (XWPFHeader header : document.getHeaderList()) {
            removeWatermarksFromBodyElements(header.getBodyElements());
        }
        for (XWPFFooter footer : document.getFooterList()) {
            removeWatermarksFromBodyElements(footer.getBodyElements());
        }
    }

    /**
     * Walks paragraphs and (recursively) table cells, blanking watermark runs.
     *
     * @param bodyElements body elements of a header, footer or table cell
     */
    private static void removeWatermarksFromBodyElements(java.util.List<IBodyElement> bodyElements) {
        for (IBodyElement element : bodyElements) {
            if (element instanceof XWPFParagraph) {
                removeWatermarkRuns(((XWPFParagraph) element).getRuns());
            } else if (element instanceof XWPFTable) {
                for (XWPFTableRow row : ((XWPFTable) element).getRows()) {
                    for (XWPFTableCell cell : row.getTableCells()) {
                        removeWatermarksFromBodyElements(cell.getBodyElements());
                    }
                }
            }
        }
    }

    /**
     * Blanks the text of every run whose text contains a watermark keyword.
     * The runs themselves stay in place so paragraph structure is untouched.
     */
    private static void removeWatermarkRuns(java.util.List<XWPFRun> runs) {
        for (XWPFRun run : runs) {
            if (isWatermarkText(run.text())) {
                // A run may hold several w:t segments; clear all of them,
                // not just the first one.
                int segments = run.getCTR().sizeOfTArray();
                for (int i = 0; i < segments; i++) {
                    run.setText("", i);
                }
            }
        }
    }

    /**
     * Tells whether the text contains one of the watermark keywords.
     *
     * @param text run text, may be {@code null}
     * @return {@code true} if a keyword occurs in {@code text}, ignoring case
     */
    private static boolean isWatermarkText(String text) {
        if (text == null) {
            return false;
        }
        // Locale.ROOT keeps the comparison stable regardless of the default
        // locale (e.g. Turkish maps 'I' to a dotless 'ı').
        String lowerText = text.toLowerCase(java.util.Locale.ROOT);
        for (String keyword : WATERMARK_KEYWORDS) {
            if (lowerText.contains(keyword.toLowerCase(java.util.Locale.ROOT))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes the document background ({@code w:background}) and then any
     * watermark-like VML shapes.
     */
    private static void removeBackgroundWatermark(XWPFDocument document) {
        // w:background is a direct child of w:document, not of w:body.
        CTDocument1 ctDocument = document.getDocument();
        if (ctDocument.isSetBackground()) {
            ctDocument.unsetBackground();
        }

        removeWatermarkShapes(document);
    }

    /**
     * Deletes watermark-like VML shapes from the main document part and from
     * every header and footer part.
     */
    private static void removeWatermarkShapes(XWPFDocument document) {
        removeWatermarkShapesUnder(document.getDocument().getDomNode());
        for (XWPFHeader header : document.getHeaderList()) {
            removeWatermarkShapesUnder(header._getHdrFtr().getDomNode());
        }
        for (XWPFFooter footer : document.getFooterList()) {
            removeWatermarkShapesUnder(footer._getHdrFtr().getDomNode());
        }
    }

    /**
     * Deletes watermark-like {@code v:shape} descendants of one XML root.
     * Failures are reported on standard error and limited to this root.
     */
    private static void removeWatermarkShapesUnder(org.w3c.dom.Node root) {
        try {
            org.w3c.dom.NodeList shapes;
            if (root instanceof org.w3c.dom.Document) {
                shapes = ((org.w3c.dom.Document) root).getElementsByTagNameNS(VML_NAMESPACE, "shape");
            } else if (root instanceof org.w3c.dom.Element) {
                shapes = ((org.w3c.dom.Element) root).getElementsByTagNameNS(VML_NAMESPACE, "shape");
            } else {
                return;
            }

            // Collect first: removing nodes while walking the node list could skip entries.
            java.util.List<org.w3c.dom.Node> nodesToRemove = new java.util.ArrayList<>();
            for (int i = 0; i < shapes.getLength(); i++) {
                org.w3c.dom.Node shape = shapes.item(i);
                if (isWatermarkShape(shape)) {
                    nodesToRemove.add(shape);
                }
            }

            for (org.w3c.dom.Node node : nodesToRemove) {
                org.w3c.dom.Node parent = node.getParentNode();
                if (parent != null) {
                    parent.removeChild(node);
                }
            }
        } catch (RuntimeException e) {
            System.err.println("删除水印形状时出错: " + e.getMessage());
        }
    }

    /**
     * Heuristic watermark test for a {@code v:shape} node: either its id
     * contains "watermark" (Word typically generates ids such as
     * {@code PowerPlusWaterMarkObject...} / {@code WordPictureWatermark...}),
     * or it is absolutely positioned and either has {@code z-index:-1} or is rotated.
     */
    private static boolean isWatermarkShape(org.w3c.dom.Node shape) {
        org.w3c.dom.NamedNodeMap attributes = shape.getAttributes();
        if (attributes == null) {
            return false;
        }

        org.w3c.dom.Node idAttr = attributes.getNamedItem("id");
        if (idAttr != null && idAttr.getNodeValue() != null
                && idAttr.getNodeValue().toLowerCase(java.util.Locale.ROOT).contains("watermark")) {
            return true;
        }

        // Shapes without a style attribute are simply not matched by the style heuristic.
        org.w3c.dom.Node styleAttr = attributes.getNamedItem("style");
        String style = styleAttr == null ? null : styleAttr.getNodeValue();
        return style != null
                && style.contains("position:absolute")
                && (style.contains("z-index:-1") || style.contains("rotation"));
    }

    /** Demo entry point using fixed file names in the working directory. */
    public static void main(String[] args) {
        String inputFile = "input_with_watermark.docx";
        String outputFile = "output_without_watermark.docx";
        removeWatermark(inputFile, outputFile);
    }
}

二、使用Aspose.Words for Java(商业库,功能强大)

Aspose.Words是专业的Word处理库,对水印操作的支持更加完善。

1. 添加依赖

<!-- Aspose.Words for Java (commercial library); the jdk17 classifier selects the JDK 17 build. -->
<dependency>
    <groupId>com.aspose</groupId>
    <artifactId>aspose-words</artifactId>
    <version>24.1</version>
    <classifier>jdk17</classifier>
</dependency>

2. 使用Aspose删除水印

import com.aspose.words.*;

import java.io.File;

/**
 * Removes watermarks from Word documents using Aspose.Words for Java.
 *
 * <p>The document's built-in watermark is removed first; keyword- and
 * shape-based heuristics then handle watermarks inserted by other means.
 * The heuristics are deliberately broad and may also remove legitimate content.
 */
public class AsposeWatermarkRemover {

    /** Lower-case keywords that mark text as belonging to a watermark. */
    private static final String[] WATERMARK_KEYWORDS =
            {"watermark", "水印", "confidential", "draft", "sample", "内部", "保密"};

    /**
     * Loads a document, strips watermarks and saves the result.
     *
     * <p>To avoid the evaluation-mode watermark, apply a license before loading,
     * e.g. {@code new License().setLicense("Aspose.Words.lic")}.
     * Any failure is reported on standard error and not propagated.
     *
     * @param inputPath  path of the document to read
     * @param outputPath path of the cleaned document to write
     */
    public static void removeWatermarkWithAspose(String inputPath, String outputPath) {
        try {
            Document doc = new Document(inputPath);

            // Watermarks created through the standard watermark feature.
            doc.getWatermark().remove();

            removeTextWatermarks(doc);
            removePictureWatermarks(doc);
            removeBackgroundWatermarks(doc);

            doc.save(outputPath);
            System.out.println("Aspose删除水印成功!输出文件:" + outputPath);

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Removes every paragraph in any header or footer whose text contains a
     * watermark keyword. All header/footer types are processed alike, so no
     * filtering by type is needed.
     */
    private static void removeTextWatermarks(Document doc) {
        for (Section section : doc.getSections()) {
            for (HeaderFooter headerFooter : section.getHeadersFooters()) {
                NodeCollection paragraphs = headerFooter.getChildNodes(NodeType.PARAGRAPH, true);
                // Iterate over a snapshot: the collection is live, and removing
                // nodes while iterating it directly can skip elements.
                for (Node paragraph : paragraphs.toArray()) {
                    if (containsWatermarkText(paragraph.getText())) {
                        paragraph.remove();
                    }
                }
            }
        }
    }

    /**
     * Removes shapes that look like watermarks. Pictures are shape nodes as
     * well, so a single pass over the shapes covers image watermarks too.
     */
    private static void removePictureWatermarks(Document doc) {
        NodeCollection shapes = doc.getChildNodes(NodeType.SHAPE, true);
        for (Node node : shapes.toArray()) {
            if (isWatermarkShape((Shape) node)) {
                node.remove();
            }
        }
    }

    /**
     * Removes the page background and any text box whose text contains a
     * watermark keyword.
     */
    private static void removeBackgroundWatermarks(Document doc) {
        // The page background (color or image) is a document-level shape.
        doc.setBackgroundShape(null);

        NodeCollection shapes = doc.getChildNodes(NodeType.SHAPE, true);
        for (Node node : shapes.toArray()) {
            Shape shape = (Shape) node;
            if (shape.getTextBox() != null && containsWatermarkText(shape.getText())) {
                shape.remove();
            }
        }
    }

    /**
     * Heuristic watermark test for a shape.
     *
     * @param shape the shape to inspect, may be {@code null}
     * @return {@code true} if the name mentions a watermark, the text box flows
     *         bottom-to-top, or the shape is rotated by more than 30 degrees
     */
    private static boolean isWatermarkShape(Shape shape) {
        if (shape == null) {
            return false;
        }

        String name = shape.getName();
        if (name != null && (name.contains("Watermark") || name.contains("水印"))) {
            return true;
        }

        // The text layout direction is a property of the shape's text box.
        TextBox textBox = shape.getTextBox();
        if (textBox != null && textBox.getLayoutFlow() == LayoutFlow.BOTTOM_TO_TOP) {
            return true;
        }

        // Watermarks are commonly drawn diagonally; treat a large rotation as a hint.
        return Math.abs(shape.getRotation()) > 30;
    }

    /**
     * Tells whether the text contains a watermark keyword, ignoring case.
     *
     * @param text text to inspect, may be {@code null}
     * @return {@code false} for {@code null} or blank text
     */
    private static boolean containsWatermarkText(String text) {
        if (text == null || text.trim().isEmpty()) {
            return false;
        }

        // Locale.ROOT keeps the match independent of the default locale.
        String lowerText = text.toLowerCase(java.util.Locale.ROOT);
        for (String keyword : WATERMARK_KEYWORDS) {
            if (lowerText.contains(keyword)) {
                return true;
            }
        }
        return false;
    }

    /** Demo entry point using fixed file names in the working directory. */
    public static void main(String[] args) {
        String inputFile = "input_with_watermark.docx";
        String outputFile = "output_aspose_removed.docx";
        removeWatermarkWithAspose(inputFile, outputFile);
    }
}

三、使用docx4j库

docx4j是另一个处理OOXML文档的Java库,也可以用于删除水印。

1. 添加依赖

<!-- docx4j, pulled in through its MOXy JAXB binding artifact. -->
<dependency>
    <groupId>org.docx4j</groupId>
    <artifactId>docx4j-JAXB-MOXy</artifactId>
    <version>11.4.9</version>
</dependency>

2. 使用docx4j删除水印

import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.HeaderPart;
import org.docx4j.wml.*;

import java.io.File;
import java.util.List;

public class Docx4jWatermarkRemover {
    
    public static void removeWatermarkWithDocx4j(String inputPath, String outputPath) {
        try {
            // 加载文档
            WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(new File(inputPath));
            
            // 删除页眉中的水印
            removeHeaderWatermarks(wordMLPackage);
            
            // 删除背景水印
            removeBackgroundWatermarks(wordMLPackage);
            
            // 保存文档
            wordMLPackage.save(new File(outputPath));
            System.out.println("docx4j删除水印成功!输出文件:" + outputPath);
            
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    
    private static void removeHeaderWatermarks(WordprocessingMLPackage wordMLPackage) throws Exception {
        List<SectionWrapper> sections = wordMLPackage.getDocumentModel().getSections();
        
        for (SectionWrapper section : sections) {
            HeaderFooterPolicy hfp = section.getHeaderFooterPolicy();
            
            if (hfp.getDefaultHeader() != null) {
                processHeaderFooter(hfp.getDefaultHeader());
            }
            if (hfp.getEvenPagesHeader() != null) {
                processHeaderFooter(hfp.getEvenPagesHeader());
            }
            if (hfp.getFirstPageHeader() != null) {
                processHeaderFooter(hfp.getFirstPageHeader());
            }
        }
    }
    
    private static void processHeaderFooter(HeaderPart headerPart) throws Exception {
        if (headerPart == null) return;
        
        MainDocumentPart documentPart = headerPart.getMainDocumentPart();
        List<Object> content = documentPart.getContent();
        
        // 过滤掉水印内容
        List<Object> filteredContent = new java.util.ArrayList<>();
        for (Object obj : content) {
            if (!isWatermarkContent(obj)) {
                filteredContent.add(obj);
            }
        }
        
        // 更新内容
        content.clear();
        content.addAll(filteredContent);
    }
    
    private static boolean isWatermarkContent(Object obj) {
        if (obj instanceof P) {
            P paragraph = (P) obj;
            String text = extractTextFromParagraph(paragraph);
            return containsWatermarkText(text);
        }
        return false;
    }
    
    private static String extractTextFromParagraph(P paragraph) {
        StringBuilder sb = new StringBuilder();
        List<Object> elements = paragraph.getContent();
        
        for (Object element : elements) {
            if (element instanceof R) {
                R run = (R) element;
                List<Object> runElements = run.getContent();
                for (Object runElement : runElements) {
                    if (runElement instanceof Text) {
                        Text text = (Text) runElement;
                        sb.append(text.getValue());
                    }
                }
            }
        }
        return sb.toString();
    }
    
    private static void removeBackgroundWatermarks(WordprocessingMLPackage wordMLPackage) {
        // 实现背景水印删除逻辑
        // 类似于Apache POI的方法
    }
    
    private static boolean containsWatermarkText(String text) {
        if (text == null) return false;
        String lowerText = text.toLowerCase();
        return lowerText.contains("watermark") || lowerText.contains("水印") || 
               lowerText.contains("confidential") || lowerText.contains("draft");
    }
}

四、完整工具类示例

以下是一个整合多种方法的完整工具类:

import java.io.File;
import java.util.HashMap;
import java.util.Map;

/**
 * Facade that dispatches watermark removal to one of the library-specific
 * implementations (Apache POI, Aspose.Words or docx4j).
 */
public class WordWatermarkUtils {

    /** Files larger than this many bytes are routed to Aspose.Words by {@link #autoRemoveWatermark}. */
    private static final long LARGE_FILE_THRESHOLD_BYTES = 10L * 1024 * 1024;

    /** Accepted library identifiers mapped to their display names. */
    private static final Map<String, String> SUPPORTED_LIBRARIES = new HashMap<>();

    static {
        SUPPORTED_LIBRARIES.put("apache-poi", "Apache POI");
        SUPPORTED_LIBRARIES.put("aspose", "Aspose.Words");
        SUPPORTED_LIBRARIES.put("docx4j", "docx4j");
    }

    /**
     * Removes watermarks with the requested library.
     *
     * <p>The library-specific removers log their own failures instead of
     * throwing, so {@code true} means the remover was invoked, not that the
     * output is guaranteed to be watermark-free.
     *
     * @param inputPath  path of the document to read
     * @param outputPath path of the cleaned document to write
     * @param library    one of {@code "apache-poi"}, {@code "aspose"} or
     *                   {@code "docx4j"} (case-insensitive)
     * @return {@code false} if the input file does not exist or the library is
     *         unknown or {@code null}; {@code true} otherwise
     */
    public static boolean removeWatermark(String inputPath, String outputPath, String library) {
        if (!new File(inputPath).exists()) {
            System.err.println("输入文件不存在: " + inputPath);
            return false;
        }

        // Locale.ROOT: with e.g. a Turkish default locale "APACHE-POI" would
        // lower-case to "apache-poı" and never match. A null library falls
        // through to the "unsupported" branch instead of throwing.
        String key = library == null ? "" : library.toLowerCase(java.util.Locale.ROOT);

        switch (key) {
            case "apache-poi":
                WordWatermarkRemover.removeWatermark(inputPath, outputPath);
                return true;
            case "aspose":
                AsposeWatermarkRemover.removeWatermarkWithAspose(inputPath, outputPath);
                return true;
            case "docx4j":
                Docx4jWatermarkRemover.removeWatermarkWithDocx4j(inputPath, outputPath);
                return true;
            default:
                System.err.println("不支持的库: " + library);
                // List the identifiers this method accepts, not the display names.
                System.err.println("支持的库: " + SUPPORTED_LIBRARIES.keySet());
                return false;
        }
    }

    /**
     * Picks a library by input file size and removes watermarks with it:
     * Aspose.Words for files larger than 10 MB, Apache POI otherwise.
     *
     * @param inputPath  path of the document to read
     * @param outputPath path of the cleaned document to write
     */
    public static void autoRemoveWatermark(String inputPath, String outputPath) {
        long fileSize = new File(inputPath).length();

        if (fileSize > LARGE_FILE_THRESHOLD_BYTES) {
            System.out.println("文件较大,使用Aspose.Words处理...");
            removeWatermark(inputPath, outputPath, "aspose");
        } else {
            System.out.println("使用Apache POI处理...");
            removeWatermark(inputPath, outputPath, "apache-poi");
        }
    }
}

五、注意事项和最佳实践

1. 水印类型识别

  • 文字水印:通常以艺术字(VML形状)的形式存放在页眉中,其文本常包含特定关键词
  • 图片水印:作为形状或图片插入,可能有特定名称或属性
  • 背景水印:通过页面颜色或背景图片实现

2. 处理复杂水印

对于复杂的对角线水印或艺术字水印,可能需要:

  • 分析XML结构识别水印元素
  • 使用正则表达式匹配水印文本模式
  • 结合多种方法综合处理

3. 备份原文件

在处理前务必备份原文件,防止意外损坏。

4. 性能考虑

  • Apache POI适合简单水印处理和小文件
  • Aspose.Words功能强大但为商业软件
  • docx4j介于两者之间,开源免费

这些方法提供了从不同角度删除Word水印的解决方案,可以根据具体需求选择合适的实现方式。

© 版权声明
THE END
喜欢就点个赞,支持一下吧!
点赞72 分享
评论 抢沙发
头像
欢迎您留下评论!
提交
头像

昵称

取消
昵称表情代码图片

    暂无评论内容