Skip to content

Commit

Permalink
PDFBOX-5929: remove orphan annotations from the structure tree if annotations were removed from the page
Browse files Browse the repository at this point in the history

git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1922671 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
THausherr committed Dec 24, 2024
1 parent 91f2db5 commit 7c32532
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 0 deletions.
33 changes: 33 additions & 0 deletions pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,10 @@ private COSBase createDictionaryClone(COSBase src, COSBase dstParent, COSDiction
// replace annotation with clone
dstDict.setItem(COSName.OBJ, dstObj);
}
else
{
removePossibleOrphanAnnotation(srcObj, srcDict, currentPageDict, dstDict);
}
}
else
{
Expand Down Expand Up @@ -492,6 +496,35 @@ private COSBase createDictionaryClone(COSBase src, COSBase dstParent, COSDiction
}
return dstDict;
}

/**
 * PDFBOX-5929: drops the {@code COSName.OBJ} entry from the destination dictionary when the
 * referenced object looks like an annotation that is not listed in its page's annotation
 * array (an "orphan" annotation).
 * <p>
 * The object is treated as an annotation when its {@code TYPE} entry equals
 * {@code COSName.ANNOT} or its {@code SUBTYPE} entry equals {@code COSName.LINK}. The page is
 * taken from the {@code PG} entry of {@code srcDict}; if that entry is absent,
 * {@code currentPageDict} is used instead. When no page dictionary is available at all,
 * nothing is changed.
 *
 * @param srcObj the source object referenced by the structure element; its type and subtype
 * are inspected and it is searched for in the page's {@code ANNOTS} array.
 * @param srcDict the source dictionary whose {@code PG} entry is consulted first.
 * @param currentPageDict fallback page dictionary, may be null.
 * @param dstDict the destination dictionary from which the {@code OBJ} entry is removed if
 * the annotation turns out to be an orphan.
 */
private void removePossibleOrphanAnnotation(COSDictionary srcObj, COSDictionary srcDict,
        COSDictionary currentPageDict, COSDictionary dstDict)
{
    // Both entries are looked up before testing, in the same order as before.
    COSBase type = srcObj.getDictionaryObject(COSName.TYPE);
    COSBase subtype = srcObj.getDictionaryObject(COSName.SUBTYPE);
    boolean looksLikeAnnotation = COSName.ANNOT.equals(type) || COSName.LINK.equals(subtype);
    if (!looksLikeAnnotation)
    {
        return;
    }

    // The /Pg entry is not always present on this level, so fall back to the current page.
    COSDictionary pageDict = srcDict.getCOSDictionary(COSName.PG);
    if (pageDict == null)
    {
        pageDict = currentPageDict;
    }
    if (pageDict == null)
    {
        // no page to check against, leave the destination untouched
        return;
    }

    COSArray pageAnnotations = pageDict.getCOSArray(COSName.ANNOTS);
    boolean listedOnPage = pageAnnotations != null && pageAnnotations.indexOfObject(srcObj) != -1;
    if (listedOnPage)
    {
        return;
    }

    // Removing the whole OBJR entry would be the ideal solution. Only the OBJ entry is
    // removed here, which is enough to avoid page orphans that could be reached through
    // the destination of the annotation.
    LOG.warn("An annotation OBJ that isn't in the page has been removed from the structure tree");
    dstDict.removeItem(COSName.OBJ);
}
}

// Look for /StructParent and /StructParents and add them to the destination tree
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -924,6 +924,7 @@ else if (kdict.containsKey(COSName.NUMS))
PDPage page = annotation.getPage();
if (annotation instanceof PDAnnotationLink)
{
// PDFBOX-5928: check whether the destination of a link annotation is an orphan
PDAnnotationLink link = (PDAnnotationLink) annotation;
PDDestination destination = link.getDestination();
if (destination == null)
Expand Down Expand Up @@ -1130,6 +1131,35 @@ void testSplitWithStructureTreeAndDestinations() throws IOException
}
}

/**
 * PDFBOX-5929: verifies that splitting still yields a document without page orphans when the
 * annotations have been stripped from every page of the source document beforehand, i.e. the
 * structure tree may still reference annotations that are no longer on any page.
 * <p>
 * The test empties the annotation list of each page, splits pages 1 to 2 into a single
 * document, and then runs the page orphan check on the result.
 *
 * @throws IOException if the source file cannot be loaded or the split fails.
 */
@Test
void testSplitWithStructureTreeAndDestinationsAndRemovedAnnotations() throws IOException
{
    File sourceFile = new File(SRCDIR,"PDFBOX-5762-722238.pdf");
    try (PDDocument srcDoc = Loader.loadPDF(sourceFile))
    {
        // detach all annotations from the pages; the structure tree is left as it is
        for (PDPage srcPage : srcDoc.getPages())
        {
            srcPage.setAnnotations(Collections.emptyList());
        }

        Splitter pageSplitter = new Splitter();
        pageSplitter.setStartPage(1);
        pageSplitter.setEndPage(2);
        pageSplitter.setSplitAtPage(2);

        List<PDDocument> parts = pageSplitter.split(srcDoc);
        assertEquals(1, parts.size());

        try (PDDocument firstPart = parts.get(0))
        {
            assertEquals(2, firstPart.getNumberOfPages());
            checkForPageOrphans(firstPart);
        }
    }
}

/**
* Check for the bug that happened in PDFBOX-5792, where a destination was outside a target
* document and hit an NPE in the next call of Splitter.fixDestinations().
Expand Down

0 comments on commit 7c32532

Please sign in to comment.