Author: tilman
Date: Tue Jul 29 14:25:12 2025
New Revision: 1927521
Log:
PDFBOX-6047: add text extraction options submenu
Added:
pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/ui/TextStripperMenu.java
(contents, props changed)
Modified:
pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java
pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ViewMenu.java
Modified:
pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java
==============================================================================
--- pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java Tue Jul 29 14:25:07 2025 (r1927520)
+++ pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java Tue Jul 29 14:25:12 2025 (r1927521)
@@ -69,6 +69,7 @@ import org.apache.pdfbox.debugger.ui.Hig
import org.apache.pdfbox.debugger.ui.ImageTypeMenu;
import org.apache.pdfbox.debugger.ui.RenderDestinationMenu;
import org.apache.pdfbox.debugger.ui.TextDialog;
+import org.apache.pdfbox.debugger.ui.TextStripperMenu;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.fixup.AcroFormDefaultFixup;
import org.apache.pdfbox.pdmodel.fixup.PDDocumentFixup;
@@ -215,7 +216,7 @@ public class PagePane implements ActionL
// (checking widget.getPage() also works, but it is sometimes null)
if (dictionarySet.contains(widget.getCOSObject()) && widget.getRectangle() != null)
{
- rectMap.put(widget.getRectangle(), "Field name: " + field.getFullyQualifiedName());
+ rectMap.put(widget.getRectangle(), "Field name: " + field.getFullyQualifiedName() + ", value: " + field.getValueAsString());
}
}
}
@@ -310,6 +311,8 @@ public class PagePane implements ActionL
PDFTextStripper stripper = new PDFTextStripper();
stripper.setStartPage(pageIndex + 1);
stripper.setEndPage(pageIndex + 1);
+ stripper.setSortByPosition(TextStripperMenu.isSorted());
+ stripper.setIgnoreContentStreamSpaceGlyphs(TextStripperMenu.isIgnoreSpaces());
textDialog.setText(stripper.getText(document));
}
catch (IOException ex)
Added:
pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/ui/TextStripperMenu.java
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/ui/TextStripperMenu.java Tue Jul 29 14:25:12 2025 (r1927521)
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.debugger.ui;
+
+import javax.swing.JCheckBoxMenuItem;
+import javax.swing.JMenu;
+
+/**
+ * Submenu holding the text extraction options ("sort" and "ignore spaces").
+ *
+ * @author Tilman Hausherr
+ */
+public final class TextStripperMenu extends MenuBase
+{
+    private static TextStripperMenu instance;
+
+    // Kept per instance and read through getInstance(), so the static accessors
+    // below cannot hit a null item when they are called before the menu was built.
+    private final JCheckBoxMenuItem sortOptionMenuItem;
+    private final JCheckBoxMenuItem ignoreSpacesOptionMenuItem;
+
+    /**
+     * Constructor. Builds the submenu and its two check box items.
+     */
+    private TextStripperMenu()
+    {
+        JMenu menu = new JMenu("Text extraction options");
+
+        sortOptionMenuItem = new JCheckBoxMenuItem("sort");
+        menu.add(sortOptionMenuItem);
+
+        ignoreSpacesOptionMenuItem = new JCheckBoxMenuItem("ignore spaces");
+        menu.add(ignoreSpacesOptionMenuItem);
+
+        setMenu(menu);
+    }
+
+    /**
+     * Provides the TextStripperMenu instance, creating it on first use.
+     *
+     * @return TextStripperMenu instance.
+     */
+    public static TextStripperMenu getInstance()
+    {
+        if (instance == null)
+        {
+            instance = new TextStripperMenu();
+        }
+        return instance;
+    }
+
+    /**
+     * Tells whether the "sort" option is currently selected.
+     *
+     * @return true if the "sort" item is checked.
+     */
+    public static boolean isSorted()
+    {
+        return getInstance().sortOptionMenuItem.isSelected();
+    }
+
+    /**
+     * Tells whether the "ignore spaces" option is currently selected.
+     *
+     * @return true if the "ignore spaces" item is checked.
+     */
+    public static boolean isIgnoreSpaces()
+    {
+        return getInstance().ignoreSpacesOptionMenuItem.isSelected();
+    }
+}
Modified:
pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ViewMenu.java
==============================================================================
--- pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ViewMenu.java Tue Jul 29 14:25:07 2025 (r1927520)
+++ pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ViewMenu.java Tue Jul 29 14:25:12 2025 (r1927521)
@@ -229,6 +229,10 @@ public class ViewMenu extends MenuBase
extractTextMenuItem.setEnabled(false);
viewMenu.add(extractTextMenuItem);
+ TextStripperMenu textStripperMenu = TextStripperMenu.getInstance();
+ textStripperMenu.setEnableMenu(false);
+ viewMenu.add(textStripperMenu.getMenu());
+
viewMenu.addSeparator();
repairAcroFormMenuItem = new JCheckBoxMenuItem(REPAIR_ACROFORM);