- Mar 10 Fri 2017 14:36
使用itext將PDF分頁
- Dec 09 Fri 2016 16:58
PDFBox remove all text
PDFBox 1.8.x
import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdfwriter.ContentStreamWriter; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.util.PDFOperator; import java.util.ArrayList; import java.util.List; /** * This is an example on how to remove all text from PDF document. * * Usage: java org.apache.pdfbox.examples.util.RemoveAllText <input-pdf> <output-pdf> * * @author Ben Litchfield * @version $Revision: 1.2 $ */ public class RemoveAllText { /** * Default constructor. */ private RemoveAllText() { //example class should not be instantiated } /** * This will remove all text from a PDF document. * * @param args The command line arguments. * * @throws Exception If there is an error parsing the document. */ public static void main( String[] args ) throws Exception { if( args.length != 2 ) { usage(); } else { PDDocument document = null; try { document = PDDocument.load( args[0] ); if( document.isEncrypted() ) { System.err.println( "Error: Encrypted documents are not supported for this example." ); System.exit( 1 ); } List allPages = document.getDocumentCatalog().getAllPages(); for( int i=0; i " ); } } ();>
- Dec 09 Fri 2016 16:32
使用PDFBox擷取字元座標
使用 PDFBox 1.8.x 版本
import java.io.*;
import org.apache.pdfbox.exceptions.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.pdfbox.util.TextPosition;
import java.io.IOException;
import java.util.List;
public class PrintTextLocations extends PDFTextStripper {
public PrintTextLocations() throws IOException {
super.setSortByPosition(true);
}
public static void main(String[] args) throws Exception {
PDDocument document = null;
try {
File input = new File("C:\\path\\to\\PDF.pdf");
document = PDDocument.load(input);
if (document.isEncrypted()) {
try {
document.decrypt("");
}
}
PrintTextLocations printer = new PrintTextLocations();
List allPages = document.getDocumentCatalog().getAllPages();
//讀取每一頁
for (int i = 0; i < allPages.size(); i++) {
PDPage page = (PDPage) allPages.get(i);
System.out.println("Processing page: " + i);
PDStream contents = page.getContents();
if (contents != null) {
//顯示字元資料
printer.processStream(page, page.findResources(), page.getContents().getStream());
}
}
} finally {
if (document != null) {
document.close();
}
}
}
/**
* @param text The text to be processed
*/
@Override /* this is questionable, not sure if needed... */
protected void processTextPosition(TextPosition text) {
System.out.println("String[" + text.getXDirAdj() + ","
+ text.getYDirAdj() + " fs=" + text.getFontSize() + " xscale="
+ text.getXScale() + " height=" + text.getHeightDir() + " space="
+ text.getWidthOfSpace() + " width="
+ text.getWidthDirAdj() + "]" + text.getCharacter());
}
}
- Nov 09 Wed 2016 17:41
[Java] 利用POI讀取指定行之資料並建立新的Excel檔案
- Nov 04 Fri 2016 10:41
[Java] POI讀取資料範例