/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.future.pdfbox.image; import java.awt.image.BufferedImage; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.util.Iterator; import java.util.List; import javax.imageio.IIOImage; import javax.imageio.ImageIO; import javax.imageio.ImageWriter; import javax.imageio.stream.ImageOutputStream; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; public class ExtractImages
{
public static void main(String[] args) throws IOException { PDDocument doc = PDDocument.load("F:\\1.pdf"); int pageCount = doc.getPageCount(); System.out.println(pageCount); List pages = doc.getDocumentCatalog().getAllPages(); for(int i=0;iConfig Build Path->Add Jars”命令,把工程lib目录下面的包都加入工程的Build Path。 4、使用PDFBox解析PDF内容
在刚刚创建的Eclipse工程中,创建一个ch7.pdfbox包,并创建一个PdfboxTest类。该类包含一个getText方法,用于从一个PDF中获取文本信息,其代码如下。
import java.io.BufferedWriter; import java.io.FileInputStream; import java.io.FileWriter;
import org.pdfbox.pdfparser.PDFParser; import org.pdfbox.util.PDFTextStripper;
public class PdfParser {
/** * @param args */ // TODO 自动生成方法存根
public static void main(String[] args) throws Exception{ FileInputStream fis = new FileInputStream("F:\\task\\lerman-atem2001.pdf"); BufferedWriter writer = new BufferedWriter(new FileWriter("F:\\task\\pdf_change.txt")); PDFParser p = new PDFParser(fis); p.parse(); PDFTextStripper ts = new PDFTextStripper(); String s = ts.getText(p.getPDDocument()); writer.write(s); System.out.println(s); fis.close(); writer.close(); } }
下面是自己按照书上的例子写的代码。