csharp_pain/Scraping/COM/samples/Java/ObjExt.java

/****************************************************************************
 *
 * File:            ObjExt.java
 *
 * Usage:           java ObjExt PDF-InputFile
 *
 * Description:     Extract Objects from the PDF-InputFile.
 *
 * Version:         1.03  (18-November-2005)
 *
 * Author:          Philip Renggli, PDF Tools AG   
 * 
 * Copyright:       Copyright (C) 2004 PDF Tools AG, Switzerland
 *                  Permission to use, copy, modify, and distribute this
 *                  software and its documentation for any purpose and without
 *                  fee is hereby granted, provided that the above copyright
 *                  notice appear in all copies and that both that copyright
 *                  notice and this permission notice appear in supporting
 *                  documentation.  This software is provided "as is" without
 *                  express or implied warranty.
 *
 ***************************************************************************/

import com.pdftools.expa.*;
import com.pdftools.*;

/**
 * Command-line sample that lists the content objects found on the first page
 * of a PDF file.
 *
 * <p>For every object returned by {@code Content.getNextObject()} one line is
 * printed to standard output. Text objects are printed as their Unicode
 * string, images are reported with their width and height and additionally
 * stored as {@code image<N>.tif} in the current working directory, and paths
 * are printed as a string.
 *
 * <p>Usage: {@code java ObjExt PDF-InputFile}
 */
public class ObjExt {

    /**
     * Program entry point.
     *
     * @param args command-line arguments; {@code args[0]} is the PDF input file
     */
    public static void main(String[] args) {
        // Without this guard a missing argument only surfaced as an
        // ArrayIndexOutOfBoundsException stack trace from args[0] below.
        if (args == null || args.length < 1) {
            System.err.println("Usage: java ObjExt PDF-InputFile");
            return;
        }

        try {

            // open input file (second constructor argument is an empty string)
            // NOTE(review): the Document is never explicitly released here;
            // confirm whether the API offers a close/dispose call for it.
            Document thePDF = new Document(args[0], "");

            // select 1st page
            thePDF.setPageNo(1);

            // get the content
            Content theContent = thePDF.getPage().getContent();
            // NOTE(review): meaning of the 'false' flag is not visible here;
            // check the API reference before changing it.
            theContent.resetContent(false);

            // running number used to build the output file name of each image
            int imageCounter = 1;
            while (true)
            {
                int theObject = theContent.getNextObject();

                if (theObject == ExpaInitialize.CONTENTOBJECT.eNone)
                {
                    // eNone is treated as "no more objects": report and stop
                    System.out.println("0 End:");
                    break;
                }
                else if (theObject == ExpaInitialize.CONTENTOBJECT.eText)
                {
                    System.out.println("1 Text: " + theContent.getText().getUnicodeString());
                }
                else if (theObject == ExpaInitialize.CONTENTOBJECT.eImage)
                {
                    Image theImage = theContent.getImage();
                    System.out.println("2 Image: width=" + theImage.getWidth() + " height=" + theImage.getHeight());
                    // files are named image1.tif, image2.tif, ... in encounter order
                    theImage.store("image" + imageCounter++ + ".tif", NativeLibrary.COMPRESSION.eComprFlate);
                }
                else if (theObject == ExpaInitialize.CONTENTOBJECT.ePath)
                {
                    // NOTE(review): if getPath() returns byte[], new String(...)
                    // decodes with the platform default charset - confirm.
                    System.out.println("3 Path: " + new String(theContent.getPath()));
                }
                // any other object type is skipped without output
            }

        } catch (Throwable e) {
            // Catch-all kept from the original sample: every failure, including
            // Errors, ends the run with a stack trace on standard error.
            e.printStackTrace();
        }
    }
}
initial commit 2014-06-26 15:13:46 +00:00			`/****************************************************************************`
			`*`
			`* File: ObjExt.java`
			`*`
			`* Usage: java ObjExt PDF-InputFile`
			`*`
			`* Description: Extract Objects from the PDF-InputFile.`
			`*`
			`* Version: 1.03 (18-November-2005)`
			`*`
			`* Author: Philip Renggli, PDF Tools AG`
			`*`
			`* Copyright: Copyright (C) 2004 PDF Tools AG, Switzerland`
			`* Permission to use, copy, modify, and distribute this`
			`* software and its documentation for any purpose and without`
			`* fee is hereby granted, provided that the above copyright`
			`* notice appear in all copies and that both that copyright`
			`* notice and this permission notice appear in supporting`
			`* documentation. This software is provided "as is" without`
			`* express or implied warranty.`
			`*`
			`***************************************************************************/`

			`import com.pdftools.expa.*;`
			`import com.pdftools.*;`

			`public class ObjExt {`

			`public static void main(String[] args) {`
			`try {`

			`// open input file`
			`Document thePDF = new Document(args[0], "");`

			`// select 1st page`
			`thePDF.setPageNo(1);`

			`// get the content`
			`Content theContent = thePDF.getPage().getContent();`
			`theContent.resetContent(false);`

			`int imageCounter = 1;`
			`while(true)`
			`{`
			`int theObject = theContent.getNextObject();`

			`if(theObject == ExpaInitialize.CONTENTOBJECT.eNone)`
			`{`
			`System.out.println("0 End:");`
			`break;`
			`}`
			`else if(theObject == ExpaInitialize.CONTENTOBJECT.eText)`
			`{`
			`System.out.println("1 Text: " + theContent.getText().getUnicodeString());`
			`}`
			`else if(theObject == ExpaInitialize.CONTENTOBJECT.eImage)`
			`{`
			`Image theImage = theContent.getImage();`
			`System.out.println("2 Image: width=" + theImage.getWidth() + " height=" + theImage.getHeight());`
			`theImage.store("image" + imageCounter++ + ".tif", NativeLibrary.COMPRESSION.eComprFlate);`
			`}`
			`else if(theObject == ExpaInitialize.CONTENTOBJECT.ePath)`
			`{`
			`System.out.println("3 Path: " + new String(theContent.getPath()));`
			`}`
			`}`

			`} catch (Throwable e) {`
			`e.printStackTrace();`
			`}`
			`}`
			`}`