csharp_pain/Scraping/COM/samples/Java/ObjExt.java

73 lines
2.5 KiB
Java
Raw Normal View History

2014-06-26 15:13:46 +00:00
/****************************************************************************
*
* File: ObjExt.java
*
* Usage: java ObjExt PDF-InputFile
*
* Description: Extract Objects from the PDF-InputFile.
*
* Version: 1.03 (18-November-2005)
*
* Author: Philip Renggli, PDF Tools AG
*
* Copyright: Copyright (C) 2004 PDF Tools AG, Switzerland
* Permission to use, copy, modify, and distribute this
* software and its documentation for any purpose and without
* fee is hereby granted, provided that the above copyright
* notice appear in all copies and that both that copyright
* notice and this permission notice appear in supporting
* documentation. This software is provided "as is" without
* express or implied warranty.
*
***************************************************************************/
import com.pdftools.expa.*;
import com.pdftools.*;
/**
 * Command-line sample that walks the content objects of the first page of a
 * PDF file and reports each one on standard output.
 *
 * <p>Text objects are printed as Unicode strings, images are reported with
 * their width and height and stored as {@code image<N>.tif} in the current
 * working directory, and paths are printed as strings.
 *
 * <p>Usage: {@code java ObjExt PDF-InputFile}
 */
public class ObjExt {

    /**
     * Program entry point.
     *
     * @param args command-line arguments; {@code args[0]} is the path of the
     *             PDF input file, any further arguments are ignored
     */
    public static void main(String[] args) {
        // Without this guard a missing argument surfaces as an
        // ArrayIndexOutOfBoundsException stack trace from args[0] below,
        // which tells the user nothing about how to invoke the sample.
        if (args.length < 1) {
            System.err.println("Usage: java ObjExt PDF-InputFile");
            return;
        }

        try {
            // open input file
            // NOTE(review): the empty second argument is presumably the
            // document password -- confirm against the Document API.
            Document thePDF = new Document(args[0], "");

            // select 1st page
            thePDF.setPageNo(1);

            // get the content and position it before iterating
            // NOTE(review): meaning of the 'false' flag is not visible here --
            // confirm against the Content API.
            Content theContent = thePDF.getPage().getContent();
            theContent.resetContent(false);

            // running number used to give every stored image its own file name
            int imageCounter = 1;

            while (true) {
                int theObject = theContent.getNextObject();

                if (theObject == ExpaInitialize.CONTENTOBJECT.eNone) {
                    // no more content objects on this page
                    System.out.println("0 End:");
                    break;
                } else if (theObject == ExpaInitialize.CONTENTOBJECT.eText) {
                    System.out.println("1 Text: " + theContent.getText().getUnicodeString());
                } else if (theObject == ExpaInitialize.CONTENTOBJECT.eImage) {
                    Image theImage = theContent.getImage();
                    System.out.println("2 Image: width=" + theImage.getWidth() + " height=" + theImage.getHeight());
                    theImage.store("image" + imageCounter++ + ".tif", NativeLibrary.COMPRESSION.eComprFlate);
                } else if (theObject == ExpaInitialize.CONTENTOBJECT.ePath) {
                    System.out.println("3 Path: " + new String(theContent.getPath()));
                }
                // any other object type is skipped silently
            }
            // NOTE(review): the document is never explicitly released here;
            // check whether Document offers a close/destroy call that should
            // be invoked in a finally block.
        } catch (Throwable e) {
            // Throwable (not just Exception) is caught so that Errors are
            // reported the same way as exceptions.
            e.printStackTrace();
        }
    }
}