csharp_pain/Scraping/COM/samples/CPP/comparser/comparser.cpp

/****************************************************************************
 *
 * File:            comparser.cpp
 *
 * Usage:           comparser input.pdf
 *
 * Description:     Very simple text extraction through the COM interface.
 *
 * Version:         1.01  (5-April-2004)
 *
 * Author:          Hans Baerfuss, PDF Tools AG 
 * 
 * Copyright:       Copyright (C) 2004 PDF Tools AG, Switzerland
 *                  Permission to use, copy, modify, and distribute this
 *                  software and its documentation for any purpose and without
 *                  fee is hereby granted, provided that the above copyright
 *                  notice appear in all copies and that both that copyright
 *                  notice and this permission notice appear in supporting
 *                  documentation.  This software is provided "as is" without
 *                  express or implied warranty.
 *
 ***************************************************************************/

#include "stdafx.h"
#include "pdfparser.h"
#include "pdfparser_i.c"

// Writes the command-line synopsis followed by the list of exit codes
// to standard output.
//
// argv0 - program name to substitute into the "Usage:" line.
void Usage(char* argv0)
{
    // Exit-code legend, printed verbatim after the synopsis.
    static const char* const kExitCodes =
        "Errors: 1 - couldn't open input file\n"
        "        3 - too few arguments\n"
        "        4 - couldn't create COM object\n";

    printf("PDF Export API - C++ COM Sample\n");
    printf("Usage: %s input.pdf\n", argv0);
    printf("%s", kExitCodes);
}

int main(int argc, char* argv[])
{
    // Check if there are enough arguments.
    if (argc < 2)
    {
        Usage(argv[0]);
        return 3;
    }

    // Initialize COM.
    ::CoInitialize(NULL);

    // Create the COM object.
    IPDFDocument* pDocument = NULL;
    HRESULT hResult = ::CoCreateInstance(CLSID_Document, NULL, CLSCTX_INPROC_SERVER, IID_IPDFDocument, (void**) &pDocument);
    if (FAILED(hResult))
    {
        fprintf(stderr, "Couldn't create COM object\n");
        ::CoUninitialize();
        return 4;
    }

    // Open the PDF input document.
    USES_CONVERSION;
    VARIANT_BOOL bDone;
    BSTR fn = T2OLE(argv[1]);
    BSTR password = OLESTR("");
    hResult = pDocument->Open(fn, password, &bDone);
    if (FAILED(hResult) || bDone == 0)
    {
        fprintf(stderr, "Couldn't open input file\n");
        pDocument->Release();
        ::CoUninitialize();
        return 1;
    }
    
    // Traverse the pages.
    long nPageCount;
    hResult = pDocument->get_PageCount(&nPageCount);
    for (long i = 1; i <= nPageCount; i++)
    {
        printf("Page no. %d\n", i);
        hResult = pDocument->put_PageNo(i);

        // Get the page interface.
        IPDFPage* pPage = NULL;
        hResult = pDocument->get_Page(&pPage);
        if (!pPage)
            continue;

        // Get the content interface.
        IPDFContent* pContent = NULL;
        pPage->get_Content(&pContent);
        if (!pContent)
            continue;

        // Traverse the objects in the page content.
        for (;;)
        {
            TPDFContentObject iResult;
            hResult = pContent->GetNextObject(&iResult);
            switch (iResult)
            {
                // Exit when there are no more objects.
                case eNone:
                    goto _exitloop;

                // Extract the text.
                case eText:
                    {
                        IPDFText* pText = NULL;
                        hResult = pContent->get_Text(&pText);
            
                        BSTR s = NULL;
                        hResult = pText->get_UnicodeString(&s);
                        if (s)
                            wprintf(L"Text: %s\r\n", s);
                        ::SysFreeString(s);
                        pText->Release();
                    }
                    break;
            }
        }
    _exitloop:

        // Release the content and page object.
        pContent->Release();
        pPage->Release();
    }

    // Close the document.
    hResult = pDocument->Close();

    // Release the document object.
    pDocument->Release();

    // Uninitialize COM.
    ::CoUninitialize();
	return 0;
}
initial commit 2014-06-26 15:13:46 +00:00			`/****************************************************************************`
			`*`
			`* File: comparser.cpp`
			`*`
			`* Usage: comparser input.pdf`
			`*`
			`* Description: Very simple text extraction through the COM interface.`
			`*`
			`* Version: 1.01 (5-April-2004)`
			`*`
			`* Author: Hans Baerfuss, PDF Tools AG`
			`*`
			`* Copyright: Copyright (C) 2004 PDF Tools AG, Switzerland`
			`* Permission to use, copy, modify, and distribute this`
			`* software and its documentation for any purpose and without`
			`* fee is hereby granted, provided that the above copyright`
			`* notice appear in all copies and that both that copyright`
			`* notice and this permission notice appear in supporting`
			`* documentation. This software is provided "as is" without`
			`* express or implied warranty.`
			`*`
			`***************************************************************************/`

			`#include "stdafx.h"`
			`#include "pdfparser.h"`
			`#include "pdfparser_i.c"`

			`void Usage(char* argv0)`
			`{`
			`printf("PDF Export API - C++ COM Sample\n"`
			`"Usage: %s input.pdf\n", argv0);`
			`printf("Errors: 1 - couldn't open input file\n"`
			`" 3 - too few arguments\n"`
			`" 4 - couldn't create COM object\n");`
			`}`

			`int main(int argc, char* argv[])`
			`{`
			`// Check if there are enough arguments.`
			`if (argc < 2)`
			`{`
			`Usage(argv[0]);`
			`return 3;`
			`}`

			`// Initialize COM.`
			`::CoInitialize(NULL);`

			`// Create the COM object.`
			`IPDFDocument* pDocument = NULL;`
			`HRESULT hResult = ::CoCreateInstance(CLSID_Document, NULL, CLSCTX_INPROC_SERVER, IID_IPDFDocument, (void**) &pDocument);`
			`if (FAILED(hResult))`
			`{`
			`fprintf(stderr, "Couldn't create COM object\n");`
			`::CoUninitialize();`
			`return 4;`
			`}`

			`// Open the PDF input document.`
			`USES_CONVERSION;`
			`VARIANT_BOOL bDone;`
			`BSTR fn = T2OLE(argv[1]);`
			`BSTR password = OLESTR("");`
			`hResult = pDocument->Open(fn, password, &bDone);`
			`if (FAILED(hResult) \|\| bDone == 0)`
			`{`
			`fprintf(stderr, "Couldn't open input file\n");`
			`pDocument->Release();`
			`::CoUninitialize();`
			`return 1;`
			`}`

			`// Traverse the pages.`
			`long nPageCount;`
			`hResult = pDocument->get_PageCount(&nPageCount);`
			`for (long i = 1; i <= nPageCount; i++)`
			`{`
			`printf("Page no. %d\n", i);`
			`hResult = pDocument->put_PageNo(i);`

			`// Get the page interface.`
			`IPDFPage* pPage = NULL;`
			`hResult = pDocument->get_Page(&pPage);`
			`if (!pPage)`
			`continue;`

			`// Get the content interface.`
			`IPDFContent* pContent = NULL;`
			`pPage->get_Content(&pContent);`
			`if (!pContent)`
			`continue;`

			`// Traverse the objects in the page content.`
			`for (;;)`
			`{`
			`TPDFContentObject iResult;`
			`hResult = pContent->GetNextObject(&iResult);`
			`switch (iResult)`
			`{`
			`// Exit when there are no more objects.`
			`case eNone:`
			`goto _exitloop;`

			`// Extract the text.`
			`case eText:`
			`{`
			`IPDFText* pText = NULL;`
			`hResult = pContent->get_Text(&pText);`

			`BSTR s = NULL;`
			`hResult = pText->get_UnicodeString(&s);`
			`if (s)`
			`wprintf(L"Text: %s\r\n", s);`
			`::SysFreeString(s);`
			`pText->Release();`
			`}`
			`break;`
			`}`
			`}`
			`_exitloop:`

			`// Release the content and page object.`
			`pContent->Release();`
			`pPage->Release();`
			`}`

			`// Close the document.`
			`hResult = pDocument->Close();`

			`// Release the document object.`
			`pDocument->Release();`

			`// Uninitialize COM.`
			`::CoUninitialize();`
			`return 0;`
			`}`