VERSION 5.00 Object = "{F9043C88-F6F2-101A-A3C9-08002B2F49FB}#1.2#0"; "comdlg32.ocx" Begin VB.Form Form1 BackColor = &H00FFFFFF& BorderStyle = 1 'Fixed Single Caption = "Extract Text" ClientHeight = 9285 ClientLeft = 45 ClientTop = 435 ClientWidth = 5550 Icon = "TextExt.frx":0000 LinkTopic = "Form1" MaxButton = 0 'False MinButton = 0 'False ScaleHeight = 9285 ScaleWidth = 5550 StartUpPosition = 3 'Windows Default Begin VB.CommandButton GetText Caption = "Extract" Height = 375 Left = 2280 TabIndex = 9 Top = 8760 Width = 1095 End Begin MSComDlg.CommonDialog FileDialog Left = 4080 Top = 8760 _ExtentX = 847 _ExtentY = 847 _Version = 393216 End Begin VB.CommandButton Browse BackColor = &H00C0C0C0& Caption = "..." Height = 375 Left = 5040 TabIndex = 4 Top = 8400 Width = 375 End Begin VB.TextBox PDFFile Height = 285 Left = 1080 TabIndex = 3 Top = 8400 Width = 3855 End Begin VB.Frame Frame1 BackColor = &H00AC7A3E& BorderStyle = 0 'None ForeColor = &H00AC7A3E& Height = 7455 Left = 120 TabIndex = 1 Top = 720 Width = 5295 Begin VB.TextBox TextOut BorderStyle = 0 'None Height = 7035 Left = 50 MultiLine = -1 'True ScrollBars = 2 'Vertical TabIndex = 2 Top = 360 Width = 5190 End Begin VB.Label Label2 BackStyle = 0 'Transparent Caption = "Output Text" BeginProperty Font Name = "Verdana" Size = 9 Charset = 0 Weight = 700 Underline = 0 'False Italic = 0 'False Strikethrough = 0 'False EndProperty ForeColor = &H00FFFFFF& Height = 255 Left = 120 TabIndex = 5 Top = 90 Width = 2055 End End Begin VB.TextBox TextLogo BorderStyle = 0 'None BeginProperty Font Name = "Verdana" Size = 12.75 Charset = 0 Weight = 700 Underline = 0 'False Italic = 0 'False Strikethrough = 0 'False EndProperty ForeColor = &H00404040& Height = 375 Left = 0 TabIndex = 0 Text = " pdf-tools.com" Top = 120 Width = 5535 End Begin VB.Frame Frame3 BackColor = &H00E0E0E0& BorderStyle = 0 'None Height = 615 Left = 0 TabIndex = 7 Top = 0 Width = 5535 End Begin VB.Frame Frame2 BackColor = &H00E0E0E0& BorderStyle = 0 'None 
Height = 9375 Left = 0 TabIndex = 6 Top = 0 Width = 1215 Begin VB.Label Label1 BackColor = &H00E0E0E0& BackStyle = 0 'Transparent Caption = "PDF File" BeginProperty Font Name = "Verdana" Size = 8.25 Charset = 0 Weight = 700 Underline = 0 'False Italic = 0 'False Strikethrough = 0 'False EndProperty ForeColor = &H00AC7A3E& Height = 255 Left = 120 TabIndex = 8 Top = 8400 Width = 855 End End End Attribute VB_Name = "Form1" Attribute VB_GlobalNameSpace = False Attribute VB_Creatable = False Attribute VB_PredeclaredId = True Attribute VB_Exposed = False ' List Fonts and Text Extraction ' ------------------------------ ' ' Visual Basic 6 sample for the 3-Heights PDF Extract Tool API ' http://www.pdf-tools.com ' ' Copyright (C) 2005 PDF Tools AG, Switzerland ' Permission to use, copy, modify, and distribute this ' software and its documentation for any purpose and without ' fee is hereby granted, provided that the above copyright ' notice appear in all copies and that both that copyright ' notice and this permission notice appear in supporting ' documentation. This software is provided "as is" without ' express or implied warranty. 
' Extracts the font list and the text of the PDF file named in the PDFFile
' text box and writes the result into the TextOut text box.
'
' Output layout:
'   - a "Fonts" header followed by the base name of every font resource
'   - for each processed page a "Page n" header followed by the page text;
'     text fragments that share the same Y position are joined with a blank,
'     a change of Y position starts a new line
'
' Only the first MAX_PAGES pages are processed (the original sample states
' this is due to a limitation of the text control). A note is appended when
' the document has more pages than that.
Private Sub GetText_Click()
    Const MAX_PAGES As Long = 10

    Dim pdf As New PDFPARSERLib.Document
    Dim oContent As PDFPARSERLib.Content
    Dim oText As PDFPARSERLib.Text
    Dim cFontRes As PDFPARSERLib.Font
    Dim Y As Single, Yold As Single
    Dim bHaveLine As Boolean        ' True once a text fragment was emitted on the current page
    Dim bOpened As Boolean          ' True while the document is open and must be closed
    Dim CurPage As Long, LastPage As Long
    Dim sOut As String              ' Complete output; assigned to the control once at the end
    Dim sPageText As String         ' Separate string per page to keep the
                                    ' string concatenation operations cheap
    Dim sErrText As String

    On Error GoTo ExtractFailed

    If Not pdf.Open(PDFFile.Text) Then
        MsgBox "Couldn't open input file"
        Exit Sub
    End If
    bOpened = True

    ' List fonts
    sOut = "- - - Fonts - - -" & vbCrLf & vbCrLf
    Set cFontRes = pdf.GetFirstFontResource
    Do While Not cFontRes Is Nothing
        sOut = sOut & cFontRes.BaseName & vbCrLf
        Set cFontRes = pdf.GetNextFontResource
    Loop

    ' List text
    LastPage = pdf.PageCount
    If LastPage > MAX_PAGES Then LastPage = MAX_PAGES

    For CurPage = 1 To LastPage
        pdf.PageNo = CurPage                    ' set the current page number
        Set oContent = pdf.Page.Content         ' get the page's content
        If oContent Is Nothing Then
            sOut = sOut & vbCrLf & vbCrLf & "- - - There is no content on page " _
                 & CurPage & " - - -" & vbCrLf
        Else
            oContent.BreakWords = True          ' extract words
            sOut = sOut & vbCrLf & "- - - Page " & CurPage & " - - -" & vbCrLf
            sPageText = ""
            ' Line tracking restarts on every page so the first fragment of a
            ' page is never glued to the last line of the previous page.
            bHaveLine = False

            Do Until oContent.GetNextText Is Nothing
                Set oText = oContent.Text
                ' At this point the text properties can be accessed
                ' (e.g. FontSize, XPos, YPos, UnicodeString).
                If Not (oText Is Nothing) Then
                    If oText.Length > 0 Then
                        Y = oText.YPos(0)       ' the y position of the first character
                        If bHaveLine And Y = Yold Then
                            sPageText = sPageText & " " & oText.UnicodeString
                        Else
                            sPageText = sPageText & vbCrLf & oText.UnicodeString
                        End If
                        Yold = Y
                        bHaveLine = True
                    End If
                End If
            Loop

            sOut = sOut & sPageText
        End If
    Next CurPage

    ' Tell the user when pages were left out instead of truncating silently.
    If pdf.PageCount > LastPage Then
        sOut = sOut & vbCrLf & vbCrLf & "- - - Output limited to the first " _
             & LastPage & " of " & pdf.PageCount & " pages - - -" & vbCrLf
    End If

    TextOut.Text = sOut
    pdf.Close
    Exit Sub

ExtractFailed:
    ' Save the message first: leaving the handler resets the Err object.
    sErrText = Err.Description
    Resume ReportFailure        ' leave the handler so that errors during cleanup can be trapped

ReportFailure:
    On Error Resume Next
    TextOut.Text = sOut         ' show whatever was extracted before the failure
    If bOpened Then pdf.Close
    MsgBox "Text extraction failed: " & sErrText
End Sub

' Lets the user pick the input file with the common "Open" dialog, starting
' from the name currently in the PDFFile text box, and stores the selection
' back into that text box.
Private Sub Browse_Click()
    On Error GoTo BrowseFailed

    FileDialog.FileName = PDFFile.Text
    FileDialog.ShowOpen
    PDFFile.Text = FileDialog.FileName
    Exit Sub

BrowseFailed:
    ' The dialog raises a runtime error e.g. when the preset file name is
    ' invalid; a cancel (only raised if CancelError is enabled) is not a failure.
    If Err.Number <> cdlCancel Then
        MsgBox "Couldn't show the file dialog: " & Err.Description
    End If
End Sub