234 lines
8.3 KiB
Text
234 lines
8.3 KiB
Text
VERSION 5.00
|
|
Object = "{F9043C88-F6F2-101A-A3C9-08002B2F49FB}#1.2#0"; "comdlg32.ocx"
|
|
Begin VB.Form Form1
|
|
BackColor = &H00FFFFFF&
|
|
BorderStyle = 1 'Fixed Single
|
|
Caption = "Extract Text"
|
|
ClientHeight = 9285
|
|
ClientLeft = 45
|
|
ClientTop = 435
|
|
ClientWidth = 5550
|
|
Icon = "TextExt.frx":0000
|
|
LinkTopic = "Form1"
|
|
MaxButton = 0 'False
|
|
MinButton = 0 'False
|
|
ScaleHeight = 9285
|
|
ScaleWidth = 5550
|
|
StartUpPosition = 3 'Windows Default
|
|
Begin VB.CommandButton GetText
|
|
Caption = "Extract"
|
|
Height = 375
|
|
Left = 2280
|
|
TabIndex = 9
|
|
Top = 8760
|
|
Width = 1095
|
|
End
|
|
Begin MSComDlg.CommonDialog FileDialog
|
|
Left = 4080
|
|
Top = 8760
|
|
_ExtentX = 847
|
|
_ExtentY = 847
|
|
_Version = 393216
|
|
End
|
|
Begin VB.CommandButton Browse
|
|
BackColor = &H00C0C0C0&
|
|
Caption = "..."
|
|
Height = 375
|
|
Left = 5040
|
|
TabIndex = 4
|
|
Top = 8400
|
|
Width = 375
|
|
End
|
|
Begin VB.TextBox PDFFile
|
|
Height = 285
|
|
Left = 1080
|
|
TabIndex = 3
|
|
Top = 8400
|
|
Width = 3855
|
|
End
|
|
Begin VB.Frame Frame1
|
|
BackColor = &H00AC7A3E&
|
|
BorderStyle = 0 'None
|
|
ForeColor = &H00AC7A3E&
|
|
Height = 7455
|
|
Left = 120
|
|
TabIndex = 1
|
|
Top = 720
|
|
Width = 5295
|
|
Begin VB.TextBox TextOut
|
|
BorderStyle = 0 'None
|
|
Height = 7035
|
|
Left = 50
|
|
MultiLine = -1 'True
|
|
ScrollBars = 2 'Vertical
|
|
TabIndex = 2
|
|
Top = 360
|
|
Width = 5190
|
|
End
|
|
Begin VB.Label Label2
|
|
BackStyle = 0 'Transparent
|
|
Caption = "Output Text"
|
|
BeginProperty Font
|
|
Name = "Verdana"
|
|
Size = 9
|
|
Charset = 0
|
|
Weight = 700
|
|
Underline = 0 'False
|
|
Italic = 0 'False
|
|
Strikethrough = 0 'False
|
|
EndProperty
|
|
ForeColor = &H00FFFFFF&
|
|
Height = 255
|
|
Left = 120
|
|
TabIndex = 5
|
|
Top = 90
|
|
Width = 2055
|
|
End
|
|
End
|
|
Begin VB.TextBox TextLogo
|
|
BorderStyle = 0 'None
|
|
BeginProperty Font
|
|
Name = "Verdana"
|
|
Size = 12.75
|
|
Charset = 0
|
|
Weight = 700
|
|
Underline = 0 'False
|
|
Italic = 0 'False
|
|
Strikethrough = 0 'False
|
|
EndProperty
|
|
ForeColor = &H00404040&
|
|
Height = 375
|
|
Left = 0
|
|
TabIndex = 0
|
|
Text = " pdf-tools.com"
|
|
Top = 120
|
|
Width = 5535
|
|
End
|
|
Begin VB.Frame Frame3
|
|
BackColor = &H00E0E0E0&
|
|
BorderStyle = 0 'None
|
|
Height = 615
|
|
Left = 0
|
|
TabIndex = 7
|
|
Top = 0
|
|
Width = 5535
|
|
End
|
|
Begin VB.Frame Frame2
|
|
BackColor = &H00E0E0E0&
|
|
BorderStyle = 0 'None
|
|
Height = 9375
|
|
Left = 0
|
|
TabIndex = 6
|
|
Top = 0
|
|
Width = 1215
|
|
Begin VB.Label Label1
|
|
BackColor = &H00E0E0E0&
|
|
BackStyle = 0 'Transparent
|
|
Caption = "PDF File"
|
|
BeginProperty Font
|
|
Name = "Verdana"
|
|
Size = 8.25
|
|
Charset = 0
|
|
Weight = 700
|
|
Underline = 0 'False
|
|
Italic = 0 'False
|
|
Strikethrough = 0 'False
|
|
EndProperty
|
|
ForeColor = &H00AC7A3E&
|
|
Height = 255
|
|
Left = 120
|
|
TabIndex = 8
|
|
Top = 8400
|
|
Width = 855
|
|
End
|
|
End
|
|
End
|
|
Attribute VB_Name = "Form1"
|
|
Attribute VB_GlobalNameSpace = False
|
|
Attribute VB_Creatable = False
|
|
Attribute VB_PredeclaredId = True
|
|
Attribute VB_Exposed = False
|
|
' List Fonts and Text Extraction
|
|
' ------------------------------
|
|
'
|
|
' Visual Basic 6 sample for the 3-Heights PDF Extract Tool API
|
|
' http://www.pdf-tools.com
|
|
'
|
|
' Copyright (C) 2005 PDF Tools AG, Switzerland
|
|
' Permission to use, copy, modify, and distribute this
|
|
' software and its documentation for any purpose and without
|
|
' fee is hereby granted, provided that the above copyright
|
|
' notice appear in all copies and that both that copyright
|
|
' notice and this permission notice appear in supporting
|
|
' documentation. This software is provided "as is" without
|
|
' express or implied warranty.
|
|
|
|
' Click handler for the "Extract" button.
'
' Opens the PDF named in the PDFFile text box and fills TextOut with:
'   1. the base name of every font resource in the document, then
'   2. the words found on each page, for at most MAX_PAGES pages.
' Words that share the Y position of the preceding word on the same page
' are appended with a blank; any other word starts a new line.
'
' Shows a message box when the file cannot be opened or when the
' extraction raises a run-time error. The document is closed in both the
' normal and the error path.
Private Sub GetText_Click()
    ' Only this many pages are extracted, due to the limitation of the
    ' text control.
    Const MAX_PAGES As Long = 10

    Dim pdf As New PDFPARSERLib.Document
    Dim pageContent As PDFPARSERLib.Content
    Dim textItem As PDFPARSERLib.Text
    Dim fontRes As PDFPARSERLib.Font
    Dim Y As Single, Yold As Single
    Dim firstOnPage As Boolean      ' True until a word was emitted for the page
    Dim CurPage As Long, LastPage As Long
    Dim docOpen As Boolean          ' True while pdf has to be closed
    Dim sOutput As String           ' complete output, assigned to TextOut once
    Dim sPageText As String         ' separate string per page to keep the
                                    ' string concatenation operations cheap

    On Error GoTo ExtractFailed

    If Not pdf.Open(PDFFile.Text) Then
        MsgBox "Couldn't open input file"
        Exit Sub
    End If
    docOpen = True

    ' List fonts
    sOutput = "- - - Fonts - - -" & vbCrLf & vbCrLf
    Set fontRes = pdf.GetFirstFontResource
    Do While Not fontRes Is Nothing
        sOutput = sOutput & fontRes.BaseName & vbCrLf
        Set fontRes = pdf.GetNextFontResource
    Loop

    ' List text
    LastPage = pdf.PageCount
    If LastPage > MAX_PAGES Then LastPage = MAX_PAGES

    For CurPage = 1 To LastPage
        pdf.PageNo = CurPage                    ' set the current page number
        Set pageContent = pdf.Page.Content      ' get the page's content

        If pageContent Is Nothing Then
            sOutput = sOutput & vbCrLf & vbCrLf & _
                      "- - - There is no content on page " & CurPage & " - - -" & vbCrLf
        Else
            pageContent.BreakWords = True       ' extract words
            sOutput = sOutput & vbCrLf & "- - - Page " & CurPage & " - - -" & vbCrLf
            sPageText = ""

            ' Line tracking restarts on every page, so the first word of a
            ' page is never joined to a line of the previous page.
            firstOnPage = True

            Do Until pageContent.GetNextText Is Nothing
                ' At this point the text properties can be accessed through
                ' the current text object (FontSize, XPos, YPos, ...).
                Set textItem = pageContent.Text
                If Not (textItem Is Nothing) Then
                    If textItem.Length > 0 Then
                        Y = textItem.YPos(0)    ' the Y position
                        If (Not firstOnPage) And (Yold = Y) Then
                            sPageText = sPageText & " " & textItem.UnicodeString
                        Else
                            sPageText = sPageText & vbCrLf & textItem.UnicodeString
                        End If
                        Yold = Y
                        firstOnPage = False
                    End If
                End If
            Loop

            sOutput = sOutput & sPageText
        End If
    Next CurPage

Cleanup:
    ' Reached on success and, through Resume, after a failure. Closing is
    ' best effort: an error raised by Close must not hide the output.
    On Error Resume Next
    If docOpen Then
        pdf.Close
        docOpen = False
    End If
    On Error GoTo 0
    TextOut.Text = sOutput
    Exit Sub

ExtractFailed:
    MsgBox "Text extraction failed: " & Err.Description
    Resume Cleanup
End Sub
|
|
|
|
' Click handler for the "..." button.
'
' Shows the common "Open" dialog, preselecting the path currently typed
' into the PDFFile text box, and copies the chosen file name back into
' that text box.
'
' If the dialog rejects the preselected name as invalid, it is shown once
' more without a preselection. The text box is only overwritten when the
' dialog reports a non-empty file name, so the typed text survives a
' cancelled retry.
Private Sub Browse_Click()
    ' Error number of the common dialog control for an invalid file name
    ' (cdlInvalidFileName) - NOTE(review): value taken from the control's
    ' documentation, confirm against the installed comdlg32.ocx.
    Const ERR_INVALID_FILENAME As Long = 20477

    Dim retried As Boolean

    On Error GoTo DialogFailed

    FileDialog.Filter = "PDF Files (*.pdf)|*.pdf|All Files (*.*)|*.*"
    FileDialog.FileName = PDFFile.Text

ShowDialog:
    FileDialog.ShowOpen
    If Len(FileDialog.FileName) > 0 Then
        PDFFile.Text = FileDialog.FileName
    End If
    Exit Sub

DialogFailed:
    If Err.Number = ERR_INVALID_FILENAME And Not retried Then
        ' The text box content is not usable as an initial file name;
        ' drop it and show the dialog again.
        retried = True
        FileDialog.FileName = ""
        Resume ShowDialog
    End If
    MsgBox "Couldn't show the file dialog: " & Err.Description
End Sub
|