csharp_pain/Scraping/COM/samples/VB/ObjExt.frm

313 lines
11 KiB
Text
Raw Normal View History

2014-06-26 15:13:46 +00:00
VERSION 5.00
Object = "{F9043C88-F6F2-101A-A3C9-08002B2F49FB}#1.2#0"; "comdlg32.ocx"
Begin VB.Form Form1
BackColor = &H00FFFFFF&
Caption = "Get Path"
ClientHeight = 8310
ClientLeft = 60
ClientTop = 345
ClientWidth = 9090
LinkTopic = "Form1"
ScaleHeight = 8310
ScaleWidth = 9090
StartUpPosition = 3 'Windows Default
Begin VB.Frame Frame1
Appearance = 0 'Flat
BackColor = &H00996600&
BorderStyle = 0 'None
ForeColor = &H80000008&
Height = 6180
Left = 1440
TabIndex = 11
Top = 1320
Width = 7500
Begin VB.Frame Frame4
Appearance = 0 'Flat
BackColor = &H00FFFFFF&
BorderStyle = 0 'None
ForeColor = &H80000008&
Height = 6140
Left = 20
TabIndex = 12
Top = 20
Width = 7460
Begin MSComDlg.CommonDialog FileDialog
Left = 6600
Top = 5640
_ExtentX = 847
_ExtentY = 847
_Version = 393216
End
Begin VB.TextBox txtList
BeginProperty Font
Name = "Verdana"
Size = 8.25
Charset = 0
Weight = 400
Underline = 0 'False
Italic = 0 'False
Strikethrough = 0 'False
EndProperty
Height = 5925
Left = 120
MultiLine = -1 'True
ScrollBars = 2 'Vertical
TabIndex = 14
Top = 120
Width = 6135
End
Begin VB.CommandButton Extract
Caption = "List Paths"
Height = 495
Left = 6360
TabIndex = 13
Top = 120
Width = 975
End
End
End
Begin VB.Frame Frame14
Appearance = 0 'Flat
BackColor = &H00996600&
BorderStyle = 0 'None
ForeColor = &H80000008&
Height = 540
Left = 1440
TabIndex = 7
Top = 720
Width = 7500
Begin VB.Frame Frame3
Appearance = 0 'Flat
BackColor = &H00FFFFFF&
BorderStyle = 0 'None
ForeColor = &H80000008&
Height = 500
Left = 20
TabIndex = 8
Top = 20
Width = 7460
Begin VB.TextBox txtInput
BeginProperty Font
Name = "Verdana"
Size = 8.25
Charset = 0
Weight = 400
Underline = 0 'False
Italic = 0 'False
Strikethrough = 0 'False
EndProperty
Height = 285
Left = 120
TabIndex = 10
Top = 120
Width = 6135
End
Begin VB.CommandButton OpenPDF
Caption = "Browse"
BeginProperty Font
Name = "Verdana"
Size = 8.25
Charset = 0
Weight = 400
Underline = 0 'False
Italic = 0 'False
Strikethrough = 0 'False
EndProperty
Height = 375
Left = 6360
TabIndex = 9
Top = 80
Width = 975
End
End
End
Begin VB.Frame Frame17
BackColor = &H00996600&
BorderStyle = 0 'None
Height = 45
Left = 0
TabIndex = 6
Top = 8040
Width = 9135
End
Begin VB.Frame Frame16
BackColor = &H00E2D1AE&
BorderStyle = 0 'None
Height = 255
Left = 0
TabIndex = 4
Top = 7800
Width = 9135
Begin VB.Label Label22
BackColor = &H00D7B67A&
BackStyle = 0 'Transparent
Caption = "Last update: October 17, 2005 - Copyright 2001-2005 PDF Tools AG"
BeginProperty Font
Name = "Verdana"
Size = 8.25
Charset = 0
Weight = 700
Underline = 0 'False
Italic = 0 'False
Strikethrough = 0 'False
EndProperty
ForeColor = &H00996600&
Height = 315
Left = 720
TabIndex = 5
Top = 0
Width = 6495
End
End
Begin VB.Frame Frame2
BackColor = &H00E2D1AE&
BorderStyle = 0 'None
Height = 7770
Left = 0
TabIndex = 1
Top = 0
Width = 1335
Begin VB.Label Label1
BackColor = &H00D7B67A&
Caption = " PATH"
BeginProperty Font
Name = "Verdana"
Size = 8.25
Charset = 0
Weight = 700
Underline = 0 'False
Italic = 0 'False
Strikethrough = 0 'False
EndProperty
ForeColor = &H00996600&
Height = 315
Left = 0
TabIndex = 3
Top = 1440
Width = 1335
End
Begin VB.Label Label10
BackColor = &H00D7B67A&
Caption = " PDF FILE"
BeginProperty Font
Name = "Verdana"
Size = 8.25
Charset = 0
Weight = 700
Underline = 0 'False
Italic = 0 'False
Strikethrough = 0 'False
EndProperty
ForeColor = &H00996600&
Height = 315
Left = 0
TabIndex = 2
Top = 840
Width = 1335
End
End
Begin VB.TextBox TextLogo
BackColor = &H006699FF&
BorderStyle = 0 'None
BeginProperty Font
Name = "Verdana"
Size = 9.75
Charset = 0
Weight = 700
Underline = 0 'False
Italic = 0 'False
Strikethrough = 0 'False
EndProperty
ForeColor = &H00FFFFFF&
Height = 375
Left = 1440
TabIndex = 0
Text = " 3-Heights PDF Extract Tool - Get Path"
Top = 120
Width = 7575
End
End
Attribute VB_Name = "Form1"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = False
Attribute VB_PredeclaredId = True
Attribute VB_Exposed = False
Private Sub Extract_Click()
    ' Click handler of the "List Paths" button.
    '
    ' Opens the PDF named in txtInput, walks the content objects of the first
    ' MAX_PAGES pages and writes a textual listing (media box, image placement,
    ' path data) to txtList. Every image encountered is stored to "out.tif" in
    ' the application directory, so a later image overwrites an earlier one.
    '
    ' Any runtime error raised while parsing is reported in a message box; the
    ' listing collected so far is still shown and the document is closed.
    Const MAX_PAGES As Long = 5            ' only the first few pages are listed

    Dim pdf As New PDFPARSERLib.Document
    Dim pageContent As PDFPARSERLib.Content
    Dim img As PDFPARSERLib.Image
    Dim ctm As PDFPARSERLib.TransformMatrix
    Dim objectKind As PDFPARSERLib.TPDFContentObject
    Dim mediaBox As Variant
    Dim imgWidth As Single
    Dim imgHeight As Single
    Dim imgX As Single
    Dim imgY As Single
    Dim lastPage As Long
    Dim pageNo As Long
    Dim listing As String                  ' accumulated output for all pages
    Dim isOpen As Boolean                  ' True once pdf.Open has succeeded

    On Error GoTo Failed

    If Not pdf.Open(txtInput.Text) Then
        MsgBox "Couldn't open input file"
        Exit Sub
    End If
    isOpen = True

    lastPage = pdf.PageCount
    If lastPage > MAX_PAGES Then
        lastPage = MAX_PAGES
    End If

    For pageNo = 1 To lastPage
        pdf.PageNo = pageNo

        mediaBox = pdf.Page.MediaBox
        listing = listing & vbCrLf & "- - - Page " & pageNo & " - - -" & vbCrLf
        listing = listing & "MediaBox = " & mediaBox(0) & ", " & mediaBox(1) & ", " _
                          & mediaBox(2) & ", " & mediaBox(3) & vbCrLf

        Set pageContent = pdf.Page.Content          ' get the page content
        If pageContent Is Nothing Then
            listing = listing & "There is no content on this page" & vbCrLf
        Else
            pageContent.Reset True                  ' account for rotate
            Do
                objectKind = pageContent.GetNextObject   ' fetch the next content object
                If objectKind = eNone Then Exit Do       ' no more objects on this page

                Select Case objectKind
                Case eImage
                    Set img = pageContent.Image
                    If img Is Nothing Then
                        MsgBox "there is no image on this page"
                    Else
                        img.Store App.Path & "\out.tif"  ' store the image on a file
                    End If

                    ' The current transformation matrix gives the image placement.
                    Set ctm = pageContent.GraphicsState.ctm
                    imgWidth = Round(ctm.a, 1)           ' the width
                    imgHeight = Round(ctm.d, 1)          ' the height
                    imgX = Round(ctm.e, 1)               ' the X position
                    imgY = Round(ctm.f, 1)               ' the Y position
                    listing = listing & "Image: Width=" & imgWidth & ", Height=" & imgHeight _
                                      & ", Pos(" & imgX & ", " & imgY & ")" & vbCrLf

                Case eText
                    ' Text objects are not listed here; see sample TextExt.vbp

                Case ePath
                    listing = listing & "Path: " & pageContent.Path & vbCrLf
                End Select
            Loop
        End If
    Next pageNo

Cleanup:
    ' Best effort: show whatever was collected and release the document even
    ' when we arrive here from the error handler.
    On Error Resume Next
    If isOpen Then
        txtList.Text = listing
        pdf.Close
    End If
    On Error GoTo 0
    Exit Sub

Failed:
    MsgBox "Error while listing paths: " & Err.Description
    Resume Cleanup
End Sub
Private Sub OpenPDF_Click()
    ' Click handler of the "Browse" button.
    '
    ' Shows the Open File dialog, pre-selecting whatever is currently typed in
    ' txtInput, and copies the dialog's file name back into txtInput.
    '
    ' The text box content is free-form user input. If the dialog rejects it as
    ' an invalid file name, the dialog is shown once more with an empty name
    ' instead of letting the runtime error terminate the handler.
    Dim retried As Boolean                 ' guards against retrying more than once

    On Error GoTo DialogFailed

    FileDialog.FileName = txtInput.Text
    FileDialog.ShowOpen
    txtInput.Text = FileDialog.FileName
    Exit Sub

DialogFailed:
    If Err.Number = cdlInvalidFileName And Not retried Then
        retried = True
        FileDialog.FileName = ""
        Resume                             ' re-run the statement that failed
    End If
    MsgBox "Couldn't show the file dialog: " & Err.Description
End Sub