csharp_pain/Scraping/COM/samples/ASP/gettext.asp

45 lines
1.2 KiB
Text
Raw Normal View History

2014-06-26 15:13:46 +00:00
<%@ Language=VBScript %>
<%
' Sample page: extracts the text of page 1 of "in_sample.pdf" (resolved relative
' to this script via Server.MapPath) using the PDFParser.Document COM component
' and writes it to the response, one HTML-encoded line per "<br>".
'
' Text fragments are grouped into output lines by comparing the first element
' of each fragment's YPos array: consecutive fragments with the same value are
' joined with a single space, and a change in value starts a new output line.
' NOTE(review): YPos(0) is presumably the vertical position of the fragment's
' first glyph -- confirm against the PDFParser API reference.

Dim pdfDoc        ' PDFParser.Document COM object
Dim content       ' content object of the current page
Dim text          ' current text fragment
Dim yPositions    ' YPos array of the current fragment
Dim lineY         ' first YPos element of the current fragment
Dim previousY     ' first YPos element of the previous non-empty fragment
Dim lineText      ' text accumulated for the output line being built
Dim haveLine      ' True once at least one fragment has been accumulated

Set pdfDoc = Server.CreateObject("PDFParser.Document")

If Not pdfDoc.Open(Server.MapPath("in_sample.pdf")) Then
    ' Report the failure and do not touch the unopened document any further.
    Response.Write "<p>"
    Response.Write "Could not open file." & "<br>"
    Response.Write "</p>"
Else
    pdfDoc.PageNo = 1
    Set content = pdfDoc.Page.content

    If Not (content Is Nothing) Then
        content.BreakWords = True
        content.Reset True

        haveLine = False
        previousY = -1
        lineText = ""

        Do
            ' GetNextText advances to the next fragment; Nothing means no more text.
            If content.GetNextText Is Nothing Then Exit Do
            Set text = content.text

            If text.length > 0 Then
                yPositions = text.YPos
                lineY = yPositions(0)

                If haveLine And previousY = lineY Then
                    ' Same line as the previous fragment: append.
                    lineText = lineText & " " & text.UnicodeString
                Else
                    ' New line: emit the finished one (if any) and start over.
                    If haveLine Then
                        Response.Write Server.HTMLEncode(lineText) & "<br>"
                    End If
                    lineText = text.UnicodeString
                    haveLine = True
                End If

                previousY = lineY
            End If
        Loop

        ' Flush the last accumulated line; without this the final line of the
        ' page would never be written.
        If haveLine Then
            Response.Write Server.HTMLEncode(lineText) & "<br>"
        End If
    Else
        Response.Write "<p>"
        Response.Write "There is no content on this page" & "<br>"
        Response.Write "</p>"
    End If
End If

' Release the COM references explicitly.
Set text = Nothing
Set content = Nothing
Set pdfDoc = Nothing
%>