using System;
using System.Drawing;
using System.Globalization;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Windows.Forms;

namespace Scraping
{
    /// <summary>
    /// Main window of the scraper. It issues three HTTP requests against the
    /// "persoenlicherfahrplan.bahn.de" query endpoint (ident, then request id,
    /// then the PDF link), downloads the resulting PDF into the working
    /// directory and reports progress in the status bar.
    /// </summary>
    public class MainForm : System.Windows.Forms.Form
    {
        /// <summary>Query endpoint used by all three requests.</summary>
        private const string QueryUrl = "http://persoenlicherfahrplan.bahn.de/bin/pf/query-p2w.exe/dn?";

        /// <summary>Prefix that is put in front of the link captured from the third response.</summary>
        private const string PdfBaseUrl = "http://persoenlicherfahrplan.bahn.de/bin/pf/query-p2w.exe/";

        // NOTE(review): the three patterns arrived with an unbalanced ')' and no '(',
        // which makes the Regex constructor throw ArgumentException. Only the opening
        // parenthesis has been restored here so that the patterns compile and capture
        // group 1 exists. Any literal text that originally preceded the group (and
        // anchored the match inside the HTML) appears to be lost - restore it from the
        // original project before relying on the captured values.

        /// <summary>Group 1: 20-22 non-whitespace characters followed by a double quote (the ident).</summary>
        private const string IdentPattern = "(\\S{20,22})\"";

        /// <summary>Group 1: four groups of four non-whitespace characters, separated by one arbitrary character (the request id).</summary>
        private const string IdPattern = "(\\S{4}.\\S{4}.\\S{4}.\\S{4})";

        /// <summary>Group 1: non-whitespace run directly in front of a closing quote and '&gt;' (the PDF link).</summary>
        private const string PdfLinkPattern = "(\\S{0,})\">";

        /// <summary>Status bar text while no download is running (same as the designer default).</summary>
        private const string IdleStatus = "Fertig";

        /// <summary>Caption of the download button while it is enabled.</summary>
        private const string DownloadButtonText = "DownloadPdf";

        /// <summary>Message shown when the input fields cannot be used.</summary>
        private const string InvalidInputMessage = "Eingabefelder überprüfen!";

        /// <summary>Date layouts accepted for the start date regardless of the machine's culture.</summary>
        private static readonly string[] StartDateFormats = new string[] { "d.M.yyyy", "d.M.yy" };

        private System.Windows.Forms.TextBox tbDelay;
        private System.Windows.Forms.Button btnDownloadPdf;
        private System.Windows.Forms.StatusBarPanel statusPanel;
        private System.Windows.Forms.Label lAbBhf;
        private System.Windows.Forms.TextBox tbAnBhf;
        private System.Windows.Forms.GroupBox gbProperties;
        private System.Windows.Forms.TextBox tbAbBhf;
        private System.Windows.Forms.TextBox tbPdfAddr;
        private System.Windows.Forms.Label lID;
        private System.Windows.Forms.Label lAddr;
        private System.Windows.Forms.TextBox tbBeginn;
        private System.Windows.Forms.Label lAnBhf;
        private System.Windows.Forms.Label label1;
        private System.Windows.Forms.StatusBar statusBar;
        private System.Windows.Forms.Label lIdent;
        private System.Windows.Forms.Label lBeginn;

        /// <summary>Set once the form has been closed; tells the worker to stop touching the UI.</summary>
        private volatile bool closed;

        private delegate void TextHandler(string text);
        private delegate string ControlTextHandler(Control control);
        private delegate void ResultHandler(string ident, string id, string pdfAddress);

        /// <summary>
        /// Builds the form and pre-fills the start date with today's date as day.month.year.
        /// </summary>
        public MainForm()
        {
            InitializeComponent();
            // Single read of DateTime.Today; "d.M.yyyy" yields the same unpadded text as before.
            tbBeginn.Text = DateTime.Today.ToString("d.M.yyyy", CultureInfo.InvariantCulture);
        }

        /// <summary>Application entry point; runs the message loop for <see cref="MainForm"/>.</summary>
        /// <param name="args">Command line arguments (unused).</param>
        [STAThread]
        public static void Main(string[] args)
        {
            Application.Run(new MainForm());
        }

        #region Windows Forms Designer generated code
        /// <summary>
        /// This method is required for Windows Forms designer support.
        /// Do not change the method contents inside the source code editor. The Forms designer might
        /// not be able to load this method if it was changed manually.
        /// </summary>
        private void InitializeComponent()
        {
            this.lBeginn = new System.Windows.Forms.Label();
            this.lIdent = new System.Windows.Forms.Label();
            this.statusBar = new System.Windows.Forms.StatusBar();
            this.label1 = new System.Windows.Forms.Label();
            this.lAnBhf = new System.Windows.Forms.Label();
            this.tbBeginn = new System.Windows.Forms.TextBox();
            this.lAddr = new System.Windows.Forms.Label();
            this.lID = new System.Windows.Forms.Label();
            this.tbPdfAddr = new System.Windows.Forms.TextBox();
            this.tbAbBhf = new System.Windows.Forms.TextBox();
            this.gbProperties = new System.Windows.Forms.GroupBox();
            this.tbAnBhf = new System.Windows.Forms.TextBox();
            this.lAbBhf = new System.Windows.Forms.Label();
            this.statusPanel = new System.Windows.Forms.StatusBarPanel();
            this.btnDownloadPdf = new System.Windows.Forms.Button();
            this.tbDelay = new System.Windows.Forms.TextBox();
            this.gbProperties.SuspendLayout();
            ((System.ComponentModel.ISupportInitialize)(this.statusPanel)).BeginInit();
            this.SuspendLayout();
            // 
            // lBeginn
            // 
            this.lBeginn.Location = new System.Drawing.Point(16, 112);
            this.lBeginn.Name = "lBeginn";
            this.lBeginn.Size = new System.Drawing.Size(48, 16);
            this.lBeginn.TabIndex = 5;
            this.lBeginn.Text = "Beginn";
            // 
            // lIdent
            // 
            this.lIdent.Location = new System.Drawing.Point(256, 16);
            this.lIdent.Name = "lIdent";
            this.lIdent.Size = new System.Drawing.Size(256, 24);
            this.lIdent.TabIndex = 1;
            this.lIdent.Text = "Ident:";
            // 
            // statusBar
            // 
            this.statusBar.Location = new System.Drawing.Point(0, 247);
            this.statusBar.Name = "statusBar";
            this.statusBar.Panels.AddRange(new System.Windows.Forms.StatusBarPanel[] {
                this.statusPanel});
            this.statusBar.ShowPanels = true;
            this.statusBar.Size = new System.Drawing.Size(520, 22);
            this.statusBar.TabIndex = 7;
            // 
            // label1
            // 
            this.label1.Location = new System.Drawing.Point(16, 152);
            this.label1.Name = "label1";
            this.label1.Size = new System.Drawing.Size(100, 32);
            this.label1.TabIndex = 7;
            this.label1.Text = "Wartezeit bis zur Pdf Abfrage in s";
            // 
            // lAnBhf
            // 
            this.lAnBhf.Location = new System.Drawing.Point(16, 80);
            this.lAnBhf.Name = "lAnBhf";
            this.lAnBhf.Size = new System.Drawing.Size(40, 16);
            this.lAnBhf.TabIndex = 3;
            this.lAnBhf.Text = "AnBhf";
            // 
            // tbBeginn
            // 
            this.tbBeginn.Location = new System.Drawing.Point(80, 112);
            this.tbBeginn.Name = "tbBeginn";
            this.tbBeginn.Size = new System.Drawing.Size(104, 20);
            this.tbBeginn.TabIndex = 6;
            this.tbBeginn.Text = "";
            // 
            // lAddr
            // 
            this.lAddr.Location = new System.Drawing.Point(256, 80);
            this.lAddr.Name = "lAddr";
            this.lAddr.Size = new System.Drawing.Size(40, 24);
            this.lAddr.TabIndex = 4;
            this.lAddr.Text = "Addr:";
            // 
            // lID
            // 
            this.lID.Location = new System.Drawing.Point(256, 48);
            this.lID.Name = "lID";
            this.lID.Size = new System.Drawing.Size(256, 23);
            this.lID.TabIndex = 2;
            this.lID.Text = "ID:";
            // 
            // tbPdfAddr
            // 
            this.tbPdfAddr.Location = new System.Drawing.Point(256, 104);
            this.tbPdfAddr.Multiline = true;
            this.tbPdfAddr.Name = "tbPdfAddr";
            this.tbPdfAddr.Size = new System.Drawing.Size(256, 48);
            this.tbPdfAddr.TabIndex = 5;
            this.tbPdfAddr.Text = "";
            // 
            // tbAbBhf
            // 
            this.tbAbBhf.Location = new System.Drawing.Point(80, 48);
            this.tbAbBhf.Name = "tbAbBhf";
            this.tbAbBhf.Size = new System.Drawing.Size(104, 20);
            this.tbAbBhf.TabIndex = 2;
            this.tbAbBhf.Text = "Salzwedel";
            // 
            // gbProperties
            // 
            this.gbProperties.Controls.Add(this.tbDelay);
            this.gbProperties.Controls.Add(this.label1);
            this.gbProperties.Controls.Add(this.tbBeginn);
            this.gbProperties.Controls.Add(this.lBeginn);
            this.gbProperties.Controls.Add(this.tbAnBhf);
            this.gbProperties.Controls.Add(this.lAnBhf);
            this.gbProperties.Controls.Add(this.tbAbBhf);
            this.gbProperties.Controls.Add(this.lAbBhf);
            this.gbProperties.Controls.Add(this.btnDownloadPdf);
            this.gbProperties.Location = new System.Drawing.Point(8, 8);
            this.gbProperties.Name = "gbProperties";
            this.gbProperties.Size = new System.Drawing.Size(232, 232);
            this.gbProperties.TabIndex = 6;
            this.gbProperties.TabStop = false;
            this.gbProperties.Text = "Einstellungen";
            // 
            // tbAnBhf
            // 
            this.tbAnBhf.Location = new System.Drawing.Point(80, 80);
            this.tbAnBhf.Name = "tbAnBhf";
            this.tbAnBhf.Size = new System.Drawing.Size(104, 20);
            this.tbAnBhf.TabIndex = 4;
            this.tbAnBhf.Text = "Stendal";
            // 
            // lAbBhf
            // 
            this.lAbBhf.Location = new System.Drawing.Point(16, 48);
            this.lAbBhf.Name = "lAbBhf";
            this.lAbBhf.Size = new System.Drawing.Size(40, 16);
            this.lAbBhf.TabIndex = 1;
            this.lAbBhf.Text = "AbBhf";
            // 
            // statusPanel
            // 
            this.statusPanel.Text = "Fertig";
            this.statusPanel.Width = 500;
            // 
            // btnDownloadPdf
            // 
            this.btnDownloadPdf.Location = new System.Drawing.Point(16, 192);
            this.btnDownloadPdf.Name = "btnDownloadPdf";
            this.btnDownloadPdf.Size = new System.Drawing.Size(96, 23);
            this.btnDownloadPdf.TabIndex = 0;
            this.btnDownloadPdf.Text = "DownloadPdf";
            this.btnDownloadPdf.Click += new System.EventHandler(this.BtnDownloadPdfClick);
            // 
            // tbDelay
            // 
            this.tbDelay.Location = new System.Drawing.Point(112, 152);
            this.tbDelay.Name = "tbDelay";
            this.tbDelay.TabIndex = 8;
            this.tbDelay.Text = "10";
            // 
            // MainForm
            // 
            this.AutoScaleBaseSize = new System.Drawing.Size(5, 13);
            this.ClientSize = new System.Drawing.Size(520, 269);
            this.Controls.Add(this.statusBar);
            this.Controls.Add(this.gbProperties);
            this.Controls.Add(this.tbPdfAddr);
            this.Controls.Add(this.lAddr);
            this.Controls.Add(this.lID);
            this.Controls.Add(this.lIdent);
            this.Name = "MainForm";
            this.Text = "MainForm";
            this.Closed += new System.EventHandler(this.MainFormClosed);
            this.gbProperties.ResumeLayout(false);
            ((System.ComponentModel.ISupportInitialize)(this.statusPanel)).EndInit();
            this.ResumeLayout(false);
        }
        #endregion

        /// <summary>
        /// Runs the complete three-request sequence and downloads the PDF.
        /// May be called from any thread: all control access is marshalled to the
        /// UI thread. The download button is re-enabled when the method leaves,
        /// whether it succeeded, failed or threw.
        /// </summary>
        /// <remarks>
        /// Network, I/O and regex exceptions are not caught here and propagate to
        /// the caller; the button click path reports them through
        /// <see cref="DownloadPdfThreadMain"/>.
        /// </remarks>
        public void DownloadPdf()
        {
            try
            {
                ExecuteDownload();
            }
            finally
            {
                // Previously the button was only re-enabled on success and stayed
                // at "In Progress..." forever after any failure.
                ResetDownloadButton();
            }
        }

        /// <summary>
        /// Thread entry point used by the download button. Reports any exception
        /// to the user instead of letting it escape the thread (an unhandled
        /// exception on a worker thread would terminate the process).
        /// </summary>
        private void DownloadPdfThreadMain()
        {
            try
            {
                DownloadPdf();
            }
            catch (Exception ex)
            {
                // Deliberate catch-all at the thread boundary: the failure is shown, not swallowed.
                SetStatus(IdleStatus);
                ShowErrorMsg(ex.Message);
            }
        }

        /// <summary>Performs the three requests and the final file download.</summary>
        private void ExecuteDownload()
        {
            string departure;
            string arrival;
            DateTime begin;
            int delaySeconds;
            if (!TryReadInputs(out departure, out arrival, out begin, out delaySeconds))
            {
                ShowErrorMsg(InvalidInputMessage);
                return;
            }

            // Request 1: fetch the current ident.
            string ident = FetchAndExtract("(1/3)", QueryUrl, IdentPattern);
            if (ident == null)
            {
                SetStatus(IdleStatus);
                ShowMessage("Ident Abfrage fehlgeschlagen");
                return;
            }

            // Request 2: put the ident into the actual query; its response carries the request id.
            string queryAddress = BuildTimetableQuery(ident, departure, arrival, begin);
            string id = FetchAndExtract("(2/3)", queryAddress, IdPattern);
            if (id == null)
            {
                // If the id lookup fails the ident is shown to help diagnosing.
                SetStatus(IdleStatus);
                ShowMessage("ID Abfrage fehlgeschlagen! Ident:" + ident);
                return;
            }

            // Wait before asking for the PDF. Counts down from the configured delay
            // to 0 inclusive, one second per step (unchanged from the original loop).
            for (int remaining = delaySeconds; remaining >= 0; remaining--)
            {
                if (closed)
                {
                    return;
                }
                SetStatus("(3/3) Warte " + remaining.ToString() + " Sekunden...");
                Thread.Sleep(1000);
            }

            // Request 3: use the id from request 2 to obtain the address of the PDF.
            string pdfPath = FetchAndExtract("(3/3)", QueryUrl + "id=" + id, PdfLinkPattern);
            if (pdfPath == null)
            {
                SetStatus(IdleStatus);
                ShowMessage("Request 3 fehlgeschlagen");
                return;
            }

            // Success: show ident, id and link, then download the file.
            string pdfAddress = PdfBaseUrl + pdfPath;
            ShowResult(ident, id, pdfAddress);

            SetStatus("Downloade Pdf...");
            string fileName = BuildFileName(departure, arrival);
            using (WebClient client = new WebClient())
            {
                client.DownloadFile(pdfAddress, fileName);
            }
            SetStatus("Als " + fileName + " gespeichert");
        }

        /// <summary>
        /// Requests <paramref name="url"/>, joins all response lines (line breaks
        /// dropped, as before) and returns capture group 1 of the first match of
        /// <paramref name="pattern"/>.
        /// </summary>
        /// <param name="step">Progress prefix for the status bar, e.g. "(1/3)".</param>
        /// <param name="url">Address to request.</param>
        /// <param name="pattern">Case-insensitive regular expression with one capture group.</param>
        /// <returns>The captured text, or <c>null</c> when the pattern does not match.</returns>
        private string FetchAndExtract(string step, string url, string pattern)
        {
            SetStatus(step + " Create Request...");
            // WebRequest uses the default proxy configuration (the original comment
            // notes that the Internet Explorer proxy settings are picked up).
            WebRequest request = WebRequest.Create(url);

            SetStatus(step + " Get Response...");
            StringBuilder page = new StringBuilder();
            // 'using' closes the connection even when reading throws.
            using (WebResponse response = request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.ASCII))
            {
                SetStatus(step + " Speichere Stream...");
                // ReadLine() returning null is the reliable end-of-stream signal;
                // Peek() may report -1 on a network stream before the body is complete.
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    page.Append(line);
                }
            }

            SetStatus(step + " Search Expression...");
            Match match = Regex.Match(page.ToString(), pattern, RegexOptions.IgnoreCase);
            if (!match.Success)
            {
                return null;
            }
            return match.Groups[1].Value;
        }

        /// <summary>Builds the address of request 2 from the ident and the user's settings.</summary>
        /// <param name="ident">Ident captured from request 1 (inserted as captured).</param>
        /// <param name="departure">Departure station as typed by the user.</param>
        /// <param name="arrival">Arrival station as typed by the user.</param>
        /// <param name="begin">First day of the requested period; the period ends six days later.</param>
        private static string BuildTimetableQuery(string ident, string departure, string arrival, DateTime begin)
        {
            // Dates are always sent as dd.MM.yyyy so the request does not depend on
            // the culture of the machine running the program.
            string dateFrom = begin.ToString("dd.MM.yyyy", CultureInfo.InvariantCulture);
            string dateTo = begin.AddDays(6).ToString("dd.MM.yyyy", CultureInfo.InvariantCulture);

            StringBuilder address = new StringBuilder(QueryUrl);
            address.Append("ident=").Append(ident);
            address.Append("&seqnr=0&ignore_ident=1&pfmodus=1&transfertype=xml");
            // Station names are user input: escape them so '&', '#', '+' etc. cannot break the query.
            address.Append("&descfrom.-1=BASEREQ&majorfrom=BHF&from=").Append(Uri.EscapeDataString(departure));
            address.Append("&descto.-1=BASEREQ&majorto=BHF&to=").Append(Uri.EscapeDataString(arrival));
            address.Append("&date_from=").Append(dateFrom).Append("&date_to=").Append(dateTo);
            address.Append("&weekday_mo=checked&weekday_tu=checked&weekday_we=checked&weekday_th=checked&weekday_fr=checked&weekday_sa=checked&weekday_su=checked");
            // "&timesel" had been turned into the multiplication sign by HTML entity decoding ("&times").
            address.Append("&timesel=depart&time=12%3A00&time0_from_hin=0%3A00&time0_to_hin=23%3A59&time0_from_rueck=0%3A00&time0_to_rueck=23%3A59");
            address.Append("&answerMode=download&eMailAddress=&output=pdf&maxNrOfChanges=0&pr1.4=CHECKED&outputFilter=complete&filter=&start.x=42&start.y=10");
            return address.ToString();
        }

        /// <summary>
        /// Builds the local file name "DEPARTURE-ARRIVAL.pdf"; characters that are not
        /// allowed in file names (e.g. '/') are replaced by '_'.
        /// </summary>
        private static string BuildFileName(string departure, string arrival)
        {
            string name = departure.ToUpper() + "-" + arrival.ToUpper() + ".pdf";
            foreach (char invalid in Path.GetInvalidFileNameChars())
            {
                name = name.Replace(invalid, '_');
            }
            return name;
        }

        /// <summary>
        /// Reads and validates the four input fields.
        /// </summary>
        /// <returns>
        /// <c>false</c> when a field is empty, the start date cannot be parsed or
        /// the delay is not an integer.
        /// </returns>
        private bool TryReadInputs(out string departure, out string arrival, out DateTime begin, out int delaySeconds)
        {
            begin = DateTime.MinValue;
            delaySeconds = 0;

            departure = ReadText(tbAbBhf);
            arrival = ReadText(tbAnBhf);
            string beginText = ReadText(tbBeginn);
            string delayText = ReadText(tbDelay);

            if (string.IsNullOrEmpty(departure) || string.IsNullOrEmpty(arrival)
                || string.IsNullOrEmpty(beginText) || string.IsNullOrEmpty(delayText))
            {
                return false;
            }
            if (!TryParseStartDate(beginText, out begin))
            {
                return false;
            }
            return int.TryParse(delayText, out delaySeconds);
        }

        /// <summary>
        /// Parses the start date. day.month.year is tried first (the layout the form
        /// pre-fills); the current culture's formats are accepted as a fallback.
        /// </summary>
        private static bool TryParseStartDate(string text, out DateTime date)
        {
            if (DateTime.TryParseExact(text.Trim(), StartDateFormats, CultureInfo.InvariantCulture, DateTimeStyles.None, out date))
            {
                return true;
            }
            return DateTime.TryParse(text, CultureInfo.CurrentCulture, DateTimeStyles.None, out date);
        }

        /// <summary>
        /// Runs <paramref name="method"/> on the UI thread and returns its result.
        /// Returns <c>null</c> when the form's window handle has gone away in the
        /// meantime (form closed while the worker was running).
        /// </summary>
        private object InvokeOnUi(Delegate method, object[] args)
        {
            try
            {
                return Invoke(method, args);
            }
            catch (ObjectDisposedException)
            {
                return null;
            }
            catch (InvalidOperationException)
            {
                return null;
            }
        }

        /// <summary>Returns the text of <paramref name="control"/>, read on the UI thread; <c>null</c> after the form was closed.</summary>
        private string ReadText(Control control)
        {
            if (closed)
            {
                return null;
            }
            if (InvokeRequired)
            {
                return (string)InvokeOnUi(new ControlTextHandler(ReadText), new object[] { control });
            }
            return control.Text;
        }

        /// <summary>Sets the status bar text from any thread.</summary>
        private void SetStatus(string text)
        {
            if (closed)
            {
                return;
            }
            if (InvokeRequired)
            {
                InvokeOnUi(new TextHandler(SetStatus), new object[] { text });
                return;
            }
            statusPanel.Text = text;
        }

        /// <summary>Shows ident, request id and PDF address in the form, from any thread.</summary>
        private void ShowResult(string ident, string id, string pdfAddress)
        {
            if (closed)
            {
                return;
            }
            if (InvokeRequired)
            {
                InvokeOnUi(new ResultHandler(ShowResult), new object[] { ident, id, pdfAddress });
                return;
            }
            lIdent.Text = "Ident: " + ident;
            lID.Text = "ID: " + id;
            tbPdfAddr.Text = pdfAddress;
        }

        /// <summary>Re-enables the download button and restores its caption, from any thread.</summary>
        private void ResetDownloadButton()
        {
            if (closed)
            {
                return;
            }
            if (InvokeRequired)
            {
                InvokeOnUi(new MethodInvoker(ResetDownloadButton), null);
                return;
            }
            btnDownloadPdf.Enabled = true;
            btnDownloadPdf.Text = DownloadButtonText;
        }

        /// <summary>Shows a plain message box owned by the form, from any thread.</summary>
        private void ShowMessage(string text)
        {
            if (closed)
            {
                return;
            }
            if (InvokeRequired)
            {
                InvokeOnUi(new TextHandler(ShowMessage), new object[] { text });
                return;
            }
            MessageBox.Show(this, text);
        }

        /// <summary>
        /// Marks the form as closed so a running worker stops touching the UI.
        /// The worker is a background thread, so it ends with the process and no
        /// Thread.Abort is needed.
        /// </summary>
        private void MainFormClosed(object sender, System.EventArgs e)
        {
            closed = true;
        }

        /// <summary>
        /// Click handler of the download button: validates the inputs, disables the
        /// button and starts the download on a background thread.
        /// </summary>
        private void BtnDownloadPdfClick(object sender, System.EventArgs e)
        {
            string departure;
            string arrival;
            DateTime begin;
            int delaySeconds;
            if (!TryReadInputs(out departure, out arrival, out begin, out delaySeconds))
            {
                ShowErrorMsg(InvalidInputMessage);
                return;
            }

            btnDownloadPdf.Enabled = false;
            btnDownloadPdf.Text = "In Progress...";

            Thread worker = new Thread(new ThreadStart(DownloadPdfThreadMain));
            // Background thread: must not keep the process alive after the form is closed.
            worker.IsBackground = true;
            worker.Start();
        }

        /// <summary>Shows <paramref name="msg"/> in an error dialog titled "Fehler", from any thread.</summary>
        /// <param name="msg">Text to display.</param>
        private void ShowErrorMsg(string msg)
        {
            if (closed)
            {
                return;
            }
            if (InvokeRequired)
            {
                InvokeOnUi(new TextHandler(ShowErrorMsg), new object[] { msg });
                return;
            }
            MessageBox.Show(this, msg, "Fehler", MessageBoxButtons.OK, MessageBoxIcon.Hand, MessageBoxDefaultButton.Button1);
        }
    }
}