210c2238e8
git-svn-id: http://www.neo1973-germany.de/svn@34 46df4e5c-bc4e-4628-a0fc-830ba316316d
166 lines
No EOL
5.9 KiB
Python
166 lines
No EOL
5.9 KiB
Python
"""
|
|
* ShowFeedContent.py - FeedRead - show text
|
|
*
|
|
* (C) 2007 by Kristian Mueller <kristian-m@kristian-m.de>
|
|
* All Rights Reserved
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
"""
|
|
|
|
import gtk
|
|
import urllib
|
|
# import BeautifulSoup
|
|
import re
|
|
import os
|
|
|
|
class ShowFeedText(gtk.VBox):
|
|
def __init__(self, FeedPanel):
|
|
gtk.VBox.__init__(self, False, 0)
|
|
self.FeedContentPanel = FeedPanel
|
|
self.create_notebook_page()
|
|
|
|
def get_url_content(self, title, url):
|
|
self.state_entry.set_text(self.FeedContentPanel.feed_provider)
|
|
## html to text
|
|
## from http://mail.python.org/pipermail/python-list/2005-February/309923.html
|
|
comments = re.compile('<!--.*?-->', re.DOTALL)
|
|
tags = re.compile('<.*?>', re.DOTALL)
|
|
"""
|
|
def extracttext(obj):
|
|
if isinstance(obj,BeautifulSoup.Tag):
|
|
return "".join(extracttext(c) for c in obj.contents)
|
|
else:
|
|
return str(obj)
|
|
|
|
def striptags(text):
|
|
text = re.sub(comments,'', text)
|
|
text = re.sub(tags,'', text)
|
|
return text
|
|
|
|
|
|
def bsouptext(text):
|
|
if len(text) > 0:
|
|
# try:
|
|
souptree = BeautifulSoup.BeautifulSoup(text)
|
|
# if isinstance(souptree.body, str):
|
|
bodytext = extracttext(souptree.body.fetchText)
|
|
text = re.sub(comments,'', bodytext)
|
|
text = collapsenewlines(text)
|
|
return text
|
|
# except:
|
|
# return ""
|
|
return ""
|
|
"""
|
|
def collapsenewlines(text):
|
|
tmp_text = "\n".join(line for line in text.splitlines() if line)
|
|
ret_text = ""
|
|
for line in tmp_text.split("\n"):
|
|
if len(line.rstrip()) >= 1:
|
|
ret_text += line.rstrip() + '\n'
|
|
return ret_text
|
|
return " ".join(word for word in \
|
|
("\n".join(line for line in text.splitlines() if line)).split(" ") \
|
|
if len(word) > 1)
|
|
|
|
descr_text = self.FeedContentPanel.descriptions[url]
|
|
if len(descr_text) >= 1:
|
|
descr_text = re.sub(comments,'', descr_text)
|
|
# descr_text = collapsenewlines(descr_text)
|
|
return descr_text
|
|
|
|
|
|
#doc = urllib.urlopen(url)
|
|
print "url: %s" %url
|
|
#doc_text = doc.read()
|
|
|
|
### different aproach using lynx
|
|
ELINKS_CMD = "links -dump -dump-charset utf8 -dump-width 1024" # -assume_charset=utf8 -display_charset=utf8"
|
|
LINKS_CMD = "links -dump"
|
|
LYNX_CMD = "lynx -dump -assume_charset=utf8 -display_charset=utf8 -width=1024"
|
|
CMD = LINKS_CMD
|
|
try:
|
|
# print "Command: %s" %("%s %s" %(CMD, url))
|
|
# lynx_ostream = os.popen("%s %s" %(CMD, url))
|
|
# message_text = re.sub(comments,'', lynx_ostream.read())
|
|
|
|
doc = urllib.urlopen(url)
|
|
print "1"
|
|
new_text = ""
|
|
message_text = doc.read()
|
|
message_text = re.sub(comments,'', message_text)
|
|
tag_level = 0
|
|
for char in message_text:
|
|
if char == '<':
|
|
tag_level += 1
|
|
if char == '>':
|
|
tag_level -= 1
|
|
else:
|
|
if tag_level >= 1:
|
|
continue
|
|
else:
|
|
try:
|
|
new_text += char.encode('utf-8')
|
|
except:
|
|
new_text += '?'
|
|
|
|
message_text = collapsenewlines(new_text)
|
|
return message_text
|
|
except:
|
|
return "Error: Could not run the links program to load data."
|
|
|
|
def load_feed(self, widget):
|
|
(model, model_iter) = self.FeedContentPanel.feed_list.get_selection().get_selected()
|
|
if model_iter >= 1:
|
|
feed_name = model.get_value(model_iter, 0) # column is first (name)
|
|
url = model.get_value(model_iter, 1) # column is second (url)
|
|
text = self.get_url_content(feed_name, url)
|
|
else:
|
|
feed_name = "none"
|
|
url = "http://"
|
|
text = "No Content"
|
|
|
|
self.text_buffer.set_text("%s\n\n%s" % (feed_name, text) )
|
|
|
|
def create_notebook_page(self):
|
|
self.set_border_width(0)
|
|
|
|
text_box = gtk.VBox(False, 0)
|
|
#text_box.set_border_width(15)
|
|
|
|
update_btn = gtk.Button("update")
|
|
update_btn.connect("clicked", self.load_feed)
|
|
self.pack_start(update_btn, False, False, 0)
|
|
|
|
self.state_entry = gtk.Entry()
|
|
self.state_entry.set_text("")
|
|
self.state_entry.set_sensitive(0)
|
|
text_box.pack_start(self.state_entry, False, False, 0)
|
|
|
|
scroll_win = gtk.ScrolledWindow()
|
|
scroll_win.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
|
|
|
|
text_view = gtk.TextView()
|
|
text_view.set_cursor_visible(False)
|
|
text_view.set_wrap_mode(gtk.WRAP_WORD)
|
|
self.text_buffer = text_view.get_buffer()
|
|
|
|
text_view.set_editable(False)
|
|
|
|
scroll_win.add(text_view)
|
|
text_box.add(scroll_win)
|
|
|
|
self.pack_start(text_box, True, True, 0)
|
|
self.show_all()
|
|
|