neo1973-germany/pyFeedRead/trunk/src/ShowFeedText.py
kriss 210c2238e8 pyFeedRead - Initial import
git-svn-id: http://www.neo1973-germany.de/svn@34 46df4e5c-bc4e-4628-a0fc-830ba316316d
2008-03-13 16:09:34 +00:00

166 lines
No EOL
5.9 KiB
Python

"""
* ShowFeedText.py - FeedRead - show text
*
* (C) 2007 by Kristian Mueller <kristian-m@kristian-m.de>
* All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
import gtk
import urllib
# import BeautifulSoup
import re
import os
class ShowFeedText(gtk.VBox):
    """Notebook page that displays the text of the selected feed entry.

    The page holds an "update" button, an insensitive entry that is filled
    with the feed provider, and a scrollable, read-only text view.
    """

    # Matches HTML comments, including those spanning several lines.
    _COMMENT_RE = re.compile('<!--.*?-->', re.DOTALL)

    def __init__(self, FeedPanel):
        """Create the page and build its widgets.

        FeedPanel -- panel object; this class reads its ``feed_provider``,
                     ``descriptions`` and ``feed_list`` attributes.
        """
        gtk.VBox.__init__(self, False, 0)
        self.FeedContentPanel = FeedPanel
        self.create_notebook_page()

    @staticmethod
    def _collapse_newlines(text):
        """Strip trailing whitespace from each line and drop blank lines.

        Every remaining line is terminated by exactly one newline.
        """
        stripped = (line.rstrip() for line in text.splitlines())
        return "".join(line + '\n' for line in stripped if line)

    @staticmethod
    def _strip_tags(markup):
        """Return *markup* without anything enclosed in '<' ... '>'.

        The angle brackets themselves are always dropped.  Characters that
        cannot be passed through the utf-8 codec are replaced by '?'.
        """
        # NOTE(review): with Python 2 byte strings the encode() call below
        # presumably turns every non-ASCII byte into '?' -- confirm whether
        # decoding the document with its real charset would be preferable.
        kept = []
        depth = 0
        for char in markup:
            if char == '<':
                depth += 1
            elif char == '>':
                # Clamp at zero: a stray '>' in the text must not make the
                # contents of the following tag visible.
                if depth > 0:
                    depth -= 1
            elif depth == 0:
                try:
                    kept.append(char.encode('utf-8'))
                except UnicodeError:
                    kept.append('?')
        # Collect the pieces and join once instead of repeated "+=".
        return "".join(kept)

    def get_url_content(self, title, url):
        """Return the text to display for the entry identified by *url*.

        title -- name of the entry (currently unused, kept for callers).
        url   -- key into ``FeedContentPanel.descriptions`` and, when no
                 description is stored, the address that is downloaded.

        If a non-empty description is stored for *url* it is returned with
        HTML comments removed.  Otherwise the document at *url* is fetched,
        comments and tags are removed and blank lines are collapsed.  If
        fetching fails an error message string is returned instead.
        """
        self.state_entry.set_text(self.FeedContentPanel.feed_provider)

        descr_text = self.FeedContentPanel.descriptions[url]
        if descr_text:
            return self._COMMENT_RE.sub('', descr_text)

        print("url: %s" % url)
        try:
            doc = urllib.urlopen(url)
            try:
                message_text = doc.read()
            finally:
                # Always release the connection, even if read() fails.
                doc.close()
        except Exception:
            # Best effort: the GUI shows the message instead of crashing.
            # The wording dates from the earlier links/lynx based approach.
            return "Error: Could not run the links program to load data."

        message_text = self._COMMENT_RE.sub('', message_text)
        return self._collapse_newlines(self._strip_tags(message_text))

    def load_feed(self, widget):
        """Handler for the "update" button: show the selected entry.

        widget -- the widget that emitted the signal (unused).

        Reads the current selection of ``FeedContentPanel.feed_list`` and
        puts the entry name followed by its text into the text buffer.
        Without a selection a placeholder text is shown.
        """
        selection = self.FeedContentPanel.feed_list.get_selection()
        (model, model_iter) = selection.get_selected()
        # Test for None explicitly rather than ordering the iter against
        # an integer.
        if model_iter is not None:
            feed_name = model.get_value(model_iter, 0)  # first column: name
            url = model.get_value(model_iter, 1)        # second column: url
            text = self.get_url_content(feed_name, url)
        else:
            feed_name = "none"
            text = "No Content"
        self.text_buffer.set_text("%s\n\n%s" % (feed_name, text))

    def create_notebook_page(self):
        """Create and pack the widgets of this page, then show them.

        Sets ``self.state_entry`` and ``self.text_buffer`` for later use by
        the other methods.
        """
        self.set_border_width(0)
        text_box = gtk.VBox(False, 0)

        # Button that triggers loading of the selected entry.
        update_btn = gtk.Button("update")
        update_btn.connect("clicked", self.load_feed)
        self.pack_start(update_btn, False, False, 0)

        # Insensitive entry used purely as a status display.
        self.state_entry = gtk.Entry()
        self.state_entry.set_text("")
        self.state_entry.set_sensitive(False)
        text_box.pack_start(self.state_entry, False, False, 0)

        # Read-only, word-wrapped text view inside a scrolled window.
        scroll_win = gtk.ScrolledWindow()
        scroll_win.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
        text_view = gtk.TextView()
        text_view.set_cursor_visible(False)
        text_view.set_wrap_mode(gtk.WRAP_WORD)
        text_view.set_editable(False)
        self.text_buffer = text_view.get_buffer()
        scroll_win.add(text_view)
        text_box.add(scroll_win)

        self.pack_start(text_box, True, True, 0)
        self.show_all()