#!/usr/bin/python
"""Ping all trackback-eligible or pingback-eligible servers associated with
hrefs found in a given blog entry - Most code is by Sam Ruby

One requirement is that you run this code from your py['datadir']. The script
itself can be placed anywhere. If your entry is stored in
technology/stuff.txt, run autoping this way:

cd /your/blog/dir
/path/to/autoping.py technology/stuff.txt

Autoping will try to send a trackback and/or pingback based on the URLs it
finds in technology/stuff.txt. One limitation: if the trackback RDF used for
autodiscovery on the target site does not point to the correct URL to ping,
autoping will not be able to send the trackback. In that case, alert the
author of the site.
"""

# Please change this value to the URL where your pyblosxom is installed.
BASEURL = 'http://danilosegan.com/blog/'
# Modify these to where your pyblosxom and config.py are installed
# (if you uncomment them, move them below the `import ... sys` line).
#sys.path.append('/path/to/config')
#sys.path.append('/path/to/Pyblosxom/dir')

import re, sgmllib, sys, urllib, xmlrpclib
from xml.sax import parseString, SAXParseException
from xml.sax.handler import ContentHandler
import cPickle, os, string

# Get our pyblosxom specifics here
from Pyblosxom import tools
from Pyblosxom.pyblosxom import PyBlosxom
from Pyblosxom.Request import Request
import config


# Captures the href value of an <a> tag.  The href attribute may be preceded
# or followed by other attributes; it must be preceded by whitespace so that
# look-alikes such as data-href="..." are not mistaken for it.
ahref_re = re.compile(r'<a\s(?:[^>]*?\s)?href="([^"]+)"[^>]*>', re.I)


def extract_links(body=''):
    """ body -> [urls]

    Return the href of every <a ... href="..."> tag found in the HTML
    string `body`, in document order, with any "#fragment" suffix removed.
    Duplicates are kept.  An empty body yields an empty list.
    """
    if not body:
        return []

    # Flatten newlines so a tag written as "<a\nhref=..." is handled like
    # any other whitespace-separated tag.
    data = body.replace("\n", ' ')
    return [href.split("#")[0] for href in ahref_re.findall(data)]
    

def excerpt(request, filename, title, body, blogname):
    """ filename,title,body => url,args

    Build the entry's URL and the urlencoded trackback arguments.

    request  -- object whose getConfiguration() returns a mapping; its
                'base_url' entry (falling back to BASEURL) is prefixed to
                `filename` to form the entry URL
    filename -- entry path appended to the base URL; a trailing alphabetic
                extension (such as ".txt") is stripped from the result
    title    -- sent as the 'title' argument
    body     -- entry HTML; when it contains <div class="excerpt">...</div>
                only that div's content is excerpted, otherwise all of it
    blogname -- sent as the 'blog_name' argument

    Returns (url, args), where args is the urlencoded string carrying
    'url', 'title', 'blog_name' and 'excerpt'.
    """

    cfg = request.getConfiguration()

    # Flatten newlines BEFORE looking for the excerpt div: '.' does not match
    # a newline, so a div spanning several lines would otherwise be missed
    # and the whole body used instead.
    text = body.replace('\n', ' ')

    # re.split yields [before, captured, after] when the div is present and
    # [text] when it is not; [:2][-1] picks the captured part or the whole.
    text = re.split(r'<div\s+class="excerpt">(.*?)</div>', text)[:2][-1]

    text = text.replace('&nbsp;', ' ')
    # Drop a leading '<a href="...">name</a>:' prefix (optionally after <p>).
    text = re.sub(r'^(<p>)?<a\s+href="\S+">[\w\s\.]+</a>:\s*', '', text)
    # Drop emphasised spans entirely, then strip every remaining tag.
    text = re.sub(r'<em>.*?</em>\.?\s*', '', text)
    text = re.sub(r'<.*?>', '', text)

    # Cap the excerpt at 252 characters.
    text = text[:252]

    url = cfg.get('base_url', BASEURL) + filename
    url = re.sub(r'\.[a-zA-Z]+$', '', url)

    arg = {
        'url': url,
        'title': title,
        'blog_name': blogname,
        'excerpt': text,
    }

    return url, urllib.urlencode(arg)


# Embedded trackback auto-discovery block (newlines are flattened before use).
tb_re = re.compile('(<rdf:RDF .*?</rdf:RDF>)')
# Pingback <link> element; not used by backrefs() below.
pb_re = re.compile('<link rel="pingback" href="([^"]+)" ?/?>')


def backrefs(href):
    """ href -> [trackbacks]

    Fetch the page at `href` (ignoring any "#fragment"), feed every embedded
    <rdf:RDF ...> block to the `rdf` SAX handler and return a list holding
    the trackback ping URL it recorded, or an empty list if none was found.
    At most one URL is returned.

    Errors raised by urllib.urlopen()/read() propagate to the caller; RDF
    blocks that are not well-formed XML are skipped.
    """
    base = href.split("#")[0]
    page = urllib.urlopen(base)
    try:
        data = page.read().replace('\n', ' ')
    finally:
        page.close()

    # rdf.ids is a class-level dict shared by every handler instance.  Reset
    # it so that a page without trackback RDF does not inherit the ping URL
    # recorded for a previously scanned page.
    rdf.ids.clear()

    for block in tb_re.findall(data):
        try:
            if __name__ == '__main__':
                print(block)
            parseString(block, rdf())
        except SAXParseException:
            # Malformed RDF: ignore this block and try the next one.
            pass

    trackback = []
    if 'tbping' in rdf.ids:
        trackback = [rdf.ids['tbping']]
    # Fallback lookup keyed by the fragment-less URL; only effective if the
    # handler records per-identifier entries in rdf.ids.
    if not trackback and href.find("#") > 0:
        if base in rdf.ids:
            trackback = [rdf.ids[base]]

    return trackback


class rdf(ContentHandler):
    """ xml -> dictionary of {'tbping' => trackback:ping|about|rdf:about}

    SAX handler for a trackback auto-discovery RDF block.  For each
    rdf:Description element it stores, under the key 'tbping' of `ids`, the
    element's trackback:ping attribute, or failing that its about or
    rdf:about attribute.  A later rdf:Description overwrites an earlier one.
    """

    # Class-level on purpose: results are read back through `rdf.ids`, so
    # the dict is shared by every instance and persists between parses.
    ids = {}

    def startElement(self, name, attrs):
        """Record the ping URL carried by an rdf:Description element."""
        if name != 'rdf:Description':
            return

        attrs = dict(attrs)
        # TODO: this should be restored to key on 'dc:identifier' (or
        # similar) rather than the fixed 'tbping' key.
        if 'trackback:ping' in attrs:
            self.ids['tbping'] = attrs['trackback:ping']
            if __name__ == '__main__':
                print(attrs['trackback:ping'])
        elif 'about' in attrs:
            self.ids['tbping'] = attrs['about']
        elif 'rdf:about' in attrs:
            self.ids['tbping'] = attrs['rdf:about']

def trackback(urls, args):
    """ urls,args -> None

    Ping all trackbacks encountered with the url, title, blog_name, and
    excerpt.

    urls -- iterable of trackback ping URLs
    args -- already-urlencoded argument string appended to each URL

    Pinging is best-effort: a failure on one URL does not stop the others
    and is not raised to the caller.  When run as a script, the server's
    response (or the failure) is printed.
    """
    for url in urls:
        # A ping URL that already carries a query string (e.g. "?tb_id=3")
        # must have the arguments appended with '&', not a second '?'.
        if '?' in url:
            address = url + "&" + args
        else:
            address = url + "?" + args

        try:
            infile = urllib.urlopen(address)
            try:
                if __name__ == '__main__':
                    print(infile.read())
            finally:
                infile.close()
        except Exception:
            # Deliberately swallowed so one bad server cannot abort the run;
            # KeyboardInterrupt/SystemExit are not caught here.
            if __name__ == '__main__':
                sys.stderr.write('trackback ping to %s failed: %s\n'
                                 % (url, sys.exc_info()[1]))