# url2htmlcite

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# generate HTML cite element from URL
# needs html5lib and requests modules

from cgi import escape
from html5lib import parse
from requests import get
from sys import argv, stderr

# Read the target URL from the first command-line argument.
try:
    url = argv[1]
except IndexError:
    stderr.write('Usage:\n\t%s [URL]\n' % argv[0])
    # Stop here: without a URL there is nothing to fetch, and falling
    # through would only fail later with a NameError on `url`.
    raise SystemExit(1)

# Fetch the page, identifying this script to the server.  The timeout (in
# seconds) keeps the script from hanging forever on an unresponsive host.
headers = {'User-Agent': 'url2htmlcite/2016-04-23'}
html = get(url, headers=headers, timeout=30).text
# NOTE(review): namespaceHTMLElements=False is presumably what allows the
# plain 'title' tag name to match below -- confirm against html5lib docs.
document = parse(html, treebuilder='etree', namespaceHTMLElements=False)

# Use only the first <title> in tree order, so at most one cite line is
# printed even when the markup contains several <title> elements.  Nothing
# is printed when the page has no <title> at all.
title_element = next(document.iter('title'), None)
if title_element is not None:
    # .text is None for an empty <title></title>; treat it as empty text
    # so it is handled the same way as a whitespace-only title.
    title_text = (title_element.text or '').strip()
    # quote=True also escapes '"', which is required because the URL is
    # placed inside a double-quoted href attribute.
    print('<a href="%s"><cite>%s</cite></a>'
          % (escape(url, quote=True), escape(title_text)))