Last Updated: February 25, 2016
·
484
· deontologician

Fetch IANA registered link relations

Does your application need to know which link relations are officially registered with IANA? This script fetches the registry and prints a Python file containing a dictionary that maps each link relation to its description.

While writing it, I discovered lxml's wonderful objectify module, which lets the script quickly navigate the XML structure. The script then uses a custom line-wrapping function to ensure the resulting Python output is PEP 8 compliant.

from __future__ import print_function
from __future__ import unicode_literals

import requests
from lxml import objectify
import datetime

# URL of the IANA link-relations registry in XML format; this is the document
# the script section at the bottom of the file downloads and parses.
iana_xml = 'http://www.iana.org/assignments/link-relations/link-relations.xml'


def linewrap(chunks, width=None, sep=' ', preamble='', line_prefix='',
             return_gen=False):
    """Greedily wrap whole chunks into lines no longer than ``width``.

    Arguments:
    chunks -- an iterable of strings, or a single string, which is first
        split on whitespace into words.
    width -- maximum line length, separators included. When falsy (None
        or 0) the current terminal width is used, falling back to 80
        columns when it cannot be determined.
    sep -- string placed between adjacent chunks on a line.
    preamble -- optional string placed at the start of the first line
        (joined to the first chunk with ``sep``).
    line_prefix -- optional string prepended to the first chunk of every
        line except the first; it counts towards that line's length.
    return_gen -- if true, return a generator that lazily yields the
        lines; otherwise return all lines joined with newlines.

    Chunks are never split: a chunk longer than ``width`` is emitted on a
    line of its own, which then exceeds ``width``.
    """
    sep_len = len(sep)

    if not width:
        # shutil.get_terminal_size only exists on Python 3.3+; fall back to
        # the conventional 80 columns on older interpreters.
        try:
            import shutil
            width = shutil.get_terminal_size().columns
        except (ImportError, AttributeError):
            width = 80

    # ``basestring`` exists only on Python 2 (where it covers both str and
    # unicode); on Python 3 plain ``str`` is the equivalent check.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if isinstance(chunks, string_types):
        chunks = chunks.split()

    def line_len(line):
        """Return the length ``line`` (a list of strings) has once joined."""
        if not line:
            return 0
        return sum(len(part) for part in line) + sep_len * (len(line) - 1)

    def gen():
        """Incrementally build and yield the wrapped lines."""
        line = [preamble] if preamble else []
        for chunk in chunks:
            # Only break when there is something to flush: an empty line
            # can only occur at the very start, and flushing it would emit
            # a spurious blank first line.
            if line and line_len(line) + sep_len + len(chunk) > width:
                yield sep.join(line)
                line = [line_prefix + chunk]
            else:
                line.append(chunk)
        if line:
            yield sep.join(line)

    return gen() if return_gen else '\n'.join(gen())


if __name__ == '__main__':
    # Fetch the registry; fail fast on a hung connection or an HTTP error
    # rather than trying to parse an error page as XML.
    response = requests.get(iana_xml, timeout=30)
    response.raise_for_status()
    # The parser is handed ASCII bytes: any non-ASCII characters in the
    # registry are silently dropped by the 'ignore' error handler.
    text = response.text.encode('ascii', 'ignore')
    xml = objectify.fromstring(text)

    # Map each relation name to its description text.
    iana_rels = {str(rec.value): str(rec.description)
                 for rec in xml.registry.record}
    keys = sorted(iana_rels)

    # Emit a Python module on stdout: a header comment followed by the
    # dictionary literal, one wrapped description per key.
    print('# This file was autogenerated')
    print()
    print('# Registry last updated on:', xml.updated)
    print('# This file generated on:', datetime.date.today())
    print()
    print('iana_rels = {')
    for key in keys:
        print('    {!r}: ('.format(key))
        # width=68 plus the 8-space indent, two quotes and a trailing space
        # keeps every emitted line within 79 columns.
        desc_list = list(linewrap(iana_rels[key], width=68, return_gen=True))
        # An empty description must still produce a string literal;
        # otherwise the bare parentheses would evaluate to an empty tuple.
        desc_list = desc_list or ['']
        last = len(desc_list) - 1
        for i, line in enumerate(desc_list):
            # Escape backslashes first so the backslashes added for the
            # double quotes are not themselves doubled.
            line_ = line.replace('\\', '\\\\').replace('"', '\\"')
            # Adjacent string literals are concatenated, so every line but
            # the last carries the space that wrapping removed.
            suffix = ' ' if i < last else ''
            print('        "{}{}"'.format(line_, suffix))
        print('    ),')
    print('}')

The gist is also available here:

https://gist.github.com/deontologician/5647004