# -*- coding: utf-8 -*-
"""Generates, from the tables of the file "units.mdwn", a xml file for use with
udunits2.

Author: Pierre de Buyl
License: BSD 3-clause
"""

import codecs
import re

# r is regular expression to match unit symbols and the associated exponents.
# Group "symbol" is the unit symbol, group "exp" an optional signed one-digit
# exponent. Because \w+ is greedy and also matches digits, a trailing unsigned
# digit ends up in "symbol" (e.g. "m2"); "exp" is only filled for negative
# exponents (e.g. "s-2" gives symbol "s" and exp "-2").
r = re.compile(r"(?P<symbol>\w+)(?P<exp>-?\d)?")

# Maps Unicode superscript characters to their plain ASCII counterparts.
# Built once here instead of once per converted character.
_SUPERSCRIPT_TO_ASCII = dict(zip(u"⁻⁰¹²³⁴⁵⁶⁷⁸⁹", u"-0123456789"))


def convert_def(s):
    """
    Convert a unit definition from the H5MD table format to a udunits2
    acceptable format.

    The definition is split on whitespace and every token is matched against
    the module-level pattern ``r``:

    * a token without a separate exponent is copied unchanged,
    * a token with a negative exponent becomes ``1/symbol``, followed by
      ``^n`` when the exponent is not ``-1``.

    The converted tokens are joined by single spaces.

    Raises IndexError if a token contains nothing that ``r`` can match.
    """
    parts = []
    for token in s.split():
        symbol, exp = r.findall(token)[0]
        if not exp:
            parts.append(symbol)
        elif exp.startswith(u'-'):
            inverse = u'1/' + symbol
            if exp != u'-1':
                inverse += u'^' + exp[1:]
            parts.append(inverse)
        else:
            # Not reachable with the current pattern (see the comment on
            # ``r``); kept so that a stricter pattern would still produce a
            # well-formed, space-separated term.
            parts.append(symbol if exp == u'1' else symbol + u'^' + exp)
    return u' '.join(parts)


def convert_superscript(s):
    """
    Replace the superscript characters of s by plain ASCII digits and minus
    signs.

    If the converted text, ignoring leading whitespace, starts with the
    character ``1``, the text that follows that ``1`` is returned. Otherwise
    the converted text is returned unchanged, leading whitespace included.
    """
    tmp = u''.join(_SUPERSCRIPT_TO_ASCII.get(c, c) for c in s)
    stripped = tmp.lstrip()
    if stripped[:1] == u'1':
        return stripped[1:]
    return tmp


# Initialize the variables for the table parser below.
IN_TABLE = 1
state = 0
tables = []

# Parse the file 'units.mdwn' to extract the tables.
with codecs.open('units.mdwn', 'rb', encoding='utf8') as f:
    ll = f.readlines()

for l in ll:
    # A line starting with '+-' outside of a table opens a new table.
    if state == 0 and l.lstrip().startswith('+-'):
        state = IN_TABLE
        t = []
        tables.append(t)
        continue
    if state == IN_TABLE:
        if l.lstrip().startswith('|'):
            # Store the row as its stripped cells joined by ';'. The empty
            # strings produced by the outer '|' delimiters are dropped.
            t.append(
                ";".join([i.strip() for i in l.strip().split('|') if len(i) > 0]))
        elif len(l.strip()) == 0:
            # A blank line closes the current table.
            state = 0

# NOTE(review): this literal contains only whitespace although the module
# docstring announces XML output; the markup appears to have been lost and
# should be restored from the upstream file.
# The single-argument print(...) form behaves identically on Python 2 and 3.
print(""" """)

# The following units require a special handling because they carry no
# dimension.
dimensionless = [u'rad', u'sr']

# The following units require a special handling because they are
# aliases to existing units.
aliases = [u'Bq', u'Sv']

# NOTE(review): several string literals below contain only whitespace and
# '%s' placeholders; the markup they were meant to carry appears to have been
# lost and should be restored from the upstream file. They are reproduced
# unchanged here.
# All output uses the single-argument print(...) form, which behaves
# identically on Python 2 and Python 3.


def format_alias(flag):
    """Return the text appended after the symbol of an alias unit.

    Returns the alias text when flag is true and an empty string otherwise.
    """
    if flag:
        return """ """
    return ""


def write_xml(table, base=False):
    """Print one entry for every row of table.

    table is a list of rows, each row being a string of ';'-separated fields.
    A row whose second field is ``symbol`` is skipped (presumably the table
    header -- confirm against units.mdwn). For the other rows the second field
    is compared against the module-level ``dimensionless`` and ``aliases``
    lists, the third field is printed, and the second field is printed as well
    when it differs from the third.

    When base is false and the unit is not dimensionless, the text following
    the first '=' of the fourth field is passed through convert_superscript
    and convert_def and printed.
    """
    for row in table:
        fields = row.split(';')
        symbol = fields[1]
        if symbol == u'symbol':
            continue
        name = fields[2]
        print(""" """)
        if symbol in dimensionless:
            print(""" """)
        elif base:
            print(""" """)
        else:
            definition = fields[3].split('=')[1]
            print(u""" %s""" % convert_def(convert_superscript(definition)))
        if symbol in aliases:
            print(""" """)
        print(""" %s""" % name)
        if symbol != name:
            print(""" %s%s""" % (symbol, format_alias(symbol in aliases)))
        print(""" """)


def convert_prefix(s):
    """Convert a power of ten written with superscripts to 'E' notation.

    Superscript characters are replaced by their ASCII counterparts, then the
    leading ``10`` is replaced by ``1E`` (e.g. u'10⁻³' gives u'1E-3').

    Raises ValueError if the converted text does not start with ``10``.
    """
    # Build the translation table once per call rather than once per
    # character.
    to_ascii = dict(zip(u"⁻⁰¹²³⁴⁵⁶⁷⁸⁹", u"-0123456789"))
    tmp = u''.join(to_ascii.get(c, c) for c in s)
    if tmp[:2] != u'10':
        raise ValueError("Bad prefix.")
    return u'1E' + tmp[2:]


def write_xml_prefixes(table):
    """Print one entry for every row of table except the first one.

    Each row is a string of ';'-separated fields. The third field is converted
    with convert_prefix, the first field is printed without its last
    character, and the second field is printed as is.
    """
    for row in table[1:]:
        fields = row.split(';')
        print(u""" %s %s %s """ % (convert_prefix(fields[2]), fields[0][:-1], fields[1]))


write_xml(tables[1], base=True)
print("")
write_xml(tables[2])
print("")
write_xml_prefixes(tables[3])
print("""""")