view usr/src/data/locale/tools/mkwidths.py @ 20576:628a13d9b7eb

9979 Support python3 for in-gate tools Reviewed by: Toomas Soome <tsoome@me.com> Reviewed by: Peter Tribble <peter.tribble@gmail.com> Reviewed by: Alexander Pyhalov <apyhalov@gmail.com> Reviewed by: Andrew Stormont <andyjstormont@gmail.com> Approved by: Dan McDonald <danmcd@joyent.com>
author Andy Fiddaman <omnios@citrus-it.co.uk>
date Thu, 15 Nov 2018 10:17:46 +0000
parents d2005a038eb2
children
line wrap: on
line source

#!/bin/python
"""

This file and its contents are supplied under the terms of the
Common Development and Distribution License ("CDDL"), version 1.0.
You may only use this file in accordance with the terms of version
1.0 of the CDDL.

A full copy of the text of the CDDL should have accompanied this
source.  A copy of the CDDL is also available via the Internet at
http://www.illumos.org/license/CDDL.

Copyright 2013 DEY Storage Systems, Inc.

Scratch script to produce the widths.cm content from the widths text
files.  It converts numeric unicode to symbolic forms.
"""

# Copyright 2018 OmniOS Community Edition (OmniOSce) Association.

from __future__ import print_function

# Lookup table filled by load_utf8(): keyed by the second column of each
# UTF-8.cm entry (the value string) and holding the first column (the
# symbol).  do_width_file() queries it with the strings built by u8_str().
SYMBOLS = {}


def u8_str(val):
    r"""
    Convert a numeric value to a string representing the UTF-8 encoding
    of the numeric value, which should be a valid Unicode code point.

    Each byte of the encoding is rendered as a backslash, an "x" and the
    byte value in upper-case hexadecimal, e.g. 0x4E00 gives the text
    \xE4\xB8\x80.  The hex digits are not zero-padded, so a byte below
    0x10 is rendered with a single digit.

    Works under both Python 2 and Python 3.
    """
    try:
        # Python 2: unichr() builds the one-character unicode string.
        encoded = unichr(val).encode('utf-8')
    except NameError:
        # Python 3 has no unichr(); chr() covers the full code point range.
        # 'surrogatepass' keeps the Python 2 behaviour of encoding
        # surrogate code points instead of raising UnicodeEncodeError.
        encoded = chr(val).encode('utf-8', 'surrogatepass')
    # Iterating a bytearray yields integers on both Python versions, whereas
    # indexing the encoded value gives a str on Python 2 and an int on 3.
    return "".join("\\x%X" % byte for byte in bytearray(encoded))


def load_utf8():
    """
    Load the UTF-8 character map file into the global SYMBOLS dictionary.

    Reads "UTF-8.cm" from the current working directory.  Every line that
    splits into exactly two whitespace-separated fields and whose first
    field does not start with "#" is taken as a (symbol, value) pair and
    stored as SYMBOLS[value] = symbol.  All other lines are skipped.  If
    the same value appears more than once, the last symbol wins.
    """
    # The context manager closes the file even if parsing raises.
    with open("UTF-8.cm") as charmap:
        for line in charmap:
            items = line.split()
            if len(items) != 2 or items[0].startswith("#"):
                continue
            sym, val = items
            SYMBOLS[val] = sym


def do_width_file(width, filename):
    """
    Print a width entry for every known code point listed in a widths file.

    The file holds pairs of unicode values (hex); each pair is an inclusive
    range of code points that all have the given width.  A line may carry
    several pairs; a trailing unpaired value on a line is ignored.  Lines
    starting with "#" are skipped.

    For each code point in a range, its UTF-8 string form (see u8_str) is
    looked up in SYMBOLS; code points without a symbol are silently
    skipped, the others are printed as symbol, tab, width.

    width    -- integer width printed next to each symbol
    filename -- path of the widths text file to read

    Raises ValueError if a value on a non-comment line is not valid hex.
    """
    # The context manager closes the file even if a value fails to parse.
    with open(filename) as width_file:
        for line in width_file:
            if line.startswith("#"):
                continue
            vals = line.split()
            # Step through the values two at a time: (start, end) pairs.
            for idx in range(0, len(vals) - 1, 2):
                start = int(vals[idx], 16)
                end = int(vals[idx + 1], 16)
                for val in range(start, end + 1):
                    sym = SYMBOLS.get(u8_str(val))
                    if sym is not None:
                        print("%s\t%d" % (sym, width))


if __name__ == "__main__":
    # Emit the width section on stdout: a "WIDTH" header, the entries for
    # each widths file in turn, then the "END WIDTH" trailer.
    print("WIDTH")
    load_utf8()
    for cell_width, source in ((0, "widths-0.txt"), (2, "widths-2.txt")):
        do_width_file(cell_width, source)
    print("END WIDTH")