script: improve a bit create-table.py and regenerate the Georgian charsets.

- Avoid trailing whitespaces.
- Print which tool and version were used for the generation (to help for
  future debugging in case of discrepancies between versions or
  implementations).
This commit is contained in:
Jehan 2022-12-20 14:38:51 +01:00
parent 7875272a8c
commit bdd71d88f8
3 changed files with 53 additions and 36 deletions

View File

@ -50,38 +50,39 @@ language = \
'incomplete': [] 'incomplete': []
} }
## Table generated by script/create-table.py with iconv ## ## Table generated by script/create-table.py with: ##
## iconv (Debian GLIBC 2.31-13+deb11u5) 2.31 ##
# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF # # X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #
charmap = \ charmap = \
[ [
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X
#' ' '!' '"' '#' '$' '%' '&' ''' '(' ')' '*' '+' ',' '-' '.' '/' # ' ' '!' '"' '#' '$' '%' '&' ''' '(' ')' '*' '+' ',' '-' '.' '/'
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X
#'0' '1' '2' '3' '4' '5' '6' '7' '8' '9' ':' ';' '<' '=' '>' '?' # '0' '1' '2' '3' '4' '5' '6' '7' '8' '9' ':' ';' '<' '=' '>' '?'
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X
#'@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' # '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O'
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X
#'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z' '[' '\' ']' '^' '_' # 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z' '[' '\' ']' '^' '_'
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X
#'`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' # '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o'
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X
#'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z' '{' '|' '}' '~' CTR # 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z' '{' '|' '}' '~' CTR
CTR,CTR,SYM,LET,SYM,SYM,SYM,SYM,LET,SYM,LET,SYM,LET,CTR,CTR,CTR, # 8X CTR,CTR,SYM,LET,SYM,SYM,SYM,SYM,LET,SYM,LET,SYM,LET,CTR,CTR,CTR, # 8X
#CTR CTR '' 'ƒ' '„' '…' '†' '‡' 'ˆ' '‰' 'Š' '' 'Œ' CTR CTR CTR # CTR CTR '' 'ƒ' '„' '…' '†' '‡' 'ˆ' '‰' 'Š' '' 'Œ' CTR CTR CTR
CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,LET,CTR,CTR,LET, # 9X CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,LET,CTR,CTR,LET, # 9X
#CTR '' '' '“' '”' '•' '' '—' '˜' '™' 'š' '' 'œ' CTR CTR 'Ÿ' # CTR '' '' '“' '”' '•' '' '—' '˜' '™' 'š' '' 'œ' CTR CTR 'Ÿ'
CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,SYM,CTR,SYM,SYM, # AX CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,SYM,CTR,SYM,SYM, # AX
#CTR '¡' '¢' '£' '¤' '¥' '¦' '§' '¨' '©' 'ª' '«' '¬' CTR '®' '¯' # CTR '¡' '¢' '£' '¤' '¥' '¦' '§' '¨' '©' 'ª' '«' '¬' CTR '®' '¯'
SYM,SYM,NUM,NUM,SYM,LET,SYM,SYM,SYM,NUM,LET,SYM,SYM,SYM,SYM,SYM, # BX SYM,SYM,NUM,NUM,SYM,LET,SYM,SYM,SYM,NUM,LET,SYM,SYM,SYM,SYM,SYM, # BX
#'°' '±' '²' '³' '´' 'µ' '¶' '·' '¸' '¹' 'º' '»' '¼' '½' '¾' '¿' # '°' '±' '²' '³' '´' 'µ' '¶' '·' '¸' '¹' 'º' '»' '¼' '½' '¾' '¿'
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # CX LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # CX
#'ა' 'ბ' 'გ' 'დ' 'ე' 'ვ' 'ზ' 'თ' 'ი' 'კ' 'ლ' 'მ' 'ნ' 'ო' 'პ' 'ჟ' # 'ა' 'ბ' 'გ' 'დ' 'ე' 'ვ' 'ზ' 'თ' 'ი' 'კ' 'ლ' 'მ' 'ნ' 'ო' 'პ' 'ჟ'
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # DX LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # DX
#'რ' 'ს' 'ტ' 'უ' 'ფ' 'ქ' 'ღ' '' 'შ' 'ჩ' 'ც' 'ძ' 'წ' 'ჭ' 'ხ' 'ჯ' # 'რ' 'ს' 'ტ' 'უ' 'ფ' 'ქ' 'ღ' '' 'შ' 'ჩ' 'ც' 'ძ' 'წ' 'ჭ' 'ხ' 'ჯ'
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX
#'ჰ' 'ჱ' 'ჲ' 'ჳ' 'ჴ' 'ჵ' 'ჶ' 'ç' 'è' 'é' 'ê' 'ë' 'ì' 'í' 'î' 'ï' # 'ჰ' 'ჱ' 'ჲ' 'ჳ' 'ჴ' 'ჵ' 'ჶ' 'ç' 'è' 'é' 'ê' 'ë' 'ì' 'í' 'î' 'ï'
LET,LET,LET,LET,LET,LET,LET,SYM,LET,LET,LET,LET,LET,LET,LET,LET, # FX LET,LET,LET,LET,LET,LET,LET,SYM,LET,LET,LET,LET,LET,LET,LET,LET, # FX
#'ð' 'ñ' 'ò' 'ó' 'ô' 'õ' 'ö' '÷' 'ø' 'ù' 'ú' 'û' 'ü' 'ý' 'þ' 'ÿ' # 'ð' 'ñ' 'ò' 'ó' 'ô' 'õ' 'ö' '÷' 'ø' 'ù' 'ú' 'û' 'ü' 'ý' 'þ' 'ÿ'
] ]

View File

@ -50,38 +50,39 @@ language = \
'incomplete': [] 'incomplete': []
} }
## Table generated by script/create-table.py with iconv ## ## Table generated by script/create-table.py with: ##
## iconv (Debian GLIBC 2.31-13+deb11u5) 2.31 ##
# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF # # X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #
charmap = \ charmap = \
[ [
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X
#' ' '!' '"' '#' '$' '%' '&' ''' '(' ')' '*' '+' ',' '-' '.' '/' # ' ' '!' '"' '#' '$' '%' '&' ''' '(' ')' '*' '+' ',' '-' '.' '/'
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X
#'0' '1' '2' '3' '4' '5' '6' '7' '8' '9' ':' ';' '<' '=' '>' '?' # '0' '1' '2' '3' '4' '5' '6' '7' '8' '9' ':' ';' '<' '=' '>' '?'
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X
#'@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' # '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O'
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X
#'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z' '[' '\' ']' '^' '_' # 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z' '[' '\' ']' '^' '_'
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X
#'`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' # '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o'
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X
#'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z' '{' '|' '}' '~' CTR # 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z' '{' '|' '}' '~' CTR
CTR,CTR,SYM,LET,SYM,SYM,SYM,SYM,LET,SYM,LET,SYM,LET,CTR,CTR,CTR, # 8X CTR,CTR,SYM,LET,SYM,SYM,SYM,SYM,LET,SYM,LET,SYM,LET,CTR,CTR,CTR, # 8X
#CTR CTR '' 'ƒ' '„' '…' '†' '‡' 'ˆ' '‰' 'Š' '' 'Œ' CTR CTR CTR # CTR CTR '' 'ƒ' '„' '…' '†' '‡' 'ˆ' '‰' 'Š' '' 'Œ' CTR CTR CTR
CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,LET,CTR,CTR,LET, # 9X CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,LET,CTR,CTR,LET, # 9X
#CTR '' '' '“' '”' '•' '' '—' '˜' '™' 'š' '' 'œ' CTR CTR 'Ÿ' # CTR '' '' '“' '”' '•' '' '—' '˜' '™' 'š' '' 'œ' CTR CTR 'Ÿ'
CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,SYM,CTR,SYM,SYM, # AX CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,SYM,CTR,SYM,SYM, # AX
#CTR '¡' '¢' '£' '¤' '¥' '¦' '§' '¨' '©' 'ª' '«' '¬' CTR '®' '¯' # CTR '¡' '¢' '£' '¤' '¥' '¦' '§' '¨' '©' 'ª' '«' '¬' CTR '®' '¯'
SYM,SYM,NUM,NUM,SYM,LET,SYM,SYM,SYM,NUM,LET,SYM,SYM,SYM,SYM,SYM, # BX SYM,SYM,NUM,NUM,SYM,LET,SYM,SYM,SYM,NUM,LET,SYM,SYM,SYM,SYM,SYM, # BX
#'°' '±' '²' '³' '´' 'µ' '¶' '·' '¸' '¹' 'º' '»' '¼' '½' '¾' '¿' # '°' '±' '²' '³' '´' 'µ' '¶' '·' '¸' '¹' 'º' '»' '¼' '½' '¾' '¿'
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # CX LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # CX
#'ა' 'ბ' 'გ' 'დ' 'ე' 'ვ' 'ზ' 'ჱ' 'თ' 'ი' 'კ' 'ლ' 'მ' 'ნ' 'ჲ' 'ო' # 'ა' 'ბ' 'გ' 'დ' 'ე' 'ვ' 'ზ' 'ჱ' 'თ' 'ი' 'კ' 'ლ' 'მ' 'ნ' 'ჲ' 'ო'
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # DX LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # DX
#'პ' 'ჟ' 'რ' 'ს' 'ტ' 'ჳ' 'უ' 'ფ' 'ქ' 'ღ' '' 'შ' 'ჩ' 'ც' 'ძ' 'წ' # 'პ' 'ჟ' 'რ' 'ს' 'ტ' 'ჳ' 'უ' 'ფ' 'ქ' 'ღ' '' 'შ' 'ჩ' 'ც' 'ძ' 'წ'
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX
#'ჭ' 'ხ' 'ჴ' 'ჯ' 'ჰ' 'ჵ' 'æ' 'ç' 'è' 'é' 'ê' 'ë' 'ì' 'í' 'î' 'ï' # 'ჭ' 'ხ' 'ჴ' 'ჯ' 'ჰ' 'ჵ' 'æ' 'ç' 'è' 'é' 'ê' 'ë' 'ì' 'í' 'î' 'ï'
LET,LET,LET,LET,LET,LET,LET,SYM,LET,LET,LET,LET,LET,LET,LET,LET, # FX LET,LET,LET,LET,LET,LET,LET,SYM,LET,LET,LET,LET,LET,LET,LET,LET, # FX
#'ð' 'ñ' 'ò' 'ó' 'ô' 'õ' 'ö' '÷' 'ø' 'ù' 'ú' 'û' 'ü' 'ý' 'þ' 'ÿ' # 'ð' 'ñ' 'ò' 'ó' 'ô' 'õ' 'ö' '÷' 'ø' 'ù' 'ú' 'û' 'ü' 'ý' 'þ' 'ÿ'
] ]

View File

@ -60,8 +60,22 @@ charset = charset[0]
use_iconv = False use_iconv = False
try: try:
b' '.decode(charset) b' '.decode(charset)
dec_version = 'Python {}'.format(sys.version).splitlines()[0]
except LookupError: except LookupError:
use_iconv = True use_iconv = True
try:
call = subprocess.Popen(['iconv', '--version'],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL)
if call.poll() is not None:
(_, error) = call.communicate(input='')
sys.stderr.write('Error: `iconv` ended with error "{}".\n'.format(error))
exit(1)
(dec_version, _) = call.communicate(input='')
dec_version = dec_version.decode('UTF-8').splitlines()[0]
except FileNotFoundError:
sys.stderr.write('Error: `iconv` is not installed.\n')
exit(1)
def get_utf8_char(bchar, charset, iconv): def get_utf8_char(bchar, charset, iconv):
if iconv: if iconv:
@ -90,7 +104,8 @@ def get_utf8_char(bchar, charset, iconv):
# It would mean an illegal character. # It would mean an illegal character.
return None return None
print('## Table generated by {} with {} ##'.format(script_path, 'iconv' if use_iconv else 'Python decode()')) print('## Table generated by {} with: ##'.format(script_path))
print('## {} ##'.format(dec_version))
print('# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #') print('# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #')
print('charmap = \\') print('charmap = \\')
@ -120,18 +135,18 @@ for l in range(0x10):
sys.stdout.write(' # {}X'.format(hex(l)[-1:].upper())) sys.stdout.write(' # {}X'.format(hex(l)[-1:].upper()))
if has_printable: if has_printable:
sys.stdout.write('\n #') sys.stdout.write('\n#')
# The line has at least one printable character. Print in comment for # The line has at least one printable character. Print in comment for
# debugging. # debugging.
for c in range(0x10): for c in range(0x10):
char = bytes([c + l * 0x10]) char = bytes([c + l * 0x10])
char = get_utf8_char(char, charset, use_iconv) char = get_utf8_char(char, charset, use_iconv)
if char is None: if char is None:
sys.stdout.write('ILL ') sys.stdout.write(' ILL')
elif char == '\n' or char == '\r': elif char == '\n' or char == '\r':
sys.stdout.write('RET ') sys.stdout.write(' RET')
elif char.isalpha() or char.isdigit() or char.isprintable(): elif char.isalpha() or char.isdigit() or char.isprintable():
sys.stdout.write("'{}' ".format(char)) sys.stdout.write(" '{}'".format(char))
else: else:
sys.stdout.write('CTR ') sys.stdout.write(' CTR')
sys.stdout.write('\n]') sys.stdout.write('\n]')