mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-06 08:46:40 +08:00
script: improve create-table.py a bit and regenerate the Georgian charsets.
- Avoid trailing whitespace. - Print which tool and version were used for the generation (to help with future debugging in case of discrepancies between versions or implementations).
This commit is contained in:
parent
7875272a8c
commit
bdd71d88f8
@ -50,38 +50,39 @@ language = \
|
||||
'incomplete': []
|
||||
}
|
||||
|
||||
## Table generated by script/create-table.py with iconv ##
|
||||
## Table generated by script/create-table.py with: ##
|
||||
## iconv (Debian GLIBC 2.31-13+deb11u5) 2.31 ##
|
||||
# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #
|
||||
charmap = \
|
||||
[
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X
|
||||
#' ' '!' '"' '#' '$' '%' '&' ''' '(' ')' '*' '+' ',' '-' '.' '/'
|
||||
# ' ' '!' '"' '#' '$' '%' '&' ''' '(' ')' '*' '+' ',' '-' '.' '/'
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X
|
||||
#'0' '1' '2' '3' '4' '5' '6' '7' '8' '9' ':' ';' '<' '=' '>' '?'
|
||||
# '0' '1' '2' '3' '4' '5' '6' '7' '8' '9' ':' ';' '<' '=' '>' '?'
|
||||
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X
|
||||
#'@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O'
|
||||
# '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O'
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X
|
||||
#'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z' '[' '\' ']' '^' '_'
|
||||
# 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z' '[' '\' ']' '^' '_'
|
||||
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X
|
||||
#'`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o'
|
||||
# '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o'
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X
|
||||
#'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z' '{' '|' '}' '~' CTR
|
||||
# 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z' '{' '|' '}' '~' CTR
|
||||
CTR,CTR,SYM,LET,SYM,SYM,SYM,SYM,LET,SYM,LET,SYM,LET,CTR,CTR,CTR, # 8X
|
||||
#CTR CTR '‚' 'ƒ' '„' '…' '†' '‡' 'ˆ' '‰' 'Š' '‹' 'Œ' CTR CTR CTR
|
||||
# CTR CTR '‚' 'ƒ' '„' '…' '†' '‡' 'ˆ' '‰' 'Š' '‹' 'Œ' CTR CTR CTR
|
||||
CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,LET,CTR,CTR,LET, # 9X
|
||||
#CTR '‘' '’' '“' '”' '•' '–' '—' '˜' '™' 'š' '›' 'œ' CTR CTR 'Ÿ'
|
||||
# CTR '‘' '’' '“' '”' '•' '–' '—' '˜' '™' 'š' '›' 'œ' CTR CTR 'Ÿ'
|
||||
CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,SYM,CTR,SYM,SYM, # AX
|
||||
#CTR '¡' '¢' '£' '¤' '¥' '¦' '§' '¨' '©' 'ª' '«' '¬' CTR '®' '¯'
|
||||
# CTR '¡' '¢' '£' '¤' '¥' '¦' '§' '¨' '©' 'ª' '«' '¬' CTR '®' '¯'
|
||||
SYM,SYM,NUM,NUM,SYM,LET,SYM,SYM,SYM,NUM,LET,SYM,SYM,SYM,SYM,SYM, # BX
|
||||
#'°' '±' '²' '³' '´' 'µ' '¶' '·' '¸' '¹' 'º' '»' '¼' '½' '¾' '¿'
|
||||
# '°' '±' '²' '³' '´' 'µ' '¶' '·' '¸' '¹' 'º' '»' '¼' '½' '¾' '¿'
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # CX
|
||||
#'ა' 'ბ' 'გ' 'დ' 'ე' 'ვ' 'ზ' 'თ' 'ი' 'კ' 'ლ' 'მ' 'ნ' 'ო' 'პ' 'ჟ'
|
||||
# 'ა' 'ბ' 'გ' 'დ' 'ე' 'ვ' 'ზ' 'თ' 'ი' 'კ' 'ლ' 'მ' 'ნ' 'ო' 'პ' 'ჟ'
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # DX
|
||||
#'რ' 'ს' 'ტ' 'უ' 'ფ' 'ქ' 'ღ' 'ყ' 'შ' 'ჩ' 'ც' 'ძ' 'წ' 'ჭ' 'ხ' 'ჯ'
|
||||
# 'რ' 'ს' 'ტ' 'უ' 'ფ' 'ქ' 'ღ' 'ყ' 'შ' 'ჩ' 'ც' 'ძ' 'წ' 'ჭ' 'ხ' 'ჯ'
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX
|
||||
#'ჰ' 'ჱ' 'ჲ' 'ჳ' 'ჴ' 'ჵ' 'ჶ' 'ç' 'è' 'é' 'ê' 'ë' 'ì' 'í' 'î' 'ï'
|
||||
# 'ჰ' 'ჱ' 'ჲ' 'ჳ' 'ჴ' 'ჵ' 'ჶ' 'ç' 'è' 'é' 'ê' 'ë' 'ì' 'í' 'î' 'ï'
|
||||
LET,LET,LET,LET,LET,LET,LET,SYM,LET,LET,LET,LET,LET,LET,LET,LET, # FX
|
||||
#'ð' 'ñ' 'ò' 'ó' 'ô' 'õ' 'ö' '÷' 'ø' 'ù' 'ú' 'û' 'ü' 'ý' 'þ' 'ÿ'
|
||||
# 'ð' 'ñ' 'ò' 'ó' 'ô' 'õ' 'ö' '÷' 'ø' 'ù' 'ú' 'û' 'ü' 'ý' 'þ' 'ÿ'
|
||||
]
|
||||
|
||||
@ -50,38 +50,39 @@ language = \
|
||||
'incomplete': []
|
||||
}
|
||||
|
||||
## Table generated by script/create-table.py with iconv ##
|
||||
## Table generated by script/create-table.py with: ##
|
||||
## iconv (Debian GLIBC 2.31-13+deb11u5) 2.31 ##
|
||||
# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #
|
||||
charmap = \
|
||||
[
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X
|
||||
#' ' '!' '"' '#' '$' '%' '&' ''' '(' ')' '*' '+' ',' '-' '.' '/'
|
||||
# ' ' '!' '"' '#' '$' '%' '&' ''' '(' ')' '*' '+' ',' '-' '.' '/'
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X
|
||||
#'0' '1' '2' '3' '4' '5' '6' '7' '8' '9' ':' ';' '<' '=' '>' '?'
|
||||
# '0' '1' '2' '3' '4' '5' '6' '7' '8' '9' ':' ';' '<' '=' '>' '?'
|
||||
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X
|
||||
#'@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O'
|
||||
# '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O'
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X
|
||||
#'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z' '[' '\' ']' '^' '_'
|
||||
# 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z' '[' '\' ']' '^' '_'
|
||||
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X
|
||||
#'`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o'
|
||||
# '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o'
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X
|
||||
#'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z' '{' '|' '}' '~' CTR
|
||||
# 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z' '{' '|' '}' '~' CTR
|
||||
CTR,CTR,SYM,LET,SYM,SYM,SYM,SYM,LET,SYM,LET,SYM,LET,CTR,CTR,CTR, # 8X
|
||||
#CTR CTR '‚' 'ƒ' '„' '…' '†' '‡' 'ˆ' '‰' 'Š' '‹' 'Œ' CTR CTR CTR
|
||||
# CTR CTR '‚' 'ƒ' '„' '…' '†' '‡' 'ˆ' '‰' 'Š' '‹' 'Œ' CTR CTR CTR
|
||||
CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,LET,CTR,CTR,LET, # 9X
|
||||
#CTR '‘' '’' '“' '”' '•' '–' '—' '˜' '™' 'š' '›' 'œ' CTR CTR 'Ÿ'
|
||||
# CTR '‘' '’' '“' '”' '•' '–' '—' '˜' '™' 'š' '›' 'œ' CTR CTR 'Ÿ'
|
||||
CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,SYM,CTR,SYM,SYM, # AX
|
||||
#CTR '¡' '¢' '£' '¤' '¥' '¦' '§' '¨' '©' 'ª' '«' '¬' CTR '®' '¯'
|
||||
# CTR '¡' '¢' '£' '¤' '¥' '¦' '§' '¨' '©' 'ª' '«' '¬' CTR '®' '¯'
|
||||
SYM,SYM,NUM,NUM,SYM,LET,SYM,SYM,SYM,NUM,LET,SYM,SYM,SYM,SYM,SYM, # BX
|
||||
#'°' '±' '²' '³' '´' 'µ' '¶' '·' '¸' '¹' 'º' '»' '¼' '½' '¾' '¿'
|
||||
# '°' '±' '²' '³' '´' 'µ' '¶' '·' '¸' '¹' 'º' '»' '¼' '½' '¾' '¿'
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # CX
|
||||
#'ა' 'ბ' 'გ' 'დ' 'ე' 'ვ' 'ზ' 'ჱ' 'თ' 'ი' 'კ' 'ლ' 'მ' 'ნ' 'ჲ' 'ო'
|
||||
# 'ა' 'ბ' 'გ' 'დ' 'ე' 'ვ' 'ზ' 'ჱ' 'თ' 'ი' 'კ' 'ლ' 'მ' 'ნ' 'ჲ' 'ო'
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # DX
|
||||
#'პ' 'ჟ' 'რ' 'ს' 'ტ' 'ჳ' 'უ' 'ფ' 'ქ' 'ღ' 'ყ' 'შ' 'ჩ' 'ც' 'ძ' 'წ'
|
||||
# 'პ' 'ჟ' 'რ' 'ს' 'ტ' 'ჳ' 'უ' 'ფ' 'ქ' 'ღ' 'ყ' 'შ' 'ჩ' 'ც' 'ძ' 'წ'
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX
|
||||
#'ჭ' 'ხ' 'ჴ' 'ჯ' 'ჰ' 'ჵ' 'æ' 'ç' 'è' 'é' 'ê' 'ë' 'ì' 'í' 'î' 'ï'
|
||||
# 'ჭ' 'ხ' 'ჴ' 'ჯ' 'ჰ' 'ჵ' 'æ' 'ç' 'è' 'é' 'ê' 'ë' 'ì' 'í' 'î' 'ï'
|
||||
LET,LET,LET,LET,LET,LET,LET,SYM,LET,LET,LET,LET,LET,LET,LET,LET, # FX
|
||||
#'ð' 'ñ' 'ò' 'ó' 'ô' 'õ' 'ö' '÷' 'ø' 'ù' 'ú' 'û' 'ü' 'ý' 'þ' 'ÿ'
|
||||
# 'ð' 'ñ' 'ò' 'ó' 'ô' 'õ' 'ö' '÷' 'ø' 'ù' 'ú' 'û' 'ü' 'ý' 'þ' 'ÿ'
|
||||
]
|
||||
|
||||
@ -60,8 +60,22 @@ charset = charset[0]
|
||||
use_iconv = False
|
||||
try:
|
||||
b' '.decode(charset)
|
||||
dec_version = 'Python {}'.format(sys.version).splitlines()[0]
|
||||
except LookupError:
|
||||
use_iconv = True
|
||||
try:
|
||||
call = subprocess.Popen(['iconv', '--version'],
|
||||
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
|
||||
stderr=subprocess.DEVNULL)
|
||||
if call.poll() is not None:
|
||||
(_, error) = call.communicate(input='')
|
||||
sys.stderr.write('Error: `iconv` ended with error "{}".\n'.format(error))
|
||||
exit(1)
|
||||
(dec_version, _) = call.communicate(input='')
|
||||
dec_version = dec_version.decode('UTF-8').splitlines()[0]
|
||||
except FileNotFoundError:
|
||||
sys.stderr.write('Error: `iconv` is not installed.\n')
|
||||
exit(1)
|
||||
|
||||
def get_utf8_char(bchar, charset, iconv):
|
||||
if iconv:
|
||||
@ -90,7 +104,8 @@ def get_utf8_char(bchar, charset, iconv):
|
||||
# It would mean an illegal character.
|
||||
return None
|
||||
|
||||
print('## Table generated by {} with {} ##'.format(script_path, 'iconv' if use_iconv else 'Python decode()'))
|
||||
print('## Table generated by {} with: ##'.format(script_path))
|
||||
print('## {} ##'.format(dec_version))
|
||||
|
||||
print('# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #')
|
||||
print('charmap = \\')
|
||||
@ -120,18 +135,18 @@ for l in range(0x10):
|
||||
sys.stdout.write(' # {}X'.format(hex(l)[-1:].upper()))
|
||||
|
||||
if has_printable:
|
||||
sys.stdout.write('\n #')
|
||||
sys.stdout.write('\n#')
|
||||
# The line has at least one printable character. Print in comment for
|
||||
# debugging.
|
||||
for c in range(0x10):
|
||||
char = bytes([c + l * 0x10])
|
||||
char = get_utf8_char(char, charset, use_iconv)
|
||||
if char is None:
|
||||
sys.stdout.write('ILL ')
|
||||
sys.stdout.write(' ILL')
|
||||
elif char == '\n' or char == '\r':
|
||||
sys.stdout.write('RET ')
|
||||
sys.stdout.write(' RET')
|
||||
elif char.isalpha() or char.isdigit() or char.isprintable():
|
||||
sys.stdout.write("'{}' ".format(char))
|
||||
sys.stdout.write(" '{}'".format(char))
|
||||
else:
|
||||
sys.stdout.write('CTR ')
|
||||
sys.stdout.write(' CTR')
|
||||
sys.stdout.write('\n]')
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user