OpenFPGA/libs/EXTERNAL/tcl8.6.12/tools/uniParse.tcl

# uniParse.tcl --
#
#	This program parses the UnicodeData file and generates the
#	corresponding tclUniData.c file with compressed character
#	data tables.  The input to this program should be the latest
#	UnicodeData file from:
#	    ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
#
# Copyright (c) 1998-1999 Scriptics Corporation.
# All rights reserved.


namespace eval uni {
    set shift 5;		# number of bits of data within a page
				# This value can be adjusted to find the
				# best split to minimize table size

    variable pMap;		# map from page to page index, each entry is
				# an index into the pages table, indexed by
				# page number
    variable pages;		# map from page index to page info, each
				# entry is a list of indices into the groups
				# table, the list is indexed by the offset
    variable groups;		# list of character info values, indexed by
				# group number, initialized with the
				# unassigned character group

    variable categories {
	Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp
	Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So
    };				# Ordered list of character categories, must
				# match the enumeration in the header file.
}

proc uni::getValue {items index} {
    variable categories

    # Extract character info

    set category [lindex $items 2]
    if {[scan [lindex $items 12] %x toupper] == 1} {
	set toupper [expr {$index - $toupper}]
    } else {
	set toupper 0
    }
    if {[scan [lindex $items 13] %x tolower] == 1} {
	set tolower [expr {$tolower - $index}]
    } else {
	set tolower 0
    }
    if {[scan [lindex $items 14] %x totitle] == 1} {
	set totitle [expr {$index - $totitle}]
    } elseif {$tolower} {
	set totitle 0
    } else {
	set totitle $toupper
    }

    set categoryIndex [lsearch -exact $categories $category]
    if {$categoryIndex < 0} {
	error "Unexpected character category: $index($category)"
    }

    return [list $categoryIndex $toupper $tolower $totitle]
}

proc uni::getGroup {value} {
    variable groups

    set gIndex [lsearch -exact $groups $value]
    if {$gIndex < 0} {
	set gIndex [llength $groups]
	lappend groups $value
    }
    return $gIndex
}

proc uni::addPage {info} {
    variable pMap
    variable pages
    variable shift

    set pIndex [lsearch -exact $pages $info]
    if {$pIndex < 0} {
	set pIndex [llength $pages]
	lappend pages $info
    }
    lappend pMap [expr {$pIndex << $shift}]
    return
}

proc uni::buildTables {data} {
    variable shift

    variable pMap {}
    variable pages {}
    variable groups {{0 0 0 0}}
    variable next 0
    set info {}			;# temporary page info

    set mask [expr {(1 << $shift) - 1}]

    foreach line [split $data \n] {
	if {$line eq ""} {
	    if {!($next & $mask)} {
		# next character is already on page boundary
		continue
	    }
	    # fill remaining page
	    set line [format %X [expr {($next-1)|$mask}]]
	    append line ";;Cn;0;ON;;;;;N;;;;;\n"
	}

	set items [split $line \;]

	scan [lindex $items 0] %x index
	if {$index > 0x3FFFF} then {
	    # Ignore characters > plane 3
	    continue
	}
	set index [format %d $index]

	set gIndex [getGroup [getValue $items $index]]

	# Since the input table omits unassigned characters, these will
	# show up as gaps in the index sequence.  There are a few special cases
	# where the gaps correspond to a uniform block of assigned characters.
	# These are indicated as such in the character name.

	# Enter all unassigned characters up to the current character.
	if {($index > $next) \
		&& ![regexp "Last>$" [lindex $items 1]]} {
	    for {} {$next < $index} {incr next} {
		lappend info 0
		if {($next & $mask) == $mask} {
		    addPage $info
		    set info {}
		}
	    }
	}

	# Enter all assigned characters up to the current character
	for {set i $next} {$i <= $index} {incr i} {
	    # Add the group index to the info for the current page
	    lappend info $gIndex

	    # If this is the last entry in the page, add the page
	    if {($i & $mask) == $mask} {
		addPage $info
		set info {}
	    }
	}
	set next [expr {$index + 1}]
    }
    return
}

proc uni::main {} {
    global argc argv0 argv
    variable pMap
    variable pages
    variable groups
    variable shift
    variable next

    if {$argc != 2} {
	puts stderr "\nusage: $argv0 <datafile> <outdir>\n"
	exit 1
    }
    set f [open [lindex $argv 0] r]
    set data [read $f]
    close $f

    buildTables $data
    puts "X = [llength $pMap]  Y= [llength $pages]  A= [llength $groups]"
    set size [expr {[llength $pMap]*2 + ([llength $pages]<<$shift)}]
    puts "shift = $shift, space = $size"

    set f [open [file join [lindex $argv 1] tclUniData.c] w]
    fconfigure $f -translation lf -encoding utf-8
    puts $f "/*
 * tclUniData.c --
 *
 *	Declarations of Unicode character information tables.  This file is
 *	automatically generated by the tools/uniParse.tcl script.  Do not
 *	modify this file by hand.
 *
 * Copyright (c) 1998 Scriptics Corporation.
 * All rights reserved.
 */

/*
 * A 16-bit Unicode character is split into two parts in order to index
 * into the following tables.  The lower OFFSET_BITS comprise an offset
 * into a page of characters.  The upper bits comprise the page number.
 */

#define OFFSET_BITS $shift

/*
 * The pageMap is indexed by page number and returns an alternate page number
 * that identifies a unique page of characters.  Many Unicode characters map
 * to the same alternate page number.
 */

static const unsigned short pageMap\[\] = {"
    set line "    "
    set last [expr {[llength $pMap] - 1}]
    for {set i 0} {$i <= $last} {incr i} {
	if {$i == [expr {0x10000 >> $shift}]} {
	    set line [string trimright $line " \t,"]
	    puts $f $line
	    set lastpage [expr {[lindex $line end] >> $shift}]
	    puts stdout "lastpage: $lastpage"
	    puts $f "#if TCL_UTF_MAX > 3 || TCL_MAJOR_VERSION > 8 || TCL_MINOR_VERSION > 6"
	    set line "    ,"
	}
	append line [lindex $pMap $i]
	if {$i != $last} {
	    append line ", "
	}
	if {[string length $line] > 70} {
	    puts $f [string trimright $line]
	    set line "    "
	}
    }
    puts $f $line
    puts $f "#endif /* TCL_UTF_MAX > 3 */"
    puts $f "};

/*
 * The groupMap is indexed by combining the alternate page number with
 * the page offset and returns a group number that identifies a unique
 * set of character attributes.
 */

static const unsigned char groupMap\[\] = {"
    set line "    "
    set lasti [expr {[llength $pages] - 1}]
    for {set i 0} {$i <= $lasti} {incr i} {
	set page [lindex $pages $i]
	set lastj [expr {[llength $page] - 1}]
	if {$i == ($lastpage + 1)} {
	    puts $f [string trimright $line " \t,"]
	    puts $f "#if TCL_UTF_MAX > 3 || TCL_MAJOR_VERSION > 8 || TCL_MINOR_VERSION > 6"
	    set line "    ,"
	}
	for {set j 0} {$j <= $lastj} {incr j} {
	    append line [lindex $page $j]
	    if {$j != $lastj || $i != $lasti} {
		append line ", "
	    }
	    if {[string length $line] > 70} {
		puts $f [string trimright $line]
		set line "    "
	    }
	}
    }
    puts $f $line
    puts $f "#endif /* TCL_UTF_MAX > 3 */"
    puts $f "};

/*
 * Each group represents a unique set of character attributes.  The attributes
 * are encoded into a 32-bit value as follows:
 *
 * Bits 0-4	Character category: see the constants listed below.
 *
 * Bits 5-7	Case delta type: 000 = identity
 *				 010 = add delta for lower
 *				 011 = add delta for lower, add 1 for title
 *				 100 = subtract delta for title/upper
 *				 101 = sub delta for upper, sub 1 for title
 *				 110 = sub delta for upper, add delta for lower
 *				 111 = subtract delta for upper
 *
 * Bits 8-31	Case delta: delta for case conversions.  This should be the
 *			    highest field so we can easily sign extend.
 */

static const int groups\[\] = {"
    set line "    "
    set last [expr {[llength $groups] - 1}]
    for {set i 0} {$i <= $last} {incr i} {
	foreach {type toupper tolower totitle} [lindex $groups $i] {}

	# Compute the case conversion type and delta

	if {$totitle} {
	    if {$totitle == $toupper} {
		# subtract delta for title or upper
		set case 4
		set delta $toupper
		if {$tolower} {
		    error "New case conversion type needed: $toupper $tolower $totitle"
		}
	    } elseif {$toupper} {
		# subtract delta for upper, subtract 1 for title
		set case 5
		set delta $toupper
		if {($totitle != 1) || $tolower} {
		    error "New case conversion type needed: $toupper $tolower $totitle"
		}
	    } else {
		# add delta for lower, add 1 for title
		set case 3
		set delta $tolower
		if {$totitle != -1} {
		    error "New case conversion type needed: $toupper $tolower $totitle"
		}
	    }
	} elseif {$toupper} {
	    set delta $toupper
	    if {$tolower == $toupper} {
		# subtract delta for upper, add delta for lower
		set case 6
	    } elseif {!$tolower} {
		# subtract delta for upper
		set case 7
	    } else {
		error "New case conversion type needed: $toupper $tolower $totitle"
	    }
	} elseif {$tolower} {
	    # add delta for lower
	    set case 2
	    set delta $tolower
	} else {
	    # noop
	    set case 0
	    set delta 0
	}

	append line [expr {($delta << 8) | ($case << 5) | $type}]
	if {$i != $last} {
	    append line ", "
	}
	if {[string length $line] > 65} {
	    puts $f [string trimright $line]
	    set line "    "
	}
    }
    puts $f $line
    puts -nonewline $f "};

#if TCL_UTF_MAX > 3 || TCL_MAJOR_VERSION > 8 || TCL_MINOR_VERSION > 6
#   define UNICODE_OUT_OF_RANGE(ch) (((ch) & 0x1FFFFF) >= [format 0x%X $next])
#else
#   define UNICODE_OUT_OF_RANGE(ch) (((ch) & 0x1F0000) != 0)
#endif

/*
 * The following constants are used to determine the category of a
 * Unicode character.
 */

enum {
    UNASSIGNED,
    UPPERCASE_LETTER,
    LOWERCASE_LETTER,
    TITLECASE_LETTER,
    MODIFIER_LETTER,
    OTHER_LETTER,
    NON_SPACING_MARK,
    ENCLOSING_MARK,
    COMBINING_SPACING_MARK,
    DECIMAL_DIGIT_NUMBER,
    LETTER_NUMBER,
    OTHER_NUMBER,
    SPACE_SEPARATOR,
    LINE_SEPARATOR,
    PARAGRAPH_SEPARATOR,
    CONTROL,
    FORMAT,
    PRIVATE_USE,
    SURROGATE,
    CONNECTOR_PUNCTUATION,
    DASH_PUNCTUATION,
    OPEN_PUNCTUATION,
    CLOSE_PUNCTUATION,
    INITIAL_QUOTE_PUNCTUATION,
    FINAL_QUOTE_PUNCTUATION,
    OTHER_PUNCTUATION,
    MATH_SYMBOL,
    CURRENCY_SYMBOL,
    MODIFIER_SYMBOL,
    OTHER_SYMBOL
};

/*
 * The following macros extract the fields of the character info.  The
 * GetDelta() macro is complicated because we can't rely on the C compiler
 * to do sign extension on right shifts.
 */

#define GetCaseType(info) (((info) & 0xE0) >> 5)
#define GetCategory(ch) (GetUniCharInfo(ch) & 0x1F)
#define GetDelta(info) ((info) >> 8)

/*
 * This macro extracts the information about a character from the
 * Unicode character tables.
 */

#if TCL_UTF_MAX > 3 || TCL_MAJOR_VERSION > 8 || TCL_MINOR_VERSION > 6
#   define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0x1FFFFF) >> OFFSET_BITS\] | ((ch) & ((1 << OFFSET_BITS)-1))\]\])
#else
#   define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0xFFFF) >> OFFSET_BITS\] | ((ch) & ((1 << OFFSET_BITS)-1))\]\])
#endif
"

    close $f
}

uni::main

return
adding Tcl interface to vpr 2022-06-07 11:15:20 -05:00			`# uniParse.tcl --`
			`#`
			`# This program parses the UnicodeData file and generates the`
			`# corresponding tclUniData.c file with compressed character`
			`# data tables. The input to this program should be the latest`
			`# UnicodeData file from:`
			`# ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt`
			`#`
			`# Copyright (c) 1998-1999 Scriptics Corporation.`
			`# All rights reserved.`


			`namespace eval uni {`
			`set shift 5; # number of bits of data within a page`
			`# This value can be adjusted to find the`
			`# best split to minimize table size`

			`variable pMap; # map from page to page index, each entry is`
			`# an index into the pages table, indexed by`
			`# page number`
			`variable pages; # map from page index to page info, each`
			`# entry is a list of indices into the groups`
			`# table, the list is indexed by the offset`
			`variable groups; # list of character info values, indexed by`
			`# group number, initialized with the`
			`# unassigned character group`

			`variable categories {`
			`Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp`
			`Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So`
			`}; # Ordered list of character categories, must`
			`# match the enumeration in the header file.`
			`}`

			`proc uni::getValue {items index} {`
			`variable categories`

			`# Extract character info`

			`set category [lindex $items 2]`
			`if {[scan [lindex $items 12] %x toupper] == 1} {`
			`set toupper [expr {$index - $toupper}]`
			`} else {`
			`set toupper 0`
			`}`
			`if {[scan [lindex $items 13] %x tolower] == 1} {`
			`set tolower [expr {$tolower - $index}]`
			`} else {`
			`set tolower 0`
			`}`
			`if {[scan [lindex $items 14] %x totitle] == 1} {`
			`set totitle [expr {$index - $totitle}]`
			`} elseif {$tolower} {`
			`set totitle 0`
			`} else {`
			`set totitle $toupper`
			`}`

			`set categoryIndex [lsearch -exact $categories $category]`
			`if {$categoryIndex < 0} {`
			`error "Unexpected character category: $index($category)"`
			`}`

			`return [list $categoryIndex $toupper $tolower $totitle]`
			`}`

			`proc uni::getGroup {value} {`
			`variable groups`

			`set gIndex [lsearch -exact $groups $value]`
			`if {$gIndex < 0} {`
			`set gIndex [llength $groups]`
			`lappend groups $value`
			`}`
			`return $gIndex`
			`}`

			`proc uni::addPage {info} {`
			`variable pMap`
			`variable pages`
			`variable shift`

			`set pIndex [lsearch -exact $pages $info]`
			`if {$pIndex < 0} {`
			`set pIndex [llength $pages]`
			`lappend pages $info`
			`}`
			`lappend pMap [expr {$pIndex << $shift}]`
			`return`
			`}`

			`proc uni::buildTables {data} {`
			`variable shift`

			`variable pMap {}`
			`variable pages {}`
			`variable groups {{0 0 0 0}}`
			`variable next 0`
			`set info {} ;# temporary page info`

			`set mask [expr {(1 << $shift) - 1}]`

			`foreach line [split $data \n] {`
			`if {$line eq ""} {`
			`if {!($next & $mask)} {`
			`# next character is already on page boundary`
			`continue`
			`}`
			`# fill remaining page`
			`set line [format %X [expr {($next-1)\|$mask}]]`
			`append line ";;Cn;0;ON;;;;;N;;;;;\n"`
			`}`

			`set items [split $line \;]`

			`scan [lindex $items 0] %x index`
			`if {$index > 0x3FFFF} then {`
			`# Ignore characters > plane 3`
			`continue`
			`}`
			`set index [format %d $index]`

			`set gIndex [getGroup [getValue $items $index]]`

			`# Since the input table omits unassigned characters, these will`
			`# show up as gaps in the index sequence. There are a few special cases`
			`# where the gaps correspond to a uniform block of assigned characters.`
			`# These are indicated as such in the character name.`

			`# Enter all unassigned characters up to the current character.`
			`if {($index > $next) \`
			`&& ![regexp "Last>$" [lindex $items 1]]} {`
			`for {} {$next < $index} {incr next} {`
			`lappend info 0`
			`if {($next & $mask) == $mask} {`
			`addPage $info`
			`set info {}`
			`}`
			`}`
			`}`

			`# Enter all assigned characters up to the current character`
			`for {set i $next} {$i <= $index} {incr i} {`
			`# Add the group index to the info for the current page`
			`lappend info $gIndex`

			`# If this is the last entry in the page, add the page`
			`if {($i & $mask) == $mask} {`
			`addPage $info`
			`set info {}`
			`}`
			`}`
			`set next [expr {$index + 1}]`
			`}`
			`return`
			`}`

			`proc uni::main {} {`
			`global argc argv0 argv`
			`variable pMap`
			`variable pages`
			`variable groups`
			`variable shift`
			`variable next`

			`if {$argc != 2} {`
			`puts stderr "\nusage: $argv0 <datafile> <outdir>\n"`
			`exit 1`
			`}`
			`set f [open [lindex $argv 0] r]`
			`set data [read $f]`
			`close $f`

			`buildTables $data`
			`puts "X = [llength $pMap] Y= [llength $pages] A= [llength $groups]"`
			`set size [expr {[llength $pMap]*2 + ([llength $pages]<<$shift)}]`
			`puts "shift = $shift, space = $size"`

			`set f [open [file join [lindex $argv 1] tclUniData.c] w]`
			`fconfigure $f -translation lf -encoding utf-8`
			`puts $f "/*`
			`* tclUniData.c --`
			`*`
			`* Declarations of Unicode character information tables. This file is`
			`* automatically generated by the tools/uniParse.tcl script. Do not`
			`* modify this file by hand.`
			`*`
			`* Copyright (c) 1998 Scriptics Corporation.`
			`* All rights reserved.`
			`*/`

			`/*`
			`* A 16-bit Unicode character is split into two parts in order to index`
			`* into the following tables. The lower OFFSET_BITS comprise an offset`
			`* into a page of characters. The upper bits comprise the page number.`
			`*/`

			`#define OFFSET_BITS $shift`

			`/*`
			`* The pageMap is indexed by page number and returns an alternate page number`
			`* that identifies a unique page of characters. Many Unicode characters map`
			`* to the same alternate page number.`
			`*/`

			`static const unsigned short pageMap\[\] = {"`
			`set line " "`
			`set last [expr {[llength $pMap] - 1}]`
			`for {set i 0} {$i <= $last} {incr i} {`
			`if {$i == [expr {0x10000 >> $shift}]} {`
			`set line [string trimright $line " \t,"]`
			`puts $f $line`
			`set lastpage [expr {[lindex $line end] >> $shift}]`
			`puts stdout "lastpage: $lastpage"`
			`puts $f "#if TCL_UTF_MAX > 3 \|\| TCL_MAJOR_VERSION > 8 \|\| TCL_MINOR_VERSION > 6"`
			`set line " ,"`
			`}`
			`append line [lindex $pMap $i]`
			`if {$i != $last} {`
			`append line ", "`
			`}`
			`if {[string length $line] > 70} {`
			`puts $f [string trimright $line]`
			`set line " "`
			`}`
			`}`
			`puts $f $line`
			`puts $f "#endif /* TCL_UTF_MAX > 3 */"`
			`puts $f "};`

			`/*`
			`* The groupMap is indexed by combining the alternate page number with`
			`* the page offset and returns a group number that identifies a unique`
			`* set of character attributes.`
			`*/`

			`static const unsigned char groupMap\[\] = {"`
			`set line " "`
			`set lasti [expr {[llength $pages] - 1}]`
			`for {set i 0} {$i <= $lasti} {incr i} {`
			`set page [lindex $pages $i]`
			`set lastj [expr {[llength $page] - 1}]`
			`if {$i == ($lastpage + 1)} {`
			`puts $f [string trimright $line " \t,"]`
			`puts $f "#if TCL_UTF_MAX > 3 \|\| TCL_MAJOR_VERSION > 8 \|\| TCL_MINOR_VERSION > 6"`
			`set line " ,"`
			`}`
			`for {set j 0} {$j <= $lastj} {incr j} {`
			`append line [lindex $page $j]`
			`if {$j != $lastj \|\| $i != $lasti} {`
			`append line ", "`
			`}`
			`if {[string length $line] > 70} {`
			`puts $f [string trimright $line]`
			`set line " "`
			`}`
			`}`
			`}`
			`puts $f $line`
			`puts $f "#endif /* TCL_UTF_MAX > 3 */"`
			`puts $f "};`

			`/*`
			`* Each group represents a unique set of character attributes. The attributes`
			`* are encoded into a 32-bit value as follows:`
			`*`
			`* Bits 0-4 Character category: see the constants listed below.`
			`*`
			`* Bits 5-7 Case delta type: 000 = identity`
			`* 010 = add delta for lower`
			`* 011 = add delta for lower, add 1 for title`
			`* 100 = subtract delta for title/upper`
			`* 101 = sub delta for upper, sub 1 for title`
			`* 110 = sub delta for upper, add delta for lower`
			`* 111 = subtract delta for upper`
			`*`
			`* Bits 8-31 Case delta: delta for case conversions. This should be the`
			`* highest field so we can easily sign extend.`
			`*/`

			`static const int groups\[\] = {"`
			`set line " "`
			`set last [expr {[llength $groups] - 1}]`
			`for {set i 0} {$i <= $last} {incr i} {`
			`foreach {type toupper tolower totitle} [lindex $groups $i] {}`

			`# Compute the case conversion type and delta`

			`if {$totitle} {`
			`if {$totitle == $toupper} {`
			`# subtract delta for title or upper`
			`set case 4`
			`set delta $toupper`
			`if {$tolower} {`
			`error "New case conversion type needed: $toupper $tolower $totitle"`
			`}`
			`} elseif {$toupper} {`
			`# subtract delta for upper, subtract 1 for title`
			`set case 5`
			`set delta $toupper`
			`if {($totitle != 1) \|\| $tolower} {`
			`error "New case conversion type needed: $toupper $tolower $totitle"`
			`}`
			`} else {`
			`# add delta for lower, add 1 for title`
			`set case 3`
			`set delta $tolower`
			`if {$totitle != -1} {`
			`error "New case conversion type needed: $toupper $tolower $totitle"`
			`}`
			`}`
			`} elseif {$toupper} {`
			`set delta $toupper`
			`if {$tolower == $toupper} {`
			`# subtract delta for upper, add delta for lower`
			`set case 6`
			`} elseif {!$tolower} {`
			`# subtract delta for upper`
			`set case 7`
			`} else {`
			`error "New case conversion type needed: $toupper $tolower $totitle"`
			`}`
			`} elseif {$tolower} {`
			`# add delta for lower`
			`set case 2`
			`set delta $tolower`
			`} else {`
			`# noop`
			`set case 0`
			`set delta 0`
			`}`

			`append line [expr {($delta << 8) \| ($case << 5) \| $type}]`
			`if {$i != $last} {`
			`append line ", "`
			`}`
			`if {[string length $line] > 65} {`
			`puts $f [string trimright $line]`
			`set line " "`
			`}`
			`}`
			`puts $f $line`
			`puts -nonewline $f "};`

			`#if TCL_UTF_MAX > 3 \|\| TCL_MAJOR_VERSION > 8 \|\| TCL_MINOR_VERSION > 6`
			`# define UNICODE_OUT_OF_RANGE(ch) (((ch) & 0x1FFFFF) >= [format 0x%X $next])`
			`#else`
			`# define UNICODE_OUT_OF_RANGE(ch) (((ch) & 0x1F0000) != 0)`
			`#endif`

			`/*`
			`* The following constants are used to determine the category of a`
			`* Unicode character.`
			`*/`

			`enum {`
			`UNASSIGNED,`
			`UPPERCASE_LETTER,`
			`LOWERCASE_LETTER,`
			`TITLECASE_LETTER,`
			`MODIFIER_LETTER,`
			`OTHER_LETTER,`
			`NON_SPACING_MARK,`
			`ENCLOSING_MARK,`
			`COMBINING_SPACING_MARK,`
			`DECIMAL_DIGIT_NUMBER,`
			`LETTER_NUMBER,`
			`OTHER_NUMBER,`
			`SPACE_SEPARATOR,`
			`LINE_SEPARATOR,`
			`PARAGRAPH_SEPARATOR,`
			`CONTROL,`
			`FORMAT,`
			`PRIVATE_USE,`
			`SURROGATE,`
			`CONNECTOR_PUNCTUATION,`
			`DASH_PUNCTUATION,`
			`OPEN_PUNCTUATION,`
			`CLOSE_PUNCTUATION,`
			`INITIAL_QUOTE_PUNCTUATION,`
			`FINAL_QUOTE_PUNCTUATION,`
			`OTHER_PUNCTUATION,`
			`MATH_SYMBOL,`
			`CURRENCY_SYMBOL,`
			`MODIFIER_SYMBOL,`
			`OTHER_SYMBOL`
			`};`

			`/*`
			`* The following macros extract the fields of the character info. The`
			`* GetDelta() macro is complicated because we can't rely on the C compiler`
			`* to do sign extension on right shifts.`
			`*/`

			`#define GetCaseType(info) (((info) & 0xE0) >> 5)`
			`#define GetCategory(ch) (GetUniCharInfo(ch) & 0x1F)`
			`#define GetDelta(info) ((info) >> 8)`

			`/*`
			`* This macro extracts the information about a character from the`
			`* Unicode character tables.`
			`*/`

			`#if TCL_UTF_MAX > 3 \|\| TCL_MAJOR_VERSION > 8 \|\| TCL_MINOR_VERSION > 6`
			`# define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0x1FFFFF) >> OFFSET_BITS\] \| ((ch) & ((1 << OFFSET_BITS)-1))\]\])`
			`#else`
			`# define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0xFFFF) >> OFFSET_BITS\] \| ((ch) & ((1 << OFFSET_BITS)-1))\]\])`
			`#endif`
			`"`

			`close $f`
			`}`

			`uni::main`

			`return`