sbase

suckless unix tools
git clone git://git.2f30.org/sbase
Log | Files | Refs | README | LICENSE

mkrunetype.awk (7571B)


      1 # See LICENSE file for copyright and license details.
      2 
      3 BEGIN {
      4 	FS = ";"
      5 	# set up hexadecimal lookup table
      6 	for(i = 0; i < 16; i++)
      7 		hex[sprintf("%X",i)] = i;
      8 	HEADER = "/* Automatically generated by mkrunetype.awk */\n#include <stdlib.h>\n\n#include \"../utf.h\"\n#include \"runetype.h\"\n"
      9 	HEADER_OTHER = "/* Automatically generated by mkrunetype.awk */\n#include \"../utf.h\"\n#include \"runetype.h\"\n"
     10 }
     11 
     12 $3  ~ /^L/ { alphav[alphac++] = $1; }
     13 ($3  ~ /^Z/) || ($5 == "WS") || ($5 == "S") || ($5 == "B") { spacev[spacec++] = $1; }
     14 $3 == "Cc" { cntrlv[cntrlc++] = $1; }
     15 $3 == "Lu" { upperv[upperc++] = $1; tolowerv[uppercc++] = ($14 == "") ? $1 : $14; }
     16 $3 == "Ll" { lowerv[lowerc++] = $1; toupperv[lowercc++] = ($13 == "") ? $1 : $13; }
     17 $3 == "Lt" { titlev[titlec++] = $1; }
     18 $3 == "Nd" { digitv[digitc++] = $1; }
     19 
     20 END {
     21 	system("rm -f isalpharune.c isspacerune.c iscntrlrune.c upperrune.c lowerrune.c istitlerune.c isdigitrune.c");
     22 
     23 	mkis("alpha", alphav, alphac, "isalpharune.c", q, "");
     24 	mkis("space", spacev, spacec, "isspacerune.c", q, "");
     25 	mkis("cntrl", cntrlv, cntrlc, "iscntrlrune.c", q, "");
     26 	mkis("upper", upperv, upperc,   "upperrune.c", tolowerv, "lower");
     27 	mkis("lower", lowerv, lowerc,   "lowerrune.c", toupperv, "upper");
     28 	mkis("title", titlev, titlec, "istitlerune.c", q, "");
     29 	mkis("digit", digitv, digitc, "isdigitrune.c", q, "");
     30 
     31 	system("rm -f isalnumrune.c isblankrune.c isprintrune.c isgraphrune.c ispunctrune.c isxdigitrune.c");
     32 
     33 	otheris();
     34 }
     35 
     36 # parse hexadecimal rune index to int
     37 function code(s) {
     38 	x = 0;
     39 	for(i = 1; i <= length(s); i++) {
     40 		c = substr(s, i, 1);
     41 		x = (x*16) + hex[c];
     42 	}
     43 	return x;
     44 }
     45 
     46 # generate 'is<name>rune' unicode lookup function
     47 function mkis(name, runev, runec, file, casev, casename) {
     48 	rune1c = 0;
     49 	rune2c = 0;
     50 	rune3c = 0;
     51 	rune4c = 0;
     52 	mode = 1;
     53 
     54 	#sort rune groups into singletons, ranges and laces
     55 	for(j = 0; j < runec; j++) {
     56 		# range
     57 		if(code(runev[j+1]) == code(runev[j])+1 && ((length(casev) == 0) ||
     58 		   code(casev[j+1]) == code(casev[j])+1) && j+1 < runec) {
     59 			if (mode == 2) {
     60 				continue;
     61 			} else if (mode == 3) {
     62 				rune3v1[rune3c] = runev[j];
     63 				rune3c++;
     64 			} else if (mode == 4) {
     65 				rune4v1[rune4c] = runev[j];
     66 				rune4c++;
     67 			}
     68 			mode = 2;
     69 			rune2v0[rune2c] = runev[j];
     70 			if(length(casev) > 0) {
     71 				case2v[rune2c] = casev[j];
     72 			}
     73 			continue;
     74 		}
     75 		# lace 1
     76 		if(code(runev[j+1]) == code(runev[j])+2 && ((length(casev) == 0) ||
     77 		   (code(casev[j+1]) == code(runev[j+1])+1 && code(casev[j]) == code(runev[j])+1)) &&
     78 		   j+1 < runec) {
     79 			if (mode == 3) {
     80 				continue;
     81 			} else if (mode == 2) {
     82 				rune2v1[rune2c] = runev[j];
     83 				rune2c++;
     84 			} else if (mode == 4) {
     85 				rune4v1[rune2c] = runev[j];
     86 				rune4c++;
     87 			}
     88 			mode = 3;
     89 			rune3v0[rune3c] = runev[j];
     90 			continue;
     91 		}
     92 		# lace 2
     93 		if(code(runev[j+1]) == code(runev[j])+2 && ((length(casev) == 0) ||
     94 		   (code(casev[j+1]) == code(runev[j+1])-1 && code(casev[j]) == code(runev[j])-1)) &&
     95 		   j+1 < runec) {
     96 			if (mode == 4) {
     97 				continue;
     98 			} else if (mode == 2) {
     99 				rune2v1[rune2c] = runev[j];
    100 				rune2c++;
    101 			} else if (mode == 3) {
    102 				rune3v1[rune2c] = runev[j];
    103 				rune3c++;
    104 			}
    105 			mode = 4;
    106 			rune4v0[rune4c] = runev[j];
    107 			continue;
    108 		}
    109 		# terminating case
    110 		if (mode == 1) {
    111 			rune1v[rune1c] = runev[j];
    112 			if (length(casev) > 0) {
    113 				case1v[rune1c] = casev[j];
    114 			}
    115 			rune1c++;
    116 		} else if (mode == 2) {
    117 			rune2v1[rune2c] = runev[j];
    118 			rune2c++;
    119 		} else if (mode == 3) {
    120 			rune3v1[rune3c] = runev[j];
    121 			rune3c++;
    122 		} else { #lace 2
    123 			rune4v1[rune4c] = runev[j];
    124 			rune4c++;
    125 		}
    126 		mode = 1;
    127 	}
    128 	print HEADER > file;
    129 
    130 	#generate list of laces 1
    131 	if(rune3c > 0) {
    132 		print "static Rune "name"3[][2] = {" > file;
    133 		for(j = 0; j < rune3c; j++) {
    134 			print "\t{ 0x"rune3v0[j]", 0x"rune3v1[j]" }," > file;
    135 		}
    136 		print "};\n" > file;
    137 	}
    138 
    139 	#generate list of laces 2
    140 	if(rune4c > 0) {
    141 		print "static Rune "name"4[][2] = {" > file;
    142 		for(j = 0; j < rune4c; j++) {
    143 			print "\t{ 0x"rune4v0[j]", 0x"rune4v1[j]" }," > file;
    144 		}
    145 		print "};\n" > file;
    146 	}
    147 
    148 	# generate list of ranges
    149 	if(rune2c > 0) {
    150 		if(length(casev) > 0) {
    151 			print "static Rune "name"2[][3] = {" > file;
    152 			for(j = 0; j < rune2c; j++) {
    153 				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]", 0x"case2v[j]" }," > file;
    154 			}
    155 		} else {
    156 			print "static Rune "name"2[][2] = {" > file
    157 			for(j = 0; j < rune2c; j++) {
    158 				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]" }," > file;
    159 			}
    160 		}
    161 		print "};\n" > file;
    162 	}
    163 
    164 	# generate list of singletons
    165 	if(rune1c > 0) {
    166 		if(length(casev) > 0) {
    167 			print "static Rune "name"1[][2] = {" > file;
    168 			for(j = 0; j < rune1c; j++) {
    169 				print "\t{ 0x"rune1v[j]", 0x"case1v[j]" }," > file;
    170 			}
    171 		} else {
    172 			print "static Rune "name"1[] = {" > file;
    173 			for(j = 0; j < rune1c; j++) {
    174 				print "\t0x"rune1v[j]"," > file;
    175 			}
    176 		}
    177 		print "};\n" > file;
    178 	}
    179 	# generate lookup function
    180 	print "int\nis"name"rune(Rune r)\n{" > file;
    181 	if(rune4c > 0 || rune3c > 0)
    182 		print "\tRune *match;\n" > file;
    183 	if(rune4c > 0) {
    184 		print "\tif((match = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &rune2cmp)))" > file;
    185 		print "\t\treturn !((r - match[0]) % 2);" > file;
    186 	}
    187 	if(rune3c > 0) {
    188 		print "\tif((match = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &rune2cmp)))" > file;
    189 		print "\t\treturn !((r - match[0]) % 2);" > file;
    190 	}
    191 	if(rune2c > 0) {
    192 		print "\tif(bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &rune2cmp))\n\t\treturn 1;" > file;
    193 	}
    194 	if(rune1c > 0) {
    195 		print "\tif(bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &rune1cmp))\n\t\treturn 1;" > file;
    196 	}
    197 	print "\treturn 0;\n}" > file;
    198 
    199 	# generate case conversion function
    200 	if(length(casev) > 0) {
    201 		print "\nint\nto"casename"rune(Rune r)\n{\n\tRune *match;\n" > file;
    202 		if(rune4c > 0) {
    203 			print "\tmatch = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &rune2cmp);" > file;
    204 			print "\tif (match)" > file;
    205 			print "\t\treturn ((r - match[0]) % 2) ? r : r - 1;" > file;
    206 		}
    207 		if(rune3c > 0) {
    208 			print "\tmatch = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &rune2cmp);" > file;
    209 			print "\tif (match)" > file;
    210 			print "\t\treturn ((r - match[0]) % 2) ? r : r + 1;" > file;
    211 		}
    212 		if(rune2c > 0) {
    213 			print "\tmatch = bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &rune2cmp);" > file;
    214 			print "\tif (match)" > file;
    215 			print "\t\treturn match[2] + (r - match[0]);" > file;
    216 		}
    217 		if(rune1c > 0) {
    218 			print "\tmatch = bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &rune1cmp);" > file;
    219 			print "\tif (match)" > file;
    220 			print "\t\treturn match[1];" > file;
    221 		}
    222 		print "\treturn r;\n}" > file;
    223 	}
    224 }
    225 
    226 function otheris() {
    227 	print HEADER_OTHER > "isalnumrune.c";
    228 	print "int\nisalnumrune(Rune r)\n{\n\treturn isalpharune(r) || isdigitrune(r);\n}" > "isalnumrune.c";
    229 	print HEADER_OTHER > "isblankrune.c";
    230 	print "int\nisblankrune(Rune r)\n{\n\treturn r == ' ' || r == '\\t';\n}" > "isblankrune.c";
    231 	print HEADER_OTHER > "isprintrune.c";
    232 	print "int\nisprintrune(Rune r)\n{\n\treturn !iscntrlrune(r) && (r != 0x2028) && (r != 0x2029) &&" > "isprintrune.c";
    233 	print "\t       ((r < 0xFFF9) || (r > 0xFFFB));\n}" > "isprintrune.c";
    234 	print HEADER_OTHER > "isgraphrune.c";
    235 	print "int\nisgraphrune(Rune r)\n{\n\treturn !isspacerune(r) && isprintrune(r);\n}" > "isgraphrune.c";
    236 	print HEADER_OTHER > "ispunctrune.c";
    237 	print "int\nispunctrune(Rune r)\n{\n\treturn isgraphrune(r) && !isalnumrune(r);\n}" > "ispunctrune.c";
    238 	print HEADER_OTHER > "isxdigitrune.c";
    239 	print "int\nisxdigitrune(Rune r)\n{\n\treturn (r >= '0' && (r - '0') < 10) || (r >= 'a' && (r - 'a') < 6);\n}" > "isxdigitrune.c";
    240 }