hbase

heirloom base
git clone git://git.2f30.org/hbase
Log | Files | Refs | README

rune.3 (3057B)


      1 .de EX
      2 .if t .ft 5
      3 .nf
      4 ..
      5 .de EE
      6 .ft 1
      7 .fi
      8 ..
      9 .TH RUNE 3
     10 .SH NAME
     11 runetochar, chartorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf \- rune/UTF conversion
     12 .SH SYNOPSIS
     13 .ta \w'\fLchar*xx'u
     14 .B #include <utf.h>
     15 .PP
     16 .B
     17 int	runetochar(char *s, Rune *r)
     18 .PP
     19 .B
     20 int	chartorune(Rune *r, char *s)
     21 .PP
     22 .B
     23 int	runelen(long r)
     24 .PP
     25 .B
     26 int	runenlen(Rune *r, int n)
     27 .PP
     28 .B
     29 int	fullrune(char *s, int n)
     30 .PP
     31 .B
     32 char*	utfecpy(char *s1, char *es1, char *s2)
     33 .PP
     34 .B
     35 int	utflen(char *s)
     36 .PP
     37 .B
     38 int	utfnlen(char *s, long n)
     39 .PP
     40 .B
     41 char*	utfrune(char *s, long c)
     42 .PP
     43 .B
     44 char*	utfrrune(char *s, long c)
     45 .PP
     46 .B
     47 char*	utfutf(char *s1, char *s2)
     48 .SH DESCRIPTION
     49 These routines convert to and from a
     50 .SM UTF
     51 byte stream and runes.
     52 .PP
     53 .I Runetochar
     54 copies one rune at
     55 .I r
     56 to at most
     57 .B UTFmax
     58 bytes starting at
     59 .I s
     60 and returns the number of bytes copied.
     61 .BR UTFmax ,
     62 defined as
     63 .B 3
     64 in
     65 .BR <utf.h> ,
     66 is the maximum number of bytes required to represent a rune.
     67 .PP
     68 .I Chartorune
     69 copies at most
     70 .B UTFmax
     71 bytes starting at
     72 .I s
     73 to one rune at
     74 .I r
     75 and returns the number of bytes copied.
     76 If the input is not exactly in
     77 .SM UTF
     78 format,
     79 .I chartorune
     80 will convert to 0x80 and return 1.
     81 .PP
     82 .I Runelen
     83 returns the number of bytes
     84 required to convert
     85 .I r
     86 into
     87 .SM UTF.
     88 .PP
     89 .I Runenlen
     90 returns the number of bytes
     91 required to convert the
     92 .I n
     93 runes pointed to by
     94 .I r
     95 into
     96 .SM UTF.
     97 .PP
     98 .I Fullrune
     99 returns 1 if the string
    100 .I s
    101 of length
    102 .I n
    103 is long enough to be decoded by
    104 .I chartorune
    105 and 0 otherwise.
    106 This does not guarantee that the string
    107 contains a legal
    108 .SM UTF
    109 encoding.
    110 This routine is used by programs that
    111 obtain input a byte at
    112 a time and need to know when a full rune
    113 has arrived.
    114 .PP
    115 The following routines are analogous to the
    116 corresponding string routines with
    117 .B utf
    118 substituted for
    119 .B str
    120 and
    121 .B rune
    122 substituted for
    123 .BR chr .
    124 .PP
    125 .I Utfecpy
    126 copies UTF sequences until a null sequence has been copied, but writes no
    127 sequences beyond
    128 .IR es1 .
    129 If any sequences are copied,
    130 .I s1
    131 is terminated by a null sequence, and a pointer to that sequence is returned.
    132 Otherwise, the original
    133 .I s1
    134 is returned.
    135 .PP
    136 .I Utflen
    137 returns the number of runes that
    138 are represented by the
    139 .SM UTF
    140 string
    141 .IR s .
    142 .PP
    143 .I Utfnlen
    144 returns the number of complete runes that
    145 are represented by the first
    146 .I n
    147 bytes of
    148 .SM UTF
    149 string
    150 .IR s .
    151 If the last few bytes of the string contain an incompletely coded rune,
    152 .I utfnlen
    153 will not count them; in this way, it differs from
    154 .IR utflen ,
    155 which includes every byte of the string.
    156 .PP
    157 .I Utfrune
    158 .RI ( utfrrune )
    159 returns a pointer to the first (last)
    160 occurrence of rune
    161 .I c
    162 in the
    163 .SM UTF
    164 string
    165 .IR s ,
    166 or 0 if
    167 .I c
    168 does not occur in the string.
    169 The NUL byte terminating a string is considered to
    170 be part of the string
    171 .IR s .
    172 .PP
    173 .I Utfutf
    174 returns a pointer to the first occurrence of
    175 the
    176 .SM UTF
    177 string
    178 .I s2
    179 as a
    180 .SM UTF
    181 substring of
    182 .IR s1 ,
    183 or 0 if there is none.
    184 If
    185 .I s2
    186 is the null string,
    187 .I utfutf
    188 returns
    189 .IR s1 .
    190 .SH SOURCE
    191 .B http://swtch.com/plan9port/unix
    192 .SH SEE ALSO
    193 .IR utf (7),
    194 .IR tcs (1)