commit 986a9de51a77e7f6803e1b2259ec0675762077db
parent 696bb992c3b3f5536c3c204c3e7130832fdce0e6
Author: FRIGN <dev@frign.de>
Date: Sun, 1 Feb 2015 04:06:06 +0100
Add even stricter UTF-8 support to wc(1)
using readrune() and iswspace().
musl, for instance, doesn't differentiate between iswspace() and
isspace(), but when it does, the code will be ready.
It goes without saying that GNU coreutils don't use iswspace()[0].
[0]: http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=blob;f=src/wc.c
Diffstat:
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/wc.c b/wc.c
@@ -3,7 +3,9 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
+#include <wctype.h>
+#include "utf.h"
#include "util.h"
static int lflag = 0;
@@ -30,16 +32,16 @@ output(const char *str, size_t nc, size_t nl, size_t nw)
void
wc(FILE *fp, const char *str)
{
- int word = 0;
- int c;
+ int word = 0, read;
+ Rune c;
size_t nc = 0, nl = 0, nw = 0;
- while ((c = getc(fp)) != EOF) {
- if (cmode != 'm' || UTF8_POINT(c))
- nc++;
+ while ((read = readrune(str, fp, &c))) {
+ nc += (cmode == 'c') ? read :
+ (c != Runeerror) ? 1 : 0;
if (c == '\n')
nl++;
- if (!isspace(c))
+ if (!iswspace(c))
word = 1;
else if (word) {
word = 0;