sort.c (5857B)
1 /* See LICENSE file for copyright and license details. */ 2 #include <ctype.h> 3 #include <stdio.h> 4 #include <stdlib.h> 5 #include <string.h> 6 #include <unistd.h> 7 8 #include "text.h" 9 #include "util.h" 10 11 struct keydef { 12 int start_column; 13 int end_column; 14 int start_char; 15 int end_char; 16 int flags; 17 }; 18 19 enum { 20 MOD_N = 1 << 1, 21 MOD_STARTB = 1 << 2, 22 MOD_ENDB = 1 << 3, 23 MOD_R = 1 << 4, 24 }; 25 26 struct kdlist { 27 struct keydef keydef; 28 struct kdlist *next; 29 }; 30 31 static struct kdlist *head = NULL; 32 static struct kdlist *tail = NULL; 33 34 static void addkeydef(char *, int); 35 static void check(FILE *); 36 static int linecmp(const char **, const char **); 37 static char *skipblank(char *); 38 static int parse_flags(char **, int *, int); 39 static int parse_keydef(struct keydef *, char *, int); 40 static char *nextcol(char *); 41 static char *columns(char *, const struct keydef *); 42 43 static int Cflag = 0, cflag = 0, uflag = 0; 44 static char *fieldsep = NULL; 45 46 static void 47 usage(void) 48 { 49 enprintf(2, "usage: %s [-Cbcnru] [-t delim] [-k def]... [file...]\n", argv0); 50 } 51 52 int 53 main(int argc, char *argv[]) 54 { 55 long i; 56 FILE *fp; 57 struct linebuf linebuf = EMPTY_LINEBUF; 58 int global_flags = 0; 59 60 ARGBEGIN { 61 case 'C': 62 Cflag = 1; 63 break; 64 case 'b': 65 global_flags |= MOD_STARTB | MOD_ENDB; 66 break; 67 case 'c': 68 cflag = 1; 69 break; 70 case 'k': 71 addkeydef(EARGF(usage()), global_flags); 72 break; 73 case 'n': 74 global_flags |= MOD_N; 75 break; 76 case 'r': 77 global_flags |= MOD_R; 78 break; 79 case 't': 80 fieldsep = EARGF(usage()); 81 if (strlen(fieldsep) != 1) 82 usage(); 83 break; 84 case 'u': 85 uflag = 1; 86 break; 87 default: 88 usage(); 89 } ARGEND; 90 91 if (!head && global_flags) 92 addkeydef("1", global_flags); 93 addkeydef("1", global_flags & MOD_R); 94 95 if (argc == 0) { 96 if (Cflag || cflag) { 97 check(stdin); 98 } else { 99 getlines(stdin, &linebuf); 100 } 101 } else for (; argc > 0; argc--, argv++) { 102 if (!(fp = fopen(argv[0], "r"))) { 103 enprintf(2, "fopen %s:", argv[0]); 104 continue; 105 } 106 if (Cflag || cflag) { 107 check(fp); 108 } else { 109 getlines(fp, &linebuf); 110 } 111 fclose(fp); 112 } 113 114 if (!Cflag && !cflag) { 115 qsort(linebuf.lines, linebuf.nlines, sizeof *linebuf.lines, 116 (int (*)(const void *, const void *))linecmp); 117 118 for (i = 0; i < linebuf.nlines; i++) { 119 if (!uflag || i == 0 || linecmp((const char **)&linebuf.lines[i], 120 (const char **)&linebuf.lines[i-1])) { 121 fputs(linebuf.lines[i], stdout); 122 } 123 } 124 } 125 126 return 0; 127 } 128 129 static void 130 addkeydef(char *def, int flags) 131 { 132 struct kdlist *node; 133 134 node = malloc(sizeof(*node)); 135 if (!node) 136 enprintf(2, "malloc:"); 137 if (!head) 138 head = node; 139 if (parse_keydef(&node->keydef, def, flags)) 140 enprintf(2, "faulty key definition\n"); 141 if (tail) 142 tail->next = node; 143 node->next = NULL; 144 tail = node; 145 } 146 147 static void 148 check(FILE *fp) 149 { 150 static struct { char *buf; size_t size; } prev, cur, tmp; 151 152 if (!prev.buf) 153 getline(&prev.buf, &prev.size, fp); 154 while (getline(&cur.buf, &cur.size, fp) != -1) { 155 if (uflag > linecmp((const char **) &cur.buf, (const char **) &prev.buf)) { 156 if (!Cflag) 157 weprintf("disorder: %s", cur.buf); 158 exit(1); 159 } 160 tmp = cur; 161 cur = prev; 162 prev = tmp; 163 } 164 } 165 166 static int 167 linecmp(const char **a, const char **b) 168 { 169 char *s1, *s2; 170 int res = 0; 171 struct kdlist *node; 172 173 for (node = head; node && res == 0; node = node->next) { 174 s1 = columns((char *)*a, &node->keydef); 175 s2 = columns((char *)*b, &node->keydef); 176 177 /* if -u is given, don't use default key definition 178 * unless it is the only one */ 179 if (uflag && node == tail && head != tail) 180 res = 0; 181 else if (node->keydef.flags & MOD_N) 182 res = strtol(s1, 0, 10) - strtol(s2, 0, 10); 183 else 184 res = strcmp(s1, s2); 185 186 if (node->keydef.flags & MOD_R) 187 res = -res; 188 189 free(s1); 190 free(s2); 191 } 192 return res; 193 } 194 195 static int 196 parse_flags(char **s, int *flags, int bflag) 197 { 198 while (isalpha((int)**s)) 199 switch (*((*s)++)) { 200 case 'b': 201 *flags |= bflag; 202 break; 203 case 'n': 204 *flags |= MOD_N; 205 break; 206 case 'r': 207 *flags |= MOD_R; 208 break; 209 default: 210 return -1; 211 } 212 return 0; 213 } 214 215 static int 216 parse_keydef(struct keydef *kd, char *s, int flags) 217 { 218 char *rest = s; 219 220 kd->start_column = 1; 221 kd->start_char = 1; 222 /* 0 means end of line */ 223 kd->end_column = 0; 224 kd->end_char = 0; 225 kd->flags = flags; 226 227 kd->start_column = strtol(rest, &rest, 10); 228 if (kd->start_column < 1) 229 return -1; 230 if (*rest == '.') 231 kd->start_char = strtol(rest+1, &rest, 10); 232 if (kd->start_char < 1) 233 return -1; 234 if (parse_flags(&rest, &kd->flags, MOD_STARTB) < 0) 235 return -1; 236 if (*rest == ',') { 237 kd->end_column = strtol(rest+1, &rest, 10); 238 if (kd->end_column && kd->end_column < kd->start_column) 239 return -1; 240 if (*rest == '.') { 241 kd->end_char = strtol(rest+1, &rest, 10); 242 if (kd->end_char < 1) 243 return -1; 244 } 245 if (parse_flags(&rest, &kd->flags, MOD_ENDB) < 0) 246 return -1; 247 } 248 if (*rest != '\0') 249 return -1; 250 return 0; 251 } 252 253 static char * 254 skipblank(char *s) 255 { 256 while(*s && isblank(*s)) 257 s++; 258 return s; 259 } 260 261 static char * 262 nextcol(char *s) 263 { 264 if (!fieldsep) { 265 s = skipblank(s); 266 while(*s && !isblank(*s)) 267 s++; 268 } else { 269 if (!strchr(s, *fieldsep)) 270 s = strchr(s, '\0'); 271 else 272 s = strchr(s, *fieldsep) + 1; 273 } 274 return s; 275 } 276 277 static char * 278 columns(char *line, const struct keydef *kd) 279 { 280 char *start, *end; 281 char *res; 282 int i; 283 284 for (i = 1, start = line; i < kd->start_column; i++) 285 start = nextcol(start); 286 if (kd->flags & MOD_STARTB) 287 start = skipblank(start); 288 start += MIN(kd->start_char, nextcol(start) - start) - 1; 289 290 if (kd->end_column) { 291 for (i = 1, end = line; i < kd->end_column; i++) 292 end = nextcol(end); 293 if (kd->flags & MOD_ENDB) 294 end = skipblank(end); 295 if (kd->end_char) 296 end += MIN(kd->end_char, nextcol(end) - end); 297 else 298 end = nextcol(end); 299 } else { 300 if (!(end = strchr(line, '\n'))) 301 end = strchr(line, '\0'); 302 } 303 304 if (!(res = strndup(start, end - start))) 305 enprintf(2, "strndup:"); 306 return res; 307 }