/* Copyright (c) 2008, Tim Post
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the original program's authors nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/* Some example usages:
 * grawk shutdown '$5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15' messages
 * grawk shutdown '$5, $6, $7, $8, $9, $10, " -- " $1, $2, $3' messages
 * grawk dhclient '$1, $2 " \"$$\"-- " $3' syslog
 * cat syslog | grawk dhclient '$0'
 * cat myservice.log | grawk -F , error '$3'
 *
 * Contributors:
 * Tim Post, Nicholas Clements, Alex Karlov
 * We hope that you find this useful!
 */

/* FIXME:
 * readline() should probably be renamed
 */

/* TODO:
 * Add a tail -f like behavior that applies expressions and fields
 * Recursive (like grep -r) or at least honor symlinks ?
 */

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <getopt.h>
#include <regex.h>
#include <sys/types.h>
#include <sys/stat.h>

#define VERSION "1.0.7"
/* NOTE(review): the contact address appears to be missing from this string
 * (only a trailing space remains) -- restore it from the upstream source. */
#define MAINTAINER "Tim Post "

/* Delimiters used to split a line into fields when -F is not given. */
#define DEFAULT_DELIMS "\t\r\n "

/* Marker byte that prefixes a compiled "$N" field reference. */
#define FIELD_MARK '\1'

/* Storage structure to hold awk-style pattern */
struct awk_pattern
{
    int maxfield;   /* Maximum field number for $# fields */
    int numfields;  /* Number of awk pattern fields */
    char **fields;  /* The awk pattern fields */
};

typedef struct awk_pattern awk_pat_t;

/* Option arguments */
static struct option const long_options[] = {
    { "ignore-case",     no_argument,       0, 'i' },
    { "with-filename",   no_argument,       0, 'W' },
    { "no-filename",     no_argument,       0, 'w' },
    { "line-number",     no_argument,       0, 'n' },
    { "field-separator", required_argument, 0, 'F' },
    { "help",            no_argument,       0, 'h' },
    { "version",         no_argument,       0, 'v' },
    { 0, 0, 0, 0 }
};

/* The official name of the program */
const char *progname = "grawk";

/* Global for delimiters used in tokenizing strings */
char *tokdelim = NULL;

/* Prototypes */
static void usage(void);
static char *dup_string(const char *);
static int process(FILE *, regex_t, awk_pat_t, char *, int);
static int process_line(char *, awk_pat_t, char *, char *);
static int process_files(int, char **, regex_t, awk_pat_t, int, int);
static int process_pipe(regex_t, awk_pat_t, int);
static int awk_add_field(awk_pat_t *, const char *);
static int awkcomp(awk_pat_t *, char *);
static void awkfree(awk_pat_t *);
static char *readline(FILE *);

/* usage() - print the version banner, option summary and examples to stdout. */
static void usage(void)
{
    printf("%s %s\n", progname, VERSION);
    printf("Usage: %s [OPTION] PATTERN OUTPUT_PATTERN file1 [file2]...\n",
           progname);
    printf("Options:\n");
    printf(" --help "
           "show help and examples\n");
    printf(" -i, --ignore-case "
           "ignore case distinctions\n");
    printf(" -W, --with-filename "
           "Print filename for each match\n");
    printf(" -w, --no-filename "
           "Never print filename for each match\n");
    printf(" -n, --line-number "
           "Prefix each line of output with line number.\n");
    printf(" -F fs, --field-separator=fs "
           "Use fs as the field separator\n");
    printf(" -h, --help "
           "Print a brief help summary\n");
    printf(" -v, --version "
           "Print version information and exit normally\n");
    printf(" PATTERN "
           "a basic regular expression\n");
    printf(" OUTPUT_PATTERN "
           "awk-style print statement; defines "
           "output fields\n");
    printf("\nExamples:\n");
    printf(" Retrieve joe123's home directory from /etc/passwd:\n");
    printf("\t%s -F : \"joe123\" '$6' /etc/passwd\n", progname);
    printf("\n Find fields 2 3 and 4 on lines that begin with @ from stdin:\n");
    printf("\tcat file.txt | %s \"^@\" '$2,$3,$4'\n", progname);
    printf("\n Use as a simple grep:\n");
    printf("\t%s \"string to find\" '$0' /file.txt\n", progname);
    printf("\nReport bugs to %s\n", MAINTAINER);
}

/* dup_string() - return a malloc()ed copy of s, or NULL if out of memory.
 * The caller owns (and frees) the copy. Written with ISO C calls only so the
 * file does not depend on strdup() being visible in strict-standard builds. */
static char *dup_string(const char *s)
{
    size_t size = strlen(s) + 1;
    char *copy = malloc(size);

    if (copy != NULL)
        memcpy(copy, s, size);
    return copy;
}

/* readline() - read one line from the file handle.
 *
 * A line ends at "\n", "\r\n", a lone "\r", or end of file. The terminator is
 * not stored. Returns a malloc()ed, NUL-terminated string that the caller
 * must free(), or NULL when there is nothing left to read (EOF or read error
 * before any character) or when memory runs out. The stream is never closed
 * here; it stays owned by the caller. */
static char *readline(FILE *fp)
{
    size_t cap = 256, len = 0;
    char *buf = malloc(cap);
    int ch;

    if (buf == NULL)
        return NULL;

    while ((ch = fgetc(fp)) != EOF) {
        if (ch == '\n')
            break;

        if (ch == '\r') {
            /* Swallow the '\n' of a CRLF pair so it is not reported as an
             * extra, empty line (which would also skew line numbers). */
            int next = fgetc(fp);
            if (next != '\n' && next != EOF)
                ungetc(next, fp);
            break;
        }

        /* Always keep one byte spare for the terminating NUL. */
        if (len + 1 >= cap) {
            char *grown = realloc(buf, cap * 2);
            if (grown == NULL) {
                fprintf(stderr, "Memory allocation error in readline()\n");
                free(buf);
                return NULL;
            }
            buf = grown;
            cap *= 2;
        }
        buf[len++] = (char)ch;
    }

    /* Nothing read at all: signal end of input instead of an empty line. */
    if (ch == EOF && len == 0) {
        free(buf);
        return NULL;
    }

    buf[len] = '\0';
    return buf;
}

/* process() - this is the actual processing where we compare against a
 * previously compiled grep pattern and output based on the awk pattern.
 * The file is opened (and later closed) by the calling function. We pass in
 * an empty string if we don't want to show the filename. If we want to show
 * the line number, the value of show_lineno is 1.
 * Returns 1 if at least one line matched, 0 if none did, -1 on error. */
static int process(FILE *fp, regex_t re, awk_pat_t awk, char *filename,
                   int show_lineno)
{
    char slineno[32] = "";
    long lineno = 0;
    int found = 0;
    char *inbuf;

    /* readline() returns NULL at end of input, so a final line without a
     * trailing newline is still processed. */
    while ((inbuf = readline(fp)) != NULL) {
        lineno++;

        if (regexec(&re, inbuf, (size_t)0, NULL, 0) == 0) {
            found = 1;

            if (show_lineno)
                snprintf(slineno, sizeof slineno, "%ld:", lineno);

            if (process_line(inbuf, awk, filename, slineno)) {
                fprintf(stderr, "Error processing line [%s]\n", inbuf);
                free(inbuf);
                return -1;
            }
        }
        free(inbuf);
    }

    return found;
}

/* process_files() - process one or more files from the command-line.
 * Files that cannot be stat()ed or opened are reported on stderr and skipped;
 * anything that is not a regular file is skipped silently.
 * If at least one line is found, return 1, else return 0 if no lines
 * were found or an error occurs. */
static int process_files(int numfiles, char **files, regex_t re, awk_pat_t awk,
                         int show_filename, int show_lineno)
{
    int i, found = 0;

    for (i = 0; i < numfiles; i++) {
        struct stat sb;
        char *prefix = NULL;
        FILE *fp;

        if (stat(files[i], &sb) == -1) {
            /* Did a file get deleted from the time we started running? */
            fprintf(stderr, "Error accessing file %s: %s\n",
                    files[i], strerror(errno));
            continue;
        }

        /* For now, we aren't recursive. Perhaps allow symlinks? */
        if (!S_ISREG(sb.st_mode))
            continue;

        fp = fopen(files[i], "r");
        if (fp == NULL) {
            fprintf(stderr, "Error opening file %s: %s\n",
                    files[i], strerror(errno));
            continue;
        }

        if (show_filename) {
            /* Sized from the actual name: "<name>:" plus the NUL. */
            size_t size = strlen(files[i]) + 2;

            prefix = malloc(size);
            if (prefix == NULL) {
                fprintf(stderr,
                        "Could not allocate memory to process file %s\n",
                        files[i]);
                fclose(fp);
                continue;
            }
            snprintf(prefix, size, "%s:", files[i]);
        }

        if (process(fp, re, awk, prefix ? prefix : "", show_lineno) == 1)
            found = 1;

        free(prefix);
        fclose(fp);
    }

    return found;
}

/* process_pipe() - process input from stdin.
 * Returns 1 if at least one line matched, otherwise 0. */
static int process_pipe(regex_t re, awk_pat_t awk, int show_lineno)
{
    return process(stdin, re, awk, "", show_lineno) == 1 ? 1 : 0;
}

/* process_line() - process the line based on the awk-style pattern and output
 * the results to stdout, prefixed by filename and lineno (either may be "").
 *
 * The line is split on the characters in tokdelim (DEFAULT_DELIMS if it is
 * unset); runs of delimiters count as one separator and leading delimiters
 * are skipped. "$0" prints the unsplit line, "$N" prints field N or nothing
 * if the line has fewer fields. inbuf itself is not modified.
 * Returns 0 on success, -1 if memory could not be allocated. */
static int process_line(char *inbuf, awk_pat_t awk, char *filename,
                        char *lineno)
{
    static const char full_line[] = { FIELD_MARK, '0', '\0' };
    const char *delims = tokdelim ? tokdelim : DEFAULT_DELIMS;
    size_t nslots, count = 0;
    char **linefields;
    char *wrkbuf, *cursor;
    int i;

    if (awk.numfields == 1 && strcmp(awk.fields[0], full_line) == 0) {
        /* If the caller only wants the whole string, oblige, quickly. */
        fprintf(stdout, "%s%s%s\n", filename, lineno, inbuf);
        return 0;
    }

    /* Only the first maxfield + 1 fields can ever be referenced. */
    nslots = (size_t)awk.maxfield + 1;
    linefields = calloc(nslots, sizeof *linefields);
    wrkbuf = dup_string(inbuf);
    if (linefields == NULL || wrkbuf == NULL) {
        fprintf(stderr, "Could not allocate memory to process file %s\n",
                filename);
        free(wrkbuf);
        free(linefields);
        return -1;
    }

    /* Split the private copy in place: each delimiter that ends a field is
     * overwritten with NUL and linefields[] points into wrkbuf. This keeps
     * no hidden state between calls and needs no per-field copies. */
    cursor = wrkbuf;
    while (count < nslots) {
        cursor += strspn(cursor, delims);
        if (*cursor == '\0')
            break;
        linefields[count++] = cursor;
        cursor += strcspn(cursor, delims);
        if (*cursor == '\0')
            break;
        *cursor++ = '\0';
    }

    /* For each field in the awk structure,
     * find the field and print it to stdout. */
    fprintf(stdout, "%s%s", filename, lineno); /* if needed */
    for (i = 0; i < awk.numfields; i++) {
        const char *field = awk.fields[i];
        long n;

        if (field[0] != FIELD_MARK) {
            /* Literal text (string or separator). */
            fprintf(stdout, "%s", field);
            continue;
        }

        n = strtol(field + 1, NULL, 10);
        if (n == 0)
            fprintf(stdout, "%s", inbuf);
        else if (n > 0 && (size_t)n <= nslots && linefields[n - 1] != NULL)
            fprintf(stdout, "%s", linefields[n - 1]);
    }
    fprintf(stdout, "\n");

    /* Cleanup */
    free(linefields);
    free(wrkbuf);
    return 0;
}

/* awk_add_field() - append a copy of text to awk->fields.
 * Returns 0 on success, -1 (after reporting on stderr) if out of memory; in
 * that case the fields already stored stay valid and owned by awk. */
static int awk_add_field(awk_pat_t *awk, const char *text)
{
    char **grown = realloc(awk->fields,
                           ((size_t)awk->numfields + 1) * sizeof *grown);

    if (grown == NULL) {
        fprintf(stderr, "Memory allocation error (awk->fields) in awkcomp()\n");
        return -1;
    }
    awk->fields = grown;

    awk->fields[awk->numfields] = dup_string(text);
    if (awk->fields[awk->numfields] == NULL) {
        fprintf(stderr,
                "Memory allocation error (awk->fields[%d]) in awkcomp()\n",
                awk->numfields);
        return -1;
    }

    awk->numfields++;
    return 0;
}

/* awkcomp() - little awk-style print format compilation routine.
 * Fills *awk with the apattern broken down into an array for easier
 * comparison and printing. Handles string literals as well as fields and
 * delimiters. Example: $1,$2 " \$ and \"blah\" " $4
 *
 * Each "$N" becomes FIELD_MARK followed by its digits, each "," becomes a
 * single space, and each "..." literal is stored with its backslash escapes
 * resolved. Spaces and any other characters outside a literal are ignored.
 *
 * Returns 0 on success. Returns -1 on error (reported on stderr), in which
 * case *awk has been released and needs no awkfree(). */
static int awkcomp(awk_pat_t *awk, char *apattern)
{
    size_t len = strlen(apattern);
    size_t i, offs = 0;
    int in_string = 0;
    char *wrkbuf;

    awk->maxfield = 0;
    awk->numfields = 0;
    awk->fields = NULL;

    /* No compiled field can be longer than the pattern itself. */
    wrkbuf = malloc(len + 1);
    if (wrkbuf == NULL) {
        fprintf(stderr, "Memory allocation error (wrkbuf) in awkcomp()\n");
        return -1;
    }

    for (i = 0; i < len; i++) {
        char ch = apattern[i];
        int field_done = 0;

        if (in_string) {
            if (ch == '\\') {
                /* Escaped character inside a literal: take the next one
                 * verbatim. A backslash at the very end leaves the literal
                 * open, which is reported after the loop. */
                if (i + 1 >= len)
                    break;
                wrkbuf[offs++] = apattern[++i];
            } else if (ch == '"') {
                in_string = 0;
                field_done = 1;
            } else {
                wrkbuf[offs++] = ch;
            }
        } else {
            switch (ch) {
            case '\\':
                /* Unexpected and unconventional escape (can get these
                 * from improper invocations of sed in a pipe with grawk),
                 * if sed is used to build the field delimiters */
                fprintf(stderr, "Unexpected character '\\' in output format\n");
                goto fail;

            /* Beginning of a literal string */
            case '"':
                in_string = 1;
                break;

            /* Handle the awk-like $# field variables */
            case '$': {
                int num = 0;
                size_t digits = 0;

                /* We use a non-printable ASCII character to
                 * delimit the string field values. */
                wrkbuf[offs++] = FIELD_MARK;

                /* We also need the max. field number */
                while (i + 1 < len &&
                       apattern[i + 1] >= '0' && apattern[i + 1] <= '9') {
                    int digit = apattern[++i] - '0';

                    if (num > (INT_MAX - digit) / 10) {
                        fprintf(stderr, "Field number too large at "
                                "or near character %lu in awk pattern\n",
                                (unsigned long)(i + 1));
                        goto fail;
                    }
                    num = (num * 10) + digit;
                    wrkbuf[offs++] = apattern[i];
                    digits++;
                }

                /* Incomplete expression, a $ not followed by a number */
                if (digits == 0) {
                    fprintf(stderr, "Incomplete field descriptor at "
                            "or near character %lu in awk pattern\n",
                            (unsigned long)(i + 1));
                    goto fail;
                }

                if (num > awk->maxfield)
                    awk->maxfield = num;
                field_done = 1;
                break;
            }

            /* Field separator */
            case ',':
                wrkbuf[offs++] = ' ';
                field_done = 1;
                break;

            /* Spaces and anything else outside a literal: rubbish. Continue
             * in the hopes that something else makes sense. */
            default:
                break;
            }
        }

        /* End of a field reached, put it into awk->fields. An empty
         * literal ("") produces no field. */
        if (field_done && offs > 0) {
            wrkbuf[offs] = '\0';
            if (awk_add_field(awk, wrkbuf) != 0)
                goto fail;
            offs = 0;
        }
    }

    if (in_string) {
        fprintf(stderr, "Unterminated string literal in awk pattern\n");
        goto fail;
    }

    if (awk->numfields == 0) {
        fprintf(stderr,
                "Unable to parse and compile the pattern; no fields found\n");
        goto fail;
    }

    free(wrkbuf);
    return 0;

fail:
    free(wrkbuf);
    awkfree(awk);
    return -1;
}

/* awkfree() - free the fields of a previously compiled awk_pat structure.
 * The structure itself is owned by the caller and is left empty, so calling
 * this twice is harmless. */
static void awkfree(awk_pat_t *awk)
{
    int i;

    for (i = 0; i < awk->numfields; i++)
        free(awk->fields[i]);
    free(awk->fields);

    awk->fields = NULL;
    awk->numfields = 0;
    awk->maxfield = 0;
}

/* main() - parse options, compile both patterns, then filter the named files
 * (or stdin when no file is given).
 * Exit status: 0 if at least one line matched, 255 if none did,
 * EXIT_FAILURE on usage or compilation errors. */
int main(int argc, char **argv)
{
    int ignore_case = 0, no_filename = 0, with_filename = 0, line_number = 0;
    int c, err, numfiles, rc = 0;
    char *gpattern, *apattern;
    char **files;
    regex_t re;
    awk_pat_t awk;

    tokdelim = dup_string(DEFAULT_DELIMS);
    if (tokdelim == NULL) {
        fprintf(stderr, "Memory allocation error\n");
        return EXIT_FAILURE;
    }

    /* Options are parsed before the argument count is checked so that
     * "grawk --help" and "grawk --version" work on their own. */
    while ((c = getopt_long(argc, argv, "wWhinvF:", long_options, NULL)) != -1) {
        switch (c) {
        case 'w':
            with_filename = 0;
            no_filename = 1;
            break;
        case 'i':
            ignore_case = 1;
            break;
        case 'W':
            with_filename = 1;
            no_filename = 0;
            break;
        case 'n':
            line_number = 1;
            break;
        case 'F': {
            /* Tab, CR and LF always separate fields; fs replaces only the
             * default space. */
            size_t size = strlen("\t\r\n") + strlen(optarg) + 1;
            char *delims = malloc(size);

            if (delims == NULL) {
                fprintf(stderr, "Memory allocation error\n");
                free(tokdelim);
                return EXIT_FAILURE;
            }
            snprintf(delims, size, "\t\r\n%s", optarg);
            free(tokdelim);
            tokdelim = delims;
            break;
        }
        case 'h':
            usage();
            free(tokdelim);
            return EXIT_SUCCESS;
        case 'v':
            printf("%s\n", VERSION);
            free(tokdelim);
            return EXIT_SUCCESS;
        default:
            /* getopt_long() has already reported the bad option. */
            usage();
            free(tokdelim);
            return EXIT_FAILURE;
        }
    }

    /* Now we'll grab our patterns and files. */
    if ((argc - optind) < 2) {
        usage();
        free(tokdelim);
        return EXIT_FAILURE;
    }

    /* argv outlives everything below, so the strings are used in place. */
    gpattern = argv[optind++];  /* pattern one will be our "grep" pattern */
    apattern = argv[optind++];  /* pattern two is our "awk" pattern */

    /* Anything that remains is a file or wildcard which should be
     * expanded by the calling shell. */
    numfiles = argc - optind;
    files = argv + optind;

    /* If the number of files is greater than 1 then we default to
     * showing the filename unless specifically directed against it. */
    if (numfiles > 1 && no_filename == 0)
        with_filename = 1;

    /* compile the regular expression parser */
    err = regcomp(&re, gpattern, ignore_case ? REG_ICASE : 0);
    if (err != 0) {
        char reason[256];

        regerror(err, &re, reason, sizeof reason);
        fprintf(stderr, "Error compiling grep-style pattern [%s]: %s\n",
                gpattern, reason);
        free(tokdelim);
        return EXIT_FAILURE;
    }

    if (awkcomp(&awk, apattern)) {
        fprintf(stderr, "Error compiling awk-style pattern [%s]\n", apattern);
        regfree(&re);
        free(tokdelim);
        return EXIT_FAILURE;
    }

    /* Process everything. We'll return 255 if no lines were found. */
    if (numfiles > 0) {
        if (process_files(numfiles, files, re, awk,
                          with_filename, line_number) == 0)
            rc = 255;
    } else {
        if (process_pipe(re, awk, line_number) == 0)
            rc = 255;
    }

    /* Destructor */
    regfree(&re);
    awkfree(&awk);
    free(tokdelim);
    tokdelim = NULL;

    return rc;
}