/* Jargrep.java -- grep files in a Jar file.
   Copyright (C) 2004  Casey Marshall <rsdio@metastatic.org>

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the

   Free Software Foundation, Inc.,
   59 Temple Place, Suite 330,
   Boston, MA  02111-1307
   USA  */


package gnu.jargrep;

import gnu.getopt.Getopt;
import gnu.getopt.LongOpt;

import gnu.regexp.RE;
import gnu.regexp.REException;
import gnu.regexp.REMatch;
import gnu.regexp.RESyntax;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Reader;

import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

/**
 * An implementation of a grep-like utility for Java archive (and zip)
 * files. Based on Wes Biggs' <code>gnu.regexp.util.Grep</code> utility.
 *
 * <p>Every non-directory entry of each archive named on the command line
 * is read line by line and matched against a single regular expression.
 * Output lines are prefixed with <code>ARCHIVE!ENTRY:</code> unless file
 * names are suppressed.
 */
public class Jargrep
{
  // Indices into the boolean option array shared by the methods below.
  private static final int BYTE_OFFSET = 0;
  private static final int COUNT = 1;
  private static final int LINE_NUMBER = 2;
  private static final int QUIET = 3;
  private static final int SILENT = 4;
  private static final int NO_FILENAME = 5;
  private static final int REVERT_MATCH = 6;
  private static final int FILES_WITH_MATCHES = 7;
  private static final int LINE_REGEXP = 8;
  private static final int FILES_WITHOUT_MATCH = 9;
  private static final int WITH_FILENAME = 10;

  /** Number of slots in the option array (highest index + 1). */
  private static final int OPTION_COUNT = 11;

  /** Value returned by getopt for the long-only <code>--help</code> option. */
  private static final int OPT_HELP = 1024;

  private static final String PROGNAME = "jargrep";
  private static final String PROGVERSION = "0.01";

  /** Not instantiable; all entry points are static. */
  private Jargrep() { }

  /**
   * Invokes the grep() function below with the command line arguments
   * and using the RESyntax.RE_SYNTAX_GREP syntax, which attempts to
   * emulate the traditional UNIX grep syntax. The process exits with the
   * status returned by grep().
   *
   * @param argv the command line arguments.
   */
  public static void main(String[] argv)
  {
    System.exit(grep(argv, RESyntax.RE_SYNTAX_GREP, System.out));
  }

  /**
   * Parses the command line, compiles the pattern and searches every
   * archive named after the pattern. With no archive argument, or for an
   * argument of <code>-</code>, the archive is read from standard input.
   *
   * @param argv   the command line: options, then the pattern (unless
   *               given with -e or -f), then zero or more archive names.
   * @param syntax the regular expression syntax to use unless overridden
   *               by -E or -G.
   * @param out    where matching lines, counts and file names are written.
   * @return 0 if at least one line was selected, 1 if none was, and 2 on
   *         a usage error, an unreadable pattern file or a bad pattern.
   */
  public static int grep(String[] argv, RESyntax syntax, PrintStream out)
  {
    int cflags = 0;
    boolean[] options = new boolean[OPTION_COUNT];
    String pattern = null;

    LongOpt[] longOptions = {
      new LongOpt("byte-offset",         LongOpt.NO_ARGUMENT, null, 'b'),
      new LongOpt("count",               LongOpt.NO_ARGUMENT, null, 'c'),
      new LongOpt("no-filename",         LongOpt.NO_ARGUMENT, null, 'h'),
      new LongOpt("ignore-case",         LongOpt.NO_ARGUMENT, null, 'i'),
      new LongOpt("files-with-matches",  LongOpt.NO_ARGUMENT, null, 'l'),
      new LongOpt("help",                LongOpt.NO_ARGUMENT, null, OPT_HELP),
      new LongOpt("line-number",         LongOpt.NO_ARGUMENT, null, 'n'),
      new LongOpt("quiet",               LongOpt.NO_ARGUMENT, null, 'q'),
      new LongOpt("silent",              LongOpt.NO_ARGUMENT, null, 'q'),
      new LongOpt("no-messages",         LongOpt.NO_ARGUMENT, null, 's'),
      new LongOpt("revert-match",        LongOpt.NO_ARGUMENT, null, 'v'),
      new LongOpt("line-regexp",         LongOpt.NO_ARGUMENT, null, 'x'),
      new LongOpt("extended-regexp",     LongOpt.NO_ARGUMENT, null, 'E'),
      new LongOpt("basic-regexp",        LongOpt.NO_ARGUMENT, null, 'G'),
      new LongOpt("files-without-match", LongOpt.NO_ARGUMENT, null, 'L'),
      new LongOpt("version",             LongOpt.NO_ARGUMENT, null, 'V'),
      new LongOpt("regexp",              LongOpt.REQUIRED_ARGUMENT, null, 'e'),
      new LongOpt("file",                LongOpt.REQUIRED_ARGUMENT, null, 'f'),
      new LongOpt("with-filename",       LongOpt.NO_ARGUMENT, null, 'H')
    };

    // 'e' and 'f' take an argument, hence the trailing colons; 'H' must be
    // listed here too or the short form -H is rejected.
    Getopt g = new Getopt(PROGNAME, argv, "bce:f:hilnqsvxyEGHLV", longOptions);
    int c;
    while ((c = g.getopt()) != -1)
      {
        switch (c)
          {
          case 'b':
            options[BYTE_OFFSET] = true;
            break;
          case 'c':
            options[COUNT] = true;
            break;
          case 'e':
            pattern = g.getOptarg();
            break;
          case 'f':
            try
              {
                pattern = readPatternFile(g.getOptarg());
              }
            catch (IOException x)
              {
                out.println("jargrep: "+g.getOptarg()+": "+x.getMessage());
                return 2;
              }
            break;
          case 'h':
            options[NO_FILENAME] = true;
            break;
          case 'i':
          case 'y':
            cflags |= RE.REG_ICASE;
            break;
          case 'l':
            options[FILES_WITH_MATCHES] = true;
            break;
          case 'n':
            options[LINE_NUMBER] = true;
            break;
          case 'q':
            options[QUIET] = true;
            break;
          case 's':
            options[SILENT] = true;
            break;
          case 'v':
            options[REVERT_MATCH] = true;
            break;
          case 'x':
            options[LINE_REGEXP] = true;
            break;
          case 'E':  // TODO: check compatibility with grep
            syntax = RESyntax.RE_SYNTAX_EGREP;
            break;
          case 'G':
            syntax = RESyntax.RE_SYNTAX_GREP;
            break;
          case 'H':
            options[WITH_FILENAME] = true;
            break;
          case 'L':
            options[FILES_WITHOUT_MATCH] = true;
            break;
          case 'V':
            System.err.println(PROGNAME+' '+PROGVERSION);
            return 0;
          case OPT_HELP:
            printHelp(out);
            return 0;
          case '?':
            // Getopt has already reported the offending option; a syntax
            // error on the command line is exit status 2.
            System.err.println("Try `" + PROGNAME + " --help' for more information.");
            return 2;
          }
      }

    int optind = g.getOptind();
    if (pattern == null)
      {
        // No -e/-f: the first non-option argument is the pattern.
        if (optind >= argv.length)
          {
            System.err.println("Usage: " + PROGNAME + " [OPTION]... PATTERN [FILE]...");
            System.err.println("Try `" + PROGNAME + " --help' for more information.");
            return 2;
          }
        pattern = argv[optind++];
      }

    RE regexp;
    try
      {
        regexp = new RE(pattern, cflags, syntax);
      }
    catch (REException e)
      {
        System.err.println("Error in expression: "+e);
        return 2;
      }

    // As the help text states: fewer than two archives implies -h, and an
    // explicit -H always wins.
    int fileCount = argv.length - optind;
    if (fileCount < 2)
      options[NO_FILENAME] = true;
    if (options[WITH_FILENAME])
      options[NO_FILENAME] = false;

    int retval = 1;
    if (fileCount > 0)
      {
        for (int i = optind; i < argv.length; i++)
          {
            if (argv[i].equals("-"))
              {
                if (processZip(regexp, System.in, options,
                               options[NO_FILENAME] ? null : "(standard input)", out))
                  retval = 0;
              }
            else
              {
                InputStream is = null;
                try
                  {
                    is = new FileInputStream(argv[i]);
                    if (processZip(regexp, is, options,
                                   options[NO_FILENAME] ? null : argv[i], out))
                      retval = 0;
                  }
                catch (FileNotFoundException e)
                  {
                    if (!options[SILENT])
                      System.err.println(PROGNAME+": "+e);
                  }
                finally
                  {
                    closeQuietly(is);
                  }
              }
          }
      }
    else
      {
        // No archive named: read one from standard input.
        if (processZip(regexp, System.in, options,
                       options[NO_FILENAME] ? null : "(standard input)", out))
          retval = 0;
      }
    return retval;
  }

  /**
   * Reads the whole of a pattern file and returns its contents with any
   * trailing line terminators removed (lines handed to the matcher never
   * contain a terminator, so a trailing newline could never match).
   *
   * @param path the name of the file holding the pattern.
   * @return the pattern text.
   * @throws IOException if the file cannot be opened or read.
   */
  private static String readPatternFile(String path) throws IOException
  {
    StringBuffer buf = new StringBuffer();
    Reader r = new FileReader(path);
    try
      {
        char[] ch = new char[128];
        int len;
        while ((len = r.read(ch)) != -1)
          buf.append(ch, 0, len);
      }
    finally
      {
        r.close();
      }
    int end = buf.length();
    while (end > 0
           && (buf.charAt(end - 1) == '\n' || buf.charAt(end - 1) == '\r'))
      end--;
    buf.setLength(end);
    return buf.toString();
  }

  /** Closes a stream, ignoring a null argument and any failure to close. */
  private static void closeQuietly(InputStream is)
  {
    if (is == null)
      return;
    try
      {
        is.close();
      }
    catch (IOException ignored)
      {
        // Nothing useful can be done; the stream was only read from.
      }
  }

  /** Writes the --help text to the given stream. */
  private static void printHelp(PrintStream out)
  {
    out.println("Usage: jargrep [OPTION]... PATTERN [JARFILE] ...");
    out.println("Search for PATTERN in each JARFILE or standard input.");
    out.println();
    out.println("Regexp selection and interpretation:");
    out.println("  -E, --extended-regexp     PATTERN is an extended regular expression");
    out.println("  -G, --basic-regexp        PATTERN is a basic regular expression");
    out.println("  -i, --ignore-case         ignore case distinctions");
    out.println("  -e, --regexp=PATTERN      use PATTERN as a regular expression");
    out.println("  -f, --file=FILE           obtain PATTERN from FILE");
    out.println();
    out.println("Miscellaneous:");
    out.println("  -s, --no-messages         suppress error messages");
    out.println("  -v, --revert-match        select non-matching lines");
    out.println("  -V, --version             print version information and exit");
    out.println("      --help                display this help and exit");
    out.println();
    out.println("Output control:");
    out.println("  -b, --byte-offset         print the byte offset with output lines");
    out.println("  -n, --line-number         print line number with output lines");
    out.println("  -h, --no-filename         suppress the prefixing filename on output");
    out.println("  -q, --quiet, --silent     suppress all normal output");
    out.println("  -L, --files-without-match only print FILE names containing no match");
    out.println("  -l, --files-with-matches  only print FILE names containing matches");
    out.println("  -c, --count               only print a count of matching lines per FILE");
    out.println("  -H, --with-filename       print the filename for each match");
    out.println();
    out.println("Context control:");
    out.println("Unsupported (may be available in a future release):");
    out.println("  -B, --before-context=NUM  print NUM lines of leading context");
    out.println("  -A, --after-context=NUM   print NUM lines of trailing context");
    out.println("  -NUM                      same as both -B NUM and -A NUM");
    out.println("  -C, --context             same as -2");
    out.println("  -U, --binary              do not strip CR characters at EOL (MSDOS)");
    out.println("  -u, --unix-byte-offsets   report offsets as if CRs were not there (MSDOS)");
    out.println();
    out.println("With no JARFILE, or when JARFILE is -, read standard input. If less than");
    out.println("two JARFILEs given, assume -h. Exit with 0 if matches, with 1 if none.");
    out.println("Exit with 2 if syntax errors or system errors.");
    out.println();
    out.println("Report bugs to <rsdio@metastatic.org>");
  }

  /**
   * Searches every non-directory entry of a zip stream.
   *
   * @param pattern  the compiled expression.
   * @param is       the raw archive bytes; not closed by this method.
   * @param options  the option flags, indexed by the constants above.
   * @param filename the archive name used as an output prefix (entries are
   *                 reported as <code>filename!entry</code>), or null to
   *                 print no prefix.
   * @param out      where results are written.
   * @return true if any entry reported a selected line.
   */
  private static boolean processZip(RE pattern, InputStream is, boolean[] options, String filename, PrintStream out)
  {
    ZipInputStream zis = new ZipInputStream(is);
    ZipEntry entry = null;
    boolean match = false;
    try
      {
        while ((entry = zis.getNextEntry()) != null)
          {
            if (entry.isDirectory())
              continue;
            String label = (filename != null) ? filename+"!"+entry.getName() : null;
            match |= processStream(pattern, zis, options, label, out);
          }
      }
    catch (Exception x)
      {
        // Deliberately broad: whatever goes wrong with one archive is
        // reported and the remaining archives are still searched.
        if (!options[SILENT])
          {
            System.err.println(PROGNAME+": "+x);
          }
      }
    return match;
  }

  /**
   * Matches the pattern against each line of a stream and prints the
   * result according to the option flags.
   *
   * @param pattern  the compiled expression.
   * @param is       the text to search; read to its end but not closed.
   * @param options  the option flags, indexed by the constants above.
   * @param filename the prefix for output lines, or null for none.
   * @param out      where results are written.
   * @return true if at least one line was selected (after applying -v);
   *         always false when -L stops at the first selected line.
   */
  private static boolean processStream(RE pattern, InputStream is, boolean[] options, String filename, PrintStream out)
  {
    int newlineLen = System.getProperty("line.separator").length();
    // The reader is intentionally never closed: closing it would close
    // the underlying stream, which the caller still needs.
    BufferedReader br = new BufferedReader(new InputStreamReader(is));
    int count = 0;
    long atByte = 0;
    int atLine = 1;
    String line;
    REMatch match;

    try
      {
        while ((line = br.readLine()) != null)
          {
            match = pattern.getMatch(line);
            boolean hit = options[LINE_REGEXP] ? pattern.isMatch(line)
                                               : (match != null);
            if (hit ^ options[REVERT_MATCH])
              {
                count++;
                if (!options[COUNT])
                  {
                    if (options[QUIET])
                      return true;
                    if (options[FILES_WITH_MATCHES])
                      {
                        if (filename != null)
                          out.println(filename);
                        return true;
                      }
                    if (options[FILES_WITHOUT_MATCH])
                      return false;
                    if (filename != null)
                      {
                        out.print(filename);
                        out.print(':');
                      }
                    if (options[LINE_NUMBER])
                      {
                        out.print(atLine);
                        out.print(':');
                      }
                    if (options[BYTE_OFFSET])
                      {
                        // A line selected by -v has no match; report the
                        // offset of the line start in that case.
                        int within = (match != null) ? match.getStartIndex() : 0;
                        out.print(atByte + within);
                        out.print(':');
                      }
                    out.println(line);
                  }
              } // a selected line
            // Approximation: counts chars, not bytes, and assumes the
            // platform line separator was used in the input.
            atByte += line.length() + newlineLen;
            atLine++;
          } // a valid line

        if (options[COUNT])
          {
            // "name:count" on a single line.
            if (filename != null)
              out.print(filename+':');
            out.println(count);
          }
        if (options[FILES_WITHOUT_MATCH] && count == 0)
          {
            if (filename != null)
              out.println(filename);
          }
      }
    catch (IOException e)
      {
        if (!options[SILENT])
          System.err.println(PROGNAME+": "+e);
      }
    // count already reflects -v, so it must not be inverted again here.
    return count > 0;
  }
}
