/**
 *  Copyright 2005, Troy Korjuslommi, aka TJK.
 *  tjk@tksoft.com
 *  
 *  Latest copy available from http://www.plug.fi/
 * 
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License,
 *  version 2, as published by the Free Software Foundation.
 * 
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
 *  USA.
 * 
 *  Any express or implied warranties, including, but not limited to, 
 *  the implied warranties of merchantability and fitness for a 
 *  particular purpose are disclaimed.  In no event shall TJK,
 *  agents of TJK, or other contributors be liable for any direct, 
 *  indirect, incidental, special, exemplary, or consequential damages 
 *  (including, but not limited to, procurement of substitute goods or 
 *  services; loss of use, data, or profits; or business interruption) 
 *  however caused and on any theory of liability, whether in 
 *  contract, strict liability, or tort (including negligence or 
 *  otherwise) arising in any way out of the use of this software, 
 *  even if advised of the possibility of such damage.                    
 * 
 */

/**
 * Read a file and search for a pattern.
 *
 * This is a demo program, to show how we can search through a file
 * using mmap or regular reads. The mmap version is actually so
 * inefficient that it works slower than regular reading. Efficiency
 * wasn't the point here, though, so fixing it is left for later.
 *
 * Output is a list of offsets into the file, where you can find the
 * searched for text. You can make sure the search gave you the right 
 * offset with dd. E.g. 
 * dd if=somefile.txt skip={offset} ibs=1 count=9
 * (Note: 9 is length of "some text"; {offset} is the number generated 
 * by this program.)
 * dd will print out the text from the file. It should be same as the text
 * you searched for.
 *
 * Note: if you search in a device file, such as /dev/hda1, you will be 
 * asked for size of the file, as the file doesn't have a "size."
 *
 * Usage: ./finder somefile.txt 'some text' 
 */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <errno.h>

/**
 * croak(msg): report a fatal error and terminate the program.
 *
 * Prints msg to stderr, followed by the text for errno when errno is
 * non-zero, followed by the usage line, then calls exit(-1).
 *
 * errno is copied before the first fprintf() because stdio calls are
 * allowed to modify it. The body is wrapped in do { } while (0) so the
 * macro behaves as a single statement, e.g. in
 *     if (cond) croak("..."); else ...
 */
#define croak(a) do { \
	const int croak_saved_errno = errno; \
	fprintf(stderr, "*** ERROR: %s\n", (a)); \
	if (croak_saved_errno) { \
		fprintf(stderr, "*** Errno: %s\n", strerror(croak_saved_errno)); \
	} \
	fprintf(stderr, "Usage: ./finder somefile.txt 'some text'\n"); \
	exit (-1); \
} while (0)


/**
 * Ask the user on stdin for the size of the file in bytes.
 *
 * The prompt is repeated until a non-negative integer has been read.
 * Input that is not a number (or is negative) is discarded up to the
 * end of the line, so it cannot be re-read forever. If stdin reaches
 * end-of-file before a valid size was entered, an error is printed to
 * stderr and the program exits with status -1.
 *
 * @return the size entered by the user (always >= 0).
 */
long long int getFileSize(void) {
	long long int len = 0;

	for (;;) {
		int count;
		int c;

		printf("Need filesize: ");
		fflush(stdout);
		count = fscanf(stdin, "%lld", &len);
		if (count == 1 && len >= 0) {
			break;
		}
		if (count == EOF) {
			fprintf(stderr, "*** ERROR: No file size given.\n");
			exit(-1);
		}
		/* fscanf() leaves unparsable text in the stream; drop the line. */
		do {
			c = fgetc(stdin);
		} while (c != '\n' && c != EOF);
	}
	fprintf(stderr, "Size: %lld.\n", len);
	return len;
}

enum {
	MMAP_WINDOW_PAGES = 1000,	/* pages mapped per window in mmap mode */
	READ_CHUNK = 4095,		/* bytes requested per fread() in read mode */
	PROGRESS_UNIT = 1024000		/* divisor behind the "Mb" progress figure */
};

/**
 * Print one match line to stderr: the file offset of the match and the
 * number of seconds elapsed since start.
 */
static void reportMatch(long long offset, time_t start) {
	fprintf(stderr, "Match at: %lld. Time taken: %ld seconds.\n",
		offset, (long) (time(NULL) - start));
}

/**
 * Ask the user which search method to use, repeating the question
 * until a valid answer is given.
 *
 * @return 1 for mmap, 2 for normal read, 0 if stdin hit end-of-file
 *         before a valid choice was made.
 */
static int promptSearchMode(void) {
	for (;;) {
		int c;

		printf("Choose\n\t  (1) mmap\n\t  (2) normal read.\nEnter your choice: ");
		fflush(stdout);
		/* Skip blanks and line ends left over from earlier input. */
		do {
			c = fgetc(stdin);
		} while (c == ' ' || c == '\t' || c == '\r' || c == '\n');
		if (c == EOF) {
			return 0;
		}
		if (c == '1') {
			return 1;
		}
		if (c == '2') {
			return 2;
		}
		printf("\n*** Illegal value!\n\n\n");
		/* Drop the rest of the offending line so it is not re-read. */
		while (c != '\n' && c != EOF) {
			c = fgetc(stdin);
		}
	}
}

/**
 * Search the first size bytes of fd for pat (case-insensitively) by
 * mapping the file window by window.
 *
 * Each window starts on a page boundary and extends patlen bytes past
 * MMAP_WINDOW_PAGES pages (or to the end of the file), so a match that
 * straddles two windows is still seen in full by the earlier one.
 * pos is the absolute file offset of the next candidate, which keeps
 * reported offsets exact. After a match the search resumes behind the
 * matched text, i.e. overlapping matches are not reported in this mode.
 *
 * @param patlen length of pat; must be at least 1.
 */
static void searchWithMmap(int fd, long long size, const char *pat, size_t patlen, time_t start) {
	const long long pgsz = getpagesize();
	const long long plen = (long long) patlen;
	const long long maxWin = MMAP_WINDOW_PAGES * pgsz;
	long long pos = 0;
	unsigned long windows = 0;

	while (pos + plen <= size) {
		const long long winStart = pos - pos % pgsz;
		long long winLen = size - winStart;
		long long lastStart;
		char *top;

		if (winLen > maxWin + plen) {
			winLen = maxWin + plen;
		}
		top = mmap(NULL, (size_t) winLen, PROT_READ, MAP_SHARED, fd, (off_t) winStart);
		if (top == MAP_FAILED) {
			croak("mmap failed.");
		}
		/* Last offset at which the whole pattern still fits in this window. */
		lastStart = winStart + winLen - plen;
		while (pos <= lastStart) {
			if (strncasecmp(top + (pos - winStart), pat, patlen) == 0) {
				reportMatch(pos, start);
				pos += plen;
			} else {
				pos++;
			}
		}
		munmap(top, (size_t) winLen);
		if (++windows % 50 == 0) {
			fprintf(stderr, "Reading: %lld Mb\n", (winStart + winLen) / PROGRESS_UNIT);
		}
	}
}

/**
 * Search fp for pat (case-insensitively) using plain fread() calls.
 *
 * The unexamined tail of every chunk (at most patlen-1 bytes) is moved
 * to the front of the buffer and the next chunk is read behind it, so
 * matches that straddle two reads are found, and each offset is tested
 * exactly once. Every matching offset is reported, including
 * overlapping ones.
 *
 * @param patlen length of pat; must be at least 1.
 * @return the number of bytes read from fp.
 */
static long long searchWithRead(FILE *fp, const char *pat, size_t patlen, time_t start) {
	char *buf = malloc(patlen - 1 + READ_CHUNK);
	size_t carry = 0;	/* bytes kept from the previous chunk */
	long long base = 0;	/* file offset of buf[0] */
	long long total = 0;
	unsigned long reads = 0;

	if (buf == NULL) {
		croak("Out of memory.");
	}
	for (;;) {
		size_t have;
		size_t i = 0;
		const size_t n = fread(buf + carry, 1, READ_CHUNK, fp);

		if (n == 0) {
			break;	/* end of file or read error; caller compares totals */
		}
		total += (long long) n;
		have = carry + n;
		for (; i + patlen <= have; i++) {
			if (strncasecmp(buf + i, pat, patlen) == 0) {
				reportMatch(base + (long long) i, start);
			}
		}
		/* i is now the first offset that could not be tested yet. */
		carry = have - i;
		memmove(buf, buf + i, carry);
		base += (long long) i;
		if (++reads % 30000 == 0) {
			fprintf(stderr, "Reading: %lld Mb\n", total / PROGRESS_UNIT);
		}
	}
	free(buf);
	return total;
}

/**
 * Entry point: ./finder somefile.txt 'some text'
 *
 * Opens argv[1], asks on stdin whether to search with mmap or with
 * normal reads, and prints the offset of every case-insensitive
 * occurrence of argv[2] to stderr. In mmap mode the size of anything
 * that is not a regular file is asked from the user. In read mode a
 * summary of the bytes read is printed to stdout.
 *
 * Fatal problems (missing arguments, empty pattern, unreadable file,
 * no mode chosen, mmap failure) end the program through croak().
 *
 * @return 0 on success.
 */
int main (int argc, char ** argv) {
	char * file;
	char * pat;
	size_t patlen;
	FILE * fp;
	struct stat stats;
	int opt;

	if (argc < 3) {
		croak("No file and pattern.");
	}
	file = argv[1];
	pat = argv[2];
	patlen = strlen(pat);
	/* An empty pattern matches everywhere and never advances the search. */
	if (patlen == 0) {
		croak("Empty pattern.");
	}
	fprintf(stderr, "Reading file: %s\n", file);
	fprintf(stderr, "Looking for: %s\n", pat);

	if (access(file, F_OK | R_OK)) {
		croak("File doesn't exist or can't be read.");
	}
	fp = fopen(file, "r");
	if (! fp) {
		croak("Couldn't open file.");
	}
	if (stat(file, &stats) < 0) {
		croak("Couldn't stat file.");
	}

	opt = promptSearchMode();
	if (opt == 0) {
		croak("No search mode chosen.");
	}

	if (opt == 1) {
		long long size;
		const int fd = fileno(fp);

		if (fd < 0) {
			croak("Couldn't open file.");
		}
		if (S_ISREG(stats.st_mode)) {
			size = (long long) stats.st_size;
		} else {
			size = getFileSize();
		}
		/* Start the clock only after all questions have been answered. */
		searchWithMmap(fd, size, pat, patlen, time(NULL));
	} else {
		const long long rd = searchWithRead(fp, pat, patlen, time(NULL));

		if (S_ISREG(stats.st_mode) && rd != (long long) stats.st_size) {
			printf("Incomplete read. Read %lld/%lld bytes.\n", rd, (long long) stats.st_size);
		} else {
			printf("Complete reading %lld bytes.\n", rd);
		}
	}

	fclose(fp);

	return 0;
}





