blob: 9d86ed7cbfcdd60da41f7f107852b5f546f20bce [file] [log] [blame]
/*
* Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* Further, this software is distributed without any warranty that it is
* free of the rightful claim of any third person regarding infringement
* or the like. Any license provided herein, whether implied or
* otherwise, applies only to this software file. Patent licenses, if
* any, provided herein do not apply to combinations of this program with
* other software, or any other product whatsoever.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
* Mountain View, CA 94043, or:
*
* http://www.sgi.com
*
* For further information regarding this notice, see:
*
* http://oss.sgi.com/projects/GenInfo/NoticeExplan/
*
*/
/* $Id: rand_lines.c,v 1.5 2002/09/16 15:02:57 nstraz Exp $ */
/**************************************************************
*
* OS Testing - Silicon Graphics, Inc.
*
* TOOL IDENTIFIER : rand_lines
*
* DESCRIPTION : prints lines from a file in random order
*
* SYNOPSIS:
* rand_lines [-hg][-S seed][-l numlines] [files...]
*
* AUTHOR : Richard Logan
*
* CO-PILOT(s) :
*
* DATE STARTED : 05/94
*
* INPUT SPECIFICATIONS
* This tool will print lines of a file in random order.
* The max line length is 4096.
* The options supported are:
* -h This option prints a help message and then exits.
*
* -g This option counts the number of lines in the file
* before randomizing. This option overrides the
* -l option. Using this option will give you the best
* randomization, but it requires processing
* the file an additional time.
*
* -l numlines : This option specifies to randomize the file in
* chunks of numlines lines. The default size is 4096.
*
* -S seed : sets randomization seed to seed.
* The default is time(0). If seed is zero, time(0) is used.
*
* file A readable, seekable filename. Exactly one file must be
* given: main() prints an error and exits unless precisely
* one non-option argument remains on the command line.
*
* DESIGN DESCRIPTION
* This tool uses a simple algorithm where the file is read.
* The offset of each line is placed at a random position in an
* array. The array is then processed sequentially: for every used
* element, the line at the stored offset is reread from the input
* file and printed. The output is thus the input file's lines in
* random order.
*
* SPECIAL REQUIREMENTS
* None.
*
* UPDATE HISTORY
* This should contain the description, author, and date of any
* "interesting" modifications (i.e. info that should be helpful in
* maintaining/enhancing this tool).
* username description
* ----------------------------------------------------------------
* rrl Creation of program
* rrl 06/02 Fixed bug and some cleanup. Changed default chunk
* and line size to 4096 characters.
*
* BUGS/LIMITATIONS
* This program cannot deal with a non-seekable file such as
* stdin or a pipe. Only one file can be processed per
* invocation; main() rejects any other argument count. The max
* line length is 4096 characters.
*
**************************************************************/
#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include "random_range.h"
/*
* Structure used to hold file line offset.
*
* One element is one slot of the randomization array: rnd_insert()
* claims a free slot and stores a value in it, and the caller later
* walks the array in index order looking at the used slots.
*/
struct offset_t {
long used; /* zero while the slot is free; incremented by rnd_insert() when it fills the slot */
long offset; /* value stored by rnd_insert(); rnd_file() passes it to fseek() as a file offset */
};
/* Prototypes for the helpers defined further down in this file. */
void usage(FILE * stream);
void help(void);
int rnd_file(FILE * infile, int numlines, long seed);
int get_numlines(FILE * infile);
int rnd_insert(struct offset_t offsets[], long offset, int size);
/* Number of lines randomized per chunk unless -l or -g changes it. */
#define DEF_SIZE 4096 /* default chunk size */
/* Size of the line buffers handed to fgets(), which stores at most MAX_LN_SZ - 1 characters per read. */
#define MAX_LN_SZ 4096 /* max line size */
/* Fallback in case the included headers do not define SEEK_SET. */
#ifndef SEEK_SET
#define SEEK_SET 0
#endif
/* Program name used in diagnostics; main() points it at the last path component of argv[0]. */
char *Progname = NULL;
/***********************************************************************
 * MAIN
 *
 * Parses the command line, opens the single input file and prints its
 * lines in random order through rnd_file().
 *
 * Options:
 *   -h           print the help text and exit
 *   -g           count the lines of the file first and use that count
 *                as chunk size (overrides -l)
 *   -l numlines  chunk size in lines
 *   -S seed      randomization seed; when the seed is left at -1 the
 *                current time is used
 *
 * Exit status: 0 on success; 1 on a usage error, when the file cannot
 * be opened or when rnd_file() reports a failure; 2 when "-" is given
 * as file name, because the input has to be seekable.
 ***********************************************************************/
int main(int argc, char *argv[])
{
	FILE *infile;
	const char *path;
	int c;
	int status;
	long seed = -1;		/* -1: use time as seed */
	int lsize = DEF_SIZE;	/* num lines to randomize */
	int getfilelines = 0;	/* if set, count lines first */

	/* Keep only the last path component of argv[0] for messages. */
	if ((Progname = strrchr(argv[0], '/')) == NULL)
		Progname = argv[0];
	else
		Progname++;

	while ((c = getopt(argc, argv, "hgS:l:")) != EOF) {
		switch (c) {
		case 'h':
			help();
			exit(0);
			break;

		case 'S':	/* seed */
			if (sscanf(optarg, "%li", &seed) != 1) {
				fprintf(stderr,
					"%s: -S option argument is invalid\n",
					Progname);
				exit(1);
			}
			break;

		case 'l':	/* number of lines */
			if (sscanf(optarg, "%i", &lsize) != 1) {
				fprintf(stderr,
					"%s: -l option argument is invalid\n",
					Progname);
				exit(1);
			}
			break;

		case 'g':
			getfilelines++;
			break;

		case '?':
		default:
			usage(stderr);
			exit(1);
			break;
		}
	}

	/* Exactly one file operand is accepted. */
	if (optind >= argc) {
		fprintf(stderr, "%s: Missing argument.\n", Progname);
		usage(stderr);
		exit(1);
	}
	if (optind + 1 != argc) {
		fprintf(stderr, "%s: Too many arguments.\n", Progname);
		usage(stderr);
		exit(1);
	}
	path = argv[optind];

	if (seed == -1)
		seed = (long)time(NULL);

	/* The algorithm seeks back and forth, so stdin cannot be used. */
	if (strcmp(path, "-") == 0) {
		fprintf(stderr, "%s: Can not support stdin processing.\n",
			Progname);
		exit(2);
	}

	if ((infile = fopen(path, "r")) == NULL) {
		fprintf(stderr, "%s: Unable to open file %s: %s\n",
			Progname, path, strerror(errno));
		exit(1);
	}

	if (getfilelines)
		lsize = get_numlines(infile);

	status = rnd_file(infile, lsize, seed);
	fclose(infile);

	/* rnd_file() has already printed a diagnostic when it fails. */
	exit(status < 0 ? 1 : 0);
}
/***********************************************************************
 * Write the one-line command synopsis, prefixed with the program name,
 * to the given stream.
 ***********************************************************************/
void usage(FILE * stream)
{
	static const char synopsis[] =
	    "Usage %s [-hg][-S seed][-l numlines] [files...]\n";

	fprintf(stream, synopsis, Progname);
}
/***********************************************************************
 * Print the synopsis followed by a description of every option to
 * stdout. The line-length limit and the default chunk size shown in
 * the text come from MAX_LN_SZ and DEF_SIZE.
 ***********************************************************************/
void help(void)
{
	usage(stdout);

	/* One literal per output line; the compiler joins them. */
	printf("This tool will print lines in random order (max line len %d).\n"
	       "-h : print this help and exit\n"
	       "-g : count the number of lines in the file before randomizing\n"
	       "This option overrides -l option.\n"
	       "-l numlines : randoms lines in numlines chuncks (def %d)\n"
	       "-S seed : sets seed to seed (def time(0))\n",
	       MAX_LN_SZ, DEF_SIZE);
}
/***********************************************************************
 * Count the lines of an already opened file and reposition the stream
 * at its beginning.
 *
 * The count is the number of successful fgets() reads with a buffer of
 * MAX_LN_SZ bytes, so a line that does not fit into one read is
 * counted once per read.
 *
 * Note: the file must be seekable (not stdin or a pipe).
 ***********************************************************************/
int get_numlines(FILE *infile)
{
	char buf[MAX_LN_SZ];	/* scratch space for one read */
	int total;

	for (total = 0; fgets(buf, sizeof(buf), infile) != NULL; total++)
		;		/* counting only */

	/* back to the start of the file for the caller */
	fseek(infile, 0, SEEK_SET);

	return total;
}
/***********************************************************************
 * Print the lines of infile to stdout in random order, working on one
 * chunk of numlines lines at a time.
 *
 * infile must be a fseekable file. Thus, it can not be stdin.
 * For every chunk the start offset of each line read is stored in a
 * randomly chosen slot of an array of struct offset_t. The array is
 * then walked in index order and the line behind every used slot is
 * read again and printed.
 *
 * numlines  chunk size; a value <= 0 selects DEF_SIZE. It can be more
 *           or less than the number of lines in the file.
 * seed      handed to random_range_seed() before any line is placed.
 *
 * Returns 0 on success and -1 when the slot array cannot be allocated
 * or a read error is pending on infile at the end. A failing
 * fseek()/ftell() or a failing re-read of a stored line ends the
 * program through err(); a failing rnd_insert() ends it via abort().
 ***********************************************************************/
int rnd_file(FILE *infile,
	     int numlines,	/* can be more or less than num lines in file */
				/* most opt randomized when num lines in files */
				/* or just a bit bigger */
	     long seed)
{
	char line[MAX_LN_SZ];	/* max size of a line */
	int cnt;
	int rc = 0;
	long coffset;		/* current line offset */
	struct offset_t *offsets;
	size_t memsize;		/* size of the slot array in bytes */

	if (numlines <= 0)	/* use default */
		numlines = DEF_SIZE;

	/*
	 * Space for numlines copies of the offset_t structure.
	 * This is where the randomization takes place.
	 * calloc() rejects a count * size product that does not fit,
	 * which a plain multiplication stored in an int would not notice.
	 */
	offsets = calloc((size_t)numlines, sizeof(*offsets));
	if (offsets == NULL) {
		fprintf(stderr, "Unable to malloc(%d * %zu): errno:%d\n",
			numlines, sizeof(*offsets), errno);
		return -1;
	}
	memsize = (size_t)numlines * sizeof(*offsets);

	random_range_seed(seed);

	coffset = 0;
	while (!feof(infile)) {
		/* Continue right after the last line of the previous chunk. */
		if (fseek(infile, coffset, SEEK_SET) != 0)
			err(1, "fseek");

		memset(offsets, 0, memsize);
		cnt = 0;

		/*
		 * Read the file and place the offset of each line randomly
		 * into the offsets array. Only numlines lines can be
		 * randomized at a time.
		 */
		while (cnt < numlines
		       && fgets(line, MAX_LN_SZ, infile) != NULL) {
			if (rnd_insert(offsets, coffset, numlines) < 0) {
				fprintf(stderr,
					"%s:%d rnd_insert() returned -1 (fatal error)!\n",
					__FILE__, __LINE__);
				abort();
			}
			cnt++;

			if ((coffset = ftell(infile)) < 0)
				err(1, "ftell");
		}

		/*
		 * Nothing was read: either end of file or a read error.
		 * Stop in both cases; looping again after a read error
		 * would never terminate because feof() stays false.
		 */
		if (cnt == 0)
			break;

		/*
		 * Print out lines based on offset.
		 */
		for (cnt = 0; cnt < numlines; cnt++) {
			if (!offsets[cnt].used)
				continue;

			if (fseek(infile, offsets[cnt].offset, SEEK_SET) != 0)
				err(1, "fseek");
			if (fgets(line, MAX_LN_SZ, infile) == NULL)
				err(1, "fgets");
			fputs(line, stdout);
		}
	}			/* end of file */

	if (ferror(infile)) {
		fprintf(stderr, "read error on input file\n");
		rc = -1;
	}

	free(offsets);
	return rc;
}
/***********************************************************************
 * Store offset in a free element of the offsets array, which holds
 * size elements, and mark that element as used.
 *
 * Up to 75 indexes obtained from random_range(0, size - 1, 1, NULL)
 * are probed; the first free one wins. When all probes land on used
 * elements, the array is scanned from index 0 and the first free
 * element is taken instead.
 *
 * Returns the index that was filled, or -1 when every element is
 * already in use (which should never happen if called correctly).
 ***********************************************************************/
int rnd_insert(struct offset_t offsets[], long offset, int size)
{
	enum { MAX_RANDOM_TRIES = 75 };
	int attempt;
	int slot;

	/* Phase 1: a bounded number of random probes. */
	for (attempt = 0; attempt < MAX_RANDOM_TRIES; attempt++) {
		slot = random_range(0, size - 1, 1, NULL);
		if (offsets[slot].used)
			continue;

		offsets[slot].offset = offset;
		offsets[slot].used++;
		return slot;
	}

	/* Phase 2: no luck, fall back to the first free element. */
	for (slot = 0; slot < size; slot++) {
		if (offsets[slot].used == 0) {
			offsets[slot].offset = offset;
			offsets[slot].used++;
			return slot;
		}
	}

	/* Every element is taken; the caller passed a full array. */
	return -1;
}
/***********************************************************************
*
* CODE NOT TESTED AT ALL - it must be tested before it is used.
*
* This function was written to allow rand_lines to work on non-seekable
* file (i.e stdin).
*
* Intended approach, as far as the code shows: read the input into a
* malloc'ed buffer of `space` bytes, record the address of each line
* start in the offsets array through rnd_insert(), then print the
* recorded lines in array order.
*
* Returns -1 when one of the two allocations fails, 0 otherwise.
*
* NOTE(review): nothing in the visible file calls this function and it
* has no prototype next to the other forward declarations.
* NOTE(review): several statements below do not do what the comments
* around them describe; see the individual notes. As written, the
* inner processing loop is never entered, so nothing is printed to
* stdout.
*
***********************************************************************/
int rnd_stdin(FILE *infile,
int space, /* amount of space to use to read file into memory, */
/* randomized and print. randomize in chunks */
int numlines, /* can be more or less than num lines in file */
/* most opt randomized when num lines in files */
/* or just a bit bigger */
long seed)
{
char line[MAX_LN_SZ]; /* max size of a line */
int cnt; /* offset printer counter */
long loffset; /* last line address */
char *buffer; /* malloc space for file reads */
char *rdbuff; /* where to start read */
long stopaddr; /* end of read space (address) */
int rdsz; /* amount read */
int sztord;
char *chr; /* buffer processing pointer */
char *ptr; /* printing processing pointer */
char *lptr; /* printing processing pointer */
int loopcntl = 1; /* main loop control flag */
struct offset_t *offsets; /* pointer to offset space */
int memsize; /* amount of offset space to malloc */
/*
* NOTE(review): starts at 1 and is only ever incremented below, so
* the `while (!newbuffer)` loop body cannot run.
*/
int newbuffer = 1; /* need new buffer */
if (numlines <= 0) { /*use default */
numlines = DEF_SIZE;
}
/*
* Malloc space for file contents
*/
if ((buffer = (char *)malloc(space)) == NULL) {
fprintf(stderr, "Unable to malloc(%d): errno:%d\n", space,
errno);
return -1;
}
/*
* Malloc space for numlines copies the offset_t structure.
* This is where the randomization takes place.
*
* NOTE(review): the array is not cleared after malloc(), so the
* `used` flags tested by rnd_insert() and by the print loop start
* out indeterminate. `buffer` is not released on the failure path
* below, and neither allocation is freed before the function returns.
*/
memsize = sizeof(struct offset_t) * numlines;
if ((offsets = (struct offset_t *)malloc(memsize)) == NULL) {
fprintf(stderr, "Unable to malloc(%d): errno:%d\n", memsize,
errno);
return -1;
}
random_range_seed(seed);
rdbuff = buffer; /* read into start of buffer */
sztord = space; /* amount of space left in buffer */
/*
* Loop until read doesn't read anything
* If last line does not end in newline, it is not printed
*/
while (loopcntl) {
/*
* read in file up to space size
* only works if used as filter.
* The code will randomize one reads worth at a time.
* If typing in lines, read will read only one line - no randomizing.
*/
chr = buffer;
/*
* NOTE(review): fread() is asked for ONE item of sztord bytes, so
* its result is an item count (0 or 1), not the number of bytes
* read; a short final read yields 0. The message below is printed
* whenever that happens, not only for an empty input. The
* assignment to loopcntl is immediately followed by return.
*/
if ((rdsz = fread((void *)rdbuff, sztord, 1, infile)) == 0) {
fprintf(stderr,
"input file is empty, done randomizing\n");
loopcntl = 0;
return 0;
}
/*
* Addresses are kept in variables of type long (assumes a long can
* hold a pointer value - TODO confirm for the target platforms).
* NOTE(review): with rdsz being 0 or 1 (see above), stopaddr is at
* most one byte past the start of buffer; it is also computed from
* buffer even when the read went to rdbuff.
*/
stopaddr = ((long)buffer + rdsz);
loffset = (long)buffer;
while (!newbuffer) {
/* advance chr to just past the next newline (or past stopaddr) */
while ((long)chr < stopaddr && *chr != '\n')
chr++;
chr++;
if ((long)chr >= stopaddr) {
fprintf(stderr, "end of read in buffer\n");
/*
* print out lines based on offset.
*/
for (cnt = 0; cnt < numlines; cnt++) {
if (offsets[cnt].used) {
ptr =
(char *)offsets[cnt].offset;
/*
* copy buffer characters into line for printing
*
* NOTE(review): the copy is bounded neither by
* MAX_LN_SZ nor by stopaddr, and line is not
* NUL-terminated before being printed with %s.
*/
lptr = line;
while (*ptr != '\n')
*lptr++ = *ptr++;
printf("%s\n", line);
}
}
/*
* move start of partially read line to beginning of buffer
* and adjust rdbuff to end of partially read line
*
* NOTE(review): the memcpy() arguments copy FROM the start of
* buffer TO loffset, the opposite of what this comment
* describes; the two regions can also overlap.
*/
memcpy((void *)loffset, buffer,
(stopaddr - loffset));
rdbuff = buffer + (stopaddr - loffset);
sztord = space - (stopaddr - loffset);
newbuffer++;
}
/* record the start address of the line just scanned */
if (rnd_insert(offsets, loffset, numlines) < 0) {
fprintf(stderr,
"%s:%d rnd_insert() returned -1 (fatal error)!\n",
__FILE__, __LINE__);
abort();
}
loffset = (long)chr;
}
}
return 0;
}