/*
* Copyright 1989 by Rayan S. Zachariassen, all rights reserved.
* This will be free software, but only when it is finished.
*/
/*
* Filename expansion (globbing) routines. The expand() function is called
* just before pushing an argv onto a command descriptor in the interpreter.
* As a side-effect, multiple buffers are mashed into one, and word separation
* using IFS characters is also done here.
*/
#include "hostenv.h"
#include <stdio.h>
#include <sys/file.h>
#include <sys/stat.h>
#ifdef HAVE_DIRENT_H
# include <dirent.h>
#else /* not HAVE_DIRENT_H */
# define dirent direct
# ifdef HAVE_SYS_NDIR_H
# include <sys/ndir.h>
# endif /* HAVE_SYS_NDIR_H */
# ifdef HAVE_SYS_DIR_H
# include <sys/dir.h>
# endif /* HAVE_SYS_DIR_H */
# ifdef HAVE_NDIR_H
# include <ndir.h>
# endif /* HAVE_NDIR_H */
#endif /* HAVE_DIRENT_H */
#include "sh.h"
#include "flags.h"
#include "zmalloc.h"
#include "listutils.h"
#include "io.h" /* redefines stdio routines */
#include "shconfig.h"
#include "libsh.h"
/*
* For speed we look up magic characters (*, ?, and [) to check whether
* a word requires filename globbing. The globchars array contains such flags.
*/
char globchars[CHARSETSIZE];
/*
* Initialize the array, called from main(), once.
*/
void
glob_init()
{
int i;
for (i = 0; i < CHARSETSIZE; ++i)
globchars[i] = 0;
globchars['*'] = globchars['?'] = globchars['['] = 1;
/* see also sJumpIfMatch in interpreter for globchars['|'] */
}
/*
* Because the result of filename expansion must be presented in alphabetical
* order, we have to keep the pathnames somewhere temporarily to do a qsort().
* Since we don't know how many there will be, we maintain a linked list of
* (struct sawpath), then turn that into an array that we can sort easily.
*/
struct sawpath {
struct sawpath *next;
char array[1]; /* is really array[strlen(path)+1] */
};
STATIC int glut __((char *cwd, char *pwd, int *pb, int recur, struct sawpath **swp));
/*
* Normally we don't want to descend into symlink'ed directories, but in
* case you do, this is where you change it. It is INADVISABLE to use
* stat() since in that case a super-glob can lead to infinite recursion.
*/
#ifdef HAVE_LSTAT
extern int lstat();
static int (*statfcn)() = lstat; /* stat if following symlinks, o/w lstat */
#else /* !HAVE_LSTAT */
extern int stat();
static int (*statfcn)() = stat;
#endif /* HAVE_LSTAT */
/*
* Qsort() comparison function to sort pathnames.
*/
int
pathcmp(ap, bp)
const void *ap, *bp;
{
register const void **a = (const void **)ap;
register const void **b = (const void **)bp;
/* sort to reverse order, easier to collate
into an object chain */
return -strcmp(*a, *b);
}
/* note that | is going to be used instead of / in some pathname examples */
/*
* Super-glob. This is exactly like glob except that the sequence: |**|
* in a pathname will match any number of levels of directories. Its an
* old idea of mine so here's a wonderful opportunity to express myself within
* the confines of sh! The major thing to note is that quoted characters
* do not qualify for globbing. How does one know if a character is quoted?
* Each character is stored in an int, the u_char value is obtained using
* the BYTE() macro, and the quotedness is determined by the QUOTEBYTE bit
* in the int.
*/
#define BYTE(X) ((X) & 0xFF)
#define QUOTEBYTE (0x8000)
STATIC conscell * sglob __((int *));
STATIC conscell *
sglob(ibuf)
int *ibuf; /* unglobbed pathname w/ each byte stored as int */
{
register int i, n;
conscell *pp, *cc = NULL;
struct sawpath *spp;
struct sawpath head;
char *pwd, /* points to end of current directory in cwd */
**base, /* array of expanded filenames, for sorting */
cwd[4096]; /* all pathnames are constructed in cwd */
GCVARS1;
if (BYTE(ibuf[0]) == '/') {
cwd[0] = '/';
pwd = cwd+1;
} else if (BYTE(ibuf[0]) == '.' && BYTE(ibuf[1]) == '/') {
cwd[0] = '.';
cwd[1] = '/';
pwd = cwd+2;
ibuf += 2;
} else
pwd = cwd;
*pwd = '\0';
head.next = NULL;
spp = &head;
if (BYTE(ibuf[0]) != 0)
n = glut(cwd, pwd, ibuf, 0, &spp); /* do expansion */
else
goto leave;
if (n <= 0)
goto leave;
#ifdef USE_ALLOCA
base = (char **)alloca((sizeof (char *))*n);
#else
base = (char **)malloc((sizeof (char *))*n);
#endif
i = 0;
for (spp = head.next; spp != NULL ; spp = spp->next)
base[i++] = spp->array;
qsort(base, n, sizeof base[0], pathcmp);
/* construct a sorted linked list */
cc = NULL;
GCPRO1(cc);
for (i = 0; i < n; ++i) {
int slen = strlen(base[i]);
pp = newstring(dupnstr(base[i],slen),slen);
cdr(pp) = cc;
cc = pp;
/* printf("saw %s\n", base[i]); */
}
UNGCPRO1;
#ifndef USE_ALLOCA
free(base);
#endif
leave:
spp = head.next;
while (spp != NULL) {
struct sawpath *spp2 = spp->next;
free(spp);
spp = spp2;
}
return cc;
}
/*
* Utility function to append a pathname to a linked list for later sorting.
*/
STATIC struct sawpath * stash __((const void *, int, struct sawpath *));
STATIC struct sawpath *
stash(s, len, ps)
const void *s; /* the pathname we want to stash away */
int len; /* its length */
struct sawpath *ps; /* the previous list element */
{
register struct sawpath *spp;
spp = (struct sawpath *)malloc(sizeof (struct sawpath) + len);
ps->next = spp;
spp->next = NULL;
memcpy(spp->array, s, len);
spp->array[len] = 0;
return spp;
}
STATIC int kleene[] = { '*', '/', 0 }; /* foo|**| becomes foo|**|*| */
/*
* This routine is the recursive workhorse of the filename globbing.
*/
static int
glut(cwd, pwd, bp, recur, swp)
char *cwd, /* always the same buffer */
*pwd; /* pointer to end of directories/ inside cwd */
int *bp, /* next character in the name to glob */
recur; /* flag: superglob mode, deep dir. descend */
struct sawpath **swp;
{
int *start, /* beginning of simple file name to expand */
*eoname, /* end of same */
*ip,
havepattern, /* the simple file name contains glob chars */
flag, /* remember to put the trailing / on cwd back */
count, /* number of expansions */
namlen, /* filename length */
i;
struct stat stbuf;
struct dirent *dp;
DIR *dirp = NULL;
if (interrupted)
return 0;
if (pwd > cwd+1) {
*--pwd = '\0';
flag = 1;
} else
flag = 0;
/* printf("%s:\n", cwd); */
/*
* must do stat since assuming opendir will
* fail on files might not be portable.
*/
if ((*cwd == '\0' && statfcn(".", &stbuf) < 0) ||
(*cwd != '\0' && statfcn(cwd, &stbuf) < 0) ||
(stbuf.st_mode & S_IFMT) != S_IFDIR)
return -1;
again:
while (*bp != '\0' && BYTE(*bp) == '/')
++bp;
if (*bp == '\0') {
if (recur) /* we're at the end of the road of a |**| */
bp = kleene;
else {
*swp = stash(cwd, (pwd - cwd), *swp);
return 1;
}
}
if (*bp == '*' && *(bp+1) == '*' && BYTE(*(bp+2)) == '/') {
recur = 1; /* superglob mode */
bp += 2;
if (*(bp+1))
goto again;
else
++bp; /* start and eoname will point at 0 */
}
start = bp;
while (*bp != '\0' && BYTE(*bp) != '/')
++bp;
eoname = bp;
/*
* Now we have a local name between start and eoname, relative to
* the directory we have open. search through the directory for
* that name, then do the whole thing again, recursively.
*/
havepattern = count = 0;
/* optimization... is it worth globbing in inner loop far below? */
for (ip = start; ip < eoname; ++ip)
if (*ip == '*' || *ip == '?' || *ip == '[') {
havepattern = 1;
break;
}
if ((*cwd == '\0' && (dirp = opendir(".")) == NULL) ||
(*cwd != '\0' && (dirp = opendir((char *)cwd)) == NULL)) {
perror((char *)cwd);
return 0;
}
if (flag) {
*pwd++ = '/';
*pwd = '\0';
}
/* major loop */
for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) {
/* a * won't match .files */
if (*start == '*' && start == eoname - 1
&& dp->d_name[0] == '.')
continue;
/* if we can't match the simple name, forget it */
if (!recur &&
glob_match(start, eoname, dp->d_name) == 0)
continue;
strcpy((char *)pwd, dp->d_name);
namlen = strlen(dp->d_name);
if (recur && !(dp->d_name[0] == '.' && (dp->d_name[1] == '\0'
|| (dp->d_name[1] == '.' && dp->d_name[2] == '\0')))) {
/* in superglob mode... glut returns -1 if is a file */
i = glut(cwd, pwd+namlen+1, start, recur, swp);
if (i >= 0) {
count += i;
strcpy((char *)pwd, dp->d_name);
if (*start != 0 && start != kleene)
continue;
++count;
*swp = stash(cwd, (int)((pwd+namlen)-cwd), *swp);
}
}
if (*eoname == 0) { /* end of the globbable name */
/* we aren't interested in descending directories */
if (!recur ||
glob_match(start, eoname, dp->d_name))
++count,
*swp = stash(cwd, (int)((pwd+namlen)-cwd), *swp);
} else if (!recur) {
/* we are only interested in directories */
i = glut(cwd, pwd+namlen+1, eoname, recur, swp);
count += (i > 0 ? i : 0);
}
if (recur || havepattern)
continue;
else
break;
}
#ifdef BUGGY_CLOSEDIR
/*
* Major serious bug time here; some closedir()'s
* free dirp before referring to dirp->dd_fd. GRRR.
* XX: remove this when bug is eradicated from System V's.
*/
close(dirp->dd_fd);
#endif
closedir(dirp);
return count;
}
/*
* This is a heavily recursive globbing function, it uses the string (which
* is actually an int array as mentioned above) as a DFA that it interprets.
* This routine should be kept in sync with the case-statement globbing
* in the interpreter. Unfortunately the requirements are different enough
* that sharing code would be hard.
*/
int
glob_match(pattern, eopattern, s)
register int *pattern, *eopattern;
register const char *s;
{
register int i, i2, sense;
while (eopattern == NULL || pattern < eopattern) {
switch (*pattern) {
case '*':
while (*pattern == '*')
pattern++;
do {
if (glob_match(pattern, eopattern, s))
return 1;
} while (*s++ != '\0');
return 0;
case '[':
if (*s == '\0')
return 0;
sense = (*(pattern+1) != '!');
if (!sense)
++pattern;
while ((*++pattern != ']') && (*pattern != *s)) {
if (pattern == eopattern)
return !sense;
if (*(pattern+1) == '-'
&& (i2 = BYTE(*(pattern+2))) != ']'
&& i2 != 0) {
i2 = (i2 < 128) ? i2 : 127;
for (i = BYTE(*pattern)+1; i <= i2; i++)
if (i == *s) {
if (sense)
goto ok;
else
return 0;
}
pattern += 2;
}
}
if ((*pattern == ']') == sense)
return 0;
ok:
while (*pattern++ != ']')
if (pattern == eopattern)
return 0;
s++;
break;
case '?':
if (*s == '\0')
return 0;
s++;
pattern++;
break;
case '\0':
return (*s == '\0');
default:
if (BYTE(*pattern++) != *s++)
return 0;
}
}
return (*s == '\0');
}
/*
* Mash the linked list of buffers passed into a single buffer on return.
*/
int
squish(d, bufp, ibufp, doglob)
conscell *d; /* input is protected */
char **bufp;
int **ibufp, doglob;
{
register char *cp, *bp;
register int *ip;
register int sawglob, mask, len;
register conscell *l;
char *buf;
int *ibuf;
if ((LIST(d) || ISQUOTED(d)) && cdr(d) == NULL)
return -1;
/* how much space will unexpanded concatenation of buffers take? */
for (l = d, len = 0, sawglob = 0; l != NULL; l = cdr(l)) {
int slen = l->slen;
if (l->string == NULL || slen == 0)
continue;
len += slen;
if (doglob) {
/* We do glob processing */
cp = l->string;
if (!sawglob) {
while (slen > 0) {
if (globchars[BYTE(*cp)] != 0 &&
!(cp == d->string &&
*cp == '[' && *(d->string+1) == '\0')) {
++sawglob;
break;
}
++cp; --slen;
}
}
}
}
/* f option disables filename generation */
sawglob = (sawglob != 0); /* zero/one value space */
if (doglob > 0) /* doglob == -1 for 'case' matching */
if (isset('f')) sawglob = 0;
if (!sawglob && cdr(d) == NULL) {
return -1;
}
/* allocate something large enough to hold integer per char */
*bufp = buf = (char *)malloc((len+1)*(sawglob ? sizeof(int) : 1));
*ibufp = ibuf = (int *)buf;
for (l = d, bp = buf, ip = ibuf; l != NULL; l = cdr(l)) {
if (l->string) {
if (sawglob) {
int slen = l->slen;
/*
* Create int array with quoted characters
* marked by the QUOTEBYTE bit.
*/
mask = ISQUOTED(l) ? QUOTEBYTE : 0;
for (cp = l->string; slen > 0; --slen,++cp) {
#if 0
if (*cp == '\\' && *(cp+1) != '\0')
*ip++ = BYTE(*++cp) | QUOTEBYTE;
else
#endif
*ip++ = BYTE(*cp) | mask;
}
} else if (ISQUOTED(l)) {
if (l->slen > 0) {
memcpy(bp, l->string, l->slen);
bp += l->slen;
}
} else {
int slen = l->slen;
for (cp = l->string; slen > 0; ++cp, --slen) {
if (*cp == '\\' && slen > 1)
++cp;
*bp++ = *cp;
}
}
}
}
if (sawglob)
*ip = 0;
else
*bp = '\0';
return sawglob;
}
/*
* Given a buffer list, check all non-quoted non-list portions for
* file globbing characters and if relevant perform filename expansion.
* The caller relies on the return value never being NULL.
*/
STATIC conscell * csglob __((conscell *, int doglob));
STATIC conscell *
csglob(d, doglob)
conscell *d; int doglob;
{
register char *bp;
register int *ip;
conscell *tmp;
char *buf = NULL;
int *ibuf;
int s, slen;
s = squish(d, &buf, &ibuf, doglob);
switch (s) {
case -1:
/* if (buf) free(buf); -- no allocations; ever */
return d;
case 1:
tmp = sglob(ibuf);
if (tmp != NULL) {
free(buf);
return tmp;
}
for (bp = buf, ip = ibuf; *ip != '\0'; ++ip)
*bp++ = BYTE(*ip);
*bp = '\0';
/* FALLTHROUGH */
/* (re)filled the buffer, can throw away
the conscell chain in 'tmp'. */
case 0:
slen = strlen(buf);
tmp = newstring(dupnstr(buf,slen),slen);
free(buf);
break;
default:
abort();
}
return tmp;
}
extern conscell *
sh_glob(avl,il)
conscell *avl, *il;
{
il = cdar(avl);
if (il == NULL || !STRING(il)) {
fprintf(stderr, "Usage: %s glob-patter-string\n",
car(avl)->string);
return NULL;
}
/* Always do the glob, never mind if 'set -f' is in effect! */
il = csglob(il, -1);
for (avl = il; avl != NULL; avl = cdr(avl))
if (STRING(avl))
avl->flags |= (QUOTEDSTRING|ELEMENT);
return il;
}
/*
* This function is called with the unexpanded buffer contents (usually
* a linked list of strings) just prior to it being added as a command
* argument. Expansion involves scanning unquoted strings for whitespace
* (as defined by IFS) and breaking those apart into multiple argv's,
* as well as filename globbing of the resulting unquoted strings.
* The return value is a list of argv's.
*
* The input array is safe to be munched in place! No need to copy it!
*/
conscell *
expand(d, variant)
conscell *d; /* input protected */
int variant;
{
conscell *tmp, *head, *next, *orig;
conscell *globbed = NULL, **pav;
register char *cp, *cp0;
int slen, slen0;
GCVARS6;
/* Ok, following is MAGIC - sVariablePush when the
data is $(elements ..) produced list */
if (variant == 2 && ISELEMENT(d)) return d;
tmp = head = next = orig = globbed = NULL;
GCPRO6(tmp, head, next, orig, globbed, d);
/* grindef("EXP = ", d); */
/* *NOT* copying at first with s_copy_tree -- 13 % of total system
runtime is in THAT call from THIS function! */
#if 0
d = s_copy_chain(d); /* Copy the LINEAR CHAIN, not the whole tree! */
#endif
orig = d;
pav = &globbed;
for (head = d; d != NULL; d = next) {
if (head == NULL)
head = d;
next = cdr(d);
if (LIST(d) || ISQUOTED(d)) {
continue;
} else if (ISELEMENT(d)) {
if (head != d) {
cdr(head) = NULL;
head = *pav = csglob(head,0);
while (cdr(head) != NULL) head = cdr(head);
pav = &cdr(head);
}
head = NULL;
d->flags &= ~ELEMENT;
*pav = d;
pav = &cdr(d);
continue;
}
/* null strings should be retained */
/* fprintf(stderr,"checking '%s'\n", d->string); */
cp0 = cp = d->string;
slen0 = slen = d->slen;
if (head == d) {
/* skip leading whitespace */
while (slen > 0 && WHITESPACE(*cp)) ++cp, --slen;
cp0 = cp;
}
while (slen > 0) {
if (WHITESPACE(*cp)) {
/* Two possible cases: cp0 == d->string, which means
the stuff is from the beginning of the string, and
playing with d->slen does work.
The second case is when the input string contains
possibly several white space separated substrings:
"aa bb cc", and this is second white-space part, or
later.. */
if (cp0 == d->string) {
/* Variant first.. */
int s0len = d->slen;
d->slen = (cp - d->string);
++cp;
d->slen = (slen0 - slen);
--slen;
cdr(d) = NULL;
/* wrap the stuff at head into its own argv */
/* printf("wrapped '%s'\n", d->string); */
tmp = *pav = csglob(head,0);
while (cdr(tmp)) tmp = cdr(tmp);
if (tmp == d) {
/* Crap ! That stuff didn't expand */
;
} else
d->slen = s0len;
pav = & cdr(tmp);
} else {
/* Variant second... */
/* This means also that the previous text bit
IS 'head' -- or can be set as it! */
head = newstring(dupnstr(cp0,cp-cp0),cp-cp0);
cdr(head) = NULL;
tmp = *pav = csglob(head,0);
while (cdr(tmp)) tmp = cdr(tmp);
pav = & cdr(tmp);
}
/* now find the continuation */
while (slen > 0 && WHITESPACE(*cp))
++cp, --slen;
if (slen == 0) {
cp0 = d->string; /* mark to ignore this string */
head = NULL;
break;
} else {
/* We have more non-white-space stuff
following */
cp0 = cp;
}
}
++cp;
--slen;
}
if (d->string != cp0) {
/* went to the end, and did start in the middle of
the thing */
int sslen = slen0 - (cp0 - d->string);
head = newstring(dupnstr(cp0, sslen), sslen);
}
}
if (head != NULL) {
/* printf("trailing '%s'\n", head->string); */
/* glob is guaranteed to not return NULL */
head = *pav = csglob(head,0);
while (cdr(head) != NULL) head = cdr(head);
pav = &cdr(head);
}
*pav = NULL;
UNGCPRO6;
/* fprintf(stderr, "EXPLEN = %d ",globbed->slen);
grindef("EXPOUT = ", globbed); */
return globbed;
}
syntax highlighted by Code2HTML, v. 0.9.1