/* $Id: glob.c 616 2005-08-19 20:11:01Z bruce $ */ #include #include #include #include /* GLOB patterns: * * matches everything * ? matches any single character * [seq] matches any sing echaracter in seq * [!seq] matches any single character not in seq */ static unsigned make_set(const char* pptr, unsigned plen, char set[256]) { unsigned orig_plen = plen; char value; if (plen == 0 || *pptr != '[') return 0; ++pptr; --plen; if (*pptr == '!' || *pptr == '^') { memset(set, 1, 256); value = 0; ++pptr; --plen; } else { value = 1; memset(set, 0, 256); } while (plen > 0) { unsigned char p = *pptr++; --plen; if (p == ']') return orig_plen - plen - 1; if (p == '\\') { p = *pptr++; --plen; } set[p] = value; } return 0; } static int glob_match(const str* s, unsigned offset, const char* pptr, unsigned plen); static int glob_search(const str* s, unsigned offset, const char* pptr, unsigned plen) { char start; int r; while (plen > 0) { /* Strip off leading "*"s. */ if ((start = *pptr) == '*') ++pptr, --plen; /* Leading "?"s always match a single character, so just advance past. */ else if (start == '?') ++pptr, --plen, ++offset; else break; } /* Searching at end of pattern always succeeds. */ if (plen == 0) return 1; /* Searching at end of string (with non-empty pattern) always fails. */ if (offset >= s->len) return 0; /* Optimize for searching for a set by only computing the set once. */ if ((start = *pptr) == '[') { char set[256]; unsigned len; if ((len = make_set(pptr, plen, set)) == 0) return -1; ++len; pptr += len; plen -= len; for (; offset < s->len; ++offset) { if (set[(unsigned char)s->s[offset]] && (r = glob_match(s, offset+1, pptr, plen)) != 0) return r; } } /* Otherwise search for a normal character. */ else { ++pptr, --plen; do { char c = s->s[offset++]; if (c == start && (r = glob_match(s, offset, pptr, plen)) != 0) return r; } while (offset < s->len); #if 0 do { if ((r = glob_match(s, offset, pptr, plen)) != 0) return r; ++offset; } while (offset < s->len); #endif } #if 0 else { for (; offset < s->len; ++offset) { char c = s->s[offset]; if (c == start && (r = glob_match(s, offset+1, pptr+1, plen-1)) != 0) return r; } } #endif return 0; } static int glob_match(const str* s, unsigned offset, const char* pptr, unsigned plen) { for (; plen > 0; ++pptr, --plen, ++offset) { char p = *pptr; if (p == '*') return glob_search(s, offset, pptr+1, plen-1); if (offset >= s->len) break; switch (p) { case '?': continue; case '[': { char set[256]; unsigned len; if ((len = make_set(pptr, plen, set)) == 0) return -1; pptr += len; plen -= len; if (!set[(unsigned char)s->s[offset]]) return 0; continue; } default: if (p != s->s[offset]) return 0; } } return offset == s->len && plen == 0; } int str_globb(const str* s, const char* pptr, unsigned plen) { return glob_match(s, 0, pptr, plen); } int str_globs(const str* s, const char* pattern) { return glob_match(s, 0, pattern, strlen(pattern)); } int str_glob(const str* s, const str* pattern) { return glob_match(s, 0, pattern->s, pattern->len); } #ifdef SELFTEST_MAIN #include "selftest.c" void t(const char* string, const char* pattern) { str s; str p; s.s = (char*)string; s.len = strlen(string); s.size = 0; p.s = (char*)pattern; p.len = strlen(pattern); p.size = 0; obuf_putstr(&outbuf, &s); obuf_putc(&outbuf, ' '); obuf_putstr(&outbuf, &p); obuf_putc(&outbuf, ' '); debugfn(str_glob(&s, &p)); } MAIN { /* Empty string */ t("", ""); t("", "*"); t("", "**"); t("", "?"); t("", "[abc]"); /* Case sensitivity: single char */ t("a", "a"); t("a", "A"); t("A", "a"); t("A", "A"); /* Case sensitivity: normal set */ t("a", "[a]"); t("a", "[A]"); t("A", "[a]"); t("A", "[A]"); /* Case sensitivity: negated set */ t("a", "[!a]"); t("a", "[!A]"); t("A", "[!a]"); t("A", "[!A]"); t("a", "[^a]"); t("a", "[^A]"); t("A", "[^a]"); t("A", "[^A]"); /* Case sensitivity: search */ t("a", "*a"); t("a", "*A"); t("A", "*a"); t("A", "*A"); /* Miscelaneous matches */ t("abc", ""); t("abc", "*"); t("abc", "**"); t("abc", "?"); t("abc", "??"); t("abc", "???"); t("abc", "????"); t("abc", "abc"); t("abc", "*abc"); t("abc", "*bc"); t("abc", "*c"); t("abc", "?bc"); t("abc", "a?c"); t("abc", "ab?"); t("abc", "[axy][bxy][cxy]"); /* Three patterns for each case: * 1 at start of filename * 2 in filename * 3 at end of filename * Four characters to test: * 1 First filename character "a" * 2 Second filename character "b" * 3 Third filename character "c" * 4 Non-filename character "x" */ /* First case: single character */ t("abc", "a*"); t("abc", "*a*"); t("abc", "*a"); t("abc", "b*"); t("abc", "*b*"); t("abc", "*b"); t("abc", "c*"); t("abc", "*c*"); t("abc", "*c"); t("abc", "x*"); t("abc", "*x*"); t("abc", "*x"); /* Second case: character in a set */ t("abc", "[axy]*"); t("abc", "*[axy]*"); t("abc", "*[axy]"); t("abc", "[bxy]*"); t("abc", "*[bxy]*"); t("abc", "*[bxy]"); t("abc", "[cxy]*"); t("abc", "*[cxy]*"); t("abc", "*[cxy]"); t("abc", "[xyz]*"); t("abc", "*[xyz]*"); t("abc", "*[xyz]"); /* Third case: negated set */ t("abc", "[!a]*"); t("abc", "*[!a]*"); t("abc", "*[!a]"); t("abc", "[!b]*"); t("abc", "*[!b]*"); t("abc", "*[!b]"); t("abc", "[!c]*"); t("abc", "*[!c]*"); t("abc", "*[!c]"); t("abc", "[!x]*"); t("abc", "*[!x]*"); t("abc", "*[!x]"); t("abc", "[^a]*"); t("abc", "*[^a]*"); t("abc", "*[^a]"); t("abc", "[^b]*"); t("abc", "*[^b]*"); t("abc", "*[^b]"); t("abc", "[^c]*"); t("abc", "*[^c]*"); t("abc", "*[^c]"); t("abc", "[^x]*"); t("abc", "*[^x]*"); t("abc", "*[^x]"); /* Fourth case: preceding wildcard */ t("abc", "?a*"); t("abc", "*?a*"); t("abc", "*?a"); t("abc", "?b*"); t("abc", "*?b*"); t("abc", "*?b"); t("abc", "?c*"); t("abc", "*?c*"); t("abc", "*?c"); t("abc", "?x*"); t("abc", "*?x*"); t("abc", "*?x"); /* Fifth case: following wildcard */ t("abc", "a?*"); t("abc", "*a?*"); t("abc", "*a?"); t("abc", "b?*"); t("abc", "*b?*"); t("abc", "*b?"); t("abc", "c?*"); t("abc", "*c?*"); t("abc", "*c?"); t("abc", "x?*"); t("abc", "*x?*"); t("abc", "*x?"); } #endif #ifdef SELFTEST_EXP result=1 * result=1 ** result=1 ? result=0 [abc] result=0 a a result=1 a A result=0 A a result=0 A A result=1 a [a] result=1 a [A] result=0 A [a] result=0 A [A] result=1 a [!a] result=0 a [!A] result=1 A [!a] result=1 A [!A] result=0 a [^a] result=0 a [^A] result=1 A [^a] result=1 A [^A] result=0 a *a result=1 a *A result=0 A *a result=0 A *A result=1 abc result=0 abc * result=1 abc ** result=1 abc ? result=0 abc ?? result=0 abc ??? result=1 abc ???? result=0 abc abc result=1 abc *abc result=1 abc *bc result=1 abc *c result=1 abc ?bc result=1 abc a?c result=1 abc ab? result=1 abc [axy][bxy][cxy] result=1 abc a* result=1 abc *a* result=1 abc *a result=0 abc b* result=0 abc *b* result=1 abc *b result=0 abc c* result=0 abc *c* result=1 abc *c result=1 abc x* result=0 abc *x* result=0 abc *x result=0 abc [axy]* result=1 abc *[axy]* result=1 abc *[axy] result=0 abc [bxy]* result=0 abc *[bxy]* result=1 abc *[bxy] result=0 abc [cxy]* result=0 abc *[cxy]* result=1 abc *[cxy] result=1 abc [xyz]* result=0 abc *[xyz]* result=0 abc *[xyz] result=0 abc [!a]* result=0 abc *[!a]* result=1 abc *[!a] result=1 abc [!b]* result=1 abc *[!b]* result=1 abc *[!b] result=1 abc [!c]* result=1 abc *[!c]* result=1 abc *[!c] result=0 abc [!x]* result=1 abc *[!x]* result=1 abc *[!x] result=1 abc [^a]* result=0 abc *[^a]* result=1 abc *[^a] result=1 abc [^b]* result=1 abc *[^b]* result=1 abc *[^b] result=1 abc [^c]* result=1 abc *[^c]* result=1 abc *[^c] result=0 abc [^x]* result=1 abc *[^x]* result=1 abc *[^x] result=1 abc ?a* result=0 abc *?a* result=0 abc *?a result=0 abc ?b* result=1 abc *?b* result=1 abc *?b result=0 abc ?c* result=0 abc *?c* result=1 abc *?c result=1 abc ?x* result=0 abc *?x* result=0 abc *?x result=0 abc a?* result=1 abc *a?* result=1 abc *a? result=0 abc b?* result=0 abc *b?* result=1 abc *b? result=1 abc c?* result=0 abc *c?* result=0 abc *c? result=0 abc x?* result=0 abc *x?* result=0 abc *x? result=0 #endif