Compare commits

..

1 commit

Author SHA1 Message Date
24ad00274d
feat: rewrote glob to have a full doublestar impl
All checks were successful
default / ensure tests work (push) Successful in 35s
This implements a full double star glob implementation with its own filesystem implementation.
2026-01-25 17:22:16 +01:00
7 changed files with 1354 additions and 333 deletions

View file

@ -0,0 +1,17 @@
name: default
on:
push:
branches: [main]
jobs:
default:
name: ensure tests work
runs-on: debian-latest
container: docker.io/golang:1.24-alpine
steps:
- name: checkout
uses: https://code.geekeey.de/actions/checkout@1
- name: go run
run: |
go test

127
glob/fs.go Normal file
View file

@ -0,0 +1,127 @@
package glob
import (
"io/fs"
"path/filepath"
"slices"
)
// GlobFS is a view onto a base file system that is restricted by a set of
// compiled glob patterns.
type GlobFS struct {
// base is the underlying file system all operations are forwarded to.
base fs.FS
// patterns holds the compiled glob patterns that names are matched against.
patterns []Pattern
}
// NewGlobFS creates a new GlobFS that exposes only files matching any of the
// given glob patterns. Each pattern is compiled with New; the error of the
// first pattern that fails to compile is returned and no GlobFS is created.
func NewGlobFS(base fs.FS, patterns ...string) (*GlobFS, error) {
	compiled := make([]Pattern, 0, len(patterns))
	for i := range patterns {
		p, err := New(patterns[i])
		if err != nil {
			return nil, err
		}
		compiled = append(compiled, *p)
	}
	return &GlobFS{base: base, patterns: compiled}, nil
}
// match reports whether name is accepted by at least one configured pattern.
// With prefix set, patterns are consulted through MatchPrefix, otherwise
// through Match.
func (g *GlobFS) match(name string, prefix bool) bool {
	return slices.ContainsFunc(g.patterns, func(p Pattern) bool {
		if prefix {
			return p.MatchPrefix(name)
		}
		return p.Match(name)
	})
}
// contains reports whether name is visible through the filter.
//
// A non-directory is visible when its name matches one of the patterns. A
// directory is visible as soon as one entry of the tree rooted at it matches;
// the walk stops at the first hit. Directories whose own path cannot be the
// prefix of any match are pruned from the walk.
//
// Errors from stat-ing name or from walking the tree are returned unchanged.
func (g *GlobFS) contains(name string) (bool, error) {
	stat, err := fs.Stat(g.base, name)
	if err != nil {
		return false, err
	}
	if !stat.IsDir() {
		return g.match(name, false), nil
	}

	found := false
	err = fs.WalkDir(g.base, name, func(p string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		// Prune on the directory being visited (p), not on the walk root,
		// otherwise nested directories are never skipped. io/fs names are
		// always slash-separated, so the separator is a literal "/" and not
		// the OS specific one.
		if d.IsDir() && !g.match(p+"/", true) {
			return fs.SkipDir
		}
		if g.match(p, false) {
			found = true
			return fs.SkipAll
		}
		return nil
	})
	return found, err
}
// Open opens the named file of the base file system if it is visible through
// the filter.
//
// The root "." and every name that directly matches a pattern are opened
// without further checks. Any other name is opened only if contains reports
// it as visible, which is the case for directories that hold at least one
// match. Names that are filtered out yield a *fs.PathError wrapping
// fs.ErrNotExist, as the fs.FS contract asks for; errors from the base file
// system are passed through.
func (g *GlobFS) Open(name string) (fs.File, error) {
	// fast path: the root and direct pattern matches need no tree walk
	if name == "." || g.match(name, false) {
		return g.base.Open(name)
	}
	ok, err := g.contains(name)
	if err != nil {
		return nil, err
	}
	if !ok {
		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
	}
	return g.base.Open(name)
}
// ReadDir lists the entries of the named directory that are visible through
// the filter.
//
// A directory other than the root "." can only be read if its path is a
// possible prefix of a pattern match. If the base directory is empty it
// additionally has to match a pattern itself (as "name/"). Entries are kept
// when contains reports them as visible. Filtered-out directories yield a
// *fs.PathError wrapping fs.ErrNotExist; other errors are passed through.
// The returned slice is never nil on success.
func (g *GlobFS) ReadDir(name string) ([]fs.DirEntry, error) {
	root := name == "."
	// io/fs names are always slash-separated, independent of the OS.
	dir := name + "/"
	// fast path no pattern matches (prefix check)
	// root dir ('.') must be handled to get initial entries
	if !root && !g.match(dir, true) {
		return nil, &fs.PathError{Op: "readdir", Path: name, Err: fs.ErrNotExist}
	}
	entries, err := fs.ReadDir(g.base, name)
	if err != nil {
		return nil, err
	}
	// if we do not have any child entries, we need to check if the directory
	// itself matched some of the defined patterns, if so we should be able to
	// read it, otherwise we can not read it.
	if !root && len(entries) == 0 && !g.match(dir, false) {
		return nil, &fs.PathError{Op: "readdir", Path: name, Err: fs.ErrNotExist}
	}
	children := make([]fs.DirEntry, 0, len(entries))
	for _, entry := range entries {
		// filepath.Join uses the OS separator; convert back so the name
		// stays a valid io/fs path on every platform.
		child := filepath.ToSlash(filepath.Join(name, entry.Name()))
		ok, err := g.contains(child)
		if err != nil {
			return nil, err
		}
		if ok {
			children = append(children, entry)
		}
	}
	return children, nil
}

505
glob/fs_test.go Normal file
View file

@ -0,0 +1,505 @@
package glob
import (
"archive/zip"
"bytes"
"io"
"io/fs"
"reflect"
"sort"
"testing"
"testing/fstest"
)
// setupFS returns the in-memory fixture shared by the GlobFS tests: a handful
// of regular and hidden files at the top level, nested files below docs and
// assets, and one explicit empty directory.
func setupFS() fs.ReadDirFS {
	files := []struct{ name, content string }{
		{"main.go", "package main"},
		{"main_test.go", "package main_test"},
		{"README.md", "# readme"},
		{"LICENSE", "MIT"},
		{"docs/guide.md", "Docs"},
		{"docs/other.txt", "Other"},
		{"docs/hidden/.keep", ""},
		{"assets/img.png", "PNG"},
		{"assets/style.css", "CSS"},
		{".gitignore", "*.log"},
		{".hiddenfile", ""},
	}
	memfs := make(fstest.MapFS, len(files)+1)
	for _, f := range files {
		memfs[f.name] = &fstest.MapFile{Data: []byte(f.content)}
	}
	// a directory without any children
	memfs["emptydir"] = &fstest.MapFile{Mode: fs.ModeDir | 0o755}
	return memfs
}
// basenames returns the names of entries in sorted order. The result is
// always a non-nil slice so it can be compared against an empty literal.
func basenames(entries []fs.DirEntry) []string {
	names := make([]string, len(entries))
	for i, entry := range entries {
		names[i] = entry.Name()
	}
	sort.Strings(names)
	return names
}
// TestGlobFS_MultiplePatterns checks that ReadDir on a GlobFS built from
// several patterns lists exactly the expected entries per directory and
// fails for directories without any match.
func TestGlobFS_MultiplePatterns(t *testing.T) {
	memfs := setupFS()
	gfs, err := NewGlobFS(memfs, "*.go", "*.md", "assets/*", "docs/guide.md", ".gitignore")
	if err != nil {
		// Fatal: gfs is nil here and every subtest would panic on it.
		t.Fatalf("unexpected error while creating glob fs: %v", err)
	}
	tests := []struct {
		path    string
		want    []string
		wantErr bool
	}{
		{path: ".", want: []string{"README.md", "assets", "docs", "main.go", "main_test.go", ".gitignore"}},
		{path: "assets", want: []string{"img.png", "style.css"}},
		{path: "docs", want: []string{"guide.md"}},
		{path: "docs/hidden", want: []string{}, wantErr: true},
		{path: "emptydir", want: []string{}, wantErr: true},
	}
	for _, tc := range tests {
		tc := tc // capture range variable
		t.Run(escape(tc.path), func(t *testing.T) {
			entries, err := fs.ReadDir(gfs, tc.path)
			if tc.wantErr && err == nil {
				t.Errorf("expected error, got nil")
				return
			}
			if !tc.wantErr && err != nil {
				t.Errorf("unexpected error: %v", err)
				return
			}
			got := basenames(entries)
			// basenames sorts its result, so sort the expectation as well
			sort.Strings(tc.want)
			if !reflect.DeepEqual(got, tc.want) {
				t.Errorf("got %v; want %v", got, tc.want)
			}
		})
	}
}
// TestGlobFS_Open checks which names can be opened through a GlobFS: files
// matching a pattern and directories containing a match open fine, all other
// names fail. Directories that open are additionally read via fs.ReadDir.
func TestGlobFS_Open(t *testing.T) {
	memfs := setupFS()
	gfs, err := NewGlobFS(memfs, "*.go", "*.md", "assets/*", "docs/guide.md", ".gitignore")
	if err != nil {
		// Fatal: gfs is nil here and every subtest would panic on it.
		t.Fatalf("unexpected error while creating glob fs: %v", err)
	}
	type test struct {
		path    string
		wantErr bool
	}
	tests := []test{
		{path: "main.go"},
		{path: "README.md"},
		{path: "LICENSE", wantErr: true},
		{path: "assets/img.png"},
		{path: "assets/style.css"},
		{path: "assets/nonexistent.png", wantErr: true},
		{path: "docs/guide.md"},
		{path: "docs/other.txt", wantErr: true},
		{path: ".gitignore"},
		{path: ".hiddenfile", wantErr: true},
		{path: "docs/hidden/.keep", wantErr: true},
		{path: "emptydir", wantErr: true},
		{path: "docs"},   // allowed because it contains matching file(s)
		{path: "assets"}, // allowed because it contains matching file(s)
	}
	for _, tc := range tests {
		tc := tc
		t.Run(escape(tc.path), func(t *testing.T) {
			f, err := gfs.Open(tc.path)
			if tc.wantErr {
				if err == nil {
					t.Errorf("expected error, got file")
					if f != nil {
						f.Close()
					}
				}
				return
			}
			if err != nil {
				t.Errorf("unexpected error: %v", err)
				return
			}
			defer f.Close()
			info, err := f.Stat()
			if err != nil {
				// a nil info would panic on IsDir below
				t.Fatalf("stat %q: %v", tc.path, err)
			}
			if info.IsDir() {
				if _, err := fs.ReadDir(gfs, tc.path); err != nil {
					t.Errorf("unexpected error: %v", err)
				}
			}
		})
	}
}
// TestGlobFS_ReadFile checks that fs.ReadFile returns the content of files
// allowed by the patterns and fails for filtered or missing files.
func TestGlobFS_ReadFile(t *testing.T) {
	memfs := setupFS()
	gfs, err := NewGlobFS(memfs, "*.go", "*.md", "assets/*", ".gitignore")
	if err != nil {
		// Fatal: gfs is nil here and every subtest would panic on it.
		t.Fatalf("unexpected error while creating glob fs: %v", err)
	}
	tests := []struct {
		name    string
		want    []byte
		wantErr bool
	}{
		{name: "main.go", want: []byte("package main")},
		{name: "main_test.go", want: []byte("package main_test")},
		{name: "README.md", want: []byte("# readme")},
		{name: "assets/img.png", want: []byte("PNG")},
		{name: "assets/style.css", want: []byte("CSS")},
		{name: ".gitignore", want: []byte("*.log")},
		{name: "LICENSE", wantErr: true},           // not allowed by filter
		{name: "docs/guide.md", wantErr: true},     // not allowed by filter
		{name: "docs/hidden/.keep", wantErr: true}, // not allowed by filter
		{name: "doesnotexist.txt", wantErr: true},  // does not exist
	}
	for _, tc := range tests {
		tc := tc
		t.Run(escape(tc.name), func(t *testing.T) {
			got, err := fs.ReadFile(gfs, tc.name)
			if tc.wantErr {
				if err == nil {
					t.Errorf("expected error, got nil (got=%q)", got)
				}
				return
			}
			if err != nil {
				t.Errorf("unexpected error: %v", err)
			}
			if string(got) != string(tc.want) {
				t.Errorf("got %q; want %q", got, tc.want)
			}
		})
	}
}
// TestGlobFS_RelativePaths checks that a pattern containing a directory
// segment filters the entries of that directory.
func TestGlobFS_RelativePaths(t *testing.T) {
	memfs := setupFS()
	gfs, err := NewGlobFS(memfs, "docs/*.md")
	if err != nil {
		// Fatal: gfs is nil here and the ReadDir below would panic.
		t.Fatalf("unexpected error while creating glob fs: %v", err)
	}
	entries, err := fs.ReadDir(gfs, "docs")
	if err != nil {
		t.Fatal(err)
	}
	got := basenames(entries)
	want := []string{"guide.md"}
	if !reflect.DeepEqual(got, want) {
		t.Errorf("docs/*.md: got %v, want %v", got, want)
	}
}
// TestGlobFS_NoMatchesOpen checks that Open fails for an existing file when
// no pattern matches it.
func TestGlobFS_NoMatchesOpen(t *testing.T) {
	gfs, err := NewGlobFS(setupFS(), "*.xyz")
	if err != nil {
		// Fatal: gfs is nil here and the call below would panic.
		t.Fatalf("unexpected error while creating glob fs: %v", err)
	}
	if _, err = gfs.Open("main.go"); err == nil {
		t.Fatal("expected error when opening file with no matches")
	}
}
// TestGlobFS_NoMatchesStat checks that fs.Stat fails for an existing file
// when no pattern matches it.
func TestGlobFS_NoMatchesStat(t *testing.T) {
	gfs, err := NewGlobFS(setupFS(), "*.xyz")
	if err != nil {
		// Fatal: gfs is nil here and the call below would panic.
		t.Fatalf("unexpected error while creating glob fs: %v", err)
	}
	if _, err = fs.Stat(gfs, "main.go"); err == nil {
		t.Fatal("expected error with no matches: stat")
	}
}
// TestGlobFS_NoMatchesReadDir checks that fs.ReadDir fails for a name that
// no pattern matches.
func TestGlobFS_NoMatchesReadDir(t *testing.T) {
	gfs, err := NewGlobFS(setupFS(), "*.xyz")
	if err != nil {
		// Fatal: gfs is nil here and the call below would panic.
		t.Fatalf("unexpected error while creating glob fs: %v", err)
	}
	if _, err = fs.ReadDir(gfs, "main.go"); err == nil {
		t.Fatal("expected error with no matches: readdir")
	}
}
// TestGlobFS_NoMatchesReadFile checks that fs.ReadFile fails for an existing
// file when no pattern matches it.
func TestGlobFS_NoMatchesReadFile(t *testing.T) {
	gfs, err := NewGlobFS(setupFS(), "*.xyz")
	if err != nil {
		// Fatal: gfs is nil here and the call below would panic.
		t.Fatalf("unexpected error while creating glob fs: %v", err)
	}
	if _, err = fs.ReadFile(gfs, "main.go"); err == nil {
		t.Fatal("expected error with no matches: readfile")
	}
}
// TestGlobFS_MatchEmptyDirExact checks that an empty directory can be read
// when it is named by a pattern with a trailing slash.
func TestGlobFS_MatchEmptyDirExact(t *testing.T) {
	// the trailing slash indicates that the directory should be included
	gfs, err := NewGlobFS(setupFS(), "emptydir/")
	if err != nil {
		// Fatal: gfs is nil here and the call below would panic.
		t.Fatalf("unexpected error while creating glob fs: %v", err)
	}
	if _, err = fs.ReadDir(gfs, "emptydir"); err != nil {
		t.Errorf("unexpected error: %v", err)
	}
}
// TestGlobFS_MatchEmptyDirExact2 checks that an empty directory can be read
// when a pattern selects all of its children.
func TestGlobFS_MatchEmptyDirExact2(t *testing.T) {
	// a wildcard for the children also makes the directory itself readable
	gfs, err := NewGlobFS(setupFS(), "emptydir/*")
	if err != nil {
		// Fatal: gfs is nil here and the call below would panic.
		t.Fatalf("unexpected error while creating glob fs: %v", err)
	}
	if _, err = fs.ReadDir(gfs, "emptydir"); err != nil {
		t.Errorf("unexpected error: %v", err)
	}
}
// TestGlobFS_NoMatchEmptyDirExact checks that an empty directory cannot be
// read when the pattern names it without a trailing slash.
func TestGlobFS_NoMatchEmptyDirExact(t *testing.T) {
	// no trailing slash indicates that the directory must be a file to be included
	gfs, err := NewGlobFS(setupFS(), "emptydir")
	if err != nil {
		// Fatal: gfs is nil here and the call below would panic.
		t.Fatalf("unexpected error while creating glob fs: %v", err)
	}
	if _, err = fs.ReadDir(gfs, "emptydir"); err == nil {
		// the operation under test is ReadDir, so say so in the message
		t.Fatal("expected error with no matches: readdir")
	}
}
// TestGlobFS_IntegrationWithStdlibWalkDir walks a GlobFS with fs.WalkDir and
// checks that no path outside the expected set is visited.
func TestGlobFS_IntegrationWithStdlibWalkDir(t *testing.T) {
	memfs := setupFS()
	gfs, err := NewGlobFS(memfs, "*.go", "docs/guide.md")
	if err != nil {
		// Fatal: gfs is nil here and the walk below would panic.
		t.Fatalf("unexpected error while creating glob fs: %v", err)
	}
	// Use fs.WalkDir with our filtered FS
	var walked []string
	err = fs.WalkDir(gfs, ".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// Fatalf does not return, so nothing may follow it
			t.Fatalf("the %q caused: %v", path, err)
		}
		walked = append(walked, path)
		return nil
	})
	if err != nil {
		t.Fatal(err)
	}
	// Only files and dirs matching or containing matches should appear
	for _, p := range walked {
		switch p {
		case ".", "main.go", "main_test.go", "docs", "docs/guide.md":
			continue
		}
		t.Errorf("WalkDir: unexpected path %q", p)
	}
}
// TestGlobFS_InvalidPattern checks that NewGlobFS rejects a pattern that
// cannot be compiled.
func TestGlobFS_InvalidPattern(t *testing.T) {
	if _, err := NewGlobFS(setupFS(), "[invalid"); err == nil {
		t.Fatal("expected error for invalid pattern, got nil")
	}
}
// TestGlobFS_WildcardInDirSegment checks that a directory reachable through a
// wildcard directory segment can be read, even though none of its files
// matches the pattern.
func TestGlobFS_WildcardInDirSegment(t *testing.T) {
	gfs, err := NewGlobFS(setupFS(), "docs/*/*.md")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	entries, readErr := fs.ReadDir(gfs, "docs/hidden")
	if readErr != nil {
		t.Errorf("unexpected error: %v", readErr)
	}
	if count := len(entries); count != 0 {
		t.Errorf("expected no entries, got %v", basenames(entries))
	}
}
// TestGlobFS_DeeplyNestedMatch checks that a file several directories deep
// can be read when the pattern spells out its directory path.
func TestGlobFS_DeeplyNestedMatch(t *testing.T) {
	const content = "deep"
	memfs := fstest.MapFS{
		"a/b/c/d.txt": {Data: []byte(content)},
	}
	gfs, err := NewGlobFS(memfs, "a/b/c/*.txt")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	data, readErr := fs.ReadFile(gfs, "a/b/c/d.txt")
	if readErr != nil {
		t.Errorf("unexpected error: %v", readErr)
	}
	if got := string(data); got != content {
		t.Errorf("got %q, want %q", data, content)
	}
}
// TestGlobFS_HiddenFilesOnly checks that the pattern ".*" exposes only the
// dot files of the root directory.
func TestGlobFS_HiddenFilesOnly(t *testing.T) {
	gfs, err := NewGlobFS(setupFS(), ".*")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	entries, err := fs.ReadDir(gfs, ".")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	want := []string{".gitignore", ".hiddenfile"}
	if got := basenames(entries); !reflect.DeepEqual(got, want) {
		t.Errorf("got %v, want %v", got, want)
	}
}
// TestGlobFS_DirectoryPatterns exercises ReadDir with patterns that address
// directories: fixed directory paths, a trailing slash and a wildcard as
// first path segment.
func TestGlobFS_DirectoryPatterns(t *testing.T) {
	memfs := fstest.MapFS{
		"foo/bar/baz.txt": &fstest.MapFile{Data: []byte("baz")},
		"foo/bar/qux.txt": &fstest.MapFile{Data: []byte("qux")},
		"foo/readme.md":   &fstest.MapFile{Data: []byte("readme")},
		"foo/empty/.keep": &fstest.MapFile{Data: []byte("")}, // represent empty dir by a file inside
		"top.txt":         &fstest.MapFile{Data: []byte("top")},
	}

	// list builds a GlobFS for pattern and returns the sorted entry names of
	// dir; any error aborts the calling subtest.
	list := func(t *testing.T, pattern, dir string) []string {
		t.Helper()
		gfs, err := NewGlobFS(memfs, pattern)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		entries, err := fs.ReadDir(gfs, dir)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		return basenames(entries)
	}
	// expect reports a mismatch between got and want.
	expect := func(t *testing.T, got, want []string) {
		t.Helper()
		if !reflect.DeepEqual(got, want) {
			t.Errorf("got %v, want %v", got, want)
		}
	}

	t.Run("single dir segment wildcard", func(t *testing.T) {
		expect(t, list(t, "foo/bar/*", "foo/bar"), []string{"baz.txt", "qux.txt"})
	})

	// NOTE(review): despite its name this subtest uses the same
	// non-recursive pattern as the one above; confirm whether a "**"
	// pattern was intended.
	t.Run("recursive dir wildcard", func(t *testing.T) {
		expect(t, list(t, "foo/bar/*", "foo/bar"), []string{"baz.txt", "qux.txt"})
		expect(t, list(t, "foo/bar/*", "foo"), []string{"bar"})
	})

	t.Run("match empty directory", func(t *testing.T) {
		if got := list(t, "foo/empty/", "foo/empty"); len(got) != 0 {
			t.Errorf("expected empty, got %v", got)
		}
	})

	t.Run("top-level dir wildcard", func(t *testing.T) {
		expect(t, list(t, "*/bar/*", "foo/bar"), []string{"baz.txt", "qux.txt"})
	})
}
// TestGlobFS_IntegrationWithStdlibZipWriter adds a GlobFS built from the
// pattern "*" to a zip archive via zip.Writer.AddFS, reads the archive back
// and checks that it holds exactly the expected files and contents.
func TestGlobFS_IntegrationWithStdlibZipWriter(t *testing.T) {
	gfs, err := NewGlobFS(setupFS(), "*")
	if err != nil {
		// Fatal: gfs is nil here and AddFS below would panic.
		t.Fatalf("unexpected error while creating glob fs: %v", err)
	}
	want := map[string]string{
		"main.go":      "package main",
		"main_test.go": "package main_test",
		"README.md":    "# readme",
		"LICENSE":      "MIT",
		".gitignore":   "*.log",
		".hiddenfile":  "",
	}
	buf := new(bytes.Buffer)
	wr := zip.NewWriter(buf)
	if err = wr.AddFS(gfs); err != nil {
		t.Fatalf("adding fs to zip writer: %v", err)
	}
	if err = wr.Close(); err != nil {
		t.Fatalf("close zip writer: %v", err)
	}
	rd, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
	if err != nil {
		t.Fatalf("invalid zip archive: %v", err)
	}
	got := make(map[string]string)
	for _, f := range rd.File {
		rc, err := f.Open()
		if err != nil {
			t.Fatalf("cannot open file %s: %v", f.Name, err)
		}
		content, err := io.ReadAll(rc)
		// Close right away: a defer inside the loop would keep every entry
		// open until the whole test returns.
		closeErr := rc.Close()
		if err != nil {
			t.Fatalf("cannot read file %s: %v", f.Name, err)
		}
		if closeErr != nil {
			t.Fatalf("cannot close file %s: %v", f.Name, closeErr)
		}
		got[f.Name] = string(content)
	}
	// Compare expected vs actual.
	for name, exp := range want {
		act, ok := got[name]
		if !ok {
			t.Errorf("expected file %q not found in zip", name)
			continue
		}
		if act != exp {
			t.Errorf("content mismatch for %q:\nexpected: %q\nactual: %q", name, exp, act)
		}
	}
	// Check for unexpected extra files.
	for name := range got {
		if _, ok := want[name]; !ok {
			t.Errorf("unexpected file %q found in zip", name)
		}
	}
}

485
glob/glob.go Normal file
View file

@ -0,0 +1,485 @@
// Package glob implements a language for specifying glob patterns for path
// names starting at some root. The language does not follow the specs from
// filepath.Match but provides a superset which allows for directory
// wildcards.
//
// Patterns consist of normal characters, non-separator wildcards '*' and '?',
// separators '/' and directory wildcards '**'.
//
// A somewhat formal grammar can be given as:
//
// pattern = term, { '/', term };
// term = '**' | name;
// name = { charSpecial | group | escapedChar | '*' | '?' };
// charSpecial = (* any unicode rune except '/', '*', '?', '[' and '\' *);
// char = (* any unicode rune *);
// escapedChar = '\\', char;
// group = '[', [ '^' ] { escapedChar | groupChar | range } ']';
// groupChar = (* any unicode rune except '-' and ']' *);
// range = ( groupChar | escapedChar ), '-', (groupChar | escapedChar);
//
// The format operators have the following meaning:
//
// - any character (rune) matches exactly this rune - with the following
// exceptions
// - `/` works as a directory separator. It matches directory boundaries of the
// underlying system independently of the separator char used by the OS.
// - `?` matches exactly one non-separator char
// - `*` matches any number of non-separator chars - including zero
// - `\` escapes a character's special meaning allowing `*` and `?` to be used
// as regular characters.
// - `**` matches any number of nested directories. If anything is matched it
// always extends until a separator or the end of the name.
// - Groups can be defined using the `[` and `]` characters. Inside a group the
// special meaning of the characters mentioned before is disabled but the
// following rules apply
// - any character used as part of the group acts as a choice to pick from
// - if the group's first character is a `^` the whole group is negated
// - a range can be defined using `-` matching any rune between low and high
// inclusive
// - Multiple ranges can be given. Ranges can be combined with choices.
// - The meaning of `-` and `]` can be escaped using `\`
package glob
import (
"errors"
"fmt"
"io/fs"
"strings"
"unicode/utf8"
)
// Special runes of the pattern language.
const (
// Separator defines the path separator to use in patterns. This is always
// a forward slash independently of the underlying OS's separator.
Separator = '/'
// SingleWildcard defines the single non-separator character wildcard
// operator.
SingleWildcard = '?'
// AnyWildcard defines the any number of non-separator characters
// wildcard operator.
AnyWildcard = '*'
// Backslash escapes the next character's special meaning.
Backslash = '\\'
// GroupStart starts a group.
GroupStart = '['
// GroupEnd ends a group.
GroupEnd = ']'
// GroupNegate when used as the first character of a group negates the group.
GroupNegate = '^'
// Range defines the range operator used inside groups.
Range = '-'
)
var (
// ErrBadPattern is returned when an invalid pattern is found. Make
// sure you use errors.Is to compare errors to this sentinel value, as
// the parser wraps it with a description of the problem.
ErrBadPattern = errors.New("bad pattern")
)
// Pattern defines a glob pattern prepared ahead of time which can be used to
// match filenames. Pattern is safe to use concurrently.
type Pattern struct {
// tokens is the parsed pattern in source order, as produced by New.
tokens []token
}
// New creates a new pattern from pat and returns it. It returns an error
// wrapping ErrBadPattern for any invalid pattern.
//
// Besides malformed groups and dangling escapes the following is rejected:
// two separators in a row, `?` directly after `*` or `**`, `*` directly
// after `?` or `**`, and `**` that is not directly followed by a separator.
func New(pat string) (*Pattern, error) {
	var tokens []token
	p := pat
	for {
		if len(p) == 0 {
			return &Pattern{tokens: tokens}, nil
		}
		r, l := utf8.DecodeRuneInString(p)
		var t token
		switch r {
		case Separator:
			if len(tokens) > 0 && tokens[len(tokens)-1].r == Separator {
				return nil, fmt.Errorf("%w: unexpected //", ErrBadPattern)
			}
			t = token{tokenTypeLiteral, Separator, runeGroup{}}
		case SingleWildcard:
			if len(tokens) > 0 && (tokens[len(tokens)-1].t == tokenTypeAnyRunes || tokens[len(tokens)-1].t == tokenTypeAnyDirectories) {
				return nil, fmt.Errorf("%w: unexpected ?", ErrBadPattern)
			}
			t = token{tokenTypeSingleRune, 0, runeGroup{}}
		case AnyWildcard:
			if len(tokens) > 0 && (tokens[len(tokens)-1].t == tokenTypeSingleRune || tokens[len(tokens)-1].t == tokenTypeAnyDirectories) {
				// the offending rune here is the '*', not a '?'
				return nil, fmt.Errorf("%w: unexpected *", ErrBadPattern)
			}
			t = token{tokenTypeAnyRunes, 0, runeGroup{}}
			if len(p[l:]) > 0 {
				n, nl := utf8.DecodeRuneInString(p[l:])
				if n == AnyWildcard {
					// "**": only valid when directly followed by a separator
					d, _ := utf8.DecodeRuneInString(p[l+nl:])
					if d == utf8.RuneError {
						return nil, fmt.Errorf("%w: unexpected end of pattern after **", ErrBadPattern)
					}
					if d != Separator {
						return nil, fmt.Errorf("%w: unexpected %c after **", ErrBadPattern, d)
					}
					t.t = tokenTypeAnyDirectories
					l += nl
				}
			}
		case Backslash:
			if len(p[l:]) == 0 {
				return nil, fmt.Errorf("%w: no character given after \\", ErrBadPattern)
			}
			// drop the backslash and take the next rune literally
			p = p[l:]
			r, l = utf8.DecodeRuneInString(p)
			t = token{tokenTypeLiteral, r, runeGroup{}}
		case GroupStart:
			var err error
			t, l, err = parseGroup(p)
			if err != nil {
				return nil, err
			}
		case GroupEnd:
			return nil, fmt.Errorf("%w: using ] w/o [", ErrBadPattern)
		default:
			t = token{tokenTypeLiteral, r, runeGroup{}}
		}
		tokens = append(tokens, t)
		p = p[l:]
	}
}
// String reconstructs a glob pattern from the tokens. Parsing the result
// with New yields an equivalent pattern; the text can differ from the
// original input because literals with a special meaning are always escaped
// and group members are written as single runes first, ranges second.
func (pat *Pattern) String() string {
	var b strings.Builder
	for _, t := range pat.tokens {
		switch t.t {
		case tokenTypeLiteral:
			switch t.r {
			// The backslash has to be escaped as well, otherwise it would
			// escape whatever follows when the output is parsed again.
			case Backslash, GroupStart, GroupEnd, GroupNegate, AnyWildcard, SingleWildcard, Range:
				b.WriteRune(Backslash)
			}
			b.WriteRune(t.r)
		case tokenTypeSingleRune:
			b.WriteRune(SingleWildcard)
		case tokenTypeAnyRunes:
			b.WriteRune(AnyWildcard)
		case tokenTypeAnyDirectories:
			b.WriteString("**")
		case tokenTypeGroup:
			b.WriteRune(GroupStart)
			if t.g.neg {
				b.WriteRune(GroupNegate)
			}
			for _, r := range t.g.runes {
				writeGroupRune(&b, r)
			}
			for _, rg := range t.g.ranges {
				writeGroupRune(&b, rg.lo)
				b.WriteRune(Range)
				writeGroupRune(&b, rg.hi)
			}
			b.WriteRune(GroupEnd)
		}
	}
	return b.String()
}

// writeGroupRune writes a group member to b and escapes it when it would
// otherwise be read as group syntax.
func writeGroupRune(b *strings.Builder, r rune) {
	switch r {
	case Backslash, GroupEnd, GroupNegate, Range:
		b.WriteRune(Backslash)
	}
	b.WriteRune(r)
}
// GoString returns the same text as String, so the %#v verb prints the
// pattern source instead of the token list.
func (pat *Pattern) GoString() string {
	source := pat.String()
	return source
}
// Match reports whether the file path name f is matched by the compiled
// pattern as a whole.
func (pat *Pattern) Match(f string) bool {
	const prefixOnly = false
	return match(f, pat.tokens, prefixOnly)
}
// MatchPrefix reports whether f is completely matched by a prefix of the
// compiled pattern.
func (pat *Pattern) MatchPrefix(f string) bool {
	const prefixOnly = true
	return match(f, pat.tokens, prefixOnly)
}
// GlobFS applies pat to all files found in fsys under root and returns the
// matching path names as a string slice. It uses fs.WalkDir internally and all
// constraints given for that function apply to GlobFS.
//
// Names are matched and returned relative to root, without a leading
// separator, so the same pattern gives consistent results for every root.
// Directories are never part of the result; a directory is skipped entirely
// when its path cannot be the prefix of a match. The returned slice is never
// nil. An error of the walk is returned together with the results collected
// so far.
func (pat *Pattern) GlobFS(fsys fs.FS, root string) ([]string, error) {
	results := make([]string, 0)
	relative := root != "." && root != ""
	err := fs.WalkDir(fsys, root, func(p string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		// the root itself is neither matched nor pruned
		if p == "." || p == root {
			return nil
		}
		if relative {
			// Strip "root/" as a prefix only. strings.Replace would also
			// leave the separator behind, and relative patterns can never
			// match a name starting with "/".
			p = strings.TrimPrefix(p, root+"/")
		}
		if d.IsDir() {
			// The trailing separator marks p as a complete directory name,
			// which the prefix match needs to decide on patterns such as
			// "**/dir/file".
			if !pat.MatchPrefix(p + "/") {
				return fs.SkipDir
			}
			return nil
		}
		if pat.Match(p) {
			results = append(results, p)
		}
		return nil
	})
	return results, err
}
// parseGroup parses the group at the start of p, which has to begin with the
// group start rune. It returns the group token, the number of bytes of p that
// belong to the group (including both brackets) and an error wrapping
// ErrBadPattern if the group is malformed. On error the returned token and
// length are only partially filled.
func parseGroup(p string) (token, int, error) {
// re-read the [. No need to assert the rune here as it has been
// done in the main parsing loop.
_, le := utf8.DecodeRuneInString(p)
t := token{
t: tokenTypeGroup,
g: runeGroup{},
}
// initialLen is the offset of the first rune inside the group; only there
// the negation rune is recognized.
initialLen := le
// start holds the last rune read that is not yet stored. It becomes
// either a single group member or the low end of a range. The zero value
// means "nothing pending", so a NUL rune cannot be a group member.
var start rune
for {
if len(p[le:]) == 0 {
return t, le, fmt.Errorf("%w: missing %c", ErrBadPattern, GroupEnd)
}
r, l := utf8.DecodeRuneInString(p[le:])
le += l
if initialLen == le-l && r == GroupNegate {
t.g.neg = true
continue
}
switch r {
case GroupEnd:
// flush the pending rune before closing the group
if start != 0 {
t.g.runes = append(t.g.runes, start)
}
return t, le, nil
case Range:
// a range needs a pending rune as its low end
if start == 0 {
return t, le, fmt.Errorf("%w: missing start for character range", ErrBadPattern)
}
if len(p[le:]) == 0 {
return t, le, fmt.Errorf("%w: missing range end", ErrBadPattern)
}
r, l = utf8.DecodeRuneInString(p[le:])
le += l
switch r {
case GroupEnd:
return t, le, fmt.Errorf("%w: unterminated range", ErrBadPattern)
case Backslash:
// escaped high end: take the rune after the backslash literally
if len(p[le:]) == 0 {
return t, le, fmt.Errorf("%w: missing character after \\", ErrBadPattern)
}
r, l = utf8.DecodeRuneInString(p[le:])
le += l
fallthrough
default:
t.g.ranges = append(t.g.ranges, runeRange{start, r})
start = 0
}
case Backslash:
// escaped member: take the rune after the backslash literally
if len(p[le:]) == 0 {
return t, le, fmt.Errorf("%w: missing character after \\", ErrBadPattern)
}
r, l = utf8.DecodeRuneInString(p[le:])
le += l
fallthrough
default:
// store the previously pending rune as a single member and keep the
// current one pending, as it may still turn out to start a range
if start != 0 {
t.g.runes = append(t.g.runes, start)
}
start = r
}
}
}
// match is used internally to implement a simple recursive backtracking
// algorithm using the token list t to match against file path f. If
// matchPrefix is set to true, match returns true if f is completely matched
// by any prefix of t. Otherwise, match returns true if f is matched by _all_
// tokens in t.
//
// A directory wildcard token is expected to be followed by a separator
// token, which is what the parser guarantees.
func match(f string, t []token, matchPrefix bool) bool {
	for {
		if len(f) == 0 {
			// f is used up: fine for a prefix match, otherwise the pattern
			// has to be used up too, apart from a trailing `*` that may
			// match nothing
			return matchPrefix || len(t) == 0 || (len(t) == 1 && t[0].t == tokenTypeAnyRunes)
		}
		if len(t) == 0 {
			return false
		}
		r, le := utf8.DecodeRuneInString(f)
		switch t[0].t {
		case tokenTypeLiteral:
			if t[0].r != r {
				return false
			}
		case tokenTypeGroup:
			if !t[0].g.match(r) {
				return false
			}
		case tokenTypeSingleRune:
			if r == Separator {
				return false
			}
		case tokenTypeAnyRunes:
			if r == Separator {
				// `*` never crosses a separator
				return match(f, t[1:], matchPrefix)
			}
			// let `*` take r and possibly more ...
			if match(f[le:], t, matchPrefix) {
				return true
			}
			// ... or nothing at all
			if match(f, t[1:], matchPrefix) {
				return true
			}
		case tokenTypeAnyDirectories:
			// `**` matching no directory: skip it and its separator
			if match(f, t[2:], matchPrefix) {
				return true
			}
			// otherwise consume one directory, up to and including the
			// next separator
			var l2 int
			for {
				if len(f[le+l2:]) == 0 {
					// f ends inside a directory name. As a prefix this
					// can still become a match once more path follows;
					// as a full name it cannot.
					return matchPrefix
				}
				n, nl := utf8.DecodeRuneInString(f[le+l2:])
				l2 += nl
				if n == Separator {
					break
				}
			}
			if match(f[le+l2:], t[2:], matchPrefix) {
				return true
			}
			// let `**` take further directories
			return match(f[le+l2:], t, matchPrefix)
		}
		t = t[1:]
		f = f[le:]
	}
}
// tokenType enumerates the different types of tokens. The zero value is not
// a valid token type, the constants start at 1.
type tokenType int
const (
// a rune literal
tokenTypeLiteral tokenType = iota + 1
// any single non-separator rune
tokenTypeSingleRune
// any number of non-separator runes (incl. zero)
tokenTypeAnyRunes
// any number of runes including separators. Matches whole directories.
tokenTypeAnyDirectories
// a group of runes consisting of named runes and/or ranges. Might be negated.
tokenTypeGroup
)
// token implements a single token in the pattern.
type token struct {
// the token's type
t tokenType
// a literal rune to match. Literal runes are stored separate from groups
// to improve matching performance.
r rune
// A rune group to match.
g runeGroup
}
// runeGroup is a group of runes. Groups can contain any number of enumerated
// runes and rune ranges. In addition a whole group can be negated.
type runeGroup struct {
// Whether the group is negated
neg bool
// Enumerated runes contained in this group
runes []rune
// All ranges contained in this group
ranges []runeRange
}
// match reports whether r is matched by g: r has to be one of the enumerated
// runes or lie within one of the ranges, and the outcome is inverted for a
// negated group.
func (g runeGroup) match(r rune) bool {
	hit := false
	for i := 0; i < len(g.runes) && !hit; i++ {
		hit = g.runes[i] == r
	}
	for i := 0; i < len(g.ranges) && !hit; i++ {
		hit = g.ranges[i].match(r)
	}
	return hit != g.neg
}
// runeRange is a closed range of runes consisting of all runes between lo
// and hi, both inclusive.
type runeRange struct {
// lo and hi are the lowest and the highest rune of the range.
lo, hi rune
}
// match reports whether r lies within rg, both bounds included.
func (rg runeRange) match(r rune) bool {
	if r < rg.lo {
		return false
	}
	return r <= rg.hi
}

220
glob/glob_test.go Normal file
View file

@ -0,0 +1,220 @@
package glob
import (
"errors"
"fmt"
"reflect"
"strings"
"testing"
"testing/fstest"
)
// test is one case of the pattern tests: a pattern source, a name to match
// against it, the expected match result and the expected error of New.
type test struct {
pattern, f string
match bool
err error
}
// escape makes name usable as a subtest name. The testing package treats "/"
// as the separator between a test and its subtests, so every slash is
// replaced by the look-alike division slash (U+2215).
func escape(name string) string {
	// use a math division slash for correct visual
	return strings.ReplaceAll(name, "/", "\u2215")
}
// TestPattern_Match compiles every pattern of the table with New and checks
// both the returned error and, if a pattern was returned, the result of
// matching the given name against it.
func TestPattern_Match(t *testing.T) {
tests := []test{
// Test cases not covered by path.Match
{"main.go", "main.go", true, nil},
{"main_test.go", "main_test.go", true, nil},
{"foo/foo_test.go", "foo/foo_test.go", true, nil},
{"?.go", "m.go", true, nil},
{"*.go", "main.go", true, nil},
{"**/*.go", "main.go", true, nil},
{"*.go", "*.go", true, nil},
{"//", "", false, ErrBadPattern},
{"foo//", "", false, ErrBadPattern},
{"*?.go", "", false, ErrBadPattern},
{"?*.go", "", false, ErrBadPattern},
{"**?.go", "", false, ErrBadPattern},
{"**f", "", false, ErrBadPattern},
{"[a-", "", false, ErrBadPattern},
{"[a-\\", "", false, ErrBadPattern},
{"[\\", "", false, ErrBadPattern},
{"**/m.go", "foo.go", false, nil},
{"**/m.go", "foo/a.go", false, nil},
{"**/m.go", "m.go", true, nil},
{"**/m.go", "foo/m.go", true, nil},
{"**/m.go", "bar/m.go", true, nil},
{"**/m.go", "foo/bar/m.go", true, nil},
{"ab[cde]", "abc", true, nil},
{"ab[cde]", "abd", true, nil},
{"ab[cde]", "abe", true, nil},
{"ab[+-\\-]", "ab-", true, nil},
{"ab[\\--a]", "ab-", true, nil},
{"[a-fA-F]", "a", true, nil},
{"[a-fA-F]", "f", true, nil},
{"[a-fA-F]", "A", true, nil},
{"[a-fA-F]", "F", true, nil},
// The following test cases are taken from
// https://github.com/golang/go/blob/master/src/path/match_test.go and are
// provided here to test compatibility of the match implementation with the
// test cases from the golang standard lib.
{"abc", "abc", true, nil},
{"*", "abc", true, nil},
{"*c", "abc", true, nil},
{"a*", "a", true, nil},
{"a*", "abc", true, nil},
{"a*", "ab/c", false, nil},
{"a*/b", "abc/b", true, nil},
{"a*/b", "a/c/b", false, nil},
{"a*b*c*d*e*/f", "axbxcxdxe/f", true, nil},
{"a*b*c*d*e*/f", "axbxcxdxexxx/f", true, nil},
{"a*b*c*d*e*/f", "axbxcxdxe/xxx/f", false, nil},
{"a*b*c*d*e*/f", "axbxcxdxexxx/fff", false, nil},
{"a*b?c*x", "abxbbxdbxebxczzx", true, nil},
{"a*b?c*x", "abxbbxdbxebxczzy", false, nil},
{"ab[c]", "abc", true, nil},
{"ab[b-d]", "abc", true, nil},
{"ab[e-g]", "abc", false, nil},
{"ab[^c]", "abc", false, nil},
{"ab[^b-d]", "abc", false, nil},
{"ab[^e-g]", "abc", true, nil},
{"a\\*b", "a*b", true, nil},
{"a\\*b", "ab", false, nil},
{"a?b", "a☺b", true, nil},
{"a[^a]b", "a☺b", true, nil},
{"a???b", "a☺b", false, nil},
{"a[^a][^a][^a]b", "a☺b", false, nil},
{"[a-ζ]*", "α", true, nil},
{"*[a-ζ]", "A", false, nil},
{"a?b", "a/b", false, nil},
{"a*b", "a/b", false, nil},
{"[\\]a]", "]", true, nil},
{"[\\-]", "-", true, nil},
{"[x\\-]", "x", true, nil},
{"[x\\-]", "-", true, nil},
{"[x\\-]", "z", false, nil},
{"[\\-x]", "x", true, nil},
{"[\\-x]", "-", true, nil},
{"[\\-x]", "a", false, nil},
{"[]a]", "]", false, ErrBadPattern},
{"[-]", "-", false, ErrBadPattern},
{"[x-]", "x", false, ErrBadPattern},
{"[x-]", "-", false, ErrBadPattern},
{"[x-]", "z", false, ErrBadPattern},
{"[-x]", "x", false, ErrBadPattern},
{"[-x]", "-", false, ErrBadPattern},
{"[-x]", "a", false, ErrBadPattern},
{"\\", "a", false, ErrBadPattern},
{"[a-b-c]", "a", false, ErrBadPattern},
{"[", "a", false, ErrBadPattern},
{"[^", "a", false, ErrBadPattern},
{"[^bc", "a", false, ErrBadPattern},
{"a[", "a", false, ErrBadPattern},
{"a[", "ab", false, ErrBadPattern},
{"a[", "x", false, ErrBadPattern},
{"a/b[", "x", false, ErrBadPattern},
{"*x", "xxx", true, nil},
}
for _, tt := range tests {
pat, err := New(tt.pattern)
// the direct comparison covers the case where both are nil;
// errors.Is covers errors that wrap the expected sentinel
if err != tt.err && !errors.Is(err, tt.err) {
t.Errorf("New(%#q): wanted error %v but got %v", tt.pattern, tt.err, err)
}
// matching is only checked when New returned a pattern
if pat != nil {
match := pat.Match(tt.f)
if match != tt.match {
t.Errorf("New(%#q).Match(%#q): wanted match %v but got %v", tt.pattern, tt.f, tt.match, match)
}
}
}
}
// TestPattern_MatchPrefix checks MatchPrefix for directory names with and
// without a trailing separator, one subtest per table entry.
func TestPattern_MatchPrefix(t *testing.T) {
	cases := []test{
		{pattern: "**/*.go", f: "foo/", match: true},
		{pattern: "**/*.go", f: "foo", match: true},
		{pattern: "**/*.go", f: "foo/bar/", match: true},
		{pattern: "**/*.go", f: "foo/bar", match: true},
		{pattern: "*/*.go", f: "foo", match: true},
	}
	for _, tc := range cases {
		tc := tc // capture range variable
		name := fmt.Sprintf("%s (%s)", escape(tc.pattern), escape(tc.f))
		t.Run(name, func(t *testing.T) {
			pat, err := New(tc.pattern)
			if err != nil {
				t.Fatalf("unexpected error: %v", err)
			}
			if got := pat.MatchPrefix(tc.f); got != tc.match {
				t.Errorf("got %v; want %v", got, tc.match)
			}
		})
	}
}
// TestPattern_GlobFS builds an in-memory file tree and expects the pattern
// "**/*_test.go", globbed from ".", to yield exactly the three *_test.go files
// in the listed order.
func TestPattern_GlobFS(t *testing.T) {
	tree := fstest.MapFS{}
	for _, name := range []string{
		"go.mod",
		"go.sum",
		"cmd/main.go",
		"cmd/main_test.go",
		"internal/tool/tool.go",
		"internal/tool/tool_test.go",
		"internal/cli/cli.go",
		"internal/cli/cli_test.go",
	} {
		tree[name] = &fstest.MapFile{Mode: 0644}
	}

	compiled, err := New("**/*_test.go")
	if err != nil {
		t.Fatal(err)
	}
	found, err := compiled.GlobFS(tree, ".")
	if err != nil {
		t.Fatal(err)
	}

	wanted := []string{
		"cmd/main_test.go",
		"internal/cli/cli_test.go",
		"internal/tool/tool_test.go",
	}
	if !reflect.DeepEqual(wanted, found) {
		t.Errorf("got %v; want %v", found, wanted)
	}
}
// TestPattern_String checks that a compiled pattern's String method returns
// the exact source text it was created from.
func TestPattern_String(t *testing.T) {
	for _, src := range []string{
		"main.go",
		"*.go",
		"**/*.go",
		"foo/bar/*",
		"foo/?ar.go",
		"foo/[abc].go",
		"foo/[a-c].go",
		"foo/**/",
		"foo/*/bar.go",
		"foo/\\*bar.go",
	} {
		t.Run(escape(src), func(t *testing.T) {
			compiled, err := New(src)
			if err != nil {
				t.Fatalf("New(%q) failed: %v", src, err)
			}
			if rendered := compiled.String(); rendered != src {
				t.Fatalf("Pattern.String() = %q, want %q", rendered, src)
			}
		})
	}
}

View file

@ -1,99 +0,0 @@
package iox
import (
"io/fs"
"os"
"path"
)
// GlobFS is a filtered view of a base file system. A path is visible when it
// matches at least one of the configured patterns, or when it is a directory
// that contains a matching entry at any depth.
type GlobFS struct {
	base     fs.FS
	patterns []string
}

// GlobFS provides its own ReadDir so that directory listings are filtered.
var _ fs.ReadDirFS = (*GlobFS)(nil)

// NewGlobFS creates a new GlobFS that exposes only files matching any of the given glob patterns.
//
// Patterns use path.Match syntax and are compared against the full
// slash-separated path of an entry, so "*" never crosses a "/".
func NewGlobFS(base fs.FS, patterns ...string) *GlobFS {
	return &GlobFS{base: base, patterns: patterns}
}

// match reports whether the given path matches any of the configured patterns.
func (g *GlobFS) match(name string) bool {
	for _, pat := range g.patterns {
		// A malformed pattern makes path.Match return ErrBadPattern. Such a
		// pattern can never select anything, so it is deliberately treated as
		// "no match" instead of failing the whole lookup.
		if matched, err := path.Match(pat, name); err == nil && matched {
			return true
		}
	}
	return false
}

// contains reports whether any entry below dir, at any depth, matches one of
// the configured patterns. Directories that cannot be read are skipped.
func (g *GlobFS) contains(dir string) bool {
	queue := []string{dir}
	visited := make(map[string]struct{})

	for len(queue) > 0 {
		current := queue[0]
		queue = queue[1:] // dequeue

		// Never expand the same directory twice; this also guarantees
		// termination if the base file system reports a path more than once.
		if _, seen := visited[current]; seen {
			continue
		}
		visited[current] = struct{}{}

		entries, err := fs.ReadDir(g.base, current)
		if err != nil {
			continue
		}
		for _, entry := range entries {
			rel := path.Join(current, entry.Name())
			if g.match(rel) {
				return true
			}
			if entry.IsDir() {
				queue = append(queue, rel)
			}
		}
	}
	return false
}

// Open opens name from the base file system if it matches a pattern or is a
// directory containing a match. Anything else is reported as a *fs.PathError
// wrapping fs.ErrNotExist, as the fs.FS contract asks of Open.
func (g *GlobFS) Open(name string) (fs.File, error) {
	if g.match(name) {
		return g.base.Open(name)
	}

	fi, err := fs.Stat(g.base, name)
	if err != nil || !fi.IsDir() || !g.contains(name) {
		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
	}
	return g.base.Open(name)
}

// ReadDir lists the visible entries of the directory name.
//
// If name itself matches a pattern, the listing of the base file system is
// returned unfiltered. Otherwise only entries that match a pattern, or
// directories that contain a match, are returned, each at most once. A
// directory with no visible entries is reported as a *fs.PathError wrapping
// os.ErrNotExist.
func (g *GlobFS) ReadDir(name string) ([]fs.DirEntry, error) {
	if g.match(name) {
		return fs.ReadDir(g.base, name)
	}

	entries, err := fs.ReadDir(g.base, name)
	if err != nil {
		return nil, err
	}

	var children []fs.DirEntry
	for _, entry := range entries {
		rel := path.Join(name, entry.Name())
		// One combined condition: a directory that both matches a pattern and
		// contains matches must not be listed twice.
		if g.match(rel) || (entry.IsDir() && g.contains(rel)) {
			children = append(children, entry)
		}
	}

	if len(children) == 0 {
		return nil, &fs.PathError{Op: "readdir", Path: name, Err: os.ErrNotExist}
	}
	return children, nil
}

View file

@ -1,234 +0,0 @@
package iox
import (
"io/fs"
"reflect"
"sort"
"testing"
"testing/fstest"
)
// setupFS returns the in-memory file system shared by the GlobFS tests: a mix
// of regular files, dot files, nested directories and one empty directory.
func setupFS() fs.ReadDirFS {
	return fstest.MapFS{
		"main.go":           &fstest.MapFile{Data: []byte("package main")},
		"main_test.go":      &fstest.MapFile{Data: []byte("package main_test")},
		"README.md":         &fstest.MapFile{Data: []byte("# readme")},
		"LICENSE":           &fstest.MapFile{Data: []byte("MIT")},
		"docs/guide.md":     &fstest.MapFile{Data: []byte("Docs")},
		"docs/other.txt":    &fstest.MapFile{Data: []byte("Other")},
		"docs/hidden/.keep": &fstest.MapFile{Data: []byte("")},
		"assets/img.png":    &fstest.MapFile{Data: []byte("PNG")},
		"assets/style.css":  &fstest.MapFile{Data: []byte("CSS")},
		".gitignore":        &fstest.MapFile{Data: []byte("*.log")},
		".hiddenfile":       &fstest.MapFile{Data: []byte("")},
		// An explicit directory entry must be keyed without a trailing slash.
		// With "emptydir/" MapFS synthesizes a child with an empty name, so
		// the directory would not actually be empty.
		"emptydir": &fstest.MapFile{Mode: fs.ModeDir},
	}
}
// basenames returns the names of the given directory entries in sorted order.
// The result is never nil, so it compares equal to an empty slice literal
// under reflect.DeepEqual.
func basenames(entries []fs.DirEntry) []string {
	out := make([]string, len(entries))
	for i := range entries {
		out[i] = entries[i].Name()
	}
	sort.Strings(out)
	return out
}
func TestGlobFSMultiplePatterns(t *testing.T) {
memfs := setupFS()
gfs := NewGlobFS(memfs, "*.go", "*.md", "assets/*", "docs/guide.md", ".gitignore")
tests := []struct {
path string
want []string
wantErr bool
}{
{path: ".", want: []string{"README.md", "assets", "docs", "main.go", "main_test.go", ".gitignore"}},
{path: "assets", want: []string{"img.png", "style.css"}},
{path: "docs", want: []string{"guide.md"}},
{path: "docs/hidden", want: []string{}, wantErr: true},
{path: "emptydir", want: []string{}, wantErr: true},
}
for _, tc := range tests {
tc := tc // capture range variable
t.Run(tc.path, func(t *testing.T) {
entries, err := fs.ReadDir(gfs, tc.path)
if tc.wantErr && err == nil {
t.Errorf("expected error, got nil")
return
}
if !tc.wantErr && err != nil {
t.Errorf("unexpected error: %v", err)
return
}
got := basenames(entries)
sort.Strings(tc.want)
if !reflect.DeepEqual(got, tc.want) {
t.Errorf("got %v; want %v", got, tc.want)
}
})
}
}
// TestGlobFSOpen opens a set of paths through a GlobFS and checks that each
// one is either opened successfully or rejected, as expected. For paths that
// open as directories it additionally checks that they can be listed.
func TestGlobFSOpen(t *testing.T) {
	memfs := setupFS()
	gfs := NewGlobFS(memfs, "*.go", "*.md", "assets/*", "docs/guide.md", ".gitignore")

	type test struct {
		path    string
		wantErr bool
	}
	tests := []test{
		{path: "main.go"},
		{path: "README.md"},
		{path: "LICENSE", wantErr: true},
		{path: "assets/img.png"},
		{path: "assets/style.css"},
		{path: "assets/nonexistent.png", wantErr: true},
		{path: "docs/guide.md"},
		{path: "docs/other.txt", wantErr: true},
		{path: ".gitignore"},
		{path: ".hiddenfile", wantErr: true},
		{path: "docs/hidden/.keep", wantErr: true},
		{path: "emptydir", wantErr: true},
		{path: "docs"},   // allowed because it contains matching file(s)
		{path: "assets"}, // allowed because it contains matching file(s)
	}

	for _, tc := range tests {
		tc := tc // capture range variable
		t.Run(tc.path, func(t *testing.T) {
			f, err := gfs.Open(tc.path)
			if err == nil {
				// Close on every path out of the subtest, including failures.
				defer f.Close()
			}

			if tc.wantErr {
				if err == nil {
					t.Errorf("expected error, got file")
				}
				return
			}
			if err != nil {
				t.Errorf("unexpected error: %v", err)
				return
			}

			// Check the Stat error instead of dereferencing a possibly nil
			// FileInfo.
			info, err := f.Stat()
			if err != nil {
				t.Fatalf("stat %q: %v", tc.path, err)
			}
			if info.IsDir() {
				if _, err := fs.ReadDir(gfs, tc.path); err != nil {
					t.Errorf("unexpected error: %v", err)
				}
			}
		})
	}
}
func TestGlobFSReadFile(t *testing.T) {
memfs := setupFS()
gfs := NewGlobFS(memfs, "*.go", "*.md", "assets/*", ".gitignore")
tests := []struct {
name string
want []byte
wantErr bool
}{
{name: "main.go", want: []byte("package main")},
{name: "main_test.go", want: []byte("package main_test")},
{name: "README.md", want: []byte("# readme")},
{name: "assets/img.png", want: []byte("PNG")},
{name: "assets/style.css", want: []byte("CSS")},
{name: ".gitignore", want: []byte("*.log")},
{name: "LICENSE", wantErr: true}, // not allowed by filter
{name: "docs/guide.md", wantErr: true}, // not allowed by filter
{name: "docs/hidden/.keep", wantErr: true}, // not allowed by filter
{name: "doesnotexist.txt", wantErr: true}, // does not exist
}
for _, tc := range tests {
tc := tc
t.Run(tc.name, func(t *testing.T) {
got, err := fs.ReadFile(gfs, tc.name)
if tc.wantErr {
if err == nil {
t.Errorf("expected error, got nil (got=%q)", got)
}
} else {
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if string(got) != string(tc.want) {
t.Errorf("got %q; want %q", got, tc.want)
}
}
})
}
}
func TestGlobFSRelativePaths(t *testing.T) {
memfs := setupFS()
gfs := NewGlobFS(memfs, "docs/*.md")
entries, err := fs.ReadDir(gfs, "docs")
if err != nil {
t.Fatal(err)
}
got := basenames(entries)
want := []string{"guide.md"}
if !reflect.DeepEqual(got, want) {
t.Errorf("docs/*.md: got %v, want %v", got, want)
}
}
// TestGlobFSNoMatchesOpen checks that Open fails for an existing file when no
// pattern matches it.
func TestGlobFSNoMatchesOpen(t *testing.T) {
	filtered := NewGlobFS(setupFS(), "*.xyz")
	if _, err := filtered.Open("main.go"); err == nil {
		t.Fatal("expected error when opening file with no matches")
	}
}
// TestGlobFSNoMatchesStat checks that fs.Stat fails for an existing file when
// no pattern matches it.
func TestGlobFSNoMatchesStat(t *testing.T) {
	filtered := NewGlobFS(setupFS(), "*.xyz")
	if _, err := fs.Stat(filtered, "main.go"); err == nil {
		t.Fatal("expected error with no matches: stat")
	}
}
// TestGlobFSNoMatchesReadDir checks that fs.ReadDir fails on "main.go" when
// no pattern matches it.
func TestGlobFSNoMatchesReadDir(t *testing.T) {
	filtered := NewGlobFS(setupFS(), "*.xyz")
	if _, err := fs.ReadDir(filtered, "main.go"); err == nil {
		t.Fatal("expected error with no matches: readdir")
	}
}
// TestGlobFSNoMatchesReadFile checks that fs.ReadFile fails for an existing
// file when no pattern matches it.
func TestGlobFSNoMatchesReadFile(t *testing.T) {
	filtered := NewGlobFS(setupFS(), "*.xyz")
	if _, err := fs.ReadFile(filtered, "main.go"); err == nil {
		t.Fatal("expected error with no matches: readfile")
	}
}
func TestGlobFS_IntegrationWithStdlib(t *testing.T) {
memfs := setupFS()
gfs := NewGlobFS(memfs, "*.go", "docs/guide.md")
// Use fs.WalkDir with our filtered FS
var walked []string
err := fs.WalkDir(gfs, ".", func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
walked = append(walked, path)
return nil
})
if err != nil {
t.Fatal(err)
}
// Only files and dirs matching or containing matches should appear
for _, p := range walked {
if p == "." || p == "main.go" || p == "main_test.go" || p == "docs" || p == "docs/guide.md" {
continue
}
t.Errorf("WalkDir: unexpected path %q", p)
}
}