diff --git a/api.go b/api.go index 4e2285940..f2e1247ee 100644 --- a/api.go +++ b/api.go @@ -33,7 +33,6 @@ const ( sliceHeaderBytes uint64 = 24 stringHeaderBytes uint64 = 16 pointerSize uint64 = 8 - interfaceBytes uint64 = 16 ) // FileMatch contains all the matches within a file. @@ -136,10 +135,10 @@ func (m *FileMatch) sizeBytes() (sz uint64) { return } -// addScore increments the score of the FileMatch by the computed score. If +// AddScore increments the score of the FileMatch by the computed score. If // debugScore is true, it also adds a debug string to the FileMatch. If raw is // -1, it is ignored. Otherwise, it is added to the debug string. -func (m *FileMatch) addScore(what string, computed float64, raw float64, debugScore bool) { +func (m *FileMatch) AddScore(what string, computed float64, raw float64, debugScore bool) { if computed != 0 && debugScore { var b strings.Builder fmt.Fprintf(&b, "%s", what) @@ -695,6 +694,10 @@ func (r *Repository) UnmarshalJSON(data []byte) error { return nil } +func (r *Repository) GetPriority() float64 { + return r.priority +} + // monthsSince1970 returns the number of months since 1970. It returns values in // the range [0, maxUInt16]. The upper bound is reached in the year 7431, the // lower bound for all dates before 1970. @@ -1014,6 +1017,17 @@ type SearchOptions struct { SpanContext map[string]string } +func (o *SearchOptions) SetDefaults() { + if o.ShardMaxMatchCount == 0 { + // We cap the total number of matches, so overly broad + // searches don't crash the machine. + o.ShardMaxMatchCount = 100000 + } + if o.TotalMaxMatchCount == 0 { + o.TotalMaxMatchCount = 10 * o.ShardMaxMatchCount + } +} + // String returns a succinct representation of the options. This is meant for // human consumption in logs and traces. 
// diff --git a/api_test.go b/api_test.go index d37daf39a..7b08c5ced 100644 --- a/api_test.go +++ b/api_test.go @@ -150,12 +150,6 @@ func TestMatchSize(t *testing.T) { }, { v: ChunkMatch{}, size: 120, - }, { - v: candidateMatch{}, - size: 80, - }, { - v: candidateChunk{}, - size: 40, }} for _, c := range cases { got := reflect.TypeOf(c.v).Size() diff --git a/cmd/flags.go b/cmd/flags.go index b528aee34..3e278d55d 100644 --- a/cmd/flags.go +++ b/cmd/flags.go @@ -20,23 +20,22 @@ import ( "os" "path/filepath" - "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" ) var ( version = flag.Bool("version", false, "Print version number") - opts = &build.Options{} + opts = &index.Options{} ) func init() { opts.Flags(flag.CommandLine) } -func OptionsFromFlags() *build.Options { +func OptionsFromFlags() *index.Options { if *version { name := filepath.Base(os.Args[0]) - fmt.Printf("%s version %q\n", name, zoekt.Version) + fmt.Printf("%s version %q\n", name, index.Version) os.Exit(0) } diff --git a/cmd/zoekt-index/main.go b/cmd/zoekt-index/main.go index 4e4ee0216..d7fb9cfb2 100644 --- a/cmd/zoekt-index/main.go +++ b/cmd/zoekt-index/main.go @@ -23,9 +23,8 @@ import ( "runtime/pprof" "strings" - "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" "github.com/sourcegraph/zoekt/cmd" + "github.com/sourcegraph/zoekt/index" "go.uber.org/automaxprocs/maxprocs" ) @@ -103,14 +102,14 @@ func main() { } } -func indexArg(arg string, opts build.Options, ignore map[string]struct{}) error { +func indexArg(arg string, opts index.Options, ignore map[string]struct{}) error { dir, err := filepath.Abs(filepath.Clean(arg)) if err != nil { return err } opts.RepositoryDescription.Name = filepath.Base(dir) - builder, err := build.NewBuilder(opts) + builder, err := index.NewBuilder(opts) if err != nil { return err } @@ -135,7 +134,7 @@ func indexArg(arg string, opts build.Options, ignore map[string]struct{}) error for f := range comm { displayName := strings.TrimPrefix(f.name, dir+"/") if f.size > int64(opts.SizeMax) && !opts.IgnoreSizeMax(displayName) { - if err := builder.Add(zoekt.Document{ + if err := builder.Add(index.Document{ Name: displayName, SkipReason: fmt.Sprintf("document size %d larger than limit %d", f.size, opts.SizeMax), }); err != nil { diff --git a/cmd/zoekt-indexserver/main.go b/cmd/zoekt-indexserver/main.go index 995f1d39a..d31f3bf04 100644 --- a/cmd/zoekt-indexserver/main.go +++ b/cmd/zoekt-indexserver/main.go @@ -33,7 +33,7 @@ import ( "strings" "time" - "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/gitindex" ) @@ -206,13 +206,13 @@ func deleteIfOrphan(repoDir string, fn string) error { } defer f.Close() - ifile, err := zoekt.NewIndexFile(f) + ifile, err := index.NewIndexFile(f) if err != nil { return nil } defer ifile.Close() - repos, _, err := zoekt.ReadMetadata(ifile) + repos, _, err := index.ReadMetadata(ifile) if err != nil { return nil } diff --git a/cmd/zoekt-merge-index/main.go b/cmd/zoekt-merge-index/main.go index 899d52b93..3221ea50f 100644 --- a/cmd/zoekt-merge-index/main.go +++ b/cmd/zoekt-merge-index/main.go @@ -8,13 +8,13 @@ import ( "path/filepath" "strings" - "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" ) // merge merges the input shards into a compound shard in dstDir. It returns the // full path to the compound shard. The input shards are removed on success. 
func merge(dstDir string, names []string) (string, error) { - var files []zoekt.IndexFile + var files []index.IndexFile for _, fn := range names { f, err := os.Open(fn) if err != nil { @@ -22,7 +22,7 @@ func merge(dstDir string, names []string) (string, error) { } defer f.Close() - indexFile, err := zoekt.NewIndexFile(f) + indexFile, err := index.NewIndexFile(f) if err != nil { return "", err } @@ -31,14 +31,14 @@ func merge(dstDir string, names []string) (string, error) { files = append(files, indexFile) } - tmpName, dstName, err := zoekt.Merge(dstDir, files...) + tmpName, dstName, err := index.Merge(dstDir, files...) if err != nil { return "", err } // Delete input shards. for _, name := range names { - paths, err := zoekt.IndexFilePaths(name) + paths, err := index.IndexFilePaths(name) if err != nil { return "", fmt.Errorf("zoekt-merge-index: %w", err) } @@ -83,13 +83,13 @@ func explode(dstDir string, inputShard string) error { } defer f.Close() - indexFile, err := zoekt.NewIndexFile(f) + indexFile, err := index.NewIndexFile(f) if err != nil { return err } defer indexFile.Close() - exploded, err := zoekt.Explode(dstDir, indexFile) + exploded, err := index.Explode(dstDir, indexFile) defer func() { // best effort removal of tmp files. If os.Remove fails, indexserver will delete // the leftover tmp files during the next cleanup. @@ -104,7 +104,7 @@ func explode(dstDir string, inputShard string) error { // remove the input shard first to avoid duplicate indexes. In the worst case, // the process is interrupted just after we delete the compound shard, in which // case we have to reindex the lost repos. - paths, err := zoekt.IndexFilePaths(inputShard) + paths, err := index.IndexFilePaths(inputShard) if err != nil { return err } diff --git a/cmd/zoekt-repo-index/main.go b/cmd/zoekt-repo-index/main.go index b012ae03e..f5e3970d1 100644 --- a/cmd/zoekt-repo-index/main.go +++ b/cmd/zoekt-repo-index/main.go @@ -41,8 +41,8 @@ import ( "github.com/google/slothfs/manifest" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" "github.com/sourcegraph/zoekt/ignore" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/gitindex" "go.uber.org/automaxprocs/maxprocs" @@ -127,7 +127,7 @@ func main() { revPrefix := flag.String("rev_prefix", "refs/remotes/origin/", "prefix for references") baseURLStr := flag.String("base_url", "", "base url to interpret repository names") repoCacheDir := flag.String("repo_cache", "", "root for repository cache") - indexDir := flag.String("index", build.DefaultDir, "index directory for *.zoekt files") + indexDir := flag.String("index", index.DefaultDir, "index directory for *.zoekt files") manifestRepoURL := flag.String("manifest_repo_url", "", "set a URL for a git repository holding manifest XML file. 
Provide the BRANCH:XML-FILE as further command-line arguments") manifestRevPrefix := flag.String("manifest_rev_prefix", "refs/remotes/origin/", "prefixes for branches in manifest repository") repoName := flag.String("name", "", "set repository name") @@ -150,7 +150,7 @@ func main() { *repoName = filepath.Join(u.Host, u.Path) } - opts := build.Options{ + opts := index.Options{ Parallelism: *parallelism, SizeMax: *sizeMax, ShardMax: *shardLimit, @@ -258,7 +258,7 @@ func main() { return } - builder, err := build.NewBuilder(opts) + builder, err := index.NewBuilder(opts) if err != nil { log.Fatal(err) } @@ -269,7 +269,7 @@ func main() { log.Fatal(err) } - doc := zoekt.Document{ + doc := index.Document{ Name: k.FullPath(), Content: data, SubRepositoryPath: k.SubRepoPath, diff --git a/cmd/zoekt-sourcegraph-indexserver/cleanup.go b/cmd/zoekt-sourcegraph-indexserver/cleanup.go index 4d2f876a4..61bcc341c 100644 --- a/cmd/zoekt-sourcegraph-indexserver/cleanup.go +++ b/cmd/zoekt-sourcegraph-indexserver/cleanup.go @@ -12,8 +12,8 @@ import ( "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" ) var metricCleanupDuration = promauto.NewHistogram(prometheus.HistogramOpts{ @@ -35,7 +35,7 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool) trash := getShards(trashDir) tombtones := getTombstonedRepos(indexDir) - index := getShards(indexDir) + indexShards := getShards(indexDir) // trash: Remove old shards and conflicts with index minAge := now.Add(-24 * time.Hour) @@ -50,7 +50,7 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool) } } - if _, conflicts := index[repo]; !conflicts && !old { + if _, conflicts := indexShards[repo]; !conflicts && !old { continue } @@ -62,7 +62,7 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool) // tombstones: Remove tombstones that conflict with index or trash. After this, // tombstones only contain repos that are neither in the trash nor in the index. for repo := range tombtones { - if _, conflicts := index[repo]; conflicts { + if _, conflicts := indexShards[repo]; conflicts { delete(tombtones, repo) } // Trash takes precedence over tombstones. @@ -75,13 +75,13 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool) // shards that have the same ID but different names delete and start over. // This can happen when a repository is renamed. In future we should make // shard file names based on ID. - for repo, shards := range index { + for repo, shards := range indexShards { if consistentRepoName(shards) { continue } // prevent further processing since we will delete - delete(index, repo) + delete(indexShards, repo) // This should be rare, so give an informative log message. var paths []string @@ -113,7 +113,7 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool) for _, repo := range repos { // Delete from index so that index will only contain shards to be // trashed. 
- delete(index, repo) + delete(indexShards, repo) if shards, ok := trash[repo]; ok { infoLog.Printf("restoring shards from trash for %v", repo) @@ -123,7 +123,7 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool) if s, ok := tombtones[repo]; ok { infoLog.Printf("removing tombstone for %v", repo) - err := zoekt.UnsetTombstone(s.Path, repo) + err := index.UnsetTombstone(s.Path, repo) if err != nil { errorLog.Printf("error removing tombstone for %v: %s", repo, err) } @@ -131,7 +131,7 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool) } // index: Move non-existent repos into trash - for repo, shards := range index { + for repo, shards := range indexShards { // Best-effort touch. If touch fails, we will just remove from the // trash sooner. for _, shard := range shards { @@ -197,7 +197,7 @@ func getShards(dir string) map[uint32][]shard { continue } - repos, _, err := zoekt.ReadMetadataPathAlive(path) + repos, _, err := index.ReadMetadataPathAlive(path) if err != nil { debugLog.Printf("failed to read shard: %v", err) continue @@ -231,7 +231,7 @@ func getTombstonedRepos(dir string) map[uint32]shard { m := make(map[uint32]shard) for _, p := range paths { - repos, _, err := zoekt.ReadMetadataPath(p) + repos, _, err := index.ReadMetadataPath(p) if err != nil { continue } @@ -285,7 +285,7 @@ func removeAll(shards ...shard) { // exceedingly rare due to it being a mix of partial failure on something in // trash + an admin re-adding a repository. for _, shard := range shards { - paths, err := zoekt.IndexFilePaths(shard.Path) + paths, err := index.IndexFilePaths(shard.Path) if err != nil { debugLog.Printf("failed to remove shard %s: %v", shard.Path, err) } @@ -299,7 +299,7 @@ func removeAll(shards ...shard) { func moveAll(dstDir string, shards []shard) { for i, shard := range shards { - paths, err := zoekt.IndexFilePaths(shard.Path) + paths, err := index.IndexFilePaths(shard.Path) if err != nil { errorLog.Printf("failed to stat shard paths, deleting all shards for %s: %v", shard.RepoName, err) removeAll(shards...) @@ -367,7 +367,7 @@ func maybeSetTombstone(shards []shard, repoID uint32) bool { return false } - if err := zoekt.SetTombstone(shards[0].Path, repoID); err != nil { + if err := index.SetTombstone(shards[0].Path, repoID); err != nil { errorLog.Printf("error setting tombstone for %d in shard %s: %s. 
Removing shard\n", repoID, shards[0].Path, err) _ = os.Remove(shards[0].Path) } @@ -454,7 +454,7 @@ func removeTombstones(fn string) ([]*zoekt.Repository, error) { runMerge = exec.Command("zoekt-merge-index", "merge", fn).Run } - repos, _, err := zoekt.ReadMetadataPath(fn) + repos, _, err := index.ReadMetadataPath(fn) if err != nil { return nil, fmt.Errorf("zoekt.ReadMetadataPath: %s", err) } @@ -470,7 +470,7 @@ func removeTombstones(fn string) ([]*zoekt.Repository, error) { } defer func() { - paths, err := zoekt.IndexFilePaths(fn) + paths, err := index.IndexFilePaths(fn) if err != nil { return } diff --git a/cmd/zoekt-sourcegraph-indexserver/cleanup_test.go b/cmd/zoekt-sourcegraph-indexserver/cleanup_test.go index 5a1fa7666..3931f6c79 100644 --- a/cmd/zoekt-sourcegraph-indexserver/cleanup_test.go +++ b/cmd/zoekt-sourcegraph-indexserver/cleanup_test.go @@ -14,7 +14,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" ) func TestCleanup(t *testing.T) { @@ -22,7 +22,7 @@ func TestCleanup(t *testing.T) { return shard{ RepoID: fakeID(name), RepoName: name, - Path: zoekt.ShardName("", name, 15, n), + Path: index.ShardName("", name, 15, n), ModTime: mtime, RepoTombstone: false, } @@ -37,7 +37,7 @@ func TestCleanup(t *testing.T) { if filepath.Ext(path) != ".zoekt" { continue } - repos, _, _ := zoekt.ReadMetadataPathAlive(path) + repos, _, _ := index.ReadMetadataPathAlive(path) fi, _ := os.Stat(path) for _, repo := range repos { shards = append(shards, shard{ @@ -173,7 +173,7 @@ func createTestShard(t *testing.T, repo string, id uint32, path string, optFns . for _, optFn := range optFns { optFn(r) } - b, err := zoekt.NewIndexBuilder(r) + b, err := index.NewIndexBuilder(r) if err != nil { t.Fatal(err) } @@ -234,7 +234,7 @@ func TestVacuum(t *testing.T) { tmpDir := t.TempDir() fn := createCompoundShard(t, tmpDir, []uint32{1, 2, 3, 4}) - err := zoekt.SetTombstone(fn, 2) + err := index.SetTombstone(fn, 2) if err != nil { t.Fatal(err) } @@ -258,7 +258,7 @@ func TestVacuum(t *testing.T) { t.Fatalf("expected 1 shard, but instead got %d", len(shards)) } - repos, _, err := zoekt.ReadMetadataPath(shards[0]) + repos, _, err := index.ReadMetadataPath(shards[0]) if err != nil { t.Fatal(err) } @@ -286,13 +286,13 @@ func TestGetTombstonedRepos(t *testing.T) { dir := t.TempDir() var repoID uint32 = 2 csOld := createCompoundShard(t, dir, []uint32{1, 2, 3, 4}, setLastCommitDate(time.Now().Add(-1*time.Hour))) - if err := zoekt.SetTombstone(csOld, repoID); err != nil { + if err := index.SetTombstone(csOld, repoID); err != nil { t.Fatal(err) } now := time.Now() csNew := createCompoundShard(t, dir, []uint32{5, 2, 6, 7}, setLastCommitDate(now)) - if err := zoekt.SetTombstone(csNew, repoID); err != nil { + if err := index.SetTombstone(csNew, repoID); err != nil { t.Fatal(err) } @@ -380,7 +380,7 @@ func TestCleanupCompoundShards(t *testing.T) { setTombstone := func(shardPath string, repoID uint32) { t.Helper() - if err := zoekt.SetTombstone(shardPath, repoID); err != nil { + if err := index.SetTombstone(shardPath, repoID); err != nil { t.Fatal(err) } } @@ -473,12 +473,12 @@ func createCompoundShard(t *testing.T, dir string, ids []uint32, optFns ...func( optsFn(&repo) } - opts := build.Options{ + opts := index.Options{ IndexDir: dir, RepositoryDescription: repo, } opts.SetDefaults() - b, err := build.NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -517,12 
+517,12 @@ func mergeHelper(t *testing.T, fn string) error { } defer f.Close() - indexFile, err := zoekt.NewIndexFile(f) + indexFile, err := index.NewIndexFile(f) if err != nil { return fmt.Errorf("zoekt.NewIndexFile: %s ", err) } defer indexFile.Close() - _, _, err = zoekt.Merge(filepath.Dir(fn), indexFile) + _, _, err = index.Merge(filepath.Dir(fn), indexFile) return err } diff --git a/cmd/zoekt-sourcegraph-indexserver/index.go b/cmd/zoekt-sourcegraph-indexserver/index.go index 9433e128a..647e5c6b1 100644 --- a/cmd/zoekt-sourcegraph-indexserver/index.go +++ b/cmd/zoekt-sourcegraph-indexserver/index.go @@ -18,7 +18,7 @@ import ( sglog "github.com/sourcegraph/log" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/ctags" "github.com/sourcegraph/zoekt/internal/tenant" ) @@ -99,15 +99,15 @@ type indexArgs struct { ShardMerging bool } -// BuildOptions returns a build.Options represented by indexArgs. Note: it +// BuildOptions returns a index.Options represented by indexArgs. Note: it // doesn't set fields like repository/branch. -func (o *indexArgs) BuildOptions() *build.Options { +func (o *indexArgs) BuildOptions() *index.Options { shardPrefix := "" if tenant.EnforceTenant() { shardPrefix = tenant.SrcPrefix(o.TenantID, o.RepoID) } - return &build.Options{ + return &index.Options{ // It is important that this RepositoryDescription exactly matches what // the indexer we call will produce. This is to ensure that // IncrementalSkipIndexing and IndexState can correctly calculate if diff --git a/cmd/zoekt-sourcegraph-indexserver/main.go b/cmd/zoekt-sourcegraph-indexserver/main.go index 829bcd327..de3d13658 100644 --- a/cmd/zoekt-sourcegraph-indexserver/main.go +++ b/cmd/zoekt-sourcegraph-indexserver/main.go @@ -39,13 +39,14 @@ import ( sglog "github.com/sourcegraph/log" "github.com/sourcegraph/mountinfo" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" proto "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1" "github.com/sourcegraph/zoekt/grpc/internalerrs" "github.com/sourcegraph/zoekt/grpc/messagesize" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/debugserver" "github.com/sourcegraph/zoekt/internal/profiler" "github.com/sourcegraph/zoekt/internal/tenant" + "go.uber.org/automaxprocs/maxprocs" "golang.org/x/net/trace" "golang.org/x/sys/unix" @@ -110,7 +111,7 @@ var ( metricIndexIncrementalIndexState = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "index_incremental_index_state", Help: "A count of the state on disk vs what we want to build. See zoekt/build.IndexState.", - }, []string{"state"}) // state is build.IndexState + }, []string{"state"}) // state is index.IndexState metricNumIndexed = promauto.NewGauge(prometheus.GaugeOpts{ Name: "index_num_indexed", @@ -595,11 +596,11 @@ func (s *Server) Index(args *indexArgs) (state indexState, err error) { metricIndexIncrementalIndexState.WithLabelValues(string(incrementalState)).Inc() switch incrementalState { - case build.IndexStateEqual: + case index.IndexStateEqual: debugLog.Printf("%s index already up to date. Shard=%s", args.String(), fn) return indexStateNoop, nil - case build.IndexStateMeta: + case index.IndexStateMeta: infoLog.Printf("updating index.meta %s", args.String()) // TODO(stefan) handle mergeMeta for tenant id. 
@@ -609,7 +610,7 @@ func (s *Server) Index(args *indexArgs) (state indexState, err error) { return indexStateSuccessMeta, nil } - case build.IndexStateCorrupt: + case index.IndexStateCorrupt: infoLog.Printf("falling back to full update: corrupt index: %s", args.String()) } } @@ -723,7 +724,7 @@ func createEmptyShard(args *indexArgs) error { return nil } - builder, err := build.NewBuilder(*bo) + builder, err := index.NewBuilder(*bo) if err != nil { return err } @@ -1070,7 +1071,7 @@ func setupTmpDir(logger sglog.Logger, main bool, index string) error { } func printMetaData(fn string) error { - repo, indexMeta, err := zoekt.ReadMetadataPath(fn) + repo, indexMeta, err := index.ReadMetadataPath(fn) if err != nil { return err } @@ -1093,12 +1094,12 @@ func printShardStats(fn string) error { return err } - iFile, err := zoekt.NewIndexFile(f) + iFile, err := index.NewIndexFile(f) if err != nil { return err } - return zoekt.PrintNgramStats(iFile) + return index.PrintNgramStats(iFile) } func srcLogLevelIsDebug() bool { @@ -1272,9 +1273,9 @@ func (rc *rootConfig) registerRootFlags(fs *flag.FlagSet) { fs.StringVar(&rc.root, "sourcegraph_url", os.Getenv("SRC_FRONTEND_INTERNAL"), "http://sourcegraph-frontend-internal or http://localhost:3090. If a path to a directory, we fake the Sourcegraph API and index all repos rooted under path.") fs.DurationVar(&rc.interval, "interval", time.Minute, "sync with sourcegraph this often") fs.Int64Var(&rc.indexConcurrency, "index_concurrency", getEnvWithDefaultInt64("SRC_INDEX_CONCURRENCY", 1), "the number of repos to index concurrently") - fs.StringVar(&rc.index, "index", getEnvWithDefaultString("DATA_DIR", build.DefaultDir), "set index directory to use") + fs.StringVar(&rc.index, "index", getEnvWithDefaultString("DATA_DIR", index.DefaultDir), "set index directory to use") fs.StringVar(&rc.listen, "listen", ":6072", "listen on this address.") - fs.StringVar(&rc.hostname, "hostname", zoekt.HostnameBestEffort(), "the name we advertise to Sourcegraph when asking for the list of repositories to index. Can also be set via the NODE_NAME environment variable.") + fs.StringVar(&rc.hostname, "hostname", index.HostnameBestEffort(), "the name we advertise to Sourcegraph when asking for the list of repositories to index. Can also be set via the NODE_NAME environment variable.") fs.Float64Var(&rc.cpuFraction, "cpu_fraction", 1.0, "use this fraction of the cores for indexing.") fs.DurationVar(&rc.backoffDuration, "backoff_duration", getEnvWithDefaultDuration("BACKOFF_DURATION", 10*time.Minute), "for the given duration we backoff from enqueue operations for a repository that's failed its previous indexing attempt. Consecutive failures increase the duration of the delay linearly up to the maxBackoffDuration. A negative value disables indexing backoff.") fs.DurationVar(&rc.maxBackoffDuration, "max_backoff_duration", getEnvWithDefaultDuration("MAX_BACKOFF_DURATION", 120*time.Minute), "the maximum duration to backoff from enqueueing a repo for indexing. 
A negative value disables indexing backoff.") @@ -1633,8 +1634,8 @@ func cloneURL(u *url.URL) *url.URL { func main() { liblog := sglog.Init(sglog.Resource{ Name: "zoekt-indexserver", - Version: zoekt.Version, - InstanceID: zoekt.HostnameBestEffort(), + Version: index.Version, + InstanceID: index.HostnameBestEffort(), }) defer liblog.Sync() diff --git a/cmd/zoekt-sourcegraph-indexserver/merge.go b/cmd/zoekt-sourcegraph-indexserver/merge.go index ca70ddd21..9f3d8e25e 100644 --- a/cmd/zoekt-sourcegraph-indexserver/merge.go +++ b/cmd/zoekt-sourcegraph-indexserver/merge.go @@ -11,9 +11,8 @@ import ( "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/sourcegraph/zoekt/index" "go.uber.org/atomic" - - "github.com/sourcegraph/zoekt" ) var metricShardMergingRunning = promauto.NewGauge(prometheus.GaugeOpts{ @@ -203,7 +202,7 @@ func isExcluded(path string, fi os.FileInfo, opts mergeOpts) bool { return true } - repos, _, err := zoekt.ReadMetadataPath(path) + repos, _, err := index.ReadMetadataPath(path) if err != nil { debugLog.Printf("failed to load metadata for %s\n", fi.Name()) return true diff --git a/cmd/zoekt-sourcegraph-indexserver/merge_test.go b/cmd/zoekt-sourcegraph-indexserver/merge_test.go index a7491d96b..57ec4a003 100644 --- a/cmd/zoekt-sourcegraph-indexserver/merge_test.go +++ b/cmd/zoekt-sourcegraph-indexserver/merge_test.go @@ -11,7 +11,7 @@ import ( "testing" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" ) func TestHasMultipleShards(t *testing.T) { @@ -48,12 +48,12 @@ func TestDoNotDeleteSingleShards(t *testing.T) { dir := t.TempDir() // Create a test shard. - opts := build.Options{ + opts := index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{Name: "test-repo"}, } opts.SetDefaults() - b, err := build.NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } diff --git a/cmd/zoekt-sourcegraph-indexserver/meta.go b/cmd/zoekt-sourcegraph-indexserver/meta.go index 52ede7ee4..9be31bb3a 100644 --- a/cmd/zoekt-sourcegraph-indexserver/meta.go +++ b/cmd/zoekt-sourcegraph-indexserver/meta.go @@ -7,7 +7,7 @@ import ( "path/filepath" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" ) // mergeMeta updates the .meta files for the shards on disk for o. @@ -16,10 +16,10 @@ import ( // failure. This means you might have an inconsistent state on disk if an // error is returned. It is recommended to fallback to re-indexing in that // case. 
-func mergeMeta(o *build.Options) error { +func mergeMeta(o *index.Options) error { todo := map[string]string{} for _, fn := range o.FindAllShards() { - repos, md, err := zoekt.ReadMetadataPath(fn) + repos, md, err := index.ReadMetadataPath(fn) if err != nil { return err } diff --git a/cmd/zoekt-sourcegraph-indexserver/meta_test.go b/cmd/zoekt-sourcegraph-indexserver/meta_test.go index e36971980..d0e8aa3b1 100644 --- a/cmd/zoekt-sourcegraph-indexserver/meta_test.go +++ b/cmd/zoekt-sourcegraph-indexserver/meta_test.go @@ -7,7 +7,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" ) func TestMergeMeta(t *testing.T) { @@ -17,7 +17,7 @@ func TestMergeMeta(t *testing.T) { var repoFns []string for _, name := range repoNames { - opts := build.Options{ + opts := index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{ Name: name, @@ -27,7 +27,7 @@ func TestMergeMeta(t *testing.T) { }, } opts.SetDefaults() - b, err := build.NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -41,7 +41,7 @@ func TestMergeMeta(t *testing.T) { } // update meta on repo3 then test it changed - opts := &build.Options{ + opts := &index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{ Name: "repo3", @@ -54,7 +54,7 @@ func TestMergeMeta(t *testing.T) { if err := mergeMeta(opts); err != nil { t.Fatal(err) } - repos, _, _ := zoekt.ReadMetadataPath(repoFns[3]) + repos, _, _ := index.ReadMetadataPath(repoFns[3]) if got, want := repos[0].RawConfig["public"], "0"; got != want { t.Fatalf("failed to update metadata of repo3. Got public %q want %q", got, want) } @@ -72,7 +72,7 @@ func TestMergeMeta(t *testing.T) { readPublic := func() []string { var public []string - repos, _, _ := zoekt.ReadMetadataPath(dstFn) + repos, _, _ := index.ReadMetadataPath(dstFn) for _, r := range repos { public = append(public, r.RawConfig["public"]) } @@ -84,7 +84,7 @@ func TestMergeMeta(t *testing.T) { } // Update a repo1 in compound shard to be private - opts = &build.Options{ + opts = &index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{ Name: "repo1", @@ -105,7 +105,7 @@ func TestMergeMeta(t *testing.T) { func merge(t *testing.T, dstDir string, names []string) (string, string, error) { t.Helper() - var files []zoekt.IndexFile + var files []index.IndexFile for _, fn := range names { f, err := os.Open(fn) if err != nil { @@ -113,7 +113,7 @@ func merge(t *testing.T, dstDir string, names []string) (string, string, error) } defer f.Close() - indexFile, err := zoekt.NewIndexFile(f) + indexFile, err := index.NewIndexFile(f) if err != nil { return "", "", err } @@ -122,5 +122,5 @@ func merge(t *testing.T, dstDir string, names []string) (string, string, error) files = append(files, indexFile) } - return zoekt.Merge(dstDir, files...) + return index.Merge(dstDir, files...) 
} diff --git a/cmd/zoekt-test/main.go b/cmd/zoekt-test/main.go index 2bd6e67b4..5de3f26a3 100644 --- a/cmd/zoekt-test/main.go +++ b/cmd/zoekt-test/main.go @@ -32,7 +32,7 @@ import ( "time" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/shards" "github.com/sourcegraph/zoekt/query" ) @@ -72,7 +72,7 @@ func compare(dir, patfile string, caseSensitive bool) error { } defer os.RemoveAll(indexDir) - var opts build.Options + var opts index.Options opts.SetDefaults() opts.IndexDir = indexDir @@ -84,7 +84,7 @@ func compare(dir, patfile string, caseSensitive bool) error { return fmt.Errorf("no contents") } - builder, err := build.NewBuilder(opts) + builder, err := index.NewBuilder(opts) if err != nil { return err } diff --git a/cmd/zoekt-webserver/main.go b/cmd/zoekt-webserver/main.go index a753077c1..bd2f30873 100644 --- a/cmd/zoekt-webserver/main.go +++ b/cmd/zoekt-webserver/main.go @@ -41,21 +41,8 @@ import ( grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" "github.com/sourcegraph/mountinfo" - "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" - zoektgrpc "github.com/sourcegraph/zoekt/cmd/zoekt-webserver/grpc/server" - "github.com/sourcegraph/zoekt/grpc/internalerrs" - "github.com/sourcegraph/zoekt/grpc/messagesize" - "github.com/sourcegraph/zoekt/grpc/propagator" - proto "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" "github.com/sourcegraph/zoekt/internal/debugserver" - "github.com/sourcegraph/zoekt/internal/profiler" "github.com/sourcegraph/zoekt/internal/shards" - "github.com/sourcegraph/zoekt/internal/tenant" - "github.com/sourcegraph/zoekt/internal/trace" - "github.com/sourcegraph/zoekt/internal/tracer" - "github.com/sourcegraph/zoekt/query" - "github.com/sourcegraph/zoekt/web" "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "golang.org/x/net/http2" "golang.org/x/net/http2/h2c" @@ -66,6 +53,19 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/shirou/gopsutil/v3/disk" sglog "github.com/sourcegraph/log" + "github.com/sourcegraph/zoekt" + zoektgrpc "github.com/sourcegraph/zoekt/cmd/zoekt-webserver/grpc/server" + "github.com/sourcegraph/zoekt/grpc/internalerrs" + "github.com/sourcegraph/zoekt/grpc/messagesize" + "github.com/sourcegraph/zoekt/grpc/propagator" + proto "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" + "github.com/sourcegraph/zoekt/index" + "github.com/sourcegraph/zoekt/internal/profiler" + "github.com/sourcegraph/zoekt/internal/tenant" + "github.com/sourcegraph/zoekt/internal/trace" + "github.com/sourcegraph/zoekt/internal/tracer" + "github.com/sourcegraph/zoekt/query" + "github.com/sourcegraph/zoekt/web" "github.com/uber/jaeger-client-go" oteltrace "go.opentelemetry.io/otel/trace" "go.uber.org/automaxprocs/maxprocs" @@ -139,7 +139,7 @@ func main() { logRefresh := flag.Duration("log_refresh", 24*time.Hour, "if using --log_dir, start writing a new file this often.") listen := flag.String("listen", ":6070", "listen on this address.") - index := flag.String("index", build.DefaultDir, "set index directory to use") + indexDir := flag.String("index", index.DefaultDir, "set index directory to use") html := flag.Bool("html", true, "enable HTML interface") enableRPC := flag.Bool("rpc", false, "enable go/net RPC") enableIndexserverProxy := flag.Bool("indexserver_proxy", false, "proxy requests with URLs matching the path /indexserver/ to /indexserver.sock") 
@@ -158,7 +158,7 @@ func main() { flag.Parse() if *version { - fmt.Printf("zoekt-webserver version %q\n", zoekt.Version) + fmt.Printf("zoekt-webserver version %q\n", index.Version) os.Exit(0) } @@ -171,8 +171,8 @@ func main() { resource := sglog.Resource{ Name: "zoekt-webserver", - Version: zoekt.Version, - InstanceID: zoekt.HostnameBestEffort(), + Version: index.Version, + InstanceID: index.HostnameBestEffort(), } liblog := sglog.Init(resource) @@ -193,25 +193,25 @@ func main() { // Tune GOMAXPROCS to match Linux container CPU quota. _, _ = maxprocs.Set() - if err := os.MkdirAll(*index, 0o755); err != nil { + if err := os.MkdirAll(*indexDir, 0o755); err != nil { log.Fatal(err) } - mustRegisterDiskMonitor(*index) + mustRegisterDiskMonitor(*indexDir) metricsLogger := sglog.Scoped("metricsRegistration") mustRegisterMemoryMapMetrics(metricsLogger) opts := mountinfo.CollectorOpts{Namespace: "zoekt_webserver"} - c := mountinfo.NewCollector(metricsLogger, opts, map[string]string{"indexDir": *index}) + c := mountinfo.NewCollector(metricsLogger, opts, map[string]string{"indexDir": *indexDir}) prometheus.DefaultRegisterer.MustRegister(c) // Do not block on loading shards so we can become partially available // sooner. Otherwise on large instances zoekt can be unavailable on the // order of minutes. - searcher, err := shards.NewDirectorySearcherFast(*index) + searcher, err := shards.NewDirectorySearcherFast(*indexDir) if err != nil { log.Fatal(err) } @@ -224,7 +224,7 @@ func main() { s := &web.Server{ Searcher: searcher, Top: web.Top, - Version: zoekt.Version, + Version: index.Version, } if *templateDir != "" { @@ -260,7 +260,7 @@ func main() { debugserver.AddHandlers(serveMux, *enablePprof) if *enableIndexserverProxy { - socket := filepath.Join(*index, "indexserver.sock") + socket := filepath.Join(*indexDir, "indexserver.sock") sglog.Scoped("server").Info("adding reverse proxy", sglog.String("socket", socket)) addProxyHandler(serveMux, socket) } diff --git a/cmd/zoekt/main.go b/cmd/zoekt/main.go index 8fb2158ec..78179bee9 100644 --- a/cmd/zoekt/main.go +++ b/cmd/zoekt/main.go @@ -29,6 +29,7 @@ import ( "github.com/felixge/fgprof" "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/shards" "github.com/sourcegraph/zoekt/query" ) @@ -64,19 +65,19 @@ func loadShard(fn string, verbose bool) (zoekt.Searcher, error) { return nil, err } - iFile, err := zoekt.NewIndexFile(f) + iFile, err := index.NewIndexFile(f) if err != nil { return nil, err } - s, err := zoekt.NewSearcher(iFile) + s, err := index.NewSearcher(iFile) if err != nil { iFile.Close() return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err) } if verbose { - repo, index, err := zoekt.ReadMetadata(iFile) + repo, index, err := index.ReadMetadata(iFile) if err != nil { iFile.Close() return nil, fmt.Errorf("ReadMetadata(%s): %v", fn, err) diff --git a/bits.go b/index/bits.go similarity index 99% rename from bits.go rename to index/bits.go index d438cbf15..46a594085 100644 --- a/bits.go +++ b/index/bits.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "cmp" diff --git a/bits_test.go b/index/bits_test.go similarity index 99% rename from bits_test.go rename to index/bits_test.go index 0dba68283..1ecbdccb7 100644 --- a/bits_test.go +++ b/index/bits_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package zoekt +package index import ( "encoding/binary" diff --git a/btree.go b/index/btree.go similarity index 99% rename from btree.go rename to index/btree.go index f70d0a638..1929349f4 100644 --- a/btree.go +++ b/index/btree.go @@ -28,7 +28,7 @@ // Corpora, Proceedings of the ACL-HLT 2011 System Demonstrations, pages // 103-108 -package zoekt +package index import ( "encoding/binary" @@ -46,6 +46,11 @@ import ( // On linux "getconf PAGESIZE" returns the number of bytes in a memory page. const btreeBucketSize = (4096 * 2) / ngramEncoding +const ( + interfaceBytes uint64 = 16 + pointerSize uint64 = 8 +) + type btree struct { root node opts btreeOpts diff --git a/btree_test.go b/index/btree_test.go similarity index 99% rename from btree_test.go rename to index/btree_test.go index 019708441..0e01cd6d2 100644 --- a/btree_test.go +++ b/index/btree_test.go @@ -1,4 +1,4 @@ -package zoekt +package index import ( "fmt" diff --git a/build/builder.go b/index/builder.go similarity index 94% rename from build/builder.go rename to index/builder.go index 728ad59b3..4ac018fc0 100644 --- a/build/builder.go +++ b/index/builder.go @@ -14,7 +14,7 @@ // package build implements a more convenient interface for building // zoekt indices. -package build +package index import ( "cmp" @@ -40,6 +40,7 @@ import ( "github.com/dustin/go-humanize" "github.com/go-enry/go-enry/v2" "github.com/rs/xid" + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/internal/ctags" ) @@ -246,8 +247,8 @@ type Builder struct { throttle chan int nextShardNum int - todo []*zoekt.Document - docChecker zoekt.DocChecker + todo []*Document + docChecker DocChecker size int parserBins ctags.ParserBinMap @@ -334,11 +335,11 @@ func (o *Options) SetDefaults() { // ShardName returns the name the given index shard. 
func (o *Options) shardName(n int) string { - return o.shardNameVersion(zoekt.IndexFormatVersion, n) + return o.shardNameVersion(IndexFormatVersion, n) } func (o *Options) shardNameVersion(version, n int) string { - return zoekt.ShardName(o.IndexDir, cmp.Or(o.ShardPrefix, o.RepositoryDescription.Name), version, n) + return ShardName(o.IndexDir, cmp.Or(o.ShardPrefix, o.RepositoryDescription.Name), version, n) } type IndexState string @@ -357,11 +358,11 @@ var readVersions = []struct { IndexFormatVersion int FeatureVersion int }{{ - IndexFormatVersion: zoekt.IndexFormatVersion, - FeatureVersion: zoekt.FeatureVersion, + IndexFormatVersion: IndexFormatVersion, + FeatureVersion: FeatureVersion, }, { - IndexFormatVersion: zoekt.NextIndexFormatVersion, - FeatureVersion: zoekt.FeatureVersion, + IndexFormatVersion: NextIndexFormatVersion, + FeatureVersion: FeatureVersion, }} // IncrementalSkipIndexing returns true if the index present on disk matches @@ -380,7 +381,7 @@ func (o *Options) IndexState() (IndexState, string) { return IndexStateMissing, fn } - repos, index, err := zoekt.ReadMetadataPathAlive(fn) + repos, index, err := ReadMetadataPathAlive(fn) if os.IsNotExist(err) { return IndexStateMissing, fn } else if err != nil { @@ -435,7 +436,7 @@ func (o *Options) FindRepositoryMetadata() (repository *zoekt.Repository, metada return nil, nil, false, nil } - repositories, metadata, err := zoekt.ReadMetadataPathAlive(shard) + repositories, metadata, err := ReadMetadataPathAlive(shard) if err != nil { return nil, nil, false, fmt.Errorf("reading metadata for shard %q: %w", shard, err) } @@ -475,7 +476,7 @@ func (o *Options) findShard() string { return "" } for _, fn := range compoundShards { - repos, _, err := zoekt.ReadMetadataPathAlive(fn) + repos, _, err := ReadMetadataPathAlive(fn) if err != nil { continue } @@ -591,10 +592,10 @@ func NewBuilder(opts Options) (*Builder, error) { // AddFile is a convenience wrapper for the Add method func (b *Builder) AddFile(name string, content []byte) error { - return b.Add(zoekt.Document{Name: name, Content: content}) + return b.Add(Document{Name: name, Content: content}) } -func (b *Builder) Add(doc zoekt.Document) error { +func (b *Builder) Add(doc Document) error { if b.finishCalled { return nil } @@ -673,7 +674,7 @@ func (b *Builder) Finish() error { // Delta shard builds need to update FileTombstone and branch commit information for all // existing shards for _, shard := range oldShards { - repositories, _, err := zoekt.ReadMetadataPathAlive(shard) + repositories, _, err := ReadMetadataPathAlive(shard) if err != nil { return fmt.Errorf("reading metadata from shard %q: %w", shard, err) } @@ -718,7 +719,7 @@ func (b *Builder) Finish() error { repository.LatestCommitDate = b.opts.RepositoryDescription.LatestCommitDate - tempPath, finalPath, err := zoekt.JsonMarshalRepoMetaTemp(shard, repository) + tempPath, finalPath, err := JsonMarshalRepoMetaTemp(shard, repository) if err != nil { return fmt.Errorf("writing repository metadta for shard %q: %w", shard, err) } @@ -746,7 +747,7 @@ func (b *Builder) Finish() error { toDelete = make(map[string]struct{}) for _, name := range oldShards { - paths, err := zoekt.IndexFilePaths(name) + paths, err := IndexFilePaths(name) if err != nil { b.buildError = fmt.Errorf("failed to find old paths for %s: %w", name, err) } @@ -773,7 +774,7 @@ func (b *Builder) Finish() error { if !strings.HasSuffix(p, ".zoekt") { continue } - err := zoekt.SetTombstone(p, b.opts.RepositoryDescription.ID) + err := SetTombstone(p, 
b.opts.RepositoryDescription.ID) b.buildError = err continue } @@ -870,7 +871,7 @@ func IsLowPriority(path string, content []byte) bool { } type rankedDoc struct { - *zoekt.Document + *Document rank []float64 } @@ -878,7 +879,7 @@ type rankedDoc struct { // before writing them to disk. The order of documents in the shard is important // at query time, because earlier documents receive a boost at query time and // have a higher chance of being searched before limits kick in. -func rank(d *zoekt.Document, origIdx int) []float64 { +func rank(d *Document, origIdx int) []float64 { skipped := 0.0 if d.SkipReason != "" { skipped = 1.0 @@ -930,7 +931,7 @@ func rank(d *zoekt.Document, origIdx int) []float64 { } } -func sortDocuments(todo []*zoekt.Document) { +func sortDocuments(todo []*Document) { rs := make([]rankedDoc, 0, len(todo)) for i, t := range todo { rd := rankedDoc{t, rank(t, i)} @@ -955,7 +956,7 @@ func sortDocuments(todo []*zoekt.Document) { } } -func (b *Builder) buildShard(todo []*zoekt.Document, nextShardNum int) (*finishedShard, error) { +func (b *Builder) buildShard(todo []*Document, nextShardNum int) (*finishedShard, error) { if !b.opts.DisableCTags && (b.opts.CTagsPath != "" || b.opts.ScipCTagsPath != "") { err := parseSymbols(todo, b.opts.LanguageMap, b.parserBins) if b.opts.CTagsMustSucceed && err != nil { @@ -1019,13 +1020,13 @@ func (b *Builder) CheckMemoryUsage() { } } -func (b *Builder) newShardBuilder() (*zoekt.IndexBuilder, error) { +func (b *Builder) newShardBuilder() (*IndexBuilder, error) { desc := b.opts.RepositoryDescription desc.HasSymbols = !b.opts.DisableCTags && b.opts.CTagsPath != "" desc.SubRepoMap = b.opts.SubRepositories desc.IndexOptions = b.opts.GetHash() - shardBuilder, err := zoekt.NewIndexBuilder(&desc) + shardBuilder, err := NewIndexBuilder(&desc) if err != nil { return nil, err } @@ -1034,7 +1035,7 @@ func (b *Builder) newShardBuilder() (*zoekt.IndexBuilder, error) { return shardBuilder, nil } -func (b *Builder) writeShard(fn string, ib *zoekt.IndexBuilder) (*finishedShard, error) { +func (b *Builder) writeShard(fn string, ib *IndexBuilder) (*finishedShard, error) { dir := filepath.Dir(fn) if err := os.MkdirAll(dir, 0o700); err != nil { return nil, err @@ -1091,3 +1092,24 @@ func (e *deltaIndexOptionsMismatchError) Error() string { // umask holds the Umask of the current process var umask os.FileMode + +// Document holds a document (file) to index. +type Document struct { + Name string + Content []byte + Branches []string + SubRepositoryPath string + Language string + + // If set, something is wrong with the file contents, and this + // is the reason it wasn't indexed. + SkipReason string + + // Document sections for symbols. Offsets should use bytes. + Symbols []DocumentSection + SymbolsMetaData []*zoekt.Symbol +} + +type DocumentSection struct { + Start, End uint32 +} diff --git a/build/builder_test.go b/index/builder_test.go similarity index 89% rename from build/builder_test.go rename to index/builder_test.go index aec77a4e3..91fc8efa2 100644 --- a/build/builder_test.go +++ b/index/builder_test.go @@ -1,12 +1,14 @@ -package build +package index import ( "errors" "flag" + "fmt" "io" "log" "os" "path/filepath" + "reflect" "strconv" "strings" "testing" @@ -14,7 +16,6 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" - "github.com/sourcegraph/zoekt" ) @@ -53,7 +54,7 @@ func TestBuildv16(t *testing.T) { // fields indexTime and id depend on time. For this test, we copy the fields from // the old shard. 
- _, wantMetadata, err := zoekt.ReadMetadataPath(wantP) + _, wantMetadata, err := ReadMetadataPath(wantP) if err != nil { t.Fatal(err) } @@ -235,7 +236,7 @@ func TestDontCountContentOfSkippedFiles(t *testing.T) { // content with at least 100 bytes binary := append([]byte("abc def \x00"), make([]byte, 100)...) - err = b.Add(zoekt.Document{ + err = b.Add(Document{ Name: "f1", Content: binary, }) @@ -253,6 +254,40 @@ } } +func TestPartialSuccess(t *testing.T) { + dir := t.TempDir() + + opts := Options{ + IndexDir: dir, + ShardMax: 1024, + SizeMax: 1 << 20, + Parallelism: 1, + } + opts.RepositoryDescription.Name = "repo" + opts.SetDefaults() + + b, err := NewBuilder(opts) + if err != nil { + t.Fatalf("NewBuilder: %v", err) + } + + for i := 0; i < 4; i++ { + nm := fmt.Sprintf("F%d", i) + _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128))) + } + b.buildError = fmt.Errorf("any error") + + // No error checking. + _ = b.Finish() + + // Finish cleans up temporary files. + if fs, err := filepath.Glob(dir + "/*"); err != nil { + t.Errorf("glob(%s): %v", dir, err) + } else if len(fs) != 0 { + t.Errorf("got shards %v, want []", fs) + } +} + func TestOptions_FindAllShards(t *testing.T) { type simpleShard struct { Repository zoekt.Repository @@ -371,7 +406,7 @@ if tt.expectedShardCount > 0 { for _, s := range shards { // all shards should contain the metadata for the desired repository - repos, _, err := zoekt.ReadMetadataPathAlive(s) + repos, _, err := ReadMetadataPathAlive(s) if err != nil { t.Fatalf("reading metadata from shard %q: %s", s, err) } @@ -620,7 +655,7 @@ func TestBuilder_DeltaShardsMetadataInOlderShards(t *testing.T) { } for _, s := range shards { - repositories, _, err := zoekt.ReadMetadataPathAlive(s) + repositories, _, err := ReadMetadataPathAlive(s) if err != nil { t.Fatalf("reading repository metadata from shard %q", s) } @@ -766,7 +801,7 @@ func TestIsLowPriority(t *testing.T) { "builder_test.go", "test/TestQuery.java", "search/vendor/thirdparty.cc", "search/node_modules/search/index.js", "search.min.js", "internal/search.js.map", } @@ -838,7 +873,7 @@ func createTestShard(t *testing.T, indexDir string, r zoekt.Repository, numShard // This (along with our shardMax setting of 75 bytes) means that each shard // will contain at most one of these. fileName := strconv.Itoa(i) - document := zoekt.Document{Name: fileName, Content: []byte(strings.Repeat("A", 100))} + document := Document{Name: fileName, Content: []byte(strings.Repeat("A", 100))} for _, branch := range o.RepositoryDescription.Branches { document.Branches = append(document.Branches, branch.Name) } @@ -879,7 +914,7 @@ func createTestCompoundShard(t *testing.T, indexDir string, repositories []zoekt } // load the normal shards that we created - var files []zoekt.IndexFile + var files []IndexFile for _, shard := range shardNames { f, err := os.Open(shard) if err != nil { @@ -887,7 +922,7 @@ } defer f.Close() - indexFile, err := zoekt.NewIndexFile(f) + indexFile, err := NewIndexFile(f) if err != nil { t.Fatalf("creating index file: %s", err) } @@ -897,7 +932,7 @@ } // merge all the simple shards into a compound shard - tmpName, dstName, err := zoekt.Merge(indexDir, files...) + tmpName, dstName, err := Merge(indexDir, files...)
if err != nil { t.Fatalf("merging index files into compound shard: %s", err) } @@ -1000,3 +1035,100 @@ func TestIgnoreSizeMax(t *testing.T) { }) } } + +type filerankCase struct { + name string + docs []*Document + want []int +} + +func testFileRankAspect(t *testing.T, c filerankCase) { + var want []*Document + for _, j := range c.want { + want = append(want, c.docs[j]) + } + + got := make([]*Document, len(c.docs)) + copy(got, c.docs) + sortDocuments(got) + + print := func(ds []*Document) string { + r := "" + for _, d := range ds { + r += fmt.Sprintf("%v, ", d) + } + return r + } + if !reflect.DeepEqual(got, want) { + t.Errorf("got docs [%v], want [%v]", print(got), print(want)) + } +} + +func TestFileRank(t *testing.T) { + for _, c := range []filerankCase{{ + name: "filename", + docs: []*Document{ + { + Name: "longlonglong", + Content: []byte("bla"), + }, + { + Name: "short", + Content: []byte("bla"), + }, + }, + want: []int{1, 0}, + }, { + name: "test", + docs: []*Document{ + { + Name: "foo_test.go", + Content: []byte("bla"), + }, + { + Name: "longlonglong", + Content: []byte("bla"), + }, + }, + want: []int{1, 0}, + }, { + name: "content", + docs: []*Document{ + { + Content: []byte("bla"), + }, + { + Content: []byte("blablablabla"), + }, + { + Content: []byte("blabla"), + }, + }, + want: []int{0, 2, 1}, + }, { + name: "skipped docs", + docs: []*Document{ + { + Name: "binary_file", + SkipReason: "binary file", + }, + { + Name: "some_test.go", + Content: []byte("bla"), + }, + { + Name: "large_file.go", + SkipReason: "too large", + }, + { + Name: "file.go", + Content: []byte("blabla"), + }, + }, + want: []int{3, 1, 0, 2}, + }} { + t.Run(c.name, func(t *testing.T) { + testFileRankAspect(t, c) + }) + } +} diff --git a/build/builder_unix.go b/index/builder_unix.go similarity index 98% rename from build/builder_unix.go rename to index/builder_unix.go index edbd87b06..07768c0dd 100644 --- a/build/builder_unix.go +++ b/index/builder_unix.go @@ -15,7 +15,7 @@ //go:build !windows && !wasm // +build !windows,!wasm -package build +package index import ( "os" diff --git a/contentprovider.go b/index/contentprovider.go similarity index 94% rename from contentprovider.go rename to index/contentprovider.go index 80c3f6785..d729472b2 100644 --- a/contentprovider.go +++ b/index/contentprovider.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "bytes" @@ -23,6 +23,7 @@ import ( "unicode" "unicode/utf8" + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/internal/ctags" ) @@ -32,7 +33,7 @@ var _ = log.Println // content with the same code. type contentProvider struct { id *indexData - stats *Stats + stats *zoekt.Stats // mutable err error @@ -143,7 +144,7 @@ func (p *contentProvider) findOffset(filename bool, r uint32) uint32 { // // Note: the byte slices may be backed by mmapped data, so before being // returned by the API it needs to be copied. 
-func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, language string, opts *SearchOptions) []LineMatch { +func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, language string, opts *zoekt.SearchOptions) []zoekt.LineMatch { var filenameMatches []*candidateMatch contentMatches := make([]*candidateMatch, 0, len(ms)) @@ -161,9 +162,9 @@ func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, return p.fillContentMatches(contentMatches, numContextLines, language, opts) } // Otherwise, we return a single line containing the filematch match. lineScore, _ := p.scoreLine(filenameMatches, language, -1 /* must pass -1 for filenames */, opts) - res := LineMatch{ + res := zoekt.LineMatch{ Line: p.id.fileName(p.idx), FileName: true, Score: lineScore.score, } for _, m := range ms { - res.LineFragments = append(res.LineFragments, LineFragmentMatch{ + res.LineFragments = append(res.LineFragments, zoekt.LineFragmentMatch{ LineOffset: int(m.byteOffset), MatchLength: int(m.byteMatchSz), Offset: m.byteOffset, }) } - return []LineMatch{res} + return []zoekt.LineMatch{res} } @@ -190,7 +191,7 @@ func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, // // Note: the byte slices may be backed by mmapped data, so before being // returned by the API it needs to be copied. -func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines int, language string, opts *SearchOptions) []ChunkMatch { +func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines int, language string, opts *zoekt.SearchOptions) []zoekt.ChunkMatch { var filenameMatches []*candidateMatch contentMatches := make([]*candidateMatch, 0, len(ms)) @@ -207,18 +208,18 @@ func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines return p.fillContentChunkMatches(contentMatches, numContextLines, language, opts) } // Otherwise, we return a single chunk representing the filename match.
lineScore, _ := p.scoreLine(filenameMatches, language, -1 /* must pass -1 for filenames */, opts) fileName := p.id.fileName(p.idx) - ranges := make([]Range, 0, len(ms)) + ranges := make([]zoekt.Range, 0, len(ms)) for _, m := range ms { - ranges = append(ranges, Range{ - Start: Location{ + ranges = append(ranges, zoekt.Range{ + Start: zoekt.Location{ ByteOffset: m.byteOffset, LineNumber: 1, Column: uint32(utf8.RuneCount(fileName[:m.byteOffset]) + 1), }, - End: Location{ + End: zoekt.Location{ ByteOffset: m.byteOffset + m.byteMatchSz, LineNumber: 1, Column: uint32(utf8.RuneCount(fileName[:m.byteOffset+m.byteMatchSz]) + 1), @@ -226,9 +227,9 @@ }) } - return []ChunkMatch{{ + return []zoekt.ChunkMatch{{ Content: fileName, - ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, + ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, Ranges: ranges, FileName: true, Score: lineScore.score, @@ -236,8 +237,8 @@ }} } -func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLines int, language string, opts *SearchOptions) []LineMatch { - var result []LineMatch +func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLines int, language string, opts *zoekt.SearchOptions) []zoekt.LineMatch { + var result []zoekt.LineMatch for len(ms) > 0 { m := ms[0] num := p.newlines().atOffset(m.byteOffset) @@ -271,7 +272,7 @@ // Due to merging matches, we may have a match that // crosses a line boundary. Prevent confusion by // taking lines until we pass the last match for nextLineStart < len(data) && endMatch > uint32(nextLineStart) { next := bytes.IndexByte(data[nextLineStart:], '\n') if next == -1 { @@ -282,7 +283,7 @@ } } - finalMatch := LineMatch{ + finalMatch := zoekt.LineMatch{ LineStart: lineStart, LineEnd: nextLineStart, LineNumber: num, @@ -299,7 +300,7 @@ finalMatch.DebugScore = lineScore.debugScore for i, m := range lineCands { - fragment := LineFragmentMatch{ + fragment := zoekt.LineFragmentMatch{ Offset: m.byteOffset, LineOffset: int(m.byteOffset) - lineStart, MatchLength: int(m.byteMatchSz), @@ -316,7 +317,7 @@ return result } -func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numContextLines int, language string, opts *SearchOptions) []ChunkMatch { +func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numContextLines int, language string, opts *zoekt.SearchOptions) []zoekt.ChunkMatch { data := p.data(false) // columnHelper prevents O(len(ms) * len(data)) lookups for all columns.
@@ -332,21 +333,21 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte newlines := p.newlines() chunks := chunkCandidates(ms, newlines, numContextLines) - chunkMatches := make([]ChunkMatch, 0, len(chunks)) + chunkMatches := make([]zoekt.ChunkMatch, 0, len(chunks)) for _, chunk := range chunks { - ranges := make([]Range, 0, len(chunk.candidates)) + ranges := make([]zoekt.Range, 0, len(chunk.candidates)) for _, cm := range chunk.candidates { startOffset := cm.byteOffset endOffset := cm.byteOffset + cm.byteMatchSz startLine, endLine := newlines.offsetRangeToLineRange(startOffset, endOffset) - ranges = append(ranges, Range{ - Start: Location{ + ranges = append(ranges, zoekt.Range{ + Start: zoekt.Location{ ByteOffset: startOffset, LineNumber: uint32(startLine), Column: columnHelper.get(int(newlines.lineStart(startLine)), startOffset), }, - End: Location{ + End: zoekt.Location{ ByteOffset: endOffset, LineNumber: uint32(endLine), Column: columnHelper.get(int(newlines.lineStart(endLine)), endOffset), @@ -361,9 +362,9 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte firstLineStart := newlines.lineStart(firstLineNumber) chunkScore, symbolInfo := p.scoreChunk(chunk.candidates, language, opts) - chunkMatches = append(chunkMatches, ChunkMatch{ + chunkMatches = append(chunkMatches, zoekt.ChunkMatch{ Content: newlines.getLines(data, firstLineNumber, int(chunk.lastLine)+numContextLines+1), - ContentStart: Location{ + ContentStart: zoekt.Location{ ByteOffset: firstLineStart, LineNumber: uint32(firstLineNumber), Column: 1, @@ -599,7 +600,7 @@ func (p *contentProvider) matchesSymbol(cm *candidateMatch) bool { return ok } -func (p *contentProvider) findSymbol(cm *candidateMatch) (DocumentSection, *Symbol, bool) { +func (p *contentProvider) findSymbol(cm *candidateMatch) (DocumentSection, *zoekt.Symbol, bool) { if cm.fileName { return DocumentSection{}, nil, false } @@ -871,29 +872,29 @@ func scoreSymbolKind(language string, filename []byte, sym []byte, kind ctags.Sy return factor * scoreKindMatch } -type matchScoreSlice []LineMatch +type matchScoreSlice []zoekt.LineMatch func (m matchScoreSlice) Len() int { return len(m) } func (m matchScoreSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] } func (m matchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score } -type chunkMatchScoreSlice []ChunkMatch +type chunkMatchScoreSlice []zoekt.ChunkMatch func (m chunkMatchScoreSlice) Len() int { return len(m) } func (m chunkMatchScoreSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] } func (m chunkMatchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score } -type fileMatchesByScore []FileMatch +type fileMatchesByScore []zoekt.FileMatch func (m fileMatchesByScore) Len() int { return len(m) } func (m fileMatchesByScore) Swap(i, j int) { m[i], m[j] = m[j], m[i] } func (m fileMatchesByScore) Less(i, j int) bool { return m[i].Score > m[j].Score } -func sortMatchesByScore(ms []LineMatch) { +func sortMatchesByScore(ms []zoekt.LineMatch) { sort.Sort(matchScoreSlice(ms)) } -func sortChunkMatchesByScore(ms []ChunkMatch) { +func sortChunkMatchesByScore(ms []zoekt.ChunkMatch) { sort.Sort(chunkMatchScoreSlice(ms)) } @@ -904,14 +905,14 @@ func sortChunkMatchesByScore(ms []ChunkMatch) { // // We don't only use the scores, we will also boost some results to present // files with novel extensions. 
-func SortFiles(ms []FileMatch) { +func SortFiles(ms []zoekt.FileMatch) { sort.Sort(fileMatchesByScore(ms)) // Boost a file extension not in the top 3 to the third filematch. boostNovelExtension(ms, 2, 0.9) } -func boostNovelExtension(ms []FileMatch, boostOffset int, minScoreRatio float64) { +func boostNovelExtension(ms []zoekt.FileMatch, boostOffset int, minScoreRatio float64) { if len(ms) <= boostOffset+1 { return } diff --git a/contentprovider_test.go b/index/contentprovider_test.go similarity index 99% rename from contentprovider_test.go rename to index/contentprovider_test.go index 7a4024b5a..12045a40c 100644 --- a/contentprovider_test.go +++ b/index/contentprovider_test.go @@ -1,4 +1,4 @@ -package zoekt +package index import ( "bytes" diff --git a/build/ctags.go b/index/ctags.go similarity index 93% rename from build/ctags.go rename to index/ctags.go index 246d05d83..869ede706 100644 --- a/build/ctags.go +++ b/index/ctags.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package build +package index import ( "bytes" @@ -41,7 +41,7 @@ func normalizeLanguage(filetype string) string { return normalized } -func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserBins ctags.ParserBinMap) error { +func parseSymbols(todo []*Document, languageMap ctags.LanguageMap, parserBins ctags.ParserBinMap) error { monitor := newMonitor() defer monitor.Stop() @@ -55,7 +55,7 @@ func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserB continue } - zoekt.DetermineLanguageIfUnknown(doc) + DetermineLanguageIfUnknown(doc) parserType := languageMap[normalizeLanguage(doc.Language)] if parserType == ctags.NoCTags { @@ -92,7 +92,7 @@ func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserB // overlaps finds the proper position to insert a zoekt.DocumentSection with // "start and "end" into "symOffsets". It returns -1 if the new section overlaps // with one of the existing ones. -func overlaps(symOffsets []zoekt.DocumentSection, start, end uint32) int { +func overlaps(symOffsets []DocumentSection, start, end uint32) int { i := 0 for i = len(symOffsets) - 1; i >= 0; i-- { // The most common case is that we exit here, because symOffsets is sorted by @@ -120,9 +120,9 @@ type tagsToSections struct { // corresponding metadata (zoekt.Symbol). // // This can not be called concurrently. 
-func (t *tagsToSections) Convert(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, []*zoekt.Symbol, error) { +func (t *tagsToSections) Convert(content []byte, tags []*ctags.Entry) ([]DocumentSection, []*zoekt.Symbol, error) { nls := t.newLinesIndices(content) - symOffsets := make([]zoekt.DocumentSection, 0, len(tags)) + symOffsets := make([]DocumentSection, 0, len(tags)) symMetaData := make([]*zoekt.Symbol, 0, len(tags)) for _, t := range tags { @@ -162,7 +162,7 @@ func (t *tagsToSections) Convert(content []byte, tags []*ctags.Entry) ([]zoekt.D continue } - symOffsets = slices.Insert(symOffsets, i, zoekt.DocumentSection{ + symOffsets = slices.Insert(symOffsets, i, DocumentSection{ Start: start, End: endSym, }) @@ -242,7 +242,7 @@ func newMonitor() *monitor { return m } -func (m *monitor) BeginParsing(doc *zoekt.Document) { +func (m *monitor) BeginParsing(doc *Document) { now := time.Now() m.mu.Lock() m.lastUpdate = now diff --git a/build/ctags_test.go b/index/ctags_test.go similarity index 85% rename from build/ctags_test.go rename to index/ctags_test.go index 9281f52c2..3d950f33c 100644 --- a/build/ctags_test.go +++ b/index/ctags_test.go @@ -12,14 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -package build +package index import ( "os" "reflect" "testing" - "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/internal/ctags" ) @@ -64,7 +63,7 @@ func TestTagsToSectionsMultiple(t *testing.T) { t.Fatal("tagsToSections", err) } - want := []zoekt.DocumentSection{ + want := []DocumentSection{ {Start: 16, End: 17}, {Start: 23, End: 24}, } @@ -97,7 +96,7 @@ func TestTagsToSectionsReverse(t *testing.T) { t.Fatal("tagsToSections", err) } - want := []zoekt.DocumentSection{ + want := []DocumentSection{ {Start: 15, End: 18}, {Start: 20, End: 23}, {Start: 26, End: 29}, @@ -152,7 +151,7 @@ func TestTagsToSectionsEOF(t *testing.T) { func TestOverlaps(t *testing.T) { tests := []struct { - documentSections []zoekt.DocumentSection + documentSections []DocumentSection start uint32 end uint32 pos int @@ -161,37 +160,37 @@ func TestOverlaps(t *testing.T) { // overlap // { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, start: 6, end: 9, pos: -1, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, start: 6, end: 12, pos: -1, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, start: 4, end: 9, pos: -1, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, start: 1, end: 9, pos: -1, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, start: 0, end: 25, pos: -1, }, { - documentSections: []zoekt.DocumentSection{{0, 3}}, + documentSections: []DocumentSection{{0, 3}}, start: 0, end: 1, pos: -1, @@ -200,37 +199,37 @@ func TestOverlaps(t *testing.T) { // NO overlap // { - documentSections: []zoekt.DocumentSection{{2, 3}, {5, 10}}, + documentSections: []DocumentSection{{2, 3}, {5, 10}}, start: 0, end: 2, pos: 0, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, start: 3, end: 4, pos: 1, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, 
start: 3, end: 5, pos: 1, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, start: 11, end: 14, pos: 2, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}, {14, 15}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}, {14, 15}}, start: 11, end: 13, pos: 2, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}, {14, 15}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}, {14, 15}}, start: 18, end: 19, pos: 3, diff --git a/eval.go b/index/eval.go similarity index 92% rename from eval.go rename to index/eval.go index 773563852..b73d15dc9 100644 --- a/eval.go +++ b/index/eval.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "context" @@ -25,7 +25,7 @@ import ( enry_data "github.com/go-enry/go-enry/v2/data" "github.com/grafana/regexp" - + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/internal/tenant" "github.com/sourcegraph/zoekt/query" ) @@ -33,7 +33,7 @@ import ( // simplifyMultiRepo takes a query and a predicate. It returns Const(true) if all // repository names fulfill the predicate, Const(false) if none of them do, and q // otherwise. -func (d *indexData) simplifyMultiRepo(q query.Q, predicate func(*Repository) bool) query.Q { +func (d *indexData) simplifyMultiRepo(q query.Q, predicate func(*zoekt.Repository) bool) query.Q { count := 0 alive := len(d.repoMetaData) for i := range d.repoMetaData { @@ -56,11 +56,11 @@ func (d *indexData) simplify(in query.Q) query.Q { eval := query.Map(in, func(q query.Q) query.Q { switch r := q.(type) { case *query.Repo: - return d.simplifyMultiRepo(q, func(repo *Repository) bool { + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { return r.Regexp.MatchString(repo.Name) }) case *query.RepoRegexp: - return d.simplifyMultiRepo(q, func(repo *Repository) bool { + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { return r.Regexp.MatchString(repo.Name) }) case *query.BranchesRepos: @@ -73,13 +73,13 @@ func (d *indexData) simplify(in query.Q) query.Q { } return &query.Const{Value: false} case *query.RepoSet: - return d.simplifyMultiRepo(q, func(repo *Repository) bool { + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { return r.Set[repo.Name] }) case query.RawConfig: - return d.simplifyMultiRepo(q, func(repo *Repository) bool { return uint8(r)&encodeRawConfig(repo.RawConfig) == uint8(r) }) + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { return uint8(r)&encodeRawConfig(repo.RawConfig) == uint8(r) }) case *query.RepoIDs: - return d.simplifyMultiRepo(q, func(repo *Repository) bool { + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { return r.Repos.Contains(repo.ID) }) case *query.Language: @@ -123,25 +123,14 @@ func (d *indexData) simplify(in query.Q) query.Q { return query.Simplify(eval) } -func (o *SearchOptions) SetDefaults() { - if o.ShardMaxMatchCount == 0 { - // We cap the total number of matches, so overly broad - // searches don't crash the machine. 
- o.ShardMaxMatchCount = 100000 - } - if o.TotalMaxMatchCount == 0 { - o.TotalMaxMatchCount = 10 * o.ShardMaxMatchCount - } -} - -func (d *indexData) Search(ctx context.Context, q query.Q, opts *SearchOptions) (sr *SearchResult, err error) { +func (d *indexData) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (sr *zoekt.SearchResult, err error) { timer := newTimer() copyOpts := *opts opts = &copyOpts opts.SetDefaults() - var res SearchResult + var res zoekt.SearchResult if len(d.fileNameIndex) == 0 { return &res, nil } @@ -203,7 +192,7 @@ func (d *indexData) Search(ctx context.Context, q query.Q, opts *SearchOptions) // document frequency per term df := make(termDocumentFrequency) // term frequency per file match var tfs []termFrequency nextFileMatch: @@ -293,10 +282,10 @@ nextFileMatch: } } - fileMatch := FileMatch{ + fileMatch := zoekt.FileMatch{ Repository: md.Name, RepositoryID: md.ID, - RepositoryPriority: md.priority, + RepositoryPriority: md.GetPriority(), FileName: string(d.fileName(nextDoc)), Checksum: d.getChecksum(nextDoc), Language: d.languageMap[d.getLanguage(nextDoc)], @@ -397,7 +386,7 @@ nextFileMatch: return &res, nil } -func addRepo(res *SearchResult, repo *Repository) { +func addRepo(res *zoekt.SearchResult, repo *zoekt.Repository) { if res.RepoURLs == nil { res.RepoURLs = map[string]string{} } @@ -414,7 +403,7 @@ func addRepo(res *SearchResult, repo *Repository) { // returned, with filename matches first. // // If `merge` is set, overlapping and adjacent matches will be merged // into a single match. Otherwise, overlapping matches will be removed, // but adjacent matches will remain. func (d *indexData) gatherMatches(nextDoc uint32, mt matchTree, known map[matchTree]bool) []*candidateMatch { var cands []*candidateMatch @@ -548,19 +537,19 @@ func (d *indexData) gatherBranches(docID uint32, mt matchTree, known map[matchTr return branches } -func (d *indexData) List(ctx context.Context, q query.Q, opts *ListOptions) (rl *RepoList, err error) { - var include func(rle *RepoListEntry) bool +func (d *indexData) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (rl *zoekt.RepoList, err error) { + var include func(rle *zoekt.RepoListEntry) bool q = d.simplify(q) if c, ok := q.(*query.Const); ok { if !c.Value { - return &RepoList{}, nil + return &zoekt.RepoList{}, nil } - include = func(rle *RepoListEntry) bool { + include = func(rle *zoekt.RepoListEntry) bool { return true } } else { - sr, err := d.Search(ctx, q, &SearchOptions{ + sr, err := d.Search(ctx, q, &zoekt.SearchOptions{ ShardRepoMaxMatchCount: 1, }) if err != nil { @@ -572,23 +561,23 @@ func (d *indexData) List(ctx context.Context, q query.Q, opts *ListOptions) (rl foundRepos[file.Repository] = struct{}{} } - include = func(rle *RepoListEntry) bool { + include = func(rle *zoekt.RepoListEntry) bool { _, ok := foundRepos[rle.Repository.Name] return ok } } - var l RepoList + var l zoekt.RepoList field, err := opts.GetField() if err != nil { return nil, err } switch field { - case RepoListFieldRepos: - l.Repos = make([]*RepoListEntry, 0, len(d.repoListEntry)) - case RepoListFieldReposMap: - l.ReposMap = make(ReposMap, len(d.repoListEntry)) + case zoekt.RepoListFieldRepos: - l.Repos = make([]*zoekt.RepoListEntry, 0, len(d.repoListEntry)) + case zoekt.RepoListFieldReposMap: + l.ReposMap = make(zoekt.ReposMap, len(d.repoListEntry)) } for i := range d.repoListEntry { @@ -614,10 +603,10 @@ func
(d *indexData) List(ctx context.Context, q query.Q, opts *ListOptions) (rl } switch field { - case RepoListFieldRepos: + case zoekt.RepoListFieldRepos: l.Repos = append(l.Repos, rle) - case RepoListFieldReposMap: - l.ReposMap[rle.Repository.ID] = MinimalRepoListEntry{ + case zoekt.RepoListFieldReposMap: + l.ReposMap[rle.Repository.ID] = zoekt.MinimalRepoListEntry{ HasSymbols: rle.Repository.HasSymbols, Branches: rle.Repository.Branches, IndexTimeUnix: rle.IndexMetadata.IndexTime.Unix(), diff --git a/eval_test.go b/index/eval_test.go similarity index 95% rename from eval_test.go rename to index/eval_test.go index 36e78ed9d..649ee07a2 100644 --- a/eval_test.go +++ b/index/eval_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "context" @@ -26,7 +26,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/grafana/regexp" - + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/query" ) @@ -154,7 +154,7 @@ func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) { ctx := context.Background() q := &query.Const{Value: true} - opts := &SearchOptions{ShardRepoMaxMatchCount: 1} + opts := &zoekt.SearchOptions{ShardRepoMaxMatchCount: 1} sr, err := cs.Search(ctx, q, opts) if err != nil { @@ -174,7 +174,7 @@ func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) { }) t.Run("stats", func(t *testing.T) { - got, want := sr.Stats, Stats{ + got, want := sr.Stats, zoekt.Stats{ ContentBytesLoaded: 0, FileCount: 2, FilesConsidered: 2, @@ -182,7 +182,7 @@ func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) { ShardsScanned: 1, MatchCount: 2, } - if diff := cmp.Diff(want, got, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { + if diff := cmp.Diff(want, got, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { t.Errorf("mismatch (-want, +got): %s", diff) } }) @@ -191,10 +191,10 @@ func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) { func compoundReposShard(t *testing.T, names ...string) *indexData { t.Helper() - repos := make([]*Repository, 0, len(names)) + repos := make([]*zoekt.Repository, 0, len(names)) docs := make([][]Document, 0, len(names)) for _, name := range names { - repos = append(repos, &Repository{ID: hash(name), Name: name}) + repos = append(repos, &zoekt.Repository{ID: hash(name), Name: name}) ds := []Document{ {Name: name + ".txt", Content: []byte(name + " content")}, {Name: name + ".2.txt", Content: []byte(name + " content 2")}, @@ -375,8 +375,8 @@ func hash(name string) uint32 { func TestGatherBranches(t *testing.T) { content := []byte("dummy") - b := testIndexBuilder(t, &Repository{ - Branches: []RepositoryBranch{ + b := testIndexBuilder(t, &zoekt.Repository{ + Branches: []zoekt.RepositoryBranch{ {"foo", "v1"}, {"foo-2", "v1"}, {"main", "v1"}, @@ -396,7 +396,7 @@ func TestGatherBranches(t *testing.T) { &query.Branch{Pattern: "foo"}, &query.Branch{Pattern: "quz"}, }}, - &SearchOptions{}, + &zoekt.SearchOptions{}, ) if err != nil { t.Fatal(err) diff --git a/hititer.go b/index/hititer.go similarity index 94% rename from hititer.go rename to index/hititer.go index 01a58d1e1..7129ce0f2 100644 --- a/hititer.go +++ b/index/hititer.go @@ -12,11 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package zoekt +package index import ( "encoding/binary" "fmt" + + "github.com/sourcegraph/zoekt" ) // hitIterator finds potential search matches, measured in offsets of @@ -30,7 +32,7 @@ type hitIterator interface { next(limit uint32) // Return how many bytes were read. - updateStats(s *Stats) + updateStats(s *zoekt.Stats) } // distanceHitIterator looks for hits at a fixed distance apart. @@ -73,7 +75,7 @@ func (i *distanceHitIterator) first() uint32 { return i.i1.first() } -func (i *distanceHitIterator) updateStats(s *Stats) { +func (i *distanceHitIterator) updateStats(s *zoekt.Stats) { i.i1.updateStats(s) i.i2.updateStats(s) } @@ -159,7 +161,7 @@ func (i *inMemoryIterator) first() uint32 { return maxUInt32 } -func (i *inMemoryIterator) updateStats(s *Stats) { +func (i *inMemoryIterator) updateStats(s *zoekt.Stats) { } func (i *inMemoryIterator) next(limit uint32) { @@ -219,7 +221,7 @@ func (i *compressedPostingIterator) next(limit uint32) { } } -func (i *compressedPostingIterator) updateStats(s *Stats) { +func (i *compressedPostingIterator) updateStats(s *zoekt.Stats) { s.IndexBytesLoaded += int64(i.indexBytesLoaded) s.NgramLookups += i.ngramLookups i.indexBytesLoaded = 0 @@ -237,7 +239,7 @@ func (i *mergingIterator) String() string { return fmt.Sprintf("merge:%v", i.iters) } -func (i *mergingIterator) updateStats(s *Stats) { +func (i *mergingIterator) updateStats(s *zoekt.Stats) { s.NgramLookups += i.ngramLookups i.ngramLookups = 0 for _, j := range i.iters { diff --git a/hititer_test.go b/index/hititer_test.go similarity index 97% rename from hititer_test.go rename to index/hititer_test.go index 0c276c129..8d4ca2771 100644 --- a/hititer_test.go +++ b/index/hititer_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "fmt" @@ -23,6 +23,7 @@ import ( "testing/quick" "github.com/google/go-cmp/cmp" + "github.com/sourcegraph/zoekt" ) func TestCompressedPostingIterator_limit(t *testing.T) { @@ -94,7 +95,7 @@ func benchmarkCompressedPostingIterator(b *testing.B, size, limitsSize int) { it.next(limit) _ = it.first() } - var s Stats + var s zoekt.Stats it.updateStats(&s) b.SetBytes(s.IndexBytesLoaded) } diff --git a/index_test.go b/index/index_test.go similarity index 91% rename from index_test.go rename to index/index_test.go index 6231725d0..7685e6cfe 100644 --- a/index_test.go +++ b/index/index_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package zoekt +package index import ( "bytes" @@ -26,11 +26,11 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/grafana/regexp" - + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/query" ) -func clearScores(r *SearchResult) { +func clearScores(r *zoekt.SearchResult) { for i := range r.Files { r.Files[i].Score = 0.0 for j := range r.Files[i].LineMatches { @@ -45,7 +45,7 @@ func clearScores(r *SearchResult) { } } -func testIndexBuilder(tb testing.TB, repo *Repository, docs ...Document) *IndexBuilder { +func testIndexBuilder(tb testing.TB, repo *zoekt.Repository, docs ...Document) *IndexBuilder { tb.Helper() b, err := NewIndexBuilder(repo) @@ -62,7 +62,7 @@ func testIndexBuilder(tb testing.TB, repo *Repository, docs ...Document) *IndexB return b } -func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder { +func testIndexBuilderCompound(t *testing.T, repos []*zoekt.Repository, docs [][]Document) *IndexBuilder { t.Helper() b := newIndexBuilder() @@ -169,7 +169,7 @@ func TestEmptyIndex(t *testing.T) { b := testIndexBuilder(t, nil) searcher := searcherForTest(t, b) - var opts SearchOptions + var opts zoekt.SearchOptions if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil { t.Fatalf("Search: %v", err) } @@ -209,10 +209,10 @@ func TestNewlines(t *testing.T) { sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}) matches := sres.Files - want := []FileMatch{{ + want := []zoekt.FileMatch{{ FileName: "filename", - LineMatches: []LineMatch{{ - LineFragments: []LineFragmentMatch{{ + LineMatches: []zoekt.LineMatch{{ + LineFragments: []zoekt.LineFragmentMatch{{ Offset: 8, LineOffset: 2, MatchLength: 3, @@ -233,18 +233,18 @@ func TestNewlines(t *testing.T) { sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts) matches := sres.Files - want := []FileMatch{{ + want := []zoekt.FileMatch{{ FileName: "filename", - ChunkMatches: []ChunkMatch{{ + ChunkMatches: []zoekt.ChunkMatch{{ Content: []byte("line2\n"), - ContentStart: Location{ + ContentStart: zoekt.Location{ ByteOffset: 6, LineNumber: 2, Column: 1, }, - Ranges: []Range{{ - Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3}, - End: Location{ByteOffset: 11, LineNumber: 2, Column: 6}, + Ranges: []zoekt.Range{{ + Start: zoekt.Location{ByteOffset: 8, LineNumber: 2, Column: 3}, + End: zoekt.Location{ByteOffset: 11, LineNumber: 2, Column: 6}, }}, }}, }} @@ -287,11 +287,11 @@ func TestQueryNewlines(t *testing.T) { }) } -var chunkOpts = SearchOptions{ChunkMatches: true} +var chunkOpts = zoekt.SearchOptions{ChunkMatches: true} -func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult { +func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...zoekt.SearchOptions) *zoekt.SearchResult { searcher := searcherForTest(t, b) - var opts SearchOptions + var opts zoekt.SearchOptions if len(o) > 0 { opts = o[0] } @@ -303,7 +303,7 @@ func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) return res } -func searcherForTest(t testing.TB, b *IndexBuilder) Searcher { +func searcherForTest(t testing.TB, b *IndexBuilder) zoekt.Searcher { var buf bytes.Buffer if err := b.Write(&buf); err != nil { t.Fatal(err) @@ -375,13 +375,13 @@ func TestCaseFold(t *testing.T) { func wordsAsSymbols(doc Document) Document { re := regexp.MustCompile(`\b\w{2,}\b`) var symbols []DocumentSection - var symbolsMetadata []*Symbol + var symbolsMetadata []*zoekt.Symbol 
for _, match := range re.FindAllIndex(doc.Content, -1) { symbols = append(symbols, DocumentSection{ Start: uint32(match[0]), End: uint32(match[1]), }) - symbolsMetadata = append(symbolsMetadata, &Symbol{Kind: "method"}) + symbolsMetadata = append(symbolsMetadata, &zoekt.Symbol{Kind: "method"}) } doc.Symbols = symbols doc.SymbolsMetaData = symbolsMetadata @@ -407,7 +407,7 @@ func TestSearchStats(t *testing.T) { ) t.Run("LineMatches", func(t *testing.T) { - sres, err := searcher.Search(ctx, andQuery, &SearchOptions{}) + sres, err := searcher.Search(ctx, andQuery, &zoekt.SearchOptions{}) if err != nil { t.Fatal(err) } @@ -438,11 +438,11 @@ func TestSearchStats(t *testing.T) { cases := []struct { Name string Q query.Q - Want Stats + Want zoekt.Stats }{{ Name: "and-query", Q: andQuery, - Want: Stats{ + Want: zoekt.Stats{ FilesLoaded: 1, ContentBytesLoaded: 22, IndexBytesLoaded: 10, @@ -460,7 +460,7 @@ func TestSearchStats(t *testing.T) { Content: true, CaseSensitive: true, }, - Want: Stats{ + Want: zoekt.Stats{ ContentBytesLoaded: 14, IndexBytesLoaded: 1, FileCount: 1, @@ -477,7 +477,7 @@ func TestSearchStats(t *testing.T) { Pattern: "a y", Content: true, }, - Want: Stats{ + Want: zoekt.Stats{ ContentBytesLoaded: 14, IndexBytesLoaded: 1, FileCount: 1, @@ -495,7 +495,7 @@ func TestSearchStats(t *testing.T) { Content: true, CaseSensitive: true, }, - Want: Stats{ + Want: zoekt.Stats{ ShardsSkippedFilter: 1, NgramLookups: 1, // only had to lookup once }, @@ -513,7 +513,7 @@ func TestSearchStats(t *testing.T) { CaseSensitive: true, }, ), - Want: Stats{ + Want: zoekt.Stats{ IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning. ShardsSkippedFilter: 1, NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2). @@ -525,7 +525,7 @@ func TestSearchStats(t *testing.T) { Content: true, CaseSensitive: true, }}, - Want: Stats{ + Want: zoekt.Stats{ IndexBytesLoaded: 3, FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index MatchCount: 0, // even though there is a match it doesn't align with a symbol @@ -540,7 +540,7 @@ func TestSearchStats(t *testing.T) { Content: true, CaseSensitive: true, }}, - Want: Stats{ + Want: zoekt.Stats{ ContentBytesLoaded: 35, IndexBytesLoaded: 4, FileCount: 2, @@ -558,7 +558,7 @@ func TestSearchStats(t *testing.T) { Content: true, CaseSensitive: true, }}, - Want: Stats{ + Want: zoekt.Stats{ ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents IndexBytesLoaded: 10, FilesConsidered: 2, // important that we don't check 3 to ensure we are using the index @@ -575,7 +575,7 @@ func TestSearchStats(t *testing.T) { Content: true, CaseSensitive: true, }}, - Want: Stats{ + Want: zoekt.Stats{ ContentBytesLoaded: 35, IndexBytesLoaded: 2, FileCount: 2, @@ -594,7 +594,7 @@ func TestSearchStats(t *testing.T) { if err != nil { t.Fatal(err) } - if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { + if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { t.Errorf("unexpected Stats (-want +got):\n%s", diff) } }) @@ -713,9 +713,9 @@ func TestFileSearch(t *testing.T) { } got := matches[0].LineMatches[0] - want := LineMatch{ + want := zoekt.LineMatch{ Line: []byte("banana"), - LineFragments: []LineFragmentMatch{{ + LineFragments: []zoekt.LineFragmentMatch{{ Offset: 1, LineOffset: 1, MatchLength: 4, @@ -740,12 +740,12 @@ func 
TestFileSearch(t *testing.T) { } got := matches[0].ChunkMatches[0] - want := ChunkMatch{ + want := zoekt.ChunkMatch{ Content: []byte("banana"), - ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, - Ranges: []Range{{ - Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2}, - End: Location{ByteOffset: 5, LineNumber: 1, Column: 6}, + ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, + Ranges: []zoekt.Range{{ + Start: zoekt.Location{ByteOffset: 1, LineNumber: 1, Column: 2}, + End: zoekt.Location{ByteOffset: 5, LineNumber: 1, Column: 6}, }}, FileName: true, } @@ -764,12 +764,12 @@ func TestFileSearch(t *testing.T) { } got := matches[0].ChunkMatches[0] - want := ChunkMatch{ + want := zoekt.ChunkMatch{ Content: []byte("banana"), - ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, - Ranges: []Range{{ - Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, - End: Location{ByteOffset: 6, LineNumber: 1, Column: 7}, + ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, + Ranges: []zoekt.Range{{ + Start: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, + End: zoekt.Location{ByteOffset: 6, LineNumber: 1, Column: 7}, }}, FileName: true, } @@ -1005,13 +1005,13 @@ func TestSearchBM25MatchScores(t *testing.T) { t.Run("LineMatches", func(t *testing.T) { q := &query.Substring{Pattern: "two"} - sres, err := searcher.Search(ctx, q, &SearchOptions{UseBM25Scoring: true}) + sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true}) if err != nil { t.Fatal(err) } matches := sres.Files if len(matches) != 1 { t.Fatalf("want 1 file match, got %d", len(matches)) } if len(matches[0].LineMatches) != 2 { @@ -1025,14 +1025,14 @@ func TestSearchBM25MatchScores(t *testing.T) { t.Run("ChunkMatches", func(t *testing.T) { q := &query.Substring{Pattern: "five"} - sres, err := searcher.Search(ctx, q, &SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) + sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) if err != nil { t.Fatal(err) } matches := sres.Files if len(matches) != 1 { t.Fatalf("want 1 file match, got %d", len(matches)) } if len(matches[0].ChunkMatches) != 2 { @@ -1052,14 +1052,14 @@ func TestSearchBM25MatchScores(t *testing.T) { }, } - sres, err := searcher.Search(ctx, q, &SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) + sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) if err != nil { t.Fatal(err) } matches := sres.Files if len(matches) != 2 { t.Fatalf("want 2 file match, got %d", len(matches)) } foundSymbolInfo := false @@ -1205,8 +1205,8 @@ func TestDocumentOrder(t *testing.T) { } func TestBranchMask(t *testing.T) { - b := testIndexBuilder(t, &Repository{ - Branches: []RepositoryBranch{ + b := testIndexBuilder(t, &zoekt.Repository{ + Branches: []zoekt.RepositoryBranch{ {"master", "v-master"}, {"stable", "v-stable"}, {"bonzai", "v-bonzai"}, @@ -1258,10 +1258,10 @@ func TestBranchMask(t *testing.T) { func TestBranchLimit(t *testing.T) { for limit := 64; limit <= 65; limit++ { - r := &Repository{} + r := &zoekt.Repository{} for i := 0; i < limit; i++ { s := fmt.Sprintf("b%d", i) - r.Branches = append(r.Branches, RepositoryBranch{ + r.Branches = 
append(r.Branches, zoekt.RepositoryBranch{ s, "v-" + s, }) } @@ -1276,8 +1276,8 @@ func TestBranchLimit(t *testing.T) { func TestBranchReport(t *testing.T) { branches := []string{"stable", "master"} - b := testIndexBuilder(t, &Repository{ - Branches: []RepositoryBranch{ + b := testIndexBuilder(t, &zoekt.Repository{ + Branches: []zoekt.RepositoryBranch{ {"stable", "vs"}, {"master", "vm"}, }, @@ -1314,8 +1314,8 @@ func TestBranchReport(t *testing.T) { } func TestBranchVersions(t *testing.T) { - b := testIndexBuilder(t, &Repository{ - Branches: []RepositoryBranch{ + b := testIndexBuilder(t, &zoekt.Repository{ + Branches: []zoekt.RepositoryBranch{ {"stable", "v-stable"}, {"master", "v-master"}, }, @@ -1380,8 +1380,8 @@ func TestRegexp(t *testing.T) { } got := sres.Files[0].LineMatches[0] - want := LineMatch{ - LineFragments: []LineFragmentMatch{{ + want := zoekt.LineMatch{ + LineFragments: []zoekt.LineFragmentMatch{{ LineOffset: 3, Offset: 3, MatchLength: 11, @@ -1409,12 +1409,12 @@ func TestRegexp(t *testing.T) { } got := sres.Files[0].ChunkMatches[0] - want := ChunkMatch{ + want := zoekt.ChunkMatch{ Content: content, - ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, - Ranges: []Range{{ - Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4}, - End: Location{ByteOffset: 14, LineNumber: 1, Column: 15}, + ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, + Ranges: []zoekt.Range{{ + Start: zoekt.Location{ByteOffset: 3, LineNumber: 1, Column: 4}, + End: zoekt.Location{ByteOffset: 14, LineNumber: 1, Column: 15}, }}, } @@ -1499,7 +1499,7 @@ func TestRepoName(t *testing.T) { content := []byte("bla the needle") // ----------------01234567890123 - b := testIndexBuilder(t, &Repository{Name: "bla"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "bla"}, Document{Name: "f1", Content: content}) t.Run("LineMatches", func(t *testing.T) { @@ -1629,7 +1629,7 @@ func TestMergeMatches(t *testing.T) { func TestRepoURL(t *testing.T) { content := []byte("blablabla") - b := testIndexBuilder(t, &Repository{ + b := testIndexBuilder(t, &zoekt.Repository{ Name: "name", URL: "URL", CommitURLTemplate: "commit", @@ -1662,7 +1662,7 @@ func TestRegexpCaseSensitive(t *testing.T) { }) if len(res.Files) != 1 { t.Fatalf("got %v, want one match", res.Files) } }) @@ -1676,7 +1676,7 @@ func TestRegexpCaseSensitive(t *testing.T) { ) if len(res.Files) != 1 { t.Fatalf("got %v, want one match", res.Files) } }) } @@ -1693,7 +1693,7 @@ func TestRegexpCaseFolding(t *testing.T) { }) if len(res.Files) != 1 { t.Fatalf("got %v, want one match", res.Files) } } @@ -1800,7 +1800,7 @@ func TestSymbolRank(t *testing.T) { t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) } if res.Files[0].FileName != "f2" { t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) } }) @@ -1815,7 +1815,7 @@ func TestSymbolRank(t *testing.T) { t.Fatalf("got %d files, want 3 files. 
Full data: %v", len(res.Files), res.Files) } if res.Files[0].FileName != "f2" { - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) } }) } @@ -1850,7 +1850,7 @@ func TestSymbolRankRegexpUTF8(t *testing.T) { t.Fatalf("got %#v, want 3 files", res.Files) } if res.Files[0].FileName != "f2" { - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) } }) @@ -1864,7 +1864,7 @@ func TestSymbolRankRegexpUTF8(t *testing.T) { t.Fatalf("got %#v, want 3 files", res.Files) } if res.Files[0].FileName != "f2" { - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) } }) } @@ -1900,7 +1900,7 @@ func TestPartialSymbolRank(t *testing.T) { t.Fatalf("got %#v, want 3 files", res.Files) } if res.Files[0].FileName != "f2" { - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) } }) @@ -1914,7 +1914,7 @@ func TestPartialSymbolRank(t *testing.T) { t.Fatalf("got %#v, want 3 files", res.Files) } if res.Files[0].FileName != "f2" { - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) } }) } @@ -1922,7 +1922,7 @@ func TestPartialSymbolRank(t *testing.T) { func TestNegativeRepo(t *testing.T) { content := []byte("bla the needle") // ----------------01234567890123 - b := testIndexBuilder(t, &Repository{ + b := testIndexBuilder(t, &zoekt.Repository{ Name: "bla", }, Document{Name: "f1", Content: content}) @@ -1956,9 +1956,9 @@ func TestListRepos(t *testing.T) { // ----------------012345678901234- t.Run("default and minimal fallback", func(t *testing.T) { - repo := &Repository{ + repo := &zoekt.Repository{ Name: "reponame", - Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, + Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, } b := testIndexBuilder(t, repo, Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, @@ -1968,11 +1968,11 @@ func TestListRepos(t *testing.T) { searcher := searcherForTest(t, b) - for _, opts := range []*ListOptions{ + for _, opts := range []*zoekt.ListOptions{ nil, {}, - {Field: RepoListFieldRepos}, - {Field: RepoListFieldReposMap}, + {Field: zoekt.RepoListFieldRepos}, + {Field: zoekt.RepoListFieldReposMap}, } { t.Run(fmt.Sprint(opts), func(t *testing.T) { q := &query.Repo{Regexp: regexp.MustCompile("epo")} @@ -1982,10 +1982,10 @@ func TestListRepos(t *testing.T) { t.Fatalf("List(%v): %v", q, err) } - want := &RepoList{ - Repos: []*RepoListEntry{{ + want := &zoekt.RepoList{ + Repos: []*zoekt.RepoListEntry{{ Repository: *repo, - Stats: RepoStats{ + Stats: zoekt.RepoStats{ Documents: 4, ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4 Shards: 1, @@ -1995,7 +1995,7 @@ func TestListRepos(t *testing.T) { OtherBranchesNewLinesCount: 3, }, }}, - Stats: RepoStats{ + Stats: zoekt.RepoStats{ Repos: 1, Documents: 4, ContentBytes: 68, @@ -2008,10 +2008,10 @@ func TestListRepos(t *testing.T) { } ignored := []cmp.Option{ cmpopts.EquateEmpty(), - cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), - cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), - cmpopts.IgnoreFields(Repository{}, "SubRepoMap"), - cmpopts.IgnoreFields(Repository{}, "priority"), + cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), + cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), + cmpopts.IgnoreFields(zoekt.Repository{}, 
"SubRepoMap"), + cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), } if diff := cmp.Diff(want, res, ignored...); diff != "" { t.Fatalf("mismatch (-want +got):\n%s", diff) @@ -2030,10 +2030,10 @@ func TestListRepos(t *testing.T) { }) t.Run("minimal", func(t *testing.T) { - repo := &Repository{ + repo := &zoekt.Repository{ ID: 1234, Name: "reponame", - Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, + Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, RawConfig: map[string]string{"repoid": "1234"}, } b := testIndexBuilder(t, repo, @@ -2045,19 +2045,19 @@ func TestListRepos(t *testing.T) { searcher := searcherForTest(t, b) q := &query.Repo{Regexp: regexp.MustCompile("epo")} - res, err := searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap}) + res, err := searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}) if err != nil { t.Fatalf("List(%v): %v", q, err) } - want := &RepoList{ - ReposMap: ReposMap{ + want := &zoekt.RepoList{ + ReposMap: zoekt.ReposMap{ repo.ID: { HasSymbols: repo.HasSymbols, Branches: repo.Branches, }, }, - Stats: RepoStats{ + Stats: zoekt.RepoStats{ Repos: 1, Shards: 1, Documents: 4, @@ -2070,14 +2070,14 @@ func TestListRepos(t *testing.T) { } ignored := []cmp.Option{ - cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"), + cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"), } if diff := cmp.Diff(want, res, ignored...); diff != "" { t.Fatalf("mismatch (-want +got):\n%s", diff) } q = &query.Repo{Regexp: regexp.MustCompile("bla")} - res, err = searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap}) + res, err = searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}) if err != nil { t.Fatalf("List(%v): %v", q, err) } @@ -2090,7 +2090,7 @@ func TestListRepos(t *testing.T) { func TestListReposByContent(t *testing.T) { content := []byte("bla the needle") - b := testIndexBuilder(t, &Repository{ + b := testIndexBuilder(t, &zoekt.Repository{ Name: "reponame", }, Document{Name: "f1", Content: content}, @@ -2121,7 +2121,7 @@ func TestListReposByContent(t *testing.T) { func TestMetadata(t *testing.T) { content := []byte("bla the needle") - b := testIndexBuilder(t, &Repository{ + b := testIndexBuilder(t, &zoekt.Repository{ Name: "reponame", }, Document{Name: "f1", Content: content}, Document{Name: "f2", Content: content}) @@ -2225,7 +2225,7 @@ func TestMatchNewline(t *testing.T) { } func TestSubRepo(t *testing.T) { - subRepos := map[string]*Repository{ + subRepos := map[string]*zoekt.Repository{ "sub": { Name: "sub-name", LineFragmentTemplate: "sub-line", @@ -2234,7 +2234,7 @@ func TestSubRepo(t *testing.T) { content := []byte("pqr\nalex") - b := testIndexBuilder(t, &Repository{ + b := testIndexBuilder(t, &zoekt.Repository{ SubRepoMap: subRepos, }, Document{ Name: "sub/f1", @@ -2270,7 +2270,7 @@ func TestSearchEither(t *testing.T) { sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}) if len(sres.Files) != 1 { - t.Fatalf("got %v, wanted 1 match", sres.Files) + t.Fatalf("got %v, wanted 1 index", sres.Files) } if got, want := sres.Files[0].FileName, "f1"; got != want { @@ -2286,7 +2286,7 @@ func TestSearchEither(t *testing.T) { sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts) if len(sres.Files) != 1 { - t.Fatalf("got %v, wanted 1 match", sres.Files) + t.Fatalf("got %v, wanted 1 index", sres.Files) } if got, want := 
sres.Files[0].FileName, "f1"; got != want { @@ -2304,14 +2304,14 @@ func TestUnicodeExactMatch(t *testing.T) { t.Run("LineMatches", func(t *testing.T) { if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 { - t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) + t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files) } }) t.Run("ChunkMatches", func(t *testing.T) { res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts) if len(res.Files) != 1 { - t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) + t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files) } }) } @@ -2325,12 +2325,12 @@ func TestUnicodeCoverContent(t *testing.T) { t.Run("LineMatches", func(t *testing.T) { if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 { - t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) + t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files) } res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}) if len(res.Files) != 1 { - t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) + t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files) } if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { @@ -2341,12 +2341,12 @@ func TestUnicodeCoverContent(t *testing.T) { t.Run("ChunkMatches", func(t *testing.T) { res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts) if len(res.Files) != 0 { - t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) + t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files) } res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts) if len(res.Files) != 1 { - t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) + t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files) } got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset @@ -2367,7 +2367,7 @@ func TestUnicodeNonCoverContent(t *testing.T) { t.Run("LineMatches", func(t *testing.T) { res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}) if len(res.Files) != 1 { - t.Fatalf("got %v, wanted 1 match", res.Files) + t.Fatalf("got %v, wanted 1 index", res.Files) } if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { @@ -2378,7 +2378,7 @@ func TestUnicodeNonCoverContent(t *testing.T) { t.Run("ChunkMatches", func(t *testing.T) { res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts) if len(res.Files) != 1 { - t.Fatalf("got %v, wanted 1 match", res.Files) + t.Fatalf("got %v, wanted 1 index", res.Files) } got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset @@ -2406,7 +2406,7 @@ func TestUnicodeVariableLength(t *testing.T) { res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}) if len(res.Files) != 1 { - t.Fatalf("got %v, wanted 1 match", res.Files) + t.Fatalf("got %v, wanted 1 index", res.Files) } }) @@ -2416,7 +2416,7 @@ func TestUnicodeVariableLength(t *testing.T) { res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts) if len(res.Files) != 1 { - t.Fatalf("got %v, wanted 1 match", res.Files) + t.Fatalf("got %v, wanted 1 index", res.Files) } }) } @@ -2437,7 +2437,7 @@ func TestUnicodeFileStartOffsets(t *testing.T) { q := &query.Substring{Pattern: 
wat, Content: true} res := searchForTest(t, b, q) if len(res.Files) != 1 { - t.Fatalf("got %v, wanted 1 match", res.Files) + t.Fatalf("got %v, wanted 1 index", res.Files) } } @@ -2476,7 +2476,7 @@ func TestLongFileUTF8(t *testing.T) { func TestEstimateDocCount(t *testing.T) { content := []byte("bla needle bla") - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: content}, Document{Name: "f2", Content: content}, ) @@ -2486,7 +2486,7 @@ func TestEstimateDocCount(t *testing.T) { query.NewAnd( &query.Substring{Pattern: "needle"}, &query.Repo{Regexp: regexp.MustCompile("reponame")}, - ), SearchOptions{ + ), zoekt.SearchOptions{ EstimateDocCount: true, }); sres.Stats.ShardFilesConsidered != 2 { t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) @@ -2495,7 +2495,7 @@ func TestEstimateDocCount(t *testing.T) { query.NewAnd( &query.Substring{Pattern: "needle"}, &query.Repo{Regexp: regexp.MustCompile("nomatch")}, - ), SearchOptions{ + ), zoekt.SearchOptions{ EstimateDocCount: true, }); sres.Stats.ShardFilesConsidered != 0 { t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) @@ -2507,7 +2507,7 @@ func TestEstimateDocCount(t *testing.T) { query.NewAnd( &query.Substring{Pattern: "needle"}, &query.Repo{Regexp: regexp.MustCompile("reponame")}, - ), SearchOptions{ + ), zoekt.SearchOptions{ EstimateDocCount: true, ChunkMatches: true, }); sres.Stats.ShardFilesConsidered != 2 { @@ -2517,7 +2517,7 @@ func TestEstimateDocCount(t *testing.T) { query.NewAnd( &query.Substring{Pattern: "needle"}, &query.Repo{Regexp: regexp.MustCompile("nomatch")}, - ), SearchOptions{ + ), zoekt.SearchOptions{ EstimateDocCount: true, ChunkMatches: true, }); sres.Stats.ShardFilesConsidered != 0 { @@ -2615,7 +2615,7 @@ func TestIOStats(t *testing.T) { t.Run("LineMatches with BM25", func(t *testing.T) { q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} - res := searchForTest(t, b, q, SearchOptions{UseBM25Scoring: true}) + res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true}) // 4096 (content) + 2 (overhead: newlines or doc sections) if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { @@ -2631,7 +2631,7 @@ func TestIOStats(t *testing.T) { t.Run("ChunkMatches with BM25", func(t *testing.T) { q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} - res := searchForTest(t, b, q, SearchOptions{UseBM25Scoring: true, ChunkMatches: true}) + res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true}) // 4096 (content) + 2 (overhead: newlines or doc sections) if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { @@ -2708,9 +2708,9 @@ func TestAndOrUnicode(t *testing.T) { query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")}, query.NewOr(&query.Branch{Pattern: "master"})))) - b := testIndexBuilder(t, &Repository{ + b := testIndexBuilder(t, &zoekt.Repository{ Name: "name", - Branches: []RepositoryBranch{{"master", "master-version"}}, + Branches: []zoekt.RepositoryBranch{{"master", "master-version"}}, }, Document{ Name: "f2", Content: []byte("orange\u2318apple"), @@ -2735,7 +2735,7 @@ func TestAndOrUnicode(t *testing.T) { func TestAndShort(t *testing.T) { content := []byte("bla needle at orange bla") - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: 
content}, Document{Name: "f2", Content: []byte("xx at xx")}, Document{Name: "f3", Content: []byte("yy orange xx")}, @@ -2761,7 +2761,7 @@ func TestAndShort(t *testing.T) { func TestNoCollectRegexpSubstring(t *testing.T) { content := []byte("bla final bla\nfoo final, foo") - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: content}, ) @@ -2775,7 +2775,7 @@ func TestNoCollectRegexpSubstring(t *testing.T) { t.Fatalf("got %v, want 1 result", res.Files) } if f := res.Files[0]; len(f.LineMatches) != 1 { - t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) + t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches)) } }) @@ -2785,12 +2785,12 @@ func TestNoCollectRegexpSubstring(t *testing.T) { t.Fatalf("got %v, want 1 result", res.Files) } if f := res.Files[0]; len(f.ChunkMatches) != 1 { - t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) + t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches)) } }) } -func printLineMatches(ms []LineMatch) string { +func printLineMatches(ms []zoekt.LineMatch) string { var ss []string for _, m := range ms { ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments)) @@ -2801,7 +2801,7 @@ func printLineMatches(ms []LineMatch) string { func TestLang(t *testing.T) { content := []byte("bla needle bla") - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: content}, Document{Name: "f2", Language: "java", Content: content}, Document{Name: "f3", Language: "cpp", Content: content}, @@ -2835,7 +2835,7 @@ func TestLang(t *testing.T) { func TestLangShortcut(t *testing.T) { content := []byte("bla needle bla") - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f2", Language: "java", Content: content}, Document{Name: "f3", Language: "cpp", Content: content}, ) @@ -2849,7 +2849,7 @@ func TestLangShortcut(t *testing.T) { t.Fatalf("got %v, want 0 results", res.Files) } if res.Stats.IndexBytesLoaded > 0 { - t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) + t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) } }) @@ -2859,14 +2859,14 @@ func TestLangShortcut(t *testing.T) { t.Fatalf("got %v, want 0 results", res.Files) } if res.Stats.IndexBytesLoaded > 0 { - t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) + t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) } }) } func TestNoTextMatchAtoms(t *testing.T) { content := []byte("bla needle bla") - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: content}, Document{Name: "f2", Language: "java", Content: content}, Document{Name: "f3", Language: "cpp", Content: content}, @@ -2889,7 +2889,7 @@ func TestNoTextMatchAtoms(t *testing.T) { func TestNoPositiveAtoms(t *testing.T) { content := []byte("bla needle bla") - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: content}, Document{Name: "f2", Content: content}, ) @@ -2915,7 +2915,7 @@ func TestSymbolBoundaryStart(t *testing.T) { content := []byte("start\nbla bla\nend") // 
----------------012345-67890123-456 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{ Name: "f1", Content: content, @@ -2952,7 +2952,7 @@ func TestSymbolBoundaryEnd(t *testing.T) { content := []byte("start\nbla bla\nend") // ----------------012345-67890123-456 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{ Name: "f1", Content: content, @@ -2989,7 +2989,7 @@ func TestSymbolSubstring(t *testing.T) { content := []byte("bla\nsymblabla\nbla") // ----------------0123-4567890123-456 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{ Name: "f1", Content: content, @@ -3026,7 +3026,7 @@ func TestSymbolSubstringExact(t *testing.T) { content := []byte("bla\nsym\nbla\nsym\nasymb") // ----------------0123-4567-890123456-78901 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{ Name: "f1", Content: content, @@ -3063,7 +3063,7 @@ func TestSymbolRegexpExact(t *testing.T) { content := []byte("blah\nbla\nbl") // ----------------01234-5678-90 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{ Name: "f1", Content: content, @@ -3100,7 +3100,7 @@ func TestSymbolRegexpPartial(t *testing.T) { content := []byte("abcdef") // ----------------012345 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{ Name: "f1", Content: content, @@ -3155,7 +3155,7 @@ func TestSymbolRegexpAll(t *testing.T) { }, } - b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...) + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, docs...) q := &query.Symbol{ Expr: &query.Regexp{Regexp: mustParseRE(".*")}, } @@ -3200,7 +3200,7 @@ func TestSymbolRegexpAll(t *testing.T) { func TestHitIterTerminate(t *testing.T) { // contrived input: trigram frequencies forces selecting abc + // def for the distance iteration. There is no match, so this // will advance the compressedPostingIterator to beyond the // end.
content := []byte("abc bcdbcd cdecde abcabc def efg") @@ -3395,7 +3395,7 @@ func TestDocChecker(t *testing.T) { } func TestLineAnd(t *testing.T) { - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")}, Document{Name: "f2", Content: []byte("apple orange\nbanana")}, Document{Name: "f3", Content: []byte("banana grape")}, @@ -3431,7 +3431,7 @@ func TestLineAnd(t *testing.T) { } func TestLineAndFileName(t *testing.T) { - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: []byte("apple banana\ngrape")}, Document{Name: "f2", Content: []byte("apple banana\norange")}, Document{Name: "apple banana", Content: []byte("banana grape")}, @@ -3467,7 +3467,7 @@ func TestLineAndFileName(t *testing.T) { } func TestMultiLineRegex(t *testing.T) { - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: []byte("apple banana\ngrape")}, Document{Name: "f2", Content: []byte("apple orange")}, Document{Name: "f3", Content: []byte("grape apple")}, @@ -3511,7 +3511,7 @@ func TestMultiLineRegex(t *testing.T) { } func TestSearchTypeFileName(t *testing.T) { - b := testIndexBuilder(t, &Repository{ + b := testIndexBuilder(t, &zoekt.Repository{ Name: "reponame", }, Document{Name: "f1", Content: []byte("bla the needle")}, @@ -3520,7 +3520,7 @@ func TestSearchTypeFileName(t *testing.T) { ) t.Run("LineMatches", func(t *testing.T) { - wantSingleMatch := func(res *SearchResult, want string) { + wantSingleMatch := func(res *zoekt.SearchResult, want string) { t.Helper() fmatches := res.Files if len(fmatches) != 1 { @@ -3562,7 +3562,7 @@ func TestSearchTypeFileName(t *testing.T) { }) t.Run("ChunkMatches", func(t *testing.T) { - wantSingleMatch := func(res *SearchResult, want string) { + wantSingleMatch := func(res *zoekt.SearchResult, want string) { t.Helper() fmatches := res.Files if len(fmatches) != 1 { @@ -3609,7 +3609,7 @@ func TestSearchTypeFileName(t *testing.T) { } func TestSearchTypeLanguage(t *testing.T) { - b := testIndexBuilder(t, &Repository{ + b := testIndexBuilder(t, &zoekt.Repository{ Name: "reponame", }, Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")}, @@ -3621,7 +3621,7 @@ func TestSearchTypeLanguage(t *testing.T) { t.Log(b.languageMap) t.Run("LineMatches", func(t *testing.T) { - wantSingleMatch := func(res *SearchResult, want string) { + wantSingleMatch := func(res *zoekt.SearchResult, want string) { t.Helper() fmatches := res.Files if len(fmatches) != 1 { @@ -3668,7 +3668,7 @@ func TestSearchTypeLanguage(t *testing.T) { }) t.Run("ChunkMatches", func(t *testing.T) { - wantSingleMatch := func(res *SearchResult, want string) { + wantSingleMatch := func(res *zoekt.SearchResult, want string) { t.Helper() fmatches := res.Files if len(fmatches) != 1 { @@ -3716,12 +3716,12 @@ func TestSearchTypeLanguage(t *testing.T) { func TestStats(t *testing.T) { ignored := []cmp.Option{ cmpopts.EquateEmpty(), - cmpopts.IgnoreFields(RepoListEntry{}, "Repository"), - cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), - cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), + cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "Repository"), + cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), + cmpopts.IgnoreFields(zoekt.RepoStats{}, 
"IndexBytes"), } - repoListEntries := func(b *IndexBuilder) []RepoListEntry { + repoListEntries := func(b *IndexBuilder) []zoekt.RepoListEntry { searcher := searcherForTest(t, b) indexdata := searcher.(*indexData) return indexdata.repoListEntry @@ -3730,9 +3730,9 @@ func TestStats(t *testing.T) { t.Run("one empty repo", func(t *testing.T) { b := testIndexBuilder(t, nil) got := repoListEntries(b) - want := []RepoListEntry{ + want := []zoekt.RepoListEntry{ { - Stats: RepoStats{ + Stats: zoekt.RepoStats{ Repos: 0, Shards: 1, Documents: 0, @@ -3756,9 +3756,9 @@ func TestStats(t *testing.T) { Document{Name: "doc 1", Content: []byte("content 1")}, ) got := repoListEntries(b) - want := []RepoListEntry{ + want := []zoekt.RepoListEntry{ { - Stats: RepoStats{ + Stats: zoekt.RepoStats{ Repos: 0, Shards: 1, Documents: 2, @@ -3778,7 +3778,7 @@ func TestStats(t *testing.T) { t.Run("one compound shard", func(t *testing.T) { b := testIndexBuilderCompound(t, - []*Repository{ + []*zoekt.Repository{ {Name: "repo 0"}, {Name: "repo 1"}, }, @@ -3794,9 +3794,9 @@ func TestStats(t *testing.T) { }, ) got := repoListEntries(b) - want := []RepoListEntry{ + want := []zoekt.RepoListEntry{ { - Stats: RepoStats{ + Stats: zoekt.RepoStats{ Repos: 0, Shards: 1, Documents: 2, @@ -3808,7 +3808,7 @@ func TestStats(t *testing.T) { }, }, { - Stats: RepoStats{ + Stats: zoekt.RepoStats{ Repos: 0, Shards: 1, Documents: 2, @@ -3828,7 +3828,7 @@ func TestStats(t *testing.T) { t.Run("compound shard with empty repos", func(t *testing.T) { b := testIndexBuilderCompound(t, - []*Repository{ + []*zoekt.Repository{ {Name: "repo 0"}, {Name: "repo 1"}, {Name: "repo 2"}, @@ -3845,18 +3845,18 @@ func TestStats(t *testing.T) { ) got := repoListEntries(b) - entryEmpty := RepoListEntry{Stats: RepoStats{ + entryEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{ Shards: 1, Documents: 0, ContentBytes: 0, }} - entryNonEmpty := RepoListEntry{Stats: RepoStats{ + entryNonEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{ Shards: 1, Documents: 1, ContentBytes: 14, }} - want := []RepoListEntry{ + want := []zoekt.RepoListEntry{ entryNonEmpty, entryEmpty, entryNonEmpty, @@ -3898,8 +3898,8 @@ func TestWordSearch(t *testing.T) { } got := sres.Files[0].LineMatches[0] - want := LineMatch{ - LineFragments: []LineFragmentMatch{{ + want := zoekt.LineMatch{ + LineFragments: []zoekt.LineFragmentMatch{{ LineOffset: 7, Offset: 7, MatchLength: 3, @@ -3932,12 +3932,12 @@ func TestWordSearch(t *testing.T) { } got := sres.Files[0].ChunkMatches[0] - want := ChunkMatch{ + want := zoekt.ChunkMatch{ Content: content, - ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, - Ranges: []Range{{ - Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8}, - End: Location{ByteOffset: 10, LineNumber: 1, Column: 11}, + ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, + Ranges: []zoekt.Range{{ + Start: zoekt.Location{ByteOffset: 7, LineNumber: 1, Column: 8}, + End: zoekt.Location{ByteOffset: 10, LineNumber: 1, Column: 11}, }}, } @@ -3971,14 +3971,14 @@ func BenchmarkScoreChunkMatches(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - sres, err := searcher.Search(ctx, q, &SearchOptions{ChunkMatches: true, NumContextLines: 1}) + sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{ChunkMatches: true, NumContextLines: 1}) if err != nil { b.Fatal(err) } matches := sres.Files if len(matches) == 0 { - b.Fatalf("want file match, got none") + b.Fatalf("want file index, got none") } } }) diff --git a/indexbuilder.go b/index/indexbuilder.go 
similarity index 93% rename from indexbuilder.go rename to index/indexbuilder.go index 027edf9f4..f4ade2f51 100644 --- a/indexbuilder.go +++ b/index/indexbuilder.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "bytes" @@ -29,6 +29,7 @@ import ( "time" "unicode/utf8" + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/internal/languages" ) @@ -194,7 +195,7 @@ type IndexBuilder struct { namePostings *postingsBuilder // root repositories - repoList []Repository + repoList []zoekt.Repository // name to index. subRepoIndices []map[string]uint32 @@ -213,8 +214,8 @@ type IndexBuilder struct { ID string } -func (d *Repository) verify() error { - for _, t := range []string{d.FileURLTemplate, d.LineFragmentTemplate, d.CommitURLTemplate} { +func verify(repo *zoekt.Repository) error { + for _, t := range []string{repo.FileURLTemplate, repo.LineFragmentTemplate, repo.CommitURLTemplate} { if _, err := ParseTemplate(t); err != nil { return err } @@ -266,11 +267,11 @@ func (b *IndexBuilder) NumFiles() int { // NewIndexBuilder creates a fresh IndexBuilder. The passed in // Repository contains repo metadata, and may be set to nil. -func NewIndexBuilder(r *Repository) (*IndexBuilder, error) { +func NewIndexBuilder(r *zoekt.Repository) (*IndexBuilder, error) { b := newIndexBuilder() if r == nil { - r = &Repository{} + r = &zoekt.Repository{} } if err := b.setRepository(r); err != nil { return nil, err @@ -292,8 +293,8 @@ func newIndexBuilder() *IndexBuilder { } } -func (b *IndexBuilder) setRepository(desc *Repository) error { - if err := desc.verify(); err != nil { +func (b *IndexBuilder) setRepository(desc *zoekt.Repository) error { + if err := verify(desc); err != nil { return err } @@ -304,7 +305,7 @@ func (b *IndexBuilder) setRepository(desc *Repository) error { repo := *desc // copy subrepomap without root - repo.SubRepoMap = map[string]*Repository{} + repo.SubRepoMap = map[string]*zoekt.Repository{} for k, v := range desc.SubRepoMap { if k != "" { repo.SubRepoMap[k] = v @@ -316,30 +317,9 @@ func (b *IndexBuilder) setRepository(desc *Repository) error { return b.populateSubRepoIndices() } -type DocumentSection struct { - Start, End uint32 -} - -// Document holds a document (file) to index. -type Document struct { - Name string - Content []byte - Branches []string - SubRepositoryPath string - Language string - - // If set, something is wrong with the file contents, and this - // is the reason it wasn't indexed. - SkipReason string - - // Document sections for symbols. Offsets should use bytes. - Symbols []DocumentSection - SymbolsMetaData []*Symbol -} - type symbolSlice struct { symbols []DocumentSection - metaData []*Symbol + metaData []*zoekt.Symbol } func (s symbolSlice) Len() int { return len(s.symbols) } @@ -370,7 +350,7 @@ func (b *IndexBuilder) populateSubRepoIndices() error { return nil } -func mkSubRepoIndices(repo Repository) map[string]uint32 { +func mkSubRepoIndices(repo zoekt.Repository) map[string]uint32 { paths := []string{""} for k := range repo.SubRepoMap { paths = append(paths, k) @@ -401,7 +381,7 @@ func (b *IndexBuilder) symbolKindID(t string) uint32 { return b.symKindIndex[t] } -func (b *IndexBuilder) addSymbols(symbols []*Symbol) { +func (b *IndexBuilder) addSymbols(symbols []*zoekt.Symbol) { for _, sym := range symbols { b.symMetaData = append(b.symMetaData,
diff --git a/indexbuilder_test.go b/index/indexbuilder_test.go similarity index 98% rename from indexbuilder_test.go rename to index/indexbuilder_test.go index 7487a5ad3..e66b3cf83 100644 --- a/indexbuilder_test.go +++ b/index/indexbuilder_test.go @@ -1,4 +1,4 @@ -package zoekt +package index import ( "strings" diff --git a/indexdata.go b/index/indexdata.go similarity index 96% rename from indexdata.go rename to index/indexdata.go index 3e8095d3b..557ea6314 100644 --- a/indexdata.go +++ b/index/indexdata.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "encoding/binary" @@ -24,6 +24,7 @@ import ( "slices" "unicode/utf8" + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/query" ) @@ -76,8 +77,8 @@ type indexData struct { // name => mask (power of 2) branchIDs []map[string]uint - metaData IndexMetadata - repoMetaData []Repository + metaData zoekt.IndexMetadata + repoMetaData []zoekt.Repository subRepos []uint32 subRepoPaths [][]string @@ -91,7 +92,7 @@ type indexData struct { // inverse of LanguageMap in metaData languageMap map[uint16]string - repoListEntry []RepoListEntry + repoListEntry []zoekt.RepoListEntry // repository indexes for all the files repos []uint16 @@ -139,7 +140,7 @@ func (d *symbolData) kind(i uint32) []byte { } // data returns the symbol at index i -func (d *symbolData) data(i uint32) *Symbol { +func (d *symbolData) data(i uint32) *zoekt.Symbol { size := uint32(4 * 4) // 4 uint32s offset := i * size if offset >= uint32(len(d.symMetaData)) { @@ -147,7 +148,7 @@ func (d *symbolData) data(i uint32) *Symbol { } metadata := d.symMetaData[offset : offset+size] - sym := &Symbol{} + sym := &zoekt.Symbol{} key := uint32SliceAt(metadata, 1) sym.Kind = string(d.kind(key)) key = uint32SliceAt(metadata, 2) @@ -172,10 +173,10 @@ func (d *indexData) getLanguage(idx uint32) uint16 { } // calculates stats for files in the range [start, end). -func (d *indexData) calculateStatsForFileRange(start, end uint32) RepoStats { +func (d *indexData) calculateStatsForFileRange(start, end uint32) zoekt.RepoStats { if start >= end { // An empty shard for an empty repository. - return RepoStats{ + return zoekt.RepoStats{ Shards: 1, } } @@ -190,7 +191,7 @@ func (d *indexData) calculateStatsForFileRange(start, end uint32) RepoStats { // here). Right now I don't like that these numbers are not true, especially // after aggregation. For now I will move forward with this until we can // chat more.
- return RepoStats{ + return zoekt.RepoStats{ ContentBytes: int64(bytesContent) + int64(bytesFN), Documents: int(end - start), // CR keegan for stefan: our shard count is going to go out of whack, @@ -206,7 +207,7 @@ func (d *indexData) calculateStatsForFileRange(start, end uint32) RepoStats { } func (d *indexData) calculateStats() error { - d.repoListEntry = make([]RepoListEntry, 0, len(d.repoMetaData)) + d.repoListEntry = make([]zoekt.RepoListEntry, 0, len(d.repoMetaData)) var start, end uint32 for repoID, md := range d.repoMetaData { @@ -218,7 +219,7 @@ func (d *indexData) calculateStats() error { return fmt.Errorf("shard documents out of order with respect to repositories: expected document %d to be part of repo %d", start, repoID) } - d.repoListEntry = append(d.repoListEntry, RepoListEntry{ + d.repoListEntry = append(d.repoListEntry, zoekt.RepoListEntry{ Repository: md, IndexMetadata: d.metaData, Stats: d.calculateStatsForFileRange(start, end), @@ -429,7 +430,7 @@ func (d *indexData) iterateNgrams(query *query.Substring) (*ngramIterationResult return &ngramIterationResults{ matchIterator: &noMatchTree{ Why: "freq=0", - Stats: Stats{ + Stats: zoekt.Stats{ NgramLookups: ngramLookups, }, }, diff --git a/indexdata_test.go b/index/indexdata_test.go similarity index 99% rename from indexdata_test.go rename to index/indexdata_test.go index f5bd677c8..82044f0e7 100644 --- a/indexdata_test.go +++ b/index/indexdata_test.go @@ -1,4 +1,4 @@ -package zoekt +package index import ( "math/rand" diff --git a/indexfile_other.go b/index/indexfile_other.go similarity index 99% rename from indexfile_other.go rename to index/indexfile_other.go index f192d9beb..2a5b427b9 100644 --- a/indexfile_other.go +++ b/index/indexfile_other.go @@ -15,7 +15,7 @@ //go:build !linux && !darwin // +build !linux,!darwin -package zoekt +package index import ( "fmt" diff --git a/indexfile_unix.go b/index/indexfile_unix.go similarity index 99% rename from indexfile_unix.go rename to index/indexfile_unix.go index fea4fae76..5771ff54f 100644 --- a/indexfile_unix.go +++ b/index/indexfile_unix.go @@ -14,7 +14,7 @@ //go:build linux || darwin -package zoekt +package index import ( "fmt" diff --git a/limit.go b/index/limit.go similarity index 81% rename from limit.go rename to index/limit.go index 1f67fc3fd..9de2b1eab 100644 --- a/limit.go +++ b/index/limit.go @@ -1,11 +1,15 @@ -package zoekt +package index -import "log" +import ( + "log" + + "github.com/sourcegraph/zoekt" +) // SortAndTruncateFiles is a convenience around SortFiles and // DisplayTruncator. Given an aggregated files it will sort and then truncate // based on the search options. -func SortAndTruncateFiles(files []FileMatch, opts *SearchOptions) []FileMatch { +func SortAndTruncateFiles(files []zoekt.FileMatch, opts *zoekt.SearchOptions) []zoekt.FileMatch { SortFiles(files) truncator, _ := NewDisplayTruncator(opts) files, _ = truncator(files) @@ -16,12 +20,12 @@ func SortAndTruncateFiles(files []FileMatch, opts *SearchOptions) []FileMatch { // display limits by truncating and mutating before. hasMore is true until the // limits are exhausted. Once hasMore is false each subsequent call will // return an empty after and hasMore false. -type DisplayTruncator func(before []FileMatch) (after []FileMatch, hasMore bool) +type DisplayTruncator func(before []zoekt.FileMatch) (after []zoekt.FileMatch, hasMore bool) // NewDisplayTruncator will return a DisplayTruncator which enforces the limits in // opts. 
If there are no limits to enforce, hasLimits is false and there is no // need to call DisplayTruncator. -func NewDisplayTruncator(opts *SearchOptions) (_ DisplayTruncator, hasLimits bool) { +func NewDisplayTruncator(opts *zoekt.SearchOptions) (_ DisplayTruncator, hasLimits bool) { docLimit := opts.MaxDocDisplayCount docLimited := docLimit > 0 @@ -31,12 +35,12 @@ func NewDisplayTruncator(opts *SearchOptions) (_ DisplayTruncator, hasLimits boo done := false if !docLimited && !matchLimited { - return func(fm []FileMatch) ([]FileMatch, bool) { + return func(fm []zoekt.FileMatch) ([]zoekt.FileMatch, bool) { return fm, true }, false } - return func(fm []FileMatch) ([]FileMatch, bool) { + return func(fm []zoekt.FileMatch) ([]zoekt.FileMatch, bool) { if done { return nil, false } @@ -60,8 +64,8 @@ func NewDisplayTruncator(opts *SearchOptions) (_ DisplayTruncator, hasLimits boo }, true } -func limitMatches(files []FileMatch, limit int, chunkMatches bool) ([]FileMatch, int) { - var limiter func(file *FileMatch, limit int) int +func limitMatches(files []zoekt.FileMatch, limit int, chunkMatches bool) ([]zoekt.FileMatch, int) { + var limiter func(file *zoekt.FileMatch, limit int) int if chunkMatches { limiter = limitChunkMatches } else { @@ -78,7 +82,7 @@ func limitMatches(files []FileMatch, limit int, chunkMatches bool) ([]FileMatch, // Limit the number of ChunkMatches in the given FileMatch, returning the // remaining limit, if any. -func limitChunkMatches(file *FileMatch, limit int) int { +func limitChunkMatches(file *zoekt.FileMatch, limit int) int { for i := range file.ChunkMatches { cm := &file.ChunkMatches[i] if len(cm.Ranges) > limit { @@ -127,7 +131,7 @@ func limitChunkMatches(file *FileMatch, limit int) int { // Limit the number of LineMatches in the given FileMatch, returning the // remaining limit, if any. -func limitLineMatches(file *FileMatch, limit int) int { +func limitLineMatches(file *zoekt.FileMatch, limit int) int { for i := range file.LineMatches { lm := &file.LineMatches[i] if len(lm.LineFragments) > limit { diff --git a/limit_test.go b/index/limit_test.go similarity index 82% rename from limit_test.go rename to index/limit_test.go index b6df2c492..a310ad21c 100644 --- a/limit_test.go +++ b/index/limit_test.go @@ -1,4 +1,4 @@ -package zoekt +package index import ( "bytes" @@ -6,6 +6,7 @@ import ( "testing" "github.com/google/go-cmp/cmp" + "github.com/sourcegraph/zoekt" ) func TestLimitMatches(t *testing.T) { @@ -62,8 +63,8 @@ func TestLimitMatches(t *testing.T) { for _, tc := range cases { t.Run("ChunkMatches", func(t *testing.T) { // Generate a ChunkMatch suitable for testing `LimitChunkMatches`. - generateChunkMatch := func(numRanges, lineNumber int) (ChunkMatch, int) { - cm := ChunkMatch{SymbolInfo: make([]*Symbol, numRanges)} + generateChunkMatch := func(numRanges, lineNumber int) (zoekt.ChunkMatch, int) { + cm := zoekt.ChunkMatch{SymbolInfo: make([]*zoekt.Symbol, numRanges)} // To simplify testing, we generate Content and the associated // Ranges with fixed logic: each ChunkMatch has 1 line of @@ -74,11 +75,11 @@ func TestLimitMatches(t *testing.T) { // 1 line of context. cm.Content = append(cm.Content, []byte("context\n")...) for i := 0; i < numRanges; i += 1 { - cm.Ranges = append(cm.Ranges, Range{ + cm.Ranges = append(cm.Ranges, zoekt.Range{ // We only provide LineNumber as that's all that's // relevant. 
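Not part of the patch: for readers tracking the limit.go move, a minimal sketch of how a caller outside the new package might apply these display limits. It assumes the `github.com/sourcegraph/zoekt/index` import path introduced by this PR; the helper name and the limit values are illustrative.

```go
package display

import (
	"github.com/sourcegraph/zoekt"
	"github.com/sourcegraph/zoekt/index"
)

// truncateForDisplay is a hypothetical helper: sort the aggregated file
// matches, then enforce the display limits carried in SearchOptions.
func truncateForDisplay(files []zoekt.FileMatch) []zoekt.FileMatch {
	opts := &zoekt.SearchOptions{
		MaxDocDisplayCount:   10, // keep at most 10 files
		MaxMatchDisplayCount: 50, // and at most 50 matches across them
		ChunkMatches:         true,
	}
	return index.SortAndTruncateFiles(files, opts)
}
```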
- Start: Location{LineNumber: uint32(lineNumber + (2 * i) + 1)}, - End: Location{LineNumber: uint32(lineNumber + (2 * i) + 2)}, + Start: zoekt.Location{LineNumber: uint32(lineNumber + (2 * i) + 1)}, + End: zoekt.Location{LineNumber: uint32(lineNumber + (2 * i) + 2)}, }) cm.Content = append(cm.Content, []byte(fmt.Sprintf("range%dStart\nrange%dEnd\n", i, i))...) } @@ -91,19 +92,19 @@ func TestLimitMatches(t *testing.T) { return cm, lineNumber + (2 * numRanges) + 4 } - res := SearchResult{} + res := zoekt.SearchResult{} for _, file := range tc.in { - fm := FileMatch{} + fm := zoekt.FileMatch{} lineNumber := 0 for _, numRanges := range file { - var cm ChunkMatch + var cm zoekt.ChunkMatch cm, lineNumber = generateChunkMatch(numRanges, lineNumber) fm.ChunkMatches = append(fm.ChunkMatches, cm) } res.Files = append(res.Files, fm) } - res.Files = SortAndTruncateFiles(res.Files, &SearchOptions{ + res.Files = SortAndTruncateFiles(res.Files, &zoekt.SearchOptions{ MaxMatchDisplayCount: tc.limit, ChunkMatches: true, }) @@ -133,16 +134,16 @@ func TestLimitMatches(t *testing.T) { }) t.Run("LineMatches", func(t *testing.T) { - res := SearchResult{} + res := zoekt.SearchResult{} for _, file := range tc.in { - fm := FileMatch{} + fm := zoekt.FileMatch{} for _, numFragments := range file { - fm.LineMatches = append(fm.LineMatches, LineMatch{LineFragments: make([]LineFragmentMatch, numFragments)}) + fm.LineMatches = append(fm.LineMatches, zoekt.LineMatch{LineFragments: make([]zoekt.LineFragmentMatch, numFragments)}) } res.Files = append(res.Files, fm) } - res.Files = SortAndTruncateFiles(res.Files, &SearchOptions{ + res.Files = SortAndTruncateFiles(res.Files, &zoekt.SearchOptions{ MaxMatchDisplayCount: tc.limit, ChunkMatches: false, }) diff --git a/matchiter.go b/index/matchiter.go similarity index 95% rename from matchiter.go rename to index/matchiter.go index 98bf6b1ca..982677eb5 100644 --- a/matchiter.go +++ b/index/matchiter.go @@ -12,11 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "bytes" "fmt" + + "github.com/sourcegraph/zoekt" ) // candidateMatch is a candidate match for a substring. @@ -73,7 +75,7 @@ type matchIterator interface { // updateStats is called twice. After matchtree construction and after // searching is done. Implementations must take care to not report // statistics twice. - updateStats(*Stats) + updateStats(*zoekt.Stats) } // noMatchTree is both matchIterator and matchTree that matches nothing. @@ -81,7 +83,7 @@ type noMatchTree struct { Why string // Stats captures the work done to create the noMatchTree. 
- Stats Stats + Stats zoekt.Stats } func (t *noMatchTree) String() string { @@ -102,9 +104,9 @@ func (t *noMatchTree) matches(cp *contentProvider, cost int, known map[matchTree return matchesNone } -func (t *noMatchTree) updateStats(s *Stats) { +func (t *noMatchTree) updateStats(s *zoekt.Stats) { s.Add(t.Stats) - t.Stats = Stats{} + t.Stats = zoekt.Stats{} } func (m *candidateMatch) String() string { @@ -166,7 +168,7 @@ func (i *ngramDocIterator) prepare(nextDoc uint32) { i.fileIdx = nextDoc } -func (i *ngramDocIterator) updateStats(s *Stats) { +func (i *ngramDocIterator) updateStats(s *zoekt.Stats) { i.iter.updateStats(s) s.NgramMatches += i.matchCount s.NgramLookups += i.ngramLookups diff --git a/index/matchiter_test.go b/index/matchiter_test.go new file mode 100644 index 000000000..5de9075e8 --- /dev/null +++ b/index/matchiter_test.go @@ -0,0 +1,40 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package index + +import ( + "reflect" + "testing" +) + +func TestMatchSize(t *testing.T) { + cases := []struct { + v any + size int + }{{ + v: candidateMatch{}, + size: 80, + }, { + v: candidateChunk{}, + size: 40, + }} + for _, c := range cases { + got := reflect.TypeOf(c.v).Size() + if int(got) != c.size { + t.Errorf(`sizeof struct %T has changed from %d to %d. +These are match structs that occur a lot in memory, so we optimize size. +When changing, please ensure there isn't unnecessary padding via the +tool fieldalignment then update this test.`, c.v, c.size, got) + } + } +} diff --git a/matchtree.go b/index/matchtree.go similarity index 99% rename from matchtree.go rename to index/matchtree.go index ce30f0980..75e0d5958 100644 --- a/matchtree.go +++ b/index/matchtree.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "bytes" @@ -23,7 +23,7 @@ import ( "unicode/utf8" "github.com/grafana/regexp" - + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/internal/syntaxutil" "github.com/sourcegraph/zoekt/query" ) @@ -604,9 +604,9 @@ func visitMatchTree(t matchTree, f func(matchTree)) { // updateMatchTreeStats calls updateStats on all atoms in mt which have that // function defined. -func updateMatchTreeStats(mt matchTree, stats *Stats) { +func updateMatchTreeStats(mt matchTree, stats *zoekt.Stats) { visitMatchTree(mt, func(mt matchTree) { - if atom, ok := mt.(interface{ updateStats(*Stats) }); ok { + if atom, ok := mt.(interface{ updateStats(*zoekt.Stats) }); ok { atom.updateStats(stats) } }) diff --git a/matchtree_test.go b/index/matchtree_test.go similarity index 96% rename from matchtree_test.go rename to index/matchtree_test.go index 1fece0f5b..ec3af2718 100644 --- a/matchtree_test.go +++ b/index/matchtree_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package zoekt +package index import ( "reflect" @@ -21,6 +21,7 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/grafana/regexp" + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/query" ) @@ -287,7 +288,7 @@ func TestSymbolMatchTree(t *testing.T) { func TestRepoSet(t *testing.T) { d := &indexData{ - repoMetaData: []Repository{{Name: "r0"}, {Name: "r1"}, {Name: "r2"}, {Name: "r3"}}, + repoMetaData: []zoekt.Repository{{Name: "r0"}, {Name: "r1"}, {Name: "r2"}, {Name: "r3"}}, fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1}, repos: []uint16{0, 0, 1, 2, 3, 3}, } @@ -310,7 +311,7 @@ func TestRepoSet(t *testing.T) { func TestRepo(t *testing.T) { d := &indexData{ - repoMetaData: []Repository{{Name: "foo"}, {Name: "bar"}}, + repoMetaData: []zoekt.Repository{{Name: "foo"}, {Name: "bar"}}, fileBranchMasks: []uint64{1, 1, 1, 1, 1}, repos: []uint16{0, 0, 1, 0, 1}, } @@ -333,7 +334,7 @@ func TestRepo(t *testing.T) { func TestBranchesRepos(t *testing.T) { d := &indexData{ - repoMetaData: []Repository{ + repoMetaData: []zoekt.Repository{ {ID: hash("foo"), Name: "foo"}, {ID: hash("bar"), Name: "bar"}, }, @@ -366,7 +367,7 @@ func TestBranchesRepos(t *testing.T) { func TestRepoIDs(t *testing.T) { d := &indexData{ - repoMetaData: []Repository{{Name: "r0", ID: 0}, {Name: "r1", ID: 1}, {Name: "r2", ID: 2}, {Name: "r3", ID: 3}}, + repoMetaData: []zoekt.Repository{{Name: "r0", ID: 0}, {Name: "r1", ID: 1}, {Name: "r2", ID: 2}, {Name: "r3", ID: 3}}, fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1}, repos: []uint16{0, 0, 1, 2, 3, 3}, } diff --git a/merge.go b/index/merge.go similarity index 97% rename from merge.go rename to index/merge.go index 0473663e1..5acebc6e6 100644 --- a/merge.go +++ b/index/merge.go @@ -1,4 +1,4 @@ -package zoekt +package index import ( "crypto/sha1" @@ -10,6 +10,7 @@ import ( "runtime" "sort" + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/internal/tenant" ) @@ -95,7 +96,7 @@ func merge(ds ...*indexData) (*IndexBuilder, error) { } sort.Slice(ds, func(i, j int) bool { - return ds[i].repoMetaData[0].priority > ds[j].repoMetaData[0].priority + return ds[i].repoMetaData[0].GetPriority() > ds[j].repoMetaData[0].GetPriority() }) ib := newIndexBuilder() @@ -237,7 +238,7 @@ func addDocument(d *indexData, ib *IndexBuilder, repoID int, docID uint32) error return err } - doc.SymbolsMetaData = make([]*Symbol, len(doc.Symbols)) + doc.SymbolsMetaData = make([]*zoekt.Symbol, len(doc.Symbols)) for i := range doc.SymbolsMetaData { doc.SymbolsMetaData[i] = d.symbols.data(d.fileEndSymbol[docID] + uint32(i)) } diff --git a/merge_test.go b/index/merge_test.go similarity index 92% rename from merge_test.go rename to index/merge_test.go index 07abf1ca6..7544f4f82 100644 --- a/merge_test.go +++ b/index/merge_test.go @@ -1,4 +1,4 @@ -package zoekt +package index import ( "os" @@ -6,6 +6,7 @@ import ( "testing" "github.com/google/go-cmp/cmp" + "github.com/sourcegraph/zoekt" ) // We compare 2 simple shards before and after the transformation @@ -13,12 +14,12 @@ import ( // identical. 
func TestExplode(t *testing.T) { simpleShards := []string{ - "./testdata/shards/repo_v16.00000.zoekt", - "./testdata/shards/repo2_v16.00000.zoekt", + ".././testdata/shards/repo_v16.00000.zoekt", + ".././testdata/shards/repo2_v16.00000.zoekt", } // repo name -> IndexMetadata - m := make(map[string]*IndexMetadata, 2) + m := make(map[string]*zoekt.IndexMetadata, 2) // merge var files []IndexFile diff --git a/read.go b/index/read.go similarity index 96% rename from read.go rename to index/read.go index 189ec64c4..a44254733 100644 --- a/read.go +++ b/index/read.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "encoding/binary" @@ -25,6 +25,7 @@ import ( "sort" "github.com/rs/xid" + "github.com/sourcegraph/zoekt" ) // IndexFile is a file suitable for concurrent read access. For performance @@ -242,7 +243,7 @@ func (r *reader) readJSON(data interface{}, sec simpleSection) error { // canReadVersion returns checks if zoekt can read in md. If it can't a // non-nil error is returned. -func canReadVersion(md *IndexMetadata) bool { +func canReadVersion(md *zoekt.IndexMetadata) bool { // Backwards compatible with v16 return md.IndexFormatVersion == IndexFormatVersion || md.IndexFormatVersion == NextIndexFormatVersion } @@ -262,7 +263,7 @@ func (r *reader) readIndexData(toc *indexTOC) (*indexData, error) { } d.metaData = *md - d.repoMetaData = make([]Repository, 0, len(repos)) + d.repoMetaData = make([]zoekt.Repository, 0, len(repos)) for _, r := range repos { d.repoMetaData = append(d.repoMetaData, *r) } @@ -412,8 +413,8 @@ func (r *reader) readIndexData(toc *indexTOC) (*indexData, error) { return &d, nil } -func (r *reader) parseMetadata(metaData simpleSection, repoMetaData simpleSection) ([]*Repository, *IndexMetadata, error) { - var md IndexMetadata +func (r *reader) parseMetadata(metaData simpleSection, repoMetaData simpleSection) ([]*zoekt.Repository, *zoekt.IndexMetadata, error) { + var md zoekt.IndexMetadata if err := r.readJSON(&md, metaData); err != nil { return nil, nil, err } @@ -433,13 +434,13 @@ func (r *reader) parseMetadata(metaData simpleSection, repoMetaData simpleSectio } } - var repos []*Repository + var repos []*zoekt.Repository if md.IndexFormatVersion >= 17 { if err := json.Unmarshal(blob, &repos); err != nil { return nil, &md, err } } else { - repos = make([]*Repository, 1) + repos = make([]*zoekt.Repository, 1) if err := json.Unmarshal(blob, &repos[0]); err != nil { return nil, &md, err } @@ -568,7 +569,7 @@ func (d *indexData) readDocSections(i uint32, buf []DocumentSection) ([]Document // results coming from this searcher are valid only for the lifetime // of the Searcher itself, ie. []byte members should be copied into // fresh buffers if the result is to survive closing the shard. -func NewSearcher(r IndexFile) (Searcher, error) { +func NewSearcher(r IndexFile) (zoekt.Searcher, error) { rd := &reader{r: r} var toc indexTOC @@ -585,7 +586,7 @@ func NewSearcher(r IndexFile) (Searcher, error) { // ReadMetadata returns the metadata of index shard without reading // the index data. The IndexFile is not closed. 
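Not from the patch: a small sketch of the metadata-only read path that read.go exposes, using the ReadMetadataPath helper whose relocation appears just below. The wrapper name and output format are illustrative.

```go
package inspect

import (
	"fmt"

	"github.com/sourcegraph/zoekt/index"
)

// printShardInfo is a hypothetical helper: it reads only the metadata
// sections of a shard, without loading any posting lists.
func printShardInfo(shardPath string) error {
	repos, md, err := index.ReadMetadataPath(shardPath)
	if err != nil {
		return err
	}
	fmt.Printf("index format version %d\n", md.IndexFormatVersion)
	for _, r := range repos {
		fmt.Printf("repo %s (id %d)\n", r.Name, r.ID)
	}
	return nil
}
```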
-func ReadMetadata(inf IndexFile) ([]*Repository, *IndexMetadata, error) { +func ReadMetadata(inf IndexFile) ([]*zoekt.Repository, *zoekt.IndexMetadata, error) { rd := &reader{r: inf} var toc indexTOC err := rd.readTOCSections(&toc, []string{"metaData", "repoMetaData"}) @@ -597,7 +598,7 @@ func ReadMetadata(inf IndexFile) ([]*Repository, *IndexMetadata, error) { // ReadMetadataPathAlive is like ReadMetadataPath except that it only returns // alive repositories. -func ReadMetadataPathAlive(p string) ([]*Repository, *IndexMetadata, error) { +func ReadMetadataPathAlive(p string) ([]*zoekt.Repository, *zoekt.IndexMetadata, error) { repos, id, err := ReadMetadataPath(p) if err != nil { return nil, nil, err @@ -614,7 +615,7 @@ func ReadMetadataPathAlive(p string) ([]*Repository, *IndexMetadata, error) { // ReadMetadataPath returns the metadata of index shard at p without reading // the index data. ReadMetadataPath is a helper for ReadMetadata which opens // the IndexFile at p. -func ReadMetadataPath(p string) ([]*Repository, *IndexMetadata, error) { +func ReadMetadataPath(p string) ([]*zoekt.Repository, *zoekt.IndexMetadata, error) { f, err := os.Open(p) if err != nil { return nil, nil, err diff --git a/read_test.go b/index/read_test.go similarity index 97% rename from read_test.go rename to index/read_test.go index 7b3827b1d..2b90b23ba 100644 --- a/read_test.go +++ b/index/read_test.go @@ -12,13 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "bytes" "context" "encoding/json" - "flag" "fmt" "io/fs" "os" @@ -30,12 +29,10 @@ import ( "testing" "github.com/google/go-cmp/cmp" - + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/query" ) -var update = flag.Bool("update", false, "update golden files") - func TestReadWrite(t *testing.T) { b, err := NewIndexBuilder(nil) if err != nil { @@ -199,7 +196,7 @@ func TestGet(t *testing.T) { } } -func loadShard(fn string) (Searcher, error) { +func loadShard(fn string) (zoekt.Searcher, error) { f, err := os.Open(fn) if err != nil { return nil, err @@ -222,7 +219,7 @@ func TestReadSearch(t *testing.T) { type out struct { FormatVersion int FeatureVersion int - FileMatches [][]FileMatch + FileMatches [][]zoekt.FileMatch } qs := []query.Q{ @@ -259,7 +256,7 @@ func TestReadSearch(t *testing.T) { FeatureVersion: index.metaData.IndexFeatureVersion, } for _, q := range qs { - res, err := shard.Search(context.Background(), q, &SearchOptions{}) + res, err := shard.Search(context.Background(), q, &zoekt.SearchOptions{}) if err != nil { t.Fatalf("failed search %s on %s during updating: %v", q, name, err) } @@ -291,7 +288,7 @@ func TestReadSearch(t *testing.T) { } for j, q := range qs { - res, err := shard.Search(context.Background(), q, &SearchOptions{}) + res, err := shard.Search(context.Background(), q, &zoekt.SearchOptions{}) if err != nil { t.Fatalf("failed search %s on %s: %v", q, name, err) } diff --git a/score.go b/index/score.go similarity index 93% rename from score.go rename to index/score.go index 1becccc14..84bfab4d7 100644 --- a/score.go +++ b/index/score.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package zoekt +package index import ( "bytes" @@ -20,6 +20,7 @@ import ( "math" "strings" + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/internal/ctags" ) @@ -36,12 +37,12 @@ type chunkScore struct { // scoreChunk calculates the score for each line in the chunk based on its candidate matches, and returns the score of // the best-scoring line, along with its line number. // Invariant: there should be at least one input candidate, len(ms) > 0. -func (p *contentProvider) scoreChunk(ms []*candidateMatch, language string, opts *SearchOptions) (chunkScore, []*Symbol) { +func (p *contentProvider) scoreChunk(ms []*candidateMatch, language string, opts *zoekt.SearchOptions) (chunkScore, []*zoekt.Symbol) { nl := p.newlines() var bestScore lineScore bestLine := 0 - var symbolInfo []*Symbol + var symbolInfo []*zoekt.Symbol start := 0 currentLine := -1 @@ -92,7 +93,7 @@ type lineScore struct { // - All candidate matches are assumed to come from the same line in the content. // - If this line represents a filename, then lineNumber must be -1. // - There should be at least one input candidate, len(ms) > 0. -func (p *contentProvider) scoreLine(ms []*candidateMatch, language string, lineNumber int, opts *SearchOptions) (lineScore, []*Symbol) { +func (p *contentProvider) scoreLine(ms []*candidateMatch, language string, lineNumber int, opts *zoekt.SearchOptions) (lineScore, []*zoekt.Symbol) { if opts.UseBM25Scoring { score, symbolInfo := p.scoreLineBM25(ms, lineNumber) ls := lineScore{score: score} @@ -112,7 +113,7 @@ func (p *contentProvider) scoreLine(ms []*candidateMatch, language string, lineN } filename := p.data(true) - var symbolInfo []*Symbol + var symbolInfo []*zoekt.Symbol var bestScore lineScore for i, m := range ms { @@ -164,7 +165,7 @@ func (p *contentProvider) scoreLine(ms []*candidateMatch, language string, lineN // information. if m.symbol { if symbolInfo == nil { - symbolInfo = make([]*Symbol, len(ms)) + symbolInfo = make([]*zoekt.Symbol, len(ms)) } // findSymbols does not hydrate in Sym. So we need to store it. si.Sym = string(sym) @@ -200,7 +201,7 @@ func (p *contentProvider) scoreLine(ms []*candidateMatch, language string, lineN // Notes: // - This BM25 calculation skips inverse document frequency (idf) to keep the implementation simple. // - It uses the same calculateTermFrequency method as BM25 file scoring, which boosts filename and symbol matches. -func (p *contentProvider) scoreLineBM25(ms []*candidateMatch, lineNumber int) (float64, []*Symbol) { +func (p *contentProvider) scoreLineBM25(ms []*candidateMatch, lineNumber int) (float64, []*zoekt.Symbol) { // If this is a filename, then don't compute BM25. The score would not be comparable to line scores. if lineNumber < 0 { return 0, nil @@ -221,8 +222,8 @@ func (p *contentProvider) scoreLineBM25(ms []*candidateMatch, lineNumber int) (f score += ((k + 1.0) * float64(f)) / (k*(1.0-b+b*L) + float64(f)) } - // Check if any match comes from a symbol match tree, and if so hydrate in symbol information - var symbolInfo []*Symbol + // Check if any match comes from a symbol match tree, and if so hydrate in symbol information + var symbolInfo []*zoekt.Symbol for _, m := range ms { if m.symbol { if sec, si, ok := p.findSymbol(m); ok && si != nil { @@ -245,7 +246,7 @@ type termDocumentFrequency map[string]int // - Symbol matches also count more than content matches, to reward matches on symbol definitions.
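Not part of the patch: a self-contained illustration of the per-term factor used in the loop above, ((k+1)*f) / (k*(1-b+b*L) + f), with the Lucene-style defaults k=1.2, b=0.75 mentioned below. L here simply stands in for the normalized length used by the real scorer.

```go
package main

import "fmt"

// Shows how the per-term BM25 contribution saturates as the term
// frequency f grows: it rises quickly and then approaches k+1.
func main() {
	k, b, L := 1.2, 0.75, 1.0 // L = 1.0 models an average-length document
	for _, f := range []float64{1, 2, 4, 8, 16} {
		contrib := ((k + 1.0) * f) / (k*(1.0-b+b*L) + f)
		fmt.Printf("f=%2.0f -> %.3f\n", f, contrib) // tends towards k+1 = 2.2
	}
}
```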
func (p *contentProvider) calculateTermFrequency(cands []*candidateMatch, df termDocumentFrequency) map[string]int { // Treat each candidate match as a term and compute the frequencies. For now, ignore case sensitivity and - ignore whether the match is a word boundary. + ignore whether the match is a word boundary. termFreqs := map[string]int{} for _, m := range cands { term := string(m.substrLowered) @@ -264,20 +265,20 @@ func (p *contentProvider) calculateTermFrequency(cands []*candidateMatch, df ter // scoreFile computes a score for the file match using various scoring signals, like // whether there's an exact match on a symbol, the number of query clauses that matched, etc. -func (d *indexData) scoreFile(fileMatch *FileMatch, doc uint32, mt matchTree, known map[matchTree]bool, opts *SearchOptions) { +func (d *indexData) scoreFile(fileMatch *zoekt.FileMatch, doc uint32, mt matchTree, known map[matchTree]bool, opts *zoekt.SearchOptions) { atomMatchCount := 0 visitMatchAtoms(mt, known, func(mt matchTree) { atomMatchCount++ }) addScore := func(what string, computed float64) { - fileMatch.addScore(what, computed, -1, opts.DebugScore) + fileMatch.AddScore(what, computed, -1, opts.DebugScore) } // atom-count boosts files with matches from more than 1 atom. The // maximum boost is scoreFactorAtomMatch. if atomMatchCount > 0 { - fileMatch.addScore("atom", (1.0-1.0/float64(atomMatchCount))*scoreFactorAtomMatch, float64(atomMatchCount), opts.DebugScore) + fileMatch.AddScore("atom", (1.0-1.0/float64(atomMatchCount))*scoreFactorAtomMatch, float64(atomMatchCount), opts.DebugScore) } maxFileScore := 0.0 @@ -342,7 +343,7 @@ type termFrequency struct { // Unlike standard file scoring, this scoring strategy ignores all other signals including document ranks. This keeps // things simple for now, since BM25 is not normalized and can be tricky to combine with other scoring signals. It also // ignores the individual LineMatch and ChunkMatch scores, instead calculating a score over all matches in the file. -func (d *indexData) scoreFilesUsingBM25(fileMatches []FileMatch, tfs []termFrequency, df termDocumentFrequency, opts *SearchOptions) { +func (d *indexData) scoreFilesUsingBM25(fileMatches []zoekt.FileMatch, tfs []termFrequency, df termDocumentFrequency, opts *zoekt.SearchOptions) { // Use standard parameter defaults used in Lucene (https://lucene.apache.org/core/10_1_0/core/org/apache/lucene/search/similarities/BM25Similarity.html) k, b := 1.2, 0.75 diff --git a/section.go b/index/section.go similarity index 99% rename from section.go rename to index/section.go index 380054f8f..02fee05eb 100644 --- a/section.go +++ b/index/section.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "encoding/binary" diff --git a/toc.go b/index/toc.go similarity index 99% rename from toc.go rename to index/toc.go index 8eee56950..1367d9613 100644 --- a/toc.go +++ b/index/toc.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index // IndexFormatVersion is a version number. It is increased every time the // on-disk index format is changed.
diff --git a/tombstones.go b/index/tombstones.go similarity index 94% rename from tombstones.go rename to index/tombstones.go index a44eb37b1..1b6f457d5 100644 --- a/tombstones.go +++ b/index/tombstones.go @@ -1,13 +1,15 @@ -package zoekt +package index import ( "encoding/json" "fmt" "os" "path/filepath" + + "github.com/sourcegraph/zoekt" ) -var mockRepos []*Repository +var mockRepos []*zoekt.Repository // SetTombstone idempotently sets a tombstone for repoName in .meta. func SetTombstone(shardPath string, repoID uint32) error { @@ -20,7 +22,7 @@ func UnsetTombstone(shardPath string, repoID uint32) error { } func setTombstone(shardPath string, repoID uint32, tombstone bool) error { - var repos []*Repository + var repos []*zoekt.Repository var err error if mockRepos != nil { @@ -90,6 +92,3 @@ func JsonMarshalRepoMetaTemp(shardPath string, repositoryMetadata interface{}) ( return f.Name(), finalPath, nil } - -// umask holds the Umask of the current process -var umask os.FileMode diff --git a/tombstones_test.go b/index/tombstones_test.go similarity index 80% rename from tombstones_test.go rename to index/tombstones_test.go index 7a39cfff7..30120b85b 100644 --- a/tombstones_test.go +++ b/index/tombstones_test.go @@ -1,10 +1,12 @@ -package zoekt +package index import ( "encoding/json" "os" "path/filepath" "testing" + + "github.com/sourcegraph/zoekt" ) func TestSetTombstone(t *testing.T) { @@ -24,7 +26,7 @@ func TestSetTombstone(t *testing.T) { isAlive := func(alive []bool) { t.Helper() blob := readMeta(ghostShard) - ghostRepos := []*Repository{} + ghostRepos := []*zoekt.Repository{} if err := json.Unmarshal(blob, &ghostRepos); err != nil { t.Fatal(err) } @@ -51,10 +53,10 @@ func TestSetTombstone(t *testing.T) { isAlive([]bool{false, true, true}) } -func mkRepos(repoNames ...string) []*Repository { - ret := make([]*Repository, 0, len(repoNames)) +func mkRepos(repoNames ...string) []*zoekt.Repository { + ret := make([]*zoekt.Repository, 0, len(repoNames)) for i, n := range repoNames { - ret = append(ret, &Repository{ID: uint32(i + 1), Name: n}) + ret = append(ret, &zoekt.Repository{ID: uint32(i + 1), Name: n}) } return ret } diff --git a/tombstones_unix.go b/index/tombstones_unix.go similarity index 91% rename from tombstones_unix.go rename to index/tombstones_unix.go index f983b5096..b4c6e9bad 100644 --- a/tombstones_unix.go +++ b/index/tombstones_unix.go @@ -1,6 +1,6 @@ //go:build !windows && !wasm -package zoekt +package index import ( "os" diff --git a/tombstones_windows.go b/index/tombstones_windows.go similarity index 84% rename from tombstones_windows.go rename to index/tombstones_windows.go index 3b7d6ffa1..8ac62aa07 100644 --- a/tombstones_windows.go +++ b/index/tombstones_windows.go @@ -1,4 +1,4 @@ -package zoekt +package index func init() { // no setting of file permissions on Windows diff --git a/write.go b/index/write.go similarity index 98% rename from write.go rename to index/write.go index 278ebc025..2ea52de93 100644 --- a/write.go +++ b/index/write.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
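Not part of the patch: a sketch of the tombstone flow these files implement, combining SetTombstone with the alive-only metadata reader from read.go. The wrapper name is illustrative.

```go
package shardadmin

import "github.com/sourcegraph/zoekt/index"

// dropRepo is a hypothetical wrapper: it tombstones one repository in a
// compound shard's .meta file and reports how many repositories remain alive.
func dropRepo(shardPath string, repoID uint32) (int, error) {
	if err := index.SetTombstone(shardPath, repoID); err != nil {
		return 0, err
	}
	// ReadMetadataPathAlive skips tombstoned repositories.
	repos, _, err := index.ReadMetadataPathAlive(shardPath)
	if err != nil {
		return 0, err
	}
	return len(repos), nil
}
```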
-package zoekt +package index import ( "bufio" @@ -23,6 +23,8 @@ import ( "io" "sort" "time" + + "github.com/sourcegraph/zoekt" ) func (w *writer) writeTOC(toc *indexTOC) { @@ -174,7 +176,7 @@ func (b *IndexBuilder) Write(out io.Writer) error { indexTime = time.Now().UTC() } - if err := b.writeJSON(&IndexMetadata{ + if err := b.writeJSON(&zoekt.IndexMetadata{ IndexFormatVersion: b.indexFormatVersion, IndexTime: indexTime, IndexFeatureVersion: b.featureVersion, diff --git a/internal/archive/e2e_test.go b/internal/archive/e2e_test.go index d836fbb9e..c3e282a3c 100644 --- a/internal/archive/e2e_test.go +++ b/internal/archive/e2e_test.go @@ -17,7 +17,7 @@ import ( "time" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/shards" "github.com/sourcegraph/zoekt/query" "github.com/stretchr/testify/require" @@ -160,7 +160,7 @@ func testIndexIncrementally(t *testing.T, format string) { for _, test := range tests { largeFiles, wantNumFiles := test.largeFiles, test.wantNumFiles - bopts := build.Options{ + bopts := index.Options{ SizeMax: fileSize - 1, IndexDir: indexDir, LargeFiles: largeFiles, @@ -233,7 +233,7 @@ func testLatestCommitDate(t *testing.T, format string) { // Index indexDir := t.TempDir() - bopts := build.Options{ + bopts := index.Options{ IndexDir: indexDir, } opts := Options{ @@ -253,7 +253,7 @@ func testLatestCommitDate(t *testing.T, format string) { indexFiles, err := f.Readdirnames(1) require.Len(t, indexFiles, 1) - repos, _, err := zoekt.ReadMetadataPath(filepath.Join(indexDir, indexFiles[0])) + repos, _, err := index.ReadMetadataPath(filepath.Join(indexDir, indexFiles[0])) require.NoError(t, err) require.Len(t, repos, 1) require.True(t, repos[0].LatestCommitDate.Equal(modTime)) diff --git a/internal/archive/index.go b/internal/archive/index.go index c8836768f..b940d6942 100644 --- a/internal/archive/index.go +++ b/internal/archive/index.go @@ -10,7 +10,7 @@ import ( "sync" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" ) // Options specify the archive specific indexing options. @@ -74,7 +74,7 @@ func (o *Options) SetDefaults() { } // Index archive specified in opts using bopts. -func Index(opts Options, bopts build.Options) error { +func Index(opts Options, bopts index.Options) error { opts.SetDefaults() if opts.Name == "" && opts.RepoURL == "" { @@ -114,7 +114,7 @@ func Index(opts Options, bopts build.Options) error { defer a.Close() bopts.RepositoryDescription.Source = opts.Archive - var builder *build.Builder + var builder *index.Builder once := sync.Once{} var onceErr error @@ -124,7 +124,7 @@ func Index(opts Options, bopts build.Options) error { once.Do(func() { // We use the ModTime of the first file as a proxy for the latest commit date. 
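Not from the patch: a sketch of driving the archive indexer with the relocated Options type, in the way the e2e test in this file does. Only the Archive and Name fields that appear above are set; the URL, repository name and index directory are placeholders, and the snippet assumes it lives inside the zoekt module, since internal/archive is not importable from outside it.

```go
package ingest

import (
	"github.com/sourcegraph/zoekt"
	"github.com/sourcegraph/zoekt/index"
	"github.com/sourcegraph/zoekt/internal/archive"
)

// indexTarball is a hypothetical helper wiring archive.Options to the
// renamed index.Options.
func indexTarball(indexDir string) error {
	aopts := archive.Options{
		Archive: "https://example.com/repo.tar.gz", // placeholder URL
		Name:    "example/repo",                    // placeholder name
	}
	bopts := index.Options{
		IndexDir:              indexDir,
		RepositoryDescription: zoekt.Repository{Name: "example/repo"},
	}
	return archive.Index(aopts, bopts)
}
```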
bopts.RepositoryDescription.LatestCommitDate = f.ModTime - builder, onceErr = build.NewBuilder(bopts) + builder, onceErr = index.NewBuilder(bopts) }) if onceErr != nil { return onceErr @@ -140,7 +140,7 @@ func Index(opts Options, bopts build.Options) error { return nil } - return builder.Add(zoekt.Document{ + return builder.Add(index.Document{ Name: name, Content: contents, Branches: brs, diff --git a/internal/debugserver/debug.go b/internal/debugserver/debug.go index 55792d8d8..7b7fd4d7d 100644 --- a/internal/debugserver/debug.go +++ b/internal/debugserver/debug.go @@ -9,9 +9,8 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/sourcegraph/zoekt/index" "golang.org/x/net/trace" - - "github.com/sourcegraph/zoekt" ) var registerOnce sync.Once @@ -87,5 +86,5 @@ func AddHandlers(mux *http.ServeMux, enablePprof bool, p ...DebugPage) { func register() { promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "zoekt_version", - }, []string{"version"}).WithLabelValues(zoekt.Version).Set(1) + }, []string{"version"}).WithLabelValues(index.Version).Set(1) } diff --git a/build/e2e_test.go b/internal/e2e/e2e_index_test.go similarity index 77% rename from build/e2e_test.go rename to internal/e2e/e2e_index_test.go index 757a7e809..fa32fcaba 100644 --- a/build/e2e_test.go +++ b/internal/e2e/e2e_index_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package build +package e2e import ( "bytes" @@ -22,7 +22,6 @@ import ( "log" "os" "path/filepath" - "reflect" "runtime" "sort" "strconv" @@ -34,6 +33,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/grafana/regexp" "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/shards" "github.com/sourcegraph/zoekt/internal/tenant" "github.com/sourcegraph/zoekt/internal/tenant/tenanttest" @@ -41,10 +41,10 @@ import ( "github.com/stretchr/testify/require" ) -func TestBasic(t *testing.T) { +func TestBasicIndexing(t *testing.T) { dir := t.TempDir() - opts := Options{ + opts := index.Options{ IndexDir: dir, ShardMax: 1024, RepositoryDescription: zoekt.Repository{ @@ -54,7 +54,7 @@ func TestBasic(t *testing.T) { SizeMax: 1 << 20, } - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -75,12 +75,12 @@ func TestBasic(t *testing.T) { t.Fatalf("want multiple shards, got %v", fs) } - _, md0, err := zoekt.ReadMetadataPath(fs[0]) + _, md0, err := index.ReadMetadataPath(fs[0]) if err != nil { t.Fatal(err) } for _, f := range fs[1:] { - _, md, err := zoekt.ReadMetadataPath(f) + _, md, err := index.ReadMetadataPath(f) if err != nil { t.Fatal(err) } @@ -126,7 +126,7 @@ func TestBasic(t *testing.T) { // "repo-mutated". We do this inside retry helper since we have noticed // some flakiness on github CI. 
for _, p := range fs { - repos, _, err := zoekt.ReadMetadataPath(p) + repos, _, err := index.ReadMetadataPath(p) if err != nil { t.Fatal(err) } @@ -166,7 +166,7 @@ func TestSearchTenant(t *testing.T) { tnt1, err := tenant.FromContext(ctx1) require.NoError(t, err) - opts := Options{ + opts := index.Options{ IndexDir: dir, ShardMax: 1024, RepositoryDescription: zoekt.Repository{ @@ -177,7 +177,7 @@ func TestSearchTenant(t *testing.T) { SizeMax: 1 << 20, } - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -198,12 +198,12 @@ func TestSearchTenant(t *testing.T) { t.Fatalf("want multiple shards, got %v", fs) } - _, md0, err := zoekt.ReadMetadataPath(fs[0]) + _, md0, err := index.ReadMetadataPath(fs[0]) if err != nil { t.Fatal(err) } for _, f := range fs[1:] { - _, md, err := zoekt.ReadMetadataPath(f) + _, md, err := index.ReadMetadataPath(f) if err != nil { t.Fatal(err) } @@ -249,7 +249,7 @@ func TestListTenant(t *testing.T) { tnt1, err := tenant.FromContext(ctx1) require.NoError(t, err) - opts := Options{ + opts := index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{ Name: "repo", @@ -258,7 +258,7 @@ func TestListTenant(t *testing.T) { } opts.SetDefaults() - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -334,7 +334,7 @@ func TestLargeFileOption(t *testing.T) { dir := t.TempDir() sizeMax := 1000 - opts := Options{ + opts := index.Options{ IndexDir: dir, LargeFiles: []string{"F0", "F1", "F2", "!F1"}, RepositoryDescription: zoekt.Repository{ @@ -343,7 +343,7 @@ func TestLargeFileOption(t *testing.T) { SizeMax: sizeMax, } - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -385,7 +385,7 @@ func TestLargeFileOption(t *testing.T) { func TestUpdate(t *testing.T) { dir := t.TempDir() - opts := Options{ + opts := index.Options{ IndexDir: dir, ShardMax: 1024, RepositoryDescription: zoekt.Repository{ @@ -396,7 +396,7 @@ func TestUpdate(t *testing.T) { SizeMax: 1 << 20, } - if b, err := NewBuilder(opts); err != nil { + if b, err := index.NewBuilder(opts); err != nil { t.Fatalf("NewBuilder: %v", err) } else { if err := b.AddFile("F", []byte("hoi")); err != nil { @@ -431,7 +431,7 @@ func TestUpdate(t *testing.T) { FileURLTemplate: "url2", } - if b, err := NewBuilder(opts); err != nil { + if b, err := index.NewBuilder(opts); err != nil { t.Fatalf("NewBuilder: %v", err) } else { if err := b.AddFile("F", []byte("hoi")); err != nil { @@ -478,7 +478,7 @@ func TestUpdate(t *testing.T) { func TestDeleteOldShards(t *testing.T) { dir := t.TempDir() - opts := Options{ + opts := index.Options{ IndexDir: dir, ShardMax: 1024, RepositoryDescription: zoekt.Repository{ @@ -489,7 +489,7 @@ func TestDeleteOldShards(t *testing.T) { } opts.SetDefaults() - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -520,7 +520,7 @@ func TestDeleteOldShards(t *testing.T) { // Do again, without sharding. opts.ShardMax = 1 << 20 - b, err = NewBuilder(opts) + b, err = index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -542,7 +542,7 @@ func TestDeleteOldShards(t *testing.T) { } // Again, but don't index anything; should leave old shards intact. 
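Not part of the patch: the builder lifecycle these e2e tests exercise, condensed into one sketch. It assumes the renamed index package from this PR; the file name and contents are placeholders.

```go
package buildshard

import (
	"github.com/sourcegraph/zoekt"
	"github.com/sourcegraph/zoekt/index"
)

// buildOneShard is a hypothetical helper: configure Options, create a
// Builder, add documents, then Finish to write the shard(s) to IndexDir.
func buildOneShard(indexDir string) error {
	opts := index.Options{
		IndexDir:              indexDir,
		RepositoryDescription: zoekt.Repository{Name: "repo"},
	}
	opts.SetDefaults()

	b, err := index.NewBuilder(opts)
	if err != nil {
		return err
	}
	if err := b.AddFile("main.go", []byte("package main\n")); err != nil {
		return err
	}
	// Finish flushes the pending shard and cleans up superseded ones.
	return b.Finish()
}
```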
- b, err = NewBuilder(opts) + b, err = index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -558,141 +558,10 @@ func TestDeleteOldShards(t *testing.T) { } } -func TestPartialSuccess(t *testing.T) { - dir := t.TempDir() - - opts := Options{ - IndexDir: dir, - ShardMax: 1024, - SizeMax: 1 << 20, - Parallelism: 1, - } - opts.RepositoryDescription.Name = "repo" - opts.SetDefaults() - - b, err := NewBuilder(opts) - if err != nil { - t.Fatalf("NewBuilder: %v", err) - } - - for i := 0; i < 4; i++ { - nm := fmt.Sprintf("F%d", i) - _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128))) - } - b.buildError = fmt.Errorf("any error") - - // No error checking. - _ = b.Finish() - - // Finish cleans up temporary files. - if fs, err := filepath.Glob(dir + "/*"); err != nil { - t.Errorf("glob(%s): %v", dir, err) - } else if len(fs) != 0 { - t.Errorf("got shards %v, want []", fs) - } -} - -type filerankCase struct { - name string - docs []*zoekt.Document - want []int -} - -func testFileRankAspect(t *testing.T, c filerankCase) { - var want []*zoekt.Document - for _, j := range c.want { - want = append(want, c.docs[j]) - } - - got := make([]*zoekt.Document, len(c.docs)) - copy(got, c.docs) - sortDocuments(got) - - print := func(ds []*zoekt.Document) string { - r := "" - for _, d := range ds { - r += fmt.Sprintf("%v, ", d) - } - return r - } - if !reflect.DeepEqual(got, want) { - t.Errorf("got docs [%v], want [%v]", print(got), print(want)) - } -} - -func TestFileRank(t *testing.T) { - for _, c := range []filerankCase{{ - name: "filename", - docs: []*zoekt.Document{ - { - Name: "longlonglong", - Content: []byte("bla"), - }, - { - Name: "short", - Content: []byte("bla"), - }, - }, - want: []int{1, 0}, - }, { - name: "test", - docs: []*zoekt.Document{ - { - Name: "foo_test.go", - Content: []byte("bla"), - }, - { - Name: "longlonglong", - Content: []byte("bla"), - }, - }, - want: []int{1, 0}, - }, { - name: "content", - docs: []*zoekt.Document{ - { - Content: []byte("bla"), - }, - { - Content: []byte("blablablabla"), - }, - { - Content: []byte("blabla"), - }, - }, - want: []int{0, 2, 1}, - }, { - name: "skipped docs", - docs: []*zoekt.Document{ - { - Name: "binary_file", - SkipReason: "binary file", - }, - { - Name: "some_test.go", - Content: []byte("bla"), - }, - { - Name: "large_file.go", - SkipReason: "too large", - }, - { - Name: "file.go", - Content: []byte("blabla"), - }, - }, - want: []int{3, 1, 0, 2}, - }} { - t.Run(c.name, func(t *testing.T) { - testFileRankAspect(t, c) - }) - } -} - func TestEmptyContent(t *testing.T) { dir := t.TempDir() - opts := Options{ + opts := index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{ Name: "repo", @@ -700,7 +569,7 @@ func TestEmptyContent(t *testing.T) { } opts.SetDefaults() - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -734,21 +603,22 @@ func TestDeltaShards(t *testing.T) { // TODO: Need to write a test for compound shards as well. 
type step struct { name string - documents []zoekt.Document - optFn func(t *testing.T, o *Options) + documents []index.Document + optFn func(t *testing.T, o *index.Options) query string - expectedDocuments []zoekt.Document + changedFile string + expectedDocuments []index.Document } var ( - fooAtMain = zoekt.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v1")} - fooAtMainV2 = zoekt.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v2")} + fooAtMain = index.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v1")} + fooAtMainV2 = index.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v2")} - fooAtMainAndRelease = zoekt.Document{Name: "foo.go", Branches: []string{"main", "release"}, Content: []byte("common foo-main-and-release")} + fooAtMainAndRelease = index.Document{Name: "foo.go", Branches: []string{"main", "release"}, Content: []byte("common foo-main-and-release")} - barAtMain = zoekt.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main")} - barAtMainV2 = zoekt.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main-v2")} + barAtMain = index.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main")} + barAtMainV2 = index.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main-v2")} ) for _, test := range []struct { @@ -760,51 +630,50 @@ func TestDeltaShards(t *testing.T) { steps: []step{ { name: "setup", - documents: []zoekt.Document{barAtMain, fooAtMain}, + documents: []index.Document{barAtMain, fooAtMain}, query: "common", - expectedDocuments: []zoekt.Document{barAtMain, fooAtMain}, + expectedDocuments: []index.Document{barAtMain, fooAtMain}, }, { name: "add new version of foo, tombstone older ones", - documents: []zoekt.Document{fooAtMainV2}, - optFn: func(t *testing.T, o *Options) { + documents: []index.Document{fooAtMainV2}, + optFn: func(t *testing.T, o *index.Options) { o.IsDelta = true - o.changedOrRemovedFiles = []string{"foo.go"} }, query: "common", - expectedDocuments: []zoekt.Document{barAtMain, fooAtMainV2}, + changedFile: "foo.go", + expectedDocuments: []index.Document{barAtMain, fooAtMainV2}, }, { name: "add new version of bar, tombstone older ones", - documents: []zoekt.Document{barAtMainV2}, - optFn: func(t *testing.T, o *Options) { + documents: []index.Document{barAtMainV2}, + optFn: func(t *testing.T, o *index.Options) { o.IsDelta = true - o.changedOrRemovedFiles = []string{"bar.go"} }, query: "common", - expectedDocuments: []zoekt.Document{barAtMainV2, fooAtMainV2}, + changedFile: "bar.go", + expectedDocuments: []index.Document{barAtMainV2, fooAtMainV2}, }, - }, - }, + }}, { name: "tombstone older documents even if the latest shard has no documents", steps: []step{ { name: "setup", - documents: []zoekt.Document{barAtMain, fooAtMain}, + documents: []index.Document{barAtMain, fooAtMain}, query: "common", - expectedDocuments: []zoekt.Document{barAtMain, fooAtMain}, + expectedDocuments: []index.Document{barAtMain, fooAtMain}, }, { // a build with no documents could represent a deletion name: "tombstone older documents", documents: nil, - optFn: func(t *testing.T, o *Options) { + optFn: func(t *testing.T, o *index.Options) { o.IsDelta = true - o.changedOrRemovedFiles = []string{"foo.go"} }, query: "common", - expectedDocuments: []zoekt.Document{barAtMain}, + changedFile: "foo.go", + expectedDocuments: 
[]index.Document{barAtMain}, }, }, }, @@ -813,19 +682,19 @@ func TestDeltaShards(t *testing.T) { steps: []step{ { name: "setup", - documents: []zoekt.Document{barAtMain, fooAtMainAndRelease}, + documents: []index.Document{barAtMain, fooAtMainAndRelease}, query: "common", - expectedDocuments: []zoekt.Document{barAtMain, fooAtMainAndRelease}, + expectedDocuments: []index.Document{barAtMain, fooAtMainAndRelease}, }, { name: "tombstone foo", documents: nil, - optFn: func(t *testing.T, o *Options) { + optFn: func(t *testing.T, o *index.Options) { o.IsDelta = true - o.changedOrRemovedFiles = []string{"foo.go"} }, query: "common", - expectedDocuments: []zoekt.Document{barAtMain}, + changedFile: "foo.go", + expectedDocuments: []index.Document{barAtMain}, }, }, }, @@ -856,7 +725,7 @@ func TestDeltaShards(t *testing.T) { return a.Name < b.Name }) - buildOpts := Options{ + buildOpts := index.Options{ IndexDir: indexDir, RepositoryDescription: repository, } @@ -866,7 +735,8 @@ func TestDeltaShards(t *testing.T) { step.optFn(t, &buildOpts) } - b, err := NewBuilder(buildOpts) + b, err := index.NewBuilder(buildOpts) + b.MarkFileAsChangedOrRemoved(step.changedFile) if err != nil { t.Fatalf("step %q: NewBuilder: %s", step.name, err) } @@ -893,7 +763,7 @@ func TestDeltaShards(t *testing.T) { } state, _ := buildOpts.IndexState() - if diff := cmp.Diff(IndexStateEqual, state); diff != "" { + if diff := cmp.Diff(index.IndexStateEqual, state); diff != "" { t.Errorf("unexpected diff in index state (-want +got):\n%s", diff) } @@ -911,17 +781,17 @@ func TestDeltaShards(t *testing.T) { t.Fatalf("step %q: Search(%q): %s", step.name, step.query, err) } - var receivedDocuments []zoekt.Document + var receivedDocuments []index.Document for _, f := range result.Files { - receivedDocuments = append(receivedDocuments, zoekt.Document{ + receivedDocuments = append(receivedDocuments, index.Document{ Name: f.FileName, Content: f.Content, }) } cmpOpts := []cmp.Option{ - cmpopts.IgnoreFields(zoekt.Document{}, "Branches"), - cmpopts.SortSlices(func(a, b zoekt.Document) bool { + cmpopts.IgnoreFields(index.Document{}, "Branches"), + cmpopts.SortSlices(func(a, b index.Document) bool { if a.Name < b.Name { return true } diff --git a/internal/e2e/e2e_rank_test.go b/internal/e2e/e2e_rank_test.go index 2e856aa30..24abef51a 100644 --- a/internal/e2e/e2e_rank_test.go +++ b/internal/e2e/e2e_rank_test.go @@ -16,7 +16,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/archive" "github.com/sourcegraph/zoekt/internal/shards" "github.com/sourcegraph/zoekt/query" @@ -247,7 +247,7 @@ func indexURL(indexDir, u string) error { // languageMap[lang] = ctags.ScipCTags // } - err := archive.Index(opts, build.Options{ + err := archive.Index(opts, index.Options{ IndexDir: indexDir, CTagsMustSucceed: true, RepositoryDescription: zoekt.Repository{ @@ -370,3 +370,15 @@ func requireCTags(tb testing.TB) { tb.Skip("universal-ctags is missing") } } + +func checkScipCTags() string { + if ctags := os.Getenv("SCIP_CTAGS_COMMAND"); ctags != "" { + return ctags + } + + if ctags, err := exec.LookPath("scip-ctags"); err == nil { + return ctags + } + + return "" +} diff --git a/build/testdata/example.cc b/internal/e2e/examples/example.cc similarity index 100% rename from build/testdata/example.cc rename to internal/e2e/examples/example.cc diff --git a/build/testdata/example.java b/internal/e2e/examples/example.java similarity 
index 100% rename from build/testdata/example.java rename to internal/e2e/examples/example.java diff --git a/build/testdata/example.kt b/internal/e2e/examples/example.kt similarity index 100% rename from build/testdata/example.kt rename to internal/e2e/examples/example.kt diff --git a/build/testdata/example.py b/internal/e2e/examples/example.py similarity index 100% rename from build/testdata/example.py rename to internal/e2e/examples/example.py diff --git a/build/testdata/example.rb b/internal/e2e/examples/example.rb similarity index 100% rename from build/testdata/example.rb rename to internal/e2e/examples/example.rb diff --git a/build/testdata/example.scala b/internal/e2e/examples/example.scala similarity index 100% rename from build/testdata/example.scala rename to internal/e2e/examples/example.scala diff --git a/build/testdata/large_file.cc b/internal/e2e/examples/large_file.cc similarity index 100% rename from build/testdata/large_file.cc rename to internal/e2e/examples/large_file.cc diff --git a/build/scoring_test.go b/internal/e2e/scoring_test.go similarity index 96% rename from build/scoring_test.go rename to internal/e2e/scoring_test.go index 72ba0d55d..d80186d91 100644 --- a/build/scoring_test.go +++ b/internal/e2e/scoring_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package build +package e2e import ( "context" @@ -21,6 +21,7 @@ import ( "testing" "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/ctags" "github.com/sourcegraph/zoekt/internal/shards" "github.com/sourcegraph/zoekt/query" @@ -66,7 +67,7 @@ func TestFileNameMatch(t *testing.T) { } func TestBM25(t *testing.T) { - exampleJava, err := os.ReadFile("./testdata/example.java") + exampleJava, err := os.ReadFile("./examples/example.java") if err != nil { t.Fatal(err) } @@ -97,7 +98,7 @@ func TestBM25(t *testing.T) { // line 54: private static B runInnerInterface(InnerInterface fn, A a) { wantBestLineMatch: 54, }, { - // Another content-only match + // another content-only match fileName: "example.java", query: &query.And{Children: []query.Q{ &query.Substring{Pattern: "system"}, @@ -135,7 +136,7 @@ func TestBM25(t *testing.T) { } func TestJava(t *testing.T) { - exampleJava, err := os.ReadFile("./testdata/example.java") + exampleJava, err := os.ReadFile("./examples/example.java") if err != nil { t.Fatal(err) } @@ -300,7 +301,7 @@ func TestJava(t *testing.T) { } func TestKotlin(t *testing.T) { - exampleKotlin, err := os.ReadFile("./testdata/example.kt") + exampleKotlin, err := os.ReadFile("./examples/example.kt") if err != nil { t.Fatal(err) } @@ -365,7 +366,7 @@ func TestKotlin(t *testing.T) { } func TestCpp(t *testing.T) { - exampleCpp, err := os.ReadFile("./testdata/example.cc") + exampleCpp, err := os.ReadFile("./examples/example.cc") if err != nil { t.Fatal(err) } @@ -422,7 +423,7 @@ func TestCpp(t *testing.T) { } func TestPython(t *testing.T) { - examplePython, err := os.ReadFile("./testdata/example.py") + examplePython, err := os.ReadFile("./examples/example.py") if err != nil { t.Fatal(err) } @@ -466,7 +467,7 @@ func TestPython(t *testing.T) { } func TestRuby(t *testing.T) { - exampleRuby, err := os.ReadFile("./testdata/example.rb") + exampleRuby, err := os.ReadFile("./examples/example.rb") if err != nil { t.Fatal(err) } @@ -506,7 +507,7 @@ func TestRuby(t *testing.T) { } func TestScala(t *testing.T) { - exampleScala, err := os.ReadFile("./testdata/example.scala") + 
exampleScala, err := os.ReadFile("./examples/example.scala") if err != nil { t.Fatal(err) } @@ -641,19 +642,17 @@ func checkScoring(t *testing.T, c scoreCase, useBM25 bool, parserType ctags.CTag t.Run(name, func(t *testing.T) { dir := t.TempDir() - opts := Options{ + opts := index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{ Name: "repo", }, - LanguageMap: ctags.LanguageMap{ - normalizeLanguage(c.language): parserType, - }, + LanguageMap: ctags.LanguageMap{c.language: parserType}, } epsilon := 0.01 - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -707,14 +706,14 @@ func withoutTiebreaker(fullScore float64, useBM25 bool) float64 { if useBM25 { return fullScore } - return math.Trunc(fullScore / zoekt.ScoreOffset) + return math.Trunc(fullScore / index.ScoreOffset) } func TestRepoRanks(t *testing.T) { requireCTags(t) dir := t.TempDir() - opts := Options{ + opts := index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{ Name: "repo", @@ -722,7 +721,7 @@ func TestRepoRanks(t *testing.T) { } searchQuery := &query.Substring{Content: true, Pattern: "Inner"} - exampleJava, err := os.ReadFile("./testdata/example.java") + exampleJava, err := os.ReadFile("./examples/example.java") if err != nil { t.Fatal(err) } @@ -758,12 +757,12 @@ func TestRepoRanks(t *testing.T) { Rank: c.repoRank, } - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } - err = b.Add(zoekt.Document{Name: "example.java", Content: exampleJava}) + err = b.Add(index.Document{Name: "example.java", Content: exampleJava}) if err != nil { t.Fatal(err) } diff --git a/internal/gitindex/ignore_test.go b/internal/gitindex/ignore_test.go index 2284f819d..c8c14c6a4 100644 --- a/internal/gitindex/ignore_test.go +++ b/internal/gitindex/ignore_test.go @@ -10,7 +10,7 @@ import ( "testing" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/shards" "github.com/sourcegraph/zoekt/query" ) @@ -56,7 +56,7 @@ func TestIgnore(t *testing.T) { indexDir := t.TempDir() - buildOpts := build.Options{ + buildOpts := index.Options{ IndexDir: indexDir, RepositoryDescription: zoekt.Repository{ Name: "repo", diff --git a/internal/gitindex/index.go b/internal/gitindex/index.go index 9db15bc0f..3fd341a1f 100644 --- a/internal/gitindex/index.go +++ b/internal/gitindex/index.go @@ -33,15 +33,14 @@ import ( "strings" "github.com/go-git/go-billy/v5/osfs" + "github.com/go-git/go-git/v5/config" + "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/cache" + "github.com/go-git/go-git/v5/plumbing/object" "github.com/go-git/go-git/v5/storage/filesystem" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" "github.com/sourcegraph/zoekt/ignore" - - "github.com/go-git/go-git/v5/config" - "github.com/go-git/go-git/v5/plumbing" - "github.com/go-git/go-git/v5/plumbing/object" + "github.com/sourcegraph/zoekt/index" git "github.com/go-git/go-git/v5" ) @@ -313,7 +312,7 @@ type Options struct { RepoCacheDir string // Indexing options. - BuildOptions build.Options + BuildOptions index.Options // Prefix of the branch to index, e.g. `remotes/origin`. 
BranchPrefix string @@ -512,7 +511,7 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) { } } - builder, err := build.NewBuilder(opts.BuildOptions) + builder, err := index.NewBuilder(opts.BuildOptions) if err != nil { return false, fmt.Errorf("build.NewBuilder: %w", err) } @@ -611,7 +610,7 @@ type repoPathRanks struct { func (r repoPathRanks) rank(path string, content []byte) float64 { if rank, ok := r.Paths[path]; ok { return rank - } else if build.IsLowPriority(path, content) { + } else if index.IsLowPriority(path, content) { return 0.0 } else { return r.MeanRank @@ -689,7 +688,7 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f // If it isn't consistent, that we can't proceed with a delta build (and the caller should fall back to a // normal one). - if !build.BranchNamesEqual(existingRepository.Branches, options.BuildOptions.RepositoryDescription.Branches) { + if !index.BranchNamesEqual(existingRepository.Branches, options.BuildOptions.RepositoryDescription.Branches) { var existingBranchNames []string for _, b := range existingRepository.Branches { existingBranchNames = append(existingBranchNames, b.Name) @@ -707,7 +706,7 @@ } // Check if the build options hash does not match the repository metadata's hash // If it does not match then one or more index options has changed and will require a normal build instead of a delta build if options.BuildOptions.GetHash() != existingRepository.IndexOptions { return nil, nil, nil, fmt.Errorf("one or more index options previously stored for repository %s (ID: %d) does not match the index options for this requested build; These index option updates are incompatible with delta build.
new index options: %+v", existingRepository.Name, existingRepository.ID, options.BuildOptions.HashOptions()) } @@ -894,8 +893,8 @@ func prepareNormalBuild(options Options, repository *git.Repository) (repos map[ func createDocument(key fileKey, repos map[fileKey]BlobLocation, - opts build.Options, -) (zoekt.Document, error) { + opts index.Options, +) (index.Document, error) { repo := repos[key] blob, err := repo.GitRepo.BlobObject(key.ID) branches := repos[key].Branches @@ -906,7 +905,7 @@ func createDocument(key fileKey, } if err != nil { - return zoekt.Document{}, err + return index.Document{}, err } keyFullPath := key.FullPath() @@ -916,10 +915,10 @@ func createDocument(key fileKey, contents, err := blobContents(blob) if err != nil { - return zoekt.Document{}, err + return index.Document{}, err } - return zoekt.Document{ + return index.Document{ SubRepositoryPath: key.SubRepoPath, Name: keyFullPath, Content: contents, @@ -927,8 +926,8 @@ func createDocument(key fileKey, }, nil } -func skippedLargeDoc(key fileKey, branches []string, opts build.Options) zoekt.Document { - return zoekt.Document{ +func skippedLargeDoc(key fileKey, branches []string, opts index.Options) index.Document { + return index.Document{ SkipReason: fmt.Sprintf("file size exceeds maximum size %d", opts.SizeMax), Name: key.FullPath(), Branches: branches, diff --git a/internal/gitindex/index_test.go b/internal/gitindex/index_test.go index 731dc8442..a9470988b 100644 --- a/internal/gitindex/index_test.go +++ b/internal/gitindex/index_test.go @@ -33,8 +33,8 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" "github.com/sourcegraph/zoekt/ignore" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/shards" "github.com/sourcegraph/zoekt/query" ) @@ -54,7 +54,7 @@ func TestIndexEmptyRepo(t *testing.T) { } opts := Options{ RepoDir: filepath.Join(dir, "repo", ".git"), - BuildOptions: build.Options{ + BuildOptions: index.Options{ RepositoryDescription: desc, IndexDir: dir, }, @@ -73,7 +73,7 @@ func TestIndexNonexistentRepo(t *testing.T) { opts := Options{ RepoDir: "does/not/exist", Branches: []string{"main"}, - BuildOptions: build.Options{ + BuildOptions: index.Options{ RepositoryDescription: desc, IndexDir: dir, }, @@ -92,8 +92,8 @@ func TestIndexTinyRepo(t *testing.T) { executeCommand(t, dir, exec.Command("git", "init", "-b", "main", "repo")) repoDir := filepath.Join(dir, "repo") - executeCommand(t, repoDir, exec.Command("git", "config", "user.name", "Thomas")) - executeCommand(t, repoDir, exec.Command("git", "config", "user.email", "thomas@google.com")) + executeCommand(t, repoDir, exec.Command("git", "config", "--local", "user.name", "Thomas")) + executeCommand(t, repoDir, exec.Command("git", "config", "--local", "user.email", "thomas@google.com")) if err := os.WriteFile(filepath.Join(repoDir, "file1.go"), []byte("package main\n\nfunc main() {}\n"), 0644); err != nil { t.Fatalf("WriteFile: %v", err) @@ -106,7 +106,7 @@ func TestIndexTinyRepo(t *testing.T) { opts := Options{ RepoDir: filepath.Join(dir, testDir), Branches: []string{"main"}, - BuildOptions: build.Options{ + BuildOptions: index.Options{ RepositoryDescription: zoekt.Repository{Name: "repo"}, IndexDir: dir, }, @@ -143,7 +143,7 @@ func executeCommand(t *testing.T, dir string, cmd *exec.Cmd) *exec.Cmd { } func TestIndexDeltaBasic(t *testing.T) { - type branchToDocumentMap map[string][]zoekt.Document + type branchToDocumentMap 
map[string][]index.Document type step struct { name string @@ -152,25 +152,25 @@ func TestIndexDeltaBasic(t *testing.T) { optFn func(t *testing.T, options *Options) expectedFallbackToNormalBuild bool - expectedDocuments []zoekt.Document + expectedDocuments []index.Document } - helloWorld := zoekt.Document{Name: "hello_world.txt", Content: []byte("hello")} + helloWorld := index.Document{Name: "hello_world.txt", Content: []byte("hello")} - fruitV1 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("strawberry")} - fruitV1InFolder := zoekt.Document{Name: "the_best/best_fruit.txt", Content: fruitV1.Content} - fruitV1WithNewName := zoekt.Document{Name: "new_fruit.txt", Content: fruitV1.Content} + fruitV1 := index.Document{Name: "best_fruit.txt", Content: []byte("strawberry")} + fruitV1InFolder := index.Document{Name: "the_best/best_fruit.txt", Content: fruitV1.Content} + fruitV1WithNewName := index.Document{Name: "new_fruit.txt", Content: fruitV1.Content} - fruitV2 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("grapes")} - fruitV2InFolder := zoekt.Document{Name: "the_best/best_fruit.txt", Content: fruitV2.Content} + fruitV2 := index.Document{Name: "best_fruit.txt", Content: []byte("grapes")} + fruitV2InFolder := index.Document{Name: "the_best/best_fruit.txt", Content: fruitV2.Content} - fruitV3 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("oranges")} - fruitV4 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("apples")} + fruitV3 := index.Document{Name: "best_fruit.txt", Content: []byte("oranges")} + fruitV4 := index.Document{Name: "best_fruit.txt", Content: []byte("apples")} - foo := zoekt.Document{Name: "foo.txt", Content: []byte("bar")} + foo := index.Document{Name: "foo.txt", Content: []byte("bar")} - emptySourcegraphIgnore := zoekt.Document{Name: ignore.IgnoreFile} - sourcegraphIgnoreWithContent := zoekt.Document{Name: ignore.IgnoreFile, Content: []byte("good_content.txt")} + emptySourcegraphIgnore := index.Document{Name: ignore.IgnoreFile} + sourcegraphIgnoreWithContent := index.Document{Name: ignore.IgnoreFile, Content: []byte("good_content.txt")} for _, test := range []struct { name string @@ -184,21 +184,21 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{helloWorld, fruitV1}, + "main": []index.Document{helloWorld, fruitV1}, }, - expectedDocuments: []zoekt.Document{helloWorld, fruitV1}, + expectedDocuments: []index.Document{helloWorld, fruitV1}, }, { name: "add newer version of fruits", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV2}, + "main": []index.Document{fruitV2}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{helloWorld, fruitV2}, + expectedDocuments: []index.Document{helloWorld, fruitV2}, }, }, }, @@ -209,21 +209,21 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{foo, fruitV1InFolder}, + "main": []index.Document{foo, fruitV1InFolder}, }, - expectedDocuments: []zoekt.Document{foo, fruitV1InFolder}, + expectedDocuments: []index.Document{foo, fruitV1InFolder}, }, { name: "add newer version of fruits inside folder", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV2InFolder}, + "main": []index.Document{fruitV2InFolder}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{foo, fruitV2InFolder}, + expectedDocuments: 
[]index.Document{foo, fruitV2InFolder}, }, }, }, @@ -234,21 +234,21 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{helloWorld, fruitV1}, + "main": []index.Document{helloWorld, fruitV1}, }, - expectedDocuments: []zoekt.Document{helloWorld, fruitV1}, + expectedDocuments: []index.Document{helloWorld, fruitV1}, }, { name: "add new file - foo", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{foo}, + "main": []index.Document{foo}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{helloWorld, fruitV1, foo}, + expectedDocuments: []index.Document{helloWorld, fruitV1, foo}, }, }, }, @@ -259,23 +259,23 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{helloWorld, fruitV1, foo}, + "main": []index.Document{helloWorld, fruitV1, foo}, }, - expectedDocuments: []zoekt.Document{helloWorld, fruitV1, foo}, + expectedDocuments: []index.Document{helloWorld, fruitV1, foo}, }, { name: "delete foo file", addedDocuments: nil, deletedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{foo}, + "main": []index.Document{foo}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{helloWorld, fruitV1}, + expectedDocuments: []index.Document{helloWorld, fruitV1}, }, }, }, @@ -286,27 +286,27 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, - "release": []zoekt.Document{fruitV2}, - "dev": []zoekt.Document{fruitV3}, + "main": []index.Document{fruitV1}, + "release": []index.Document{fruitV2}, + "dev": []index.Document{fruitV3}, }, - expectedDocuments: []zoekt.Document{fruitV1, fruitV2, fruitV3}, + expectedDocuments: []index.Document{fruitV1, fruitV2, fruitV3}, }, { name: "replace fruits v3 with v4 on 'dev', delete fruits on 'main'", addedDocuments: branchToDocumentMap{ - "dev": []zoekt.Document{fruitV4}, + "dev": []index.Document{fruitV4}, }, deletedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, + "main": []index.Document{fruitV1}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{fruitV2, fruitV4}, + expectedDocuments: []index.Document{fruitV2, fruitV4}, }, }, }, @@ -317,25 +317,25 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, - "release": []zoekt.Document{fruitV2}, + "main": []index.Document{fruitV1}, + "release": []index.Document{fruitV2}, }, - expectedDocuments: []zoekt.Document{fruitV1, fruitV2}, + expectedDocuments: []index.Document{fruitV1, fruitV2}, }, { name: "rename fruits file on 'main' + ensure that unmodified fruits file on 'release' is still searchable", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1WithNewName}, + "main": []index.Document{fruitV1WithNewName}, }, deletedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, + "main": []index.Document{fruitV1}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{fruitV1WithNewName, fruitV2}, + expectedDocuments: []index.Document{fruitV1WithNewName, fruitV2}, }, }, }, @@ -346,23 +346,23 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": 
[]zoekt.Document{fruitV1}, - "dev": []zoekt.Document{fruitV2}, + "main": []index.Document{fruitV1}, + "dev": []index.Document{fruitV2}, }, - expectedDocuments: []zoekt.Document{fruitV1, fruitV2}, + expectedDocuments: []index.Document{fruitV1, fruitV2}, }, { name: "switch main to dev's older version of fruits + bump dev's fruits to new version", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV2}, - "dev": []zoekt.Document{fruitV3}, + "main": []index.Document{fruitV2}, + "dev": []index.Document{fruitV3}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{fruitV2, fruitV3}, + expectedDocuments: []index.Document{fruitV2, fruitV3}, }, }, }, @@ -373,10 +373,10 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1, foo}, - "dev": []zoekt.Document{helloWorld}, + "main": []index.Document{fruitV1, foo}, + "dev": []index.Document{helloWorld}, }, - expectedDocuments: []zoekt.Document{fruitV1, foo, helloWorld}, + expectedDocuments: []index.Document{fruitV1, foo, helloWorld}, }, { name: "first no-op (normal build -> delta build)", @@ -384,7 +384,7 @@ func TestIndexDeltaBasic(t *testing.T) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{fruitV1, foo, helloWorld}, + expectedDocuments: []index.Document{fruitV1, foo, helloWorld}, }, { name: "second no-op (delta build -> delta build)", @@ -392,7 +392,7 @@ func TestIndexDeltaBasic(t *testing.T) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{fruitV1, foo, helloWorld}, + expectedDocuments: []index.Document{fruitV1, foo, helloWorld}, }, }, }, @@ -403,14 +403,14 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "attempt delta build on a repository that hasn't been indexed yet", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{helloWorld}, + "main": []index.Document{helloWorld}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, expectedFallbackToNormalBuild: true, - expectedDocuments: []zoekt.Document{helloWorld}, + expectedDocuments: []index.Document{helloWorld}, }, }, }, @@ -421,17 +421,17 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, - "release": []zoekt.Document{fruitV2}, - "dev": []zoekt.Document{fruitV3}, + "main": []index.Document{fruitV1}, + "release": []index.Document{fruitV2}, + "dev": []index.Document{fruitV3}, }, - expectedDocuments: []zoekt.Document{fruitV1, fruitV2, fruitV3}, + expectedDocuments: []index.Document{fruitV1, fruitV2, fruitV3}, }, { name: "try delta build after dropping 'main' branch from index ", addedDocuments: branchToDocumentMap{ - "release": []zoekt.Document{fruitV4}, + "release": []index.Document{fruitV4}, }, optFn: func(t *testing.T, o *Options) { o.Branches = []string{"HEAD", "release", "dev"} // a bit of a hack to override it this way, but it gets the job done @@ -439,7 +439,7 @@ func TestIndexDeltaBasic(t *testing.T) { }, expectedFallbackToNormalBuild: true, - expectedDocuments: []zoekt.Document{fruitV3, fruitV4}, + expectedDocuments: []index.Document{fruitV3, fruitV4}, }, }, }, @@ -450,15 +450,15 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, + "main": []index.Document{fruitV1}, }, - expectedDocuments: []zoekt.Document{fruitV1}, + expectedDocuments: []index.Document{fruitV1}, }, { name: "try delta 
build after updating Disable CTags index option", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV2}, + "main": []index.Document{fruitV2}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true @@ -466,12 +466,12 @@ func TestIndexDeltaBasic(t *testing.T) { }, expectedFallbackToNormalBuild: true, - expectedDocuments: []zoekt.Document{fruitV2}, + expectedDocuments: []index.Document{fruitV2}, }, { name: "try delta build after reverting Disable CTags index option", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV3}, + "main": []index.Document{fruitV3}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true @@ -479,7 +479,7 @@ func TestIndexDeltaBasic(t *testing.T) { }, expectedFallbackToNormalBuild: true, - expectedDocuments: []zoekt.Document{fruitV3}, + expectedDocuments: []index.Document{fruitV3}, }, }, }, @@ -490,15 +490,15 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, + "main": []index.Document{fruitV1}, }, - expectedDocuments: []zoekt.Document{fruitV1}, + expectedDocuments: []index.Document{fruitV1}, }, { name: "try delta build after updating Disable CTags index option", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV2}, + "main": []index.Document{fruitV2}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true @@ -506,19 +506,19 @@ func TestIndexDeltaBasic(t *testing.T) { }, expectedFallbackToNormalBuild: true, - expectedDocuments: []zoekt.Document{fruitV2}, + expectedDocuments: []index.Document{fruitV2}, }, { name: "try another delta build while CTags is still disabled", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV3}, + "main": []index.Document{fruitV3}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true o.BuildOptions.DisableCTags = true }, - expectedDocuments: []zoekt.Document{fruitV3}, + expectedDocuments: []index.Document{fruitV3}, }, }, }, @@ -529,22 +529,22 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{emptySourcegraphIgnore}, + "main": []index.Document{emptySourcegraphIgnore}, }, - expectedDocuments: []zoekt.Document{emptySourcegraphIgnore}, + expectedDocuments: []index.Document{emptySourcegraphIgnore}, }, { name: "attempt delta build after modifying ignore file", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{sourcegraphIgnoreWithContent}, + "main": []index.Document{sourcegraphIgnoreWithContent}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, expectedFallbackToNormalBuild: true, - expectedDocuments: []zoekt.Document{sourcegraphIgnoreWithContent}, + expectedDocuments: []index.Document{sourcegraphIgnoreWithContent}, }, }, }, @@ -555,37 +555,37 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup: first shard", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{foo}, + "main": []index.Document{foo}, }, - expectedDocuments: []zoekt.Document{foo}, + expectedDocuments: []index.Document{foo}, }, { name: "setup: second shard (delta)", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, + "main": []index.Document{fruitV1}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{foo, fruitV1}, + expectedDocuments: []index.Document{foo, fruitV1}, }, { name: "setup: third shard (delta)", addedDocuments: 
branchToDocumentMap{ - "main": []zoekt.Document{helloWorld}, + "main": []index.Document{helloWorld}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{foo, fruitV1, helloWorld}, + expectedDocuments: []index.Document{foo, fruitV1, helloWorld}, }, { name: "attempt another delta build after we already blew past the shard threshold", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV2InFolder}, + "main": []index.Document{fruitV2InFolder}, }, optFn: func(t *testing.T, o *Options) { o.DeltaShardNumberFallbackThreshold = 2 @@ -593,7 +593,7 @@ func TestIndexDeltaBasic(t *testing.T) { }, expectedFallbackToNormalBuild: true, - expectedDocuments: []zoekt.Document{foo, fruitV1, helloWorld, fruitV2InFolder}, + expectedDocuments: []index.Document{foo, fruitV1, helloWorld, fruitV2InFolder}, }, }, }, @@ -664,7 +664,7 @@ func TestIndexDeltaBasic(t *testing.T) { } // setup: prepare indexOptions with given overrides - buildOptions := build.Options{ + buildOptions := index.Options{ IndexDir: indexDir, RepositoryDescription: zoekt.Repository{ Name: "repository", @@ -736,15 +736,15 @@ func TestIndexDeltaBasic(t *testing.T) { t.Fatalf("Search: %s", err) } - var receivedDocuments []zoekt.Document + var receivedDocuments []index.Document for _, f := range result.Files { - receivedDocuments = append(receivedDocuments, zoekt.Document{ + receivedDocuments = append(receivedDocuments, index.Document{ Name: f.FileName, Content: f.Content, }) } - for _, docs := range [][]zoekt.Document{step.expectedDocuments, receivedDocuments} { + for _, docs := range [][]index.Document{step.expectedDocuments, receivedDocuments} { sort.Slice(docs, func(i, j int) bool { a, b := docs[i], docs[j] @@ -763,7 +763,7 @@ func TestIndexDeltaBasic(t *testing.T) { } compareOptions := []cmp.Option{ - cmpopts.IgnoreFields(zoekt.Document{}, "Branches"), + cmpopts.IgnoreFields(index.Document{}, "Branches"), cmpopts.EquateEmpty(), } @@ -923,7 +923,7 @@ func TestSetTemplates(t *testing.T) { assertOutput := func(templateText string, want string) { t.Helper() - tt, err := zoekt.ParseTemplate(templateText) + tt, err := index.ParseTemplate(templateText) if err != nil { t.Fatal(err) } @@ -968,7 +968,7 @@ func BenchmarkPrepareNormalBuild(b *testing.B) { Submodules: false, BranchPrefix: "refs/heads/", Branches: []string{"main"}, - BuildOptions: build.Options{ + BuildOptions: index.Options{ RepositoryDescription: zoekt.Repository{ Name: "test-repo", URL: "https://github.com/example/test-repo", diff --git a/internal/gitindex/tree_test.go b/internal/gitindex/tree_test.go index 7e1bb850f..c38a22b2c 100644 --- a/internal/gitindex/tree_test.go +++ b/internal/gitindex/tree_test.go @@ -30,8 +30,8 @@ import ( "github.com/google/go-cmp/cmp" "github.com/grafana/regexp" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" "github.com/sourcegraph/zoekt/ignore" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/shards" "github.com/sourcegraph/zoekt/query" ) @@ -202,7 +202,7 @@ func TestSubmoduleIndex(t *testing.T) { indexDir := t.TempDir() - buildOpts := build.Options{ + buildOpts := index.Options{ IndexDir: indexDir, } opts := Options{ @@ -306,7 +306,7 @@ func TestSearchSymlinkByContent(t *testing.T) { indexDir := t.TempDir() - buildOpts := build.Options{ + buildOpts := index.Options{ IndexDir: indexDir, } opts := Options{ @@ -363,7 +363,7 @@ func TestAllowMissingBranch(t *testing.T) { indexDir := t.TempDir() - buildOpts := build.Options{ + 
buildOpts := index.Options{ IndexDir: indexDir, } @@ -429,7 +429,7 @@ func TestBranchWildcard(t *testing.T) { indexDir := t.TempDir() - buildOpts := build.Options{ + buildOpts := index.Options{ IndexDir: indexDir, RepositoryDescription: zoekt.Repository{ Name: "repo", @@ -475,7 +475,7 @@ func TestSkipSubmodules(t *testing.T) { indexDir := t.TempDir() - buildOpts := build.Options{ + buildOpts := index.Options{ IndexDir: indexDir, RepositoryDescription: zoekt.Repository{ Name: "gerrit.googlesource.com/adir", @@ -507,7 +507,7 @@ func TestFullAndShortRefNames(t *testing.T) { indexDir := t.TempDir() - buildOpts := build.Options{ + buildOpts := index.Options{ IndexDir: indexDir, RepositoryDescription: zoekt.Repository{ Name: "repo", @@ -560,7 +560,7 @@ func TestLatestCommit(t *testing.T) { t.Fatalf("createMultibranchRepo: %v", err) } - buildOpts := build.Options{ + buildOpts := index.Options{ IndexDir: indexDir, RepositoryDescription: zoekt.Repository{ Name: "repo", diff --git a/internal/profiler/profiler.go b/internal/profiler/profiler.go index 84039ac92..7e0fc433b 100644 --- a/internal/profiler/profiler.go +++ b/internal/profiler/profiler.go @@ -5,7 +5,7 @@ import ( "os" "cloud.google.com/go/profiler" - "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" ) // Init starts the supported profilers IFF the environment variable is set. @@ -13,7 +13,7 @@ func Init(svcName string) { if os.Getenv("GOOGLE_CLOUD_PROFILER_ENABLED") != "" { err := profiler.Start(profiler.Config{ Service: svcName, - ServiceVersion: zoekt.Version, + ServiceVersion: index.Version, MutexProfiling: true, AllocForceGC: true, }) diff --git a/internal/shards/aggregate.go b/internal/shards/aggregate.go index a89d99593..3febd4cf7 100644 --- a/internal/shards/aggregate.go +++ b/internal/shards/aggregate.go @@ -7,8 +7,8 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" ) var metricFinalAggregateSize = promauto.NewHistogramVec(prometheus.HistogramOpts{ @@ -46,7 +46,7 @@ func (c *collectSender) Send(r *zoekt.SearchResult) { if len(r.Files) > 0 { c.aggregate.Files = append(c.aggregate.Files, r.Files...) - c.aggregate.Files = zoekt.SortAndTruncateFiles(c.aggregate.Files, c.opts) + c.aggregate.Files = index.SortAndTruncateFiles(c.aggregate.Files, c.opts) for k, v := range r.RepoURLs { c.aggregate.RepoURLs[k] = v @@ -150,7 +150,7 @@ func newFlushCollectSender(opts *zoekt.SearchOptions, sender zoekt.Sender) (zoek // limitSender wraps a sender and calls cancel once the truncator has finished // truncating. 
-func limitSender(cancel context.CancelFunc, sender zoekt.Sender, truncator zoekt.DisplayTruncator) zoekt.Sender { +func limitSender(cancel context.CancelFunc, sender zoekt.Sender, truncator index.DisplayTruncator) zoekt.Sender { return zoekt.SenderFunc(func(result *zoekt.SearchResult) { var hasMore bool result.Files, hasMore = truncator(result.Files) diff --git a/internal/shards/eval_test.go b/internal/shards/eval_test.go index c53011294..fbed60f20 100644 --- a/internal/shards/eval_test.go +++ b/internal/shards/eval_test.go @@ -6,23 +6,24 @@ import ( "testing" "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/query" ) func TestSearchTypeRepo(t *testing.T) { ss := newShardedSearcher(2) nextShardNum := 1 - addShard := func(docs ...zoekt.Document) { + addShard := func(docs ...index.Document) { b := testIndexBuilder(t, &zoekt.Repository{ID: 1, Name: "reponame"}, docs...) shard := searcherForTest(t, b) ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard}) nextShardNum++ } addShard( - zoekt.Document{Name: "f1", Content: []byte("bla the needle")}, - zoekt.Document{Name: "f2", Content: []byte("another file another needle")}) + index.Document{Name: "f1", Content: []byte("bla the needle")}, + index.Document{Name: "f2", Content: []byte("another file another needle")}) addShard( - zoekt.Document{Name: "f3", Content: []byte("another shard")}) + index.Document{Name: "f3", Content: []byte("another shard")}) searcher := &typeRepoSearcher{ss} search := func(q query.Q, o ...zoekt.SearchOptions) *zoekt.SearchResult { @@ -96,7 +97,7 @@ func TestSearchTypeRepo(t *testing.T) { t.Fatalf("got %v, want 0 matches", len(res.Files)) } // no match by path res = search(query.NewAnd( &query.Type{ Type: query.TypeRepo, diff --git a/internal/shards/sched.go b/internal/shards/sched.go index c9fcd8c99..4028a17fd 100644 --- a/internal/shards/sched.go +++ b/internal/shards/sched.go @@ -340,7 +340,7 @@ func parseTuneables(v string) map[string]int { // - batch timedout // - released // // We have separate gauges and counters for exclusive processes which match // what we track for normal processes: // // - exclusive queued diff --git a/internal/shards/shards.go b/internal/shards/shards.go index 770c54e0d..307b05d2f 100644 --- a/internal/shards/shards.go +++ b/internal/shards/shards.go @@ -28,6 +28,7 @@ import ( "sync" "time" + "github.com/sourcegraph/zoekt/index" "golang.org/x/sync/semaphore" "github.com/prometheus/client_golang/prometheus" @@ -623,7 +624,7 @@ func (ss *shardedSearcher) StreamSearch(ctx context.Context, q query.Q, opts *zo // For streaming, the wrapping has to happen in the inverted order. sender = copyFileSender(sender) - if truncator, hasLimits := zoekt.NewDisplayTruncator(opts); hasLimits { + if truncator, hasLimits := index.NewDisplayTruncator(opts); hasLimits { var cancel context.CancelFunc ctx, cancel = context.WithCancel(ctx) defer cancel() @@ -806,19 +807,19 @@ search: // sendByRepository splits a zoekt.SearchResult by repository and calls // sender.Send for each batch. Ranking in Sourcegraph expects zoekt.SearchResult -// to contain results with the same zoekt.SearchResult.Priority only. +// to contain results with the same zoekt.SearchResult.priority only. // // We split by repository instead of by priority because it is easier to set // RepoURLs and LineFragments in zoekt.SearchResult.
func sendByRepository(result *zoekt.SearchResult, opts *zoekt.SearchOptions, sender zoekt.Sender) { if len(result.RepoURLs) <= 1 || len(result.Files) == 0 { - zoekt.SortFiles(result.Files) + index.SortFiles(result.Files) sender.Send(result) return } send := func(repoName string, a, b int, stats zoekt.Stats) { - zoekt.SortFiles(result.Files[a:b]) + index.SortFiles(result.Files[a:b]) sender.Send(&zoekt.SearchResult{ Stats: stats, Progress: zoekt.Progress{ @@ -1203,11 +1204,11 @@ func loadShard(fn string) (zoekt.Searcher, error) { return nil, err } - iFile, err := zoekt.NewIndexFile(f) + iFile, err := index.NewIndexFile(f) if err != nil { return nil, err } - s, err := zoekt.NewSearcher(iFile) + s, err := index.NewSearcher(iFile) if err != nil { iFile.Close() return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err) diff --git a/internal/shards/shards_test.go b/internal/shards/shards_test.go index c6bfbb601..715dae94d 100644 --- a/internal/shards/shards_test.go +++ b/internal/shards/shards_test.go @@ -36,6 +36,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/grafana/regexp" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/query" @@ -199,7 +200,7 @@ func TestShardedSearcher_Ranking(t *testing.T) { ss := newShardedSearcher(1) var nextShardNum int - addShard := func(repo string, priority float64, docs ...zoekt.Document) { + addShard := func(repo string, priority float64, docs ...index.Document) { r := &zoekt.Repository{ID: hash(repo), Name: repo} r.RawConfig = map[string]string{ "public": "1", @@ -213,10 +214,10 @@ func TestShardedSearcher_Ranking(t *testing.T) { nextShardNum++ } - addShard("weekend-project", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) - addShard("moderately-popular", 500, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) - addShard("weekend-project-2", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) - addShard("super-star", 5000, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) + addShard("weekend-project", 20, index.Document{Name: "f2", Content: []byte("foo bas")}) + addShard("moderately-popular", 500, index.Document{Name: "f3", Content: []byte("foo bar")}) + addShard("weekend-project-2", 20, index.Document{Name: "f2", Content: []byte("foo bas")}) + addShard("super-star", 5000, index.Document{Name: "f1", Content: []byte("foo bar bas")}) want := []string{ "super-star", @@ -241,7 +242,7 @@ func TestShardedSearcher_DocumentRanking(t *testing.T) { ss := newShardedSearcher(1) var nextShardNum int - addShard := func(repo string, rank uint16, docs ...zoekt.Document) { + addShard := func(repo string, rank uint16, docs ...index.Document) { r := &zoekt.Repository{ID: hash(repo), Name: repo} r.RawConfig = map[string]string{ "public": "1", @@ -255,11 +256,11 @@ func TestShardedSearcher_DocumentRanking(t *testing.T) { nextShardNum++ } - addShard("old-project", 1, zoekt.Document{Name: "f1", Content: []byte("foobar")}) - addShard("recent", 2, zoekt.Document{Name: "f2", Content: []byte("foobaz")}) - addShard("old-project-2", 1, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) - addShard("new", 3, zoekt.Document{Name: "f4", Content: []byte("foo baz")}, - zoekt.Document{Name: "f5", Content: []byte("fooooo")}) + addShard("old-project", 1, index.Document{Name: "f1", Content: []byte("foobar")}) + addShard("recent", 2, index.Document{Name: "f2", Content: []byte("foobaz")}) + addShard("old-project-2", 1, index.Document{Name: "f3", Content: []byte("foo 
bar")}) + addShard("new", 3, index.Document{Name: "f4", Content: []byte("foo baz")}, + index.Document{Name: "f5", Content: []byte("fooooo")}) // Run a stream search and gather the results var results []*zoekt.SearchResult @@ -409,7 +410,7 @@ func (s *memSeeker) Size() (uint32, error) { } func TestUnloadIndex(t *testing.T) { - b := testIndexBuilder(t, nil, zoekt.Document{ + b := testIndexBuilder(t, nil, index.Document{ Name: "filename", Content: []byte("needle needle needle"), }) @@ -420,7 +421,7 @@ func TestUnloadIndex(t *testing.T) { } indexBytes := buf.Bytes() indexFile := &memSeeker{indexBytes} - searcher, err := zoekt.NewSearcher(indexFile) + searcher, err := index.NewSearcher(indexFile) if err != nil { t.Fatalf("NewSearcher: %v", err) } @@ -471,7 +472,7 @@ func TestShardedSearcher_List(t *testing.T) { }, } - doc := zoekt.Document{ + doc := index.Document{ Name: "foo.go", Content: []byte("bar\nbaz"), Branches: []string{"main", "dev"}, @@ -608,8 +609,8 @@ func TestShardedSearcher_List(t *testing.T) { } } -func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...zoekt.Document) *zoekt.IndexBuilder { - b, err := zoekt.NewIndexBuilder(repo) +func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...index.Document) *index.IndexBuilder { + b, err := index.NewIndexBuilder(repo) if err != nil { t.Fatalf("NewIndexBuilder: %v", err) } @@ -622,14 +623,14 @@ func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...zoekt.Docume return b } -func searcherForTest(t testing.TB, b *zoekt.IndexBuilder) zoekt.Searcher { +func searcherForTest(t testing.TB, b *index.IndexBuilder) zoekt.Searcher { var buf bytes.Buffer if err := b.Write(&buf); err != nil { t.Fatal(err) } f := &memSeeker{buf.Bytes()} - searcher, err := zoekt.NewSearcher(f) + searcher, err := index.NewSearcher(f) if err != nil { t.Fatalf("NewSearcher: %v", err) } @@ -650,7 +651,7 @@ func reposForTest(n int) (result []*zoekt.Repository) { func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt.Searcher { builder := testIndexBuilder(b, r) - if err := builder.Add(zoekt.Document{ + if err := builder.Add(index.Document{ Name: fmt.Sprintf("%s/filename-%d.go", r.Name, 0), Content: []byte("needle needle needle haystack"), }); err != nil { @@ -658,7 +659,7 @@ func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt. } for i := 1; i < numFiles; i++ { - if err := builder.Add(zoekt.Document{ + if err := builder.Add(index.Document{ Name: fmt.Sprintf("%s/filename-%d.go", r.Name, i), Content: []byte("haystack haystack haystack"), }); err != nil { @@ -754,7 +755,7 @@ func TestRawQuerySearch(t *testing.T) { ss := newShardedSearcher(1) var nextShardNum int - addShard := func(repo string, rawConfig map[string]string, docs ...zoekt.Document) { + addShard := func(repo string, rawConfig map[string]string, docs ...index.Document) { r := &zoekt.Repository{Name: repo} r.RawConfig = rawConfig b := testIndexBuilder(t, r, docs...) 
@@ -762,9 +763,9 @@ func TestRawQuerySearch(t *testing.T) { ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard}) nextShardNum++ } - addShard("public", map[string]string{"public": "1"}, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) - addShard("private_archived", map[string]string{"archived": "1"}, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) - addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) + addShard("public", map[string]string{"public": "1"}, index.Document{Name: "f1", Content: []byte("foo bar bas")}) + addShard("private_archived", map[string]string{"archived": "1"}, index.Document{Name: "f2", Content: []byte("foo bas")}) + addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, index.Document{Name: "f3", Content: []byte("foo bar")}) cases := []struct { pattern string @@ -979,7 +980,7 @@ func mkSearchResult(n int, repoID uint32) *zoekt.SearchResult { func TestFileBasedSearch(t *testing.T) { cases := []struct { name string - testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch + testShardedSearch func(t *testing.T, q query.Q, ib *index.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch }{ {"Search", testShardedSearch}, {"StreamSearch", testShardedStreamSearch}, @@ -990,8 +991,8 @@ func TestFileBasedSearch(t *testing.T) { c2 := []byte("In Dutch, ananas means pineapple") // -----------0123456789012345678901234567890123456789 b := testIndexBuilder(t, nil, - zoekt.Document{Name: "f1", Content: c1}, - zoekt.Document{Name: "f2", Content: c2}, + index.Document{Name: "f1", Content: c1}, + index.Document{Name: "f2", Content: c2}, ) for _, tt := range cases { @@ -1019,17 +1020,17 @@ func TestFileBasedSearch(t *testing.T) { func TestWordBoundaryRanking(t *testing.T) { cases := []struct { name string - testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch + testShardedSearch func(t *testing.T, q query.Q, ib *index.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch }{ {"Search", testShardedSearch}, {"StreamSearch", testShardedStreamSearch}, } b := testIndexBuilder(t, nil, - zoekt.Document{Name: "f1", Content: []byte("xbytex xbytex")}, - zoekt.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")}, + index.Document{Name: "f1", Content: []byte("xbytex xbytex")}, + index.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")}, // -----------------------------------------0123456 789012 34567890 - zoekt.Document{Name: "f3", Content: []byte("xbytex ybytex")}) + index.Document{Name: "f3", Content: []byte("xbytex ybytex")}) for _, tt := range cases { for _, useDocumentRanks := range []bool{false, true} { @@ -1059,7 +1060,7 @@ func TestWordBoundaryRanking(t *testing.T) { func TestAtomCountScore(t *testing.T) { cases := []struct { name string - testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch + testShardedSearch func(t *testing.T, q query.Q, ib *index.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch }{ {"Search", testShardedSearch}, {"StreamSearch", testShardedStreamSearch}, @@ -1072,9 +1073,9 @@ func TestAtomCountScore(t *testing.T) { {Name: "needle", Version: "v2"}, }, }, - zoekt.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}}, - zoekt.Document{Name: "needle-file-branch", Content: []byte("needle content"), 
Branches: []string{"needle"}}, - zoekt.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}}) + index.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}}, + index.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}}, + index.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}}) for _, tt := range cases { for _, useDocumentRanks := range []bool{false, true} { @@ -1101,9 +1102,9 @@ func TestAtomCountScore(t *testing.T) { func TestUseBM25Scoring(t *testing.T) { b := testIndexBuilder(t, &zoekt.Repository{}, - zoekt.Document{Name: "f1", Content: []byte("one two two three")}, - zoekt.Document{Name: "f2", Content: []byte("one two one two")}, - zoekt.Document{Name: "f3", Content: []byte("one three three three")}) + index.Document{Name: "f1", Content: []byte("one two two three")}, + index.Document{Name: "f2", Content: []byte("one two one two")}, + index.Document{Name: "f3", Content: []byte("one three three three")}) ss := newShardedSearcher(1) searcher := searcherForTest(t, b) @@ -1133,7 +1134,7 @@ func TestUseBM25Scoring(t *testing.T) { } } -func testShardedStreamSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { +func testShardedStreamSearch(t *testing.T, q query.Q, ib *index.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { ss := newShardedSearcher(1) searcher := searcherForTest(t, ib) ss.replace(map[string]zoekt.Searcher{"r1": searcher}) @@ -1153,7 +1154,7 @@ func testShardedStreamSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, us return files } -func testShardedSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { +func testShardedSearch(t *testing.T, q query.Q, ib *index.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { ss := newShardedSearcher(1) searcher := searcherForTest(t, ib) ss.replace(map[string]zoekt.Searcher{"r1": searcher}) diff --git a/internal/shards/watcher.go b/internal/shards/watcher.go index f64163f0c..e089163cc 100644 --- a/internal/shards/watcher.go +++ b/internal/shards/watcher.go @@ -26,7 +26,7 @@ import ( "time" "github.com/fsnotify/fsnotify" - "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" ) type shardLoader interface { @@ -130,7 +130,7 @@ func (s *DirectoryWatcher) scan() error { // In the case of downgrades, avoid reading // newer index formats. 
- if version > zoekt.IndexFormatVersion && version > zoekt.NextIndexFormatVersion { + if version > index.IndexFormatVersion && version > index.NextIndexFormatVersion { continue } diff --git a/internal/shards/watcher_test.go b/internal/shards/watcher_test.go index be91fb4ba..b521708ac 100644 --- a/internal/shards/watcher_test.go +++ b/internal/shards/watcher_test.go @@ -21,7 +21,7 @@ import ( "testing" "time" - "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" ) type loggingLoader struct { @@ -182,7 +182,7 @@ func TestDirWatcherLoadLatest(t *testing.T) { // t.Fatalf("got %v, want 'empty'", err) // } - want := zoekt.NextIndexFormatVersion + want := index.NextIndexFormatVersion shardLatest := filepath.Join(dir, fmt.Sprintf("foo_v%d.00000.zoekt", want)) for delta := -1; delta <= 1; delta++ { diff --git a/web/api.go b/web/api.go index 4f8c1468c..072e5e79c 100644 --- a/web/api.go +++ b/web/api.go @@ -54,7 +54,7 @@ type FileMatch struct { ResultID string Language string // If this was a duplicate result, this will contain the file // of the first match. DuplicateID string Branches []string diff --git a/web/e2e_test.go b/web/e2e_test.go index 2b234837e..5afd5b0e0 100644 --- a/web/e2e_test.go +++ b/web/e2e_test.go @@ -30,6 +30,7 @@ import ( "time" "github.com/google/go-cmp/cmp" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/query" @@ -54,14 +55,14 @@ func (s *memSeeker) Name() string { return "memSeeker" } -func searcherForTest(t *testing.T, b *zoekt.IndexBuilder) zoekt.Streamer { +func searcherForTest(t *testing.T, b *index.IndexBuilder) zoekt.Streamer { var buf bytes.Buffer if err := b.Write(&buf); err != nil { t.Fatal(err) } f := &memSeeker{buf.Bytes()} - searcher, err := zoekt.NewSearcher(f) + searcher, err := index.NewSearcher(f) if err != nil { t.Fatalf("NewSearcher: %v", err) } @@ -83,7 +84,7 @@ func (a adapter) StreamSearch(ctx context.Context, q query.Q, opts *zoekt.Search } func TestBasic(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewIndexBuilder(&zoekt.Repository{ Name: "name", URL: "repo-url", CommitURLTemplate: `{{ URLJoinPath "https://github.com/org/repo/commit/" .Version}}`, @@ -94,7 +95,7 @@ func TestBasic(t *testing.T) { if err != nil { t.Fatalf("NewIndexBuilder: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ // use a name which requires correct escaping.
https://github.com/sourcegraph/zoekt/issues/807 Name: "foo/bar+baz", Content: []byte("to carry water in the no later bla"), @@ -149,7 +150,7 @@ } func TestPrint(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewIndexBuilder(&zoekt.Repository{ Name: "name", URL: "repo-url", CommitURLTemplate: "{{.Version}}", @@ -160,7 +161,7 @@ if err != nil { t.Fatalf("NewIndexBuilder: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f2", Content: []byte("to carry water in the no later bla"), Branches: []string{"master"}, @@ -168,7 +169,7 @@ t.Fatalf("Add: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "dir/f2", Content: []byte("blabla"), Branches: []string{"master"}, @@ -202,7 +203,7 @@ } func TestPrintDefault(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewIndexBuilder(&zoekt.Repository{ Name: "name", URL: "repo-url", Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}}, @@ -210,7 +211,7 @@ if err != nil { t.Fatalf("NewIndexBuilder: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f2", Content: []byte("to carry water in the no later bla"), Branches: []string{"master"}, @@ -272,7 +273,7 @@ type Expectation struct { } func TestFormatJson(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewIndexBuilder(&zoekt.Repository{ Name: "name", URL: "repo-url", Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}}, @@ -280,7 +281,7 @@ if err != nil { t.Fatalf("NewIndexBuilder: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f2", Content: []byte("to carry water in the no later bla"), Branches: []string{"master"}, @@ -327,7 +328,7 @@ } func TestContextLines(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewIndexBuilder(&zoekt.Repository{ Name: "name", URL: "repo-url", Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}}, @@ -335,28 +336,28 @@ if err != nil { t.Fatalf("NewIndexBuilder: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f2", Content: []byte("one line\nsecond snippet\nthird thing\nfourth\nfifth block\nsixth example\nseventh"), Branches: []string{"master"}, }); err != nil { t.Fatalf("Add: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f3", Content: []byte("\n\n\n\nto carry water in the no later bla\n\n\n\n"), Branches: []string{"master"}, }); err != nil { t.Fatalf("Add: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f4", Content: []byte("un \n \n\ttrois\n \n\nsix\n "), Branches: []string{"master"}, }); err != nil { t.Fatalf("Add: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f5", Content: []byte("\ngreen\npastures\n\nhere"), Branches: []string{"master"}, @@ -440,7 +441,7 @@ }, }, "/search?q=one&format=json&ctx=2": { "match at start returns After but no Before", FileMatch{ FileName: "f2", Repo: "name", @@ -461,7 +462,7 @@ func
TestContextLines(t *testing.T) { }, }, "/search?q=seventh&format=json&ctx=2": { "match at end returns Before but no After", FileMatch{ FileName: "f2", Repo: "name", @@ -482,7 +483,7 @@ func TestContextLines(t *testing.T) { }, }, "/search?q=seventh&format=json&ctx=10": { "match with large context at end returns whole document", FileMatch{ FileName: "f2", Repo: "name", @@ -503,7 +504,7 @@ }, }, "/search?q=one&format=json&ctx=10": { "match with large context at start returns whole document", FileMatch{ FileName: "f2", Repo: "name", @@ -651,7 +652,7 @@ func checkResultMatches(t *testing.T, ts *httptest.Server, req string, expected } func TestContextLinesMustBeValid(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewIndexBuilder(&zoekt.Repository{ Name: "name", URL: "repo-url", Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}}, @@ -659,7 +660,7 @@ if err != nil { t.Fatalf("NewIndexBuilder: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f2", Content: []byte("to carry water in the no later bla"), Branches: []string{"master"}, @@ -746,13 +747,13 @@ func TestCrash(t *testing.T) { } func TestHostCustomization(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewIndexBuilder(&zoekt.Repository{ Name: "name", }) if err != nil { t.Fatalf("NewIndexBuilder: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "file", Content: []byte("bla"), }); err != nil { @@ -798,7 +799,7 @@ } func TestDupResult(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewIndexBuilder(&zoekt.Repository{ Name: "name", }) if err != nil { @@ -806,7 +807,7 @@ } for i := 0; i < 2; i++ { - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: fmt.Sprintf("file%d", i), Content: []byte("bla"), }); err != nil { @@ -848,7 +849,7 @@ } func TestTruncateLine(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewIndexBuilder(&zoekt.Repository{ Name: "name", }) if err != nil { @@ -856,7 +857,7 @@ } largePadding := bytes.Repeat([]byte{'a'}, 100*1000) // 100kb - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "file", Content: append(append(largePadding, []byte("helloworld")...), largePadding...), }); err != nil { @@ -904,7 +905,7 @@ } func TestHealthz(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewIndexBuilder(&zoekt.Repository{ Name: "name", }) if err != nil { @@ -912,7 +913,7 @@ } for i := 0; i < 2; i++ { - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: fmt.Sprintf("file%d", i), Content: []byte("bla"), }); err != nil { diff --git a/web/server.go b/web/server.go index 5e9e1ac1b..84efb225d 100644 --- a/web/server.go +++ b/web/server.go @@ -32,8 +32,10 @@ import ( "time" "github.com/grafana/regexp" - "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" zjson "github.com/sourcegraph/zoekt/internal/json" + +
"github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/internal/tenant/systemtenant" "github.com/sourcegraph/zoekt/query" ) @@ -152,7 +154,7 @@ func (s *Server) getTextTemplate(str string) *texttemplate.Template { return t } - t, err := zoekt.ParseTemplate(str) + t, err := index.ParseTemplate(str) if err != nil { log.Printf("text template parse error: %v", err) t = texttemplate.Must(texttemplate.New("empty").Parse(""))