mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 08:30:25 +08:00 
			
		
		
		
	Refactor parseTreeEntries, speed up tree list (#21368)
Close #20315 (fix the panic when parsing invalid input), Speed up #20231 (use ls-tree without size field) Introduce ListEntriesRecursiveFast (ls-tree without size) and ListEntriesRecursiveWithSize (ls-tree with size)
This commit is contained in:
		@@ -22,70 +22,72 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
 | 
				
			|||||||
	return parseTreeEntries(data, nil)
 | 
						return parseTreeEntries(data, nil)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					var sepSpace = []byte{' '}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
 | 
					func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
 | 
				
			||||||
	entries := make([]*TreeEntry, 0, 10)
 | 
						var err error
 | 
				
			||||||
 | 
						entries := make([]*TreeEntry, 0, bytes.Count(data, []byte{'\n'})+1)
 | 
				
			||||||
	for pos := 0; pos < len(data); {
 | 
						for pos := 0; pos < len(data); {
 | 
				
			||||||
		// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
 | 
							// expect line to be of the form:
 | 
				
			||||||
 | 
							// <mode> <type> <sha> <space-padded-size>\t<filename>
 | 
				
			||||||
 | 
							// <mode> <type> <sha>\t<filename>
 | 
				
			||||||
 | 
							posEnd := bytes.IndexByte(data[pos:], '\n')
 | 
				
			||||||
 | 
							if posEnd == -1 {
 | 
				
			||||||
 | 
								posEnd = len(data)
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								posEnd += pos
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							line := data[pos:posEnd]
 | 
				
			||||||
 | 
							posTab := bytes.IndexByte(line, '\t')
 | 
				
			||||||
 | 
							if posTab == -1 {
 | 
				
			||||||
 | 
								return nil, fmt.Errorf("invalid ls-tree output (no tab): %q", line)
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		entry := new(TreeEntry)
 | 
							entry := new(TreeEntry)
 | 
				
			||||||
		entry.ptree = ptree
 | 
							entry.ptree = ptree
 | 
				
			||||||
		if pos+6 > len(data) {
 | 
					
 | 
				
			||||||
			return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
 | 
							entryAttrs := line[:posTab]
 | 
				
			||||||
 | 
							entryName := line[posTab+1:]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							entryMode, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace)
 | 
				
			||||||
 | 
							_ /* entryType */, entryAttrs, _ = bytes.Cut(entryAttrs, sepSpace) // the type is not used, the mode is enough to determine the type
 | 
				
			||||||
 | 
							entryObjectID, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace)
 | 
				
			||||||
 | 
							if len(entryAttrs) > 0 {
 | 
				
			||||||
 | 
								entrySize := entryAttrs // the last field is the space-padded-size
 | 
				
			||||||
 | 
								entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(entrySize)), 10, 64)
 | 
				
			||||||
 | 
								entry.sized = true
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		switch string(data[pos : pos+6]) {
 | 
					
 | 
				
			||||||
 | 
							switch string(entryMode) {
 | 
				
			||||||
		case "100644":
 | 
							case "100644":
 | 
				
			||||||
			entry.entryMode = EntryModeBlob
 | 
								entry.entryMode = EntryModeBlob
 | 
				
			||||||
			pos += 12 // skip over "100644 blob "
 | 
					 | 
				
			||||||
		case "100755":
 | 
							case "100755":
 | 
				
			||||||
			entry.entryMode = EntryModeExec
 | 
								entry.entryMode = EntryModeExec
 | 
				
			||||||
			pos += 12 // skip over "100755 blob "
 | 
					 | 
				
			||||||
		case "120000":
 | 
							case "120000":
 | 
				
			||||||
			entry.entryMode = EntryModeSymlink
 | 
								entry.entryMode = EntryModeSymlink
 | 
				
			||||||
			pos += 12 // skip over "120000 blob "
 | 
					 | 
				
			||||||
		case "160000":
 | 
							case "160000":
 | 
				
			||||||
			entry.entryMode = EntryModeCommit
 | 
								entry.entryMode = EntryModeCommit
 | 
				
			||||||
			pos += 14 // skip over "160000 object "
 | 
					 | 
				
			||||||
		case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
 | 
							case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
 | 
				
			||||||
			entry.entryMode = EntryModeTree
 | 
								entry.entryMode = EntryModeTree
 | 
				
			||||||
			pos += 12 // skip over "040000 tree "
 | 
					 | 
				
			||||||
		default:
 | 
							default:
 | 
				
			||||||
			return nil, fmt.Errorf("unknown type: %v", string(data[pos:pos+6]))
 | 
								return nil, fmt.Errorf("unknown type: %v", string(entryMode))
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if pos+40 > len(data) {
 | 
							entry.ID, err = NewIDFromString(string(entryObjectID))
 | 
				
			||||||
			return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		id, err := NewIDFromString(string(data[pos : pos+40]))
 | 
					 | 
				
			||||||
		if err != nil {
 | 
							if err != nil {
 | 
				
			||||||
			return nil, fmt.Errorf("Invalid ls-tree output: %v", err)
 | 
								return nil, fmt.Errorf("invalid ls-tree output (invalid object id): %q, err: %w", line, err)
 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		entry.ID = id
 | 
					 | 
				
			||||||
		pos += 41 // skip over sha and trailing space
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		end := pos + bytes.IndexByte(data[pos:], '\t')
 | 
					 | 
				
			||||||
		if end < pos {
 | 
					 | 
				
			||||||
			return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
 | 
					 | 
				
			||||||
		entry.sized = true
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		pos = end + 1
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		end = pos + bytes.IndexByte(data[pos:], '\n')
 | 
					 | 
				
			||||||
		if end < pos {
 | 
					 | 
				
			||||||
			return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
 | 
					 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		// In case entry name is surrounded by double quotes(it happens only in git-shell).
 | 
							if len(entryName) > 0 && entryName[0] == '"' {
 | 
				
			||||||
		if data[pos] == '"' {
 | 
								entry.name, err = strconv.Unquote(string(entryName))
 | 
				
			||||||
			entry.name, err = strconv.Unquote(string(data[pos:end]))
 | 
					 | 
				
			||||||
			if err != nil {
 | 
								if err != nil {
 | 
				
			||||||
				return nil, fmt.Errorf("Invalid ls-tree output: %v", err)
 | 
									return nil, fmt.Errorf("invalid ls-tree output (invalid name): %q, err: %w", line, err)
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			entry.name = string(data[pos:end])
 | 
								entry.name = string(entryName)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		pos = end + 1
 | 
							pos = posEnd + 1
 | 
				
			||||||
		entries = append(entries, entry)
 | 
							entries = append(entries, entry)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return entries, nil
 | 
						return entries, nil
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -12,7 +12,7 @@ import (
 | 
				
			|||||||
	"github.com/stretchr/testify/assert"
 | 
						"github.com/stretchr/testify/assert"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func TestParseTreeEntries(t *testing.T) {
 | 
					func TestParseTreeEntriesLong(t *testing.T) {
 | 
				
			||||||
	testCases := []struct {
 | 
						testCases := []struct {
 | 
				
			||||||
		Input    string
 | 
							Input    string
 | 
				
			||||||
		Expected []*TreeEntry
 | 
							Expected []*TreeEntry
 | 
				
			||||||
@@ -59,11 +59,47 @@ func TestParseTreeEntries(t *testing.T) {
 | 
				
			|||||||
		assert.NoError(t, err)
 | 
							assert.NoError(t, err)
 | 
				
			||||||
		assert.Len(t, entries, len(testCase.Expected))
 | 
							assert.Len(t, entries, len(testCase.Expected))
 | 
				
			||||||
		for i, entry := range entries {
 | 
							for i, entry := range entries {
 | 
				
			||||||
			assert.EqualValues(t, testCase.Expected[i].ID, entry.ID)
 | 
								assert.EqualValues(t, testCase.Expected[i], entry)
 | 
				
			||||||
			assert.EqualValues(t, testCase.Expected[i].name, entry.name)
 | 
					 | 
				
			||||||
			assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode)
 | 
					 | 
				
			||||||
			assert.EqualValues(t, testCase.Expected[i].sized, entry.sized)
 | 
					 | 
				
			||||||
			assert.EqualValues(t, testCase.Expected[i].size, entry.size)
 | 
					 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func TestParseTreeEntriesShort(t *testing.T) {
 | 
				
			||||||
 | 
						testCases := []struct {
 | 
				
			||||||
 | 
							Input    string
 | 
				
			||||||
 | 
							Expected []*TreeEntry
 | 
				
			||||||
 | 
						}{
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af	README.md
 | 
				
			||||||
 | 
					040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d	assets
 | 
				
			||||||
 | 
					`,
 | 
				
			||||||
 | 
								Expected: []*TreeEntry{
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										ID:        MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"),
 | 
				
			||||||
 | 
										name:      "README.md",
 | 
				
			||||||
 | 
										entryMode: EntryModeBlob,
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										ID:        MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"),
 | 
				
			||||||
 | 
										name:      "assets",
 | 
				
			||||||
 | 
										entryMode: EntryModeTree,
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						for _, testCase := range testCases {
 | 
				
			||||||
 | 
							entries, err := ParseTreeEntries([]byte(testCase.Input))
 | 
				
			||||||
 | 
							assert.NoError(t, err)
 | 
				
			||||||
 | 
							assert.Len(t, entries, len(testCase.Expected))
 | 
				
			||||||
 | 
							for i, entry := range entries {
 | 
				
			||||||
 | 
								assert.EqualValues(t, testCase.Expected[i], entry)
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func TestParseTreeEntriesInvalid(t *testing.T) {
 | 
				
			||||||
 | 
						// there was a panic: "runtime error: slice bounds out of range" when the input was invalid: #20315
 | 
				
			||||||
 | 
						entries, err := ParseTreeEntries([]byte("100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af"))
 | 
				
			||||||
 | 
						assert.Error(t, err)
 | 
				
			||||||
 | 
						assert.Len(t, entries, 0)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -57,7 +57,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	tree := commit.Tree
 | 
						tree := commit.Tree
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	entries, err := tree.ListEntriesRecursive()
 | 
						entries, err := tree.ListEntriesRecursiveWithSize()
 | 
				
			||||||
	if err != nil {
 | 
						if err != nil {
 | 
				
			||||||
		return nil, err
 | 
							return nil, err
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -57,8 +57,8 @@ func (t *Tree) ListEntries() (Entries, error) {
 | 
				
			|||||||
	return entries, nil
 | 
						return entries, nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// ListEntriesRecursive returns all entries of current tree recursively including all subtrees
 | 
					// ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees
 | 
				
			||||||
func (t *Tree) ListEntriesRecursive() (Entries, error) {
 | 
					func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
 | 
				
			||||||
	if t.gogitTree == nil {
 | 
						if t.gogitTree == nil {
 | 
				
			||||||
		err := t.loadTreeObject()
 | 
							err := t.loadTreeObject()
 | 
				
			||||||
		if err != nil {
 | 
							if err != nil {
 | 
				
			||||||
@@ -92,3 +92,8 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	return entries, nil
 | 
						return entries, nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// ListEntriesRecursiveFast is the alias of ListEntriesRecursiveWithSize for the gogit version
 | 
				
			||||||
 | 
					func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
 | 
				
			||||||
 | 
						return t.ListEntriesRecursiveWithSize()
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -99,13 +99,16 @@ func (t *Tree) ListEntries() (Entries, error) {
 | 
				
			|||||||
	return t.entries, err
 | 
						return t.entries, err
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// ListEntriesRecursive returns all entries of current tree recursively including all subtrees
 | 
					// listEntriesRecursive returns all entries of current tree recursively including all subtrees
 | 
				
			||||||
func (t *Tree) ListEntriesRecursive() (Entries, error) {
 | 
					// extraArgs could be "-l" to get the size, which is slower
 | 
				
			||||||
 | 
					func (t *Tree) listEntriesRecursive(extraArgs ...string) (Entries, error) {
 | 
				
			||||||
	if t.entriesRecursiveParsed {
 | 
						if t.entriesRecursiveParsed {
 | 
				
			||||||
		return t.entriesRecursive, nil
 | 
							return t.entriesRecursive, nil
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	stdout, _, runErr := NewCommand(t.repo.Ctx, "ls-tree", "-t", "-l", "-r", t.ID.String()).RunStdBytes(&RunOpts{Dir: t.repo.Path})
 | 
						args := append([]string{"ls-tree", "-t", "-r"}, extraArgs...)
 | 
				
			||||||
 | 
						args = append(args, t.ID.String())
 | 
				
			||||||
 | 
						stdout, _, runErr := NewCommand(t.repo.Ctx, args...).RunStdBytes(&RunOpts{Dir: t.repo.Path})
 | 
				
			||||||
	if runErr != nil {
 | 
						if runErr != nil {
 | 
				
			||||||
		return nil, runErr
 | 
							return nil, runErr
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
@@ -118,3 +121,13 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	return t.entriesRecursive, err
 | 
						return t.entriesRecursive, err
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// ListEntriesRecursiveFast returns all entries of current tree recursively including all subtrees, no size
 | 
				
			||||||
 | 
					func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
 | 
				
			||||||
 | 
						return t.listEntriesRecursive()
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees, with size
 | 
				
			||||||
 | 
					func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
 | 
				
			||||||
 | 
						return t.listEntriesRecursive("--long")
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -22,9 +22,9 @@ func TreeList(ctx *context.Context) {
 | 
				
			|||||||
		return
 | 
							return
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	entries, err := tree.ListEntriesRecursive()
 | 
						entries, err := tree.ListEntriesRecursiveFast()
 | 
				
			||||||
	if err != nil {
 | 
						if err != nil {
 | 
				
			||||||
		ctx.ServerError("ListEntriesRecursive", err)
 | 
							ctx.ServerError("ListEntriesRecursiveFast", err)
 | 
				
			||||||
		return
 | 
							return
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	entries.CustomSort(base.NaturalSortLess)
 | 
						entries.CustomSort(base.NaturalSortLess)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -29,7 +29,7 @@ func GetTreeBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git
 | 
				
			|||||||
	tree.URL = repo.APIURL() + "/git/trees/" + url.PathEscape(tree.SHA)
 | 
						tree.URL = repo.APIURL() + "/git/trees/" + url.PathEscape(tree.SHA)
 | 
				
			||||||
	var entries git.Entries
 | 
						var entries git.Entries
 | 
				
			||||||
	if recursive {
 | 
						if recursive {
 | 
				
			||||||
		entries, err = gitTree.ListEntriesRecursive()
 | 
							entries, err = gitTree.ListEntriesRecursiveWithSize()
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		entries, err = gitTree.ListEntries()
 | 
							entries, err = gitTree.ListEntries()
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user