mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 16:40:24 +08:00 
			
		
		
		
	Fix language stat calculation (#11692)
* Fix language stat calculation
* Group languages and ignore 0-size files
* Remove unneeded code
This commit is contained in:
		
				
					committed by
					
						
						GitHub
					
				
			
			
				
	
			
			
			
						parent
						
							ea4c139cd2
						
					
				
				
					commit
					9d652002c6
				
			@@ -26,22 +26,6 @@ type LanguageStat struct {
 | 
				
			|||||||
	CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
 | 
						CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// specialLanguages defines list of languages that are excluded from the calculation
 | 
					 | 
				
			||||||
// unless they are the only language present in repository. Only languages which under
 | 
					 | 
				
			||||||
// normal circumstances are not considered to be code should be listed here.
 | 
					 | 
				
			||||||
var specialLanguages = map[string]struct{}{
 | 
					 | 
				
			||||||
	"XML":      {},
 | 
					 | 
				
			||||||
	"JSON":     {},
 | 
					 | 
				
			||||||
	"TOML":     {},
 | 
					 | 
				
			||||||
	"YAML":     {},
 | 
					 | 
				
			||||||
	"INI":      {},
 | 
					 | 
				
			||||||
	"SQL":      {},
 | 
					 | 
				
			||||||
	"SVG":      {},
 | 
					 | 
				
			||||||
	"Text":     {},
 | 
					 | 
				
			||||||
	"Markdown": {},
 | 
					 | 
				
			||||||
	"other":    {},
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// LanguageStatList defines a list of language statistics
 | 
					// LanguageStatList defines a list of language statistics
 | 
				
			||||||
type LanguageStatList []*LanguageStat
 | 
					type LanguageStatList []*LanguageStat
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -55,27 +39,12 @@ func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
 | 
				
			|||||||
	langPerc := make(map[string]float32)
 | 
						langPerc := make(map[string]float32)
 | 
				
			||||||
	var otherPerc float32 = 100
 | 
						var otherPerc float32 = 100
 | 
				
			||||||
	var total int64
 | 
						var total int64
 | 
				
			||||||
	// Check that repository has at least one non-special language
 | 
					
 | 
				
			||||||
	var skipSpecial bool
 | 
					 | 
				
			||||||
	for _, stat := range stats {
 | 
						for _, stat := range stats {
 | 
				
			||||||
		if _, ok := specialLanguages[stat.Language]; !ok {
 | 
					 | 
				
			||||||
			skipSpecial = true
 | 
					 | 
				
			||||||
			break
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	for _, stat := range stats {
 | 
					 | 
				
			||||||
		// Exclude specific languages from percentage calculation
 | 
					 | 
				
			||||||
		if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
 | 
					 | 
				
			||||||
			continue
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		total += stat.Size
 | 
							total += stat.Size
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if total > 0 {
 | 
						if total > 0 {
 | 
				
			||||||
		for _, stat := range stats {
 | 
							for _, stat := range stats {
 | 
				
			||||||
			// Exclude specific languages from percentage calculation
 | 
					 | 
				
			||||||
			if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
 | 
					 | 
				
			||||||
				continue
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10)
 | 
								perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10)
 | 
				
			||||||
			if perc <= 0.1 {
 | 
								if perc <= 0.1 {
 | 
				
			||||||
				continue
 | 
									continue
 | 
				
			||||||
@@ -84,8 +53,6 @@ func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
 | 
				
			|||||||
			langPerc[stat.Language] = perc
 | 
								langPerc[stat.Language] = perc
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
 | 
							otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
 | 
				
			||||||
	} else {
 | 
					 | 
				
			||||||
		otherPerc = 100
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if otherPerc > 0 {
 | 
						if otherPerc > 0 {
 | 
				
			||||||
		langPerc["other"] = otherPerc
 | 
							langPerc["other"] = otherPerc
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -19,6 +19,20 @@ import (
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
const fileSizeLimit int64 = 16 * 1024 * 1024
 | 
					const fileSizeLimit int64 = 16 * 1024 * 1024
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// specialLanguages defines the list of languages that are excluded from the calculation
 | 
				
			||||||
 | 
					// unless they are the only language present in the repository. Only languages which under
 | 
				
			||||||
 | 
					// normal circumstances are not considered to be code should be listed here.
 | 
				
			||||||
 | 
					var specialLanguages = []string{
 | 
				
			||||||
 | 
						"XML",
 | 
				
			||||||
 | 
						"JSON",
 | 
				
			||||||
 | 
						"TOML",
 | 
				
			||||||
 | 
						"YAML",
 | 
				
			||||||
 | 
						"INI",
 | 
				
			||||||
 | 
						"SVG",
 | 
				
			||||||
 | 
						"Text",
 | 
				
			||||||
 | 
						"Markdown",
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// GetLanguageStats calculates language stats for git repository at specified commit
 | 
					// GetLanguageStats calculates language stats for git repository at specified commit
 | 
				
			||||||
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
 | 
					func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
 | 
				
			||||||
	r, err := git.PlainOpen(repo.Path)
 | 
						r, err := git.PlainOpen(repo.Path)
 | 
				
			||||||
@@ -43,7 +57,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	sizes := make(map[string]int64)
 | 
						sizes := make(map[string]int64)
 | 
				
			||||||
	err = tree.Files().ForEach(func(f *object.File) error {
 | 
						err = tree.Files().ForEach(func(f *object.File) error {
 | 
				
			||||||
		if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
 | 
							if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
 | 
				
			||||||
			enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
 | 
								enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
 | 
				
			||||||
			return nil
 | 
								return nil
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
@@ -58,7 +72,13 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
		language := analyze.GetCodeLanguage(f.Name, content)
 | 
							language := analyze.GetCodeLanguage(f.Name, content)
 | 
				
			||||||
		if language == enry.OtherLanguage || language == "" {
 | 
							if language == enry.OtherLanguage || language == "" {
 | 
				
			||||||
			language = "other"
 | 
								return nil
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							// group languages, such as Pug -> HTML; SCSS -> CSS
 | 
				
			||||||
 | 
							group := enry.GetLanguageGroup(language)
 | 
				
			||||||
 | 
							if group != "" {
 | 
				
			||||||
 | 
								language = group
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		sizes[language] += f.Size
 | 
							sizes[language] += f.Size
 | 
				
			||||||
@@ -69,8 +89,11 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 | 
				
			|||||||
		return nil, err
 | 
							return nil, err
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if len(sizes) == 0 {
 | 
						// drop special languages whenever more than one language was detected (NOTE: this also removes them all when only special languages are present)
 | 
				
			||||||
		sizes["other"] = 0
 | 
						if len(sizes) > 1 {
 | 
				
			||||||
 | 
							for _, language := range specialLanguages {
 | 
				
			||||||
 | 
								delete(sizes, language)
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return sizes, nil
 | 
						return sizes, nil
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -39,7 +39,5 @@ func TestRepoStatsIndex(t *testing.T) {
 | 
				
			|||||||
	assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha)
 | 
						assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha)
 | 
				
			||||||
	langs, err := repo.GetTopLanguageStats(5)
 | 
						langs, err := repo.GetTopLanguageStats(5)
 | 
				
			||||||
	assert.NoError(t, err)
 | 
						assert.NoError(t, err)
 | 
				
			||||||
	assert.Len(t, langs, 1)
 | 
						assert.Empty(t, langs)
 | 
				
			||||||
	assert.Equal(t, "other", langs[0].Language)
 | 
					 | 
				
			||||||
	assert.Equal(t, float32(100), langs[0].Percentage)
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user