mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 08:30:25 +08:00 
			
		
		
		
	* Change language statistics to save size instead of percentage (#11681) * Change language statistics to save size instead of percentage in database Co-Authored-By: Cirno the Strongest <1447794+CirnoT@users.noreply.github.com> * Do not exclude if only language * Fix edge cases with special languages Co-authored-by: Cirno the Strongest <1447794+CirnoT@users.noreply.github.com> * Fix language stat calculation (#11692) * Fix language stat calculation * Group languages and ignore 0 size files * remove unneeded code Co-authored-by: Cirno the Strongest <1447794+CirnoT@users.noreply.github.com>
This commit is contained in:
		@@ -212,6 +212,8 @@ var migrations = []Migration{
 | 
			
		||||
	NewMigration("Add ResolveDoerID to Comment table", addResolveDoerIDCommentColumn),
 | 
			
		||||
	// v139 -> v140
 | 
			
		||||
	NewMigration("prepend refs/heads/ to issue refs", prependRefsHeadsToIssueRefs),
 | 
			
		||||
	// v140 -> v141
 | 
			
		||||
	NewMigration("Save detected language file size to database instead of percent", fixLanguageStatsToSaveSize),
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// GetCurrentDBVersion returns the current db version
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										56
									
								
								models/migrations/v140.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								models/migrations/v140.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,56 @@
 | 
			
		||||
// Copyright 2020 The Gitea Authors. All rights reserved.
 | 
			
		||||
// Use of this source code is governed by a MIT-style
 | 
			
		||||
// license that can be found in the LICENSE file.
 | 
			
		||||
 | 
			
		||||
package migrations
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
 | 
			
		||||
	"code.gitea.io/gitea/modules/setting"
 | 
			
		||||
 | 
			
		||||
	"xorm.io/xorm"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func fixLanguageStatsToSaveSize(x *xorm.Engine) error {
 | 
			
		||||
	// LanguageStat see models/repo_language_stats.go
 | 
			
		||||
	type LanguageStat struct {
 | 
			
		||||
		Size int64 `xorm:"NOT NULL DEFAULT 0"`
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// RepoIndexerType specifies the repository indexer type
 | 
			
		||||
	type RepoIndexerType int
 | 
			
		||||
 | 
			
		||||
	const (
 | 
			
		||||
		// RepoIndexerTypeCode code indexer
 | 
			
		||||
		RepoIndexerTypeCode RepoIndexerType = iota // 0
 | 
			
		||||
		// RepoIndexerTypeStats repository stats indexer
 | 
			
		||||
		RepoIndexerTypeStats // 1
 | 
			
		||||
	)
 | 
			
		||||
 | 
			
		||||
	// RepoIndexerStatus see models/repo_indexer.go
 | 
			
		||||
	type RepoIndexerStatus struct {
 | 
			
		||||
		IndexerType RepoIndexerType `xorm:"INDEX(s) NOT NULL DEFAULT 0"`
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if err := x.Sync2(new(LanguageStat)); err != nil {
 | 
			
		||||
		return fmt.Errorf("Sync2: %v", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	x.Delete(&RepoIndexerStatus{IndexerType: RepoIndexerTypeStats})
 | 
			
		||||
 | 
			
		||||
	// Delete language stat statuses
 | 
			
		||||
	truncExpr := "TRUNCATE TABLE"
 | 
			
		||||
	if setting.Database.UseSQLite3 {
 | 
			
		||||
		truncExpr = "DELETE FROM"
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Delete language stats
 | 
			
		||||
	if _, err := x.Exec(fmt.Sprintf("%s language_stat", truncExpr)); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	sess := x.NewSession()
 | 
			
		||||
	defer sess.Close()
 | 
			
		||||
	return dropTableColumns(sess, "language_stat", "percentage")
 | 
			
		||||
}
 | 
			
		||||
@@ -20,7 +20,8 @@ type LanguageStat struct {
 | 
			
		||||
	CommitID    string
 | 
			
		||||
	IsPrimary   bool
 | 
			
		||||
	Language    string             `xorm:"VARCHAR(30) UNIQUE(s) INDEX NOT NULL"`
 | 
			
		||||
	Percentage  float32            `xorm:"NUMERIC(5,2) NOT NULL DEFAULT 0"`
 | 
			
		||||
	Percentage  float32            `xorm:"-"`
 | 
			
		||||
	Size        int64              `xorm:"NOT NULL DEFAULT 0"`
 | 
			
		||||
	Color       string             `xorm:"-"`
 | 
			
		||||
	CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
 | 
			
		||||
}
 | 
			
		||||
@@ -34,12 +35,36 @@ func (stats LanguageStatList) loadAttributes() {
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
 | 
			
		||||
	langPerc := make(map[string]float32)
 | 
			
		||||
	var otherPerc float32 = 100
 | 
			
		||||
	var total int64
 | 
			
		||||
 | 
			
		||||
	for _, stat := range stats {
 | 
			
		||||
		total += stat.Size
 | 
			
		||||
	}
 | 
			
		||||
	if total > 0 {
 | 
			
		||||
		for _, stat := range stats {
 | 
			
		||||
			perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10)
 | 
			
		||||
			if perc <= 0.1 {
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
			otherPerc -= perc
 | 
			
		||||
			langPerc[stat.Language] = perc
 | 
			
		||||
		}
 | 
			
		||||
		otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
 | 
			
		||||
	}
 | 
			
		||||
	if otherPerc > 0 {
 | 
			
		||||
		langPerc["other"] = otherPerc
 | 
			
		||||
	}
 | 
			
		||||
	return langPerc
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (repo *Repository) getLanguageStats(e Engine) (LanguageStatList, error) {
 | 
			
		||||
	stats := make(LanguageStatList, 0, 6)
 | 
			
		||||
	if err := e.Where("`repo_id` = ?", repo.ID).Desc("`percentage`").Find(&stats); err != nil {
 | 
			
		||||
	if err := e.Where("`repo_id` = ?", repo.ID).Desc("`size`").Find(&stats); err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
	stats.loadAttributes()
 | 
			
		||||
	return stats, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -54,13 +79,18 @@ func (repo *Repository) GetTopLanguageStats(limit int) (LanguageStatList, error)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
	perc := stats.getLanguagePercentages()
 | 
			
		||||
	topstats := make(LanguageStatList, 0, limit)
 | 
			
		||||
	var other float32
 | 
			
		||||
	for i := range stats {
 | 
			
		||||
		if stats[i].Language == "other" || len(topstats) >= limit {
 | 
			
		||||
			other += stats[i].Percentage
 | 
			
		||||
		if _, ok := perc[stats[i].Language]; !ok {
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		if stats[i].Language == "other" || len(topstats) >= limit {
 | 
			
		||||
			other += perc[stats[i].Language]
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		stats[i].Percentage = perc[stats[i].Language]
 | 
			
		||||
		topstats = append(topstats, stats[i])
 | 
			
		||||
	}
 | 
			
		||||
	if other > 0 {
 | 
			
		||||
@@ -71,11 +101,12 @@ func (repo *Repository) GetTopLanguageStats(limit int) (LanguageStatList, error)
 | 
			
		||||
			Percentage: float32(math.Round(float64(other)*10) / 10),
 | 
			
		||||
		})
 | 
			
		||||
	}
 | 
			
		||||
	topstats.loadAttributes()
 | 
			
		||||
	return topstats, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// UpdateLanguageStats updates the language statistics for repository
 | 
			
		||||
func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]float32) error {
 | 
			
		||||
func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]int64) error {
 | 
			
		||||
	sess := x.NewSession()
 | 
			
		||||
	if err := sess.Begin(); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
@@ -87,15 +118,15 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	var topLang string
 | 
			
		||||
	var p float32
 | 
			
		||||
	for lang, perc := range stats {
 | 
			
		||||
		if perc > p {
 | 
			
		||||
			p = perc
 | 
			
		||||
	var s int64
 | 
			
		||||
	for lang, size := range stats {
 | 
			
		||||
		if size > s {
 | 
			
		||||
			s = size
 | 
			
		||||
			topLang = strings.ToLower(lang)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for lang, perc := range stats {
 | 
			
		||||
	for lang, size := range stats {
 | 
			
		||||
		upd := false
 | 
			
		||||
		llang := strings.ToLower(lang)
 | 
			
		||||
		for _, s := range oldstats {
 | 
			
		||||
@@ -103,8 +134,8 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl
 | 
			
		||||
			if strings.ToLower(s.Language) == llang {
 | 
			
		||||
				s.CommitID = commitID
 | 
			
		||||
				s.IsPrimary = llang == topLang
 | 
			
		||||
				s.Percentage = perc
 | 
			
		||||
				if _, err := sess.ID(s.ID).Cols("`commit_id`", "`percentage`", "`is_primary`").Update(s); err != nil {
 | 
			
		||||
				s.Size = size
 | 
			
		||||
				if _, err := sess.ID(s.ID).Cols("`commit_id`", "`size`", "`is_primary`").Update(s); err != nil {
 | 
			
		||||
					return err
 | 
			
		||||
				}
 | 
			
		||||
				upd = true
 | 
			
		||||
@@ -118,7 +149,7 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl
 | 
			
		||||
				CommitID:  commitID,
 | 
			
		||||
				IsPrimary: llang == topLang,
 | 
			
		||||
				Language:  lang,
 | 
			
		||||
				Percentage: perc,
 | 
			
		||||
				Size:      size,
 | 
			
		||||
			}); err != nil {
 | 
			
		||||
				return err
 | 
			
		||||
			}
 | 
			
		||||
@@ -153,7 +184,7 @@ func CopyLanguageStat(originalRepo, destRepo *Repository) error {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	RepoLang := make(LanguageStatList, 0, 6)
 | 
			
		||||
	if err := sess.Where("`repo_id` = ?", originalRepo.ID).Desc("`percentage`").Find(&RepoLang); err != nil {
 | 
			
		||||
	if err := sess.Where("`repo_id` = ?", originalRepo.ID).Desc("`size`").Find(&RepoLang); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	if len(RepoLang) > 0 {
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,6 @@ import (
 | 
			
		||||
	"bytes"
 | 
			
		||||
	"io"
 | 
			
		||||
	"io/ioutil"
 | 
			
		||||
	"math"
 | 
			
		||||
 | 
			
		||||
	"code.gitea.io/gitea/modules/analyze"
 | 
			
		||||
 | 
			
		||||
@@ -20,8 +19,22 @@ import (
 | 
			
		||||
 | 
			
		||||
const fileSizeLimit int64 = 16 * 1024 * 1024
 | 
			
		||||
 | 
			
		||||
// specialLanguages defines list of languages that are excluded from the calculation
 | 
			
		||||
// unless they are the only language present in repository. Only languages which under
 | 
			
		||||
// normal circumstances are not considered to be code should be listed here.
 | 
			
		||||
var specialLanguages = []string{
 | 
			
		||||
	"XML",
 | 
			
		||||
	"JSON",
 | 
			
		||||
	"TOML",
 | 
			
		||||
	"YAML",
 | 
			
		||||
	"INI",
 | 
			
		||||
	"SVG",
 | 
			
		||||
	"Text",
 | 
			
		||||
	"Markdown",
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// GetLanguageStats calculates language stats for git repository at specified commit
 | 
			
		||||
func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, error) {
 | 
			
		||||
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
 | 
			
		||||
	r, err := git.PlainOpen(repo.Path)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
@@ -43,9 +56,8 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	sizes := make(map[string]int64)
 | 
			
		||||
	var total int64
 | 
			
		||||
	err = tree.Files().ForEach(func(f *object.File) error {
 | 
			
		||||
		if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
 | 
			
		||||
		if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
 | 
			
		||||
			enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
 | 
			
		||||
			return nil
 | 
			
		||||
		}
 | 
			
		||||
@@ -63,8 +75,13 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
 | 
			
		||||
			return nil
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// group languages, such as Pug -> HTML; SCSS -> CSS
 | 
			
		||||
		group := enry.GetLanguageGroup(language)
 | 
			
		||||
		if group != "" {
 | 
			
		||||
			language = group
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		sizes[language] += f.Size
 | 
			
		||||
		total += f.Size
 | 
			
		||||
 | 
			
		||||
		return nil
 | 
			
		||||
	})
 | 
			
		||||
@@ -72,21 +89,14 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	stats := make(map[string]float32)
 | 
			
		||||
	var otherPerc float32 = 100
 | 
			
		||||
	for language, size := range sizes {
 | 
			
		||||
		perc := float32(math.Round(float64(size)/float64(total)*1000) / 10)
 | 
			
		||||
		if perc <= 0.1 {
 | 
			
		||||
			continue
 | 
			
		||||
	// filter special languages unless they are the only language
 | 
			
		||||
	if len(sizes) > 1 {
 | 
			
		||||
		for _, language := range specialLanguages {
 | 
			
		||||
			delete(sizes, language)
 | 
			
		||||
		}
 | 
			
		||||
		otherPerc -= perc
 | 
			
		||||
		stats[language] = perc
 | 
			
		||||
	}
 | 
			
		||||
	otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
 | 
			
		||||
	if otherPerc > 0 {
 | 
			
		||||
		stats["other"] = otherPerc
 | 
			
		||||
	}
 | 
			
		||||
	return stats, nil
 | 
			
		||||
 | 
			
		||||
	return sizes, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readFile(f *object.File, limit int64) ([]byte, error) {
 | 
			
		||||
 
 | 
			
		||||
@@ -34,9 +34,10 @@ func TestRepoStatsIndex(t *testing.T) {
 | 
			
		||||
 | 
			
		||||
	repo, err := models.GetRepositoryByID(1)
 | 
			
		||||
	assert.NoError(t, err)
 | 
			
		||||
	status, err := repo.GetIndexerStatus(models.RepoIndexerTypeStats)
 | 
			
		||||
	assert.NoError(t, err)
 | 
			
		||||
	assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha)
 | 
			
		||||
	langs, err := repo.GetTopLanguageStats(5)
 | 
			
		||||
	assert.NoError(t, err)
 | 
			
		||||
	assert.Len(t, langs, 1)
 | 
			
		||||
	assert.Equal(t, "other", langs[0].Language)
 | 
			
		||||
	assert.Equal(t, float32(100), langs[0].Percentage)
 | 
			
		||||
	assert.Empty(t, langs)
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user