From 9028ca0142c6d845961e30a407529f994859b75c Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Tue, 12 Aug 2025 09:52:33 -0400 Subject: [PATCH 1/3] rebase Signed-off-by: Wenqi Mou --- pkg/statistics/handle/handle.go | 28 ------------------- pkg/statistics/handle/storage/read.go | 2 +- pkg/statistics/handle/types/interfaces.go | 4 --- .../handle/usage/session_stats_collect.go | 6 ++-- 4 files changed, 5 insertions(+), 35 deletions(-) diff --git a/pkg/statistics/handle/handle.go b/pkg/statistics/handle/handle.go index 3dd4211bd0f71..ad53346bbb25c 100644 --- a/pkg/statistics/handle/handle.go +++ b/pkg/statistics/handle/handle.go @@ -19,7 +19,6 @@ import ( "time" "github.com/pingcap/tidb/pkg/ddl/notifier" - "github.com/pingcap/tidb/pkg/infoschema" "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/sessionctx" "github.com/pingcap/tidb/pkg/sessionctx/sysproctrack" @@ -202,33 +201,6 @@ func (h *Handle) getStatsByPhysicalID(physicalTableID int64, tblInfo *model.Tabl return nil, false } -// GetPartitionStatsByID retrieves the partition stats from cache by partition ID. -func (h *Handle) GetPartitionStatsByID(is infoschema.InfoSchema, pid int64) *statistics.Table { - return h.getPartitionStatsByID(is, pid) -} - -func (h *Handle) getPartitionStatsByID(is infoschema.InfoSchema, pid int64) *statistics.Table { - var statsTbl *statistics.Table - intest.Assert(h != nil, "stats handle is nil") - tbl, ok := h.Get(pid) - if !ok { - tbl, ok := h.TableInfoByID(is, pid) - if !ok { - return nil - } - // TODO: it's possible don't rely on the full table meta to do it here. - statsTbl = statistics.PseudoTable(tbl.Meta(), false, true) - statsTbl.PhysicalID = pid - if tbl.Meta().GetPartitionInfo() == nil || h.Len() < 64 { - h.UpdateStatsCache(types.CacheUpdate{ - Updated: []*statistics.Table{statsTbl}, - }) - } - return nil - } - return tbl -} - // FlushStats flushes the cached stats update into store. func (h *Handle) FlushStats() { if err := h.DumpStatsDeltaToKV(true); err != nil { diff --git a/pkg/statistics/handle/storage/read.go b/pkg/statistics/handle/storage/read.go index 6df12902879b0..688e7bb1ae2fb 100644 --- a/pkg/statistics/handle/storage/read.go +++ b/pkg/statistics/handle/storage/read.go @@ -710,7 +710,7 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes. // If this column is not analyzed yet and we don't have it in memory. // We create a fake one for the pseudo estimation. // Otherwise, it will trigger the sync/async load again, even if the column has not been analyzed. - if loadNeeded && !analyzed { + if loadNeeded { fakeCol := statistics.EmptyColumn(tblInfo.ID, tblInfo.PKIsHandle, colInfo) statsTbl = statsTbl.Copy() statsTbl.SetCol(col.ID, fakeCol) diff --git a/pkg/statistics/handle/types/interfaces.go b/pkg/statistics/handle/types/interfaces.go index 29e3d667ac72f..e6ae4b0ad6d50 100644 --- a/pkg/statistics/handle/types/interfaces.go +++ b/pkg/statistics/handle/types/interfaces.go @@ -548,10 +548,6 @@ type StatsHandle interface { // Note: this function may return nil if the table is not found in the cache. GetNonPseudoPhysicalTableStats(physicalTableID int64) (*statistics.Table, bool) - // GetPartitionStatsByID retrieves the partition stats from cache by partition ID. - // TODO: remove this function and use GetPhysicalTableStats instead. - GetPartitionStatsByID(is infoschema.InfoSchema, pid int64) *statistics.Table - // StatsGC is used to do the GC job. StatsGC diff --git a/pkg/statistics/handle/usage/session_stats_collect.go b/pkg/statistics/handle/usage/session_stats_collect.go index 84d4db1dab94c..3d96b03b4ef20 100644 --- a/pkg/statistics/handle/usage/session_stats_collect.go +++ b/pkg/statistics/handle/usage/session_stats_collect.go @@ -72,8 +72,10 @@ func (s *statsUsageImpl) needDumpStatsDelta(is infoschema.InfoSchema, dumpAll bo // Dump the stats to kv at least once 5 minutes. return true } - statsTbl := s.statsHandle.GetPartitionStatsByID(is, id) - if statsTbl == nil || statsTbl.Pseudo || statsTbl.RealtimeCount == 0 || float64(item.Count)/float64(statsTbl.RealtimeCount) > DumpStatsDeltaRatio { + // use GetNonPseudoPhysicalTableStats to avoid creating pseudo tables and dropping instantly + statsTable, ok := s.statsHandle.GetNonPseudoPhysicalTableStats(id) + if !ok || statsTable.RealtimeCount == 0 || + float64(item.Count)/float64(statsTable.RealtimeCount) > DumpStatsDeltaRatio { // Dump the stats when there are many modifications. return true } From e4de46346d8668ce639339d42f0a4b100dfd2908 Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Wed, 13 Aug 2025 10:36:57 -0400 Subject: [PATCH 2/3] address comments Signed-off-by: Wenqi Mou --- pkg/statistics/handle/storage/read.go | 2 +- pkg/statistics/handle/usage/session_stats_collect.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/statistics/handle/storage/read.go b/pkg/statistics/handle/storage/read.go index 688e7bb1ae2fb..6df12902879b0 100644 --- a/pkg/statistics/handle/storage/read.go +++ b/pkg/statistics/handle/storage/read.go @@ -710,7 +710,7 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes. // If this column is not analyzed yet and we don't have it in memory. // We create a fake one for the pseudo estimation. // Otherwise, it will trigger the sync/async load again, even if the column has not been analyzed. - if loadNeeded { + if loadNeeded && !analyzed { fakeCol := statistics.EmptyColumn(tblInfo.ID, tblInfo.PKIsHandle, colInfo) statsTbl = statsTbl.Copy() statsTbl.SetCol(col.ID, fakeCol) diff --git a/pkg/statistics/handle/usage/session_stats_collect.go b/pkg/statistics/handle/usage/session_stats_collect.go index 3d96b03b4ef20..aac768eb838f4 100644 --- a/pkg/statistics/handle/usage/session_stats_collect.go +++ b/pkg/statistics/handle/usage/session_stats_collect.go @@ -73,8 +73,8 @@ func (s *statsUsageImpl) needDumpStatsDelta(is infoschema.InfoSchema, dumpAll bo return true } // use GetNonPseudoPhysicalTableStats to avoid creating pseudo tables and dropping instantly - statsTable, ok := s.statsHandle.GetNonPseudoPhysicalTableStats(id) - if !ok || statsTable.RealtimeCount == 0 || + statsTable, found := s.statsHandle.GetNonPseudoPhysicalTableStats(id) + if !found || statsTable == nil || statsTable.RealtimeCount == 0 || float64(item.Count)/float64(statsTable.RealtimeCount) > DumpStatsDeltaRatio { // Dump the stats when there are many modifications. return true From 4badfdf6e3b7165390afdaede36c5c48530f70f4 Mon Sep 17 00:00:00 2001 From: 0xPoe Date: Wed, 19 Nov 2025 11:01:51 +0100 Subject: [PATCH 3/3] fix: import --- pkg/statistics/handle/handle.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/statistics/handle/handle.go b/pkg/statistics/handle/handle.go index ad53346bbb25c..c2549bf227d08 100644 --- a/pkg/statistics/handle/handle.go +++ b/pkg/statistics/handle/handle.go @@ -19,6 +19,7 @@ import ( "time" "github.com/pingcap/tidb/pkg/ddl/notifier" + "github.com/pingcap/tidb/pkg/infoschema" "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/sessionctx" "github.com/pingcap/tidb/pkg/sessionctx/sysproctrack"