Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,34 @@ export class ColumnChartSpecModel<T> {
const xMidField = "xMid";
const yField = "value";

// Calculate xStart and xEnd for each value count
// Adjust bar widths to ensure minimum width
const MIN_BAR_WIDTH_PERCENT = 0.02; // 2% minimum width
Copy link

Copilot AI Nov 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The magic number 0.02 should be defined as a named constant at the module or class level for better maintainability and reusability. Consider defining it as const MIN_BAR_WIDTH_PERCENT = 0.02 outside the function scope.

Copilot uses AI. Check for mistakes.
let totalAdjustment = 0;
let totalLargeBarProportion = 0;
const proportions: number[] = [];

for (const vc of valueCounts) {
const prop = vc.count / total;
proportions.push(prop);

if (prop < MIN_BAR_WIDTH_PERCENT) {
totalAdjustment += MIN_BAR_WIDTH_PERCENT - prop;
} else {
totalLargeBarProportion += prop;
}
}

const adjustedProportions = proportions.map((prop) => {
if (prop < MIN_BAR_WIDTH_PERCENT) {
return MIN_BAR_WIDTH_PERCENT;
}
if (totalAdjustment > 0 && totalLargeBarProportion > 0) {
// Proportionally reduce large bars to make room for small bars
return prop - (prop / totalLargeBarProportion) * totalAdjustment;
}
return prop;
});

const newValueCounts: {
count: number;
value: string;
Expand All @@ -637,19 +664,20 @@ export class ColumnChartSpecModel<T> {
xMid: number;
proportion: number;
}[] = [];

let xStart = 0;
for (const valueCount of valueCounts) {
const xEnd = xStart + valueCount.count;
for (const [i, valueCount] of valueCounts.entries()) {
const barWidth = adjustedProportions[i] * total;
const xEnd = xStart + barWidth;
const xMid = (xStart + xEnd) / 2;
const proportion = (xEnd - xStart) / total;

newValueCounts.push({
count: valueCount.count,
value: valueCount.value,
xStart,
xEnd,
xMid,
proportion,
proportion: adjustedProportions[i],
});
xStart = xEnd;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ export const TableColumnSummary = <TData, TValue>({
}

return (
<div className="flex justify-between w-full px-2 whitespace-pre">
<div className="flex justify-between w-full px-2 whitespace-pre -mt-1.5">
<span>{renderDate(stats.min, type)}</span>
{stats.min === stats.max ? null : (
<span>-{renderDate(stats.max, type)}</span>
Expand Down Expand Up @@ -145,15 +145,20 @@ export const TableColumnSummary = <TData, TValue>({
return null;
case "time":
return null;
case "string":
if (!spec) {
return (
<div className="flex flex-col whitespace-pre">
<span>unique: {prettyNumber(stats.unique, locale)}</span>
</div>
);
case "string": {
const lessThan5Categories =
typeof stats.unique === "number" && stats.unique <= 5;
// Number of categories will be easily visible from the chart, so don't show it here.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i don't think its that easy to parse number of unique values, i would have to count each bar chart

if (spec && lessThan5Categories) {
return null;
}
return null;

return (
<div className="flex flex-col whitespace-pre -mt-1.5">
<span>categories: {prettyNumber(stats.unique, locale)}</span>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe unique over categories? i don't always think of each item as a category

</div>
);
}
case "unknown":
return (
<div className="flex flex-col whitespace-pre">
Expand Down
17 changes: 15 additions & 2 deletions marimo/_plugins/ui/_impl/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -990,7 +990,10 @@ def _get_column_summaries(
):
try:
val_counts = self._get_value_counts(
column, DEFAULT_VALUE_COUNTS_SIZE, total_rows
column=column,
size=DEFAULT_VALUE_COUNTS_SIZE,
total_rows=total_rows,
others_threshold=1,
)
if len(val_counts) > 0:
value_counts[column] = val_counts
Expand Down Expand Up @@ -1043,7 +1046,12 @@ def _get_column_summaries(
)

def _get_value_counts(
self, column: ColumnName, size: int, total_rows: int
self,
*,
column: ColumnName,
size: int,
total_rows: int,
others_threshold: int | None = None,
) -> list[ValueCount]:
"""Get value counts for a column. The last item will be 'others' with the count of remaining
unique values. If there are only unique values, we return 'unique values' instead.
Expand All @@ -1052,6 +1060,8 @@ def _get_value_counts(
column (ColumnName): The column to get value counts for.
size (int): The number of value counts to return.
total_rows (int): The total number of rows in the table.
others_threshold (int | None): The threshold for values to be considered 'others'.
If a value has a count less than or equal to this threshold, it will be considered 'others'.
Comment on lines +1063 to +1064
Copy link

Copilot AI Nov 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The documentation states values with counts 'less than or equal to' the threshold are filtered, but the implementation at line 1089 only checks count <= others_threshold. This is correct, but the documentation could be clearer. Consider rephrasing to: 'The threshold for filtering values into others. Values with counts at or below this threshold will be excluded from the individual value list and grouped into others.'

Suggested change
others_threshold (int | None): The threshold for values to be considered 'others'.
If a value has a count less than or equal to this threshold, it will be considered 'others'.
others_threshold (int | None): The threshold for filtering values into others.
Values with counts at or below this threshold will be excluded from the individual value list and grouped into others.

Copilot uses AI. Check for mistakes.

Returns:
list[ValueCount]: The value counts.
Expand All @@ -1076,6 +1086,9 @@ def _get_value_counts(

sum_count = 0
for value, count in top_k_rows:
if others_threshold is not None and count <= others_threshold:
break

sum_count += count
value = str(value) if value is not None else "null"
value_counts.append(ValueCount(value=value, count=count))
Expand Down
22 changes: 22 additions & 0 deletions tests/_plugins/ui/_impl/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,28 @@ def test_with_search(self, table: ui.table) -> None:
)
assert value_counts == [ValueCount(value="1", count=2)]

def test_with_others_threshold(self, table: ui.table) -> None:
value_counts = table._get_value_counts(
column="repeat",
size=2,
total_rows=self.total_rows,
others_threshold=1,
)
assert value_counts == [
ValueCount(value="1", count=2),
ValueCount(value="others", count=3),
]

value_counts = table._get_value_counts(
column="repeat",
size=2,
total_rows=self.total_rows,
others_threshold=2,
)
assert value_counts == [
ValueCount(value="others", count=5),
]


def test_table_with_frozen_columns() -> None:
data = {
Expand Down
Loading