Update histogram math (#13680)

ui: Fix many edge cases of rendering a histogram

---------

Signed-off-by: Manik Rana <manikrana54@gmail.com>
This commit is contained in:
Manik Rana 2024-07-04 17:31:09 +05:30 committed by GitHub
parent c94c5b64c3
commit c9bc1c2be0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 427 additions and 53 deletions

View file

@ -2,22 +2,104 @@ import React, { FC } from 'react';
import { UncontrolledTooltip } from 'reactstrap';
import { Histogram } from '../../types/types';
import { bucketRangeString } from './DataTable';
import {
calculateDefaultExpBucketWidth,
findMinPositive,
findMaxNegative,
findZeroAxisLeft,
showZeroAxis,
findZeroBucket,
ScaleType,
} from './HistogramHelpers';
type ScaleType = 'linear' | 'exponential';
// Props accepted by the HistogramChart component.
interface HistogramChartProps {
  // Position of this chart among its siblings; it is embedded in the DOM ids of the rendered buckets.
  index: number;
  // Histogram sample whose `buckets` are drawn.
  histogram: Histogram;
  // X-axis layout used to place the buckets: 'linear' or 'exponential'.
  scale: ScaleType;
}
const HistogramChart: FC<{ histogram: Histogram; index: number; scale: ScaleType }> = ({ index, histogram, scale }) => {
const HistogramChart: FC<HistogramChartProps> = ({ index, histogram, scale }) => {
const { buckets } = histogram;
const rangeMax = buckets ? parseFloat(buckets[buckets.length - 1][2]) : 0;
const countMax = buckets ? buckets.map((b) => parseFloat(b[3])).reduce((a, b) => Math.max(a, b)) : 0;
if (!buckets || buckets.length === 0) {
return <div>No data</div>;
}
const formatter = Intl.NumberFormat('en', { notation: 'compact' });
const positiveBuckets = buckets?.filter((b) => parseFloat(b[1]) >= 0); // we only want to show buckets with range >= 0
const xLabelTicks = scale === 'linear' ? [0.25, 0.5, 0.75, 1] : [1];
// For linear scales, the count of a histogram bucket is represented by its area rather than its height. This means it considers
// both the count and the range (width) of the bucket. For this, we can set the height of the bucket proportional
// to its frequency density (fd). The fd is the count of the bucket divided by the width of the bucket.
const fds = [];
for (const bucket of buckets) {
const left = parseFloat(bucket[1]);
const right = parseFloat(bucket[2]);
const count = parseFloat(bucket[3]);
const width = right - left;
// This happens when a user wants observations of precisely zero to be included in the zero bucket
if (width === 0) {
fds.push(0);
continue;
}
fds.push(count / width);
}
const fdMax = Math.max(...fds);
const first = buckets[0];
const last = buckets[buckets.length - 1];
const rangeMax = parseFloat(last[2]);
const rangeMin = parseFloat(first[1]);
const countMax = Math.max(...buckets.map((b) => parseFloat(b[3])));
const defaultExpBucketWidth = calculateDefaultExpBucketWidth(last, buckets);
const maxPositive = rangeMax > 0 ? rangeMax : 0;
const minPositive = findMinPositive(buckets);
const maxNegative = findMaxNegative(buckets);
const minNegative = parseFloat(first[1]) < 0 ? parseFloat(first[1]) : 0;
// Calculate the borders of positive and negative buckets in the exponential scale from left to right
const startNegative = minNegative !== 0 ? -Math.log(Math.abs(minNegative)) : 0;
const endNegative = maxNegative !== 0 ? -Math.log(Math.abs(maxNegative)) : 0;
const startPositive = minPositive !== 0 ? Math.log(minPositive) : 0;
const endPositive = maxPositive !== 0 ? Math.log(maxPositive) : 0;
console.log(
'startNegative',
startNegative,
'endNegative',
endNegative,
'startPositive',
startPositive,
'endPositive',
endPositive
);
// Calculate the width of negative, positive, and all exponential bucket ranges on the x-axis
const xWidthNegative = endNegative - startNegative;
const xWidthPositive = endPositive - startPositive;
const xWidthTotal = xWidthNegative + defaultExpBucketWidth + xWidthPositive;
console.log('xWidthNegative', xWidthNegative, 'xWidthPositive', xWidthPositive, 'xWidthTotal', xWidthTotal);
const zeroBucketIdx = findZeroBucket(buckets);
const zeroAxisLeft = findZeroAxisLeft(
scale,
rangeMin,
rangeMax,
minPositive,
maxNegative,
zeroBucketIdx,
xWidthNegative,
xWidthTotal,
defaultExpBucketWidth
);
const zeroAxis = showZeroAxis(zeroAxisLeft);
return (
<div className="histogram-y-wrapper">
<div className="histogram-y-labels">
{[1, 0.75, 0.5, 0.25].map((i) => (
<div key={i} className="histogram-y-label">
{formatter.format(countMax * i)}
{scale === 'linear' ? '' : formatter.format(countMax * i)}
</div>
))}
<div key={0} className="histogram-y-label" style={{ height: 0 }}>
@ -31,62 +113,228 @@ const HistogramChart: FC<{ histogram: Histogram; index: number; scale: ScaleType
<div className="histogram-y-grid" style={{ bottom: i * 100 + '%' }}></div>
<div className="histogram-y-tick" style={{ bottom: i * 100 + '%' }}></div>
<div className="histogram-x-grid" style={{ left: i * 100 + '%' }}></div>
<div className="histogram-x-tick" style={{ left: i * 100 + '%' }}></div>
</React.Fragment>
))}
{positiveBuckets?.map((b, bIdx) => {
const bucketIdx = `bucket-${index}-${bIdx}-${Math.ceil(parseFloat(b[3]) * 100)}`;
const bucketLeft =
scale === 'linear' ? (parseFloat(b[1]) / rangeMax) * 100 + '%' : (bIdx / positiveBuckets.length) * 100 + '%';
const bucketWidth =
scale === 'linear'
? ((parseFloat(b[2]) - parseFloat(b[1])) / rangeMax) * 100 + '%'
: 100 / positiveBuckets.length + '%';
return (
<React.Fragment key={bIdx}>
<div
id={bucketIdx}
className="histogram-bucket-slot"
style={{
left: bucketLeft,
width: bucketWidth,
}}
>
<div
id={bucketIdx}
className="histogram-bucket"
style={{
height: (parseFloat(b[3]) / countMax) * 100 + '%',
}}
></div>
<UncontrolledTooltip
style={{ maxWidth: 'unset', padding: 10, textAlign: 'left' }}
placement="bottom"
target={bucketIdx}
>
<strong>range:</strong> {bucketRangeString(b)}
<br />
<strong>count:</strong> {b[3]}
</UncontrolledTooltip>
</div>
</React.Fragment>
);
})}
<div className="histogram-x-tick" style={{ left: '0%' }}></div>
<div className="histogram-x-tick" style={{ left: zeroAxisLeft }}></div>
<div className="histogram-x-grid" style={{ left: zeroAxisLeft }}></div>
<div className="histogram-x-tick" style={{ left: '100%' }}></div>
<RenderHistogramBars
buckets={buckets}
scale={scale}
rangeMin={rangeMin}
rangeMax={rangeMax}
index={index}
fds={fds}
fdMax={fdMax}
countMax={countMax}
defaultExpBucketWidth={defaultExpBucketWidth}
minPositive={minPositive}
maxNegative={maxNegative}
startPositive={startPositive}
startNegative={startNegative}
xWidthPositive={xWidthPositive}
xWidthNegative={xWidthNegative}
xWidthTotal={xWidthTotal}
/>
<div className="histogram-axes"></div>
</div>
<div className="histogram-x-labels">
<div key={0} className="histogram-x-label" style={{ width: 0 }}>
0
<div className="histogram-x-label">
<React.Fragment>
<div style={{ position: 'absolute', left: 0 }}>{formatter.format(rangeMin)}</div>
{rangeMin < 0 && zeroAxis && <div style={{ position: 'absolute', left: zeroAxisLeft }}>0</div>}
<div style={{ position: 'absolute', right: 0 }}>{formatter.format(rangeMax)}</div>
</React.Fragment>
</div>
{xLabelTicks.map((i) => (
<div key={i} className="histogram-x-label">
<div style={{ position: 'absolute', right: i === 1 ? 0 : -18 }}>{formatter.format(rangeMax * i)}</div>
</div>
))}
</div>
</div>
</div>
);
};
// Props accepted by RenderHistogramBars.
interface RenderHistogramProps {
  // Buckets to draw; elements 1, 2 and 3 of each tuple are parsed as the left bound, right bound and count.
  buckets: Array<[number, string, string, string]>;
  // Chart index, embedded in the DOM id of every bucket element.
  index: number;
  // X-axis layout: 'linear' or 'exponential'.
  scale: ScaleType;

  // Lower and upper ends of the x-axis range (used by the linear layout).
  rangeMin: number;
  rangeMax: number;

  // Per-bucket values and their maximum; the linear layout sets bar heights to fds[i] / fdMax.
  fds: number[];
  fdMax: number;
  // Largest bucket count; the exponential layout sets bar heights to count / countMax.
  countMax: number;

  // Width given to a bucket on the exponential axis when its own log-width is 0.
  defaultExpBucketWidth: number;
  // Boundaries next to zero on each side; a value of 0 means that side has no buckets.
  minPositive: number;
  maxNegative: number;
  // Offsets subtracted from / added to the log of a bucket bound to place it on the exponential axis.
  startPositive: number;
  startNegative: number;
  // Widths of the negative part, the positive part and the whole exponential axis.
  xWidthNegative: number;
  xWidthPositive: number;
  xWidthTotal: number;
}
// Renders one slot (containing the bar and its tooltip) per histogram bucket.
//
// Each slot is placed with percentage `left`/`width` styles and the bar inside it gets a percentage `height`:
//  - 'linear':      position and width are proportional to the bucket bounds within [rangeMin, rangeMax];
//                   height is fds[i] / fdMax.
//  - 'exponential': position and width are derived from the natural log of the bucket bounds;
//                   height is count / countMax.
// A bucket whose bounds are both exactly 0 is given zero width in either layout.
// Throws an Error('Invalid scale') if `scale` is neither 'linear' nor 'exponential'.
const RenderHistogramBars: FC<RenderHistogramProps> = ({
  buckets,
  scale,
  rangeMin,
  rangeMax,
  index,
  fds,
  fdMax,
  countMax,
  defaultExpBucketWidth,
  minPositive,
  maxNegative,
  startPositive,
  startNegative,
  xWidthNegative,
  xWidthTotal,
}) => {
  // If buckets are all positive/negative, we need to remove the width of the zero bucket from the
  // exponential axis. This does not depend on the individual bucket, so compute it once.
  const adjust = minPositive === 0 || maxNegative === 0 ? defaultExpBucketWidth : 0;
  const expAxisWidth = xWidthTotal - adjust;

  return (
    <React.Fragment>
      {buckets.map((b, bIdx) => {
        const left = parseFloat(b[1]);
        const right = parseFloat(b[2]);
        const count = parseFloat(b[3]);
        const bucketIdx = `bucket-${index}-${bIdx}-${Math.ceil(parseFloat(b[3]) * 100)}`;

        // Width of the bucket in log space; fall back to the default when both bounds have the same magnitude.
        const logWidth = Math.abs(Math.log(Math.abs(right)) - Math.log(Math.abs(left)));
        const expBucketWidth = logWidth === 0 ? defaultExpBucketWidth : logWidth;

        let bucketWidth = '';
        let bucketLeft = '';
        let bucketHeight = '';

        switch (scale) {
          case 'linear':
            bucketWidth = ((right - left) / (rangeMax - rangeMin)) * 100 + '%';
            bucketLeft = ((left - rangeMin) / (rangeMax - rangeMin)) * 100 + '%';
            if (left === 0 && right === 0) {
              bucketLeft = '0%'; // do not render zero-width zero bucket
              bucketWidth = '0%';
            }
            bucketHeight = (fds[bIdx] / fdMax) * 100 + '%';
            break;
          case 'exponential':
            bucketWidth = (expBucketWidth / expAxisWidth) * 100 + '%';
            if (left < 0) {
              // negative buckets boundary
              bucketLeft = (-(Math.log(Math.abs(left)) + startNegative) / expAxisWidth) * 100 + '%';
            } else {
              // positive buckets boundary
              bucketLeft =
                ((Math.log(left) - startPositive + defaultExpBucketWidth + xWidthNegative - adjust) / expAxisWidth) *
                  100 +
                '%';
            }
            if (left < 0 && right > 0) {
              // if the bucket crosses the zero axis
              bucketLeft = (xWidthNegative / xWidthTotal) * 100 + '%';
            }
            if (left === 0 && right === 0) {
              // do not render zero width zero bucket
              bucketLeft = '0%';
              bucketWidth = '0%';
            }
            bucketHeight = (count / countMax) * 100 + '%';
            break;
          default:
            throw new Error('Invalid scale');
        }

        // NOTE(review): the slot and the bar share the same DOM id, which is also the tooltip target.
        // Confirm the tooltip does not rely on both elements before deduplicating the id.
        return (
          <React.Fragment key={bIdx}>
            <div
              id={bucketIdx}
              className="histogram-bucket-slot"
              style={{
                left: bucketLeft,
                width: bucketWidth,
              }}
            >
              <div
                id={bucketIdx}
                className="histogram-bucket"
                style={{
                  height: bucketHeight,
                }}
              ></div>
              <UncontrolledTooltip
                style={{ maxWidth: 'unset', padding: 10, textAlign: 'left' }}
                placement="bottom"
                target={bucketIdx}
              >
                <strong>range:</strong> {bucketRangeString(b)}
                <br />
                <strong>count:</strong> {count}
              </UncontrolledTooltip>
            </div>
          </React.Fragment>
        );
      })}
    </React.Fragment>
  );
};
export default HistogramChart;

View file

@ -0,0 +1,126 @@
// X-axis scale modes understood by the histogram helpers: 'linear' or 'exponential'.
export type ScaleType = 'linear' | 'exponential';
// Calculates a default width for exponential histogram bucket ranges: the absolute difference between
// the natural logs of the absolute bounds of the last bucket. If either bound of the last bucket is 0,
// the second to last bucket is used instead. Throws an Error if either bound of the last bucket is 0 and
// it is the only bucket.
export function calculateDefaultExpBucketWidth(
  last: [number, string, string, string],
  buckets: [number, string, string, string][]
): number {
  // Distance between the logs of a bucket's absolute right and left bounds.
  const logSpan = (bucket: [number, string, string, string]): number =>
    Math.abs(Math.log(Math.abs(parseFloat(bucket[2]))) - Math.log(Math.abs(parseFloat(bucket[1]))));

  const lastTouchesZero = parseFloat(last[2]) === 0 || parseFloat(last[1]) === 0;
  if (!lastTouchesZero) {
    return logSpan(last);
  }

  if (buckets.length <= 1) {
    throw new Error('Only one bucket in histogram ([-0, 0]). Cannot calculate defaultExpBucketWidth.');
  }
  return logSpan(buckets[buckets.length - 2]);
}
// Finds the lowest positive value from the bucket ranges by scanning the buckets in order:
//  - the left bound of the first bucket that starts above zero, or
//  - the right bound of a bucket that spans zero (left < 0 < right), or
//  - the right bound of the last bucket if it is positive.
// Returns 0 if no positive values are found or if there are no buckets.
export function findMinPositive(buckets: [number, string, string, string][]) {
  if (!buckets || buckets.length === 0) {
    return 0; // no buckets
  }

  const lastIdx = buckets.length - 1;
  for (const [idx, bucket] of buckets.entries()) {
    const lower = parseFloat(bucket[1]);
    const upper = parseFloat(bucket[2]);

    if (lower > 0) {
      return lower;
    }

    const spansZero = lower < 0 && upper > 0;
    const isLastWithPositiveEnd = idx === lastIdx && upper > 0;
    if (spansZero || isLastWithPositiveEnd) {
      return upper;
    }
  }

  return 0; // all buckets are negative
}
// Finds the negative boundary next to zero in the bucket ranges, scanning the buckets in order until
// the first bucket whose right bound is >= 0:
//  - if that is the first bucket, its left bound is returned when negative, otherwise 0 (all buckets are positive);
//  - otherwise the right bound of the bucket before it is returned (the last fully negative bucket).
// If every bucket ends below zero, the right bound of the last bucket is returned.
// Returns 0 if no negative values are found or if there are no buckets.
export function findMaxNegative(buckets: [number, string, string, string][]): number {
  if (!buckets || buckets.length === 0) {
    return 0; // no buckets
  }
  for (let i = 0; i < buckets.length; i++) {
    const right = parseFloat(buckets[i][2]);
    if (right >= 0) {
      if (i === 0) {
        const left = parseFloat(buckets[i][1]);
        return left < 0 ? left : 0; // first bucket spans zero, or all buckets are positive
      }
      return parseFloat(buckets[i - 1][2]); // right bound of the last negative bucket
    }
  }
  return parseFloat(buckets[buckets.length - 1][2]); // all buckets are negative
}
// Calculates the left position of the zero axis as a percentage string (e.g. '25%').
//  - linear scale: the relative position of 0 within [rangeMin, rangeMax];
//  - exponential scale: '0%' when there are no negative buckets (maxNegative === 0), '100%' when there
//    are no positive buckets (minPositive === 0); without a zero bucket (zeroBucketIdx === -1) the axis
//    sits at the end of the negative range; otherwise it sits half a bucket width further right, or at
//    '0%' when that position is not positive.
export function findZeroAxisLeft(
  scale: ScaleType,
  rangeMin: number,
  rangeMax: number,
  minPositive: number,
  maxNegative: number,
  zeroBucketIdx: number,
  widthNegative: number,
  widthTotal: number,
  expBucketWidth: number
): string {
  const asPercent = (fraction: number): string => `${fraction * 100}%`;

  if (scale === 'linear') {
    return asPercent((0 - rangeMin) / (rangeMax - rangeMin));
  }

  if (maxNegative === 0) {
    return '0%';
  }
  if (minPositive === 0) {
    return '100%';
  }
  if (zeroBucketIdx === -1) {
    // if there is no zero bucket, the zero axis is placed between the buckets around zero
    return asPercent(widthNegative / widthTotal);
  }

  const zeroBucketCentre = (widthNegative + 0.5 * expBucketWidth) / widthTotal;
  return zeroBucketCentre > 0 ? asPercent(zeroBucketCentre) : '0%';
}
// Determines if the zero axis should be shown such that the zero label does not overlap with the range labels.
// Takes the axis position as a percentage string (the trailing character is dropped before parsing) and
// returns true only if it lies strictly between 5% and 95% of the graph.
export function showZeroAxis(zeroAxisLeft: string) {
  const percent = parseFloat(zeroAxisLeft.slice(0, -1));
  return percent > 5 && percent < 95;
}
// Finds the index of the first bucket whose range includes zero (left <= 0 <= right).
// Returns -1 if no bucket includes zero.
export function findZeroBucket(buckets: [number, string, string, string][]): number {
  return buckets.findIndex((bucket) => {
    const lower = parseFloat(bucket[1]);
    const upper = parseFloat(bucket[2]);
    return lower <= 0 && upper >= 0;
  });
}