January 17, 2018

箱线图(Box and Whisker Plot)

箱线图(Box and Whisker Plot)

译自:http://www.datavizcatalogue.com/methods/box_plot.html

Description

箱线图(Box and Whisker Plot或Box Plot)是利用四分位数可视化展现一系列数据的方式。平行于箱子向外延伸的线被称为“Whiskers”(须线),用于指示大于上四分位数和小于下四分位数的数据变化。有时候会将异常值绘制成点,并与“Whiskers”共线。箱线图可以垂直或水平绘制。

与柱状图和密度图相比,箱线图虽然看起来很原始,但是所占用的空间少。这在比较多个数据集的分布时是很有优势的。

从箱线图中可以观察到的信息有:

  • 关键值,包括平均数,中位数,四分位数等
  • 是否有异常值,及异常值的具体数值
  • 数据是否对称
  • 数据聚合的紧密程度
  • 数据是否倾斜及向何处倾斜

箱线图最常见的两个变种分别是:可变宽度箱线图(variable-width Box Plots)和缺口箱线图(notched Box Plots)。

Anatomy

D3

box-d3

/* Page-wide default typeface. */
body {
    font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
}

/* Each box plot lives in an <svg class="box">; this sets its label font. */
.box {
    font: 10px sans-serif;
}

/* Shared look for every shape of the plot: white fill, black outline. */
.box line,
.box rect,
.box circle {
    fill: #fff;
    stroke: #000;
    stroke-width: 1.5px;
}

/* The center line spanning the whiskers is drawn dashed. */
.box .center {
    stroke-dasharray: 3, 3;
}

/* Outliers are hollow, light-grey circles. */
.box .outlier {
    fill: none;
    stroke: #ccc;
}
(function() {

    // Inspired by http://informationandvisualization.de/blog/box-plot
    // Factory for a reusable box-and-whisker chart component.
    //
    // The returned `box` function is applied to a selection (for example via
    // `selection.call(box)`); every element of that selection must have an
    // array bound as its datum. The chainable accessors defined below
    // (width, height, tickFormat, domain, value, whiskers, quartiles) act as
    // getters when called without arguments and as setters otherwise.
    d3.box = function() {
        var width = 1,
            height = 1,
            domain = null,
            value = Number,
            whiskers = boxWhiskers,
            quartiles = boxQuartiles,
            tickFormat = null;

        // For each small multiple: draw the plot on first call, and bring the
        // existing shapes up to date (adding/removing as needed) on later calls.
        function box(g) {
            g.each(function(d, i) {
                // Convert every raw entry through `value` and sort ascending, so
                // min/max and the index-based whiskers can be read positionally.
                d = d.map(value).sort(d3.ascending);
                var g = d3.select(this),
                    n = d.length,
                    min = d[0],
                    max = d[n - 1];

                // Compute quartiles. Must return exactly 3 elements.
                // They are also stored on the array itself so that a whisker
                // accessor can read them as `d.quartiles`.
                var quartileData = d.quartiles = quartiles(d);

                // Compute whiskers. Must return exactly 2 elements, or null.
                var whiskerIndices = whiskers && whiskers.call(this, d, i),
                    whiskerData = whiskerIndices && whiskerIndices.map(function(i) { return d[i]; });

                // Compute outliers. If no whiskers are specified, all data are "outliers".
                // Outliers are kept as indices into the sorted array so they can be
                // used as join keys.
                var outlierIndices = whiskerIndices ?
                    d3.range(0, whiskerIndices[0]).concat(d3.range(whiskerIndices[1] + 1, n)) :
                    d3.range(n);

                // Compute the scale. The range is [height, 0] because SVG's y axis
                // grows downward, so larger values end up nearer the top.
                var x1 = d3.scaleLinear()
                    .domain(domain && domain.call(this, d, i) || [min, max])
                    .range([height, 0]);

                // Stash the current scale on the DOM node.
                this.__chart__ = x1;

                // Center line: the vertical line spanning the whiskers.
                var center = g.selectAll("line.center")
                    .data(whiskerData ? [whiskerData] : []);

                center.exit().remove();

                center.enter().insert("line", "rect")
                    .attr("class", "center")
                    .merge(center)
                    .attr("x1", width / 2)
                    .attr("x2", width / 2)
                    .attr("y1", function(d) { return x1(d[0]); })
                    .attr("y2", function(d) { return x1(d[1]); });

                // Interquartile box, from the first to the third quartile.
                var iqrBox = g.selectAll("rect.box")
                    .data([quartileData]);

                iqrBox.enter().append("rect")
                    .attr("class", "box")
                    .merge(iqrBox)
                    .attr("x", 0)
                    .attr("width", width)
                    .attr("y", function(d) { return x1(d[2]); })
                    .attr("height", function(d) { return x1(d[0]) - x1(d[2]); });

                // Median line.
                var medianLine = g.selectAll("line.median")
                    .data([quartileData[1]]);

                medianLine.enter().append("line")
                    .attr("class", "median")
                    .merge(medianLine)
                    .attr("x1", 0)
                    .attr("x2", width)
                    .attr("y1", x1)
                    .attr("y2", x1);

                // Whisker end caps.
                var whiskerLines = g.selectAll("line.whisker")
                    .data(whiskerData || []);

                whiskerLines.exit().remove();

                whiskerLines.enter().insert("line", "circle, text")
                    .attr("class", "whisker")
                    .merge(whiskerLines)
                    .attr("x1", 0)
                    .attr("x2", width)
                    .attr("y1", x1)
                    .attr("y2", x1);

                // Outliers, joined by their index in the sorted data.
                var outliers = g.selectAll("circle.outlier")
                    .data(outlierIndices, Number);

                outliers.exit().remove();

                outliers.enter().insert("circle", "text")
                    .attr("class", "outlier")
                    .merge(outliers)
                    .attr("r", 5)
                    .attr("cx", width / 2)
                    .attr("cy", function(i) { return x1(d[i]); });

                // Compute the tick format.
                var format = tickFormat || x1.tickFormat(8);

                // Quartile labels: odd indices (the median) go to the right of
                // the box, even indices (Q1 and Q3) to the left.
                var boxLabels = g.selectAll("text.box")
                    .data(quartileData);

                boxLabels.exit().remove();

                boxLabels.enter().append("text")
                    .attr("class", "box")
                    .merge(boxLabels)
                    .attr("dy", ".3em")
                    .attr("dx", function(d, i) { return i & 1 ? 6 : -6; })
                    .attr("x", function(d, i) { return i & 1 ? width : 0; })
                    .attr("text-anchor", function(d, i) { return i & 1 ? "start" : "end"; })
                    .text(format)
                    .attr("y", x1);

                // Whisker labels, placed to the right of the plot.
                var whiskerLabels = g.selectAll("text.whisker")
                    .data(whiskerData || []);

                whiskerLabels.exit().remove();

                whiskerLabels.enter().append("text")
                    .attr("class", "whisker")
                    .merge(whiskerLabels)
                    .attr("dy", ".3em")
                    .attr("dx", 6)
                    .attr("x", width)
                    .text(format)
                    .attr("y", x1);
            });
        }

        // Get or set the horizontal extent used for the box and whisker caps.
        box.width = function(x) {
            if (!arguments.length) return width;
            width = x;
            return box;
        };

        // Get or set the vertical extent; it becomes the top of the scale's range.
        box.height = function(x) {
            if (!arguments.length) return height;
            height = x;
            return box;
        };

        // Get or set the label formatter. When null, the scale's own
        // tickFormat(8) is used instead.
        box.tickFormat = function(x) {
            if (!arguments.length) return tickFormat;
            tickFormat = x;
            return box;
        };

        // Get or set the scale domain. Accepts null (fall back to the data's
        // [min, max]), a constant such as [lo, hi], or an accessor function
        // called as domain.call(node, sortedData, index). Constants are wrapped
        // in a function; functions are stored as they are, so they are invoked
        // per plot rather than being handed to the scale as the domain itself.
        box.domain = function(x) {
            if (!arguments.length) return domain;
            domain = (x == null || typeof x === "function") ? x : function() { return x; };
            return box;
        };

        // Get or set the function applied to each raw entry before sorting.
        box.value = function(x) {
            if (!arguments.length) return value;
            value = x;
            return box;
        };

        // Get or set the whisker accessor. It is called as
        // whiskers.call(node, sortedData, index) and must return two indices
        // into the sorted data, or null; a falsy accessor disables whiskers.
        box.whiskers = function(x) {
            if (!arguments.length) return whiskers;
            whiskers = x;
            return box;
        };

        // Get or set the quartile accessor. It receives the sorted data and
        // must return [q1, median, q3].
        box.quartiles = function(x) {
            if (!arguments.length) return quartiles;
            quartiles = x;
            return box;
        };

        return box;
    };

    // Default whisker accessor: the whiskers reach the first and the last
    // index of the data, i.e. they span the whole array.
    function boxWhiskers(d) {
        var lastIndex = d.length - 1;
        return [0, lastIndex];
    }

    // Default quartile accessor: asks d3.quantile for the 25%, 50% and 75%
    // quantiles of `d` and returns them in that order.
    function boxQuartiles(d) {
        return [.25, .5, .75].map(function(p) {
            return d3.quantile(d, p);
        });
    }

})();

// Layout: each plot's outer <svg> is 120 x 500; `width` and `height` are the
// drawing area left once the margins are subtracted.
var margin = { top: 10, right: 50, bottom: 20, left: 50 },
    width = 120 - margin.left - margin.right,
    height = 500 - margin.top - margin.bottom;

// Running extent of all Speed values, used as one common domain for every plot.
var min = Infinity,
    max = -Infinity;

// Whiskers come from iqr(1): they stop at the outermost data points that lie
// within 1 x the interquartile range of the first and third quartiles.
var chart = d3.box()
    .whiskers(iqr(1))
    .width(width)
    .height(height);

// Load the measurements and draw one box plot per experiment.
d3.csv("box.csv", function(error, csv) {
    if (error) throw error;

    // data[e] collects the Speed values of experiment e.
    var data = [];

    csv.forEach(function(x) {
        // Expt is shifted by one to serve as an array index
        // (presumably experiments are numbered from 1; verify against box.csv).
        var e = Math.floor(x.Expt - 1),
            s = Math.floor(x.Speed),
            d = data[e];
        if (!d) d = data[e] = [s];
        else d.push(s);
        if (s > max) max = s;
        if (s < min) min = s;
    });

    // Share one domain so the plots are directly comparable.
    chart.domain([min, max]);

    // One <svg class="box"> per experiment; the chart is rendered into an inner
    // <g> that is translated by the left and top margins.
    d3.select("body").selectAll("svg")
        .data(data)
        .enter().append("svg")
        .attr("class", "box")
        .attr("width", width + margin.left + margin.right)
        .attr("height", height + margin.bottom + margin.top)
        .append("g")
        .attr("transform", "translate(" + margin.left + "," + margin.top + ")")
        .call(chart);
});

// Builds a whisker accessor based on the interquartile range.
// The returned function expects `d` to be sorted ascending and to carry its
// quartiles as `d.quartiles` ([q1, median, q3]). It returns [low, high]: the
// index of the first value that is not below q1 - k * (q3 - q1) and the index
// of the last value that is not above q3 + k * (q3 - q1).
function iqr(k) {
    return function(d, i) {
        var lowerQuartile = d.quartiles[0],
            upperQuartile = d.quartiles[2],
            reach = (upperQuartile - lowerQuartile) * k,
            low = 0,
            high = d.length - 1;
        // Walk inward from both ends past every value outside the fences.
        while (d[low] < lowerQuartile - reach) low++;
        while (d[high] > upperQuartile + reach) high--;
        return [low, high];
    };
}

Echarts

box-echarts

// Create the chart inside the element whose id is "container".
var myChart = echarts.init(document.getElementById("container"));

// Five groups of twenty raw samples each. prepareBoxplotData returns an object
// from which axisData, boxData and outliers are read below.
var data = echarts.dataTool.prepareBoxplotData([
    [850, 740, 900, 1070, 930, 850, 950, 980, 980, 880, 1000, 980, 930, 650, 760, 810, 1000, 1000, 960, 960],
    [960, 940, 960, 940, 880, 800, 850, 880, 900, 840, 830, 790, 810, 880, 880, 830, 800, 790, 760, 800],
    [880, 880, 880, 860, 720, 720, 620, 860, 970, 950, 880, 910, 850, 870, 840, 840, 850, 840, 840, 840],
    [890, 810, 810, 820, 800, 770, 760, 740, 750, 760, 910, 920, 890, 860, 880, 720, 840, 850, 850, 780],
    [890, 840, 780, 810, 760, 810, 790, 810, 820, 850, 870, 870, 810, 740, 810, 940, 950, 800, 810, 870]
]);

// Declared explicitly: a bare `option = {...}` would create an implicit global
// and throws a ReferenceError in strict mode or inside an ES module.
var option = {
    xAxis: {
        type: 'category',
        data: data.axisData,
        boundaryGap: true,
        nameGap: 30
    },
    yAxis: {
        type: 'value'
    },
    series: [
        {
            name: 'boxplot',
            type: 'boxplot',
            data: data.boxData
        },
        {
            // Outliers are drawn as a separate scatter series.
            name: 'outlier',
            type: 'scatter',
            data: data.outliers
        }
    ]
};

// The second argument (`true`) is ECharts' notMerge flag: the option replaces
// any previously set option instead of being merged into it.
myChart.setOption(option, true);