diff --git a/docs/_scripts/_hooks.py b/docs/_scripts/_hooks.py index 3d92b17a..e4ab1671 100644 --- a/docs/_scripts/_hooks.py +++ b/docs/_scripts/_hooks.py @@ -16,6 +16,7 @@ def on_page_markdown(md: str, page: Page, **kwargs: Any) -> str: """Called when mkdocs is building the markdown for a page.""" def _add_images(matchobj: re.Match[str]) -> str: + prefix = matchobj.group(0).split("\n", 1)[0] # ``` python ...` code: str = matchobj.group(1).strip() # source code if code.startswith("#!name:"): @@ -25,7 +26,7 @@ def _add_images(matchobj: re.Match[str]) -> str: reldepth = "../" * page.file.src_path.count(os.sep) dest = f"{reldepth}_images/{name}.png" link = f"\n![]({dest}){{ loading=lazy, width={width}px }}\n\n" - new_md = "```python\n" + code + "\n```" + link + new_md = f"{prefix}\n{code}\n```{link}" return new_md elif code.startswith("#!html:"): code, name = _get_html_name(code) @@ -35,16 +36,17 @@ def _add_images(matchobj: re.Match[str]) -> str: f'' ) - new_md = "```python\n" + code + "\n```\n\n" + html_text + "\n" + new_md = f"{prefix}\n{code}\n```\n\n{html_text}\n" return new_md elif code.startswith("#!"): _, other = code.split("\n", 1) else: other = code - return "```python\n" + other + "\n```" + return f"{prefix}\n{other}\n```" - md = re.sub("``` ?python\n([^`]*)```", _add_images, md, flags=re.DOTALL) + # md = re.sub("``` ?python\n([^`]*)```", _add_images, md, flags=re.DOTALL) + md = re.sub("``` ?python.*?\n([^`]*)```", _add_images, md) return md diff --git a/docs/_scripts/_screenshots.py b/docs/_scripts/_screenshots.py index 31ad4eec..2e6f32a0 100644 --- a/docs/_scripts/_screenshots.py +++ b/docs/_scripts/_screenshots.py @@ -13,7 +13,8 @@ from whitecanvas.theme import update_default DOCS: Path = Path(__file__).parent.parent -CODE_BLOCK = re.compile("``` ?python\n([^`]*)```", re.DOTALL) +# CODE_BLOCK = re.compile("``` ?python\n([^`]*)```", re.DOTALL) +CODE_BLOCK = re.compile("``` ?python.*?\n([^`]*)```") def _exec_code(src: str, ns: dict, dest: str) -> dict[str, 
Any]: try: @@ -104,6 +105,8 @@ def main() -> None: raise RuntimeError( f"Error evaluating code\n\n{code}\n\nfor {dest!r}" ) from e - plt.close("all") + # close all if there's more than 10 figures + if len(plt.get_fignums()) > 10: + plt.close("all") main() diff --git a/docs/categorical/cat_cat.md b/docs/categorical/cat_cat.md new file mode 100644 index 00000000..4a7e2626 --- /dev/null +++ b/docs/categorical/cat_cat.md @@ -0,0 +1,26 @@ +# Categorical × Categorical Data + +Here is an example of a data frame with two categorical columns. + +``` python +import numpy as np + +# sample data +rng = np.random.default_rng(12345) + +df = { + "x": ["A"] * 60 + ["B"] * 30 + ["C"] * 40, + "y": ["X"] * 70 + ["Y"] * 60, + "value": rng.normal(size=130), +} +``` + +To use categorical columns for both x- and y-axis, aggregation is required. + +``` python +#!name: cat_cat_heatmap +from whitecanvas import new_canvas + +canvas = new_canvas("matplotlib") +canvas.cat_xy(df, x="x", y="y").mean().add_heatmap("value") +``` diff --git a/docs/categorical/cat_num.md b/docs/categorical/cat_num.md index 91818dab..991a617a 100644 --- a/docs/categorical/cat_num.md +++ b/docs/categorical/cat_num.md @@ -6,10 +6,10 @@ In this section, following data will be used as an example: import numpy as np from whitecanvas import new_canvas -rng = np.random.default_rng(12345) +rng = np.random.default_rng(3) df = { "category": ["A"] * 40 + ["B"] * 50, - "observation": np.concatenate([rng.random(40), rng.random(50) + 1.3]), + "observation": np.concatenate([rng.normal(2.0, size=40), rng.normal(3.3, size=50)]), "replicate": [0] * 23 + [1] * 17 + [0] * 22 + [1] * 28, "temperature": rng.normal(scale=2.8, size=90) + 22.0, } @@ -218,8 +218,8 @@ canvas.show() format string. 
``` python -#!skip -canvas = new_canvas("matplotlib") +#!html: categorical_axis_stripplot_hover +canvas = new_canvas("plotly", size=(400, 300)) ( canvas .cat_x(df, x="category", y="observation") @@ -247,7 +247,7 @@ canvas.show() Similarly, each marker color can represent a numerical value. `update_colormap` will map the value with an arbitrary colormap. -``` python +``` python hl_lines="6" #!name: categorical_axis_stripplot_by_color canvas = new_canvas("matplotlib") ( @@ -282,7 +282,7 @@ canvas.show() Although rug plot does not directly use markers, it also use a line to represent each data point. -``` python +``` python hl_lines="5" #!name: categorical_axis_rugplot canvas = new_canvas("matplotlib") ( @@ -293,9 +293,11 @@ canvas = new_canvas("matplotlib") canvas.show() ``` -Some methods defined for marker-type plots can also be used for rug plot. +Some methods defined for marker-type plots can also be used for rug plot. For example, +`update_colormap` will change the color of the rug lines based on the values of the +specified column. -``` python +``` python hl_lines="6" #!name: categorical_axis_rugplot_colormap canvas = new_canvas("matplotlib") ( @@ -310,7 +312,7 @@ canvas.show() `scale_by_density` will change the length of the rugs to represent the density of the data points. -``` python +``` python hl_lines="6" #!name: categorical_axis_rugplot_density canvas = new_canvas("matplotlib") ( @@ -322,16 +324,150 @@ canvas = new_canvas("matplotlib") canvas.show() ``` -Rug plot can also be overlaid with violin plot with `with_rug` method. +## Overlaying Plots -``` python + +Different types of plots have their own strengths and weaknesses. To make the plot more +informative, it is often necessary to overlay different types of plots. + +You can simply call different methods to overlay different types of plots, but in some +cases it is not that easy. 
For example, to add rug plot to violin plot, you have to +correctly set the lengths of the rug lines so that their edges exactly match the edges +of the violins. + +Some types of plots are implemented with methods to efficiently overlay them with other +plots. All of them use method chaining so that the API is very clean. + +### Rug plot over violin plot + +Violin plot can be overlaid with rug plot using `with_rug` method. Edges of the rug lines match exactly with the edges of the violins. Of course, you can hover over the rug lines to see the details. + +``` python hl_lines="6" #!name: categorical_axis_violin_with_rug canvas = new_canvas("matplotlib") ( canvas .cat_x(df, x="category", y="observation") .add_violinplot(color="replicate") - .with_rug() + .with_rug(color="purple") ) canvas.show() ``` + +### Box plot over violin plot + +Violin plot can be overlaid with box plot using `with_box` method. Color of the box plot +follows the convention of other plotting software by default. + +``` python hl_lines="6" +#!name: categorical_axis_violin_with_box +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_violinplot(color="replicate") + .with_box(width=2.0, extent=0.05) +) +canvas.show() +``` + +If the violins are edge only, the box plot will be filled with the same color. + +``` python hl_lines="6-7" +#!name: categorical_axis_violin_with_box_edge_only +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_violinplot(color="replicate") + .as_edge_only() + .with_box(width=2.0, extent=0.05) +) +canvas.show() +``` + +### Markers over violin plot + +Violin plot has `with_strip` and `with_swarm` methods to overlay markers. 
+ +``` python hl_lines="6" +#!name: categorical_axis_violin_with_strip +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_violinplot(color="replicate") + .with_strip(symbol="D", size=8, color="black") +) +``` + +``` python hl_lines="6" +#!name: categorical_axis_violin_with_swarm +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_violinplot(color="replicate") + .with_swarm(size=8, color="black") +) +``` + +### Add outliers + +Box plot and violin plot are usually combined with outlier markers, as these plots are +not good at showing the details of the sparse data points. +For these plots, `with_outliers` method will add outliers, and optionally change the +whisker lengths for the box plot. + +This is the example of adding outliers to the box plot. Because outliers are shown as a +strip plot, arguments specific to strip plot (`symbol`, `size`, `extent` and `seed`) can be used. + +``` python hl_lines="6" +#!name: categorical_axis_box_with_outliers +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_boxplot(color="replicate") + .with_outliers(size=8) +) +``` + +If the box plot is edge only, the outliers will be the same. + +``` python hl_lines="6" +#!name: categorical_axis_box_with_outliers_edge_only +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_boxplot(color="replicate") + .as_edge_only() + .with_outliers() +) +``` + +Setting `update_whiskers` to `False` will not change the whisker lengths. + +``` python hl_lines="6" +#!name: categorical_axis_box_with_outliers_no_updates +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_boxplot(color="replicate") + .with_outliers(update_whiskers=False) +) +``` + +Violin plot also supports `with_outliers` method. 
+ +``` python hl_lines="6" +#!name: categorical_axis_violin_with_outliers +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_violinplot(color="replicate") + .with_outliers(size=8) +) +``` diff --git a/docs/categorical/index.md b/docs/categorical/index.md index 5bd09010..4057a784 100644 --- a/docs/categorical/index.md +++ b/docs/categorical/index.md @@ -10,10 +10,14 @@ any external plotting libraries or DataFrames, and are more flexible in some cas Methods starting with "cat" return categorical plotters. Methods include: -- `cat` ... plotter for numerical data in x/y-axis categorized by such as color. -- `cat_x` ... plotter for categorical data in x-axis. -- `cat_y` ... plotter for categorical data in y-axis. -- `cat_xy` ... plotter for categorical data in both x- and y-axis. +- `cat` ... plotter for numerical data in x/y-axis categorized by properties such as color → + [Numerical × Numerical Data](num_num.md). +- `cat_x` ... plotter for categorical data in x-axis → + [Categorical × Numerical Data](cat_num.md). +- `cat_y` ... plotter for categorical data in y-axis → + [Categorical × Numerical Data](cat_num.md). +- `cat_xy` ... plotter for categorical data in both x- and y-axis → + [Categorical × Categorical Data](cat_cat.md). These methods need a tabular data and the names of the columns that will be used as the x and y values. 
diff --git a/examples/boxplot_with_outliers.py b/examples/boxplot_with_outliers.py new file mode 100644 index 00000000..77a17203 --- /dev/null +++ b/examples/boxplot_with_outliers.py @@ -0,0 +1,20 @@ +from whitecanvas import new_canvas +import pandas as pd + +def main(): + url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv" + df = pd.read_csv(url) + + canvas = new_canvas("matplotlib:qt") + + layer = ( + canvas.cat_x(df, "smoker", "tip") + .add_violinplot(color="sex") + .as_edge_only() + .with_outliers(symbol="D") + ) + canvas.add_legend() + canvas.show(block=True) + +if __name__ == "__main__": + main() diff --git a/mkdocs.yml b/mkdocs.yml index 8960d830..6d65603b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -43,6 +43,7 @@ nav: - Overview: categorical/index.md - Numerical × Numerical Data: categorical/num_num.md - Categorical × Numerical Data: categorical/cat_num.md + - Categorical × Categorical Data: categorical/cat_cat.md - Aggregation: categorical/aggregation.md - Event Handling: - Overview: events/index.md diff --git a/whitecanvas/canvas/_base.py b/whitecanvas/canvas/_base.py index 72d215bc..100d742c 100644 --- a/whitecanvas/canvas/_base.py +++ b/whitecanvas/canvas/_base.py @@ -1778,7 +1778,7 @@ def _cb_reordered(self): if isinstance(layer, _l.PrimitiveLayer): layer_backends.append(layer._backend) elif isinstance(layer, _l.LayerGroup): - for child in layer.iter_children_recursive(): + for child in layer.iter_primitive(): layer_backends.append(child._backend) elif isinstance(layer, _l.LayerWrapper): for child in _iter_layers(layer): @@ -1871,7 +1871,7 @@ def _iter_layers( if isinstance(layer, _l.PrimitiveLayer): yield layer elif isinstance(layer, _l.LayerGroup): - yield from layer.iter_children_recursive() + yield from layer.iter_primitive() elif isinstance(layer, _l.LayerWrapper): yield from _iter_layers(layer._base_layer) else: diff --git a/whitecanvas/canvas/dataframe/_base.py b/whitecanvas/canvas/dataframe/_base.py index 
254fa46b..c3f9e18f 100644 --- a/whitecanvas/canvas/dataframe/_base.py +++ b/whitecanvas/canvas/dataframe/_base.py @@ -118,6 +118,38 @@ def iter_arrays( res = tuple(sl[i] for i in inv_indices) yield sl, dd[_res_map[res]] + _map[key], group + def category_map_with_dodge( + self, + by: tuple[str, ...], + dodge: tuple[str, ...] | None = None, + ) -> dict[tuple, int]: + """Category mapping considering dodge.""" + if dodge is None: + dodge = () + if set(self._offsets) > set(by): + raise ValueError( + f"offsets must be a subset of by, got offsets={self._offsets!r} and " + f"by={by!r}" + ) + indices = [by.index(d) for d in self._offsets] + _map = self.category_map(self._offsets) + if not dodge: + return _map + out = {} + if set(self._offsets) & set(dodge): + raise ValueError( + f"offsets and dodge must be disjoint, got offsets={self._offsets!r}" + f" and dodge={dodge!r}" + ) + inv_indices = [by.index(d) for d in dodge] + _res_map = self.category_map(dodge) + _nres = len(_res_map) + for sl, _ in self._df.group_by(by): + key = tuple(sl[i] for i in indices) + res = tuple(sl[i] for i in inv_indices) + out[sl] = _res_map[res] + _map[key] * _nres + return out + def prep_arrays( self, by: tuple[str, ...], diff --git a/whitecanvas/canvas/dataframe/_both_cat.py b/whitecanvas/canvas/dataframe/_both_cat.py index fd6f66fe..b6ca4a21 100644 --- a/whitecanvas/canvas/dataframe/_both_cat.py +++ b/whitecanvas/canvas/dataframe/_both_cat.py @@ -161,7 +161,7 @@ def add_heatmap( df_agg = self._aggregate(df, by_both, value) map_x = self._cat_iter_x.prep_position_map(self._x) map_y = self._cat_iter_y.prep_position_map(self._y) - dtype = df[value].dtype + dtype = df_agg[value].dtype if dtype.kind not in "fiub": raise ValueError(f"Column {value!r} is not numeric.") arr = np.full((len(map_y), len(map_x)), fill, dtype=dtype) diff --git a/whitecanvas/canvas/layerlist.py b/whitecanvas/canvas/layerlist.py index 06b4bd53..7a14b271 100644 --- a/whitecanvas/canvas/layerlist.py +++ 
b/whitecanvas/canvas/layerlist.py @@ -72,6 +72,6 @@ def get(self, idx: str, default: _V | None = None) -> Layer | _V | None: def iter_primitives(self) -> Iterable[PrimitiveLayer]: for layer in self: if isinstance(layer, LayerGroup): - yield from layer.iter_children_recursive() + yield from layer.iter_primitive() else: yield layer diff --git a/whitecanvas/layers/_base.py b/whitecanvas/layers/_base.py index 0617ef5c..5007f6ad 100644 --- a/whitecanvas/layers/_base.py +++ b/whitecanvas/layers/_base.py @@ -42,7 +42,7 @@ def __init__(self, name: str | None = None): self.events = self.__class__._events_class() self._name = name if name is not None else self.__class__.__name__ self._x_hint = self._y_hint = None - self._is_grouped = False + self._group_layer_ref: weakref.ReferenceType[LayerGroup] | None = None self._canvas_ref = lambda: None _set_deprecated_aliases(self) @@ -208,13 +208,13 @@ def __init__(self, name: str | None = None): def iter_children(self) -> Iterator[Layer]: """Iterate over all children.""" - def iter_children_recursive(self) -> Iterator[PrimitiveLayer[BaseProtocol]]: + def iter_primitive(self) -> Iterator[PrimitiveLayer[BaseProtocol]]: for child in self.iter_children(): if isinstance(child, LayerGroup): - yield from child.iter_children_recursive() + yield from child.iter_primitive() elif isinstance(child, LayerWrapper): if isinstance(child._base_layer, LayerGroup): - yield from child._base_layer.iter_children_recursive() + yield from child._base_layer.iter_primitive() else: yield child._base_layer else: diff --git a/whitecanvas/layers/_mixin.py b/whitecanvas/layers/_mixin.py index a9683f7c..2696cdbd 100644 --- a/whitecanvas/layers/_mixin.py +++ b/whitecanvas/layers/_mixin.py @@ -230,6 +230,16 @@ def _as_legend_info(self): hatch = self.hatch[0] return _legend.FaceInfo(color, hatch) + @property + def alpha(self) -> float: + return self.color[:, 3] + + @alpha.setter + def alpha(self, value): + color = self.color.copy() + color[:, 3] = value + self.color 
= color + class MultiPropertyEdgeBase(EdgeNamespace): def update( @@ -258,6 +268,16 @@ def _as_legend_info(self): style = self.style[0] return _legend.EdgeInfo(color, width, style) + @property + def alpha(self) -> float: + return self.color[:, 3] + + @alpha.setter + def alpha(self, value): + color = self.color.copy() + color[:, 3] = value + self.color = color + class MonoFace(SinglePropertyFaceBase): @property @@ -448,16 +468,6 @@ def hatch(self, hatch: str | Hatch | Iterable[str | Hatch]): self._layer._backend._plt_set_face_hatch(hatch) self.events.hatch.emit(hatch) - @property - def alpha(self) -> float: - return self.color[:, 3] - - @alpha.setter - def alpha(self, value): - color = self.color.copy() - color[:, 3] = value - self.color = color - class MultiEdge(MultiPropertyEdgeBase): @property @@ -506,16 +516,6 @@ def style(self, style: str | LineStyle | Iterable[str | LineStyle]): self._layer._backend._plt_set_edge_style(style) self.events.style.emit(style) - @property - def alpha(self) -> float: - return self.color[:, 3] - - @alpha.setter - def alpha(self, value): - color = self.color.copy() - color[:, 3] = value - self.color = color - _NFace = TypeVar("_NFace", bound=FaceNamespace) _NEdge = TypeVar("_NEdge", bound=EdgeNamespace) diff --git a/whitecanvas/layers/group/__init__.py b/whitecanvas/layers/group/__init__.py index bfe26118..45c1926f 100644 --- a/whitecanvas/layers/group/__init__.py +++ b/whitecanvas/layers/group/__init__.py @@ -1,4 +1,8 @@ -from whitecanvas.layers.group._collections import LayerCollectionBase, LayerTuple +from whitecanvas.layers.group._collections import ( + LayerCollectionBase, + LayerTuple, + MainAndOtherLayers, +) from whitecanvas.layers.group.band_collection import BandCollection, ViolinPlot from whitecanvas.layers.group.boxplot import BoxPlot from whitecanvas.layers.group.graph import Graph @@ -22,6 +26,7 @@ "BandCollection", "BracketText", "Panel", + "MainAndOtherLayers", "LabeledLine", "LabeledMarkers", "LabeledBars", diff --git 
a/whitecanvas/layers/group/_collections.py b/whitecanvas/layers/group/_collections.py index e01a121d..a2146c01 100644 --- a/whitecanvas/layers/group/_collections.py +++ b/whitecanvas/layers/group/_collections.py @@ -1,6 +1,17 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Iterable, Iterator, MutableSequence, Sequence, TypeVar +import weakref +from typing import ( + TYPE_CHECKING, + Generic, + Iterable, + Iterator, + Literal, + MutableSequence, + Sequence, + TypeVar, + overload, +) from psygnal import Signal @@ -30,7 +41,7 @@ class LayerContainer(LayerGroup): def __init__(self, children: Iterable[Layer], name: str | None = None): super().__init__(name=name) - self._children = [_check_layer(c) for c in children] + self._children = [_process_grouping(c, self) for c in children] self._ordering_indices = self._default_ordering(len(self._children)) self._emit_layer_grouped() @@ -97,7 +108,7 @@ class LayerCollectionBase(LayerContainer, MutableSequence[_L]): _children: list[_L] def __getitem__(self, n: int) -> _L: - """The n-th markers layer.""" + """The n-th layer.""" if not hasattr(n, "__index__"): raise TypeError(f"Index must be an integer, not {type(n)}") return self._children[n] @@ -125,11 +136,13 @@ def insert(self, n: int, layer: _L): if _canvas := self._canvas_ref(): _canvas._canvas()._plt_add_layer(layer._backend) layer._connect_canvas(_canvas) + _process_grouping(layer, self) self._children.insert(n, layer) self._ordering_indices.insert(n, len(self._ordering_indices)) return None def _as_legend_item(self): + """Use the first layer as the main legend item.""" if len(self) == 0: return _legend.EmptyLegendItem() return self[0]._as_legend_item() @@ -140,10 +153,54 @@ def iter_children(self) -> Iterator[_L]: ... -def _check_layer(l) -> Layer: +_L0 = TypeVar("_L0", bound=Layer) +_L1 = TypeVar("_L1", bound=Layer) + + +class MainAndOtherLayers(LayerTuple, Generic[_L0, _L1]): + @overload + def __getitem__(self, n: Literal[0]) -> _L0: + ... 
+ + @overload + def __getitem__(self, n: Literal[0]) -> _L1: + ... + + @overload + def __getitem__(self, n: int) -> Layer: + ... + + def __getitem__(self, n): + """The n-th layer.""" + return super().__getitem__(n) + + def _insert(self, layer: Layer): + if layer._canvas_ref() is not None: + raise ValueError(f"{layer!r} is already added to a canvas") + if _canvas := self._canvas_ref(): + if isinstance(layer, PrimitiveLayer): + _canvas._canvas()._plt_add_layer(layer._backend) + elif isinstance(layer, LayerGroup): + for l in layer.iter_primitive(): + _canvas._canvas()._plt_add_layer(l._backend) + layer._connect_canvas(_canvas) + _process_grouping(layer, self) + self._children.insert(1, layer) + self._ordering_indices.insert(1, len(self._ordering_indices)) + return None + + def _as_legend_item(self): + """Use the first layer as the main legend item.""" + if len(self) == 0: + # this should never happen, but just in case + return _legend.EmptyLegendItem() + return self[0]._as_legend_item() + + +def _process_grouping(l, parent: Layer) -> Layer: if not isinstance(l, Layer): raise TypeError(f"{l!r} is not a Layer") - if l._is_grouped: + if l._group_layer_ref is not None: raise ValueError(f"{l!r} is already grouped") - l._is_grouped = True + l._group_layer_ref = weakref.ref(parent) return l diff --git a/whitecanvas/layers/group/band_collection.py b/whitecanvas/layers/group/band_collection.py index 78a0daec..0aa87861 100644 --- a/whitecanvas/layers/group/band_collection.py +++ b/whitecanvas/layers/group/band_collection.py @@ -173,7 +173,8 @@ def _getter(x: XYYData): @property def orient(self) -> Orientation: - return self._orient + """Orientation of the violin plot (perpendicular to the fill orientation).""" + return self._orient.transpose() @property def ndata(self) -> int: diff --git a/whitecanvas/layers/group/boxplot.py b/whitecanvas/layers/group/boxplot.py index 2d1125b3..f831114e 100644 --- a/whitecanvas/layers/group/boxplot.py +++ b/whitecanvas/layers/group/boxplot.py 
@@ -3,17 +3,16 @@ from typing import Iterable import numpy as np -from cmap import Color from numpy.typing import NDArray from whitecanvas.backend import Backend from whitecanvas.layers._mixin import ( AbstractFaceEdgeMixin, EnumArray, - MonoEdge, + MultiEdge, MultiFace, - SinglePropertyEdgeBase, - SinglePropertyFaceBase, + MultiPropertyEdgeBase, + MultiPropertyFaceBase, ) from whitecanvas.layers._primitive import Bars, MultiLine from whitecanvas.layers.group._cat_utils import check_array_input @@ -56,21 +55,22 @@ class BoxPlot(LayerContainer, AbstractFaceEdgeMixin["BoxFace", "BoxEdge"]): def __init__( self, - boxes: Bars[MultiFace, MonoEdge], + boxes: Bars[MultiFace, MultiEdge], whiskers: MultiLine, medians: MultiLine, - # outliers: Markers | None = None, *, name: str | None = None, orient: Orientation = Orientation.VERTICAL, + capsize: float = 0.15, ): super().__init__([boxes, whiskers, medians], name=name) AbstractFaceEdgeMixin.__init__(self, BoxFace(self), BoxEdge(self)) self._orient = Orientation.parse(orient) + self._capsize = capsize self._init_events() @property - def boxes(self) -> Bars[MultiFace, MonoEdge]: + def boxes(self) -> Bars[MultiFace, MultiEdge]: """The boxes layer (Bars).""" return self._children[0] @@ -111,7 +111,7 @@ def from_arrays( extent=extent, backend=backend, ).with_face_multi( hatch=hatch, color=color, alpha=alpha, - ).with_edge(color="black") # fmt: skip + ).with_edge_multi(color="black") # fmt: skip if ori.is_vertical: segs = _xyy_to_segments( x, agg_arr[0], agg_arr[1], agg_arr[3], agg_arr[4], capsize @@ -136,7 +136,7 @@ def from_arrays( medsegs, name="medians", color="black", alpha=alpha, backend=backend, ) # fmt: skip - return cls(box, whiskers, medians, name=name, orient=ori) + return cls(box, whiskers, medians, name=name, orient=ori, capsize=capsize) @property def orient(self) -> Orientation: @@ -168,6 +168,41 @@ def move(self, shift: float) -> BoxPlot: canvas._autoscale_for_layer(self, pad_rel=0.025) return self + def 
_update_data(self, agg_arr: NDArray[np.number]): + x = self.boxes.data.x + extent = self.boxes.bar_width + self.boxes.set_data(ydata=agg_arr[3] - agg_arr[1], bottom=agg_arr[1]) + if self.orient.is_vertical: + segs = _xyy_to_segments( + x, agg_arr[0], agg_arr[1], agg_arr[3], agg_arr[4], self._capsize + ) + medsegs = [ + [(x0 - extent / 2, y0), (x0 + extent / 2, y0)] + for x0, y0 in zip(x, agg_arr[2]) + ] + else: + segs = _yxx_to_segments( + x, agg_arr[0], agg_arr[1], agg_arr[3], agg_arr[4], self._capsize + ) + medsegs = [ + [(x0, y0 - extent / 2), (x0, y0 + extent / 2)] + for x0, y0 in zip(x, agg_arr[2]) + ] + self.whiskers.data = segs + self.medians.data = medsegs + return None + + def _get_sep_values(self) -> NDArray[np.number]: + """(5, N) array of min, 25%, 50%, 75%, max.""" + idx = 1 if self.orient.is_vertical else 0 + stop = self.boxes.ndata * 2 + _min = [seg[0, idx] for seg in self.whiskers.data[0:stop:2]] + _p25 = self.boxes.bottom + _median = [seg[0, idx] for seg in self.medians.data] + _p75 = self.boxes.top + _max = [seg[1, idx] for seg in self.whiskers.data[1:stop:2]] + return np.stack([_min, _p25, _median, _p75, _max], axis=0) + def _make_sure_hatch_visible(self): _is_no_width = self.edge.width == 0 if np.any(_is_no_width): @@ -175,6 +210,12 @@ def _make_sure_hatch_visible(self): self.edge.width = np.where(_is_no_width, 1, self.edge.width) self.edge.color = np.where(_is_no_width, ec, self.edge.color) + def _xndata(self) -> int: + nboxes = self.boxes.ndata + nlines = self.whiskers.ndata + assert nboxes * 2 == nlines or nboxes * 4 == nlines, f"{nboxes=}, {nlines=}" + return nlines // nboxes + def _xyy_to_segments( x: ArrayLike1D, @@ -241,7 +282,7 @@ def _yxx_to_segments( return segments_0 + segments_1 + cap0 + cap1 -class BoxFace(SinglePropertyFaceBase): +class BoxFace(MultiPropertyFaceBase): _layer: BoxPlot @property @@ -268,48 +309,13 @@ def hatch(self, hatch: str | Hatch | Iterable[str | Hatch]): self._layer.boxes.face.hatch = hatches 
self.events.hatch.emit(hatches) - @property - def alpha(self) -> float: - """Alpha value of the face.""" - return self.color[:, 3] - - @alpha.setter - def alpha(self, value): - color = self.color.copy() - color[:, 3] = value - self.color = color - def update( - self, - *, - color: ColorType | _Void = _void, - hatch: Hatch | str | _Void = _void, - alpha: float | _Void = _void, - ) -> BoxPlot: - """ - Update the face properties. - - Parameters - ---------- - color : ColorType, optional - Color of the face. - hatch : FacePattern, optional - Fill hatch of the face. - alpha : float, optional - Alpha value of the face. - """ - if color is not _void: - self.color = color - if hatch is not _void: - self.hatch = hatch - if alpha is not _void: - self.alpha = alpha - return self._layer - - -class BoxEdge(SinglePropertyEdgeBase): +class BoxEdge(MultiPropertyEdgeBase): _layer: BoxPlot + def _xndata(self) -> int: + return self._layer._xndata() + @property def color(self) -> NDArray[np.floating]: """Edge color of the box plot.""" @@ -317,9 +323,10 @@ def color(self) -> NDArray[np.floating]: @color.setter def color(self, color: ColorType): - col = np.array(Color(color), dtype=np.float32) # assert a single color + ndata = self._layer.boxes.ndata + col = as_color_array(color, ndata) self._layer.boxes.edge.color = col - self._layer.whiskers.color = col + self._layer.whiskers.color = np.concatenate([col] * self._xndata(), axis=0) self._layer.medians.color = col self.events.color.emit(col) @@ -330,10 +337,12 @@ def width(self) -> NDArray[np.float32]: @width.setter def width(self, width: float): - self._layer.boxes.edge.width = width - self._layer.whiskers.width = width - self._layer.medians.width = width - self.events.width.emit(width) + ndata = self._layer.boxes.ndata + widths = as_any_1d_array(width, ndata, dtype=np.float32) + self._layer.boxes.edge.width = widths + self._layer.whiskers.width = np.tile(widths, self._xndata()) + self._layer.medians.width = widths + 
self.events.width.emit(widths) @property def style(self) -> EnumArray[LineStyle]: @@ -342,36 +351,14 @@ def style(self) -> EnumArray[LineStyle]: @style.setter def style(self, style: str | LineStyle): - style = LineStyle(style) - self._layer.boxes.edge.style = style - self._layer.whiskers.style = style - self._layer.medians.style = style + ndata = self._layer.boxes.ndata + if isinstance(style, (str, LineStyle)): + styles = np.full(ndata, LineStyle(style), dtype=object) + else: + styles = np.array(style, dtype=object) + if styles.shape != (ndata,): + raise ValueError("Invalid shape of the style array.") + self._layer.boxes.edge.style = styles + self._layer.whiskers.style = np.tile(styles, self._xndata()) + self._layer.medians.style = styles self.events.style.emit(style) - - @property - def alpha(self) -> float: - return self.color[3] - - @alpha.setter - def alpha(self, value): - color = self.color.copy() - color[3] = value - self.color = color - - def update( - self, - *, - color: ColorType | _Void = _void, - style: LineStyle | str | _Void = _void, - width: float | _Void = _void, - alpha: float | _Void = _void, - ) -> BoxPlot: - if color is not _void: - self.color = color - if style is not _void: - self.style = style - if width is not _void: - self.width = width - if alpha is not _void: - self.alpha = alpha - return self._layer diff --git a/whitecanvas/layers/group/marker_collection.py b/whitecanvas/layers/group/marker_collection.py index bcc28831..9d4fc2e7 100644 --- a/whitecanvas/layers/group/marker_collection.py +++ b/whitecanvas/layers/group/marker_collection.py @@ -430,6 +430,8 @@ def with_hover_text(self, text: str | Iterable[Any]) -> Self: def with_hover_template(self, template: str, extra: Any | None = None) -> Self: """Add hover template to the markers.""" xs, ys = self.data + if xs.size == 0: # empty layer + return self if self._backend_name in ("plotly", "bokeh"): # conversion for HTML template = template.replace("\n", "
") params = parse_texts(template, xs.size, extra) diff --git a/whitecanvas/layers/tabular/_box_like.py b/whitecanvas/layers/tabular/_box_like.py index b34313ba..b13c64e9 100644 --- a/whitecanvas/layers/tabular/_box_like.py +++ b/whitecanvas/layers/tabular/_box_like.py @@ -2,24 +2,25 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Callable, Generic, Sequence, TypeVar +from typing import TYPE_CHECKING, Callable, Generic, Literal, Sequence, TypeVar import numpy as np from cmap import Color from whitecanvas import theme from whitecanvas.backend import Backend -from whitecanvas.layers import _legend, _mixin +from whitecanvas.layers import Layer, _legend, _mixin from whitecanvas.layers import group as _lg from whitecanvas.layers.tabular import _jitter, _shared from whitecanvas.layers.tabular import _plans as _p -from whitecanvas.layers.tabular._df_compat import DataFrameWrapper +from whitecanvas.layers.tabular._df_compat import DataFrameWrapper, parse from whitecanvas.types import ( ColormapType, ColorType, Hatch, LineStyle, Orientation, + Symbol, _Void, ) from whitecanvas.utils.type_check import is_real_number @@ -28,11 +29,12 @@ from typing_extensions import Self from whitecanvas.canvas.dataframe._base import CatIterator - from whitecanvas.layers.tabular import DFRugGroups + from whitecanvas.layers.tabular import DFMarkerGroups, DFRugGroups _FE = _mixin.AbstractFaceEdgeMixin[_mixin.FaceNamespace, _mixin.EdgeNamespace] _DF = TypeVar("_DF") +_L = TypeVar("_L", bound=Layer) _void = _Void() @@ -246,7 +248,7 @@ def __init__( name: str | None = None, orient: Orientation = Orientation.VERTICAL, extent: float = 0.8, - shape: str = "both", + shape: Literal["both", "left", "right"] = "both", backend: str | Backend | None = None, ): _splitby, dodge = _shared.norm_dodge( @@ -261,8 +263,9 @@ def __init__( ) # fmt: skip super().__init__(base, cat.df) _BoxLikeMixin.__init__(self, categories, _splitby, color_by, hatch_by) + self._offsets = cat.offsets 
self._value = value - self._map = cat.prep_position_map(_splitby, dodge) + self._dodge = dodge self.with_hover_template("\n".join(f"{k}: {{{k}!r}}" for k in self._splitby)) @property @@ -296,13 +299,25 @@ def with_rug( self, *, width: float = 1.0, - color="black", - ): - """Overlay rug plot on the violins.""" + color: ColorType | None = None, + ) -> _lg.MainAndOtherLayers[Self, DFRugGroups[_DF]]: + """Overlay rug plot on the violins and return the violin layer.""" from whitecanvas.layers.tabular import DFRugGroups + canvas = self._canvas_ref() + if canvas is None: + raise ValueError("No canvas to add the rug plot.") _extent = self.base.extent - jitter = _jitter.CategoricalJitter(self._splitby, self._map) + if color is not None: + colors = Color(color) + elif self._is_edge_only(): + colors = self._color_by.by + else: + colors = Color("#1F1F1F") + jitter = _jitter.CategoricalJitter( + self._splitby, + self._make_cat_iterator().prep_position_map(self._splitby, self._dodge), + ) if self.base._shape == "both": align = "center" elif self.base._shape == "left": @@ -310,16 +325,297 @@ def with_rug( else: align = "low" rug = DFRugGroups.from_table( - self._source, jitter, self._value, color=color, width=width, extent=_extent, - backend=self.base._backend_name, + self._source, jitter, self._value, color=colors, width=width, + extent=_extent, backend=self.base._backend_name, ).scale_by_density(align=align) # fmt: skip - old_name = self.name - return _ViolinRugTuple([self, rug], name=old_name) + return _combine_main_and_others(self, rug) + + def with_box( + self, + *, + color: ColorType | None = None, + median_color: ColorType = "white", + width: float | None = None, + extent: float = 0.1, + capsize: float = 0.0, + ) -> _lg.MainAndOtherLayers[Self, DFBoxPlot[_DF]]: + """ + Overlay box plot on the violins and return the violin layer. 
+ + Following the convention of many statistical software, the box plot is colored + by black if the violin faces are colored, and colored by the edge color + otherwise. The median line is colored by the given median color. + + Parameters + ---------- + color : color-type, optional + Color of the box plot. If not given, it will be colored by "#1F1F1F" if + the violin faces are colored, and by the edge color of the violin plot + otherwise. + median_color : color-type, optional + Color of the median line of the box plot. + width : float, optional + Width of the whiskers of the boxplot. Use violin edge width if not given. + extent : float, optional + Relative width of the boxes. + capsize : float, optional + Relative size of the caps of the whiskers. + """ + + canvas = self._canvas_ref() + if canvas is None: + raise ValueError("No canvas to add the box plot.") + if color is not None: + colors = Color(color) + else: + if np.all(self.base.edge.width > 0) and np.all(self.base.edge.alpha > 0): + colors = self.base.edge.color + else: + colors = Color("#1F1F1F") + if width is None: + width = self.base.edge.width.mean() + box = DFBoxPlot( + self._make_cat_iterator(), self._value, name=f"boxplot-of-{self.name}", + color=None, hatch=Hatch.SOLID, dodge=self._dodge, width=width, + orient=self.orient, capsize=capsize, extent=extent, + backend=canvas._get_backend(), + ) # fmt: skip + box.base.boxes.face.color = colors + box.base.edge.color = colors + box.base.medians.color = Color(median_color) + return _combine_main_and_others(self, box) + + def with_outliers( + self, + *, + color: ColorType | None = None, + symbol: str | Symbol = Symbol.CIRCLE, + size: float | None = None, + ratio: float = 1.5, + extent: float = 0.1, + seed: int | None = 0, + ) -> _lg.MainAndOtherLayers[Self, DFMarkerGroups[_DF]]: + """ + Overlay outliers on the violins and return the violin layer. + + Parameters + ---------- + color : color-type, optional + Color of the outliers. 
To make sure the outliers are easily visible, face + color will always be transparent. If a constant color is given, all the + edges will be colored by the same color. By default, the edge colors are + the same as the edge colors of the box plot. + symbol : str or Symbol, optional + Symbol of the outlier markers. + size : float, optional + Size of the outlier markers. If not given, it will be set to the theme + default. + ratio : float, optional + Ratio of the interquartile range (IQR) to determine the outliers. Data + points outside of the range [Q1 - ratio * IQR, Q3 + ratio * IQR] will be + considered as outliers. + extent : float, optional + Relative width of the jitter range (same effect as the `extent` argument of + the `add_stripplot` method). + seed : int, optional + Random seed for the jitter (same effect as the `seed` argument of the + `add_stripplot` method). + """ + from whitecanvas.canvas.dataframe._base import CatIterator + from whitecanvas.layers.tabular import DFMarkerGroups + + canvas = self._canvas_ref() + size = theme._default("markers.size", size) + if canvas is None: + raise ValueError("No canvas to add the outliers.") + + is_edge_only = self._is_edge_only() + + # category iterator is used to calculate positions and indices + _cat_self = CatIterator(self._source, offsets=self._offsets) + _pos_map = _cat_self.prep_position_map(self._splitby, self._dodge) + _extent = _cat_self.zoom_factor(self._dodge) * extent + _cat_map = _cat_self.category_map_with_dodge(self._splitby, self._dodge) + + # calculate outliers and update the separators + df_outliers = {c: [] for c in (*self._splitby, self._value)} + colors = [] + for sl, sub in self._source.group_by(self._splitby): + arr = sub[self._value] + q1, q3 = np.quantile(arr, [0.25, 0.75]) + iqr = q3 - q1 # interquartile range + low = q1 - ratio * iqr # lower bound of inliers + high = q3 + ratio * iqr # upper bound of inliers + idx_cat = _cat_map[sl] + outliers = arr[(arr < low) | (arr > high)] + for _cat, 
_s in zip(sl, self._splitby): + df_outliers[_s].extend([_cat] * outliers.size) + df_outliers[self._value].extend(outliers) + if is_edge_only: + _this_color = self.base.edge.color[idx_cat] + else: + _this_color = self.base.face.color[idx_cat] + colors.extend([_this_color] * outliers.size) + + df_outliers = parse(df_outliers) + xj = _jitter.UniformJitter(self._splitby, _pos_map, extent=_extent, seed=seed) + yj = _jitter.IdentityJitter(self._value).check(df_outliers) + new = DFMarkerGroups( + df_outliers, xj, yj, name=f"outliers-of-{self.name}", color=Color("black"), + orient=self.orient, symbol=symbol, size=size, backend=canvas._get_backend(), + ) # fmt: skip + if color is None: + if is_edge_only: # edge only + new._apply_color(np.stack(colors, axis=0, dtype=np.float32)) + new.as_edge_only(width=self.base.edge.width.mean()) + return _combine_main_and_others(self, new) + + def with_strip( + self, + *, + color: ColorType | None = None, + symbol: str | Symbol = Symbol.CIRCLE, + size: str | None = None, + extent: float = 0.2, + seed: int | None = 0, + ) -> _lg.MainAndOtherLayers[Self, DFMarkerGroups[_DF]]: + """ + Overlay strip plot on the violins. + + Parameters + ---------- + color : color-type, optional + Color of the strip plot. If not given, it will be colored by the violin + face color. + symbol : str or Symbol, optional + Symbol of the strip plot markers. + size : float, optional + Size of the strip plot markers. If not given, it will be set to the theme + default. + extent : float, optional + Relative width of the jitter range. + seed : int, optional + Random seed for the jitter. 
+ """ + from whitecanvas.canvas.dataframe._base import CatIterator + from whitecanvas.layers.tabular import DFMarkerGroups + + canvas = self._canvas_ref() + size = theme._default("markers.size", size) + if canvas is None: + raise ValueError("No canvas to add the outliers.") + + if color is None: + color = self._color_by.by + else: + color = Color(color) + + # category iterator is used to calculate positions and indices + _cat_self = CatIterator(self._source, offsets=self._offsets) + _pos_map = _cat_self.prep_position_map(self._splitby, self._dodge) + _extent = _cat_self.zoom_factor(self._dodge) * extent + df = self._source + xj = _jitter.UniformJitter(self._splitby, _pos_map, extent=_extent, seed=seed) + yj = _jitter.IdentityJitter(self._value).check(df) + new = DFMarkerGroups( + df, xj, yj, name=f"outliers-of-{self.name}", color=color, + orient=self.orient, symbol=symbol, size=size, backend=canvas._get_backend(), + ) # fmt: skip + if self._is_edge_only(): + new.as_edge_only(width=self.base.edge.width.mean()) + return _combine_main_and_others(self, new) + + def with_swarm( + self, + *, + color: ColorType | None = None, + symbol: str | Symbol = Symbol.CIRCLE, + size: str | None = None, + extent: float = 0.8, + sort: bool = False, + ) -> _lg.MainAndOtherLayers[Self, DFMarkerGroups[_DF]]: + """ + Overlay swarm plot on the violins. + + Parameters + ---------- + color : color-type, optional + Color of the swarm plot. If not given, it will be colored by the violin + face color. + symbol : str or Symbol, optional + Symbol of the swarm plot markers. + size : float, optional + Size of the swarm plot markers. If not given, it will be set to the theme + default. + extent : float, optional + Relative width of the jitter range. + sort : bool, default False + If True, the markers will be sorted by the value. 
+ """ + from whitecanvas.canvas.dataframe._base import CatIterator + from whitecanvas.layers.tabular import DFMarkerGroups + + canvas = self._canvas_ref() + size = theme._default("markers.size", size) + if canvas is None: + raise ValueError("No canvas to add the outliers.") + + if color is None: + color = self._color_by.by + else: + color = Color(color) + + # category iterator is used to calculate positions and indices + _cat_self = CatIterator(self._source, offsets=self._offsets) + _pos_map = _cat_self.prep_position_map(self._splitby, self._dodge) + _extent = _cat_self.zoom_factor(self._dodge) * extent + df = self._source + + if sort: + df = df.sort(self._value) + lims = df[self._value].min(), df[self._value].max() + xj = _jitter.SwarmJitter( + self._splitby, _pos_map, self._value, lims, extent=_extent + ) + yj = _jitter.IdentityJitter(self._value).check(df) + new = DFMarkerGroups( + df, xj, yj, name=f"outliers-of-{self.name}", color=color, + orient=self.orient, symbol=symbol, size=size, backend=canvas._get_backend(), + ) # fmt: skip + if self._is_edge_only(): + new.as_edge_only(width=self.base.edge.width.mean()) + return _combine_main_and_others(self, new) + + def as_edge_only( + self, + width: float = 3.0, + style: str | LineStyle = LineStyle.SOLID, + ) -> Self: + """ + Replace the violin edge color with the face color and delete the face color. + + Parameters + ---------- + width : float, optional + Width of the edge. + style : str or LineStyle, optional + Style of the edge. 
+ """ + self.base.with_edge(color=self.base.face.color, width=width, style=style) + self.base.face.update(alpha=0.0) + return self - # def with_box(self): def _as_legend_item(self) -> _legend.LegendItemCollection: return _BoxLikeMixin._as_legend_item(self) + def _make_cat_iterator(self) -> CatIterator[_DF]: + from whitecanvas.canvas.dataframe._base import CatIterator + + return CatIterator(self._source, offsets=self._offsets) + + def _is_edge_only(self) -> bool: + return np.all(self.base.face.alpha < 1e-6) + class DFBoxPlot( _shared.DataFrameLayerWrapper[_lg.BoxPlot, _DF], _BoxLikeMixin, Generic[_DF] @@ -330,6 +626,7 @@ def __init__( value: str, color: str | tuple[str, ...] | None = None, hatch: str | tuple[str, ...] | None = None, + width: float = 1.0, dodge: str | tuple[str, ...] | bool | None = None, name: str | None = None, orient: Orientation = Orientation.VERTICAL, @@ -348,8 +645,12 @@ def __init__( x, arr, name=name, orient=orient, capsize=_capsize, extent=_extent, backend=backend, ) # fmt: skip + base.edge.width = width super().__init__(base, cat.df) _BoxLikeMixin.__init__(self, categories, _splitby, color_by, hatch_by) + self._offsets = cat.offsets + self._value = value + self._dodge = dodge @property def orient(self) -> Orientation: @@ -374,6 +675,120 @@ def with_hover_template(self, template: str) -> Self: self.base.boxes.with_hover_template(template, extra=extra) return self + def with_outliers( + self, + *, + color: ColorType | None = None, + symbol: str | Symbol = Symbol.CIRCLE, + size: float | None = None, + ratio: float = 1.5, + extent: float = 0.1, + seed: int | None = 0, + update_whiskers: bool = True, + ) -> _lg.MainAndOtherLayers[Self, DFMarkerGroups[_DF]]: + """ + Overlay outliers on the box plot. + + Parameters + ---------- + color : color-type, optional + Color of the outliers. To make sure the outliers are easily visible, face + color will always be transparent. If a constant color is given, all the + edges will be colored by the same color. 
By default, the edge colors are + the same as the edge colors of the box plot. + symbol : str or Symbol, optional + Symbol of the outlier markers. + size : float, optional + Size of the outlier markers. If not given, it will be set to the theme + default. + ratio : float, optional + Ratio of the interquartile range (IQR) to determine the outliers. Data + points outside of the range [Q1 - ratio * IQR, Q3 + ratio * IQR] will be + considered as outliers. + extent : float, optional + Relative width of the jitter range (same effect as the `extent` argument of + the `add_stripplot` method). + seed : int, optional + Random seed for the jitter (same effect as the `seed` argument of the + `add_stripplot` method). + update_whiskers : bool, default True + If True, the whiskers of the box plot will be updated to exclude the + outliers. + """ + from whitecanvas.canvas.dataframe._base import CatIterator + from whitecanvas.layers.tabular import DFMarkerGroups + + canvas = self._canvas_ref() + size = theme._default("markers.size", size) + if canvas is None: + raise ValueError("No canvas to add the outliers.") + + is_edge_only = np.all(self.base.boxes.face.alpha < 1e-6) + + # category iterator is used to calculate positions and indices + _cat_self = CatIterator(self._source, offsets=self._offsets) + _pos_map = _cat_self.prep_position_map(self._splitby, self._dodge) + _extent = _cat_self.zoom_factor(self._dodge) * extent + _cat_map = _cat_self.category_map_with_dodge(self._splitby, self._dodge) + + # calculate outliers and update the separators + df_outliers = {c: [] for c in (*self._splitby, self._value)} + agg_values = self.base._get_sep_values() # for updating whiskers + colors = [] + for sl, sub in self._source.group_by(self._splitby): + arr = sub[self._value] + q1, q3 = np.quantile(arr, [0.25, 0.75]) + iqr = q3 - q1 # interquartile range + low = q1 - ratio * iqr # lower bound of inliers + high = q3 + ratio * iqr # upper bound of inliers + idx_cat = _cat_map[sl] + inliers = 
arr[(arr >= low) & (arr <= high)] + agg_values[0, idx_cat] = inliers.min() + agg_values[4, idx_cat] = inliers.max() + outliers = arr[(arr < low) | (arr > high)] + for _cat, _s in zip(sl, self._splitby): + df_outliers[_s].extend([_cat] * outliers.size) + df_outliers[self._value].extend(outliers) + if is_edge_only: + _this_color = self.base.edge.color[idx_cat] + else: + _this_color = self.base.face.color[idx_cat] + colors.extend([_this_color] * outliers.size) + + df_outliers = parse(df_outliers) + xj = _jitter.UniformJitter(self._splitby, _pos_map, extent=_extent, seed=seed) + yj = _jitter.IdentityJitter(self._value).check(df_outliers) + new = DFMarkerGroups( + df_outliers, xj, yj, name=f"outliers-of-{self.name}", color=Color("black"), + orient=self.orient, symbol=symbol, size=size, backend=canvas._get_backend(), + ) # fmt: skip + if color is None: + if is_edge_only: # edge only + new._apply_color(np.stack(colors, axis=0, dtype=np.float32)) + new.as_edge_only(width=self.base.edge.width.mean()) + if update_whiskers: + self.base._update_data(agg_values) + return _combine_main_and_others(self, new) + + def as_edge_only( + self, + width: float = 3.0, + style: str | LineStyle = LineStyle.SOLID, + ) -> Self: + """ + Replace the box edge color with the face color and delete the face color. + + Parameters + ---------- + width : float, optional + Width of the edge. + style : str or LineStyle, optional + Style of the edge. 
+ """ + self.base.with_edge(color=self.base.face.color, width=width, style=style) + self.base.face.update(alpha=0.0) + return self + def _as_legend_item(self) -> _legend.LegendItemCollection: return _BoxLikeMixin._as_legend_item(self) @@ -601,14 +1016,20 @@ def _as_legend_item(self) -> _legend.LegendItemCollection: return _BoxLikeMixin._as_legend_item(self) -class _ViolinRugTuple(_lg.LayerTuple): - @property - def violin(self) -> DFViolinPlot: - return self._children[0] +_L0 = TypeVar("_L0", bound=Layer) +_L1 = TypeVar("_L1", bound=Layer) - @property - def rug(self) -> DFRugGroups: - return self._children[1] - def _as_legend_item(self) -> _legend.LegendItem: - return self.violin._as_legend_item() +def _combine_main_and_others( + layer: _L0, + incoming: _L1, +) -> _lg.MainAndOtherLayers[_L0, _L1]: + if layer._group_layer_ref is None: + return _lg.MainAndOtherLayers([layer, incoming], name=layer.name) + group_layer = layer._group_layer_ref() + if group_layer is None: + raise ValueError("Parent layer group is deleted.") + elif not isinstance(group_layer, _lg.MainAndOtherLayers): + raise ValueError(f"Parent layer group is incorrect type {type(group_layer)}.") + group_layer._insert(incoming) + return group_layer diff --git a/whitecanvas/layers/tabular/_jitter.py b/whitecanvas/layers/tabular/_jitter.py index 05684c83..a099fcca 100644 --- a/whitecanvas/layers/tabular/_jitter.py +++ b/whitecanvas/layers/tabular/_jitter.py @@ -46,7 +46,10 @@ def _map(self, src: DataFrameWrapper[_DF]) -> NDArray[np.floating]: args = [src[b] for b in self._by] out = np.zeros(len(src), dtype=np.float32) for row, pos in self._mapping.items(): - sl = np.all(np.column_stack([a == r for a, r in zip(args, row)]), axis=1) + arrs = [a == r for a, r in zip(args, row) if a.size > 0] + if len(arrs) == 0: + continue + sl = np.all(np.column_stack(arrs), axis=1) out[sl] = pos return out diff --git a/whitecanvas/utils/normalize.py b/whitecanvas/utils/normalize.py index e8cc4dbf..a25d7acd 100644 --- 
a/whitecanvas/utils/normalize.py +++ b/whitecanvas/utils/normalize.py @@ -85,6 +85,8 @@ def as_color_array(color, size: int) -> NDArray[np.float32]: col = arr_color(color) return np.repeat(col[np.newaxis, :], size, axis=0) if isinstance(color, np.ndarray): + if color.size == 0 and size == 0: + return color if color.dtype.kind in "OU": if color.shape != (size,): raise ValueError(