diff --git a/docs/_scripts/_hooks.py b/docs/_scripts/_hooks.py
index 3d92b17a..e4ab1671 100644
--- a/docs/_scripts/_hooks.py
+++ b/docs/_scripts/_hooks.py
@@ -16,6 +16,7 @@ def on_page_markdown(md: str, page: Page, **kwargs: Any) -> str:
"""Called when mkdocs is building the markdown for a page."""
def _add_images(matchobj: re.Match[str]) -> str:
+ prefix = matchobj.group(0).split("\n", 1)[0] # ``` python ...`
code: str = matchobj.group(1).strip() # source code
if code.startswith("#!name:"):
@@ -25,7 +26,7 @@ def _add_images(matchobj: re.Match[str]) -> str:
reldepth = "../" * page.file.src_path.count(os.sep)
dest = f"{reldepth}_images/{name}.png"
link = f"\n![]({dest}){{ loading=lazy, width={width}px }}\n\n"
- new_md = "```python\n" + code + "\n```" + link
+ new_md = f"{prefix}\n{code}\n```{link}"
return new_md
elif code.startswith("#!html:"):
code, name = _get_html_name(code)
@@ -35,16 +36,17 @@ def _add_images(matchobj: re.Match[str]) -> str:
f''
)
- new_md = "```python\n" + code + "\n```\n\n" + html_text + "\n"
+ new_md = f"{prefix}\n{code}\n```\n\n{html_text}\n"
return new_md
elif code.startswith("#!"):
_, other = code.split("\n", 1)
else:
other = code
- return "```python\n" + other + "\n```"
+ return f"{prefix}\n{other}\n```"
- md = re.sub("``` ?python\n([^`]*)```", _add_images, md, flags=re.DOTALL)
+ # md = re.sub("``` ?python\n([^`]*)```", _add_images, md, flags=re.DOTALL)
+ md = re.sub("``` ?python.*?\n([^`]*)```", _add_images, md)
return md
diff --git a/docs/_scripts/_screenshots.py b/docs/_scripts/_screenshots.py
index 31ad4eec..2e6f32a0 100644
--- a/docs/_scripts/_screenshots.py
+++ b/docs/_scripts/_screenshots.py
@@ -13,7 +13,8 @@
from whitecanvas.theme import update_default
DOCS: Path = Path(__file__).parent.parent
-CODE_BLOCK = re.compile("``` ?python\n([^`]*)```", re.DOTALL)
+# Allow extra fence attributes after "python" (e.g. hl_lines="6") on the opening line.
+CODE_BLOCK = re.compile("``` ?python.*?\n([^`]*)```")
def _exec_code(src: str, ns: dict, dest: str) -> dict[str, Any]:
try:
@@ -104,6 +105,8 @@ def main() -> None:
raise RuntimeError(
f"Error evaluating code\n\n{code}\n\nfor {dest!r}"
) from e
- plt.close("all")
+ # close all if there's more than 10 figures
+ if len(plt.get_fignums()) > 10:
+ plt.close("all")
main()
diff --git a/docs/categorical/cat_cat.md b/docs/categorical/cat_cat.md
new file mode 100644
index 00000000..4a7e2626
--- /dev/null
+++ b/docs/categorical/cat_cat.md
@@ -0,0 +1,26 @@
+# Categorical × Categorical Data
+
+Here is an example of a data frame with two categorical columns.
+
+``` python
+import numpy as np
+
+# sample data
+rng = np.random.default_rng(12345)
+
+df = {
+ "x": ["A"] * 60 + ["B"] * 30 + ["C"] * 40,
+ "y": ["X"] * 70 + ["Y"] * 60,
+ "value": rng.normal(size=130),
+}
+```
+
+To use categorical columns for both x- and y-axis, aggregation is required.
+
+``` python
+#!name: cat_cat_heatmap
+from whitecanvas import new_canvas
+
+canvas = new_canvas("matplotlib")
+canvas.cat_xy(df, x="x", y="y").mean().add_heatmap("value")
+```
diff --git a/docs/categorical/cat_num.md b/docs/categorical/cat_num.md
index 91818dab..991a617a 100644
--- a/docs/categorical/cat_num.md
+++ b/docs/categorical/cat_num.md
@@ -6,10 +6,10 @@ In this section, following data will be used as an example:
import numpy as np
from whitecanvas import new_canvas
-rng = np.random.default_rng(12345)
+rng = np.random.default_rng(3)
df = {
"category": ["A"] * 40 + ["B"] * 50,
- "observation": np.concatenate([rng.random(40), rng.random(50) + 1.3]),
+ "observation": np.concatenate([rng.normal(2.0, size=40), rng.normal(3.3, size=50)]),
"replicate": [0] * 23 + [1] * 17 + [0] * 22 + [1] * 28,
"temperature": rng.normal(scale=2.8, size=90) + 22.0,
}
@@ -218,8 +218,8 @@ canvas.show()
format string.
``` python
-#!skip
-canvas = new_canvas("matplotlib")
+#!html: categorical_axis_stripplot_hover
+canvas = new_canvas("plotly", size=(400, 300))
(
canvas
.cat_x(df, x="category", y="observation")
@@ -247,7 +247,7 @@ canvas.show()
Similarly, each marker color can represent a numerical value. `update_colormap` will map
the value with an arbitrary colormap.
-``` python
+``` python hl_lines="6"
#!name: categorical_axis_stripplot_by_color
canvas = new_canvas("matplotlib")
(
@@ -282,7 +282,7 @@ canvas.show()
Although rug plot does not directly use markers, it also use a line to represent each
data point.
-``` python
+``` python hl_lines="5"
#!name: categorical_axis_rugplot
canvas = new_canvas("matplotlib")
(
@@ -293,9 +293,11 @@ canvas = new_canvas("matplotlib")
canvas.show()
```
-Some methods defined for marker-type plots can also be used for rug plot.
+Some methods defined for marker-type plots can also be used for rug plot. For example,
+`update_colormap` will change the color of the rug lines based on the values of the
+specified column.
-``` python
+``` python hl_lines="6"
#!name: categorical_axis_rugplot_colormap
canvas = new_canvas("matplotlib")
(
@@ -310,7 +312,7 @@ canvas.show()
`scale_by_density` will change the length of the rugs to represent the density of the
data points.
-``` python
+``` python hl_lines="6"
#!name: categorical_axis_rugplot_density
canvas = new_canvas("matplotlib")
(
@@ -322,16 +324,150 @@ canvas = new_canvas("matplotlib")
canvas.show()
```
-Rug plot can also be overlaid with violin plot with `with_rug` method.
+## Overlaying Plots
-``` python
+Different types of plots have their own strengths and weaknesses. To make the plot more
+informative, it is often necessary to overlay different types of plots.
+
+You can simply call different methods to overlay different types of plots, but in some
+cases it is not that easy. For example, to add rug plot to violin plot, you have to
+correctly set the lengths of the rug lines so that their edges exactly match the edges
+of the violins.
+
+Some types of plots are implemented with methods to efficiently overlay them with other
+plots. All of them use method chaining so that the API is very clean.
+
+### Rug plot over violin plot
+
+Violin plot can be overlaid with rug plot using `with_rug` method. Edges of the rug lines match exactly with the edges of the violins. Of course, you can hover over the rug lines to see the details.
+
+``` python hl_lines="6"
#!name: categorical_axis_violin_with_rug
canvas = new_canvas("matplotlib")
(
canvas
.cat_x(df, x="category", y="observation")
.add_violinplot(color="replicate")
- .with_rug()
+ .with_rug(color="purple")
)
canvas.show()
```
+
+### Box plot over violin plot
+
+Violin plot can be overlaid with box plot using `with_box` method. Color of the box plot
+follows the convention of other plotting software by default.
+
+``` python hl_lines="6"
+#!name: categorical_axis_violin_with_box
+canvas = new_canvas("matplotlib")
+(
+ canvas
+ .cat_x(df, x="category", y="observation")
+ .add_violinplot(color="replicate")
+ .with_box(width=2.0, extent=0.05)
+)
+canvas.show()
+```
+
+If the violins are edge only, the box plot will be filled with the same color.
+
+``` python hl_lines="6-7"
+#!name: categorical_axis_violin_with_box_edge_only
+canvas = new_canvas("matplotlib")
+(
+ canvas
+ .cat_x(df, x="category", y="observation")
+ .add_violinplot(color="replicate")
+ .as_edge_only()
+ .with_box(width=2.0, extent=0.05)
+)
+canvas.show()
+```
+
+### Markers over violin plot
+
+Violin plot has `with_strip` and `with_swarm` methods to overlay markers.
+
+``` python hl_lines="6"
+#!name: categorical_axis_violin_with_strip
+canvas = new_canvas("matplotlib")
+(
+ canvas
+ .cat_x(df, x="category", y="observation")
+ .add_violinplot(color="replicate")
+ .with_strip(symbol="D", size=8, color="black")
+)
+```
+
+``` python hl_lines="6"
+#!name: categorical_axis_violin_with_swarm
+canvas = new_canvas("matplotlib")
+(
+ canvas
+ .cat_x(df, x="category", y="observation")
+ .add_violinplot(color="replicate")
+ .with_swarm(size=8, color="black")
+)
+```
+
+### Add outliers
+
+Box plot and violin plot are usually combined with outlier markers, as these plots are
+not good at showing the details of the sparse data points.
+For these plots, `with_outliers` method will add outliers, and optionally change the
+whisker lengths for the box plot.
+
+This is an example of adding outliers to the box plot. Because outliers are shown as a
+strip plot, arguments specific to strip plot (`symbol`, `size`, `extent` and `seed`) can be used.
+
+``` python hl_lines="6"
+#!name: categorical_axis_box_with_outliers
+canvas = new_canvas("matplotlib")
+(
+ canvas
+ .cat_x(df, x="category", y="observation")
+ .add_boxplot(color="replicate")
+ .with_outliers(size=8)
+)
+```
+
+If the box plot is edge only, the outliers will be the same.
+
+``` python hl_lines="6"
+#!name: categorical_axis_box_with_outliers_edge_only
+canvas = new_canvas("matplotlib")
+(
+ canvas
+ .cat_x(df, x="category", y="observation")
+ .add_boxplot(color="replicate")
+ .as_edge_only()
+ .with_outliers()
+)
+```
+
+Setting `update_whiskers` to `False` will not change the whisker lengths.
+
+``` python hl_lines="6"
+#!name: categorical_axis_box_with_outliers_no_updates
+canvas = new_canvas("matplotlib")
+(
+ canvas
+ .cat_x(df, x="category", y="observation")
+ .add_boxplot(color="replicate")
+ .with_outliers(update_whiskers=False)
+)
+```
+
+Violin plot also supports `with_outliers` method.
+
+``` python hl_lines="6"
+#!name: categorical_axis_violin_with_outliers
+canvas = new_canvas("matplotlib")
+(
+ canvas
+ .cat_x(df, x="category", y="observation")
+ .add_violinplot(color="replicate")
+ .with_outliers(size=8)
+)
+```
diff --git a/docs/categorical/index.md b/docs/categorical/index.md
index 5bd09010..4057a784 100644
--- a/docs/categorical/index.md
+++ b/docs/categorical/index.md
@@ -10,10 +10,14 @@ any external plotting libraries or DataFrames, and are more flexible in some cas
Methods starting with "cat" return categorical plotters. Methods include:
-- `cat` ... plotter for numerical data in x/y-axis categorized by such as color.
-- `cat_x` ... plotter for categorical data in x-axis.
-- `cat_y` ... plotter for categorical data in y-axis.
-- `cat_xy` ... plotter for categorical data in both x- and y-axis.
+- `cat` ... plotter for numerical data in x/y-axis categorized by a property such as color →
+ [Numerical × Numerical Data](num_num.md).
+- `cat_x` ... plotter for categorical data in x-axis →
+ [Categorical × Numerical Data](cat_num.md).
+- `cat_y` ... plotter for categorical data in y-axis →
+ [Categorical × Numerical Data](cat_num.md).
+- `cat_xy` ... plotter for categorical data in both x- and y-axis →
+ [Categorical × Categorical Data](cat_cat.md).
These methods need a tabular data and the names of the columns that will be used as the
x and y values.
diff --git a/examples/boxplot_with_outliers.py b/examples/boxplot_with_outliers.py
new file mode 100644
index 00000000..77a17203
--- /dev/null
+++ b/examples/boxplot_with_outliers.py
@@ -0,0 +1,20 @@
+from whitecanvas import new_canvas
+import pandas as pd
+
+def main():
+    """Show a violin plot of the tips dataset with outlier markers."""
+    url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv"
+    df = pd.read_csv(url)
+    canvas = new_canvas("matplotlib:qt")
+
+    (
+        canvas.cat_x(df, "smoker", "tip")
+        .add_violinplot(color="sex")
+        .as_edge_only()
+        .with_outliers(symbol="D")
+    )
+    canvas.add_legend()
+    canvas.show(block=True)
+
+if __name__ == "__main__":
+ main()
diff --git a/mkdocs.yml b/mkdocs.yml
index 8960d830..6d65603b 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -43,6 +43,7 @@ nav:
- Overview: categorical/index.md
- Numerical × Numerical Data: categorical/num_num.md
- Categorical × Numerical Data: categorical/cat_num.md
+ - Categorical × Categorical Data: categorical/cat_cat.md
- Aggregation: categorical/aggregation.md
- Event Handling:
- Overview: events/index.md
diff --git a/whitecanvas/canvas/_base.py b/whitecanvas/canvas/_base.py
index 72d215bc..100d742c 100644
--- a/whitecanvas/canvas/_base.py
+++ b/whitecanvas/canvas/_base.py
@@ -1778,7 +1778,7 @@ def _cb_reordered(self):
if isinstance(layer, _l.PrimitiveLayer):
layer_backends.append(layer._backend)
elif isinstance(layer, _l.LayerGroup):
- for child in layer.iter_children_recursive():
+ for child in layer.iter_primitive():
layer_backends.append(child._backend)
elif isinstance(layer, _l.LayerWrapper):
for child in _iter_layers(layer):
@@ -1871,7 +1871,7 @@ def _iter_layers(
if isinstance(layer, _l.PrimitiveLayer):
yield layer
elif isinstance(layer, _l.LayerGroup):
- yield from layer.iter_children_recursive()
+ yield from layer.iter_primitive()
elif isinstance(layer, _l.LayerWrapper):
yield from _iter_layers(layer._base_layer)
else:
diff --git a/whitecanvas/canvas/dataframe/_base.py b/whitecanvas/canvas/dataframe/_base.py
index 254fa46b..c3f9e18f 100644
--- a/whitecanvas/canvas/dataframe/_base.py
+++ b/whitecanvas/canvas/dataframe/_base.py
@@ -118,6 +118,38 @@ def iter_arrays(
res = tuple(sl[i] for i in inv_indices)
yield sl, dd[_res_map[res]] + _map[key], group
+    def category_map_with_dodge(
+        self,
+        by: tuple[str, ...],
+        dodge: tuple[str, ...] | None = None,
+    ) -> dict[tuple, int]:
+        """Category-to-index mapping; dodge groups subdivide each offset category."""
+        if dodge is None:
+            dodge = ()
+        if not set(self._offsets) <= set(by):  # subset test (was a strict-superset test)
+            raise ValueError(
+                f"offsets must be a subset of by, got offsets={self._offsets!r} and "
+                f"by={by!r}"
+            )
+        indices = [by.index(d) for d in self._offsets]  # positions of offsets in `by`
+        _map = self.category_map(self._offsets)
+        if not dodge:
+            return _map  # no dodge: the plain offset mapping is the answer
+        out = {}
+        if set(self._offsets) & set(dodge):
+            raise ValueError(
+                f"offsets and dodge must be disjoint, got offsets={self._offsets!r}"
+                f" and dodge={dodge!r}"
+            )
+        inv_indices = [by.index(d) for d in dodge]  # positions of dodge columns in `by`
+        _res_map = self.category_map(dodge)
+        _nres = len(_res_map)
+        for sl, _ in self._df.group_by(by):
+            key = tuple(sl[i] for i in indices)
+            res = tuple(sl[i] for i in inv_indices)
+            out[sl] = _res_map[res] + _map[key] * _nres  # dodge index varies fastest
+        return out
+
def prep_arrays(
self,
by: tuple[str, ...],
diff --git a/whitecanvas/canvas/dataframe/_both_cat.py b/whitecanvas/canvas/dataframe/_both_cat.py
index fd6f66fe..b6ca4a21 100644
--- a/whitecanvas/canvas/dataframe/_both_cat.py
+++ b/whitecanvas/canvas/dataframe/_both_cat.py
@@ -161,7 +161,7 @@ def add_heatmap(
df_agg = self._aggregate(df, by_both, value)
map_x = self._cat_iter_x.prep_position_map(self._x)
map_y = self._cat_iter_y.prep_position_map(self._y)
- dtype = df[value].dtype
+ dtype = df_agg[value].dtype
if dtype.kind not in "fiub":
raise ValueError(f"Column {value!r} is not numeric.")
arr = np.full((len(map_y), len(map_x)), fill, dtype=dtype)
diff --git a/whitecanvas/canvas/layerlist.py b/whitecanvas/canvas/layerlist.py
index 06b4bd53..7a14b271 100644
--- a/whitecanvas/canvas/layerlist.py
+++ b/whitecanvas/canvas/layerlist.py
@@ -72,6 +72,6 @@ def get(self, idx: str, default: _V | None = None) -> Layer | _V | None:
def iter_primitives(self) -> Iterable[PrimitiveLayer]:
for layer in self:
if isinstance(layer, LayerGroup):
- yield from layer.iter_children_recursive()
+ yield from layer.iter_primitive()
else:
yield layer
diff --git a/whitecanvas/layers/_base.py b/whitecanvas/layers/_base.py
index 0617ef5c..5007f6ad 100644
--- a/whitecanvas/layers/_base.py
+++ b/whitecanvas/layers/_base.py
@@ -42,7 +42,7 @@ def __init__(self, name: str | None = None):
self.events = self.__class__._events_class()
self._name = name if name is not None else self.__class__.__name__
self._x_hint = self._y_hint = None
- self._is_grouped = False
+ self._group_layer_ref: weakref.ReferenceType[LayerGroup] | None = None
self._canvas_ref = lambda: None
_set_deprecated_aliases(self)
@@ -208,13 +208,13 @@ def __init__(self, name: str | None = None):
def iter_children(self) -> Iterator[Layer]:
"""Iterate over all children."""
- def iter_children_recursive(self) -> Iterator[PrimitiveLayer[BaseProtocol]]:
+ def iter_primitive(self) -> Iterator[PrimitiveLayer[BaseProtocol]]:
for child in self.iter_children():
if isinstance(child, LayerGroup):
- yield from child.iter_children_recursive()
+ yield from child.iter_primitive()
elif isinstance(child, LayerWrapper):
if isinstance(child._base_layer, LayerGroup):
- yield from child._base_layer.iter_children_recursive()
+ yield from child._base_layer.iter_primitive()
else:
yield child._base_layer
else:
diff --git a/whitecanvas/layers/_mixin.py b/whitecanvas/layers/_mixin.py
index a9683f7c..2696cdbd 100644
--- a/whitecanvas/layers/_mixin.py
+++ b/whitecanvas/layers/_mixin.py
@@ -230,6 +230,16 @@ def _as_legend_info(self):
hatch = self.hatch[0]
return _legend.FaceInfo(color, hatch)
+ @property
+ def alpha(self) -> float:
+ return self.color[:, 3]
+
+ @alpha.setter
+ def alpha(self, value):
+ color = self.color.copy()
+ color[:, 3] = value
+ self.color = color
+
class MultiPropertyEdgeBase(EdgeNamespace):
def update(
@@ -258,6 +268,16 @@ def _as_legend_info(self):
style = self.style[0]
return _legend.EdgeInfo(color, width, style)
+ @property
+ def alpha(self) -> float:
+ return self.color[:, 3]
+
+ @alpha.setter
+ def alpha(self, value):
+ color = self.color.copy()
+ color[:, 3] = value
+ self.color = color
+
class MonoFace(SinglePropertyFaceBase):
@property
@@ -448,16 +468,6 @@ def hatch(self, hatch: str | Hatch | Iterable[str | Hatch]):
self._layer._backend._plt_set_face_hatch(hatch)
self.events.hatch.emit(hatch)
- @property
- def alpha(self) -> float:
- return self.color[:, 3]
-
- @alpha.setter
- def alpha(self, value):
- color = self.color.copy()
- color[:, 3] = value
- self.color = color
-
class MultiEdge(MultiPropertyEdgeBase):
@property
@@ -506,16 +516,6 @@ def style(self, style: str | LineStyle | Iterable[str | LineStyle]):
self._layer._backend._plt_set_edge_style(style)
self.events.style.emit(style)
- @property
- def alpha(self) -> float:
- return self.color[:, 3]
-
- @alpha.setter
- def alpha(self, value):
- color = self.color.copy()
- color[:, 3] = value
- self.color = color
-
_NFace = TypeVar("_NFace", bound=FaceNamespace)
_NEdge = TypeVar("_NEdge", bound=EdgeNamespace)
diff --git a/whitecanvas/layers/group/__init__.py b/whitecanvas/layers/group/__init__.py
index bfe26118..45c1926f 100644
--- a/whitecanvas/layers/group/__init__.py
+++ b/whitecanvas/layers/group/__init__.py
@@ -1,4 +1,8 @@
-from whitecanvas.layers.group._collections import LayerCollectionBase, LayerTuple
+from whitecanvas.layers.group._collections import (
+ LayerCollectionBase,
+ LayerTuple,
+ MainAndOtherLayers,
+)
from whitecanvas.layers.group.band_collection import BandCollection, ViolinPlot
from whitecanvas.layers.group.boxplot import BoxPlot
from whitecanvas.layers.group.graph import Graph
@@ -22,6 +26,7 @@
"BandCollection",
"BracketText",
"Panel",
+ "MainAndOtherLayers",
"LabeledLine",
"LabeledMarkers",
"LabeledBars",
diff --git a/whitecanvas/layers/group/_collections.py b/whitecanvas/layers/group/_collections.py
index e01a121d..a2146c01 100644
--- a/whitecanvas/layers/group/_collections.py
+++ b/whitecanvas/layers/group/_collections.py
@@ -1,6 +1,17 @@
from __future__ import annotations
-from typing import TYPE_CHECKING, Iterable, Iterator, MutableSequence, Sequence, TypeVar
+import weakref
+from typing import (
+ TYPE_CHECKING,
+ Generic,
+ Iterable,
+ Iterator,
+ Literal,
+ MutableSequence,
+ Sequence,
+ TypeVar,
+ overload,
+)
from psygnal import Signal
@@ -30,7 +41,7 @@ class LayerContainer(LayerGroup):
def __init__(self, children: Iterable[Layer], name: str | None = None):
super().__init__(name=name)
- self._children = [_check_layer(c) for c in children]
+ self._children = [_process_grouping(c, self) for c in children]
self._ordering_indices = self._default_ordering(len(self._children))
self._emit_layer_grouped()
@@ -97,7 +108,7 @@ class LayerCollectionBase(LayerContainer, MutableSequence[_L]):
_children: list[_L]
def __getitem__(self, n: int) -> _L:
- """The n-th markers layer."""
+ """The n-th layer."""
if not hasattr(n, "__index__"):
raise TypeError(f"Index must be an integer, not {type(n)}")
return self._children[n]
@@ -125,11 +136,13 @@ def insert(self, n: int, layer: _L):
if _canvas := self._canvas_ref():
_canvas._canvas()._plt_add_layer(layer._backend)
layer._connect_canvas(_canvas)
+ _process_grouping(layer, self)
self._children.insert(n, layer)
self._ordering_indices.insert(n, len(self._ordering_indices))
return None
def _as_legend_item(self):
+ """Use the first layer as the main legend item."""
if len(self) == 0:
return _legend.EmptyLegendItem()
return self[0]._as_legend_item()
@@ -140,10 +153,54 @@ def iter_children(self) -> Iterator[_L]:
...
-def _check_layer(l) -> Layer:
+_L0 = TypeVar("_L0", bound=Layer)
+_L1 = TypeVar("_L1", bound=Layer)
+
+
+class MainAndOtherLayers(LayerTuple, Generic[_L0, _L1]):
+    """Layer tuple whose item 0 is typed as `_L0` and item 1 as `_L1`."""
+
+    @overload
+    def __getitem__(self, n: Literal[0]) -> _L0: ...
+
+    @overload
+    def __getitem__(self, n: Literal[1]) -> _L1: ...
+
+    @overload
+    def __getitem__(self, n: int) -> Layer: ...
+
+    def __getitem__(self, n):
+        """The n-th layer."""
+        return super().__getitem__(n)
+
+    def _insert(self, layer: Layer):
+        """Insert `layer` at index 1, adding it to this group's canvas if there is one."""
+        if layer._canvas_ref() is not None:
+            raise ValueError(f"{layer!r} is already added to a canvas")
+        if _canvas := self._canvas_ref():
+            if isinstance(layer, PrimitiveLayer):
+                _canvas._canvas()._plt_add_layer(layer._backend)
+            elif isinstance(layer, LayerGroup):
+                for l in layer.iter_primitive():
+                    _canvas._canvas()._plt_add_layer(l._backend)
+            layer._connect_canvas(_canvas)
+        _process_grouping(layer, self)
+        self._children.insert(1, layer)  # index 0 stays where it is
+        self._ordering_indices.insert(1, len(self._ordering_indices))
+        return None
+
+    def _as_legend_item(self):
+        """Use the first layer as the main legend item."""
+        if len(self) == 0:
+            # this should never happen, but just in case
+            return _legend.EmptyLegendItem()
+        return self[0]._as_legend_item()
+
+
+def _process_grouping(l, parent: Layer) -> Layer:
if not isinstance(l, Layer):
raise TypeError(f"{l!r} is not a Layer")
- if l._is_grouped:
+ if l._group_layer_ref is not None:
raise ValueError(f"{l!r} is already grouped")
- l._is_grouped = True
+ l._group_layer_ref = weakref.ref(parent)
return l
diff --git a/whitecanvas/layers/group/band_collection.py b/whitecanvas/layers/group/band_collection.py
index 78a0daec..0aa87861 100644
--- a/whitecanvas/layers/group/band_collection.py
+++ b/whitecanvas/layers/group/band_collection.py
@@ -173,7 +173,8 @@ def _getter(x: XYYData):
@property
def orient(self) -> Orientation:
- return self._orient
+ """Orientation of the violin plot (perpendicular to the fill orientation)."""
+ return self._orient.transpose()
@property
def ndata(self) -> int:
diff --git a/whitecanvas/layers/group/boxplot.py b/whitecanvas/layers/group/boxplot.py
index 2d1125b3..f831114e 100644
--- a/whitecanvas/layers/group/boxplot.py
+++ b/whitecanvas/layers/group/boxplot.py
@@ -3,17 +3,16 @@
from typing import Iterable
import numpy as np
-from cmap import Color
from numpy.typing import NDArray
from whitecanvas.backend import Backend
from whitecanvas.layers._mixin import (
AbstractFaceEdgeMixin,
EnumArray,
- MonoEdge,
+ MultiEdge,
MultiFace,
- SinglePropertyEdgeBase,
- SinglePropertyFaceBase,
+ MultiPropertyEdgeBase,
+ MultiPropertyFaceBase,
)
from whitecanvas.layers._primitive import Bars, MultiLine
from whitecanvas.layers.group._cat_utils import check_array_input
@@ -56,21 +55,22 @@ class BoxPlot(LayerContainer, AbstractFaceEdgeMixin["BoxFace", "BoxEdge"]):
def __init__(
self,
- boxes: Bars[MultiFace, MonoEdge],
+ boxes: Bars[MultiFace, MultiEdge],
whiskers: MultiLine,
medians: MultiLine,
- # outliers: Markers | None = None,
*,
name: str | None = None,
orient: Orientation = Orientation.VERTICAL,
+ capsize: float = 0.15,
):
super().__init__([boxes, whiskers, medians], name=name)
AbstractFaceEdgeMixin.__init__(self, BoxFace(self), BoxEdge(self))
self._orient = Orientation.parse(orient)
+ self._capsize = capsize
self._init_events()
@property
- def boxes(self) -> Bars[MultiFace, MonoEdge]:
+ def boxes(self) -> Bars[MultiFace, MultiEdge]:
"""The boxes layer (Bars)."""
return self._children[0]
@@ -111,7 +111,7 @@ def from_arrays(
extent=extent, backend=backend,
).with_face_multi(
hatch=hatch, color=color, alpha=alpha,
- ).with_edge(color="black") # fmt: skip
+ ).with_edge_multi(color="black") # fmt: skip
if ori.is_vertical:
segs = _xyy_to_segments(
x, agg_arr[0], agg_arr[1], agg_arr[3], agg_arr[4], capsize
@@ -136,7 +136,7 @@ def from_arrays(
medsegs, name="medians", color="black", alpha=alpha, backend=backend,
) # fmt: skip
- return cls(box, whiskers, medians, name=name, orient=ori)
+ return cls(box, whiskers, medians, name=name, orient=ori, capsize=capsize)
@property
def orient(self) -> Orientation:
@@ -168,6 +168,41 @@ def move(self, shift: float) -> BoxPlot:
canvas._autoscale_for_layer(self, pad_rel=0.025)
return self
+ def _update_data(self, agg_arr: NDArray[np.number]):
+ x = self.boxes.data.x
+ extent = self.boxes.bar_width
+ self.boxes.set_data(ydata=agg_arr[3] - agg_arr[1], bottom=agg_arr[1])
+ if self.orient.is_vertical:
+ segs = _xyy_to_segments(
+ x, agg_arr[0], agg_arr[1], agg_arr[3], agg_arr[4], self._capsize
+ )
+ medsegs = [
+ [(x0 - extent / 2, y0), (x0 + extent / 2, y0)]
+ for x0, y0 in zip(x, agg_arr[2])
+ ]
+ else:
+ segs = _yxx_to_segments(
+ x, agg_arr[0], agg_arr[1], agg_arr[3], agg_arr[4], self._capsize
+ )
+ medsegs = [
+ [(x0, y0 - extent / 2), (x0, y0 + extent / 2)]
+ for x0, y0 in zip(x, agg_arr[2])
+ ]
+ self.whiskers.data = segs
+ self.medians.data = medsegs
+ return None
+
+ def _get_sep_values(self) -> NDArray[np.number]:
+ """(5, N) array of min, 25%, 50%, 75%, max."""
+ idx = 1 if self.orient.is_vertical else 0
+ stop = self.boxes.ndata * 2
+ _min = [seg[0, idx] for seg in self.whiskers.data[0:stop:2]]
+ _p25 = self.boxes.bottom
+ _median = [seg[0, idx] for seg in self.medians.data]
+ _p75 = self.boxes.top
+ _max = [seg[1, idx] for seg in self.whiskers.data[1:stop:2]]
+ return np.stack([_min, _p25, _median, _p75, _max], axis=0)
+
def _make_sure_hatch_visible(self):
_is_no_width = self.edge.width == 0
if np.any(_is_no_width):
@@ -175,6 +210,12 @@ def _make_sure_hatch_visible(self):
self.edge.width = np.where(_is_no_width, 1, self.edge.width)
self.edge.color = np.where(_is_no_width, ec, self.edge.color)
+ def _xndata(self) -> int:
+ nboxes = self.boxes.ndata
+ nlines = self.whiskers.ndata
+ assert nboxes * 2 == nlines or nboxes * 4 == nlines, f"{nboxes=}, {nlines=}"
+ return nlines // nboxes
+
def _xyy_to_segments(
x: ArrayLike1D,
@@ -241,7 +282,7 @@ def _yxx_to_segments(
return segments_0 + segments_1 + cap0 + cap1
-class BoxFace(SinglePropertyFaceBase):
+class BoxFace(MultiPropertyFaceBase):
_layer: BoxPlot
@property
@@ -268,48 +309,13 @@ def hatch(self, hatch: str | Hatch | Iterable[str | Hatch]):
self._layer.boxes.face.hatch = hatches
self.events.hatch.emit(hatches)
- @property
- def alpha(self) -> float:
- """Alpha value of the face."""
- return self.color[:, 3]
-
- @alpha.setter
- def alpha(self, value):
- color = self.color.copy()
- color[:, 3] = value
- self.color = color
- def update(
- self,
- *,
- color: ColorType | _Void = _void,
- hatch: Hatch | str | _Void = _void,
- alpha: float | _Void = _void,
- ) -> BoxPlot:
- """
- Update the face properties.
-
- Parameters
- ----------
- color : ColorType, optional
- Color of the face.
- hatch : FacePattern, optional
- Fill hatch of the face.
- alpha : float, optional
- Alpha value of the face.
- """
- if color is not _void:
- self.color = color
- if hatch is not _void:
- self.hatch = hatch
- if alpha is not _void:
- self.alpha = alpha
- return self._layer
-
-
-class BoxEdge(SinglePropertyEdgeBase):
+class BoxEdge(MultiPropertyEdgeBase):
_layer: BoxPlot
+ def _xndata(self) -> int:
+ return self._layer._xndata()
+
@property
def color(self) -> NDArray[np.floating]:
"""Edge color of the box plot."""
@@ -317,9 +323,10 @@ def color(self) -> NDArray[np.floating]:
@color.setter
def color(self, color: ColorType):
- col = np.array(Color(color), dtype=np.float32) # assert a single color
+ ndata = self._layer.boxes.ndata
+ col = as_color_array(color, ndata)
self._layer.boxes.edge.color = col
- self._layer.whiskers.color = col
+ self._layer.whiskers.color = np.concatenate([col] * self._xndata(), axis=0)
self._layer.medians.color = col
self.events.color.emit(col)
@@ -330,10 +337,12 @@ def width(self) -> NDArray[np.float32]:
@width.setter
def width(self, width: float):
- self._layer.boxes.edge.width = width
- self._layer.whiskers.width = width
- self._layer.medians.width = width
- self.events.width.emit(width)
+ ndata = self._layer.boxes.ndata
+ widths = as_any_1d_array(width, ndata, dtype=np.float32)
+ self._layer.boxes.edge.width = widths
+ self._layer.whiskers.width = np.tile(widths, self._xndata())
+ self._layer.medians.width = widths
+ self.events.width.emit(widths)
@property
def style(self) -> EnumArray[LineStyle]:
@@ -342,36 +351,14 @@ def style(self) -> EnumArray[LineStyle]:
@style.setter
def style(self, style: str | LineStyle):
- style = LineStyle(style)
- self._layer.boxes.edge.style = style
- self._layer.whiskers.style = style
- self._layer.medians.style = style
+ ndata = self._layer.boxes.ndata
+ if isinstance(style, (str, LineStyle)):
+ styles = np.full(ndata, LineStyle(style), dtype=object)
+ else:
+ styles = np.array(style, dtype=object)
+ if styles.shape != (ndata,):
+ raise ValueError("Invalid shape of the style array.")
+ self._layer.boxes.edge.style = styles
+ self._layer.whiskers.style = np.tile(styles, self._xndata())
+ self._layer.medians.style = styles
self.events.style.emit(style)
-
- @property
- def alpha(self) -> float:
- return self.color[3]
-
- @alpha.setter
- def alpha(self, value):
- color = self.color.copy()
- color[3] = value
- self.color = color
-
- def update(
- self,
- *,
- color: ColorType | _Void = _void,
- style: LineStyle | str | _Void = _void,
- width: float | _Void = _void,
- alpha: float | _Void = _void,
- ) -> BoxPlot:
- if color is not _void:
- self.color = color
- if style is not _void:
- self.style = style
- if width is not _void:
- self.width = width
- if alpha is not _void:
- self.alpha = alpha
- return self._layer
diff --git a/whitecanvas/layers/group/marker_collection.py b/whitecanvas/layers/group/marker_collection.py
index bcc28831..9d4fc2e7 100644
--- a/whitecanvas/layers/group/marker_collection.py
+++ b/whitecanvas/layers/group/marker_collection.py
@@ -430,6 +430,8 @@ def with_hover_text(self, text: str | Iterable[Any]) -> Self:
def with_hover_template(self, template: str, extra: Any | None = None) -> Self:
"""Add hover template to the markers."""
xs, ys = self.data
+ if xs.size == 0: # empty layer
+ return self
if self._backend_name in ("plotly", "bokeh"): # conversion for HTML
template = template.replace("\n", "
")
params = parse_texts(template, xs.size, extra)
diff --git a/whitecanvas/layers/tabular/_box_like.py b/whitecanvas/layers/tabular/_box_like.py
index b34313ba..b13c64e9 100644
--- a/whitecanvas/layers/tabular/_box_like.py
+++ b/whitecanvas/layers/tabular/_box_like.py
@@ -2,24 +2,25 @@
from __future__ import annotations
-from typing import TYPE_CHECKING, Callable, Generic, Sequence, TypeVar
+from typing import TYPE_CHECKING, Callable, Generic, Literal, Sequence, TypeVar
import numpy as np
from cmap import Color
from whitecanvas import theme
from whitecanvas.backend import Backend
-from whitecanvas.layers import _legend, _mixin
+from whitecanvas.layers import Layer, _legend, _mixin
from whitecanvas.layers import group as _lg
from whitecanvas.layers.tabular import _jitter, _shared
from whitecanvas.layers.tabular import _plans as _p
-from whitecanvas.layers.tabular._df_compat import DataFrameWrapper
+from whitecanvas.layers.tabular._df_compat import DataFrameWrapper, parse
from whitecanvas.types import (
ColormapType,
ColorType,
Hatch,
LineStyle,
Orientation,
+ Symbol,
_Void,
)
from whitecanvas.utils.type_check import is_real_number
@@ -28,11 +29,12 @@
from typing_extensions import Self
from whitecanvas.canvas.dataframe._base import CatIterator
- from whitecanvas.layers.tabular import DFRugGroups
+ from whitecanvas.layers.tabular import DFMarkerGroups, DFRugGroups
_FE = _mixin.AbstractFaceEdgeMixin[_mixin.FaceNamespace, _mixin.EdgeNamespace]
_DF = TypeVar("_DF")
+_L = TypeVar("_L", bound=Layer)
_void = _Void()
@@ -246,7 +248,7 @@ def __init__(
name: str | None = None,
orient: Orientation = Orientation.VERTICAL,
extent: float = 0.8,
- shape: str = "both",
+ shape: Literal["both", "left", "right"] = "both",
backend: str | Backend | None = None,
):
_splitby, dodge = _shared.norm_dodge(
@@ -261,8 +263,9 @@ def __init__(
) # fmt: skip
super().__init__(base, cat.df)
_BoxLikeMixin.__init__(self, categories, _splitby, color_by, hatch_by)
+ self._offsets = cat.offsets
self._value = value
- self._map = cat.prep_position_map(_splitby, dodge)
+ self._dodge = dodge
self.with_hover_template("\n".join(f"{k}: {{{k}!r}}" for k in self._splitby))
@property
@@ -296,13 +299,25 @@ def with_rug(
self,
*,
width: float = 1.0,
- color="black",
- ):
- """Overlay rug plot on the violins."""
+ color: ColorType | None = None,
+ ) -> _lg.MainAndOtherLayers[Self, DFRugGroups[_DF]]:
+ """Overlay rug plot on the violins and return the violin layer."""
from whitecanvas.layers.tabular import DFRugGroups
+ canvas = self._canvas_ref()
+ if canvas is None:
+ raise ValueError("No canvas to add the rug plot.")
_extent = self.base.extent
- jitter = _jitter.CategoricalJitter(self._splitby, self._map)
+ if color is not None:
+ colors = Color(color)
+ elif self._is_edge_only():
+ colors = self._color_by.by
+ else:
+ colors = Color("#1F1F1F")
+ jitter = _jitter.CategoricalJitter(
+ self._splitby,
+ self._make_cat_iterator().prep_position_map(self._splitby, self._dodge),
+ )
if self.base._shape == "both":
align = "center"
elif self.base._shape == "left":
@@ -310,16 +325,297 @@ def with_rug(
else:
align = "low"
rug = DFRugGroups.from_table(
- self._source, jitter, self._value, color=color, width=width, extent=_extent,
- backend=self.base._backend_name,
+ self._source, jitter, self._value, color=colors, width=width,
+ extent=_extent, backend=self.base._backend_name,
).scale_by_density(align=align) # fmt: skip
- old_name = self.name
- return _ViolinRugTuple([self, rug], name=old_name)
+ return _combine_main_and_others(self, rug)
+
+    def with_box(
+        self,
+        *,
+        color: ColorType | None = None,
+        median_color: ColorType = "white",
+        width: float | None = None,
+        extent: float = 0.1,
+        capsize: float = 0.0,
+    ) -> _lg.MainAndOtherLayers[Self, DFBoxPlot[_DF]]:
+        """
+        Overlay a box plot on the violins and return the combined layer group.
+
+        Following the convention of many statistical packages, the boxes are drawn
+        in a single color with a contrasting median line.
+
+        Parameters
+        ----------
+        color : color-type, optional
+            Color of the box faces and edges. If not given, the violin edge color
+            is used when every violin edge has positive width and alpha; otherwise
+            "#1F1F1F" is used.
+        median_color : color-type, optional
+            Color of the median line of the box plot.
+        width : float, optional
+            Edge width of the box plot. The mean violin edge width is used if not
+            given.
+        extent : float, optional
+            Relative width of the boxes.
+        capsize : float, optional
+            Relative size of the caps of the whiskers.
+
+        Raises
+        ------
+        ValueError
+            If the violin layer is not attached to a canvas.
+        """
+        canvas = self._canvas_ref()
+        if canvas is None:
+            raise ValueError("No canvas to add the box plot.")
+
+        # Resolve the single color shared by the box faces and edges.
+        if color is not None:
+            box_color = Color(color)
+        elif np.all(self.base.edge.width > 0) and np.all(self.base.edge.alpha > 0):
+            box_color = self.base.edge.color
+        else:
+            box_color = Color("#1F1F1F")
+
+        if width is None:
+            width = self.base.edge.width.mean()
+
+        box = DFBoxPlot(
+            self._make_cat_iterator(),
+            self._value,
+            name=f"boxplot-of-{self.name}",
+            color=None,
+            hatch=Hatch.SOLID,
+            dodge=self._dodge,
+            width=width,
+            orient=self.orient,
+            capsize=capsize,
+            extent=extent,
+            backend=canvas._get_backend(),
+        )
+        box_base = box.base
+        box_base.boxes.face.color = box_color
+        box_base.edge.color = box_color
+        box_base.medians.color = Color(median_color)
+        return _combine_main_and_others(self, box)
+
+ def with_outliers(
+ self,
+ *,
+ color: ColorType | None = None,
+ symbol: str | Symbol = Symbol.CIRCLE,
+ size: float | None = None,
+ ratio: float = 1.5,
+ extent: float = 0.1,
+ seed: int | None = 0,
+ ) -> _lg.MainAndOtherLayers[Self, DFMarkerGroups[_DF]]:
+ """
+ Overlay outlier markers on the violins and return the combined layer group.
+
+ Parameters
+ ----------
+ color : color-type, optional
+ Color of the outliers. To make sure the outliers are easily visible, face
+ color will always be transparent. If a constant color is given, all the
+ edges will be colored by the same color. By default, the edge colors are
+ the same as the edge colors of the violin plot.
+ symbol : str or Symbol, optional
+ Symbol of the outlier markers.
+ size : float, optional
+ Size of the outlier markers. If not given, it will be set to the theme
+ default.
+ ratio : float, optional
+ Ratio of the interquartile range (IQR) to determine the outliers. Data
+ points outside of the range [Q1 - ratio * IQR, Q3 + ratio * IQR] will be
+ considered as outliers.
+ extent : float, optional
+ Relative width of the jitter range (same effect as the `extent` argument of
+ the `add_stripplot` method).
+ seed : int, optional
+ Random seed for the jitter (same effect as the `seed` argument of the
+ `add_stripplot` method).
+
+ Raises
+ ------
+ ValueError
+ If the violin layer is not attached to a canvas.
+ """
+ from whitecanvas.canvas.dataframe._base import CatIterator
+ from whitecanvas.layers.tabular import DFMarkerGroups
+
+ canvas = self._canvas_ref()
+ size = theme._default("markers.size", size)
+ if canvas is None:
+ raise ValueError("No canvas to add the outliers.")
+
+ is_edge_only = self._is_edge_only()
+
+ # category iterator is used to calculate positions and indices
+ _cat_self = CatIterator(self._source, offsets=self._offsets)
+ _pos_map = _cat_self.prep_position_map(self._splitby, self._dodge)
+ _extent = _cat_self.zoom_factor(self._dodge) * extent
+ _cat_map = _cat_self.category_map_with_dodge(self._splitby, self._dodge)
+
+ # collect the outliers of every group into a column-wise dict
+ df_outliers = {c: [] for c in (*self._splitby, self._value)}
+ colors = []
+ for sl, sub in self._source.group_by(self._splitby):
+ arr = sub[self._value]
+ q1, q3 = np.quantile(arr, [0.25, 0.75])
+ iqr = q3 - q1 # interquartile range
+ low = q1 - ratio * iqr # lower bound of inliers
+ high = q3 + ratio * iqr # upper bound of inliers
+ idx_cat = _cat_map[sl]
+ outliers = arr[(arr < low) | (arr > high)]
+ for _cat, _s in zip(sl, self._splitby):
+ df_outliers[_s].extend([_cat] * outliers.size)
+ df_outliers[self._value].extend(outliers)
+ # one color entry per outlier, taken from the violin this group belongs to
+ if is_edge_only:
+ _this_color = self.base.edge.color[idx_cat]
+ else:
+ _this_color = self.base.face.color[idx_cat]
+ colors.extend([_this_color] * outliers.size)
+
+ df_outliers = parse(df_outliers)
+ xj = _jitter.UniformJitter(self._splitby, _pos_map, extent=_extent, seed=seed)
+ yj = _jitter.IdentityJitter(self._value).check(df_outliers)
+ new = DFMarkerGroups(
+ df_outliers, xj, yj, name=f"outliers-of-{self.name}", color=Color("black"),
+ orient=self.orient, symbol=symbol, size=size, backend=canvas._get_backend(),
+ ) # fmt: skip
+ # NOTE(review): `color` is only compared against None below; a non-None value is
+ # never applied to `new` in the visible code -- confirm this is intended.
+ # NOTE(review): np.stack raises ValueError on an empty list (no outliers in any
+ # group), and its `dtype=` keyword needs NumPy >= 1.24 -- confirm both are OK.
+ if color is None:
+ if is_edge_only: # edge only
+ new._apply_color(np.stack(colors, axis=0, dtype=np.float32))
+ new.as_edge_only(width=self.base.edge.width.mean())
+ return _combine_main_and_others(self, new)
+
+    def with_strip(
+        self,
+        *,
+        color: ColorType | None = None,
+        symbol: str | Symbol = Symbol.CIRCLE,
+        size: float | None = None,
+        extent: float = 0.2,
+        seed: int | None = 0,
+    ) -> _lg.MainAndOtherLayers[Self, DFMarkerGroups[_DF]]:
+        """
+        Overlay strip plot on the violins and return the combined layer group.
+
+        Parameters
+        ----------
+        color : color-type, optional
+            Color of the strip plot. If not given, the color-by setting of the
+            violin plot (`self._color_by.by`) is reused.
+        symbol : str or Symbol, optional
+            Symbol of the strip plot markers.
+        size : float, optional
+            Size of the strip plot markers. If not given, it will be set to the
+            theme default.
+        extent : float, optional
+            Relative width of the jitter range.
+        seed : int, optional
+            Random seed for the jitter.
+
+        Raises
+        ------
+        ValueError
+            If the violin layer is not attached to a canvas.
+        """
+        from whitecanvas.layers.tabular import DFMarkerGroups
+
+        canvas = self._canvas_ref()
+        if canvas is None:
+            raise ValueError("No canvas to add the strip plot.")
+        size = theme._default("markers.size", size)
+
+        if color is None:
+            color = self._color_by.by
+        else:
+            color = Color(color)
+
+        # category iterator is used to calculate positions and the jitter width
+        _cat_self = self._make_cat_iterator()
+        _pos_map = _cat_self.prep_position_map(self._splitby, self._dodge)
+        _extent = _cat_self.zoom_factor(self._dodge) * extent
+        df = self._source
+        xj = _jitter.UniformJitter(self._splitby, _pos_map, extent=_extent, seed=seed)
+        yj = _jitter.IdentityJitter(self._value).check(df)
+        new = DFMarkerGroups(
+            df, xj, yj, name=f"strip-of-{self.name}", color=color,
+            orient=self.orient, symbol=symbol, size=size, backend=canvas._get_backend(),
+        )  # fmt: skip
+        # keep the markers consistent with violins that are drawn as outlines only
+        if self._is_edge_only():
+            new.as_edge_only(width=self.base.edge.width.mean())
+        return _combine_main_and_others(self, new)
+
+    def with_swarm(
+        self,
+        *,
+        color: ColorType | None = None,
+        symbol: str | Symbol = Symbol.CIRCLE,
+        size: float | None = None,
+        extent: float = 0.8,
+        sort: bool = False,
+    ) -> _lg.MainAndOtherLayers[Self, DFMarkerGroups[_DF]]:
+        """
+        Overlay swarm plot on the violins and return the combined layer group.
+
+        Parameters
+        ----------
+        color : color-type, optional
+            Color of the swarm plot. If not given, the color-by setting of the
+            violin plot (`self._color_by.by`) is reused.
+        symbol : str or Symbol, optional
+            Symbol of the swarm plot markers.
+        size : float, optional
+            Size of the swarm plot markers. If not given, it will be set to the
+            theme default.
+        extent : float, optional
+            Relative width of the jitter range.
+        sort : bool, default False
+            If True, the markers will be sorted by the value.
+
+        Raises
+        ------
+        ValueError
+            If the violin layer is not attached to a canvas.
+        """
+        from whitecanvas.layers.tabular import DFMarkerGroups
+
+        canvas = self._canvas_ref()
+        if canvas is None:
+            raise ValueError("No canvas to add the swarm plot.")
+        size = theme._default("markers.size", size)
+
+        if color is None:
+            color = self._color_by.by
+        else:
+            color = Color(color)
+
+        # category iterator is used to calculate positions and the jitter width
+        _cat_self = self._make_cat_iterator()
+        _pos_map = _cat_self.prep_position_map(self._splitby, self._dodge)
+        _extent = _cat_self.zoom_factor(self._dodge) * extent
+        df = self._source
+
+        if sort:
+            df = df.sort(self._value)
+        # value range handed to the swarm jitter
+        lims = df[self._value].min(), df[self._value].max()
+        xj = _jitter.SwarmJitter(
+            self._splitby, _pos_map, self._value, lims, extent=_extent
+        )
+        yj = _jitter.IdentityJitter(self._value).check(df)
+        new = DFMarkerGroups(
+            df, xj, yj, name=f"swarm-of-{self.name}", color=color,
+            orient=self.orient, symbol=symbol, size=size, backend=canvas._get_backend(),
+        )  # fmt: skip
+        # keep the markers consistent with violins that are drawn as outlines only
+        if self._is_edge_only():
+            new.as_edge_only(width=self.base.edge.width.mean())
+        return _combine_main_and_others(self, new)
+
+    def as_edge_only(
+        self,
+        width: float = 3.0,
+        style: str | LineStyle = LineStyle.SOLID,
+    ) -> Self:
+        """
+        Draw the violins as outlines: edges take the face color, faces become transparent.
+
+        Parameters
+        ----------
+        width : float, optional
+            Width of the edge.
+        style : str or LineStyle, optional
+            Style of the edge.
+
+        Returns
+        -------
+        Self
+            This layer, so that calls can be chained.
+        """
+        violin = self.base
+        # Copy the face color onto the edge before hiding the face.
+        violin.with_edge(color=violin.face.color, width=width, style=style)
+        violin.face.update(alpha=0.0)
+        return self
- # def with_box(self):
def _as_legend_item(self) -> _legend.LegendItemCollection:
return _BoxLikeMixin._as_legend_item(self)
+    def _make_cat_iterator(self) -> CatIterator[_DF]:
+        """Create a new ``CatIterator`` over the source table with the stored offsets."""
+        # imported locally, as the module only imports CatIterator for type checking
+        from whitecanvas.canvas.dataframe._base import CatIterator
+
+        cat_iterator = CatIterator(self._source, offsets=self._offsets)
+        return cat_iterator
+
+    def _is_edge_only(self) -> bool:
+        """Return True if every violin face alpha is below 1e-6 (effectively transparent)."""
+        # np.all returns np.bool_; convert so the annotated ``bool`` is what callers get.
+        return bool(np.all(self.base.face.alpha < 1e-6))
+
class DFBoxPlot(
_shared.DataFrameLayerWrapper[_lg.BoxPlot, _DF], _BoxLikeMixin, Generic[_DF]
@@ -330,6 +626,7 @@ def __init__(
value: str,
color: str | tuple[str, ...] | None = None,
hatch: str | tuple[str, ...] | None = None,
+ width: float = 1.0,
dodge: str | tuple[str, ...] | bool | None = None,
name: str | None = None,
orient: Orientation = Orientation.VERTICAL,
@@ -348,8 +645,12 @@ def __init__(
x, arr, name=name, orient=orient, capsize=_capsize, extent=_extent,
backend=backend,
) # fmt: skip
+ base.edge.width = width
super().__init__(base, cat.df)
_BoxLikeMixin.__init__(self, categories, _splitby, color_by, hatch_by)
+ self._offsets = cat.offsets
+ self._value = value
+ self._dodge = dodge
@property
def orient(self) -> Orientation:
@@ -374,6 +675,120 @@ def with_hover_template(self, template: str) -> Self:
self.base.boxes.with_hover_template(template, extra=extra)
return self
+ def with_outliers(
+ self,
+ *,
+ color: ColorType | None = None,
+ symbol: str | Symbol = Symbol.CIRCLE,
+ size: float | None = None,
+ ratio: float = 1.5,
+ extent: float = 0.1,
+ seed: int | None = 0,
+ update_whiskers: bool = True,
+ ) -> _lg.MainAndOtherLayers[Self, DFMarkerGroups[_DF]]:
+ """
+ Overlay outlier markers on the box plot and return the combined layer group.
+
+ Parameters
+ ----------
+ color : color-type, optional
+ Color of the outliers. To make sure the outliers are easily visible, face
+ color will always be transparent. If a constant color is given, all the
+ edges will be colored by the same color. By default, the edge colors are
+ the same as the edge colors of the box plot.
+ symbol : str or Symbol, optional
+ Symbol of the outlier markers.
+ size : float, optional
+ Size of the outlier markers. If not given, it will be set to the theme
+ default.
+ ratio : float, optional
+ Ratio of the interquartile range (IQR) to determine the outliers. Data
+ points outside of the range [Q1 - ratio * IQR, Q3 + ratio * IQR] will be
+ considered as outliers.
+ extent : float, optional
+ Relative width of the jitter range (same effect as the `extent` argument of
+ the `add_stripplot` method).
+ seed : int, optional
+ Random seed for the jitter (same effect as the `seed` argument of the
+ `add_stripplot` method).
+ update_whiskers : bool, default True
+ If True, the whiskers of the box plot will be updated to exclude the
+ outliers.
+
+ Raises
+ ------
+ ValueError
+ If the box plot layer is not attached to a canvas.
+ """
+ from whitecanvas.canvas.dataframe._base import CatIterator
+ from whitecanvas.layers.tabular import DFMarkerGroups
+
+ canvas = self._canvas_ref()
+ size = theme._default("markers.size", size)
+ if canvas is None:
+ raise ValueError("No canvas to add the outliers.")
+
+ is_edge_only = np.all(self.base.boxes.face.alpha < 1e-6)
+
+ # category iterator is used to calculate positions and indices
+ _cat_self = CatIterator(self._source, offsets=self._offsets)
+ _pos_map = _cat_self.prep_position_map(self._splitby, self._dodge)
+ _extent = _cat_self.zoom_factor(self._dodge) * extent
+ _cat_map = _cat_self.category_map_with_dodge(self._splitby, self._dodge)
+
+ # calculate outliers and update the separators
+ df_outliers = {c: [] for c in (*self._splitby, self._value)}
+ # NOTE(review): agg_values is modified in the loop regardless of `update_whiskers`;
+ # if _get_sep_values() returns a view of internal state the flag would not
+ # prevent the change -- confirm it returns a copy.
+ agg_values = self.base._get_sep_values() # for updating whiskers
+ colors = []
+ for sl, sub in self._source.group_by(self._splitby):
+ arr = sub[self._value]
+ q1, q3 = np.quantile(arr, [0.25, 0.75])
+ iqr = q3 - q1 # interquartile range
+ low = q1 - ratio * iqr # lower bound of inliers
+ high = q3 + ratio * iqr # upper bound of inliers
+ idx_cat = _cat_map[sl]
+ inliers = arr[(arr >= low) & (arr <= high)]
+ # rows 0 and 4 receive the smallest / largest inlier of this group
+ agg_values[0, idx_cat] = inliers.min()
+ agg_values[4, idx_cat] = inliers.max()
+ outliers = arr[(arr < low) | (arr > high)]
+ for _cat, _s in zip(sl, self._splitby):
+ df_outliers[_s].extend([_cat] * outliers.size)
+ df_outliers[self._value].extend(outliers)
+ # one color entry per outlier, taken from the box this group belongs to
+ if is_edge_only:
+ _this_color = self.base.edge.color[idx_cat]
+ else:
+ _this_color = self.base.face.color[idx_cat]
+ colors.extend([_this_color] * outliers.size)
+
+ df_outliers = parse(df_outliers)
+ xj = _jitter.UniformJitter(self._splitby, _pos_map, extent=_extent, seed=seed)
+ yj = _jitter.IdentityJitter(self._value).check(df_outliers)
+ new = DFMarkerGroups(
+ df_outliers, xj, yj, name=f"outliers-of-{self.name}", color=Color("black"),
+ orient=self.orient, symbol=symbol, size=size, backend=canvas._get_backend(),
+ ) # fmt: skip
+ # NOTE(review): `color` is only compared against None below; a non-None value is
+ # never applied to `new` in the visible code -- confirm this is intended.
+ # NOTE(review): np.stack raises ValueError on an empty list (no outliers in any
+ # group), and its `dtype=` keyword needs NumPy >= 1.24 -- confirm both are OK.
+ if color is None:
+ if is_edge_only: # edge only
+ new._apply_color(np.stack(colors, axis=0, dtype=np.float32))
+ new.as_edge_only(width=self.base.edge.width.mean())
+ if update_whiskers:
+ self.base._update_data(agg_values)
+ return _combine_main_and_others(self, new)
+
+    def as_edge_only(
+        self,
+        width: float = 3.0,
+        style: str | LineStyle = LineStyle.SOLID,
+    ) -> Self:
+        """
+        Draw the box plot as outlines: edges take the face color, faces become transparent.
+
+        Parameters
+        ----------
+        width : float, optional
+            Width of the edge.
+        style : str or LineStyle, optional
+            Style of the edge.
+
+        Returns
+        -------
+        Self
+            This layer, so that calls can be chained.
+        """
+        boxplot = self.base
+        # Copy the face color onto the edge before hiding the face.
+        boxplot.with_edge(color=boxplot.face.color, width=width, style=style)
+        boxplot.face.update(alpha=0.0)
+        return self
+
def _as_legend_item(self) -> _legend.LegendItemCollection:
return _BoxLikeMixin._as_legend_item(self)
@@ -601,14 +1016,20 @@ def _as_legend_item(self) -> _legend.LegendItemCollection:
return _BoxLikeMixin._as_legend_item(self)
-class _ViolinRugTuple(_lg.LayerTuple):
- @property
- def violin(self) -> DFViolinPlot:
- return self._children[0]
+_L0 = TypeVar("_L0", bound=Layer)
+_L1 = TypeVar("_L1", bound=Layer)
- @property
- def rug(self) -> DFRugGroups:
- return self._children[1]
- def _as_legend_item(self) -> _legend.LegendItem:
- return self.violin._as_legend_item()
+def _combine_main_and_others(
+    layer: _L0,
+    incoming: _L1,
+) -> _lg.MainAndOtherLayers[_L0, _L1]:
+    """
+    Group `layer` with `incoming`, reusing the group `layer` already belongs to.
+
+    If `layer` has no group reference, a new `MainAndOtherLayers` named after
+    `layer` is created. Otherwise `incoming` is inserted into the existing group.
+
+    Raises
+    ------
+    ValueError
+        If the referenced group no longer exists or is not a `MainAndOtherLayers`.
+    """
+    parent_ref = layer._group_layer_ref
+    if parent_ref is None:
+        # first overlay: start a new group with `layer` as the main layer
+        return _lg.MainAndOtherLayers([layer, incoming], name=layer.name)
+
+    parent = parent_ref()
+    if parent is None:
+        raise ValueError("Parent layer group is deleted.")
+    if not isinstance(parent, _lg.MainAndOtherLayers):
+        raise ValueError(f"Parent layer group is incorrect type {type(parent)}.")
+    parent._insert(incoming)
+    return parent
diff --git a/whitecanvas/layers/tabular/_jitter.py b/whitecanvas/layers/tabular/_jitter.py
index 05684c83..a099fcca 100644
--- a/whitecanvas/layers/tabular/_jitter.py
+++ b/whitecanvas/layers/tabular/_jitter.py
@@ -46,7 +46,10 @@ def _map(self, src: DataFrameWrapper[_DF]) -> NDArray[np.floating]:
args = [src[b] for b in self._by]
out = np.zeros(len(src), dtype=np.float32)
for row, pos in self._mapping.items():
- sl = np.all(np.column_stack([a == r for a, r in zip(args, row)]), axis=1)
+ arrs = [a == r for a, r in zip(args, row) if a.size > 0]
+ if len(arrs) == 0:
+ continue
+ sl = np.all(np.column_stack(arrs), axis=1)
out[sl] = pos
return out
diff --git a/whitecanvas/utils/normalize.py b/whitecanvas/utils/normalize.py
index e8cc4dbf..a25d7acd 100644
--- a/whitecanvas/utils/normalize.py
+++ b/whitecanvas/utils/normalize.py
@@ -85,6 +85,8 @@ def as_color_array(color, size: int) -> NDArray[np.float32]:
col = arr_color(color)
return np.repeat(col[np.newaxis, :], size, axis=0)
if isinstance(color, np.ndarray):
+ if color.size == 0 and size == 0:
+ return color
if color.dtype.kind in "OU":
if color.shape != (size,):
raise ValueError(