Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Overlaying methods for box plot and violin plot #25

Merged
merged 7 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions docs/_scripts/_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def on_page_markdown(md: str, page: Page, **kwargs: Any) -> str:
"""Called when mkdocs is building the markdown for a page."""

def _add_images(matchobj: re.Match[str]) -> str:
prefix = matchobj.group(0).split("\n", 1)[0] # ``` python ...`
code: str = matchobj.group(1).strip() # source code

if code.startswith("#!name:"):
Expand All @@ -25,7 +26,7 @@ def _add_images(matchobj: re.Match[str]) -> str:
reldepth = "../" * page.file.src_path.count(os.sep)
dest = f"{reldepth}_images/{name}.png"
link = f"\n![]({dest}){{ loading=lazy, width={width}px }}\n\n"
new_md = "```python\n" + code + "\n```" + link
new_md = f"{prefix}\n{code}\n```{link}"
return new_md
elif code.startswith("#!html:"):
code, name = _get_html_name(code)
Expand All @@ -35,16 +36,17 @@ def _add_images(matchobj: re.Match[str]) -> str:
f'<iframe src={dest} frameborder="0" width="400px" height="300px" '
'scrolling="no"></iframe>'
)
new_md = "```python\n" + code + "\n```\n\n" + html_text + "\n"
new_md = f"{prefix}\n{code}\n```\n\n{html_text}\n"
return new_md
elif code.startswith("#!"):
_, other = code.split("\n", 1)
else:
other = code
return "```python\n" + other + "\n```"
return f"{prefix}\n{other}\n```"


md = re.sub("``` ?python\n([^`]*)```", _add_images, md, flags=re.DOTALL)
# md = re.sub("``` ?python\n([^`]*)```", _add_images, md, flags=re.DOTALL)
md = re.sub("``` ?python.*?\n([^`]*)```", _add_images, md)

return md

Expand Down
7 changes: 5 additions & 2 deletions docs/_scripts/_screenshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
from whitecanvas.theme import update_default

DOCS: Path = Path(__file__).parent.parent
CODE_BLOCK = re.compile("``` ?python\n([^`]*)```", re.DOTALL)
# CODE_BLOCK = re.compile("``` ?python\n([^`]*)```", re.DOTALL)
CODE_BLOCK = re.compile("``` ?python.*?\n([^`]*)```")

def _exec_code(src: str, ns: dict, dest: str) -> dict[str, Any]:
try:
Expand Down Expand Up @@ -104,6 +105,8 @@ def main() -> None:
raise RuntimeError(
f"Error evaluating code\n\n{code}\n\nfor {dest!r}"
) from e
plt.close("all")
# close all if there's more than 10 figures
if len(plt.get_fignums()) > 10:
plt.close("all")

main()
26 changes: 26 additions & 0 deletions docs/categorical/cat_cat.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Categorical &times; Categorical Data

Here is an example of a data frame with two categorical columns.

``` python
import numpy as np

# sample data
rng = np.random.default_rng(12345)

df = {
"x": ["A"] * 60 + ["B"] * 30 + ["C"] * 40,
"y": ["X"] * 70 + ["Y"] * 60,
"value": rng.normal(size=130),
}
```

To use categorical columns for both x- and y-axis, aggregation is required.

``` python
#!name: cat_cat_heatmap
from whitecanvas import new_canvas

canvas = new_canvas("matplotlib")
canvas.cat_xy(df, x="x", y="y").mean().add_heatmap("value")
```
160 changes: 148 additions & 12 deletions docs/categorical/cat_num.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ In this section, following data will be used as an example:
import numpy as np
from whitecanvas import new_canvas

rng = np.random.default_rng(12345)
rng = np.random.default_rng(3)
df = {
"category": ["A"] * 40 + ["B"] * 50,
"observation": np.concatenate([rng.random(40), rng.random(50) + 1.3]),
"observation": np.concatenate([rng.normal(2.0, size=40), rng.normal(3.3, size=50)]),
"replicate": [0] * 23 + [1] * 17 + [0] * 22 + [1] * 28,
"temperature": rng.normal(scale=2.8, size=90) + 22.0,
}
Expand Down Expand Up @@ -218,8 +218,8 @@ canvas.show()
format string.

``` python
#!skip
canvas = new_canvas("matplotlib")
#!html: categorical_axis_stripplot_hover
canvas = new_canvas("plotly", size=(400, 300))
(
canvas
.cat_x(df, x="category", y="observation")
Expand Down Expand Up @@ -247,7 +247,7 @@ canvas.show()
Similarly, each marker color can represent a numerical value. `update_colormap` will map
the value with an arbitrary colormap.

``` python
``` python hl_lines="6"
#!name: categorical_axis_stripplot_by_color
canvas = new_canvas("matplotlib")
(
Expand Down Expand Up @@ -282,7 +282,7 @@ canvas.show()
Although a rug plot does not directly use markers, it also uses a line to represent each
data point.

``` python
``` python hl_lines="5"
#!name: categorical_axis_rugplot
canvas = new_canvas("matplotlib")
(
Expand All @@ -293,9 +293,11 @@ canvas = new_canvas("matplotlib")
canvas.show()
```

Some methods defined for marker-type plots can also be used for rug plot.
Some methods defined for marker-type plots can also be used for rug plot. For example,
`update_colormap` will change the color of the rug lines based on the values of the
specified column.

``` python
``` python hl_lines="6"
#!name: categorical_axis_rugplot_colormap
canvas = new_canvas("matplotlib")
(
Expand All @@ -310,7 +312,7 @@ canvas.show()
`scale_by_density` will change the length of the rugs to represent the density of the
data points.

``` python
``` python hl_lines="6"
#!name: categorical_axis_rugplot_density
canvas = new_canvas("matplotlib")
(
Expand All @@ -322,16 +324,150 @@ canvas = new_canvas("matplotlib")
canvas.show()
```

Rug plot can also be overlaid with violin plot with `with_rug` method.
## Overlaying Plots

``` python
Different types of plots have their own strengths and weaknesses. To make the plot more
informative, it is often necessary to overlay different types of plots.

You can simply call different methods to overlay different types of plots, but in some
cases it is not that easy. For example, to add rug plot to violin plot, you have to
correctly set the lengths of the rug lines so that their edges exactly match the edges
of the violins.

Some types of plots are implemented with methods to efficiently overlay them with other
plots. All of them use method chaining so that the API is very clean.

### Rug plot over violin plot

Violin plot can be overlaid with rug plot using `with_rug` method. Edges of the rug lines match exactly with the edges of the violins. Of course, you can hover over the rug lines to see the details.

``` python hl_lines="6"
#!name: categorical_axis_violin_with_rug
canvas = new_canvas("matplotlib")
(
canvas
.cat_x(df, x="category", y="observation")
.add_violinplot(color="replicate")
.with_rug()
.with_rug(color="purple")
)
canvas.show()
```

### Box plot over violin plot

Violin plot can be overlaid with box plot using `with_box` method. Color of the box plot
follows the convention of other plotting software by default.

``` python hl_lines="6"
#!name: categorical_axis_violin_with_box
canvas = new_canvas("matplotlib")
(
canvas
.cat_x(df, x="category", y="observation")
.add_violinplot(color="replicate")
.with_box(width=2.0, extent=0.05)
)
canvas.show()
```

If the violins are edge only, the box plot will be filled with the same color.

``` python hl_lines="6-7"
#!name: categorical_axis_violin_with_box_edge_only
canvas = new_canvas("matplotlib")
(
canvas
.cat_x(df, x="category", y="observation")
.add_violinplot(color="replicate")
.as_edge_only()
.with_box(width=2.0, extent=0.05)
)
canvas.show()
```

### Markers over violin plot

Violin plot has `with_strip` and `with_swarm` methods to overlay markers.

``` python hl_lines="6"
#!name: categorical_axis_violin_with_strip
canvas = new_canvas("matplotlib")
(
canvas
.cat_x(df, x="category", y="observation")
.add_violinplot(color="replicate")
.with_strip(symbol="D", size=8, color="black")
)
```

``` python hl_lines="6"
#!name: categorical_axis_violin_with_swarm
canvas = new_canvas("matplotlib")
(
canvas
.cat_x(df, x="category", y="observation")
.add_violinplot(color="replicate")
.with_swarm(size=8, color="black")
)
```

### Add outliers

Box plot and violin plot are usually combined with outlier markers, as these plots are
not good at showing the details of the sparse data points.
For these plots, `with_outliers` method will add outliers, and optionally change the
whisker lengths for the box plot.

This is an example of adding outliers to the box plot. Because outliers are shown as a
strip plot, arguments specific to strip plot (`symbol`, `size`, `extent` and `seed`) can be used.

``` python hl_lines="6"
#!name: categorical_axis_box_with_outliers
canvas = new_canvas("matplotlib")
(
canvas
.cat_x(df, x="category", y="observation")
.add_boxplot(color="replicate")
.with_outliers(size=8)
)
```

If the box plot is edge only, the outliers will be the same.

``` python hl_lines="6"
#!name: categorical_axis_box_with_outliers_edge_only
canvas = new_canvas("matplotlib")
(
canvas
.cat_x(df, x="category", y="observation")
.add_boxplot(color="replicate")
.as_edge_only()
.with_outliers()
)
```

Setting `update_whiskers` to `False` will not change the whisker lengths.

``` python hl_lines="6"
#!name: categorical_axis_box_with_outliers_no_updates
canvas = new_canvas("matplotlib")
(
canvas
.cat_x(df, x="category", y="observation")
.add_boxplot(color="replicate")
.with_outliers(update_whiskers=False)
)
```

Violin plot also supports `with_outliers` method.

``` python hl_lines="6"
#!name: categorical_axis_violin_with_outliers
canvas = new_canvas("matplotlib")
(
canvas
.cat_x(df, x="category", y="observation")
.add_violinplot(color="replicate")
.with_outliers(size=8)
)
```
12 changes: 8 additions & 4 deletions docs/categorical/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,14 @@ any external plotting libraries or DataFrames, and are more flexible in some cas

Methods starting with "cat" return categorical plotters. Methods include:

- `cat` ... plotter for numerical data in x/y-axis categorized by such as color.
- `cat_x` ... plotter for categorical data in x-axis.
- `cat_y` ... plotter for categorical data in y-axis.
- `cat_xy` ... plotter for categorical data in both x- and y-axis.
- `cat` ... plotter for numerical data in x/y-axis, categorized by properties such as color &rarr;
[Numerical &times; Numerical Data](num_num.md).
- `cat_x` ... plotter for categorical data in x-axis &rarr;
[Categorical &times; Numerical Data](cat_num.md).
- `cat_y` ... plotter for categorical data in y-axis &rarr;
[Categorical &times; Numerical Data](cat_num.md).
- `cat_xy` ... plotter for categorical data in both x- and y-axis &rarr;
[Categorical &times; Categorical Data](cat_cat.md).

These methods need tabular data and the names of the columns that will be used as the
x and y values.
Expand Down
20 changes: 20 additions & 0 deletions examples/boxplot_with_outliers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from whitecanvas import new_canvas
import pandas as pd

def main():
    """Plot the seaborn "tips" dataset as edge-only violins with outliers.

    Reads ``tips.csv`` from the seaborn-data GitHub repository (network
    access is required), then on a ``"matplotlib:qt"`` canvas draws a violin
    plot of ``tip`` for each ``smoker`` category, colored by ``sex``. The
    violins are converted to edge-only and outlier markers are overlaid with
    ``symbol="D"``. Finally a legend is added and the canvas is shown with
    ``block=True``.
    """
    url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv"
    df = pd.read_csv(url)

    canvas = new_canvas("matplotlib:qt")

    # The chained call adds the layers to the canvas as a side effect; the
    # returned layer object is not needed afterwards, so it is not bound.
    (
        canvas.cat_x(df, "smoker", "tip")
        .add_violinplot(color="sex")
        .as_edge_only()
        .with_outliers(symbol="D")
    )
    canvas.add_legend()
    canvas.show(block=True)

if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ nav:
- Overview: categorical/index.md
- Numerical &times; Numerical Data: categorical/num_num.md
- Categorical &times; Numerical Data: categorical/cat_num.md
- Categorical &times; Categorical Data: categorical/cat_cat.md
- Aggregation: categorical/aggregation.md
- Event Handling:
- Overview: events/index.md
Expand Down
4 changes: 2 additions & 2 deletions whitecanvas/canvas/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1778,7 +1778,7 @@ def _cb_reordered(self):
if isinstance(layer, _l.PrimitiveLayer):
layer_backends.append(layer._backend)
elif isinstance(layer, _l.LayerGroup):
for child in layer.iter_children_recursive():
for child in layer.iter_primitive():
layer_backends.append(child._backend)
elif isinstance(layer, _l.LayerWrapper):
for child in _iter_layers(layer):
Expand Down Expand Up @@ -1871,7 +1871,7 @@ def _iter_layers(
if isinstance(layer, _l.PrimitiveLayer):
yield layer
elif isinstance(layer, _l.LayerGroup):
yield from layer.iter_children_recursive()
yield from layer.iter_primitive()
elif isinstance(layer, _l.LayerWrapper):
yield from _iter_layers(layer._base_layer)
else:
Expand Down
Loading
Loading