diff --git a/docs/_scripts/_hooks.py b/docs/_scripts/_hooks.py index d9193996..ac97232e 100644 --- a/docs/_scripts/_hooks.py +++ b/docs/_scripts/_hooks.py @@ -25,17 +25,32 @@ def _add_images(matchobj: re.Match[str]) -> str: other = code return "```python\n" + other + "\n```" - line, other = code.split("\n", 1) - assert line.startswith("#!name:") - name = line.split(":", 1)[1].strip() - dest = f"_images/{name}.png" + code, name = _get_image_name(code) + code, width = _get_image_width(code) reldepth = "../" * page.file.src_path.count(os.sep) dest = f"{reldepth}_images/{name}.png" - link = f"\n![]({dest}){{ loading=lazy, width=360px }}\n\n" - new_md = "```python\n" + other + "\n```" + link + link = f"\n![]({dest}){{ loading=lazy, width={width}px }}\n\n" + new_md = "```python\n" + code + "\n```" + link return new_md - md = re.sub("``` ?python\n([^`]*)```", _add_images, md, re.DOTALL) + md = re.sub("``` ?python\n([^`]*)```", _add_images, md, flags=re.DOTALL) return md + +def _get_image_name(code: str) -> tuple[str, str]: + line, other = code.split("\n", 1) + assert line.startswith("#!name:") + name = line.split(":", 1)[1].strip() + return other, name + +def _get_image_width(code: str) -> tuple[str, int]: + """Get the width of the image from the code.""" + code = code.strip() + if code.startswith("#!width:"): + line, other = code.split("\n", 1) + width = int(line.split(":", 1)[1].strip()) + else: + other = code + width = 360 + return other, width diff --git a/docs/categorical/cat_num.md b/docs/categorical/cat_num.md new file mode 100644 index 00000000..201c6b8b --- /dev/null +++ b/docs/categorical/cat_num.md @@ -0,0 +1,288 @@ +# Categorical × Numerical Data + +In this section, following data will be used as an example: + +``` python +import numpy as np +from whitecanvas import new_canvas + +rng = np.random.default_rng(12345) +df = { + "category": ["A"] * 40 + ["B"] * 50, + "observation": np.concatenate([rng.random(40), rng.random(50) + 1.3]), + "replicate": [0] * 23 + 
[1] * 17 + [0] * 22 + [1] * 28, + "temperature": rng.normal(scale=2.8, size=90) + 22.0, +} +``` + +How can we visualize the distributions for each category? There are several plots that +use categorical axis as either the x- or y-axis, and numerical axis as the other. +Examples are: + +- Strip plot +- Swarm plot +- Violin plot +- Box plot + +Aside from the categorical axis, data points may further be grouped by other features, +such as the marker symbol and the marker size. Things are even more complicated when +the markers represent numerical values, such as their size being proportional to the +value, or colored by a colormap. + +`whitecanvas` provides a consistent and simple interface to handle all these cases. +Methods used for this purpose are `cat_x` and `cat_y`, where `cat_x` will deem the +x-axis as categorical, and `cat_y` will do the same for the y-axis. + +``` python +#!skip +canvas = new_canvas("matplotlib") + +# create the categorical plotter. +cat_plt_x = canvas.cat_x(df, x="category", y="observation") +cat_plt_y = canvas.cat_y(df, x="observation", y="category") +``` + +`cat_x` and `cat_y` use the argument `x=` and `y=` to specify the columns that are used +for the plot, where `x=` is the categorical axis for `cat_x` and `y=` for `cat_y`. + +``` note +This is one of the important differences from `seaborn`. In `seaborn`, `orient` is +used to specify the orientation of the plots. This design forces the user to add the +argument `orient=` to every plot even though the orientation rarely changes during the +use of the same figure. In `whitecanvas`, you don't have to specify the orientation +once a categorical plotter is created by either `cat_x` or `cat_y`. +``` + +Multiple columns can be used for the categorical axis, but only one column can be used +for the numerical axis. 
+ +``` python +#!skip +# OK +canvas.cat_x(df, x=["category", "replicate"], y="observation") +# OK +canvas.cat_y(df, x="observation", y=["category", "replicate"]) +# NG +canvas.cat_x(df, x="category", y=["observation", "temperature"]) +``` + +## Non-marker-type Plots + +Since plots without data point markers are more straightforward, we will start with +them. They include `add_violinplot`, `add_boxplot`, `add_pointplot` and `add_barplot`. + +``` python +#!name: categorical_axis_violin_0 +canvas = new_canvas("matplotlib") +canvas.cat_x(df, x="category", y="observation").add_violinplot() +canvas.show() +``` + +Violins can also be shown in different colors. Specify the `color=` argument to do that. + +``` python +#!name: categorical_axis_violin_1 +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_violinplot(color="replicate") +) +canvas.show() +``` + +By default, groups with different colors do not overlap. This is controlled by the +`dodge=` argument. Set `dodge=False` to make them overlap (although it is not the way +we usually do). + +``` python +#!name: categorical_axis_violin_2 +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_violinplot(color="replicate", dodge=False) +) +canvas.show() +``` + +`hatch=` can also be specified in a similar way. It will change the hatch pattern of the +violins. + +``` python +#!name: categorical_axis_violin_4 +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_violinplot(hatch="replicate") +) +canvas.show() +``` + +`color` and `hatch` can overlap with each other or the `x=` or `y=` argument. + +``` python +#!name: categorical_axis_violin_5 +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_violinplot(color="category") +) +canvas.show() +``` + +`add_boxplot`, `add_pointplot` and `add_barplot` are very similar to `add_violinplot`. 
+ +``` python +#!name: categorical_axis_many_plots +#!width: 700 +from whitecanvas import hgrid + +canvas = hgrid(ncols=3, size=(1600, 600), backend="matplotlib") + +c0 = canvas.add_canvas(0) +c0.cat_x(df, x="category", y="observation").add_boxplot() +c0.title = "boxplot" + +c1 = canvas.add_canvas(1) +c1.cat_x(df, x="category", y="observation").add_pointplot() +c1.title = "pointplot" + +c2 = canvas.add_canvas(2) +c2.cat_x(df, x="category", y="observation").add_barplot() +c2.title = "barplot" + +canvas.show() +``` + +## Marker-type Plots + +``` python +#!name: categorical_axis_stripplot +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_stripplot(color="replicate") +) +``` + +``` python +#!name: categorical_axis_stripplot_dodge +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_stripplot(color="replicate", dodge=True) +) +``` + +As for the `Markers` layer, `as_edge_only` will convert the face features to the edge features. + +``` python +#!name: categorical_axis_stripplot_dodge_edge_only +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_stripplot(color="replicate", dodge=True) + .as_edge_only(width=2) +) +``` + +Each marker size can represent a numerical value. `with_size` will map the numerical +values of a column to the size of the markers. + +``` python +#!name: categorical_axis_stripplot_by_size +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_stripplot() + .with_size("temperature") +) +``` + +Similarly, each marker color can represent a numerical value. `with_colormap` will map the value with an arbitrary colormap. 
+ +``` python +#!name: categorical_axis_stripplot_by_color +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_stripplot() + .with_colormap("temperature", cmap="coolwarm") +) +``` + +Swarm plot is another way to visualize all the data points with markers. + +``` python +#!name: categorical_axis_swarmplot +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category", y="observation") + .add_swarmplot(sort=True) + .with_colormap("temperature", cmap="coolwarm") +) +``` + +## Aggregation + +Showing the aggregated data is a common way to efficiently visualize a lot of data. This +task is usually done by the module specific group-by methods, but `whitecanvas` provides +a built-in method to simplify the process. + +In following example, `mean()` is used to prepare a mean-aggregated plotter, which has +`add_markers` method to add the mean markers to the plotter. + +``` python +#!name: categorical_axis_stripplot_and_agg_mean +canvas = new_canvas("matplotlib") + +# create a categorical plotter +cat_plt = canvas.cat_x(df, x="category", y="observation") + +# plot all the data +cat_plt.add_stripplot(color="category") +# plot the mean +cat_plt.mean().add_markers(color="category", size=20) + +canvas.show() +``` + +Similar `add_*` methods include `add_line()` and `add_bars()`. + +``` python +#!name: categorical_axis_stripplot_and_agg_line +canvas = new_canvas("matplotlib") + +# create a categorical plotter +cat_plt = canvas.cat_x(df, x="category", y="observation") + +# plot all the data +cat_plt.add_stripplot(color="category") +# plot the mean +cat_plt.mean().add_line(width=3, color="black") + +canvas.show() +``` + +Count plot is a special case of the aggregation. Use `count()` to make the plotter. 
+ +``` python +#!name: categorical_axis_countplot +canvas = new_canvas("matplotlib") +( + canvas + .cat_x(df, x="category") + .count() + .add_bars(color="replicate", dodge=True) +) +canvas.show() +``` diff --git a/docs/categorical/categorical_axis.md b/docs/categorical/categorical_axis.md deleted file mode 100644 index 772c6eed..00000000 --- a/docs/categorical/categorical_axis.md +++ /dev/null @@ -1,140 +0,0 @@ -# Categorical Axis - -There are several plots that use categorical axis. Examples are: - -- Strip plot -- Swarm plot -- Violin plot -- Box plot - -Aside from the categorical axis, data points may further be grouped by other features, -such as the marker symbol and the marker size. Things are even more complicated when -the markers represent numerical values, such as their size being proportional to the -value, or colored by a colormap. - -`whitecanvas` provides a consistent and simple interface to handle all these cases. In -this section, following data will be used as an example: - -``` python -import numpy as np -from whitecanvas import new_canvas - -rng = np.random.default_rng(12345) -df = { - "category": ["A"] * 40 + ["B"] * 50, - "observation": np.concatenate([rng.random(40), rng.random(50) + 1.3]), - "replicate": [0] * 20 + [1] * 20 + [0] * 25 + [1] * 25, - "temperature": rng.normal(scale=2.8, size=90) + 22.0, -} -``` - -## Non-marker-type Plots - -Since plots without data point markers are more straightforward, we will start with -them. It includes `add_violinplot`, `add_boxplot`, `add_pointplot` and `add_barplot`. - -``` python -#!name: categorical_axis_violin_0 -canvas = new_canvas("matplotlib") -canvas.cat(df).add_violinplot("category", "observation") -canvas.show() -``` - -The first argument of `add_violinplot` is the column that defines the offset (shift -from 0 in the categorical axis). The second one is the column that is used for the -values. - -Offset can be defined by multiple columns. You can pass a sequence of column names to -do that. 
- -``` python -#!name: categorical_axis_violin_1 -canvas = new_canvas("matplotlib") -canvas.cat(df).add_violinplot(["category", "replicate"], "observation") -canvas.show() -``` - -Violons can also be shown in different color. Specify the `color=` argument to do that. - -``` python -#!name: categorical_axis_violin_2 -canvas = new_canvas("matplotlib") -canvas.cat(df).add_violinplot("category", "observation", color="replicate") -canvas.show() -``` - -You can see that the violins overlaps. It is because only "category" is used for the -offsets. Offsets, colors and other properties are calculated **independently**. - -To separate them, we need to add "replicate" to the offset. - -``` python -#!name: categorical_axis_violin_3 -canvas = new_canvas("matplotlib") -canvas.cat(df).add_violinplot( - offset=["category", "replicate"], - value="observation", - color="replicate" -) -canvas.show() -``` - -`hatch=` can also be specified in a similar way. Again, All the properties are -independent. - -``` python -#!name: categorical_axis_violin_4 -canvas = new_canvas("matplotlib") -canvas.cat(df).add_violinplot( - offset=["category", "replicate"], - value="observation", - color="replicate", - hatch="category", -) -canvas -``` - -!!! note - This is different from the `seaborn` interface, where `hue=` and `dodge=` are used - to separate groups. As you can see in these examples, this is how `whitecanvas` - can easily handle more complicated cases without confusion. - -`add_boxplot`, `add_pointplot` and `add_barplot` is very similar to `add_violinplot`. 
- -``` python -#!name: categorical_axis_boxplot_0 -canvas = new_canvas("matplotlib") -canvas.cat(df).add_boxplot( - offset=["category", "replicate"], - value="observation", - color="replicate", - hatch="category", -) -canvas -``` - -``` python -#!name: categorical_axis_pointplot_0 -canvas = new_canvas("matplotlib") -canvas.cat(df).add_pointplot( - offset=["category", "replicate"], - value="observation", - color="replicate", - hatch="category", -) -canvas -``` - -``` python -#!name: categorical_axis_barplot_0 -canvas = new_canvas("matplotlib") -canvas.cat(df).add_barplot( - offset=["category", "replicate"], - value="observation", - color="replicate", - hatch="category", -) -canvas -``` - -## Marker-type Plots diff --git a/docs/categorical/index.md b/docs/categorical/index.md index 89dbe9ab..f5039dac 100644 --- a/docs/categorical/index.md +++ b/docs/categorical/index.md @@ -6,10 +6,18 @@ support for high-level categorical plotting methods that use DataFrame objects a In `whitecanvas`, similar functions are provided, but these methods do not depend on any external plotting libraries or DataFrames, and are more flexible in some cases. -## The `cat` Method +## The Categorical Plotters -The `cat` method converts a tabular data into a categorical plotter. Currently, -following objects are allowed as input: +Methods starting with "cat" return categorical plotters. Methods include: + +- `cat` ... plotter for numerical data in x/y-axis categorized by such as color. +- `cat_x` ... plotter for categorical data in x-axis. +- `cat_y` ... plotter for categorical data in y-axis. +- `cat_xy` ... plotter for categorical data in both x- and y-axis. + +These methods need a tabular data and the names of the columns that will be used as the +x and y values. 
+Currently, following objects are allowed as the tabular data input: - `dict` of array-like objects - `pandas.DataFrame` @@ -32,6 +40,6 @@ df = { "value": rng.normal(size=130), } -canvas.cat(df).add_stripplot("label", "value").with_edge(color="black") +canvas.cat_x(df, x="label", y="value").add_stripplot().with_edge(color="black") canvas.show() ``` diff --git a/docs/categorical/lines_and_markers.md b/docs/categorical/lines_and_markers.md deleted file mode 100644 index 0ac88435..00000000 --- a/docs/categorical/lines_and_markers.md +++ /dev/null @@ -1,44 +0,0 @@ -# Categorical Lines and Markers - -Line plot and scatter plot use numerical values for both x and y axes. In this case, -the plot is categorized by such as color, marker symbol, etc. - -``` python -from whitecanvas import new_canvas - -# sample data -df = { - "label": ["A"] * 5 + ["B"] * 5, - "x": [0, 1, 2, 3, 4, 0, 1, 2, 3, 4], - "y": [3, 1, 2, 4, 3, 5, 3, 3, 1, 2], -} -``` - -By setting `color=` to one of the column name, lines are split by the column and -different colors are used for each group. - -``` python -#!name: categorical_add_line_color -canvas = new_canvas("matplotlib") -canvas.cat(df).add_line("x", "y", color="label") -canvas.show() -``` - -By setting `style=`, different line styles are used instead. In the following example, -`color="black"` means that all the lines should be the same color (black). - -``` python -#!name: categorical_add_line_style -canvas = new_canvas("matplotlib") -canvas.cat(df).add_line("x", "y", color="black", style="label") -canvas.show() -``` - -In the case of markers, you can use symbols to distinguish groups. 
- -``` python -#!name: categorical_add_markers_symbol -canvas = new_canvas("matplotlib") -canvas.cat(df).add_markers("x", "y", symbol="label") -canvas.show() -``` diff --git a/docs/categorical/num_num.md b/docs/categorical/num_num.md new file mode 100644 index 00000000..27dbcdbf --- /dev/null +++ b/docs/categorical/num_num.md @@ -0,0 +1,131 @@ +# Numerical × Numerical Data + +## Categorical Lines and Markers + +Line plot and scatter plot use numerical values for both x and y axes. In this case, +the plot is categorized by such as color, marker symbol, etc. + +``` python +from whitecanvas import new_canvas + +# sample data +df = { + "label": ["A"] * 5 + ["B"] * 5, + "x": [0, 1, 2, 3, 4, 0, 1, 2, 3, 4], + "y": [3, 1, 2, 4, 3, 5, 3, 3, 1, 2], +} +``` + +By setting `color=` to one of the column name, lines are split by the column and +different colors are used for each group. + +``` python +#!name: categorical_add_line_color +canvas = new_canvas("matplotlib") +canvas.cat(df, "x", "y").add_line(color="label") +canvas.show() +``` + +By setting `style=`, different line styles are used instead. In the following example, +`color="black"` means that all the lines should be the same color (black). + +``` python +#!name: categorical_add_line_style +canvas = new_canvas("matplotlib") +canvas.cat(df, "x", "y").add_line(color="black", style="label") +canvas.show() +``` + +In the case of markers, you can use symbols to distinguish groups. + +``` python +#!name: categorical_add_markers_symbol +canvas = new_canvas("matplotlib") +canvas.cat(df, "x", "y").add_markers(symbol="label") +canvas.show() +``` + +## Distribution of Numerical Data + +There are several ways to visualize the distribution of numerical data. + +- Histogram +- Kernel Density Estimation (KDE) + +These representations only use one array of numerical data. Therefore, either `x` or `y` should be empty in the `cat` method. 
+ +``` python +import numpy as np + +rng = np.random.default_rng(12345) + +# sample data +df = { + "label": ["A"] * 60 + ["B"] * 30 + ["C"] * 40, + "X": rng.normal(loc=0.0, size=130), + "Y": rng.normal(loc=1.0, size=130), +} +``` + +`x="X"` means that the x-axis is "X" and the y-axis is the count. +Arguments are forwarded to the `histogram` method of `numpy`. + +``` python +#!name: cat_hist_x +canvas = new_canvas("matplotlib") +canvas.cat(df, x="X").add_hist(bins=10) +canvas.show() +``` + +To transpose the histogram, use `y="X"`. + +``` python +#!name: cat_hist_y +canvas = new_canvas("matplotlib") +canvas.cat(df, y="X").add_hist(bins=10) +canvas.show() +``` + +Histograms can be grouped by color. + +``` python +#!name: cat_hist_x_colored +canvas = new_canvas("matplotlib") +canvas.cat(df, x="X").add_hist(bins=10, color="label") +canvas.show() +``` + +If both `x` and `y` are set, the plotter cannot determine which axis to use. To tell +the plotter which axis to use, call `along_x()` or `along_y()` to restrict the +dimension. + +``` python +#!name: cat_hist_along_x +canvas = new_canvas("matplotlib") +# canvas.cat(df, x="label", y="X").add_hist(bins=10) # This will raise an error +canvas.cat(df, x="label", y="X").along_y().add_hist(bins=10) +canvas.show() +``` + +KDE can be similarly added. + +``` python +#!name: cat_kde_x +canvas = new_canvas("matplotlib") +canvas.cat(df, x="X").add_kde(color="label") +canvas.show() +``` + +2-dimensional histogram can be added by `add_hist2d`. + +``` python +#!name: cat_hist2d +canvas = new_canvas("matplotlib") +canvas.cat(df, x="X", y="Y").add_hist2d(cmap=["white", "blue"], bins=(8, 10)) +canvas.show() +``` + +!!! note + `add_hist` and `add_hist2d` return completely different objects (histogram and + heatmap) and they are configured by different arguments. That's why `whitecanvas` + splits them into two different methods. 
diff --git a/examples/raincloud_plot.py b/examples/raincloud_plot.py index 02853291..8fe6cec0 100644 --- a/examples/raincloud_plot.py +++ b/examples/raincloud_plot.py @@ -11,16 +11,14 @@ def main(): url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv" df = pd.read_csv(url) - x = "species" - y = "sepal_width" - cat_plt = canvas.cat(df) + cat_plt = canvas.cat_x(df, x="species", y="sepal_width") cat_plt.add_stripplot( - x, y, color=x, extent=0.3 + color="species", extent=0.3 ).with_edge(color="#3F3F00").with_shift(-0.3) - cat_plt.add_boxplot(x, y, color=x, extent=0.3) - cat_plt.mean().add_markers(x, y, color="black", size=10, symbol="+") + cat_plt.add_boxplot(color="species", extent=0.3) + cat_plt.mean().add_markers(color="black", size=10, symbol="+") cat_plt.add_violinplot( - x, y, color=x, extent=0.3, shape="right" + color="species", extent=0.3, shape="right" ).with_edge(color="#3F3F00").with_shift(0.2) canvas.show(block=True) diff --git a/examples/superplot.py b/examples/superplot.py new file mode 100644 index 00000000..6b2cbfe1 --- /dev/null +++ b/examples/superplot.py @@ -0,0 +1,40 @@ +import numpy as np + +from whitecanvas import new_canvas + + +def rand(mean: float, n: int) -> list[float]: + """Generate random data.""" + return np.random.normal(loc=mean, scale=mean / 4, size=n).tolist() + +def main(): + # generate some random data + np.random.seed(174623) + data = { + "label": ["Control"] * 50 + ["Treatment"] * 50, + "value": rand(1.1, 15) + rand(1.4, 20) + rand(0.9, 15) + rand(3.3, 15) + rand(2.9, 20) + rand(3.8, 15), + "replicate": [1] * 15 + [2] * 20 + [3] * 15 + [1] * 15 + [2] * 20 + [3] * 15, + } + + canvas = new_canvas("matplotlib:qt") + cat_plt = canvas.cat_x(data, x="label", y="value") + + # plot all the raw data + cat_plt.add_swarmplot(color="replicate", size=8) + + # plot the mean of each replicate + cat_plt.mean_for_each("replicate").add_markers( + color="replicate", size=18, symbol="D" + ) + + # plot the mean of all the 
data for control and treatment + cat_plt.mean().add_markers(color="black", size=20, symbol="+") + + # plot the mean of replicate means + cat_plt.mean_for_each("replicate").mean().add_markers( + color="black", size=30, symbol="_" + ) + canvas.show(block=True) + +if __name__ == "__main__": + main() diff --git a/mkdocs.yml b/mkdocs.yml index 76c403a6..d137731e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -33,8 +33,8 @@ nav: - Working with the Backend Objects: canvas/native_objects.md - Categorical Plot: - Overview: categorical/index.md - - Categorical Lines and Markers: categorical/lines_and_markers.md - - Categorical Axis: categorical/categorical_axis.md + - Numerical × Numerical Data: categorical/num_num.md + - Categorical × Numerical Data: categorical/cat_num.md plugins: diff --git a/tests/test_canvas.py b/tests/test_canvas.py index 744a7b1d..dae6ca34 100644 --- a/tests/test_canvas.py +++ b/tests/test_canvas.py @@ -50,7 +50,7 @@ def test_namespace_pointing_at_different_objects(): assert_color_equal(c1.x.color, "blue") def test_grid(backend: str): - cgrid = wc.grid(2, 2, link_x=True, link_y=True, backend=backend) + cgrid = wc.grid(2, 2, backend=backend).link_x().link_y() c00 = cgrid.add_canvas(0, 0) c01 = cgrid.add_canvas(0, 1) c10 = cgrid.add_canvas(1, 0) @@ -76,8 +76,8 @@ def test_grid(backend: str): def test_grid_nonuniform(backend: str): cgrid = wc.grid_nonuniform( - [2, 1], [2, 1], link_x=True, link_y=True, backend=backend - ) + [2, 1], [2, 1], backend=backend + ).link_x().link_y() c00 = cgrid.add_canvas(0, 0) c01 = cgrid.add_canvas(0, 1) c10 = cgrid.add_canvas(1, 0) @@ -101,7 +101,7 @@ def test_grid_nonuniform(backend: str): assert len(c11.layers) == 1 def test_vgrid_hgrid(backend: str): - cgrid = wc.vgrid(2, backend=backend, link_x=True, link_y=True) + cgrid = wc.vgrid(2, backend=backend).link_x().link_y() c0 = cgrid.add_canvas(0) c1 = cgrid.add_canvas(1) @@ -114,7 +114,7 @@ def test_vgrid_hgrid(backend: str): assert len(c0.layers) == 1 assert len(c1.layers) 
== 1 - cgrid = wc.hgrid(2, backend=backend, link_x=True, link_y=True) + cgrid = wc.hgrid(2, backend=backend).link_x().link_y() c0 = cgrid.add_canvas(0) c1 = cgrid.add_canvas(1) diff --git a/tests/test_categorical.py b/tests/test_categorical.py index dacd368a..1ebe99e6 100644 --- a/tests/test_categorical.py +++ b/tests/test_categorical.py @@ -4,20 +4,41 @@ from ._utils import assert_color_array_equal import pytest +def test_cat(backend: str): + canvas = new_canvas(backend=backend) + rng = np.random.default_rng(1642) + df = { + "x": rng.normal(size=30), + "y": rng.normal(size=30), + "label": np.repeat(["A", "B", "C"], 10), + } + canvas.cat(df, "x", "y").add_line() + canvas.cat(df, "x", "y").add_line(color="label") + canvas.cat(df, "x", "y").add_markers() + canvas.cat(df, "x", "y").add_markers(color="label") + canvas.cat(df, "x", "y").add_markers(hatch="label") + canvas.cat(df, "x", "y").add_hist2d(bins=(5, 4)) + canvas.cat(df, "x", "y").along_x().add_hist(bins=5) + canvas.cat(df, "x", "y").along_x().add_hist(bins=5, color="label") + canvas.cat(df, "x", "y").along_y().add_hist(bins=6) + canvas.cat(df, "x", "y").along_y().add_hist(bins=6, color="label") + @pytest.mark.parametrize("orient", ["v", "h"]) def test_cat_plots(backend: str, orient: str): canvas = new_canvas(backend=backend) df = { - "x": np.arange(30), "y": np.arange(30), "label": np.repeat(["A", "B", "C"], 10), + "c": ["P", "Q"] * 15, } - - canvas.cat(df).add_stripplot("label", "y", orient=orient) - canvas.cat(df).add_swarmplot("label", "y", orient=orient) - canvas.cat(df).add_boxplot("label", "y", orient=orient) - canvas.cat(df).add_violinplot("label", "y", orient=orient) - canvas.cat(df).add_countplot("label", orient=orient) + if orient == "v": + cat_plt = canvas.cat_x(df, "label", "y") + else: + cat_plt = canvas.cat_y(df, "y", "label") + cat_plt.add_stripplot(color="c") + cat_plt.add_swarmplot(color="c") + cat_plt.add_boxplot(color="c") + cat_plt.add_violinplot(color="c") def test_colored_plots(backend: 
str): canvas = new_canvas(backend=backend) @@ -27,8 +48,8 @@ def test_colored_plots(backend: str): "label": np.repeat(["A", "B", "C"], 10), } - canvas.cat(df).add_markers("x", "y", color="label") - canvas.cat(df).add_line("x", "y", color="label") + canvas.cat(df, "x", "y").add_markers(color="label") + canvas.cat(df, "x", "y").add_line(color="label") def test_markers(backend: str): canvas = new_canvas(backend=backend) @@ -40,26 +61,26 @@ def test_markers(backend: str): "label1": ["One"] * 10 + ["Two"] * 20, } - _c = canvas.cat(df) - out = _c.add_markers("x", "y", color="label0", size="size", symbol="label1") + _c = canvas.cat(df, "x", "y") + out = _c.add_markers(color="label0", size="size", symbol="label1") assert len(set(out._base_layer.symbol[:10])) == 1 assert len(set(out._base_layer.symbol[10:])) == 1 - out = _c.add_markers("x", "y", color="label1", size="size", hatch="label0") + out = _c.add_markers(color="label1", size="size", hatch="label0") assert len(set(out._base_layer.face.hatch[:10])) == 1 assert len(set(out._base_layer.face.hatch[10:20])) == 1 assert len(set(out._base_layer.face.hatch[20:])) == 1 - out = _c.add_markers("x", "y", color="label1").with_edge(color="label0") + out = _c.add_markers(color="label1").with_edge(color="label0") assert len(np.unique(out._base_layer.edge.color[:10], axis=0)) == 1 assert len(np.unique(out._base_layer.edge.color[10:20], axis=0)) == 1 assert len(np.unique(out._base_layer.edge.color[20:], axis=0)) == 1 # test scalar color - out = _c.add_markers("x", "y", color="black") + out = _c.add_markers(color="black") assert_color_array_equal(out._base_layer.face.color, "black") - out = _c.add_markers("x", "y", color="transparent").with_edge_colormap("size") + out = _c.add_markers(color="transparent").with_edge_colormap("size") def test_heatmap(backend: str): canvas = new_canvas(backend=backend) @@ -68,7 +89,7 @@ def test_heatmap(backend: str): "y": ["P", "P", "Q", "Q", "R", "R"], "z": [1, 2, 3, 4, 5, 6], } - im = 
canvas.cat(df).add_heatmap("x", "y", value="z") + im = canvas.cat_xy(df, "x", "y").first().add_heatmap(value="z") canvas.imref(im).add_text() df = { @@ -76,9 +97,6 @@ def test_heatmap(backend: str): "y": ["P", "Q", "Q", "Q", "P", "Q"], "z": [1.1, 2.1, 3.4, 6.4, 1.1, 6.8], } - with pytest.raises(ValueError): - # has duplication - canvas.cat(df).add_heatmap("x", "y", value="z") - im = canvas.cat(df).mean().add_heatmap("x", "y", value="z", fill=-1) + im = canvas.cat_xy(df, "x", "y").mean().add_heatmap(value="z", fill=-1) canvas.imref(im).add_text(fmt=".1f") assert im.clim == (1.1, 6.6) diff --git a/whitecanvas/backend/plotly/canvas.py b/whitecanvas/backend/plotly/canvas.py index 924e65f5..e0f153f4 100644 --- a/whitecanvas/backend/plotly/canvas.py +++ b/whitecanvas/backend/plotly/canvas.py @@ -2,7 +2,7 @@ import sys import weakref -from typing import Callable +from typing import TYPE_CHECKING, Callable import numpy as np from plotly import graph_objects as go @@ -14,6 +14,9 @@ from whitecanvas.types import MouseEvent from whitecanvas.utils.normalize import rgba_str_color +if TYPE_CHECKING: + from plotly._subplots import SubplotXY + class Canvas: def __init__( @@ -42,7 +45,7 @@ def __init__( ) self._fig.add_trace(self._scatter) - def _subplot_layout(self): + def _subplot_layout(self) -> SubplotXY: try: layout = self._fig.get_subplot(**self._loc.asdict()) except Exception: # manually wrapped backend are not created with subplots @@ -76,7 +79,15 @@ def _plt_get_ylabel(self): def _plt_reorder_layers(self, layers: list[PlotlyLayer]): model_to_idx_map = {id(layer._props): i for i, layer in enumerate(layers)} first, *data = self._fig._data - self._fig._data = [first] + [data[model_to_idx_map[id(r)]] for r in data] + ordered_data = [] + data_in_other = [] + for _data in data: + data_id = id(_data) + if data_id in model_to_idx_map: + ordered_data.append(data[model_to_idx_map[data_id]]) + else: + data_in_other.append(_data) + self._fig._data = [first, *ordered_data, 
*data_in_other] def _plt_get_aspect_ratio(self) -> float | None: """Get aspect ratio of canvas""" @@ -181,6 +192,9 @@ def _plt_twinx(self): kwargs["secondary_y"] = True return Canvas(self._fig, **kwargs) + def _repr_mimebundle_(self, *args, **kwargs): + return self._fig._repr_mimebundle_(*args, **kwargs) + def _convert_cb(cb): return lambda _, points, state: cb(points.point_inds) # noqa: ARG005 diff --git a/whitecanvas/backend/vispy/band.py b/whitecanvas/backend/vispy/band.py index c40d7996..90f7b3b3 100644 --- a/whitecanvas/backend/vispy/band.py +++ b/whitecanvas/backend/vispy/band.py @@ -20,7 +20,10 @@ def __init__(self, t, ydata0, ydata1, orient: Orientation): bw = np.stack([ydata1[::-1], t[::-1]], axis=1) verts = np.concatenate([fw, bw], axis=0) self._edge_style = LineStyle.SOLID - super().__init__(verts, border_width=0) + try: + super().__init__(verts, border_width=0) + except Exception: + super().__init__(verts, border_width=0, triangulate=False) self.unfreeze() self._t = t self._y0 = ydata0 diff --git a/whitecanvas/canvas/_base.py b/whitecanvas/canvas/_base.py index e0b78061..71130748 100644 --- a/whitecanvas/canvas/_base.py +++ b/whitecanvas/canvas/_base.py @@ -8,6 +8,7 @@ Iterable, Iterator, Literal, + Sequence, TypeVar, overload, ) @@ -56,6 +57,7 @@ from typing_extensions import Concatenate, ParamSpec, Self _P = ParamSpec("_P") + _DF = TypeVar("_DF") _L = TypeVar("_L", bound=_l.Layer) _L0 = TypeVar("_L0", _l.Bars, _l.Band) @@ -307,7 +309,34 @@ def update_axes( self.y.label.color = color return self - def cat(self, data, update_labels: bool = True) -> _df.DataFramePlotter: + def update_labels( + self, + title: str | None = None, + x: str | None = None, + y: str | None = None, + ) -> Self: + """ + Helper function to update the title, x, and y labels. 
+ + >>> from whitecanvas import new_canvas + >>> canvas = new_canvas("matplotlib").update_labels("Title", "X", "Y") + """ + if title is not None: + self.title.text = title + if x is not None: + self.x.label.text = x + if y is not None: + self.y.label.text = y + return self + + def cat( + self, + data: _DF, + x: str | None = None, + y: str | None = None, + *, + update_labels: bool = True, + ) -> _df.CatPlotter[Self, _DF]: """ Categorize input data for plotting. @@ -328,9 +357,39 @@ def cat(self, data, update_labels: bool = True) -> _df.DataFramePlotter: CategorizedPlot Plotter object. """ - plotter = _df.DataFramePlotter(self, data, update_label=update_labels) + plotter = _df.CatPlotter(self, data, x, y, update_label=update_labels) return plotter + def cat_x( + self, + data: _DF, + x: str | Sequence[str] | None = None, + y: str | None = None, + *, + update_labels: bool = True, + ) -> _df.XCatPlotter[Self, _DF]: + return _df.XCatPlotter(self, data, x, y, update_labels) + + def cat_y( + self, + data: _DF, + x: str | None = None, + y: str | Sequence[str] | None = None, + *, + update_labels: bool = True, + ) -> _df.YCatPlotter[Self, _DF]: + return _df.YCatPlotter(self, data, y, x, update_labels) + + def cat_xy( + self, + data: _DF, + x: str | Sequence[str], + y: str | Sequence[str], + *, + update_labels: bool = True, + ) -> _df.XYCatPlotter[Self, _DF]: + return _df.XYCatPlotter(self, data, x, y, update_labels) + def stack_over(self, layer: _L0) -> StackOverPlotter[Self, _L0]: """ Stack new data over the existing layer. 
@@ -596,14 +655,15 @@ def add_hist( data: ArrayLike1D, *, bins: int | ArrayLike1D = 10, - range: tuple[float, float] | None = None, - density: bool = False, + limits: tuple[float, float] | None = None, name: str | None = None, + shape: Literal["step", "polygon", "bars"] = "bars", + kind: Literal["count", "density", "frequency", "percent"] = "count", orient: str | Orientation = Orientation.VERTICAL, color: ColorType | None = None, - alpha: float = 1.0, - hatch: str | Hatch | None = None, - ) -> _l.Bars: + width: float | None = None, + style: LineStyle | str | None = None, + ) -> _lg.Histogram: """ Add data as a histogram. @@ -616,9 +676,9 @@ def add_hist( bins : int or 1D array-like, default 10 Bins of the histogram. This parameter will directly be passed to `np.histogram`. - range : (float, float), optional - Range in which histogram will be built. This parameter will - directly be passed to `np.histogram`. + limits : (float, float), optional + Limits in which histogram will be built. This parameter is equivalent to + the `range` parameter of `np.histogram`. density : bool, default False If True, heights of bars will be normalized so that the total area of the histogram will be 1. This parameter will directly @@ -629,10 +689,6 @@ def add_hist( Orientation of the bars. color : color-like, optional Color of the bars. - alpha : float, default 1.0 - Alpha channel of the bars. - hatch : str or FacePattern, optional - Pattern of the bar faces. Use the theme default if not specified.
Returns ------- @@ -641,74 +697,11 @@ def add_hist( """ name = self._coerce_name("histogram", name) color = self._generate_colors(color) - hatch = theme._default("bars.hatch", hatch) - layer = _l.Bars.from_histogram( - data, bins=bins, range=range, density=density, name=name, color=color, - orient=orient, alpha=alpha, hatch=hatch, backend=self._get_backend(), - ) # fmt: skip - return self.add_layer(layer) - - def add_hist_line( - self, - data: ArrayLike1D, - *, - bins: int | ArrayLike1D = 10, - range: tuple[float, float] | None = None, - density: bool = False, - name: str | None = None, - orient: str | Orientation = Orientation.VERTICAL, - color: ColorType | None = None, - width: float | None = None, - style: LineStyle | str | None = None, - alpha: float = 1.0, - antialias: bool = True, - ) -> _l.Line: - """ - Add a line plot of the histogram. - - >>> canvas.add_hist_line(np.random.normal(size=100), bins=12) - - Parameters - ---------- - data : array-like - 1D Array of data. - bins : int or 1D array-like, default 10 - Bins of the histogram. This parameter will directly be passed - to `np.histogram`. - range : (float, float), optional - Range in which histogram will be built. This parameter will - directly be passed to `np.histogram`. - density : bool, default False - If True, heights of bars will be normalized so that the total - area of the histogram will be 1. This parameter will directly - be passed to `np.histogram`. - name : str, optional - Name of the layer. - orient : str or Orientation, default Orientation.VERTICAL - Orientation of the bars. - color : color-like, optional - Color of the bars. - width : float, optional - Line width. Use the theme default if not specified. - style : str or LineStyle, optional - Line style. Use the theme default if not specified. - alpha : float, default 1.0 - Alpha channel of the line. - antialias : bool, default True - Antialiasing of the line. - - Returns - ------- - Line - The line layer that represents the histogram. 
- """ - name = self._coerce_name("histogram", name) - color = self._generate_colors(color) width = theme._default("line.width", width) style = theme._default("line.style", style) - layer = _l.Line.build_hist( - data, bins=bins, density=density, range=range, orient=orient, name=name, - color=color, width=width, style=style, alpha=alpha, antialias=antialias, + layer = _lg.Histogram.from_array( + data, bins=bins, limits=limits, shape=shape, kind=kind, name=name, + color=color, width=width, style=style, orient=orient, backend=self._get_backend(), ) # fmt: skip return self.add_layer(layer) @@ -721,7 +714,8 @@ def add_hist2d( cmap: ColormapType = "inferno", name: str | None = None, bins: int | tuple[int, int] = 10, - range: tuple[tuple[float, float], tuple[float, float]] | None = None, + rangex: tuple[float, float] | None = None, + rangey: tuple[float, float] | None = None, density: bool = False, ) -> _l.Image: """ @@ -747,8 +741,10 @@ def add_hist2d( bins : int or tuple[int, int], optional Bins of the histogram of X/Y dimension respectively. If an integer is given, it will be used for both dimensions. - range : (2, 2) array-like, optional - Range in which histogram will be built. + rangex : (float, float), optional + Range of x values in which histogram will be built. + rangey : (float, float), optional + Range of y values in which histogram will be built. density : bool, default False If True, values of the histogram will be normalized so that the total intensity of the histogram will be 1. @@ -759,8 +755,8 @@ def add_hist2d( Image layer representing the 2D histogram. 
""" layer = _l.Image.build_hist( - x, y, bins=bins, range=range, density=density, name=name, cmap=cmap, - backend=self._get_backend(), + x, y, bins=bins, range=(rangex, rangey), density=density, name=name, + cmap=cmap, backend=self._get_backend(), ) # fmt: skip return self.add_layer(layer) @@ -1180,8 +1176,8 @@ def add_kde( orient: str | Orientation = Orientation.VERTICAL, band_width: float | Literal["scott", "silverman"] = "scott", color: ColorType | None = None, - alpha: float = 1.0, - hatch: str | Hatch = Hatch.SOLID, + width: float | None = None, + style: LineStyle | str | None = None, ) -> _l.Band: """ Add data as a band layer representing kernel density estimation (KDE). @@ -1208,23 +1204,19 @@ def add_kde( Returns ------- - Band - The band layer representing KDE. + Kde + The KDE layer. """ name = self._coerce_name(_l.Band, name) color = self._generate_colors(color) + width = theme._default("line.width", width) + style = theme._default("line.style", style) - layer = _l.Band.from_kde( - data, - bottom, - name=name, - band_width=band_width, - orient=orient, - color=color, - alpha=alpha, - hatch=hatch, + layer = _lg.Kde.from_array( + data, bottom=bottom, scale=1, band_width=band_width, name=name, + orient=orient, color=color, width=width, style=style, backend=self._get_backend(), - ) + ) # fmt: skip return self.add_layer(layer) @overload diff --git a/whitecanvas/canvas/_grid.py b/whitecanvas/canvas/_grid.py index 60081852..fa2fede7 100644 --- a/whitecanvas/canvas/_grid.py +++ b/whitecanvas/canvas/_grid.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Iterator +from typing import TYPE_CHECKING, Any, Iterator import numpy as np from numpy.typing import NDArray @@ -13,6 +13,9 @@ from whitecanvas.theme import get_theme from whitecanvas.utils.normalize import arr_color +if TYPE_CHECKING: + from typing_extensions import Self + class GridEvents(SignalGroup): drawn = Signal() @@ -55,8 +58,6 @@ def uniform( nrows: int = 1, ncols: int = 1, *, 
- link_x: bool = False, - link_y: bool = False, backend: Backend | str | None = None, ) -> CanvasGrid: """ @@ -71,10 +72,7 @@ def uniform( backend : backend-like, optional The backend to use for the grid. """ - return CanvasGrid( - [10] * nrows, [10] * ncols, link_x=link_x, link_y=link_y, - backend=backend, - ) # fmt: skip + return CanvasGrid([10] * nrows, [10] * ncols, backend=backend) @property def shape(self) -> tuple[int, int]: @@ -88,16 +86,7 @@ def x_linked(self) -> bool: @x_linked.setter def x_linked(self, value: bool): - value = bool(value) - if value == self._x_linked: - return - if value: - for _, canvas in self.iter_canvas(): - canvas.x.events.lim.connect(self._align_xlims, unique=True) - else: - for _, canvas in self.iter_canvas(): - canvas.x.events.lim.disconnect(self._align_xlims) - self._x_linked = value + self.link_x() if value else self.unlink_x() @property def y_linked(self) -> bool: @@ -106,16 +95,65 @@ def y_linked(self) -> bool: @y_linked.setter def y_linked(self, value: bool): - value = bool(value) - if value == self._y_linked: - return - if value: + self.link_y() if value else self.unlink_y() + + def link_x(self, future: bool = True) -> Self: + """ + Link all the x-axes of the canvases in the grid. + + >>> from whitecanvas import grid + >>> g = grid(2, 2).link_x() # link x-axes of all canvases + + Parameters + ---------- + future : bool, default True + If True, all the canvases added in the future will also be linked. Only link + the existing canvases if False. + """ + if not self._x_linked: + for _, canvas in self.iter_canvas(): + canvas.x.events.lim.connect(self._align_xlims, unique=True) + if future: + self._x_linked = True + return self + + def link_y(self, future: bool = True) -> Self: + """ + Link all the y-axes of the canvases in the grid.
+ + >>> from whitecanvas import grid + >>> g = grid(2, 2).link_y() # link y-axes of all canvases + + Parameters + ---------- + future : bool, default True + If True, all the canvases added in the future will also be linked. Only link + the existing canvases if False. + """ + if not self._y_linked: for _, canvas in self.iter_canvas(): canvas.y.events.lim.connect(self._align_ylims, unique=True) - else: + if future: + self._y_linked = True + return self + + def unlink_x(self, future: bool = True) -> Self: + """Unlink all the x-axes of the canvases in the grid.""" + if self._x_linked: + for _, canvas in self.iter_canvas(): + canvas.x.events.lim.disconnect(self._align_xlims) + if future: + self._x_linked = False + return self + + def unlink_y(self, future: bool = True) -> Self: + """Unlink all the y-axes of the canvases in the grid.""" + if self._y_linked: for _, canvas in self.iter_canvas(): canvas.y.events.lim.disconnect(self._align_ylims) - self._y_linked = value + if future: + self._y_linked = False + return self def __repr__(self) -> str: cname = type(self).__name__ @@ -251,6 +289,21 @@ def _repr_png_(self): return file_obj.read() return None + def _ipython_display_(self, *args: Any, **kwargs: Any) -> Any: + if hasattr(self._backend_object, "_ipython_display_"): + return self._backend_object._ipython_display_(*args, **kwargs) + raise NotImplementedError() + + def _repr_mimebundle_(self, *args: Any, **kwargs: Any) -> dict: + if hasattr(self._backend_object, "_repr_mimebundle_"): + return self._backend_object._repr_mimebundle_(*args, **kwargs) + raise NotImplementedError() + + def _repr_html_(self, *args: Any, **kwargs: Any) -> str: + if hasattr(self._backend_object, "_repr_html_"): + return self._backend_object._repr_html_(*args, **kwargs) + raise NotImplementedError() + class CanvasVGrid(CanvasGrid): @override @@ -258,11 +311,9 @@ def __init__( self, heights: list[int], *, - link_x: bool = False, - link_y: bool = False, backend: Backend | str | None = None, ) ->
None: - super().__init__(heights, [1], link_x=link_x, link_y=link_y, backend=backend) + super().__init__(heights, [1], backend=backend) @override def __getitem__(self, key: int) -> Canvas: @@ -277,11 +328,9 @@ def uniform( cls, nrows: int = 1, *, - link_x: bool = False, - link_y: bool = False, backend: Backend | str | None = None, ) -> CanvasVGrid: - return CanvasVGrid([1] * nrows, link_x=link_x, link_y=link_y, backend=backend) + return CanvasVGrid([1] * nrows, backend=backend) @override def add_canvas(self, row: int, **kwargs) -> Canvas: @@ -294,11 +343,9 @@ def __init__( self, widths: list[int], *, - link_x: bool = False, - link_y: bool = False, backend: Backend | str | None = None, ) -> None: - super().__init__([1], widths, link_x=link_x, link_y=link_y, backend=backend) + super().__init__([1], widths, backend=backend) @override def __getitem__(self, key: int) -> Canvas: @@ -313,11 +360,9 @@ def uniform( cls, ncols: int = 1, *, - link_x: bool = False, - link_y: bool = False, backend: Backend | str | None = None, ) -> CanvasHGrid: - return CanvasHGrid([1] * ncols, link_x=link_x, link_y=link_y, backend=backend) + return CanvasHGrid([1] * ncols, backend=backend) @override def add_canvas(self, col: int, **kwargs) -> Canvas: @@ -383,3 +428,12 @@ def screenshot(self) -> NDArray[np.uint8]: def _repr_png_(self): """Return PNG representation of the widget for QtConsole.""" return self._grid._repr_png_() + + def _repr_mimebundle_(self, *args: Any, **kwargs: Any) -> dict: + return self._grid._repr_mimebundle_(*args, **kwargs) + + def _ipython_display_(self, *args: Any, **kwargs: Any) -> Any: + return self._grid._ipython_display_(*args, **kwargs) + + def _repr_html_(self, *args: Any, **kwargs: Any) -> str: + return self._grid._repr_html_(*args, **kwargs) diff --git a/whitecanvas/canvas/_imageref.py b/whitecanvas/canvas/_imageref.py index d148b0bf..909f62a1 100644 --- a/whitecanvas/canvas/_imageref.py +++ b/whitecanvas/canvas/_imageref.py @@ -61,6 +61,7 @@ def add_text( size: 
int = 8, color_rule: ColorType | Callable[[np.ndarray], ColorType] | None = None, fmt: str = "", + text_invalid: str | None = None, ) -> Texts[_mixin.MonoFace, _mixin.MonoEdge, _mixin.MultiFont]: """ Add text annotation to each pixel of the image. @@ -130,14 +131,18 @@ def _color_rule(x: NDArray[np.number]) -> NDArray[np.float32]: fmt_style = "{}" for iy, y in enumerate(ys): for ix, x in enumerate(xs): - texts.append(fmt_style.format(img_data[iy, ix])) + if np.isfinite(img_data[iy, ix]): + text = fmt_style.format(img_data[iy, ix]) + else: + if text_invalid is None: + text = repr(img_data[iy, ix]) + else: + text = text_invalid + texts.append(text) xdata.append(x) ydata.append(y) colors.append(_color_rule(img_color[iy, ix])) - return canvas.add_text( - xdata, - ydata, - texts, - size=size, - anchor="center", - ).with_font_multi(color=np.stack(colors, axis=0)) + return ( + canvas.add_text(xdata, ydata, texts, size=size, anchor="center") + .with_font_multi(color=np.stack(colors, axis=0)) + ) # fmt: skip diff --git a/whitecanvas/canvas/dataframe/__init__.py b/whitecanvas/canvas/dataframe/__init__.py index 7491ce65..f8365657 100644 --- a/whitecanvas/canvas/dataframe/__init__.py +++ b/whitecanvas/canvas/dataframe/__init__.py @@ -1,3 +1,10 @@ -from whitecanvas.canvas.dataframe._plot import DataFramePlotter +from whitecanvas.canvas.dataframe._both_cat import XYCatPlotter +from whitecanvas.canvas.dataframe._feature_cat import CatPlotter +from whitecanvas.canvas.dataframe._one_cat import XCatPlotter, YCatPlotter -__all__ = ["DataFramePlotter"] +__all__ = [ + "CatPlotter", + "XCatPlotter", + "YCatPlotter", + "XYCatPlotter", +] diff --git a/whitecanvas/canvas/dataframe/_base.py b/whitecanvas/canvas/dataframe/_base.py new file mode 100644 index 00000000..2b26b74a --- /dev/null +++ b/whitecanvas/canvas/dataframe/_base.py @@ -0,0 +1,184 @@ +from __future__ import annotations + +import itertools +import weakref +from typing import ( + TYPE_CHECKING, + Generic, + Iterator, + 
Literal, + Sequence, + TypeVar, + Union, +) + +import numpy as np + +from whitecanvas._exceptions import ReferenceDeletedError +from whitecanvas.layers.tabular import _utils, parse + +if TYPE_CHECKING: + from typing_extensions import Self + + from whitecanvas.canvas._base import CanvasBase + from whitecanvas.layers.tabular._dataframe import DataFrameWrapper + +_C = TypeVar("_C", bound="CanvasBase") +_DF = TypeVar("_DF") +NStr = Union[str, Sequence[str]] +AggMethods = Literal["min", "max", "mean", "median", "sum", "std"] + + +class BaseCatPlotter(Generic[_C, _DF]): + def __init__( + self, + canvas: _C, + df: _DF, + ): + self._canvas_ref = weakref.ref(canvas) + self._df = parse(df) + + def _canvas(self) -> _C: + canvas = self._canvas_ref() + if canvas is None: + raise ReferenceDeletedError("Canvas has been deleted.") + return canvas + + def __enter__(self) -> Self: + return self + + def __exit__(self, *args) -> None: + pass + + +class CatIterator(Generic[_DF]): + def __init__( + self, + df: DataFrameWrapper[_DF], + offsets: tuple[str, ...], + full: bool = True, + ): + self._df = df + self._offsets = offsets + self._full = full + self._cat_map_cache = {} + + @property + def df(self) -> DataFrameWrapper[_DF]: + return self._df + + @property + def offsets(self) -> tuple[str, ...]: + return self._offsets + + def category_map(self, columns: tuple[str, ...] 
| None = None) -> dict[tuple, int]: + """Calculate how to map category columns to integers.""" + if columns is None: + key = self._offsets + else: + key = tuple(columns) + if key in self._cat_map_cache: + return self._cat_map_cache[key] + if self._full: + each_uni = [_utils.unique(self._df[c], axis=None) for c in key] + _map = {uni: i for i, uni in enumerate(itertools.product(*each_uni))} + else: + group_keys = [sl for sl, _ in self._df.group_by(key)] + labels = np.array(group_keys, dtype=object) + each_uni = [_utils.unique(_l, axis=None) for _l in labels.T] + exists = set(group_keys) + i = 0 + for uni in itertools.product(*each_uni): + if uni not in exists: + continue + _map[uni] = i + i += 1 + self._cat_map_cache[key] = _map + return _map + + def iter_arrays( + self, + by: tuple[str, ...], + dodge: tuple[str, ...] | None = None, + ) -> Iterator[tuple[tuple, float, DataFrameWrapper[_DF]]]: + if dodge is None: + dodge = () + if set(self._offsets) > set(by): + raise ValueError( + f"offsets must be a subset of by, got offsets={self._offsets!r} and " + f"by={by!r}" + ) + indices = [by.index(d) for d in self._offsets] + _map = self.category_map(self._offsets) + if not dodge: + for sl, group in self._df.group_by(by): + key = tuple(sl[i] for i in indices) + yield sl, _map[key], group + else: + if set(self._offsets) & set(dodge): + raise ValueError( + f"offsets and dodge must be disjoint, got offsets={self._offsets!r}" + f" and dodge={dodge!r}" + ) + inv_indices = [by.index(d) for d in dodge] + _res_map = self.category_map(dodge) + _nres = len(_res_map) + _width = 0.8 + dmax = (_nres - 1) / 2 / _nres * _width + dd = np.linspace(-dmax, dmax, _nres) + for sl, group in self._df.group_by(by): + key = tuple(sl[i] for i in indices) + res = tuple(sl[i] for i in inv_indices) + yield sl, dd[_res_map[res]] + _map[key], group + + def prep_arrays( + self, + by: tuple[str, ...], + value: str, + dodge: tuple[str, ...] 
| None = None, + ) -> tuple[list[float], list[np.ndarray], list[tuple]]: + x = [] + arrays = [] + categories = [] + for sl, offset, group in self.iter_arrays(by, dodge): + x.append(offset) + arrays.append(group[value]) + categories.append(sl) + return x, arrays, categories + + def prep_position_map( + self, + by: tuple[str], + dodge: tuple[str, ...] | None = None, + ) -> dict[tuple, float]: + out = {} + for sl, offset, _ in self.iter_arrays(by, dodge): + out[sl] = offset + return out + + def axis_ticks(self) -> tuple[list[float], list[str]]: + pos = [] + labels = [] + for k, v in self.category_map(self._offsets).items(): + pos.append(v) + labels.append("\n".join(map(str, k))) + return pos, labels + + def axis_label(self) -> str: + return "/".join(self._offsets) + + def zoom_factor(self, dodge: tuple[str, ...] | None = None) -> float: + """Return the zoom factor for the given dodge.""" + if dodge: + _res_map = self.category_map(dodge) + _nres = len(_res_map) + if _nres == 1: + return 1.0 + _width = 0.8 + dmax = (_nres - 1) / 2 / _nres * _width + return 2 * dmax / (_nres - 1) + else: + return 1.0 + + def categories(self) -> list[tuple]: + return list(self.category_map(self._offsets).keys()) diff --git a/whitecanvas/canvas/dataframe/_both_cat.py b/whitecanvas/canvas/dataframe/_both_cat.py new file mode 100644 index 00000000..6e49fb3c --- /dev/null +++ b/whitecanvas/canvas/dataframe/_both_cat.py @@ -0,0 +1,196 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Generic, + Sequence, + TypeVar, +) + +import numpy as np + +from whitecanvas.canvas.dataframe._base import BaseCatPlotter, CatIterator +from whitecanvas.layers import tabular as _lt +from whitecanvas.types import ColormapType + +if TYPE_CHECKING: + from typing_extensions import Self + + from whitecanvas.canvas._base import CanvasBase + from whitecanvas.layers.tabular._dataframe import DataFrameWrapper + + NStr = str | Sequence[str] + +_C = TypeVar("_C", bound="CanvasBase") +_DF = 
TypeVar("_DF") + + +class _XYAggregator(Generic[_C, _DF]): + def __init__(self, method: str, plotter: XYCatPlotter[_C, _DF] = None): + self._method = method + self._plotter = plotter + + def __get__(self, ins: _C, owner) -> Self: + return _XYAggregator(self._method, ins) + + def __repr__(self) -> str: + return f"XYAggregator<{self._method}>" + + def __call__(self) -> XYCatAggPlotter[_C, _DF]: + """Aggregate the values before plotting it.""" + plotter = self._plotter + if plotter is None: + raise TypeError("Cannot call this method from a class.") + if plotter._x is None or plotter._y is None: + raise ValueError("Value column is not specified.") + return XYCatAggPlotter( + plotter._canvas(), + plotter._cat_iter_x, + plotter._cat_iter_y, + x=plotter._x, + y=plotter._y, + method=self._method, + ) + + +class XYCatPlotter(BaseCatPlotter[_C, _DF]): + def __init__( + self, + canvas: _C, + df: _DF, + x: str | tuple[str, ...], + y: str | tuple[str, ...], + update_label: bool = False, + ): + super().__init__(canvas, df) + if isinstance(x, str): + x = (x,) + if isinstance(y, str): + y = (y,) + self._x: tuple[str, ...] = x + self._y: tuple[str, ...] 
= y + self._update_label = update_label + self._cat_iter_x = CatIterator(self._df, x) + self._cat_iter_y = CatIterator(self._df, y) + if update_label: + self._update_xy_label(x, y) + self._update_axis_labels() + + def _update_xy_label( + self, + x: str | tuple[str, ...], + y: str | tuple[str, ...], + ) -> None: + """Update the x and y labels using the column names""" + canvas = self._canvas() + if not isinstance(x, str): + x = "/".join(x) + if not isinstance(y, str): + y = "/".join(y) + canvas.x.label.text = x + canvas.y.label.text = y + + def _update_axis_labels(self) -> None: + """Update the x and y labels using the column names""" + canvas = self._canvas() + canvas.x.ticks.set_labels(*self._cat_iter_x.axis_ticks()) + canvas.y.ticks.set_labels(*self._cat_iter_y.axis_ticks()) + + mean = _XYAggregator("mean") + median = _XYAggregator("median") + sum = _XYAggregator("sum") + min = _XYAggregator("min") + max = _XYAggregator("max") + count = _XYAggregator("size") + first = _XYAggregator("first") + + +class XYCatAggPlotter(BaseCatPlotter[_C, _DF]): + def __init__( + self, + canvas: _C, + cat_iter_x: CatIterator[_DF], + cat_iter_y: CatIterator[_DF], + x: str | tuple[str, ...], + y: str | tuple[str, ...], + method: str, + ): + super().__init__(canvas, cat_iter_x.df) + self._cat_iter_x = cat_iter_x + self._cat_iter_y = cat_iter_y + self._x = x + self._y = y + self._agg_method = method + + def add_heatmap( + self, + value: str, + *, + cmap: ColormapType = "inferno", + clim: tuple[float, float] | None = None, + name: str | None = None, + fill: float = 0, + ) -> _lt.DFHeatmap[_DF]: + """ + Add a heatmap whose color represents the value of the aggregated data. + + Parameters + ---------- + value : str + Column name to use as the value. + cmap : colormap-like, default "inferno" + Colormap to use for the heatmap. + clim : (float, float), optional + Color limits for the colormap. If not specified, the limits are calculated + from the data min/max. 
+ name : str, optional + Name of the layer. + fill : float, optional + Value to fill for the cells that do not have any data. This value will not + be considered when calculating the color limits. + + Returns + ------- + DFHeatmap + Dataframe bound heatmap layer. + """ + canvas = self._canvas() + df = self._df + by_both = (*self._x, *self._y) + nx = len(self._x) + df_agg = self._aggregate(df, by_both, value) + map_x = self._cat_iter_x.prep_position_map(self._x) + map_y = self._cat_iter_y.prep_position_map(self._y) + dtype = df[value].dtype + if dtype.kind not in "fiub": + raise ValueError(f"Column {value!r} is not numeric.") + arr = np.full((len(map_y), len(map_x)), fill, dtype=dtype) + for sl, sub in df_agg.group_by(by_both): + xval, yval = sl[:nx], sl[nx:] + vals = sub[value] + if vals.size == 1: + arr[map_y[yval], map_x[xval]] = vals[0] + else: + raise ValueError(f"More than one value found for {sl!r}.") + if clim is None: + # `fill` may be outside the range of the data, so calculate clim here. 
+ clim = df_agg[value].min(), df_agg[value].max() + layer = _lt.DFHeatmap.from_array( + df_agg, arr, name=name, cmap=cmap, clim=clim, backend=canvas._get_backend(), + ) # fmt: skip + return canvas.add_layer(layer) + + def _aggregate( + self, + df: DataFrameWrapper[_DF], + by: tuple[str, ...], + on: str, + ) -> DataFrameWrapper[_DF]: + if self._agg_method == "size": + return df.value_count(by) + elif self._agg_method == "first": + return df.value_first(by, on) + else: + if on is None: + raise ValueError("Value column is not specified.") + return df.agg_by(by, on, self._agg_method) diff --git a/whitecanvas/canvas/dataframe/_feature_cat.py b/whitecanvas/canvas/dataframe/_feature_cat.py new file mode 100644 index 00000000..3abe26ee --- /dev/null +++ b/whitecanvas/canvas/dataframe/_feature_cat.py @@ -0,0 +1,372 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Sequence, + TypeVar, +) + +from whitecanvas import theme +from whitecanvas.canvas.dataframe._base import BaseCatPlotter +from whitecanvas.layers import tabular as _lt +from whitecanvas.layers.tabular import _jitter +from whitecanvas.types import ArrayLike1D, ColormapType, Orientation + +if TYPE_CHECKING: + from whitecanvas.canvas._base import CanvasBase + + NStr = str | Sequence[str] + +_C = TypeVar("_C", bound="CanvasBase") +_DF = TypeVar("_DF") + + +class CatPlotter(BaseCatPlotter[_C, _DF]): + """ + Categorical plotter that categorizes the data by features (color, style etc.) 
+ """ + + def __init__( + self, + canvas: _C, + df: _DF, + x: str | None, + y: str | None, + update_label: bool = False, + ): + super().__init__(canvas, df) + self._x = x + self._y = y + self._update_label = update_label + if update_label: + self._update_xy_label(x, y) + + def _get_x(self) -> str: + if self._x is None: + raise ValueError("Column for x-axis is not set") + return self._x + + def _get_y(self) -> str: + if self._y is None: + raise ValueError("Column for y-axis is not set") + return self._y + + def _update_xy_label(self, x: str | None, y: str | None) -> None: + """Update the x and y labels using the column names""" + canvas = self._canvas() + if isinstance(x, str): + canvas.x.label.text = x + if isinstance(y, str): + canvas.y.label.text = y + + def along_x(self) -> CatPlotter[_C, _DF]: + """Return the same plotter but with only x-axis set.""" + return self._copy_like(self._get_x(), None, self._update_label) + + def along_y(self) -> CatPlotter[_C, _DF]: + """Return the same plotter but with only y-axis set.""" + return self._copy_like(None, self._get_y(), self._update_label) + + def _copy_like(self, x, y, update_label): + out = self.__class__(self._canvas(), self._df, x, y, False) + out._update_label = update_label + return out + + def add_line( + self, + *, + name: str | None = None, + color: NStr | None = None, + width: str | None = None, + style: NStr | None = None, + ) -> _lt.DFLines[_DF]: + """ + Add a categorical line plot. + + >>> ### Use "time" column as x-axis and "value" column as y-axis + >>> canvas.cat(df, "time", "value").add_line() + + >>> ### Multiple lines colored by column "group" + >>> canvas.cat(df, "time", "value").add_line(color="group") + + >>> ### Multiple lines styled by column "group" + >>> canvas.cat(df, "time", "value").add_line(style="group") + + Parameters + ---------- + name : str, optional + Name of the layer. + color : str or sequence of str, optional + Column name(s) for coloring the lines. Must be categorical. 
+ width : str, optional + Column name for line width. Must be numerical. + style : str or sequence of str, optional + Column name(s) for styling the lines. Must be categorical. + + Returns + ------- + DFLines + Line collection layer. + """ + canvas = self._canvas() + width = theme._default("line.width", width) + layer = _lt.DFLines.from_table( + self._df, self._get_x(), self._get_y(), name=name, color=color, width=width, + style=style, backend=canvas._get_backend(), + ) # fmt: skip + if color is not None and not layer._color_by.is_const(): + layer.with_color(layer._color_by.by, palette=canvas._color_palette) + elif color is None: + layer.with_color(canvas._color_palette.next()) + return canvas.add_layer(layer) + + def add_markers( + self, + *, + name: str | None = None, + color: NStr | None = None, + hatch: NStr | None = None, + size: str | None = None, + symbol: NStr | None = None, + ) -> _lt.DFMarkers[_DF]: + """ + Add a categorical marker plot. + + >>> ### Use "time" column as x-axis and "value" column as y-axis + >>> canvas.cat(df, "time", "value").add_markers() + + >>> ### Multiple markers colored by column "group" + >>> canvas.cat(df, "time", "value").add_markers(color="group") + + >>> ### Change marker size according to "weight" column + >>> canvas.cat(df, "time", "value").add_markers(size="weight") + + >>> ### Multiple markers with hatches determined by column "group" + >>> canvas.cat(df, "time", "value").add_markers(hatch="group") + + >>> ### Multiple markers with symbols determined by "group" + >>> canvas.cat(df, "time", "value").add_markers(symbol="group") + + Parameters + ---------- + name : str, optional + Name of the layer. + color : str or sequence of str, optional + Column name(s) for coloring the lines. Must be categorical. + hatch : str or sequence of str, optional + Column name(s) for hatches. Must be categorical. + size : str, optional + Column name for marker size. Must be numerical. 
+ symbol : str or sequence of str, optional + Column name(s) for symbols. Must be categorical. + + Returns + ------- + DFMarkers + Marker collection layer. + """ + canvas = self._canvas() + xj = _jitter.IdentityJitter(self._get_x()) + yj = _jitter.IdentityJitter(self._get_y()) + layer = _lt.DFMarkers( + self._df, xj, yj, name=name, color=color, hatch=hatch, + size=size, symbol=symbol, backend=canvas._get_backend(), + ) # fmt: skip + if color is not None and not layer._color_by.is_const(): + layer.with_color(layer._color_by.by, palette=canvas._color_palette) + elif color is None: + layer.with_color(canvas._color_palette.next()) + return canvas.add_layer(layer) + + def add_hist2d( + self, + *, + cmap: ColormapType = "inferno", + name: str | None = None, + bins: int | tuple[int, int] = 10, + rangex: tuple[float, float] | None = None, + rangey: tuple[float, float] | None = None, + density: bool = False, + ): + """ + Add 2-D histogram of given x/y columns. + + >>> ### Use "tip" column as x-axis and "total_bill" column as y-axis + >>> canvas.cat(df, "tip", "total_bill").add_hist2d() + + Parameters + ---------- + cmap : colormap-like, default "inferno" + Colormap to use for the heatmap. + name : str, optional + Name of the layer. + bins : int or tuple[int, int], default 10 + If int, the number of bins for both x and y. If tuple, the number of bins + for x and y respectively. + rangex : (float, float), optional + Range of x values in which histogram will be built. + rangey : (float, float), optional + Range of y values in which histogram will be built. + density : bool, default False + If True, the result is the value of the probability density function at the + bin, normalized such that the integral over the range is 1. + + Returns + ------- + DFHeatmap + Dataframe bound heatmap layer. 
+ """ + canvas = self._canvas() + layer = _lt.DFHeatmap.build_hist( + self._df, self._get_x(), self._get_y(), cmap=cmap, name=name, bins=bins, + range=(rangex, rangey), density=density, backend=canvas._get_backend(), + ) # fmt: skip + return canvas.add_layer(layer) + + def add_pointplot( + self, + *, + name: str | None = None, + color: NStr | None = None, + hatch: NStr | None = None, + size: float | None = None, + capsize: float = 0.15, + ): + """ + Add 2-D point plot. + + >>> ### Use "time" column as x-axis and "value" column as y-axis + >>> canvas.cat(df, "time", "value").add_pointplot() + + >>> ### Multiple point plots colored by column "group" + >>> canvas.cat(df, "time", "value").add_pointplot(color="group") + + >>> ### Multiple point plots with hatches determined by column "group" + >>> canvas.cat(df, "time", "value").add_pointplot(hatch="group") + + Parameters + ---------- + name : str, optional + Name of the layer. + color : str or sequence of str, optional + Column name(s) for coloring the lines. Must be categorical. + hatch : str or sequence of str, optional + Column name(s) for hatches. Must be categorical. + size : float, optional + Size of the points. + capsize : float, default 0.15 + Size of the cap on the error bars. + + Returns + ------- + DFPointPlot2D + Point plot layer. 
def add_kde(
    self,
    *,
    band_width: float | None = None,
    name: str | None = None,
    color: NStr | None = None,
    width: float | None = None,
    style: NStr | None = None,
):
    """
    Add lines representing kernel density estimation.

    >>> ### Use "value" column as x-axis
    >>> canvas.cat(df, x="value").add_kde()

    >>> ### Multiple KDEs colored by column "group"
    >>> canvas.cat(df, x="value").add_kde(color="group")

    Parameters
    ----------
    band_width : float, default None
        Bandwidth of the kernel density estimation. If None, use Scott's rule.
    name : str, optional
        Name of the layer.
    color : str or sequence of str, optional
        Column name(s) for coloring the lines. Must be categorical.
    width : float, optional
        Line width. The value is resolved with
        ``theme._default("line.width", width)``, the same way ``add_hist``
        treats its ``width`` (presumably falling back to the theme value
        when None -- confirm against ``theme._default``).
    style : str or sequence of str, optional
        Column name(s) for styling the lines. Must be categorical.

    Returns
    -------
    DFKde
        Layer built by ``DFKde.from_table``, as returned by
        ``canvas.add_layer``.

    Raises
    ------
    ValueError
        Raised by ``_column_and_orient`` unless exactly one of the x/y
        columns is set.
    """
    canvas = self._canvas()
    width = theme._default("line.width", width)
    x0, orient = self._column_and_orient()
    layer = _lt.DFKde.from_table(
        self._df, x0, band_width=band_width, name=name,
        orient=orient, color=color, width=width, style=style,
        backend=canvas._get_backend(),
    )  # fmt: skip
    if color is not None and not layer._color_by.is_const():
        # Color varies with a column: map its values through the palette.
        layer.with_color(layer._color_by.by, palette=canvas._color_palette)
    elif color is None:
        # No color requested: take the next color of the canvas palette.
        layer.with_color(canvas._color_palette.next())
    if self._update_label:
        # The density goes on the axis that does not carry the data column.
        ax_label = "density"
        if orient.is_vertical:
            canvas.y.label.text = ax_label
        else:
            canvas.x.label.text = ax_label
    return canvas.add_layer(layer)
class _Aggregator(Generic[_C, _DF]):
    """
    Callable class attribute that converts a plotter into an aggregated one.

    Attribute access goes through ``__get__``, which hands back a fresh
    ``_Aggregator`` remembering the object it was accessed on; calling that
    result builds a ``OneAxisCatAggPlotter`` for the stored method name.
    """

    def __init__(self, method: str, plotter: OneAxisCatPlotter[_C, _DF] = None):
        # ``plotter`` is None for the copy that lives on the class itself.
        self._plotter = plotter
        self._method = method

    def __get__(self, ins: _C, owner) -> Self:
        # Re-bind on every access so that the call knows where it came from.
        return _Aggregator(method=self._method, plotter=ins)

    def __repr__(self) -> str:
        return "Aggregator<{}>".format(self._method)

    def __call__(self) -> OneAxisCatAggPlotter[_C, _DF]:
        """
        Aggregate the values before plotting it.

        Raises
        ------
        TypeError
            If no plotter is bound (the attribute was reached via the class).
        ValueError
            If the method is not "size" and the plotter has no value column.
        """
        source = self._plotter
        if source is None:
            raise TypeError("Cannot call this method from a class.")
        method = self._method
        if method == "size":
            # Counting needs no value column; the literal "size" is used.
            column = "size"
        else:
            column = source._value
            if column is None:
                raise ValueError("Value column is not specified.")
        return OneAxisCatAggPlotter(
            source._canvas(),
            source._cat_iter,
            offset=source._offset,
            value=column,
            method=method,
            orient=source._orient,
        )
self._method = method + self._plotter = plotter + + def __get__(self, ins: _C, owner) -> Self: + return _GroupAggregator(self._method, ins) + + def __repr__(self) -> str: + return f"GroupAggregator<{self._method}>" + + def __call__(self, by: str | tuple[str, ...]) -> OneAxisCatPlotter[_C, _DF]: + """Aggregate the values for each group before plotting it.""" + plotter = self._plotter + if isinstance(by, str): + by = (by,) + elif len(by) == 0: + raise ValueError("No column is specified for grouping.") + return type(plotter)( + plotter._canvas(), + plotter._df.agg_by((*plotter._offset, *by), plotter._value, self._method), + offset=plotter._offset, + value=plotter._value, + update_label=plotter._update_label, + ) + + +class OneAxisCatPlotter(BaseCatPlotter[_C, _DF]): + _orient: Orientation + + def __init__( + self, + canvas: _C, + df: _DF, + offset: str | tuple[str, ...] | None, + value: str | None, + update_label: bool = False, + ): + super().__init__(canvas, df) + if isinstance(offset, str): + offset = (offset,) + elif offset is None: + offset = () + self._offset: tuple[str, ...] 
= offset + self._cat_iter = CatIterator(self._df, offset) + self._value = value + self._update_label = update_label + if update_label: + if value is not None: + self._update_axis_labels(value) + pos, label = self._cat_iter.axis_ticks() + if self._orient.is_vertical: + canvas.x.ticks.set_labels(pos, label) + canvas.x.lim = (np.min(pos) - 0.5, np.max(pos) + 0.5) + else: + canvas.y.ticks.set_labels(pos, label) + canvas.y.lim = (np.min(pos) - 0.5, np.max(pos) + 0.5) + + def __repr__(self) -> str: + return ( + f"{type(self).__name__}(offset={self._offset!r}, value={self._value!r}, " + f"orient={self._orient!r})" + ) + + def _update_axis_labels(self, value_label: str) -> None: + """Update the x and y labels using the column names""" + canvas = self._canvas() + offset_label = self._cat_iter.axis_label() + if self._orient.is_vertical: + canvas.x.label.text = offset_label + canvas.y.label.text = value_label + else: + canvas.x.label.text = value_label + canvas.y.label.text = offset_label + + def _get_value(self) -> str: + if self._value is None: + raise ValueError("Value column is not specified.") + return self._value + + def _update_xy_ticks(self, pos, label): + """Update the x or y ticks to categorical ticks""" + canvas = self._canvas() + if self._orient.is_vertical: + canvas.x.ticks.set_labels(pos, label) + else: + canvas.y.ticks.set_labels(pos, label) + + ### 1-D categorical ### + + def add_violinplot( + self, + *, + name: str | None = None, + color: NStr | None = None, + hatch: NStr | None = None, + dodge: NStr | bool = True, + extent: float = 0.8, + shape: str = "both", + ) -> _lt.DFViolinPlot[_DF]: + """ + Add a categorical violin plot. + + >>> ### Use "species" column as categories and "weight" column as values. + >>> canvas.cat_x(df, x="species", y="weight").add_violinplot() + + >>> ### Color by column "region" with dodging. + >>> canvas.cat_x(df, "region", "weight").add_violinplot(dodge=True) + + Parameters + ---------- + name : str, optional + Name of the layer. 
def add_boxplot(
    self,
    *,
    color: NStr | None = None,
    hatch: NStr | None = None,
    dodge: NStr | bool = True,
    name: str | None = None,
    capsize: float = 0.1,
    extent: float = 0.8,
) -> _lt.DFBoxPlot[_DF]:
    """
    Add a categorical box plot.

    >>> ### Use "species" column as categories and "weight" column as values.
    >>> canvas.cat_x(df, x="species", y="weight").add_boxplot()

    >>> ### Color by column "region" with dodging.
    >>> canvas.cat_x(df, "species", "weight").add_boxplot(color="region", dodge=True)

    Parameters
    ----------
    color : str or sequence of str, optional
        Column name(s) for coloring the boxes. Must be categorical.
    hatch : str or sequence of str, optional
        Column name(s) for hatches. Must be categorical.
    dodge : str, sequence of str or bool, default True
        Forwarded unchanged to ``DFBoxPlot``.
        NOTE(review): presumably the column(s) to dodge by, or a flag to
        dodge by the grouping columns -- confirm against ``DFBoxPlot``.
    name : str, optional
        Name of the layer.
    capsize : float, default 0.1
        Length of the caps as a fraction of the width of the box.
    extent : float, default 0.8
        Width of the boxes. Usually in range (0, 1].

    Returns
    -------
    DFBoxPlot
        Box plot layer.

    Raises
    ------
    ValueError
        If the value column is not specified.
    """
    canvas = self._canvas()
    value_column = self._get_value()
    box_options = {
        "name": name,
        "color": color,
        "hatch": hatch,
        "dodge": dodge,
        "orient": self._orient,
        "capsize": capsize,
        "extent": extent,
        "backend": canvas._get_backend(),
    }
    boxes = _lt.DFBoxPlot(self._cat_iter, value_column, **box_options)
    # Color assignment is shared by all box-like layers.
    self._post_add_boxlike(boxes, color)
    return canvas.add_layer(boxes)
+ """ + canvas = self._canvas() + layer = _lt.DFPointPlot( + self._cat_iter, self._get_value(), name=name, color=color, hatch=hatch, + dodge=dodge, orient=self._orient, capsize=capsize, + backend=canvas._get_backend(), + ) # fmt: skip + self._post_add_boxlike(layer, color) + return canvas.add_layer(layer) + + def add_barplot( + self, + *, + color: NStr | None = None, + hatch: NStr | None = None, + dodge: NStr | bool = True, + name: str | None = None, + capsize: float = 0.1, + extent: float = 0.8, + ) -> _lt.DFBarPlot[_DF]: + """ + Add a categorical bar plot (bars with error bars). + + >>> ### Use "species" column as categories and "weight" column as values. + >>> canvas.cat_x(df, x="species", y="weight").add_barplot() + + >>> ### Color by column "region" with dodging. + >>> canvas.cat_x(df, "region", "weight").add_barplot(dodge=True) + + The default estimator and errors are mean and standard deviation. To change + them, use `est_by_*` and `err_by_*` methods. + + >>> ### Use standard error x 2 (~95%) as error bars. + >>> canvas.cat_x(df, "species", "weight").add_barplot().err_by_se(scale=2.0) + + Parameters + ---------- + color : str or sequence of str, optional + Column name(s) for coloring the lines. Must be categorical. + hatch : str or sequence of str, optional + Column name(s) for hatches. Must be categorical. + name : str, optional + Name of the layer. + capsize : float, default 0.1 + Length of the caps as a fraction of the width of the box. + extent : float, default 0.8 + Width of the violins. Usually in range (0, 1]. + + Returns + ------- + DFBarPlot + Bar plot layer. 
+ """ + canvas = self._canvas() + layer = _lt.DFBarPlot( + self._cat_iter, self._get_value(), name=name, color=color, hatch=hatch, + dodge=dodge, orient=self._orient, capsize=capsize, extent=extent, + backend=canvas._get_backend(), + ) # fmt: skip + self._post_add_boxlike(layer, color) + return canvas.add_layer(layer) + + def _post_add_boxlike(self, layer: _BoxLikeMixin, color): + canvas = self._canvas() + if color is not None and not layer._color_by.is_const(): + layer.with_color_palette(canvas._color_palette) + elif color is None: + layer.with_color(canvas._color_palette.next()) + + def add_stripplot( + self, + *, + color: NStr | None = None, + hatch: NStr | None = None, + symbol: NStr | None = None, + size: str | None = None, + dodge: NStr | bool = False, + name: str | None = None, + extent: float = 0.5, + seed: int | None = 0, + ) -> _lt.DFMarkerGroups[_DF]: + """ + Add a categorical strip plot. + + >>> ### Use "species" column as categories and "weight" column as values. + >>> canvas.cat_x(df, x="species", y="weight").add_stripplot() + + >>> ### Color by column "region" with dodging. + >>> canvas.cat_x(df, "region", "weight").add_stripplot(dodge=True) + + Parameters + ---------- + color : str or sequence of str, optional + Column name(s) for coloring the lines. Must be categorical. + hatch : str or sequence of str, optional + Column name(s) for hatches. Must be categorical. + symbol : str or sequence of str, optional + Column name(s) for symbols. Must be categorical. + size : str, optional + Column name for marker size. Must be numerical. + name : str, optional + Name of the layer. + extent : float, default 0.5 + Width of the violins. Usually in range (0, 1]. + seed : int, optional + Random seed for jittering. + + Returns + ------- + DFMarkerGroups + Marker collection layer. 
def add_markers(
    self,
    *,
    name: str | None = None,
    color: NStr | None = None,
    hatch: NStr | None = None,
    symbol: NStr | None = None,
    size: str | None = None,
    dodge: NStr | bool = False,
) -> _lt.DFMarkerGroups[_DF]:
    """
    Alias of `add_stripplot` with no jittering.

    All arguments are handed to ``add_stripplot`` unchanged, with ``extent``
    and ``seed`` both fixed to 0.
    """
    strip_options = {
        "color": color,
        "hatch": hatch,
        "symbol": symbol,
        "size": size,
        "dodge": dodge,
        "extent": 0,
        "seed": 0,
        "name": name,
    }
    return self.add_stripplot(**strip_options)
def add_hist_heatmap(
    self,
    cmap: ColormapType = "inferno",
    clim: tuple[float, float] | None = None,
) -> _lt.DFHeatmap[_DF]:
    """
    Placeholder for a histogram heatmap; not implemented yet.

    Raises
    ------
    NotImplementedError
        Always, whatever arguments are given.
    """
    # TODO: implement this
    raise NotImplementedError()
def __init__(
    self,
    canvas: _C,
    cat_iter: CatIterator[_DF],
    offset: str | tuple[str, ...],
    value: str,
    method: AggMethods,
    orient: Orientation,
):
    """
    Store what is needed to plot aggregated values.

    The base class is initialized with the canvas and the data frame held
    by ``cat_iter``; the remaining arguments are kept as private attributes
    for the ``add_*`` methods.
    """
    frame = cat_iter._df
    super().__init__(canvas, frame)
    self._cat_iter = cat_iter
    self._orient = orient
    self._agg_method = method
    self._value = value
    self._offset = offset
def add_markers(
    self,
    *,
    name: str | None = None,
    color: NStr | ColorType | None = None,
    hatch: NStr | Hatch | None = None,
    size: str | float | None = None,
    symbol: NStr | Symbol | None = None,
    dodge: NStr | bool = False,
) -> _lt.DFMarkers[_DF]:
    """
    Add markers that represent the aggregated values.

    >>> canvas.cat_x(df, x="time", y="value").mean().add_markers()

    Parameters
    ----------
    name : str, optional
        Name of the layer.
    color : str or sequence of str, optional
        Column name(s) for coloring the markers. Must be categorical.
        (The annotation also admits a constant color.)
    hatch : str or sequence of str, optional
        Column name(s) for hatches. Must be categorical.
        (The annotation also admits a constant hatch.)
    size : str or float, optional
        Column name for marker size (must be numerical), or a number.
    symbol : str or sequence of str, optional
        Column name(s) for symbols. Must be categorical.
        (The annotation also admits a constant symbol.)
    dodge : str, sequence of str or bool, default False
        Passed to ``_shared.norm_dodge`` together with the color, hatch and
        symbol arguments to derive the grouping and dodging columns.
        NOTE(review): exact semantics live in ``norm_dodge`` -- confirm.

    Returns
    -------
    DFMarkers
        Marker collection layer.
    """
    canvas = self._canvas()
    source = self._df
    group_keys, dodge_keys = _shared.norm_dodge(
        source, self._offset, color, hatch, symbol, dodge=dodge
    )
    aggregated = self._aggregate(source, group_keys, self._value)
    positions = self._cat_iter.prep_position_map(group_keys, dodge=dodge_keys)

    category_axis = _jitter.CategoricalJitter(group_keys, positions)
    value_axis = _jitter.IdentityJitter(self._value).check(aggregated)
    # Categories run along x for vertical plots and along y otherwise.
    if self._orient.is_vertical:
        x_axis, y_axis = category_axis, value_axis
    else:
        x_axis, y_axis = value_axis, category_axis
    markers = _lt.DFMarkers(
        aggregated,
        x_axis,
        y_axis,
        name=name,
        color=color,
        hatch=hatch,
        size=size,
        symbol=symbol,
        backend=canvas._get_backend(),
    )
    if color is None:
        markers.with_color(canvas._color_palette.next())
    elif not markers._color_by.is_const():
        markers.with_color(color, palette=canvas._color_palette)
    return canvas.add_layer(markers)
+ """ + canvas = self._canvas() + df = self._df + _splitby, _dodge = _shared.norm_dodge( + df, self._offset, color, hatch, dodge=dodge + ) # fmt: skip + df_agg = self._aggregate(df, _splitby, self._value) + _pos_map = self._cat_iter.prep_position_map(_splitby, dodge=_dodge) + + xj = _jitter.CategoricalJitter(_splitby, _pos_map) + yj = _jitter.IdentityJitter(self._value).check(df_agg) + + _extent = self._cat_iter.zoom_factor(_dodge) * extent + if not self._orient.is_vertical: + xj, yj = yj, xj + layer = _lt.DFBars.from_table( + df_agg, xj, yj, name=name, color=color, hatch=hatch, extent=_extent, + backend=canvas._get_backend(), + ) # fmt: skip + if color is not None and not layer._color_by.is_const(): + layer.with_color(color, palette=canvas._color_palette) + elif color is None: + layer.with_color(canvas._color_palette.next()) + return canvas.add_layer(layer) + + def _aggregate( + self, + df: DataFrameWrapper, + by: tuple[str, ...], + on: str, + ) -> DataFrameWrapper[_DF]: + if self._agg_method == "size": + return df.value_count(by) + elif self._agg_method == "first": + return df.value_first(by, on) + else: + if on is None: + raise ValueError("Value column is not specified.") + return df.agg_by(by, on, self._agg_method) + + +class XCatPlotter(OneAxisCatPlotter[_C, _DF]): + _orient = Orientation.VERTICAL + + +class YCatPlotter(OneAxisCatPlotter[_C, _DF]): + _orient = Orientation.HORIZONTAL diff --git a/whitecanvas/canvas/dataframe/_plot.py b/whitecanvas/canvas/dataframe/_plot.py deleted file mode 100644 index fe710d6f..00000000 --- a/whitecanvas/canvas/dataframe/_plot.py +++ /dev/null @@ -1,1094 +0,0 @@ -from __future__ import annotations - -import weakref -from typing import ( - TYPE_CHECKING, - Generic, - Sequence, - TypeVar, - Union, -) - -import numpy as np - -from whitecanvas import theme -from whitecanvas._exceptions import ReferenceDeletedError -from whitecanvas.canvas.dataframe._utils import PlotArg -from whitecanvas.layers import tabular as _lt -from 
whitecanvas.layers.tabular._dataframe import parse -from whitecanvas.types import ( - ArrayLike1D, - ColormapType, - ColorType, - Hatch, - Orientation, - Symbol, -) - -if TYPE_CHECKING: - from whitecanvas.canvas._base import CanvasBase - from whitecanvas.layers.tabular._box_like import _BoxLikeMixin - from whitecanvas.layers.tabular._dataframe import DataFrameWrapper - -_C = TypeVar("_C", bound="CanvasBase") -_DF = TypeVar("_DF") -NStr = Union[str, Sequence[str]] -_Orientation = Union[str, Orientation] - - -class _Plotter(Generic[_C, _DF]): - def __init__( - self, - canvas: _C, - df: _DF, - update_label: bool = False, - ): - self._canvas_ref = weakref.ref(canvas) - self._df = df - self._update_label = update_label - - def _canvas(self) -> _C: - canvas = self._canvas_ref() - if canvas is None: - raise ReferenceDeletedError("Canvas has been deleted.") - return canvas - - def _update_xy_label( - self, - x: str | tuple[str, ...], - y: str | tuple[str, ...], - orient: Orientation = Orientation.VERTICAL, - ) -> None: - """Update the x and y labels using the column names""" - canvas = self._canvas() - if not isinstance(x, str): - x = "/".join(x) - if not isinstance(y, str): - y = "/".join(y) - if orient.is_vertical: - canvas.x.label.text = x - canvas.y.label.text = y - else: - canvas.x.label.text = y - canvas.y.label.text = x - - def _update_xy_ticks(self, pos, label, orient: Orientation = Orientation.VERTICAL): - """Update the x or y ticks to categorical ticks""" - canvas = self._canvas() - if orient.is_vertical: - canvas.x.ticks.set_labels(pos, label) - else: - canvas.y.ticks.set_labels(pos, label) - - -class DataFramePlotter(_Plotter[_C, _DF]): - ### 0-D categorical ### - def add_line( - self, - x: str, - y: str, - *, - name: str | None = None, - color: NStr | None = None, - width: str | None = None, - style: NStr | None = None, - ) -> _lt.DFLines[_DF]: - """ - Add a categorical line plot. 
- - >>> ### Use "time" column as x-axis and "value" column as y-axis - >>> canvas.cat(df).add_line("time", "value") - - >>> ### Multiple lines colored by column "group" - >>> canvas.cat(df).add_line("time", "value", color="group") - - >>> ### Multiple lines styled by column "group" - >>> canvas.cat(df).add_line("time", "value", style="group") - - Parameters - ---------- - x : str - Column name for x-axis. - y : str - Column name for y-axis. - name : str, optional - Name of the layer. - color : str or sequence of str, optional - Column name(s) for coloring the lines. Must be categorical. - width : str, optional - Column name for line width. Must be numerical. - style : str or sequence of str, optional - Column name(s) for styling the lines. Must be categorical. - - Returns - ------- - WrappedLines - Line collection layer. - """ - canvas = self._canvas() - layer = _lt.DFLines.from_table( - self._df, x, y, name=name, color=color, width=width, style=style, - backend=canvas._get_backend(), - ) # fmt: skip - if color is not None and not layer._color_by.is_const(): - layer.with_color(layer._color_by.by, palette=canvas._color_palette) - elif color is None: - layer.with_color(canvas._color_palette.next()) - if self._update_label: - self._update_xy_label(x, y) - return canvas.add_layer(layer) - - def add_markers( - self, - x: str, - y: str, - *, - name: str | None = None, - color: NStr | None = None, - hatch: NStr | None = None, - size: str | None = None, - symbol: NStr | None = None, - ) -> _lt.DFMarkers[_DF]: - """ - Add a categorical marker plot. 
- - >>> ### Use "time" column as x-axis and "value" column as y-axis - >>> canvas.cat(df).add_markers("time", "value") - - >>> ### Multiple markers colored by column "group" - >>> canvas.cat(df).add_markers("time", "value", color="group") - - >>> ### Multiple markers with hatches determined by column "group" - >>> canvas.cat(df).add_markers("time", "value", style="group") - - >>> ### Multiple markers with symbols determined by "group" - >>> canvas.cat(df).add_markers("time", "value", symbol="group") - - Parameters - ---------- - x : str - Column name for x-axis. - y : str - Column name for y-axis. - name : str, optional - Name of the layer. - color : str or sequence of str, optional - Column name(s) for coloring the lines. Must be categorical. - hatch : str or sequence of str, optional - Column name(s) for hatches. Must be categorical. - size : str, optional - Column name for marker size. Must be numerical. - symbol : str or sequence of str, optional - Column name(s) for symbols. Must be categorical. - - Returns - ------- - WrappedMarkers - Marker collection layer. - """ - canvas = self._canvas() - layer = _lt.DFMarkers.from_table( - self._df, x, y, name=name, color=color, hatch=hatch, size=size, - symbol=symbol, backend=canvas._get_backend(), - ) # fmt: skip - if color is not None and not layer._color_by.is_const(): - layer.with_color(layer._color_by.by, palette=canvas._color_palette) - elif color is None: - layer.with_color(canvas._color_palette.next()) - if self._update_label: - self._update_xy_label(x, y) - return canvas.add_layer(layer) - - def add_bar( - self, - x: str, - y: str, - *, - name: str | None = None, - color: NStr | None = None, - hatch: NStr | None = None, - extent: float = 0.8, - ) -> _lt.DFBars[_DF]: - """ - Add a categorical bar plot. 
- - >>> ### Use "time" column as x-axis and "value" column as y-axis - >>> canvas.cat(df).add_bar("time", "value") - - >>> ### Multiple bars colored by column "group" - >>> canvas.cat(df).add_bar("time", "value", color="group") - - >>> ### Multiple bars with hatches determined by column "group" - >>> canvas.cat(df).add_bar("time", "value", hatch="group") - - Parameters - ---------- - x : str - Column name for x-axis. - y : str - Column name for y-axis. - name : str, optional - Name of the layer. - color : str or sequence of str, optional - Column name(s) for coloring the lines. Must be categorical. - hatch : str or sequence of str, optional - Column name(s) for hatches. Must be categorical. - extent : float, optional - Width of the bars. Usually in range (0, 1]. - - Returns - ------- - WrappedBars - Bar collection layer. - """ - canvas = self._canvas() - layer = _lt.DFBars.from_table( - self._df, x, y, name=name, color=color, hatch=hatch, extent=extent, - backend=canvas._get_backend(), - ) # fmt: skip - if color is not None and not layer._color_by.is_const(): - layer.with_color(layer._color_by.by, palette=canvas._color_palette) - elif color is None: - layer.with_color(canvas._color_palette.next()) - if self._update_label: - self._update_xy_label(x, y) - return canvas.add_layer(layer) - - def add_hist_line( - self, - x: str, - *, - bins: int | ArrayLike1D = 10, - range: tuple[float, float] | None = None, - density: bool = False, - name: str | None = None, - orient: str | Orientation = Orientation.VERTICAL, - color: NStr | None = None, - width: str | None = None, - style: NStr | None = None, - ): - """ - Add lines representing histograms. - - >>> ### Use "value" column as x-axis - >>> canvas.cat(df).add_line_hist("value", bins=8, density=True) - - >>> ### Multiple histograms colored by column "group" - >>> canvas.cat(df).add_line_hist("value", color="group") - - Parameters - ---------- - x : str - Column name for x-axis. 
- bins : int or array-like, default 10 - If an integer, the number of bins. If an array, the bin edges. - range : (float, float), default None - If provided, the lower and upper range of the bins. - density : bool, default False - If True, the total area of the histogram will be normalized to 1. - name : str, optional - Name of the layer. - orient : str, default "vertical" - Orientation of the violins. Can be "vertical" or "horizontal". - color : str or sequence of str, optional - Column name(s) for coloring the lines. Must be categorical. - width : str, optional - Column name for line width. Must be numerical. - style : str or sequence of str, optional - Column name(s) for styling the lines. Must be categorical. - - Returns - ------- - WrappedLines - Line collection layer. - """ - canvas = self._canvas() - layer = _lt.DFLines.build_hist( - self._df, x, bins=bins, range=range, density=density, name=name, - orient=orient, color=color, width=width, style=style, - backend=canvas._get_backend(), - ) # fmt: skip - if color is not None and not layer._color_by.is_const(): - layer.with_color(layer._color_by.by, palette=canvas._color_palette) - elif color is None: - layer.with_color(canvas._color_palette.next()) - if self._update_label: - y = "density" if density else "count" - self._update_xy_label(x, y, orient) - return canvas.add_layer(layer) - - def add_kde( - self, - value: str, - *, - band_width: float | None = None, - name: str | None = None, - orient: str | Orientation = Orientation.VERTICAL, - color: NStr | None = None, - width: str | None = None, - style: NStr | None = None, - ): - """ - Add lines representing kernel density estimation. - - >>> ### Use "value" column as x-axis - >>> canvas.cat(df).add_kde("value") - - >>> ### Multiple KDEs colored by column "group" - >>> canvas.cat(df).add_kde("value", color="group") - - Parameters - ---------- - value : str - Column name for x-axis. - band_width : float, default None - Bandwidth of the kernel density estimation. 
If None, use Scott's rule. - name : str, optional - Name of the layer. - orient : str, default "vertical" - Orientation of the violins. Can be "vertical" or "horizontal". - color : str or sequence of str, optional - Column name(s) for coloring the lines. Must be categorical. - width : str, optional - Column name for line width. Must be numerical. - style : str or sequence of str, optional - Column name(s) for styling the lines. Must be categorical. - - Returns - ------- - WrappedLines - Line collection layer. - """ - canvas = self._canvas() - layer = _lt.DFLines.build_kde( - self._df, value, band_width=band_width, name=name, - orient=orient, color=color, width=width, style=style, - backend=canvas._get_backend(), - ) # fmt: skip - if color is not None and not layer._color_by.is_const(): - layer.with_color(layer._color_by.by, palette=canvas._color_palette) - elif color is None: - layer.with_color(canvas._color_palette.next()) - if self._update_label: - self._update_xy_label(value, "density", orient) - return canvas.add_layer(layer) - - def add_hist2d( - self, - x: str, - y: str, - *, - cmap: ColormapType = "inferno", - name: str | None = None, - bins: int | tuple[int, int] = 10, - range: tuple[tuple[float, float], tuple[float, float]] | None = None, - density: bool = False, - ): - """Add 2-D histogram of given columns.""" - canvas = self._canvas() - layer = _lt.DFHeatmap.build_hist( - self._df, x, y, cmap=cmap, name=name, bins=bins, range=range, - density=density, backend=canvas._get_backend(), - ) # fmt: skip - if self._update_label: - self._update_xy_label(x, y) - return canvas.add_layer(layer) - - ### 1-D categorical ### - - def add_violinplot( - self, - offset: tuple[str, ...], - value: str, - *, - color: NStr | None = None, - hatch: NStr | None = None, - extent: float = 0.8, - shape: str = "both", - name: str | None = None, - orient: _Orientation = Orientation.VERTICAL, - ) -> _lt.DFViolinPlot[_DF]: - """ - Add a categorical violin plot. 
- - >>> ### Use "species" column as categories and "weight" column as values. - >>> canvas.cat(df).add_violinplot("species", "weight") - - >>> ### Color by column "region" with dodging. - >>> offset = ["species", "region"] # categories that offset will be added - >>> canvas.cat(df).add_violinplot(offset, "weight", color="region") - - Parameters - ---------- - offset : tuple of str - Column name(s) for x-axis. - value : str - Column name for y-axis. - color : str or sequence of str, optional - Column name(s) for coloring the lines. Must be categorical. - hatch : str or sequence of str, optional - Column name(s) for hatches. Must be categorical. - extent : float, default 0.8 - Width of the violins. Usually in range (0, 1]. - shape : str, default "both" - Shape of the violins. Can be "both", "left", or "right". - name : str, optional - Name of the layer. - orient : str, default "vertical" - Orientation of the violins. Can be "vertical" or "horizontal". - - Returns - ------- - WrappedViolinPlot - Violin plot layer. - """ - canvas = self._canvas() - orient = Orientation.parse(orient) - layer = _lt.DFViolinPlot.from_table( - self._df, offset, value, name=name, color=color, hatch=hatch, extent=extent, - shape=shape, orient=orient, backend=canvas._get_backend(), - ) # fmt: skip - self._post_add_boxlike(layer, color, orient, value) - return canvas.add_layer(layer) - - def add_boxplot( - self, - offset: tuple[str, ...], - value: str, - *, - color: NStr | None = None, - hatch: NStr | None = None, - name: str | None = None, - orient: _Orientation = Orientation.VERTICAL, - capsize: float = 0.1, - extent: float = 0.8, - ) -> _lt.DFBoxPlot[_DF]: - """ - Add a categorical box plot. - - >>> ### Use "species" column as categories and "weight" column as values. - >>> canvas.cat(df).add_boxplot("species", "weight") - - >>> ### Color by column "region" with dodging. 
- >>> offset = ["species", "region"] # categories that offset will be added - >>> canvas.cat(df).add_boxplot(offset, "weight", color="region") - - Parameters - ---------- - offset : tuple of str - Column name(s) for x-axis. - value : str - Column name for y-axis. - color : str or sequence of str, optional - Column name(s) for coloring the lines. Must be categorical. - hatch : str or sequence of str, optional - Column name(s) for hatches. Must be categorical. - name : str, optional - Name of the layer. - orient : str, default "vertical" - Orientation of the violins. Can be "vertical" or "horizontal". - capsize : float, default 0.1 - Length of the caps as a fraction of the width of the box. - extent : float, default 0.8 - Width of the violins. Usually in range (0, 1]. - - Returns - ------- - WrappedBoxPlot - Box plot layer. - """ - canvas = self._canvas() - orient = Orientation.parse(orient) - layer = _lt.DFBoxPlot.from_table( - self._df, offset, value, name=name, color=color, hatch=hatch, orient=orient, - capsize=capsize, extent=extent, backend=canvas._get_backend(), - ) # fmt: skip - self._post_add_boxlike(layer, color, orient, value) - return canvas.add_layer(layer) - - def add_pointplot( - self, - offset: tuple[str, ...], - value: str, - *, - color: NStr | None = None, - hatch: NStr | None = None, - name: str | None = None, - orient: _Orientation = Orientation.VERTICAL, - capsize: float = 0.1, - ) -> _lt.DFPointPlot[_DF]: - """ - Add a categorical point plot (markers with error bars). - - >>> ### Use "species" column as categories and "weight" column as values. - >>> canvas.cat(df).add_pointplot("species", "weight") - - >>> ### Color by column "region" with dodging. - >>> offset = ["species", "region"] # categories that offset will be added - >>> canvas.cat(df).add_pointplot(offset, "weight", color="region") - - The default estimator and errors are mean and standard deviation. To change - them, use `est_by_*` and `err_by_*` methods. 
- - >>> ### Use standard error x 2 (~95%) as error bars. - >>> canvas.cat(df).add_pointplot("species", "weight").err_by_se(scale=2.0) - - Parameters - ---------- - offset : tuple of str - Column name(s) for x-axis. - value : str - Column name for y-axis. - color : str or sequence of str, optional - Column name(s) for coloring the lines. Must be categorical. - hatch : str or sequence of str, optional - Column name(s) for hatches. Must be categorical. - name : str, optional - Name of the layer. - orient : str, default "vertical" - Orientation of the violins. Can be "vertical" or "horizontal". - capsize : float, default 0.1 - Length of the caps as a fraction of the width of the box. - - Returns - ------- - WrappedPointPlot - Point plot layer. - """ - canvas = self._canvas() - orient = Orientation.parse(orient) - layer = _lt.DFPointPlot.from_table( - self._df, offset, value, name=name, color=color, hatch=hatch, orient=orient, - capsize=capsize, backend=canvas._get_backend(), - ) # fmt: skip - self._post_add_boxlike(layer, color, orient, value) - return canvas.add_layer(layer) - - def add_barplot( - self, - offset: tuple[str, ...], - value: str, - *, - color: NStr | None = None, - hatch: NStr | None = None, - name: str | None = None, - orient: _Orientation = Orientation.VERTICAL, - capsize: float = 0.1, - extent: float = 0.8, - ) -> _lt.DFBarPlot[_DF]: - """ - Add a categorical bar plot (bars with error bars). - - >>> ### Use "species" column as categories and "weight" column as values. - >>> canvas.cat(df).add_barplot("species", "weight") - - >>> ### Color by column "region" with dodging. - >>> offset = ["species", "region"] # categories that offset will be added - >>> canvas.cat(df).add_barplot(offset, "weight", color="region") - - The default estimator and errors are mean and standard deviation. To change - them, use `est_by_*` and `err_by_*` methods. - - >>> ### Use standard error x 2 (~95%) as error bars. 
- >>> canvas.cat(df).add_barplot("species", "weight").err_by_se(scale=2.0) - - Parameters - ---------- - offset : tuple of str - Column name(s) for x-axis. - value : str - Column name for y-axis. - color : str or sequence of str, optional - Column name(s) for coloring the lines. Must be categorical. - hatch : str or sequence of str, optional - Column name(s) for hatches. Must be categorical. - name : str, optional - Name of the layer. - orient : str, default "vertical" - Orientation of the violins. Can be "vertical" or "horizontal". - capsize : float, default 0.1 - Length of the caps as a fraction of the width of the box. - extent : float, default 0.8 - Width of the violins. Usually in range (0, 1]. - - Returns - ------- - WrappedBarPlot - Bar plot layer. - """ - canvas = self._canvas() - orient = Orientation.parse(orient) - layer = _lt.DFBarPlot.from_table( - self._df, offset, value, name=name, color=color, hatch=hatch, orient=orient, - capsize=capsize, extent=extent, backend=canvas._get_backend(), - ) # fmt: skip - self._post_add_boxlike(layer, color, orient, value) - return canvas.add_layer(layer) - - def _post_add_boxlike(self, layer: _BoxLikeMixin, color, orient, value: str): - canvas = self._canvas() - if color is not None and not layer._color_by.is_const(): - layer.with_color(layer._color_by.by, palette=canvas._color_palette) - elif color is None: - layer.with_color(canvas._color_palette.next()) - if self._update_label: - pos, labels, offset_labels = layer._generate_labels() - self._update_xy_ticks(pos, labels, orient=orient) - self._update_xy_label(offset_labels, value, orient=orient) - - def add_stripplot( - self, - offset: tuple[str, ...], - value: str, - *, - color: NStr | None = None, - hatch: NStr | None = None, - symbol: NStr | None = None, - size: str | None = None, - name: str | None = None, - orient: _Orientation = Orientation.VERTICAL, - extent: float = 0.5, - seed: int | None = 0, - ) -> _lt.DFMarkerGroups[_DF]: - """ - Add a categorical strip 
plot. - - >>> ### Use "species" column as categories and "weight" column as values. - >>> canvas.cat(df).add_stripplot("species", "weight") - - >>> ### Color by column "region" with dodging. - >>> offset = ["species", "region"] # categories that offset will be added - >>> canvas.cat(df).add_stripplot(offset, "weight", color="region") - - Parameters - ---------- - offset : tuple of str - Column name(s) for x-axis. - value : str - Column name for y-axis. - color : str or sequence of str, optional - Column name(s) for coloring the lines. Must be categorical. - hatch : str or sequence of str, optional - Column name(s) for hatches. Must be categorical. - symbol : str or sequence of str, optional - Column name(s) for symbols. Must be categorical. - size : str, optional - Column name for marker size. Must be numerical. - name : str, optional - Name of the layer. - orient : str, default "vertical" - Orientation of the violins. Can be "vertical" or "horizontal". - extent : float, default 0.5 - Width of the violins. Usually in range (0, 1]. - seed : int, optional - Random seed for jittering. - - Returns - ------- - WrappedMarkerGroups - Marker collection layer. 
- """ - canvas = self._canvas() - orient = Orientation.parse(orient) - symbol = theme._default("markers.symbol", symbol) - size = theme._default("markers.size", size) - layer = _lt.DFMarkers.build_stripplot( - self._df, offset, value, name=name, color=color, hatch=hatch, symbol=symbol, - size=size, orient=orient, extent=extent, seed=seed, - backend=canvas._get_backend(), - ) # fmt: skip - if color is not None and not layer._color_by.is_const(): - layer.with_color(layer._color_by.by, palette=canvas._color_palette) - elif color is None: - layer.with_color(canvas._color_palette.next()) - if self._update_label: - pos, labels = layer._generate_labels() - self._update_xy_ticks(pos, labels, orient=orient) - self._update_xy_label(offset, value, orient=orient) - return canvas.add_layer(layer) - - def add_swarmplot( - self, - offset: NStr, - value: str, - *, - color: NStr | None = None, - hatch: NStr | None = None, - symbol: NStr | None = None, - size: str | None = None, - name: str | None = None, - orient: _Orientation = Orientation.VERTICAL, - extent: float = 0.8, - sort: bool = False, - ) -> _lt.DFMarkerGroups[_DF]: - """ - Add a categorical swarm plot. - - >>> ### Use "species" column as categories and "weight" column as values. - >>> canvas.cat(df).add_swarmplot("species", "weight") - - >>> ### Color by column "region" with dodging. - >>> offset = ["species", "region"] # categories that offset will be added - >>> canvas.cat(df).add_swarmplot(offset, "weight", color="region") - - Parameters - ---------- - offset : tuple of str - Column name(s) for x-axis. - value : str - Column name for y-axis. - color : str or sequence of str, optional - Column name(s) for coloring the lines. Must be categorical. - hatch : str or sequence of str, optional - Column name(s) for hatches. Must be categorical. - symbol : str or sequence of str, optional - Column name(s) for symbols. Must be categorical. - size : str, optional - Column name for marker size. Must be numerical. 
- name : str, optional - Name of the layer. - orient : str, default "vertical" - Orientation of the violins. Can be "vertical" or "horizontal". - extent : float, default 0.8 - Width of the violins. Usually in range (0, 1]. - sort : bool, default False - Whether to sort the data by value. - - Returns - ------- - WrappedMarkerGroups - Marker collection layer. - """ - canvas = self._canvas() - orient = Orientation.parse(orient) - symbol = theme._default("markers.symbol", symbol) - size = theme._default("markers.size", size) - layer = _lt.DFMarkers.build_swarmplot( - self._df, offset, value, name=name, color=color, hatch=hatch, symbol=symbol, - size=size, orient=orient, extent=extent, sort=sort, - backend=canvas._get_backend(), - ) # fmt: skip - if color is not None and not layer._color_by.is_const(): - layer.with_color(layer._color_by.by, palette=canvas._color_palette) - elif color is None: - layer.with_color(canvas._color_palette.next()) - if self._update_label: - pos, labels = layer._generate_labels() - self._update_xy_ticks(pos, labels, orient=orient) - self._update_xy_label(offset, value, orient=orient) - return canvas.add_layer(layer) - - def add_countplot( - self, - offset: NStr, - *, - color: NStr | None = None, - hatch: NStr | None = None, - name: str | None = None, - orient: _Orientation = Orientation.VERTICAL, - extent: float = 0.8, - ) -> _lt.DFBars[_DF]: - """ - Add a categorical count plot. - - >>> ### Count for each category in column "species". - >>> canvas.cat(df).add_countplot("species") - - >>> ### Color by column "region" with dodging. - >>> offset = ["species", "region"] # categories that offset will be added - >>> canvas.cat(df).add_countplot(offset, color="region") - - Parameters - ---------- - offset : tuple of str - Column name(s) for x-axis. - color : str or sequence of str, optional - Column name(s) for coloring the lines. Must be categorical. - hatch : str or sequence of str, optional - Column name(s) for hatches. Must be categorical. 
- name : str, optional - Name of the layer. - orient : str, default "vertical" - Orientation of the violins. Can be "vertical" or "horizontal". - extent : float, default 0.8 - Width of the violins. Usually in range (0, 1]. - - Returns - ------- - WrappedBars - Bar collection layer. - """ - canvas = self._canvas() - orient = Orientation.parse(orient) - layer = _lt.DFBars.build_count( - self._df, offset, color=color, hatch=hatch, orient=orient, extent=extent, - name=name, backend=canvas._get_backend(), - ) # fmt: skip - if color is not None and not layer._color_by.is_const(): - layer.with_color(layer._color_by.by, palette=canvas._color_palette) - elif color is None: - layer.with_color(canvas._color_palette.next()) - if self._update_label: - self._update_xy_label(offset, "count", orient=orient) - return canvas.add_layer(layer) - - ### 2-D categorical ### - - def add_heatmap( - self, - x: str, - y: str, - value: str, - *, - cmap: ColormapType = "inferno", - clim: tuple[float, float] | None = None, - name: str | None = None, - fill: float = 0, - ) -> _lt.DFHeatmap[_DF]: - canvas = self._canvas() - layer = _lt.DFHeatmap.build_heatmap( - self._df, x, y, value, cmap=cmap, clim=clim, name=name, fill=fill, - backend=canvas._get_backend(), - ) # fmt: skip - if self._update_label: - self._update_xy_label(x, y) - canvas.x.ticks.set_labels(*layer._generate_xticks()) - canvas.y.ticks.set_labels(*layer._generate_yticks()) - return canvas.add_layer(layer) - - def add_pointplot2d( - self, - x: str, - y: str, - *, - name: str | None = None, - color: NStr | None = None, - hatch: NStr | None = None, - size: float | None = None, - capsize: float = 0.15, - ): - canvas = self._canvas() - layer = _lt.DFPointPlot2D( - parse(self._df), x, y, name=name, color=color, hatch=hatch, size=size, - capsize=capsize, backend=canvas._get_backend(), - ) # fmt: skip - if self._update_label: - self._update_xy_label(x, y) - return canvas.add_layer(layer) - - ### Aggregation ### - - def mean(self, orient: 
_Orientation = "vertical") -> DataFrameAggPlotter[_C, _DF]: - """Return a mean-plotter.""" - return self._agg_plotter("mean", orient) - - def std(self, orient: _Orientation = "vertical") -> DataFrameAggPlotter[_C, _DF]: - """Return a std-plotter.""" - return self._agg_plotter("std", orient) - - def median(self, orient: _Orientation = "vertical") -> DataFrameAggPlotter[_C, _DF]: - """Return a median-plotter.""" - return self._agg_plotter("median", orient) - - def min(self, orient: _Orientation = "vertical") -> DataFrameAggPlotter[_C, _DF]: - """Return a min-plotter.""" - return self._agg_plotter("min", orient) - - def max(self, orient: _Orientation = "vertical") -> DataFrameAggPlotter[_C, _DF]: - """Return a max-plotter.""" - return self._agg_plotter("max", orient) - - def sum(self, orient: _Orientation = "vertical") -> DataFrameAggPlotter[_C, _DF]: - """Return a sum-plotter.""" - return self._agg_plotter("sum", orient) - - def _agg_plotter( - self, - method: str, - orient: str | Orientation, - ) -> DataFrameAggPlotter[_C, _DF]: - return DataFrameAggPlotter( - self._canvas(), - self._df, - self._update_label, - method=method, - orient=Orientation.parse(orient), - ) - - -class DataFrameAggPlotter(_Plotter[_C, _DF]): - def __init__( - self, - canvas: _C, - df: _DF, - update_label: bool, - method: str, - orient: Orientation, - ): - super().__init__(canvas, df, update_label) - self._agg_method = method - self._orient = orient - - def add_line( - self, - x: str, - y: str, - *, - name: str | None = None, - color: NStr | None = None, - width: str | None = None, - style: NStr | None = None, - ) -> _lt.DFLines[_DF]: - """ - Add line that connect the aggregated values. - - >>> canvas.cat(df).mean().add_line("time", "value") - - Parameters - ---------- - x : str - Column name for x-axis. - y : str - Column name for y-axis. - name : str, optional - Name of the layer. - color : str or sequence of str, optional - Column name(s) for coloring the lines. Must be categorical. 
- width : str, optional - Column name for line width. Must be numerical. - style : str or sequence of str, optional - Column name(s) for styling the lines. Must be categorical. - - Returns - ------- - WrappedLines - Line collection layer. - """ - canvas = self._canvas() - df = parse(self._df) - keys = list(df.iter_keys()) - _color = PlotArg.from_color(keys, color) - _style = PlotArg.from_style(keys, style) - df_agg = self._aggregate(df, self._concat_tuple(x, y, _color, _style), y) - layer = _lt.DFLines.from_table( - df_agg, x, y, name=name, color=color, width=width, style=style, - backend=canvas._get_backend(), - ) # fmt: skip - if color is not None and not layer._color_by.is_const(): - layer.with_color(_color.value, palette=canvas._color_palette) - elif color is None: - layer.with_color(canvas._color_palette.next()) - if self._update_label: - self._update_xy_label(x, y) - return canvas.add_layer(layer) - - def add_markers( - self, - x: str, - y: str, - *, - name: str | None = None, - color: NStr | ColorType | None = None, - hatch: NStr | Hatch | None = None, - size: np.str_ | float | None = None, - symbol: NStr | Symbol | None = None, - ) -> _lt.DFMarkers[_DF]: - """ - Add markers that represent the aggregated values. - - >>> canvas.cat(df).mean().add_markers("time", "value") - - Parameters - ---------- - x : str - Column name for x-axis. - y : str - Column name for y-axis. - name : str, optional - Name of the layer. - color : str or sequence of str, optional - Column name(s) for coloring the lines. Must be categorical. - hatch : str or sequence of str, optional - Column name(s) for hatches. Must be categorical. - size : str, optional - Column name for marker size. Must be numerical. - symbol : str or sequence of str, optional - Column name(s) for symbols. Must be categorical. - - Returns - ------- - WrappedMarkers - Marker collection layer. 
- """ - canvas = self._canvas() - df = parse(self._df) - keys = list(df.iter_keys()) - _color = PlotArg.from_color(keys, color) - _hatch = PlotArg.from_hatch(keys, hatch) - _symbol = PlotArg.from_symbol(keys, symbol) - df_agg = self._aggregate( - df, self._concat_tuple(x, y, _color, _hatch, _symbol), y - ) - layer = _lt.DFMarkers.from_table( - df_agg, x, y, name=name, color=color, hatch=hatch, size=size, - symbol=symbol, backend=canvas._get_backend(), - ) # fmt: skip - if color is not None and not layer._color_by.is_const(): - layer.with_color(_color.value, palette=canvas._color_palette) - elif color is None: - layer.with_color(canvas._color_palette.next()) - if self._update_label: - self._update_xy_label(x, y) - return canvas.add_layer(layer) - - def add_heatmap( - self, - x: str, - y: str, - value: str, - *, - cmap: ColormapType = "inferno", - clim: tuple[float, float] | None = None, - name: str | None = None, - fill: float = 0, - ) -> _lt.DFHeatmap[_DF]: - canvas = self._canvas() - df = parse(self._df) - df_agg = self._aggregate(df, (x, y), value) - layer = _lt.DFHeatmap.build_heatmap( - df_agg, x, y, value, cmap=cmap, clim=clim, name=name, fill=fill, - backend=canvas._get_backend(), - ) # fmt: skip - if self._update_label: - self._update_xy_label(x, y) - canvas.x.ticks.set_labels(*layer._generate_xticks()) - canvas.y.ticks.set_labels(*layer._generate_yticks()) - return canvas.add_layer(layer) - - def _aggregate( - self, - df: DataFrameWrapper, - by: tuple[str, ...], - on: str, - ) -> DataFrameWrapper[_DF]: - return df.agg_by(by, on, self._agg_method) - - def _concat_tuple(self, x, y, *args: PlotArg) -> tuple: - """ - Concatenate the arguments into a tuple. - - This method may return a tuple of str or other types such as Symbol, Color, etc. 
- """ - out = [] - if self._orient.is_vertical: - out.append(x) - else: - out.append(y) - for a in args: - if not a.is_column: - continue - elif isinstance(val := a.value, str): - out.append(val) - else: - out.extend(val) - return out diff --git a/whitecanvas/canvas/dataframe/_utils.py b/whitecanvas/canvas/dataframe/_utils.py deleted file mode 100644 index 6dd164d4..00000000 --- a/whitecanvas/canvas/dataframe/_utils.py +++ /dev/null @@ -1,132 +0,0 @@ -from __future__ import annotations - -from cmap import Color - -from whitecanvas.types import Hatch, LineStyle, Symbol - - -def _sequence_of_column_name(keys: list[str], value) -> bool: - if isinstance(value, str): - return False - if hasattr(value, "__iter__"): - for each in value: - if not isinstance(each, str): - return False - if each not in keys: - return False - return True - return False - - -class PlotArg: - def __init__(self, value, is_column: bool): - self._value = value - self._is_column = is_column - - @property - def value(self): - """The value of the argument.""" - return self._value - - @property - def is_column(self) -> bool: - """True if the value is a column name.""" - return self._is_column - - @classmethod - def from_color(cls, keys: list[str], color) -> PlotArg: - if color is None: - return PlotArg(None, False) - if isinstance(color, str): - if color in keys: - return PlotArg([color], True) - else: - return PlotArg.from_color(keys, Color(color)) - elif _sequence_of_column_name(keys, color): - return PlotArg(list(color), True) - else: - try: - col = Color(color) - except Exception: - raise ValueError( - f"'color' must be one of the column names {keys!r}, color-like " - "or sequence of them." 
- ) - return PlotArg(col, False) - - @classmethod - def from_symbol(cls, keys: list[str], symbol) -> PlotArg: - if symbol is None: - return PlotArg(None, False) - if isinstance(symbol, str): - if symbol in keys: - return PlotArg([symbol], True) - else: - return PlotArg.from_symbol(keys, Symbol(symbol)) - elif _sequence_of_column_name(keys, symbol): - return PlotArg(list(symbol), True) - else: - try: - sym = Symbol(symbol) - except Exception: - raise ValueError( - f"'symbol' must be one of the column names {keys!r}, symbol-like " - "or sequence of them." - ) - return PlotArg(sym, False) - - @classmethod - def from_hatch(cls, keys: list[str], hatch) -> PlotArg: - if hatch is None: - return PlotArg(None, False) - if isinstance(hatch, str): - if hatch in keys: - return PlotArg([hatch], True) - else: - return PlotArg.from_hatch(keys, Hatch(hatch)) - elif _sequence_of_column_name(keys, hatch): - return PlotArg(list(hatch), True) - else: - try: - htch = Hatch(hatch) - except Exception: - raise ValueError( - f"'hatch' must be one of the column names {keys!r}, hatch-like " - "or sequence of them." - ) from None - return PlotArg(htch, False) - - @classmethod - def from_style(cls, keys: list[str], style) -> PlotArg: - if style is None: - return PlotArg(None, False) - if isinstance(style, str): - if style in keys: - return PlotArg([style], True) - else: - return PlotArg.from_style(keys, LineStyle(style)) - elif _sequence_of_column_name(keys, style): - return PlotArg(list(style), True) - else: - try: - stl = LineStyle(style) - except Exception: - raise ValueError( - f"'style' must be one of the column names {keys!r}, style-like " - "or sequence of them." 
- ) from None - return PlotArg(stl, False) - - @classmethod - def from_scalar(cls, keys: list[str], value) -> PlotArg: - if value is None: - return PlotArg(None, False) - if isinstance(value, str): - if value in keys: - return PlotArg([value], True) - else: - raise ValueError(f"Not a valid column name: {value!r}") - elif _sequence_of_column_name(keys, value): - return PlotArg(list(value), True) - else: - return PlotArg(float(value), False) diff --git a/whitecanvas/core.py b/whitecanvas/core.py index 87bcc914..1e3062b2 100644 --- a/whitecanvas/core.py +++ b/whitecanvas/core.py @@ -18,8 +18,7 @@ def grid( nrows: int = 1, ncols: int = 1, *, - link_x: bool = False, - link_y: bool = False, + size: tuple[int, int] | None = None, backend: Backend | str | None = None, ) -> CanvasGrid: """ @@ -27,14 +26,12 @@ def grid( Parameters ---------- - nrows : int, optional - Number of rows, by default 1 - ncols : int, optional - Number of columns, by default 1 - link_x : bool, optional - Whether to link x axes, by default False - link_y : bool, optional - Whether to link y axes, by default False + nrows : int, default 1 + Number of rows. + ncols : int, default 1 + Number of columns. + size : (int, int), optional + Size of the grid. backend : Backend or str, optional Backend name. @@ -43,60 +40,71 @@ def grid( CanvasGrid Grid of empty canvases. 
""" - return CanvasGrid.uniform( - nrows, ncols, link_x=link_x, link_y=link_y, backend=backend - ) + g = CanvasGrid.uniform(nrows, ncols, backend=backend) + if size is not None: + g.size = size + return g def grid_nonuniform( heights: list[int], widths: list[int], *, - link_x: bool = False, - link_y: bool = False, + size: tuple[int, int] | None = None, backend: Backend | str | None = None, ) -> CanvasGrid: - return CanvasGrid(heights, widths, link_x=link_x, link_y=link_y, backend=backend) + g = CanvasGrid(heights, widths, backend=backend) + if size is not None: + g.size = size + return g def vgrid( nrows: int = 1, *, - link_x: bool = False, - link_y: bool = False, + size: tuple[int, int] | None = None, backend: Backend | str | None = None, ) -> CanvasVGrid: - return CanvasVGrid.uniform(nrows, link_x=link_x, link_y=link_y, backend=backend) + g = CanvasVGrid.uniform(nrows, backend=backend) + if size is not None: + g.size = size + return g def vgrid_nonuniform( heights: list[int], *, - link_x: bool = False, - link_y: bool = False, + size: tuple[int, int] | None = None, backend: Backend | str | None = None, ) -> CanvasVGrid: - return CanvasVGrid(heights, link_x=link_x, link_y=link_y, backend=backend) + g = CanvasVGrid(heights, backend=backend) + if size is not None: + g.size = size + return g def hgrid( ncols: int = 1, *, - link_x: bool = False, - link_y: bool = False, + size: tuple[int, int] | None = None, backend: Backend | str | None = None, ) -> CanvasHGrid: - return CanvasHGrid.uniform(ncols, link_x=link_x, link_y=link_y, backend=backend) + g = CanvasHGrid.uniform(ncols, backend=backend) + if size is not None: + g.size = size + return g def hgrid_nonuniform( widths: list[int], *, - link_x: bool = False, - link_y: bool = False, + size: tuple[int, int] | None = None, backend: Backend | str | None = None, ) -> CanvasHGrid: - return CanvasHGrid(widths, link_x=link_x, link_y=link_y, backend=backend) + g = CanvasHGrid(widths, backend=backend) + if size is not None: + 
g.size = size + return g def new_canvas( diff --git a/whitecanvas/layers/_mixin.py b/whitecanvas/layers/_mixin.py index 22a4771b..0ea3f595 100644 --- a/whitecanvas/layers/_mixin.py +++ b/whitecanvas/layers/_mixin.py @@ -640,7 +640,7 @@ def with_edge( def with_face_multi( self, color: ColorType | Sequence[ColorType] | _Void = _void, - hatch: str | Hatch | Sequence[str | Hatch] = Hatch.SOLID, + hatch: str | Hatch | Sequence[str | Hatch] | _Void = _void, alpha: float = 1, ) -> Self: if not isinstance(self._face_namespace, MultiFace): diff --git a/whitecanvas/layers/_primitive/band.py b/whitecanvas/layers/_primitive/band.py index 67cc9103..4f1de83d 100644 --- a/whitecanvas/layers/_primitive/band.py +++ b/whitecanvas/layers/_primitive/band.py @@ -2,8 +2,6 @@ from typing import Any -import numpy as np - from whitecanvas.backend import Backend from whitecanvas.layers._base import DataBoundLayer from whitecanvas.layers._mixin import FaceEdgeMixin @@ -90,34 +88,3 @@ def set_data( edge_high: ArrayLike1D | None = None, ): self.data = t, edge_low, edge_high - - @classmethod - def from_kde( - cls, - data: ArrayLike1D, - bottom: float = 0.0, - *, - name: str | None = None, - band_width: float | None = None, - color: ColorType = "blue", - alpha: float = 1.0, - hatch: str | Hatch = Hatch.SOLID, - orient: str | Orientation = Orientation.VERTICAL, - backend: Backend | str | None = None, - ): - from whitecanvas.utils.kde import gaussian_kde - - data = as_array_1d(data) - kde = gaussian_kde(data, bw_method=band_width) - - sigma = np.sqrt(kde.covariance[0, 0]) - pad = sigma * 2.5 - x = np.linspace(data.min() - pad, data.max() + pad, 100) - y1 = kde(x) - y0 = np.full_like(y1, bottom) - self = cls( - x, y0, y1, name=name, orient=orient, color=color, alpha=alpha, - hatch=hatch, backend=backend, - ) # fmt: skip - self._band_type = "kde" - return self diff --git a/whitecanvas/layers/_primitive/bars.py b/whitecanvas/layers/_primitive/bars.py index 5ad8dc29..e19a9171 100644 --- 
a/whitecanvas/layers/_primitive/bars.py +++ b/whitecanvas/layers/_primitive/bars.py @@ -386,7 +386,7 @@ def with_face( def with_face_multi( self, color: ColorType | Sequence[ColorType] | _Void = _void, - hatch: str | Hatch | Sequence[str | Hatch] = Hatch.SOLID, + hatch: str | Hatch | Sequence[str | Hatch] | _Void = _void, alpha: float = 1, ) -> Bars[MultiFace, _Edge]: return super().with_face_multi(color, hatch, alpha) diff --git a/whitecanvas/layers/_primitive/line.py b/whitecanvas/layers/_primitive/line.py index 085fb8d0..051dca44 100644 --- a/whitecanvas/layers/_primitive/line.py +++ b/whitecanvas/layers/_primitive/line.py @@ -144,7 +144,7 @@ def __init__( width: float = 1, alpha: float = 1.0, style: LineStyle | str = LineStyle.SOLID, - antialias: bool = False, + antialias: bool = True, backend: Backend | str | None = None, ): xdata, ydata = normalize_xy(xdata, ydata) @@ -426,35 +426,6 @@ def with_text( name=self.name, ) - @classmethod - def build_hist( - cls, - data: ArrayLike1D, - *, - bins: int | ArrayLike1D = 10, - density: bool = False, - range: tuple[float, float] | None = None, - orient: str | Orientation = Orientation.VERTICAL, - name: str | None = None, - color: ColorType = "blue", - alpha: float = 1.0, - width: float = 1.0, - style: LineStyle | str = LineStyle.SOLID, - antialias: bool = True, - backend: Backend | str | None = None, - ): - """Construct a line from a histogram.""" - data = as_array_1d(data) - counts, edges = np.histogram(data, bins, density=density, range=range) - xdata = np.concatenate(list(zip(edges[:-1], edges[1:]))) - ydata = np.concatenate(list(zip(counts, counts))) - if not Orientation.parse(orient).is_vertical: - xdata, ydata = ydata, xdata - return Line( - xdata, ydata, name=name, color=color, alpha=alpha, width=width, - style=style, antialias=antialias, backend=backend, - ) # fmt: skip - @classmethod def build_cdf( cls, diff --git a/whitecanvas/layers/_primitive/markers.py b/whitecanvas/layers/_primitive/markers.py index 
484d609f..17504742 100644 --- a/whitecanvas/layers/_primitive/markers.py +++ b/whitecanvas/layers/_primitive/markers.py @@ -616,7 +616,7 @@ def with_face_multi( self, *, color: ColorType | Sequence[ColorType] | _Void = _void, - hatch: str | Hatch | Sequence[str | Hatch] = Hatch.SOLID, + hatch: str | Hatch | Sequence[str | Hatch] | _Void = _void, alpha: float = 1, ) -> Markers[MultiFace, _Edge, _Size]: """ diff --git a/whitecanvas/layers/group/__init__.py b/whitecanvas/layers/group/__init__.py index 7d02ce91..45c027bf 100644 --- a/whitecanvas/layers/group/__init__.py +++ b/whitecanvas/layers/group/__init__.py @@ -1,4 +1,4 @@ -from whitecanvas.layers.group._collections import LayerTuple +from whitecanvas.layers.group._collections import LayerCollectionBase, LayerTuple from whitecanvas.layers.group.band_collection import BandCollection, ViolinPlot from whitecanvas.layers.group.boxplot import BoxPlot from whitecanvas.layers.group.graph import Graph @@ -10,6 +10,7 @@ ) from whitecanvas.layers.group.line_band import LineBand from whitecanvas.layers.group.line_collection import LineCollection +from whitecanvas.layers.group.line_fill import Histogram, Kde from whitecanvas.layers.group.line_markers import Plot from whitecanvas.layers.group.marker_collection import MarkerCollection from whitecanvas.layers.group.stemplot import StemPlot @@ -32,4 +33,7 @@ "Graph", "StemPlot", "LayerTuple", + "Histogram", + "Kde", + "LayerCollectionBase", ] diff --git a/whitecanvas/layers/group/band_collection.py b/whitecanvas/layers/group/band_collection.py index df01865b..d5f00493 100644 --- a/whitecanvas/layers/group/band_collection.py +++ b/whitecanvas/layers/group/band_collection.py @@ -114,45 +114,10 @@ def from_arrays( kde_band_width: float | str = "scott", backend: str | Backend | None = None, ): - from whitecanvas.utils.kde import gaussian_kde - ori = Orientation.parse(orient) - if extent <= 0: - raise ValueError(f"extent must be positive, got {extent}") - x, data = 
check_array_input(x, data) - xyy_values: list[XYYData] = [] - for offset, values in zip(x, data): - arr = as_array_1d(values) - kde = gaussian_kde(arr, bw_method=kde_band_width) - - sigma = np.sqrt(kde.covariance[0, 0]) - pad = sigma * 2.5 - x_ = np.linspace(arr.min() - pad, arr.max() + pad, 100) - y = kde(x_) - if shape in ("both", "left"): - y0 = -y + offset - else: - y0 = np.zeros_like(y) + offset - if shape in ("both", "right"): - y1 = y + offset - else: - y1 = np.zeros_like(y) + offset - - data = XYYData(x_, y0, y1) - xyy_values.append(data) - - half_widths: list[float] = [] - for xyy in xyy_values: - half_width = np.max(np.abs(xyy.ydiff)) - if shape == "both": - half_width /= 2 - half_widths.append(half_width) - factor = extent / np.max(half_widths) / 2 - new_vals: list[XYYData] = [] - for xyy, xoffset in zip(xyy_values, x): - y0 = (xyy.y0 - xoffset) * factor + xoffset - y1 = (xyy.y1 - xoffset) * factor + xoffset - new_vals.append(XYYData(xyy.x, y0, y1)) + new_vals = cls._convert_data( + x, data, shape=shape, extent=extent, kde_band_width=kde_band_width + ) return cls( new_vals, name=name, @@ -256,3 +221,51 @@ def set_datasets( y0 = (xyy.y0 - xoffset) * factor + xoffset y1 = (xyy.y1 - xoffset) * factor + xoffset band.data = XYYData(xyy.x, y0, y1) + + @staticmethod + def _convert_data( + x: list[float], + data: list[ArrayLike], + shape: Literal["both", "left", "right"] = "both", + extent: float = 0.5, + kde_band_width: float | str = "scott", + ): + from whitecanvas.utils.kde import gaussian_kde + + if extent <= 0: + raise ValueError(f"extent must be positive, got {extent}") + x, data = check_array_input(x, data) + xyy_values: list[XYYData] = [] + for offset, values in zip(x, data): + arr = as_array_1d(values) + kde = gaussian_kde(arr, bw_method=kde_band_width) + + sigma = np.sqrt(kde.covariance[0, 0]) + pad = sigma * 2.5 + x_ = np.linspace(arr.min() - pad, arr.max() + pad, 100) + y = kde(x_) + if shape in ("both", "left"): + y0 = -y + offset + else: + y0 = 
np.zeros_like(y) + offset + if shape in ("both", "right"): + y1 = y + offset + else: + y1 = np.zeros_like(y) + offset + + data = XYYData(x_, y0, y1) + xyy_values.append(data) + + half_widths: list[float] = [] + for xyy in xyy_values: + half_width = np.max(np.abs(xyy.ydiff)) + if shape == "both": + half_width /= 2 + half_widths.append(half_width) + factor = extent / np.max(half_widths) / 2 + new_vals: list[XYYData] = [] + for xyy, xoffset in zip(xyy_values, x): + y0 = (xyy.y0 - xoffset) * factor + xoffset + y1 = (xyy.y1 - xoffset) * factor + xoffset + new_vals.append(XYYData(xyy.x, y0, y1)) + return new_vals diff --git a/whitecanvas/layers/group/labeled.py b/whitecanvas/layers/group/labeled.py index 3ca144c7..ba420a94 100644 --- a/whitecanvas/layers/group/labeled.py +++ b/whitecanvas/layers/group/labeled.py @@ -323,7 +323,7 @@ def __init__( self._init_events() @property - def bars(self) -> Bars: + def bars(self) -> Bars[_NFace, _NEdge]: """The bars layer.""" return self._children[0] diff --git a/whitecanvas/layers/group/line_collection.py b/whitecanvas/layers/group/line_collection.py index b37bc5dd..9b1b39ec 100644 --- a/whitecanvas/layers/group/line_collection.py +++ b/whitecanvas/layers/group/line_collection.py @@ -49,11 +49,11 @@ def width(self) -> NDArray[np.float32]: @width.setter def width(self, width: float | Sequence[float]): - if isinstance(width, float): + if isinstance(width, (int, float, np.number)): _width = [width] * len(self) else: _width = np.asarray(width, dtype=np.float32) - if len(width) != len(self): + if len(_width) != len(self): raise ValueError( f"width must be a float or a sequence of length {len(self)}" ) diff --git a/whitecanvas/layers/group/line_fill.py b/whitecanvas/layers/group/line_fill.py new file mode 100644 index 00000000..86ad6959 --- /dev/null +++ b/whitecanvas/layers/group/line_fill.py @@ -0,0 +1,382 @@ +from __future__ import annotations + +from enum import Enum +from typing import overload + +import numpy as np +from 
numpy.typing import NDArray + +from whitecanvas.backend import Backend +from whitecanvas.layers._primitive import Band, Line +from whitecanvas.layers.group._collections import LayerContainer +from whitecanvas.types import ArrayLike1D, ColorType, LineStyle, Orientation +from whitecanvas.utils.hist import get_hist_edges, histograms +from whitecanvas.utils.normalize import as_array_1d + + +class HistogramShape(Enum): + step = "step" + polygon = "polygon" + bars = "bars" + + +class HistogramKind(Enum): + count = "count" + density = "density" + frequency = "frequency" + percent = "percent" + + +class LineFillBase(LayerContainer): + def __init__(self, line: Line, fill: Band, name: str | None = None): + super().__init__([line, fill], name=name) + + @property + def line(self) -> Line: + """The line layer.""" + return self._children[0] + + @property + def fill(self) -> Band: + """The fill layer.""" + return self._children[1] + + @property + def orient(self) -> Orientation: + """Orientation of the line and fill layers.""" + return self.fill.orient + + @property + def color(self) -> NDArray[np.float32]: + """Color of the layer.""" + return self.line.color + + @color.setter + def color(self, color: ColorType): + self.line.color = color + self.fill.face.update(color=color, alpha=0.2) + self.fill.edge.width = 0.0 + + +class Histogram(LineFillBase): + def __init__( + self, + data: NDArray[np.number], + edges: NDArray[np.number], + limits: tuple[float, float] | None, + line: Line, + fill: Band, + shape: HistogramShape = HistogramShape.bars, + kind: HistogramKind = HistogramKind.count, + name: str | None = None, + ): + if name is None: + name = "histogram" + super().__init__(line, fill, name=name) + self._data = data + self._shape = shape + self._kind = kind + self._edges = edges + self._limits = limits + + @property + def data(self) -> NDArray[np.number]: + """The data used to plot the histogram.""" + return self._data + + @data.setter + def data(self, data: NDArray[np.number]): + 
data = as_array_1d(data) + xdata, ydata = self._calculate_xy( + data, self._edges, self._shape, self._kind, self._limits, clip=True + ) # fmt: skip + self._update_internal(xdata, ydata) + self._data = data + + def _update_internal(self, xdata: NDArray[np.number], ydata: NDArray[np.number]): + if self.orient.is_vertical: + self.line.data = xdata, ydata + else: + self.line.data = ydata, xdata + self.fill.data = xdata, _prep_bottom(ydata), ydata + + @property + def shape(self) -> HistogramShape: + """The shape of the histogram.""" + return self._shape + + @shape.setter + def shape(self, shape: str | HistogramShape): + shape = HistogramShape(shape) + xdata, ydata, _ = self._calculate_xy( + self._data, self._edges, shape, self._kind, self._limits + ) # fmt: skip + self._update_internal(xdata, ydata) + self._shape = shape + + @property + def kind(self) -> HistogramKind: + """The kind of the histogram.""" + return self._kind + + @kind.setter + def kind(self, kind: str | HistogramKind): + kind = HistogramKind(kind) + xdata, ydata, _ = self._calculate_xy( + self._data, self._edges, self._shape, kind, self._limits + ) # fmt: skip + self._update_internal(xdata, ydata) + self._kind = kind + + @property + def limits(self) -> tuple[float, float] | None: + """The limits of the histogram.""" + return self._limits + + @limits.setter + def limits(self, limits: tuple[float, float] | None): + xdata, ydata, _ = self._calculate_xy( + self._data, self._edges, self._shape, self._kind, limits + ) + self._update_internal(xdata, ydata) + self._limits = limits + + @property + def edges(self) -> NDArray[np.number]: + """The edges of the histogram.""" + return self._edges + + @edges.setter + def edges(self, edges: NDArray[np.number]): + edges = as_array_1d(edges) + xdata, ydata, _ = self._calculate_xy( + self._data, edges, self._shape, self._kind, self._limits + ) + self._update_internal(xdata, ydata) + self._edges = edges + + @overload + def update_edges(self, bins: int, limits: tuple[float, 
float] | None = None): + ... + + @overload + def update_edges(self, edges: NDArray[np.number]): + ... + + def update_edges(self, bins, limits=None): + """ + Update the edges of the histogram. + + >>> hist.update_edges(20, limits=(0, 10)) # uniform bins + >>> hist.update_edges([0, 2, 3, 5]) # non-uniform bins + """ + if limits is not None and not isinstance(bins, (int, np.number)): + raise TypeError("bins must be an integer when limits are specified.") + edges = get_hist_edges([self._data], bins, limits) + self.edges = edges + + @classmethod + def from_array( + cls, + data: NDArray[np.number], + shape: HistogramShape = HistogramShape.bars, + kind: HistogramKind = HistogramKind.count, + name: str | None = None, + bins: int = 10, + limits: tuple[float, float] | None = None, + color: ColorType = "black", + style: str | LineStyle = LineStyle.SOLID, + width: float = 1.0, + orient: str | Orientation = "vertical", + backend: str | Backend | None = None, + ) -> Histogram: + """Create a histogram from an array.""" + shape = HistogramShape(shape) + kind = HistogramKind(kind) + ori = Orientation.parse(orient) + xdata, ydata, edges = cls._calculate_xy(data, bins, shape, kind, limits) + if ori.is_vertical: + line = Line( + xdata, ydata, color=color, style=style, width=width, backend=backend + ) # fmt: skip + else: + line = Line( + ydata, xdata, color=color, style=style, width=width, backend=backend + ) + fill = Band( + xdata, _prep_bottom(ydata), ydata, color=color, alpha=0.2, orient=ori, + backend=backend, + ) # fmt: skip + return cls(data, edges, limits, line, fill, shape, kind, name=name) + + @staticmethod + def _calculate_xy( + data, + bins: int | ArrayLike1D, + shape: HistogramShape, + kind: HistogramKind, + limits: tuple[float, float] | None = None, + clip: bool = True, + ) -> tuple[NDArray[np.number], NDArray[np.number], NDArray[np.number]]: + if clip and limits is not None: + data = np.clip(data, *limits) + hist = histograms([data], bins, limits) + shape = 
HistogramShape(shape) + kind = HistogramKind(kind) + if kind is HistogramKind.count: + heights = hist.counts[0] + elif kind is HistogramKind.density: + heights = hist.density()[0] + elif kind is HistogramKind.frequency: + heights = hist.frequency()[0] + elif kind is HistogramKind.percent: + heights = hist.percent()[0] + else: + raise ValueError(f"Unknown kind {kind!r}.") + + if shape is HistogramShape.step: + xdata = np.repeat(hist.edges, 2) + ydata = np.concatenate([[0], np.repeat(heights, 2), [0]]) + elif shape is HistogramShape.polygon: + centers = hist.centers() + xdata = np.concatenate([[centers[0]], centers, [centers[-1]]]) + ydata = np.concatenate([[0], heights, [0]]) + elif shape is HistogramShape.bars: + edges = hist.edges + xdata = np.repeat(edges, 3)[1:-1] + ydata = np.zeros_like(xdata) + ydata[1::3] = ydata[2::3] = heights + else: + raise ValueError(f"Unknown shape {shape!r}.") + return xdata, ydata, hist.edges + + +def _prep_bottom(ydata: NDArray[np.number]) -> NDArray[np.number]: + return np.full_like(ydata, 0) + + +class Kde(LineFillBase): + def __init__( + self, + data: NDArray[np.number], + band_width: float, + line: Line, + fill: Band, + name: str | None = None, + bottom: float = 0.0, + scale: float = 1.0, + ): + if name is None: + name = "kde" + super().__init__(line, fill, name=name) + self._data = data + self._bottom = bottom + self._band_width = band_width + self._scale = scale + + @property + def data(self) -> NDArray[np.number]: + """The data used to plot the histogram.""" + return self._data + + @data.setter + def data(self, data: NDArray[np.number]): + data = as_array_1d(data) + xdata, ydata = self._calculate_params( + data, self._band_width, self._bottom, self._scale + ) # fmt: skip + self._update_internal(xdata, ydata, self._bottom) + self._data = data + + def _update_internal( + self, xdata: NDArray[np.number], ydata: NDArray[np.number], bottom: float + ): + if self.orient.is_vertical: + self.line.data = xdata, ydata + else: + 
self.line.data = ydata, xdata + self.fill.data = xdata, np.full_like(xdata, bottom), ydata + + @property + def band_width(self) -> float: + """The band width of the kernel density estimation.""" + return self._band_width + + @band_width.setter + def band_width(self, band_width: float): + xdata, ydata, bw = self._calculate_params( + self._data, band_width, self._bottom, self._scale + ) # fmt: skip + self._update_internal(xdata, ydata, self._bottom) + self._band_width = bw + + @property + def bottom(self) -> float: + """The bottom value of the fill.""" + return self._bottom + + @bottom.setter + def bottom(self, bottom: float): + xdata, ydata, _ = self._calculate_params( + self._data, self._band_width, bottom, self._scale + ) # fmt: skip + self._update_internal(xdata, ydata, bottom) + self._bottom = bottom + + @property + def scale(self) -> float: + """The scale of the kernel density estimation.""" + return self._scale + + @scale.setter + def scale(self, scale: float): + xdata, ydata, _ = self._calculate_params( + self._data, self._band_width, self._bottom, scale + ) # fmt: skip + self._update_internal(xdata, ydata, self._bottom) + self._scale = scale + + @classmethod + def from_array( + cls, + data: ArrayLike1D, + bottom: float = 0.0, + scale: float = 1.0, + *, + name: str | None = None, + band_width: float | None = None, + color: ColorType = "blue", + style: str | LineStyle = LineStyle.SOLID, + width: float = 1.0, + orient: str | Orientation = Orientation.VERTICAL, + backend: Backend | str | None = None, + ): + data = as_array_1d(data) + x, y1, bw = cls._calculate_params(data, band_width, bottom, scale) + if orient.is_vertical: + line = Line(x, y1, color=color, style=style, width=width, backend=backend) + else: + line = Line(y1, x, color=color, style=style, width=width, backend=backend) + fill = Band( + x, np.full_like(x, bottom), y1, color=color, alpha=0.2, orient=orient, + backend=backend, + ) # fmt: skip + return Kde(data, bw, line, fill, name=name, 
bottom=bottom, scale=scale) + + @staticmethod + def _calculate_params( + data: NDArray[np.number], + band_width: float, + bottom: float = 0.0, + scale: float = 1.0, + ) -> tuple[NDArray[np.number], NDArray[np.number], float]: + from whitecanvas.utils.kde import gaussian_kde + + data = as_array_1d(data) + kde = gaussian_kde(data, bw_method=band_width) + + sigma = np.sqrt(kde.covariance[0, 0]) + pad = sigma * 2.5 + x = np.linspace(data.min() - pad, data.max() + pad, 100) + y1 = kde(x) * scale + bottom + return x, y1, kde.factor diff --git a/whitecanvas/layers/tabular/__init__.py b/whitecanvas/layers/tabular/__init__.py index ecf6cd77..ce7ea984 100644 --- a/whitecanvas/layers/tabular/__init__.py +++ b/whitecanvas/layers/tabular/__init__.py @@ -7,11 +7,14 @@ from whitecanvas.layers.tabular._dataframe import ( DFBars, DFHeatmap, + DFHistograms, + DFKde, DFLines, DFMarkerGroups, DFMarkers, DFPointPlot2D, ) +from whitecanvas.layers.tabular._df_compat import parse __all__ = [ "DFBarPlot", @@ -23,5 +26,8 @@ "DFBars", "DFBoxPlot", "DFHeatmap", + "DFHistograms", + "DFKde", "DFPointPlot2D", + "parse", ] diff --git a/whitecanvas/layers/tabular/_box_like.py b/whitecanvas/layers/tabular/_box_like.py index 8f14bffc..6925925f 100644 --- a/whitecanvas/layers/tabular/_box_like.py +++ b/whitecanvas/layers/tabular/_box_like.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Callable, Generic, Iterable, TypeVar +from typing import TYPE_CHECKING, Callable, Generic, TypeVar import numpy as np from cmap import Color @@ -13,7 +13,7 @@ from whitecanvas.layers import group as _lg from whitecanvas.layers.tabular import _plans as _p from whitecanvas.layers.tabular import _shared -from whitecanvas.layers.tabular._df_compat import DataFrameWrapper, parse +from whitecanvas.layers.tabular._df_compat import DataFrameWrapper from whitecanvas.types import ( ColorType, Hatch, @@ -24,79 +24,81 @@ if TYPE_CHECKING: from typing_extensions import Self + from 
whitecanvas.canvas.dataframe._base import CatIterator + _FE = _mixin.AbstractFaceEdgeMixin[_mixin.FaceNamespace, _mixin.EdgeNamespace] _DF = TypeVar("_DF") +def _norm_color_hatch( + color, + hatch, + df: DataFrameWrapper[_DF], +) -> tuple[_p.ColorPlan, _p.HatchPlan]: + color_cov = _shared.ColumnOrValue(color, df) + if color_cov.is_column: + color_by = _p.ColorPlan.from_palette(color_cov.columns) + elif color_cov.value is not None: + color_by = _p.ColorPlan.from_const(Color(color_cov.value)) + else: + color_by = _p.ColorPlan.default() + hatch_cov = _shared.ColumnOrValue(hatch, df) + if hatch_cov.is_column: + hatch_by = _p.HatchPlan.new(hatch_cov.columns) + elif hatch_cov.value is not None: + hatch_by = _p.HatchPlan.from_const(Hatch(hatch_cov.value)) + else: + hatch_by = _p.HatchPlan.default() + return color_by, hatch_by + + class _BoxLikeMixin: _source: DataFrameWrapper[_DF] def __init__( self, - source: DataFrameWrapper[_DF], - offset: str | tuple[str, ...], - value: str, - color: str | tuple[str, ...] | None = None, - hatch: str | tuple[str, ...] 
| None = None, + categories: list[tuple], + splitby: tuple[str, ...], + color_by: _p.ColorPlan, + hatch_by: _p.HatchPlan, ): - if isinstance(offset, str): - offset = (offset,) - splitby = _shared.join_columns(offset, color, hatch, source=source) - self._y = value self._splitby = splitby - self._color_by = _p.ColorPlan.default() - self._hatch_by = _p.HatchPlan.default() - self._offset_by = _p.OffsetPlan.default().more_by(*offset) - self._source = source - - @property - def color(self) -> _p.ColorPlan | _p.ColormapPlan: - """Return the object describing how the plot is colored.""" - return self._color_by - - @property - def hatch(self) -> _p.HatchPlan: - """Return the object describing how the plot is hatched.""" - return self._hatch_by + self._categories = categories + self._color_by = color_by + self._hatch_by = hatch_by + self._get_base().face.color = color_by.generate(self._categories, self._splitby) + self._get_base().face.hatch = hatch_by.generate(self._categories, self._splitby) def _get_base(self) -> _FE: """Just for typing.""" return self._base_layer - def with_color(self, by: str | Iterable[str], palette=None) -> Self: - cov = _shared.ColumnOrValue(by, self._source) - if cov.is_column: - if set(cov.columns) > set(self._splitby): - raise ValueError(f"Cannot color by a column other than {self._splitby}") - other_by = _shared.unique_tuple(self._offset_by.by, self._hatch_by.by) - by_all = _shared.unique_tuple(cov.columns, other_by) - color_by = _p.ColorPlan.from_palette(cov.columns, palette=palette) - self._splitby = by_all - _, self._labels = self._generate_datasets() - else: - color_by = _p.ColorPlan.from_const(Color(cov.value)) - self._get_base().face.color = color_by.generate(self._labels, self._splitby) + def with_color_palette(self, palette) -> Self: + if self._color_by.is_const(): + raise ValueError("Cannot redraw color for a constant color") + color_by = _p.ColorPlan.from_palette(self._color_by.by, palette=palette) + self._get_base().face.color = 
color_by.generate(self._categories, self._splitby) self._color_by = color_by return self - def with_hatch( - self, - by: str | Iterable[str], - choices=None, - ) -> Self: - cov = _shared.ColumnOrValue(by, self._source) - if cov.is_column: - if set(cov.columns) > set(self._splitby): - raise ValueError(f"Cannot color by a column other than {self._splitby}") - other_by = _shared.unique_tuple(self._offset_by.by, self._color_by.by) - by_all = _shared.unique_tuple(other_by, cov.columns) - hatch_by = _p.HatchPlan.new(cov.columns, values=choices) - self._splitby = by_all - _, self._labels = self._generate_datasets() - else: - hatch_by = _p.HatchPlan.from_const(Hatch(cov.value)) - self._get_base().face.hatch = hatch_by.generate(self._labels, self._splitby) + def with_color(self, color: ColorType) -> Self: + color_by = _p.ColorPlan.from_const(Color(color)) + self._get_base().face.color = color_by.generate(self._categories, self._splitby) + self._color_by = color_by + return self + + def with_hatch_palette(self, choices) -> Self: + if self._hatch_by.is_const(): + raise ValueError("Cannot redraw hatch for a constant hatch") + hatch_by = _p.HatchPlan.new(self._hatch_by.by, values=choices) + self._get_base().face.hatch = hatch_by.generate(self._categories, self._splitby) + self._hatch_by = hatch_by + return self + + def with_hatch(self, hatch: str | Hatch) -> Self: + hatch_by = _p.HatchPlan.from_const(Hatch(hatch)) + self._get_base().face.hatch = hatch_by.generate(self._categories, self._splitby) self._hatch_by = hatch_by return self @@ -111,47 +113,6 @@ def with_edge( self._get_base().with_edge(color=color, width=width, style=style, alpha=alpha) return self - def _generate_datasets(self) -> tuple[list[np.ndarray], list[tuple[Any, ...]]]: - datasets = [] - unique_sl: list[tuple[Any, ...]] = [] - for sl, df in self._source.group_by(self._splitby): - unique_sl.append(sl) - datasets.append(df[self._y]) - return datasets, unique_sl - - def _generate_labels(self): - """Generate the 
tick positions, labels and the axis label.""" - _agged_by = _shared.unique_tuple(self._color_by.by, self._hatch_by.by) - _nagged = 0 - for each in reversed(self._offset_by.by): - if each in _agged_by: - _nagged += 1 - else: - break - - # If all the offset columns are redundantly categorized by color or hatch, - # then all the labels should be shown. - if _nagged == len(self._offset_by.by): - _nagged = 0 - - # group positions by aggregated labels - label_to_pos: dict[str, list[float]] = {} - for p, lbl in self._offset_by.iter_ticks(self._labels, self._splitby): - label_agged = "\n".join(lbl[: len(lbl) - _nagged]) - if label_agged in label_to_pos: - label_to_pos[label_agged].append(p) - else: - label_to_pos[label_agged] = [p] - # compute the mean position for each aggregated label - pos: list[float] = [] - labels: list[str] = [] - for label, pos_list in label_to_pos.items(): - pos.append(np.mean(pos_list)) - labels.append(label) - - offset_labels = self._offset_by.by[: len(self._offset_by.by) - _nagged] - return pos, labels, offset_labels - class DFViolinPlot( _shared.DataFrameLayerWrapper[_lg.ViolinPlot, _DF], @@ -160,63 +121,36 @@ class DFViolinPlot( ): def __init__( self, - source: DataFrameWrapper[_DF], - offset: str | tuple[str, ...], + cat: CatIterator[_DF], value: str, - *, color: str | tuple[str, ...] | None = None, hatch: str | tuple[str, ...] | None = None, + dodge: str | tuple[str, ...] 
| bool | None = None, name: str | None = None, orient: Orientation = Orientation.VERTICAL, extent: float = 0.8, shape: str = "both", backend: str | Backend | None = None, ): - if isinstance(offset, str): - offset = (offset,) - _BoxLikeMixin.__init__(self, source, offset, value, color, hatch) - arrays, self._labels = self._generate_datasets() - x = self._offset_by.generate(self._labels, self._splitby) + _splitby, dodge = _shared.norm_dodge( + cat.df, cat.offsets, color, hatch, dodge=dodge + ) # fmt: skip + x, arr, categories = cat.prep_arrays(_splitby, value, dodge=dodge) + _extent = cat.zoom_factor(dodge=dodge) * extent + color_by, hatch_by = _norm_color_hatch(color, hatch, cat.df) base = _lg.ViolinPlot.from_arrays( - x, arrays, name=name, orient=orient, shape=shape, extent=extent, + x, arr, name=name, orient=orient, shape=shape, extent=_extent, backend=backend, ) # fmt: skip - super().__init__(base, source) - if color is not None: - self.with_color(color) - if hatch is not None: - self.with_hatch(hatch) - - @classmethod - def from_table( - cls, - df: _DF, - offset: tuple[str, ...], - value: str, - color: str | None = None, - hatch: str | None = None, - name: str | None = None, - orient: str | Orientation = Orientation.VERTICAL, - extent: float = 0.8, - shape: str = "both", - backend: str | Backend | None = None, - ) -> DFViolinPlot[_DF]: - src = parse(df) - self = DFViolinPlot( - src, offset, value, orient=orient, name=name, extent=extent, - color=color, hatch=hatch, shape=shape, backend=backend - ) # fmt: skip - return self + super().__init__(base, cat.df) + _BoxLikeMixin.__init__(self, categories, _splitby, color_by, hatch_by) @property def orient(self) -> Orientation: """Orientation of the violins.""" return self._base_layer.orient - def with_shift( - self, - shift: float = 0.0, - ) -> Self: + def with_shift(self, shift: float = 0.0) -> Self: for layer in self._base_layer: _old = layer.data layer.set_data(edge_low=_old.y0 + shift, edge_high=_old.y1 + shift) @@ 
-230,57 +164,30 @@ class DFBoxPlot( ): def __init__( self, - source: DataFrameWrapper[_DF], - offset: str | tuple[str, ...], + cat: CatIterator[_DF], value: str, - *, color: str | tuple[str, ...] | None = None, hatch: str | tuple[str, ...] | None = None, + dodge: str | tuple[str, ...] | bool | None = None, name: str | None = None, orient: Orientation = Orientation.VERTICAL, - capsize: float = 0.1, extent: float = 0.8, + capsize: float = 0.1, backend: str | Backend | None = None, ): - _BoxLikeMixin.__init__(self, source, offset, value, color, hatch) - arrays, self._labels = self._generate_datasets() - x = self._offset_by.generate(self._labels, self._splitby) + _splitby, dodge = _shared.norm_dodge( + cat.df, cat.offsets, color, hatch, dodge=dodge, + ) # fmt: skip + x, arr, categories = cat.prep_arrays(_splitby, value, dodge=dodge) + _extent = cat.zoom_factor(dodge=dodge) * extent + _capsize = cat.zoom_factor(dodge=dodge) * capsize + color_by, hatch_by = _norm_color_hatch(color, hatch, cat.df) base = _lg.BoxPlot.from_arrays( - x, - arrays, - name=name, - orient=orient, - capsize=capsize, - extent=extent, + x, arr, name=name, orient=orient, capsize=_capsize, extent=_extent, backend=backend, - ) - super().__init__(base, source) - base.with_edge(color=theme.get_theme().foreground_color) - if color is not None: - self.with_color(color) - if hatch is not None: - self.with_hatch(hatch) - - @classmethod - def from_table( - cls, - df: _DF, - offset: tuple[str, ...], - value: str, - color: str | None = None, - hatch: str | None = None, - name: str | None = None, - orient: str | Orientation = Orientation.VERTICAL, - capsize: float = 0.1, - extent: float = 0.8, - backend: str | Backend | None = None, - ) -> DFBoxPlot[_DF]: - src = parse(df) - self = DFBoxPlot( - src, offset, value, orient=orient, name=name, color=color, hatch=hatch, - capsize=capsize, extent=extent, backend=backend ) # fmt: skip - return self + super().__init__(base, cat.df) + _BoxLikeMixin.__init__(self, 
categories, _splitby, color_by, hatch_by) @property def orient(self) -> Orientation: @@ -350,7 +257,7 @@ def err_func(x): return self._update_error(err_func) def _update_estimate(self, est_func: Callable[[np.ndarray], float]) -> Self: - arrays, _ = self._generate_datasets() + arrays = self._get_arrays() est = [est_func(arr) for arr in arrays] self._set_estimation_values(est) return self @@ -359,7 +266,7 @@ def _update_error( self, err_func: Callable[[np.ndarray], tuple[float, float]], ) -> Self: - arrays, _ = self._generate_datasets() + arrays = self._get_arrays() err_low = [] err_high = [] for arr in arrays: @@ -375,50 +282,30 @@ class DFPointPlot( ): def __init__( self, - source: DataFrameWrapper[_DF], - offset: str | tuple[str, ...], + cat: CatIterator[_DF], value: str, - *, color: str | tuple[str, ...] | None = None, hatch: str | tuple[str, ...] | None = None, + dodge: str | tuple[str, ...] | bool | None = None, name: str | None = None, orient: Orientation = Orientation.VERTICAL, capsize: float = 0.1, backend: str | Backend | None = None, ): - _BoxLikeMixin.__init__(self, source, offset, value, color, hatch) - arrays, self._labels = self._generate_datasets() - x = self._offset_by.generate(self._labels, self._splitby) + _splitby, dodge = _shared.norm_dodge( + cat.df, cat.offsets, color, hatch, dodge=dodge, + ) # fmt: skip + x, arr, categories = cat.prep_arrays(_splitby, value, dodge=dodge) + _capsize = cat.zoom_factor(dodge=dodge) * capsize + color_by, hatch_by = _norm_color_hatch(color, hatch, cat.df) base = _lg.LabeledPlot.from_arrays( - x, arrays, name=name, orient=orient, capsize=capsize, backend=backend, + x, arr, name=name, orient=orient, capsize=_capsize, backend=backend, ) # fmt: skip - super().__init__(base, source) + self._arrays = arr + super().__init__(base, cat.df) + _BoxLikeMixin.__init__(self, categories, _splitby, color_by, hatch_by) base.with_edge(color=theme.get_theme().foreground_color) self._orient = orient - if color is not None: - 
self.with_color(color) - if hatch is not None: - self.with_hatch(hatch) - - @classmethod - def from_table( - cls, - df: _DF, - offset: tuple[str, ...], - value: str, - color: str | None = None, - hatch: str | None = None, - name: str | None = None, - orient: str | Orientation = Orientation.VERTICAL, - capsize: float = 0.1, - backend: str | Backend | None = None, - ) -> DFPointPlot[_DF]: - src = parse(df) - self = DFPointPlot( - src, offset, value, orient=orient, name=name, color=color, hatch=hatch, - capsize=capsize, backend=backend - ) # fmt: skip - return self @property def orient(self) -> Orientation: @@ -437,6 +324,9 @@ def with_shift( base.set_data(data.x, data.y + shift) return self + def _get_arrays(self) -> list[np.ndarray]: + return self._arrays + def _set_estimation_values(self, est): if self.orient.is_vertical: self._base_layer.set_data(ydata=est) @@ -452,62 +342,45 @@ def _set_error_values(self, err_low, err_high): class DFBarPlot( - _shared.DataFrameLayerWrapper[_lg.LabeledBars, _DF], _BoxLikeMixin, Generic[_DF] + _shared.DataFrameLayerWrapper[_lg.LabeledBars, _DF], _EstimatorMixin, Generic[_DF] ): def __init__( self, - source: DataFrameWrapper[_DF], - offset: str | tuple[str, ...], + cat: CatIterator[_DF], value: str, - *, color: str | tuple[str, ...] | None = None, hatch: str | tuple[str, ...] | None = None, + dodge: str | tuple[str, ...] 
| bool | None = None, name: str | None = None, orient: Orientation = Orientation.VERTICAL, capsize: float = 0.1, extent: float = 0.8, backend: str | Backend | None = None, ): - _BoxLikeMixin.__init__(self, source, offset, value, color, hatch) - arrays, self._labels = self._generate_datasets() - x = self._offset_by.generate(self._labels, self._splitby) + _splitby, dodge = _shared.norm_dodge( + cat.df, cat.offsets, color, hatch, dodge=dodge, + ) # fmt: skip + x, arr, categories = cat.prep_arrays(_splitby, value, dodge=dodge) + _extent = cat.zoom_factor(dodge=dodge) * extent + _capsize = cat.zoom_factor(dodge=dodge) * capsize + color_by, hatch_by = _norm_color_hatch(color, hatch, cat.df) base = _lg.LabeledBars.from_arrays( - x, arrays, name=name, orient=orient, capsize=capsize, extent=extent, + x, arr, name=name, orient=orient, capsize=_capsize, extent=_extent, backend=backend, ) # fmt: skip - super().__init__(base, source) + self._arrays = arr + super().__init__(base, cat.df) + _BoxLikeMixin.__init__(self, categories, _splitby, color_by, hatch_by) base.with_edge(color=theme.get_theme().foreground_color) self._orient = orient - if color is not None: - self.with_color(color) - if hatch is not None: - self.with_hatch(hatch) - - @classmethod - def from_table( - cls, - df: _DF, - offset: tuple[str, ...], - value: str, - color: str | None = None, - hatch: str | None = None, - name: str | None = None, - orient: str | Orientation = Orientation.VERTICAL, - capsize: float = 0.1, - extent: float = 0.8, - backend: str | Backend | None = None, - ) -> DFBarPlot[_DF]: - src = parse(df) - self = DFBarPlot( - src, offset, value, orient=orient, name=name, color=color, hatch=hatch, - capsize=capsize, extent=extent, backend=backend, - ) # fmt: skip - return self @property def orient(self) -> Orientation: return self._base_layer.bars.orient + def _get_arrays(self) -> list[np.ndarray]: + return self._arrays + def _set_estimation_values(self, est): if self.orient.is_vertical: 
self._base_layer.set_data(ydata=est) diff --git a/whitecanvas/layers/tabular/_dataframe.py b/whitecanvas/layers/tabular/_dataframe.py index 07c557ce..3148428d 100644 --- a/whitecanvas/layers/tabular/_dataframe.py +++ b/whitecanvas/layers/tabular/_dataframe.py @@ -8,13 +8,13 @@ from cmap import Color, Colormap from whitecanvas import layers as _l +from whitecanvas import theme from whitecanvas.backend import Backend from whitecanvas.layers import _mixin from whitecanvas.layers import group as _lg from whitecanvas.layers.tabular import _jitter, _shared from whitecanvas.layers.tabular import _plans as _p from whitecanvas.layers.tabular._df_compat import DataFrameWrapper, parse -from whitecanvas.layers.tabular._utils import unique from whitecanvas.types import ( ArrayLike1D, ColormapType, @@ -25,6 +25,7 @@ Symbol, _Void, ) +from whitecanvas.utils.hist import histograms if TYPE_CHECKING: from typing_extensions import Self @@ -41,14 +42,13 @@ def __init__( segs: list[np.ndarray], labels: list[tuple[Any, ...]], color: _Cols | None = None, - width: str | None = None, + width: float = 1.0, style: _Cols | None = None, name: str | None = None, backend: str | Backend | None = None, ): splitby = _shared.join_columns(color, style, source=source) self._color_by = _p.ColorPlan.default() - self._width_by = _p.WidthPlan.default() self._style_by = _p.StylePlan.default() self._labels = labels self._splitby = splitby @@ -56,126 +56,41 @@ def __init__( super().__init__(base, source) if color is not None: self.with_color(color) - if isinstance(width, str): - self.with_width(width) + self.with_width(width) if style is not None: self.with_style(style) @classmethod def from_table( cls, - df: _DF, - x: str, - y: str, + df: DataFrameWrapper[_DF], + x: str | _jitter.JitterBase, + y: str | _jitter.JitterBase, color: str | None = None, - width: str | None = None, + width: float | None = None, style: str | None = None, name: str | None = None, backend: str | Backend | None = None, ) -> 
DFLines[_DF]: - src = parse(df) - splitby = _shared.join_columns(color, style, source=src) + splitby = _shared.join_columns(color, style, source=df) segs = [] labels: list[tuple[Any, ...]] = [] - for sl, df in src.group_by(splitby): - labels.append(sl) - segs.append(np.column_stack([df[x], df[y]])) - return DFLines( - src, segs, labels, name=name, color=color, width=width, style=style, - backend=backend, - ) # fmt: skip - - @classmethod - def build_kde( - cls, - df: _DF, - value: str, - band_width: float | None = None, - color: str | None = None, - width: str | None = None, - style: str | None = None, - name: str | None = None, - orient: str | Orientation = Orientation.VERTICAL, - backend: str | Backend | None = None, - ) -> DFLines[_DF]: - from whitecanvas.utils.kde import gaussian_kde - - src = parse(df) - splitby = _shared.join_columns(color, style, source=src) - ori = Orientation.parse(orient) - segs = [] - labels: list[tuple[Any, ...]] = [] - for sl, df in src.group_by(splitby): - labels.append(sl) - each = df[value] - kde = gaussian_kde(each, bw_method=band_width) - sigma = np.sqrt(kde.covariance[0, 0]) - pad = sigma * 2.5 - x = np.linspace(each.min() - pad, each.max() + pad, 100) - y = kde(x) - if ori.is_vertical: - segs.append(np.column_stack([x, y])) - else: - segs.append(np.column_stack([y, x])) - return DFLines( - src, segs, labels, name=name, color=color, width=width, style=style, - backend=backend, - ) # fmt: skip - - @classmethod - def build_hist( - cls, - df: _DF, - value: str, - bins: int | ArrayLike1D = 10, - density: bool = False, - range: tuple[float, float] | None = None, - color: str | None = None, - width: str | None = None, - style: str | None = None, - name: str | None = None, - orient: str | Orientation = Orientation.VERTICAL, - backend: str | Backend | None = None, - ) -> DFLines[_DF]: - src = parse(df) - splitby = _shared.join_columns(color, style, source=src) - ori = Orientation.parse(orient) - segs = [] - labels: list[tuple[Any, ...]] = 
[] - for sl, df in src.group_by(splitby): + if isinstance(x, _jitter.JitterBase): + xj = x + else: + xj = _jitter.IdentityJitter(x) + if isinstance(y, _jitter.JitterBase): + yj = y + else: + yj = _jitter.IdentityJitter(y) + for sl, sub in df.group_by(splitby): labels.append(sl) - each = df[value] - counts, edges = np.histogram(each, bins=bins, density=density, range=range) - x = np.empty(2 * counts.size + 2, dtype=np.float32) - y = np.empty(2 * counts.size + 2, dtype=np.float32) - x[0] = edges[0] - x[-1] = edges[-1] - y[0] = y[-1] = 0 - x[1:-1:2] = edges[:-1] - x[2:-1:2] = edges[1:] - y[1:-1:2] = counts - y[2:-1:2] = counts - if ori.is_vertical: - segs.append(np.column_stack([x, y])) - else: - segs.append(np.column_stack([y, x])) + segs.append(np.column_stack([xj.map(sub), yj.map(sub)])) return DFLines( - src, segs, labels, name=name, color=color, width=width, style=style, + df, segs, labels, name=name, color=color, width=width, style=style, backend=backend, ) # fmt: skip - @property - def color(self) -> _p.ColorPlan: - return self._color_by - - @property - def width(self) -> _p.WidthPlan: - return self._width_by - - @property - def style(self) -> _p.StylePlan: - return self._style_by - @overload def with_color(self, value: ColorType) -> Self: ... @@ -200,21 +115,8 @@ def with_color(self, by, /, palette=None): self._color_by = color_by return self - @overload def with_width(self, value: float) -> Self: - ... - - @overload - def with_width(self, by: str, limits=None) -> Self: - ... 
- - def with_width(self, by, /, limits=None) -> Self: - if isinstance(by, str): - width_by = _p.WidthPlan.from_range(by, limits=limits) - else: - width_by = _p.WidthPlan.from_const(float(by)) - self._base_layer.width = width_by.map(self._source) - self._width_by = width_by + self._base_layer.width = value return self def with_style(self, by: str | Iterable[str], styles=None) -> Self: @@ -280,109 +182,8 @@ def __init__( self.with_symbol(symbol) if size is not None: self.with_size(size) - - def _generate_labels(self): - pos, labels = self._x.generate_labels(self._source) - return pos, ["\n".join(str(_l) for _l in lbl) for lbl in labels] - - @property - def symbol(self) -> _p.SymbolPlan: - return self._symbol_by - - @property - def size(self) -> _p.SizePlan: - return self._size_by - - @property - def color(self) -> _p.ColorPlan: - return self._color_by - - @property - def hatch(self) -> _p.HatchPlan: - return self._hatch_by - - @property - def width(self) -> _p.WidthPlan: - return self._width_by - - @classmethod - def from_table( - cls, - df: _DF, - x: str, - y: str, - *, - color: str | tuple[str, ...] | None = None, - hatch: str | tuple[str, ...] | None = None, - symbol: str | tuple[str, ...] | None = None, - size: str | None = None, - name: str | None = None, - backend: str | Backend | None = None, - ) -> DFMarkers[_DF]: - src = parse(df) - xj = _jitter.identity_or_categorical(src, x) - yj = _jitter.identity_or_categorical(src, y) - return DFMarkers( - src, xj, yj, name=name, color=color, hatch=hatch, symbol=symbol, - size=size, backend=backend, - ) # fmt: skip - - @classmethod - def build_stripplot( - cls, - df: _DF, - label: str, - value: str, - *, - color: str | tuple[str, ...] | None = None, - hatch: str | tuple[str, ...] | None = None, - symbol: str | tuple[str, ...] 
| None = None, - size: str | None = None, - name: str | None = None, - orient: str | Orientation = Orientation.VERTICAL, - extent: float = 0.8, - seed: int | None = 0, - backend: str | Backend | None = None, - ) -> DFMarkerGroups[_DF]: - src = parse(df) - xj = _jitter.UniformJitter(label, extent=extent, seed=seed) - yj = _jitter.identity_or_categorical(src, value) - if not Orientation.parse(orient).is_vertical: - xj, yj = yj, xj - return DFMarkerGroups( - src, xj, yj, name=name, color=color, hatch=hatch, orient=orient, - symbol=symbol, size=size, backend=backend, - ) # fmt: skip - - @classmethod - def build_swarmplot( - cls, - df: _DF, - label: str, - value: str, - *, - color: str | tuple[str, ...] | None = None, - hatch: str | tuple[str, ...] | None = None, - symbol: str | tuple[str, ...] | None = None, - size: str | None = None, - name: str | None = None, - orient: str | Orientation = Orientation.VERTICAL, - extent: float = 0.8, - sort: bool = False, - backend: str | Backend | None = None, - ) -> DFMarkerGroups[_DF]: - src = parse(df) - if sort: - src = src.sort(value) - lims = src[value].min(), src[value].max() - xj = _jitter.SwarmJitter(label, value, limits=lims, extent=extent) - yj = _jitter.identity_or_categorical(src, value) - if not Orientation.parse(orient).is_vertical: - xj, yj = yj, xj - return DFMarkerGroups( - src, xj, yj, name=name, color=color, hatch=hatch, orient=orient, - symbol=symbol, size=size, backend=backend, - ) # fmt: skip + else: + self.with_size(theme.get_theme().markers.size) @overload def with_color(self, value: ColorType) -> Self: @@ -455,10 +256,10 @@ def with_edge_colormap( self._edge_color_by = color_by return self - def with_hatch(self, by: str | Iterable[str], choices=None) -> Self: + def with_hatch(self, by: str | Iterable[str], palette=None) -> Self: cov = _shared.ColumnOrValue(by, self._source) if cov.is_column: - hatch_by = _p.HatchPlan.new(cov.columns, values=choices) + hatch_by = _p.HatchPlan.new(cov.columns, values=palette) 
else: hatch_by = _p.HatchPlan.from_const(Hatch(cov.value)) hatches = hatch_by.map(self._source) @@ -540,6 +341,28 @@ def with_shift(self, dx: float = 0.0, dy: float = 0.0) -> Self: canvas._autoscale_for_layer(self, pad_rel=0.025) return self + def as_edge_only( + self, + width: float = 3.0, + style: str | LineStyle = LineStyle.SOLID, + ) -> Self: + """ + Convert the markers to edge-only mode. + + This method will set the face color to transparent and the edge color to the + current face color. + + Parameters + ---------- + width : float, default 3.0 + Width of the edge. + style : str or LineStyle, default LineStyle.SOLID + Line style of the edge. + """ + for layer in self.base.iter_children(): + layer.as_edge_only(width=width, style=style) + return self + class DFMarkerGroups(DFMarkers): def __init__(self, *args, orient: Orientation = Orientation.VERTICAL, **kwargs): @@ -570,9 +393,8 @@ class DFBars( def __init__( self, source: DataFrameWrapper[_DF], - offset: str, - value: str, - *, + x, + y, color: str | tuple[str, ...] | None = None, hatch: str | tuple[str, ...] 
| None = None, name: str | None = None, @@ -580,27 +402,13 @@ def __init__( extent: float = 0.8, backend: str | Backend | None = None, ): - if isinstance(offset, str): - offset = (offset,) - splitby = _shared.join_columns(offset, color, hatch, source=source) - unique_sl: list[tuple[Any, ...]] = [] - values = [] - for sl, df in source.group_by(splitby): - unique_sl.append(sl) - series = df[value] - if len(series) != 1: - raise ValueError(f"More than one value found for category {sl!r}.") - values.append(series[0]) - + splitby = _shared.join_columns(color, hatch, source=source) self._color_by = _p.ColorPlan.default() - self._hatch_by = _p.HatchPlan.default() - self._offset_by = _p.OffsetPlan.default().more_by(*offset) - self._labels = unique_sl + self._style_by = _p.StylePlan.default() self._splitby = splitby - x = self._offset_by.generate(self._labels, splitby) base = _l.Bars( - x, values, name=name, orient=orient, extent=extent, backend=backend + x, y, name=name, orient=orient, extent=extent, backend=backend ).with_face_multi() super().__init__(base, source) if color is not None: @@ -611,53 +419,42 @@ def __init__( @classmethod def from_table( cls, - df: _DF, - x: str, - y: str, - *, - color: str | tuple[str, ...] | None = None, - hatch: str | tuple[str, ...] | None = None, - name: str | None = None, - extent: float = 0.8, - backend: str | Backend | None = None, - ) -> DFBars[_DF]: - src = parse(df) - return DFBars( - src, x, y, name=name, color=color, hatch=hatch, extent=extent, - backend=backend - ) # fmt: skip - - @classmethod - def build_count( - cls, - df: _DF, - offset: str, + df: DataFrameWrapper[_DF], + x: str | _jitter.JitterBase, + y: str | _jitter.JitterBase, *, color: str | tuple[str, ...] | None = None, hatch: str | tuple[str, ...] 
| None = None, name: str | None = None, - orient: str | Orientation = Orientation.VERTICAL, extent: float = 0.8, + orient: Orientation = Orientation.VERTICAL, backend: str | Backend | None = None, ) -> DFBars[_DF]: - src = parse(df) - splitby = _shared.join_columns(offset, color, hatch, source=src) - new_src = src.value_count(splitby) + splitby = _shared.join_columns(color, hatch, source=df) + if isinstance(x, _jitter.JitterBase): + xj = x + else: + xj = _jitter.IdentityJitter(x) + if isinstance(y, _jitter.JitterBase): + yj = y + else: + yj = _jitter.IdentityJitter(y) + xs: list[np.ndarray] = [] + ys: list[np.ndarray] = [] + for _, sub in df.group_by(splitby): + xcur = xj.map(sub) + ycur = yj.map(sub) + order = np.argsort(xcur) + xs.append(xcur[order]) + ys.append(ycur[order]) + # BUG: order of coloring and x/y do not match + x0 = np.concatenate(xs) + y0 = np.concatenate(ys) return DFBars( - new_src, offset, "size", name=name, color=color, hatch=hatch, - orient=orient, extent=extent, backend=backend + df, x0, y0, name=name, color=color, hatch=hatch, extent=extent, + orient=orient, backend=backend, ) # fmt: skip - @property - def color(self) -> _p.ColorPlan: - """Return the color plan object.""" - return self._color_by - - @property - def hatch(self) -> _p.HatchPlan: - """Return the hatch plan object.""" - return self._hatch_by - def with_color(self, by: str | Iterable[str] | ColorType, palette=None) -> Self: cov = _shared.ColumnOrValue(by, self._source) if cov.is_column: @@ -666,7 +463,7 @@ def with_color(self, by: str | Iterable[str] | ColorType, palette=None) -> Self: color_by = _p.ColorPlan.from_palette(cov.columns, palette=palette) else: color_by = _p.ColorPlan.from_const(Color(cov.value)) - self._base_layer.face.color = color_by.generate(self._labels, self._splitby) + self._base_layer.face.color = color_by.map(self._source) self._color_by = color_by return self @@ -678,23 +475,12 @@ def with_hatch(self, by: str | Iterable[str], choices=None) -> Self: hatch_by 
= _p.HatchPlan.new(cov.columns, values=choices) else: hatch_by = _p.HatchPlan.from_const(Hatch(cov.value)) - self._base_layer.face.hatch = hatch_by.generate(self._labels, self._splitby) + self._base_layer.face.hatch = hatch_by.map(self._source) self._hatch_by = hatch_by return self class DFHeatmap(_shared.DataFrameLayerWrapper[_l.Image, _DF], Generic[_DF]): - def __init__( - self, - base: _l.Image, - source: DataFrameWrapper[_DF], - xticks: list[str] | None = None, - yticks: list[str] | None = None, - ): - super().__init__(base, source) - self._xticks = xticks - self._yticks = yticks - @property def cmap(self) -> Colormap: return self._base_layer.cmap @@ -738,54 +524,16 @@ def build_hist( return cls(base, src) @classmethod - def build_heatmap( + def from_array( cls, - df: _DF, - x: str, - y: str, - value: str, + src: DataFrameWrapper[_DF], + arr: np.ndarray, name: str | None = None, cmap: ColormapType = "gray", clim: tuple[float | None, float | None] | None = None, - fill=0, backend: Backend | str | None = None, - ) -> Self: - src = parse(df) - xnunique = unique(src[x], axis=None) - ynunique = unique(src[y], axis=None) - dtype = src[value].dtype - if dtype.kind not in "fiub": - raise ValueError(f"Column {value!r} is not numeric.") - arr = np.full((ynunique.size, xnunique.size), fill, dtype=dtype) - xmap = {x: i for i, x in enumerate(xnunique)} - ymap = {y: i for i, y in enumerate(ynunique)} - for sl, sub in src.group_by((x, y)): - xval, yval = sl - vals = sub[value] - if vals.size == 1: - arr[ymap[yval], xmap[xval]] = sub[value][0] - else: - raise ValueError(f"More than one value found for {sl!r}.") - if clim is None: - # `fill` may be outside the range of the data, so calculate clim here. 
- clim = src[value].min(), src[value].max() - base = _l.Image(arr, name=name, cmap=cmap, clim=clim, backend=backend) - return cls( - base, - src, - xticks=[str(_x) for _x in xnunique], - yticks=[str(_y) for _y in ynunique], - ) - - def _generate_xticks(self): - if self._xticks is None: - return None - return np.arange(len(self._xticks)), self._xticks - - def _generate_yticks(self): - if self._yticks is None: - return None - return np.arange(len(self._yticks)), self._yticks + ) -> DFHeatmap[_DF]: + return cls(_l.Image(arr, name=name, cmap=cmap, clim=clim, backend=backend), src) class DFPointPlot2D(_shared.DataFrameLayerWrapper[_lg.LabeledPlot, _DF], Generic[_DF]): @@ -814,3 +562,215 @@ def __init__( if size is not None: base.markers.size = size super().__init__(base, source) + + +class DFHistograms( + _shared.DataFrameLayerWrapper[_lg.LayerCollectionBase[_lg.Histogram], _DF], + Generic[_DF], +): + def __init__( + self, + source: DataFrameWrapper[_DF], + base: _lg.LayerCollectionBase[_lg.Histogram], + labels: list[tuple[Any, ...]], + color: _Cols | None = None, + width: str | None = None, + style: _Cols | None = None, + ): + splitby = _shared.join_columns(color, style, source=source) + self._color_by = _p.ColorPlan.default() + self._width_by = _p.WidthPlan.default() + self._style_by = _p.StylePlan.default() + self._labels = labels + self._splitby = splitby + super().__init__(base, source) + if color is not None: + self.with_color(color) + if isinstance(width, str): + self.with_width(width) + if style is not None: + self.with_style(style) + + @classmethod + def from_table( + cls, + df: DataFrameWrapper[_DF], + value: str, + bins: int | ArrayLike1D, + limits: tuple[float, float] | None = None, + kind="count", + shape="bars", + color: str | None = None, + width: float = 1.0, + style: str | None = None, + name: str | None = None, + orient: str | Orientation = Orientation.VERTICAL, + backend: str | Backend | None = None, + ) -> DFHistograms[_DF]: + splitby = 
_shared.join_columns(color, style, source=df) + ori = Orientation.parse(orient) + arrays: list[np.ndarray] = [] + labels: list[tuple] = [] + for sl, sub in df.group_by(splitby): + labels.append(sl) + arrays.append(sub[value]) + hist = histograms(arrays, bins, limits) + + layers = [] + for arr in arrays: + each_layer = _lg.Histogram.from_array( + arr, + kind=kind, + bins=hist.edges, + limits=limits, + width=width, + orient=ori, + shape=shape, + backend=backend, + ) + layers.append(each_layer) + base = _lg.LayerCollectionBase(layers, name=name) + return cls(df, base, labels, color=color, width=width, style=style) + + @overload + def with_color(self, value: ColorType) -> Self: + ... + + @overload + def with_color( + self, + by: str | Iterable[str], + palette: ColormapType | None = None, + ) -> Self: + ... + + def with_color(self, by, /, palette=None): + cov = _shared.ColumnOrValue(by, self._source) + if cov.is_column: + if set(cov.columns) > set(self._splitby): + raise ValueError(f"Cannot color by a column other than {self._splitby}") + color_by = _p.ColorPlan.from_palette(cov.columns, palette=palette) + else: + color_by = _p.ColorPlan.from_const(Color(cov.value)) + for i, col in enumerate(color_by.generate(self._labels, self._splitby)): + self._base_layer[i].color = col + self._color_by = color_by + return self + + def with_width(self, value: float) -> Self: + for hist in self._base_layer: + hist.line.width = value + return self + + def with_style(self, by: str | Iterable[str], styles=None) -> Self: + cov = _shared.ColumnOrValue(by, self._source) + if cov.is_column: + if set(cov.columns) > set(self._splitby): + raise ValueError(f"Cannot style by a column other than {self._splitby}") + style_by = _p.StylePlan.new(cov.columns, values=styles) + else: + style_by = _p.StylePlan.from_const(LineStyle(cov.value)) + for i, st in enumerate(style_by.generate(self._labels, self._splitby)): + self._base_layer[i].line.style = st + self._style_by = style_by + return self + + +class 
DFKde( + _shared.DataFrameLayerWrapper[_lg.LayerCollectionBase[_lg.Kde], _DF], + Generic[_DF], +): + def __init__( + self, + source: DataFrameWrapper[_DF], + base: _lg.LayerCollectionBase[_lg.Kde], + labels: list[tuple[Any, ...]], + color: _Cols | None = None, + width: str | None = None, + style: _Cols | None = None, + ): + splitby = _shared.join_columns(color, style, source=source) + self._color_by = _p.ColorPlan.default() + self._width_by = _p.WidthPlan.default() + self._style_by = _p.StylePlan.default() + self._labels = labels + self._splitby = splitby + super().__init__(base, source) + if color is not None: + self.with_color(color) + if isinstance(width, str): + self.with_width(width) + if style is not None: + self.with_style(style) + + @classmethod + def from_table( + cls, + df: DataFrameWrapper[_DF], + value: str, + band_width: float | None = None, + color: str | None = None, + width: float = 1.0, + style: str | None = None, + name: str | None = None, + orient: str | Orientation = Orientation.VERTICAL, + backend: str | Backend | None = None, + ) -> DFHistograms[_DF]: + splitby = _shared.join_columns(color, style, source=df) + ori = Orientation.parse(orient) + arrays: list[np.ndarray] = [] + labels: list[tuple] = [] + for sl, sub in df.group_by(splitby): + labels.append(sl) + arrays.append(sub[value]) + layers = [] + for arr in arrays: + each_layer = _lg.Kde.from_array( + arr, width=width, band_width=band_width, orient=ori, backend=backend, + ) # fmt: skip + layers.append(each_layer) + base = _lg.LayerCollectionBase(layers, name=name) + return cls(df, base, labels, color=color, width=width, style=style) + + @overload + def with_color(self, value: ColorType) -> Self: + ... + + @overload + def with_color( + self, + by: str | Iterable[str], + palette: ColormapType | None = None, + ) -> Self: + ... 
+ + def with_color(self, by, /, palette=None): + cov = _shared.ColumnOrValue(by, self._source) + if cov.is_column: + if set(cov.columns) > set(self._splitby): + raise ValueError(f"Cannot color by a column other than {self._splitby}") + color_by = _p.ColorPlan.from_palette(cov.columns, palette=palette) + else: + color_by = _p.ColorPlan.from_const(Color(cov.value)) + for i, col in enumerate(color_by.generate(self._labels, self._splitby)): + self._base_layer[i].color = col + self._color_by = color_by + return self + + def with_width(self, value: float) -> Self: + for hist in self._base_layer: + hist.line.width = value + return self + + def with_style(self, by: str | Iterable[str], styles=None) -> Self: + cov = _shared.ColumnOrValue(by, self._source) + if cov.is_column: + if set(cov.columns) > set(self._splitby): + raise ValueError(f"Cannot style by a column other than {self._splitby}") + style_by = _p.StylePlan.new(cov.columns, values=styles) + else: + style_by = _p.StylePlan.from_const(LineStyle(cov.value)) + for i, st in enumerate(style_by.generate(self._labels, self._splitby)): + self._base_layer[i].line.style = st + self._style_by = style_by + return self diff --git a/whitecanvas/layers/tabular/_df_compat.py b/whitecanvas/layers/tabular/_df_compat.py index 5e8689d3..3efc7831 100644 --- a/whitecanvas/layers/tabular/_df_compat.py +++ b/whitecanvas/layers/tabular/_df_compat.py @@ -24,7 +24,7 @@ def __repr__(self) -> str: return f"{type(self).__name__} of {self._data!r}" def __len__(self) -> int: - such_as = next(iter(self.iter_values()), None) + such_as = next(self.iter_values(), None) if such_as is None: return 0 else: @@ -32,11 +32,11 @@ def __len__(self) -> int: @property def shape(self) -> tuple[int, int]: - such_as = next(iter(self.iter_values()), None) + such_as = next(self.iter_values(), None) if such_as is None: return 0, 0 else: - return such_as.size, len(self.iter_keys()) + return such_as.size, len(self.columns) def get_native(self) -> _T: return self._data 
@@ -45,6 +45,9 @@ def get_native(self) -> _T: def __getitem__(self, item: str) -> NDArray[np.generic]: ... + def __contains__(self, item: str) -> bool: + return item in self.iter_keys() + @abstractmethod def iter_keys(self) -> Iterator[str]: ... @@ -83,7 +86,11 @@ def agg_by(self, by: tuple[str, ...], on: str, method: str) -> Self: @abstractmethod def value_count(self, by: tuple[str, ...]) -> Self: - ... + """Return the count of each group.""" + + @abstractmethod + def value_first(self, by: tuple[str, ...], on: str) -> Self: + """Return the first value of a column for each group.""" @property def columns(self) -> list[str]: @@ -123,6 +130,9 @@ def filter( return DictWrapper({k: v[sl] for k, v in self._data.items()}) def group_by(self, by: tuple[str, ...]) -> Iterator[tuple[tuple[Any, ...], Self]]: + if by == (): + yield (), self + return observed = set() for row in zip(*[self._data[b] for b in by]): if row in observed: @@ -149,6 +159,14 @@ def value_count(self, by: tuple[str, ...]) -> Self: out["size"].append(len(sub[by[0]])) return DictWrapper({k: np.array(v) for k, v in out.items()}) + def value_first(self, by: tuple[str, ...], on: str) -> Self: + out = {k: [] for k in [*by, on]} + for sl, sub in self.group_by(by): + for b, s in zip(by, sl): + out[b].append(s) + out[on].append(sub[on][0]) + return DictWrapper({k: np.array(v) for k, v in out.items()}) + class PandasWrapper(DataFrameWrapper["pd.DataFrame"]): def __getitem__(self, item: str) -> np.ndarray: @@ -178,6 +196,9 @@ def filter( return PandasWrapper(self._data[sers]) def group_by(self, by: tuple[str, ...]) -> Iterator[tuple[tuple[Any, ...], Self]]: + if by == (): + yield (), self + return for sl, sub in self._data.groupby(list(by), observed=True): yield sl, PandasWrapper(sub) @@ -192,6 +213,9 @@ def value_count(self, by: tuple[str, ...]) -> Self: rows.append((*sl, len(sub))) return PandasWrapper(pd.DataFrame(rows, columns=[*by, "size"])) + def value_first(self, by: tuple[str, ...], on: str) -> Self: + 
return PandasWrapper(self._data.groupby(list(by)).first().reset_index()) + class PolarsWrapper(DataFrameWrapper["pl.DataFrame"]): def __getitem__(self, item: str) -> np.ndarray: @@ -225,6 +249,9 @@ def filter( return PolarsWrapper(df) def group_by(self, by: tuple[str, ...]) -> Iterator[tuple[tuple[Any, ...], Self]]: + if by == (): + yield (), self + return for sl, sub in self._data.group_by(by, maintain_order=True): yield sl, PolarsWrapper(sub) @@ -235,12 +262,15 @@ def agg_by(self, by: tuple[str, ...], on: str, method: str) -> Self: return PolarsWrapper(self._data.group_by(by, maintain_order=True).agg(expr)) def value_count(self, by: tuple[str, ...]) -> Self: - return ( + return PolarsWrapper( self._data.group_by(by, maintain_order=True) .count() .rename({"count": "size"}) ) + def value_first(self, by: tuple[str, ...], on: str) -> Self: + return PolarsWrapper(self._data.group_by(by, maintain_order=True).first()) + class PyArrowWrapper(DataFrameWrapper["pa.Table"]): def __getitem__(self, item: str) -> np.ndarray: @@ -267,6 +297,9 @@ def filter( return PyArrowWrapper(df) def group_by(self, by: tuple[str, ...]) -> Iterator[tuple[tuple[Any, ...], Self]]: + if by == (): + yield (), self + return for sl, sub in self._data.group_by(by, maintain_order=True): yield sl, PyArrowWrapper(sub) @@ -281,12 +314,15 @@ def agg_by(self, by: tuple[str, ...], on: str, method: str) -> Self: ) def value_count(self, by: tuple[str, ...]) -> Self: - return ( + return PyArrowWrapper( self._data.group_by(by, maintain_order=True) .count() .rename_columns([*by, "size"]) ) + def value_first(self, by: tuple[str, ...], on: str) -> Self: + return PyArrowWrapper(self._data.group_by(by, maintain_order=True).first()) + def parse(data: Any) -> DataFrameWrapper: """Parse a data object into a DataFrameWrapper.""" diff --git a/whitecanvas/layers/tabular/_jitter.py b/whitecanvas/layers/tabular/_jitter.py index 07ac5813..949cca3d 100644 --- a/whitecanvas/layers/tabular/_jitter.py +++ 
b/whitecanvas/layers/tabular/_jitter.py @@ -1,6 +1,5 @@ from __future__ import annotations -import itertools from abc import ABC, abstractmethod from typing import TypeVar @@ -8,24 +7,15 @@ from numpy.typing import NDArray from whitecanvas.layers.tabular._df_compat import DataFrameWrapper -from whitecanvas.layers.tabular._plans import OffsetPlan -from whitecanvas.layers.tabular._utils import unique _DF = TypeVar("_DF") class JitterBase(ABC): @abstractmethod - def map(self, src: DataFrameWrapper[_DF]) -> np.ndarray: + def map(self, src: DataFrameWrapper[_DF]) -> NDArray[np.floating]: """Map the source data to jittered data.""" - @abstractmethod - def generate_labels( - self, - src: DataFrameWrapper[_DF], - ) -> tuple[NDArray[np.floating], list[tuple[str, ...]]]: - """Generate labels for the jittered data.""" - class IdentityJitter(JitterBase): """No jittering.""" @@ -35,62 +25,37 @@ def __init__(self, by: str): raise TypeError(f"Only str is allowed, got {type(by)}") self._by = by - def map(self, src: DataFrameWrapper[_DF]) -> np.ndarray: + def map(self, src: DataFrameWrapper[_DF]) -> NDArray[np.floating]: return src[self._by] - def generate_labels( - self, - src: DataFrameWrapper[_DF], - ) -> tuple[NDArray[np.floating], list[tuple[str, ...]]]: - """Generate labels for the jittered data.""" - return _map_x_and_label([src[b] for b in self._by]) + def check(self, src: DataFrameWrapper[_DF]) -> IdentityJitter: + if self._by not in src: + raise ValueError(f"Column {self._by} not found in the data frame.") + if src[self._by].dtype.kind not in "iufb": + raise ValueError(f"Column {self._by} is not numeric.") + return self class CategoricalLikeJitter(JitterBase): - def __init__(self, by: str | tuple[str, ...]): + def __init__(self, by: str | tuple[str, ...], mapping: dict[tuple, float]): self._by = _tuple(by) + self._mapping = mapping - def generate_labels( - self, - src: DataFrameWrapper[_DF], - ) -> tuple[NDArray[np.floating], list[tuple[str, ...]]]: - """Generate labels 
for the jittered data.""" - return _map_x_and_label([src[b] for b in self._by]) + def _map(self, src: DataFrameWrapper[_DF]) -> NDArray[np.floating]: + # only map the categorical data to real numbers + args = [src[b] for b in self._by] + out = np.zeros(len(src), dtype=np.float32) + for row, pos in self._mapping.items(): + sl = np.all(np.column_stack([a == r for a, r in zip(args, row)]), axis=1) + out[sl] = pos + return out class CategoricalJitter(CategoricalLikeJitter): """Jitter for categorical data.""" - def map(self, src: DataFrameWrapper[_DF]) -> np.ndarray: - # only map the categorical data to real numbers - return _map_x([src[b] for b in self._by]) - - -def identity_or_categorical( - df: DataFrameWrapper[_DF], - by: str | tuple[str, ...], -) -> JitterBase: - """ - Return either IdentityJitter or CategoricalJitter depending on the data type. - - Parameters - ---------- - df : DataFrameWrapper - The source data. - by : str | tuple[str, ...] - Column(s) to be used for the x-axis. - """ - if isinstance(by, str): - series = df[by] - if series.dtype.kind in "iuf": - return IdentityJitter(by) - else: - return CategoricalJitter((by,)) - else: - if len(by) == 1: - return identity_or_categorical(df, by[0]) - else: - return CategoricalJitter(by) + def map(self, src: DataFrameWrapper[_DF]) -> NDArray[np.floating]: + return self._map(src) class UniformJitter(CategoricalLikeJitter): @@ -99,17 +64,18 @@ class UniformJitter(CategoricalLikeJitter): def __init__( self, by: str | tuple[str, ...], + mapping: dict[tuple, float], extent: float = 0.8, seed: int | None = 0, ): - super().__init__(by) + super().__init__(by, mapping) self._rng = np.random.default_rng(seed) self._extent = extent - def map(self, src: DataFrameWrapper[_DF]) -> np.ndarray: + def map(self, src: DataFrameWrapper[_DF]) -> NDArray[np.floating]: w = self._extent jitter = self._rng.uniform(-w / 2, w / 2, size=len(src)) - return _map_x([src[b] for b in self._by]) + jitter + return self._map(src) + jitter class 
SwarmJitter(CategoricalLikeJitter): @@ -118,77 +84,54 @@ class SwarmJitter(CategoricalLikeJitter): def __init__( self, by: str | tuple[str, ...], + mapping: dict[tuple, float], value: str, limits: tuple[float, float], extent: float = 0.8, ): - super().__init__(by) + super().__init__(by, mapping) self._value = value self._extent = extent self._limits = limits - def map(self, src: DataFrameWrapper[_DF]) -> np.ndarray: + def _get_bins(self, src: DataFrameWrapper[_DF]) -> int: + return 25 # just for now + + def map(self, src: DataFrameWrapper[_DF]) -> NDArray[np.floating]: values = src[self._value] vmin, vmax = self._limits - nbin = 25 + nbin = self._get_bins(src) dv = (vmax - vmin) / nbin + # bin index that each value belongs to v_indices = np.floor((values - vmin) / dv).astype(np.int32) v_indices[v_indices == nbin] = nbin - 1 + + args = [src[b] for b in self._by] + offset_pre = np.zeros(len(src), dtype=np.float32) + for row in self._mapping.keys(): + sl = np.all(np.column_stack([a == r for a, r in zip(args, row)]), axis=1) + offset_pre[sl] = self._map_one(v_indices[sl], nbin) + + offset_max = np.abs(offset_pre).max() + width_default = dv * offset_max + offsets = offset_pre / offset_max * min(self._extent / 2, width_default) + out = self._map(src) + offsets + return out + + def _map_one(self, indices: NDArray[np.int32], nbin: int) -> NDArray[np.floating]: offset_count = np.zeros(nbin, dtype=np.int32) - offset_pre = np.zeros_like(values, dtype=np.int32) - for i, idx in enumerate(v_indices): + offset_pre = np.zeros_like(indices, dtype=np.int32) + for i, idx in enumerate(indices): c = offset_count[idx] if c % 2 == 0: offset_pre[i] = c / 2 else: offset_pre[i] = -(c + 1) / 2 offset_count[idx] += 1 - offset_max = np.abs(offset_pre).max() - width_default = dv * offset_max - offsets = offset_pre / offset_max * min(self._extent / 2, width_default) - out = _map_x([src[b] for b in self._by]) + offsets - return out + return offset_pre def _tuple(x) -> tuple[str, ...]: if 
isinstance(x, str): return (x,) return tuple(x) - - -def _map_x(args: list[np.ndarray]) -> NDArray[np.floating]: - """ - Map the input data to x-axis values. - - >>> _map_x([["a", "a", "b", "b"], ["u", "v", "u", "v"]]) # [0, 1, 2, 3] - >>> _map_x([["p", "q", "r", "r", "q"]]) # [0, 1, 2, 2, 1] - """ - by_all = tuple(str(i) for i in range(len(args))) - plan = OffsetPlan.default().more_by(*by_all) - each_unique = [unique(a, axis=None) for a in args] - labels = list(itertools.product(*each_unique)) - offsets = np.asarray(plan.generate(labels, by_all)) - out = np.zeros_like(args[0], dtype=np.float32) - for i, row in enumerate(labels): - sl = np.all(np.column_stack([a == r for a, r in zip(args, row)]), axis=1) - out[sl] = offsets[i] - return out - - -def _map_x_and_label( - args: list[np.ndarray], -) -> tuple[NDArray[np.floating], list[tuple[str, ...]]]: - """ - Map the input data to x-axis values and generate labels. - - >>> _map_x_and_label([["a", "a", "b", "b"], ["u", "v", "u", "v"]]) - >>> # [0, 1, 2, 3], [("a", "u"), ("a", "v"), ("b", "u"), ("b", "v")] - >>> _map_x_and_label([["p", "q", "r", "r", "q"]]) - >>> # [0, 1, 2], [("p",), ("q",), ("r",)] - """ - by_all = tuple(str(i) for i in range(len(args))) - plan = OffsetPlan.default().more_by(*by_all) - each_unique = [unique(a, axis=None) for a in args] - labels = list(itertools.product(*each_unique)) - offsets = np.asarray(plan.generate(labels, by_all)) - return offsets, labels diff --git a/whitecanvas/layers/tabular/_plans.py b/whitecanvas/layers/tabular/_plans.py index 3a8597d0..2f642001 100644 --- a/whitecanvas/layers/tabular/_plans.py +++ b/whitecanvas/layers/tabular/_plans.py @@ -53,7 +53,7 @@ class OffsetPolicy(ABC): @abstractmethod def get(self, interval: int) -> float: - """Get 1D array for offsets""" + """Get increment of position for given interval from the previous position.""" def with_shift(self, val: float) -> CompositeOffsetPolicy: return CompositeOffsetPolicy([self, ConstOffset(val)]) @@ -269,7 +269,7 
@@ def from_const(cls, value: _V) -> Self: def is_const(self) -> bool: """Return True if the plan is a constant plan.""" - return len(self.values) == 1 + return len(self.by) == 0 def generate( self, diff --git a/whitecanvas/layers/tabular/_shared.py b/whitecanvas/layers/tabular/_shared.py index 334b6d5e..cf5bd344 100644 --- a/whitecanvas/layers/tabular/_shared.py +++ b/whitecanvas/layers/tabular/_shared.py @@ -73,10 +73,57 @@ def join_columns( continue cv = ColumnOrValue(obj, source) if cv.is_column: - out.extend(cv.columns) + for each in cv.columns: + if each not in out: + out.append(each) return tuple(out) def unique_tuple(a: tuple[str, ...], b: tuple[str, ...]) -> tuple[str, ...]: b_filt = tuple(x for x in b if x not in a) return a + b_filt + + +def norm_dodge( + source: DataFrameWrapper[_DF], + offset: str | tuple[str, ...], + *args: str | tuple[str, ...] | None, + dodge: str | tuple[str, ...] | bool = False, +) -> tuple[tuple[str, ...], tuple[str, ...]]: + if isinstance(offset, str): + offset = (offset,) + if isinstance(dodge, bool): + if dodge: + _all = join_columns(*args, source=source) + dodge = tuple(c for c in _all if c not in offset) + else: + dodge = () + elif isinstance(dodge, str): + dodge = (dodge,) + else: + dodge = tuple(dodge) + splitby = join_columns(offset, *args, dodge, source=source) + return splitby, dodge + + +def norm_dodge_markers( + source: DataFrameWrapper[_DF], + offset: str | tuple[str, ...], + color: str | tuple[str, ...] | None = None, + hatch: str | tuple[str, ...] | None = None, + dodge: str | tuple[str, ...] 
| bool = False, +) -> tuple[tuple[str, ...], tuple[str, ...]]: + if isinstance(offset, str): + offset = (offset,) + if isinstance(dodge, bool): + if dodge: + _all = join_columns(color, hatch, source=source) + dodge = tuple(c for c in _all if c not in offset) + else: + dodge = () + elif isinstance(dodge, str): + dodge = (dodge,) + else: + dodge = tuple(dodge) + splitby = join_columns(offset, dodge, source=source) + return splitby, dodge diff --git a/whitecanvas/layers/tabular/_utils.py b/whitecanvas/layers/tabular/_utils.py index 145cc00d..33d70bdc 100644 --- a/whitecanvas/layers/tabular/_utils.py +++ b/whitecanvas/layers/tabular/_utils.py @@ -1,7 +1,5 @@ from __future__ import annotations -import itertools - import numpy as np @@ -11,18 +9,3 @@ def unique(arr: np.ndarray, axis=0) -> np.ndarray: raise ValueError(f"Cannot handle {arr.dtype} in unique().") _, idx = np.unique(arr, axis=axis, return_index=True) return arr[np.sort(idx)] - - -def unique_product(each_unique: list[np.ndarray]) -> np.ndarray: - """ - Return the all the unique combinations of the given arrays. 
- - >>> unique_product([np.array([0, 1, 2]), np.array([3, 4])]) - array([[0, 3], - [0, 4], - [1, 3], - [1, 4], - [2, 3], - [2, 4]]) - """ - return np.array(list(itertools.product(*each_unique))) diff --git a/whitecanvas/plot/_canvases.py b/whitecanvas/plot/_canvases.py index 62ec04c5..85a85f9b 100644 --- a/whitecanvas/plot/_canvases.py +++ b/whitecanvas/plot/_canvases.py @@ -30,12 +30,10 @@ def subplots( nrows: int = 1, ncols: int = 1, *, - link_x: bool = False, - link_y: bool = False, backend: Backend | str | None = None, ) -> CanvasGrid: """Create a new grid of subplots.""" - out = grid(nrows, ncols, link_x=link_x, link_y=link_y, backend=backend) + out = grid(nrows, ncols, backend=backend) for i in range(nrows): for j in range(ncols): out.add_canvas(i, j) diff --git a/whitecanvas/theme/_dataclasses.py b/whitecanvas/theme/_dataclasses.py index 35b50181..b9e92848 100644 --- a/whitecanvas/theme/_dataclasses.py +++ b/whitecanvas/theme/_dataclasses.py @@ -90,7 +90,7 @@ class Line(_BaseModel): class Markers(_BaseModel): """Markers of points.""" - size: int = _field(8) + size: float = _field(8.0) hatch: Hatch = _field(Hatch.SOLID) symbol: Symbol = _field(Symbol.CIRCLE) diff --git a/whitecanvas/utils/hist.py b/whitecanvas/utils/hist.py new file mode 100644 index 00000000..15d30cbb --- /dev/null +++ b/whitecanvas/utils/hist.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +from typing import NamedTuple + +import numpy as np +from numpy.typing import NDArray + +from whitecanvas.utils.normalize import as_array_1d + + +class Histogram(NamedTuple): + edges: NDArray[np.number] + width: float + counts: NDArray[np.integer] + + def density(self) -> NDArray[np.number]: + return self.frequency_scaled(self.width) + + def frequency(self) -> NDArray[np.number]: + return self.frequency_scaled(1) + + def percent(self) -> NDArray[np.number]: + return self.frequency_scaled(100) + + def scaled(self, scale: float) -> NDArray[np.number]: + return self.counts / scale + + def 
frequency_scaled(self, scale: float) -> NDArray[np.number]: + return self.counts / self.counts.sum() / scale + + +class HistogramTuple(NamedTuple): + edges: NDArray[np.number] + width: float + counts: list[NDArray[np.integer]] + + def density(self) -> list[NDArray[np.number]]: + return self.frequency_scaled(self.width) + + def frequency(self) -> list[NDArray[np.number]]: + return self.frequency_scaled(1) + + def percent(self) -> list[NDArray[np.number]]: + return self.frequency_scaled(100) + + def scaled(self, scale: float) -> list[NDArray[np.number]]: + out: list[NDArray[np.number]] = [] + for arr in self.counts: + scaled = arr / scale + out.append(scaled) + return out + + def frequency_scaled(self, scale: float) -> list[NDArray[np.number]]: + out: list[NDArray[np.number]] = [] + for arr in self.counts: + density_scaled = arr / arr.sum() / scale + out.append(density_scaled) + return out + + def centers(self) -> NDArray[np.number]: + return (self.edges[:-1] + self.edges[1:]) / 2 + + +def get_hist_edges( + arrays: list[NDArray[np.number]], + bins: int | NDArray[np.number], + range: tuple[float, float] | None = None, +) -> NDArray[np.number]: + if range is None: + total = np.concatenate(arrays) + value_min = total.min() + value_max = total.max() + else: + value_min, value_max = range + if value_min >= value_max: + raise ValueError("max must be larger than min in range parameter") + if isinstance(bins, (int, np.integer)): + nbins = bins.__index__() + if nbins < 1: + raise ValueError("bins should be a positive integer") + edges = np.linspace(value_min, value_max, nbins + 1) + else: + edges = as_array_1d(bins) + if np.diff(edges).min() <= 0: + raise ValueError("bin edges must increase monotonically") + return edges + + +def histograms( + arrays: list[NDArray[np.number]], + bins: int, + range: tuple[float, float] | None = None, +) -> HistogramTuple: + edges = get_hist_edges(arrays, bins, range) + width = edges[1] - edges[0] + counts = [np.histogram(arr, edges)[0] for arr 
in arrays] + return HistogramTuple(edges, width, counts)