Beautiful Statistical
Visualizations with Seaborn
Seaborn is a high-level statistical visualization library built on Matplotlib. It produces publication-quality plots with minimal code, offering built-in support for pandas DataFrames, statistical aggregation, and elegant default themes.
Creating Beautiful Statistical Visualizations with Seaborn
Seaborn wraps Matplotlib with a statistical layer — automatically computing distributions, confidence intervals, and aggregations. Its tightly integrated DataFrame support means you pass column names as strings rather than raw arrays.
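Because every Seaborn plot is drawn on ordinary Matplotlib figures and axes, you can fine-tune any plot with plt. or ax. calls afterward.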
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Seaborn ships with built-in example datasets
tips = sns.load_dataset("tips")
print(tips.head())
#    total_bill   tip     sex smoker  day    time  size
# 0       16.99  1.01  Female     No  Sun  Dinner     2
# 1       10.34  1.66    Male     No  Sun  Dinner     3

# Set a global theme — applies to all subsequent plots
sns.set_theme(style="whitegrid", palette="deep")

# Each plot below gets its own explicitly created Axes, passed via ax=, so no
# call depends on Matplotlib's implicit "current axes" state.

# ─── 1. Distribution plot ───────────────────────────
# Histogram of the bill amounts with a KDE curve drawn on top.
fig, ax = plt.subplots(figsize=(8, 4))
sns.histplot(tips["total_bill"], kde=True, ax=ax, color="steelblue")
ax.set_title("Distribution of Total Bill")
plt.tight_layout()
plt.show()

# ─── 2. Box plot ────────────────────────────────────
# One box per day, split into smoker / non-smoker by hue.
fig, ax = plt.subplots(figsize=(8, 4))
sns.boxplot(data=tips, x="day", y="total_bill", hue="smoker",
            palette="Set2", ax=ax)
ax.set_title("Total Bill by Day and Smoker Status")
plt.show()

# ─── 3. Violin plot ─────────────────────────────────
# split=True puts the two hue levels on opposite halves of each violin.
fig, ax = plt.subplots(figsize=(8, 5))
sns.violinplot(data=tips, x="day", y="total_bill",
               hue="sex", split=True, palette="muted", ax=ax)
ax.set_title("Bill Distribution: Violin with split hue")
plt.show()

# ─── 4. Bar plot (mean with SD error bars) ──────────
# errorbar="sd" draws the standard deviation, not a confidence interval.
fig, ax = plt.subplots(figsize=(8, 4))
sns.barplot(data=tips, x="day", y="tip",
            estimator="mean", errorbar="sd", palette="pastel", ax=ax)
ax.set_title("Mean Tip ± SD by Day")
plt.show()

# ─── 5. Count plot ──────────────────────────────────
# Bar heights are row counts per day, grouped by sex.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=tips, x="day", hue="sex", palette="Set1", ax=ax)
ax.set_title("Number of Visits by Day and Gender")
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
# ─── Seaborn built-in styles ─────────────────────
# "darkgrid", "whitegrid", "dark", "white", "ticks"
sns.set_style("whitegrid")

# ─── Built-in palettes ───────────────────────────
# Qualitative: "deep", "muted", "pastel", "bright", "dark", "colorblind"
# Sequential: "Blues", "Greens", "Purples", "rocket", "mako", "viridis"
# Diverging: "coolwarm", "RdBu", "icefire"
palette_names = ["deep", "muted", "pastel", "colorblind", "rocket", "viridis"]
n_colors = 8  # swatches shown per palette

# One row of swatches per palette, all in a single figure.
fig, axes = plt.subplots(len(palette_names), 1, figsize=(8, 8))
for ax, name in zip(axes, palette_names):
    # sns.palplot() always opens its own figure and has no ax= parameter,
    # so render the palette as a 1 x n_colors RGB image on the prepared Axes.
    ax.imshow([sns.color_palette(name, n_colors)],
              aspect="auto", interpolation="nearest")
    # Swatches carry no numeric meaning: drop ticks (and with them the grid).
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(name, loc="left", fontsize=9)
plt.tight_layout()
plt.show()

# ─── Context scaling ─────────────────────────────
# "paper" < "notebook" < "talk" < "poster"
# The context only applies inside the with-block, so the figure has to be
# created, drawn and shown before the block exits.
with sns.plotting_context("talk"):
    fig, ax = plt.subplots(figsize=(8, 4))
    sns.histplot(sns.load_dataset("tips")["tip"], kde=True,
                 color="purple", ax=ax)
    ax.set_title("Talk context — larger text")
    plt.show()
Pairplots, Heatmaps, and Regression Plots
These plots reveal relationships between variables — correlation structure (heatmaps), pairwise scatter patterns (pairplots), and trend lines with confidence bands (regression plots). They are standard tools in exploratory data analysis.
Start with sns.pairplot() to see all pairwise relationships at once. Then use sns.heatmap(df.corr()) to quantify them. Drill into specific pairs with sns.jointplot() or sns.regplot().
import seaborn as sns
import matplotlib.pyplot as plt
iris = sns.load_dataset("iris")

# ─── Basic pairplot ──────────────────────────────
# Off-diagonal panels are scatter plots; the diagonal shows one density
# curve per species (diag_kind may be "hist" or "kde").
scatter_style = {"alpha": 0.6, "s": 50}
g = sns.pairplot(
    iris,
    hue="species",
    palette="deep",
    diag_kind="kde",
    plot_kws=scatter_style,
)
g.fig.suptitle("Iris Pairplot — All Feature Pairs", y=1.02, fontsize=14)
plt.show()

# ─── Pairplot with regression ─────────────────────
# Drop the label column first, then draw scatter + regression line in every
# panel; corner=True keeps only the lower triangle of the grid.
features_only = iris.drop("species", axis=1)
g = sns.pairplot(features_only, kind="reg", corner=True)
plt.show()

# ─── Pairplot selecting specific columns ──────────
# Restrict the grid to two features and give each species its own marker.
chosen_columns = ["sepal_length", "petal_length"]
species_markers = ["o", "s", "D"]
g = sns.pairplot(
    iris,
    vars=chosen_columns,
    hue="species",
    markers=species_markers,
)
plt.show()

# Side length of the full grid: every column except the species label.
grid_side = len(iris.columns) - 1
print(f"Pairplot grid size: {grid_side}×{grid_side}")
# → 4×4 plot grid (one per numeric column)
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# ─── Heatmaps: raw values and correlations ───────
# Reshape the long flights table into a month × year grid of passenger counts.
flights_long = sns.load_dataset("flights")
flights = flights_long.pivot(index="month", columns="year", values="passengers")

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
passengers_ax, corr_ax = axes

# Left panel: every cell annotated with its integer passenger count.
sns.heatmap(
    flights,
    ax=passengers_ax,
    cmap="YlOrRd",
    annot=True,
    fmt="d",
    linewidths=0.5,
    cbar_kws={"label": "Passengers"},
)
passengers_ax.set_title("Monthly Airline Passengers")

# Right panel: correlation matrix of the numeric iris columns.
iris = sns.load_dataset("iris").select_dtypes("number")
corr = iris.corr()
# True on and above the diagonal — those cells are hidden, so only the
# lower triangle is drawn.
mask = np.triu(np.ones_like(corr, dtype=bool))
sns.heatmap(
    corr,
    ax=corr_ax,
    mask=mask,
    cmap="coolwarm",
    center=0,          # colormap midpoint sits at zero correlation
    annot=True,
    fmt=".2f",
    square=True,       # force square cells
    linewidths=0.5,
)
corr_ax.set_title("Iris Feature Correlation")

plt.tight_layout()
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset("tips")

# ─── regplot: single regression with CI band ────
# regplot is axes-level, so it gets one dedicated Axes. lmplot and jointplot
# below are figure-level and build their own figures, so a multi-panel grid
# here would only leave empty panels behind.
fig, ax = plt.subplots(figsize=(6, 4))
sns.regplot(data=tips, x="total_bill", y="tip",
            ax=ax,
            ci=95,  # 95% confidence band
            scatter_kws={"alpha": 0.5, "color": "steelblue"},
            line_kws={"color": "red"})
ax.set_title("regplot: tip ~ total_bill")
plt.show()

# ─── lmplot: faceted regression ─────────────────
g = sns.lmplot(data=tips, x="total_bill", y="tip",
               hue="smoker",
               col="time",  # one column per 'time' value
               ci=95,
               scatter_kws={"alpha": 0.6})
g.set_titles("{col_name}")
# Title the grid's own figure rather than whatever figure is "current".
g.figure.suptitle("lmplot: Faceted by Meal Time", y=1.03)
plt.show()

# ─── jointplot: marginal distributions ──────────
g = sns.jointplot(data=tips, x="total_bill", y="tip",
                  kind="reg",  # scatter + regression in center
                  color="purple",
                  marginal_kws={"bins": 20})
g.set_axis_labels("Total Bill ($)", "Tip ($)")
g.figure.suptitle("Jointplot with marginal KDE", y=1.02)
plt.show()

# ─── residplot: check regression residuals ──────
# NOTE: lowess=True relies on the optional statsmodels package.
fig, ax = plt.subplots(figsize=(7, 4))
sns.residplot(data=tips, x="total_bill", y="tip",
              lowess=True,
              scatter_kws={"alpha": 0.5},
              line_kws={"color": "red"},
              ax=ax)
# Zero-residual reference line.
ax.axhline(0, ls="--", color="gray")
ax.set_title("Residual Plot — Non-linearity Check")
plt.show()
Customizing Seaborn Plots for Professional Visuals
Seaborn's Figure-Level API (relplot, displot, catplot, lmplot) returns FacetGrid objects — powerful for creating multi-panel plots with consistent styling. Combine with Matplotlib's customization for publication-ready figures.
Axes-level functions (scatterplot, boxplot, etc.) return a Matplotlib Axes and accept an ax= parameter. Figure-level functions (relplot, catplot) return a FacetGrid and manage their own figure — don't use ax= with these.
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset("tips")

# ─── catplot: unified categorical figure-level ──
g = sns.catplot(
    data=tips,
    x="day", y="total_bill",
    hue="sex",
    col="time",        # separate column per meal time
    kind="violin",     # "box","strip","swarm","bar","point"
    split=True,
    height=5, aspect=0.8,
    palette="husl",
)
g.set_axis_labels("Day of Week", "Total Bill ($)")
g.set_titles("{col_name} Dining")
# catplot has already drawn the hue legend; calling add_legend() again would
# stack a second one on top, so only retitle the existing legend.
g.legend.set_title("Gender")
# The plotted variable is total_bill, so the title names it (not tips).
g.figure.suptitle("Total Bill Distributions by Time, Day, and Gender", y=1.02)
plt.show()

# ─── relplot: scatter/line figure-level ─────────
g = sns.relplot(
    data=tips,
    x="total_bill", y="tip",
    hue="smoker",
    size="size",       # encode party size as dot size
    style="sex",       # encode sex as marker shape
    col="time",
    height=4,
    palette="Set1",
)
g.set_titles("{col_name}")
g.figure.suptitle("relplot: size, hue, style, col encodings", y=1.02)
plt.show()

# ─── Manual FacetGrid ───────────────────────────
# Build the grid by hand: one panel per day, wrapped after two columns, each
# with its own y-scale. Nothing is drawn until map_dataframe() runs, so the
# legend has to be added explicitly here.
g = sns.FacetGrid(tips, col="day", col_wrap=2,
                  height=3.5, sharey=False)
g.map_dataframe(sns.scatterplot, x="total_bill", y="tip",
                hue="sex", palette="dark")
g.add_legend()
g.set_titles(col_template="{col_name}")
g.figure.suptitle("Custom FacetGrid — Tips per Day", y=1.02)
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
tips = sns.load_dataset("tips")

# ─── Full custom publication figure ─────────────
# Global look: tick marks instead of a grid, paper-sized text, and no top or
# right spines on any axes.
sns.set_theme(style="ticks", context="paper",
              font="DejaVu Sans", font_scale=1.1,
              rc={"axes.spines.right": False,
                  "axes.spines.top": False})

fig, axes = plt.subplots(1, 2, figsize=(11, 4))
fig.suptitle("Restaurant Tips Analysis", fontsize=14, fontweight="bold", y=1.02)

# Left: violin plot
sns.violinplot(data=tips, x="day", y="total_bill",
               hue="sex", split=True,
               inner="quart",  # show quartile lines inside
               palette={"Male": "#4C72B0", "Female": "#DD8452"},
               ax=axes[0], linewidth=1.2)
axes[0].set_xlabel("Day of Week", fontsize=11)
axes[0].set_ylabel("Total Bill ($)", fontsize=11)
axes[0].set_title("Bill Distribution by Day", fontsize=12)
axes[0].legend(title="Gender", frameon=False)

# Right: swarm over box
# showfliers=False: the swarm already plots every observation, so the box
# plot's own outlier markers would just be drawn a second time.
sns.boxplot(data=tips, x="time", y="tip",
            palette="pastel", ax=axes[1], width=0.5, linewidth=1.2,
            showfliers=False)
sns.swarmplot(data=tips, x="time", y="tip",
              color="0.25", size=3.5, ax=axes[1])  # overlay raw data
axes[1].set_xlabel("Meal Time", fontsize=11)
axes[1].set_ylabel("Tip ($)", fontsize=11)
axes[1].set_title("Tip Distribution: Box + Swarm", fontsize=12)

# Formatting — applied to both panels
for ax in axes:
    # Whole-dollar tick labels on the y axis.
    ax.yaxis.set_major_formatter(mticker.FormatStrFormatter("$%.0f"))
    # Remove top/right spines and trim the rest to the tick range.
    sns.despine(ax=ax, trim=True)

plt.tight_layout()
# Save before show(): the figure may no longer be available afterwards.
fig.savefig("tips_analysis.pdf", dpi=300, bbox_inches="tight")
plt.show()
print("Figure saved to tips_analysis.pdf")
Save figures with fig.savefig("plot.pdf", dpi=300, bbox_inches="tight"). PDF is a vector format (resolution-independent). Use PNG for web (dpi=150) and PDF/SVG for print. Always call savefig before plt.show() — once show() returns, the figure may already be closed and the saved file can come out blank.