# The Office Ratings with Python and Matplotlib
# Create the figure and the single axes that every later layer is drawn on.
fig, ax = plt.subplots(figsize=(15, 10))

# Some layout stuff ----------------------------------------------
# Background color: use the same color for the figure and the axes.
fig.patch.set_facecolor(BG_WHITE)
ax.set_facecolor(BG_WHITE)

# First, horizontal lines that are used as scale reference.
# One line is drawn at each value in HLINES; zorder=0 keeps them in the
# background, behind anything drawn with a higher zorder.
for h in HLINES:
    ax.axhline(h, color=GREY82, zorder=0)
# Add vertical segments ------------------------------------------
# One vertical segment per row of df_office_avg, running between the
# "imdb_rating" and "avg" columns at x = "episode_mod". They represent the
# deviation of an episode's rating from the mean rating of the season it
# appeared in. Segment colors come from the light colormap, keyed by season.
ax.vlines(
    "episode_mod",
    "imdb_rating",
    "avg",
    color=cmap_light(normalize(df_office_avg["season"])),
    data=df_office_avg,
)
# Add horizontal segments ----------------------------------------
# A grey line that connects mean values.
# The third argument is the format string, either empty or "-".
plt.plot("x", "y", "-", color=GREY40, data=df_lines)

# These represent the mean rating per season: one thick segment per season,
# drawn from that season's rows of df_lines and colored with the dark
# colormap. solid_capstyle="butt" squares each segment off at its end points
# instead of letting the cap extend past them.
for season in df_lines["season"].unique():
    d = df_lines[df_lines["season"] == season]
    plt.plot(
        "x_group", "y", "",
        color=cmap_dark(normalize(season)),
        lw=5,
        data=d,
        solid_capstyle="butt",
    )
# Add dots -------------------------------------------------------
# One dot per episode, placed at ("episode_mod", "imdb_rating").
# Marker sizes are derived from the "total_votes" column through
# scale_to_interval, and colors from the regular colormap, keyed by season.
dot_sizes = scale_to_interval(df_office_avg["total_votes"])
dot_colors = cmap_regular(normalize(df_office_avg["season"]))
plt.scatter(
    x="episode_mod",
    y="imdb_rating",
    s=dot_sizes,
    color=dot_colors,
    zorder=3,  # higher than the reference lines (zorder=0) drawn earlier
    data=df_office_avg,
)
# Add labels -----------------------------------------------------
# They indicate the season and free us from using a legend.
# One boxed label is drawn per unique "mid" value, at y=10.12 and centered
# horizontally on that value.
# NOTE(review): seasons are numbered by position in `midpoints`, so this
# assumes the unique "mid" values appear in season order starting at
# season 1 -- confirm against how df_office_avg is built.
midpoints = df_office_avg["mid"].unique()
for season, mid in enumerate(midpoints, start=1):
    # The text and the outline of its box share the season's dark color.
    color = cmap_dark(normalize(season))
    plt.text(
        mid, 10.12, f" Season {season} ",
        color=color,
        weight="bold",
        ha="center",
        va="center",
        fontname="Special Elite",
        fontsize=11,
        bbox=dict(
            facecolor="none",
            edgecolor=color,
            linewidth=1,
            boxstyle="round",
            pad=0.2,
        ),
    )
# Customize layout -----------------------------------------------
# Hide all four spines by giving them no color.
for side in ("right", "top", "bottom", "left"):
    ax.spines[side].set_color("none")

# Customize y ticks
# * Remove y axis tick marks (length=0)
# * Put labels on both right and left sides
ax.tick_params(axis="y", labelright=True, length=0)
plt.yticks(ticks=HLINES, fontname="Roboto Mono", fontsize=11, color=GREY30)
ax.set_ylim(0.98 * 6.5, 10.2 * 1.02)

# Remove the x ticks and their labels
plt.xticks(ticks=[], labels="")

# Y label
ax.set_ylabel("IMDb Rating", fontname="Roboto Mono", fontsize=14)
# Add caption
# The position (0.5, -0.03) is expressed in axes coordinates because of
# transform=ax.transAxes: horizontally centered, just below the bottom edge
# of the axes.
caption = "Visualization by Cédric Scherer • Data by IMDb via data.world • Fanart Logo by ArieS"
ax.text(
    0.5,
    -0.03,
    caption,
    color=GREY70,
    fontsize=11,
    fontname="Special Elite",
    va="center",
    ha="center",
    transform=ax.transAxes,
)
# Create annotation box to place image.
# IMAGE is wrapped in an OffsetImage (zoom=0.2) and anchored at (1, 6.75) in
# data coordinates. box_alignment=(0, 0.5) aligns the left edge / vertical
# center of the box with that point; the box has no frame and no padding.
logo = OffsetImage(IMAGE, zoom=0.2)
ab = AnnotationBbox(
    logo,
    xy=(1, 6.75),
    xycoords="data",
    frameon=False,
    pad=0,
    box_alignment=(0, 0.5),
)

# Add the annotation box into the axis
ax.add_artist(ab)
# Add custom legend ----------------------------------------------
# We create a horizontal legend from scratch so this plot looks as
# much as possible like the original.
# Horizontal position for the dots and their labels
x_pos = [0.44, 0.48, 0.52, 0.56]
votes = [2000, 4000, 6000, 8000]

# Dots are in terms of the (0, 1) coordinate system of the axis.
# All dots share the same height; the y list is sized from x_pos so the two
# stay in sync if legend entries are added or removed.
# NOTE(review): the sizes come from scale_to_interval applied to `votes`
# alone; whether they match the episode dots for the same vote counts
# depends on how that helper scales its input -- confirm.
plt.scatter(
    x_pos,
    [0.065] * len(x_pos),
    s=scale_to_interval(np.array(votes)),
    color="black",
    transform=ax.transAxes,
)

# Add title to our custom legend
plt.text(
    0.5, 0.0875, "Votes per Episode",
    fontname="Roboto Mono",
    fontsize=10,
    ha="center",
    transform=ax.transAxes,
)

# Place the vote counts below the legend markers
for xpos, vote in zip(x_pos, votes):
    plt.text(
        xpos, 0.035, f"{vote}",
        fontname="Roboto Mono",
        fontsize=9,
        ha="center",
        transform=ax.transAxes,
    )
# Now save the plot!
# Writes "the-office-lollipop.png" at 300 dpi. bbox_inches="tight" fits the
# saved area to the drawn content, with pad_inches=0.3 of padding around it.
plt.savefig("the-office-lollipop.png", dpi=300, pad_inches=0.3, bbox_inches="tight")