Cadences#

%load_ext autoreload
%autoreload 2
import os
from collections import Counter, defaultdict

import dimcat as dc
import ms3
import pandas as pd
import plotly.express as px
from dimcat import plotting
from dimcat.steps import filters, groupers, slicers

import utils

RESULTS_PATH = os.path.abspath(os.path.join(utils.OUTPUT_FOLDER, "cadences"))
os.makedirs(RESULTS_PATH, exist_ok=True)


def make_output_path(
    filename: str,
    extension=None,
    path=RESULTS_PATH,
) -> str:
    return utils.make_output_path(filename=filename, extension=extension, path=path)


def save_figure_as(
    fig, filename, formats=("png", "pdf"), directory=RESULTS_PATH, **kwargs
):
    if formats is not None:
        for fmt in formats:
            plotting.write_image(fig, filename, directory, format=fmt, **kwargs)
    else:
        plotting.write_image(fig, filename, directory, **kwargs)

Loading data#

D = utils.get_dataset("rachmaninoff_piano", corpus_release="v2.4")
package = D.inputs.get_package()
package_info = package._package.custom
git_tag = package_info.get("git_tag")
utils.print_heading("Data and software versions")
print("Sergei Rachmaninoff – Variations on a Theme of Corelli, Op. 42 version v2.4")
print(f"Datapackage '{package.package_name}' @ {git_tag}")
print(f"dimcat version {dc.__version__}\n")
D
Data and software versions
--------------------------

Sergei Rachmaninoff – Variations on a Theme of Corelli, Op. 42 version v2.4
Datapackage 'rachmaninoff_piano' @ v2.4
dimcat version 3.4.0
Dataset
=======
{'inputs': {'basepath': None,
            'packages': {'rachmaninoff_piano': ["'rachmaninoff_piano.measures' (MuseScoreFacetName.MuseScoreMeasures)",
                                                "'rachmaninoff_piano.notes' (MuseScoreFacetName.MuseScoreNotes)",
                                                "'rachmaninoff_piano.expanded' (MuseScoreFacetName.MuseScoreHarmonies)",
                                                "'rachmaninoff_piano.chords' (MuseScoreFacetName.MuseScoreChords)",
                                                "'rachmaninoff_piano.metadata' (FeatureName.Metadata)"]}},
 'outputs': {'basepath': None, 'packages': {}},
 'pipeline': []}
try:
    cadence_labels = D.get_feature("cadencelabels")
except Exception as e:
    raise ValueError("Corpus has no cadence annotations.") from e
cadence_labels
[Output: the cadence label table, 49 rows × 22 columns. Each row is one cadence label, indexed by (corpus, piece, i); the columns give its position (mc, mn, quarterbeats, duration_qb, mc_onset, mn_onset, timesig, staff, voice), the key context (globalkey, localkey, and the derived mode columns), and the cadence type (PAC, IAC, HC, DC, EC, or PC).]

cadence_labels.plot_grouped(
    title="Distribution of cadence types over the DLC",
    output=make_output_path("all_cadences_pie"),
    width=1000,
    height=1000,
)

Metadata#

cadence_filter = filters.HasCadenceAnnotationsFilter()
filtered_D = cadence_filter.process(D)
hascadence_metadata = filtered_D.get_metadata()
chronological_corpus_names = hascadence_metadata.get_corpus_names()
cadence_counts = cadence_labels.apply_step("Counter")
cadence_counts.plot_grouped("corpus")
mean_composition_years = (
    hascadence_metadata.groupby(level=0).composed_end.mean().astype(int).sort_values()
)
bar_data = (
    pd.concat(
        [
            mean_composition_years.rename("year"),
            hascadence_metadata.groupby(level="corpus").size().rename("pieces"),
        ],
        axis=1,
    )
    .rename_axis("corpus")  # ensure the index turns into a 'corpus' column
    .reset_index()
)
fig = px.bar(
    bar_data,
    x="year",
    y="pieces",
    color="corpus",
    title="Pieces contained in the dataset",
)
fig.update_traces(width=5)

Overall#

  • PAC: Perfect Authentic Cadence

  • IAC: Imperfect Authentic Cadence

  • HC: Half Cadence

  • DC: Deceptive Cadence

  • EC: Evaded Cadence

  • PC: Plagal Cadence
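
For quick reference in custom tables or figure legends, these abbreviations can be expanded with a simple mapping. The dictionary below is only an illustrative sketch (the name CADENCE_NAMES does not occur elsewhere in this notebook); the cadence column itself keeps the abbreviated values throughout.

CADENCE_NAMES = {
    "PAC": "Perfect Authentic Cadence",
    "IAC": "Imperfect Authentic Cadence",
    "HC": "Half Cadence",
    "DC": "Deceptive Cadence",
    "EC": "Evaded Cadence",
    "PC": "Plagal Cadence",
}
# e.g., cadence_labels.cadence.map(CADENCE_NAMES) would yield the spelled-out names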

print(f"{len(cadence_labels)} cadence labels.")
utils.value_count_df(cadence_labels.cadence)

Per dataset#

all_labels = D.get_feature("harmonylabels")
cadence_count_per_dataset = all_labels.groupby("corpus").cadence.value_counts()
cadence_fraction_per_dataset = (
    cadence_count_per_dataset / cadence_count_per_dataset.groupby(level=0).sum()
)
cadence_fraction_per_dataset = cadence_fraction_per_dataset.rename(
    "fraction"
).reset_index()
cadence_fraction_per_dataset["corpus_name"] = cadence_fraction_per_dataset.corpus.map(
    utils.get_corpus_display_name
)
fig = px.bar(
    cadence_fraction_per_dataset,
    x="corpus_name",
    y="fraction",
    title="Distribution of cadence types per corpus",
    color="cadence",
    color_discrete_map=plotting.CADENCE_COLORS,
    labels=dict(corpus_name="", fraction="Fraction of all cadences"),
    category_orders=dict(corpus_name=chronological_corpus_names),
)
fig.update_layout(**utils.STD_LAYOUT)
save_figure_as(fig, "all_cadences_corpuswise_stacked_bars", height=1000)
fig.show()
fig = px.pie(
    cadence_count_per_dataset.rename("count").reset_index(),
    names="cadence",
    color="cadence",
    values="count",
    facet_col="corpus",
    facet_col_wrap=4,
    height=2000,
    color_discrete_map=plotting.CADENCE_COLORS,
)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(**utils.STD_LAYOUT)
save_figure_as(fig, "all_cadences_corpuswise_pies")
fig.show()
cadence_count_per_mode = (
    all_labels.groupby("localkey_is_minor").cadence.value_counts().reset_index()
)
cadence_count_per_mode["mode"] = cadence_count_per_mode.localkey_is_minor.map(
    {False: "major", True: "minor"}
)
fig = px.pie(
    cadence_count_per_mode,
    names="cadence",
    color="cadence",
    values="count",
    facet_col="mode",
    height=2000,
    color_discrete_map=plotting.CADENCE_COLORS,
)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(**utils.STD_LAYOUT)
save_figure_as(fig, "all_cadences_modewise_pies")
fig.show()
corelli = dc.Dataset()
CORELLI_PATH = os.path.abspath(os.path.join("..", "corelli"))
corelli.load(directory=CORELLI_PATH, parse_tsv=False)
annotated_view = corelli.data.get_view("annotated")
annotated_view.include("facets", "expanded")
annotated_view.pieces_with_incomplete_facets = False
corelli.data.set_view(annotated_view)
corelli.data.parse_tsv(choose="auto")
corelli.get_indices()
corelli_labels = corelli.get_facet("expanded")
corelli_cadence_count_per_mode = (
    corelli_labels.groupby("localkey_is_minor").cadence.value_counts().reset_index()
)
corelli_cadence_count_per_mode["mode"] = (
    corelli_cadence_count_per_mode.localkey_is_minor.map(
        {False: "major", True: "minor"}
    )
)
fig = px.pie(
    corelli_cadence_count_per_mode,
    names="cadence",
    color="cadence",
    values="count",
    facet_col="mode",
    height=2000,
    color_discrete_map=plotting.CADENCE_COLORS,
)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(**utils.STD_LAYOUT)
save_figure_as(fig, "all_corelli_cadences_modewise_pies")
fig.show()
combined_cadences = pd.concat(
    [cadence_count_per_mode, corelli_cadence_count_per_mode],
    keys=["rachmaninoff_piano", "corelli"],
    names=["corpus", None],
).reset_index(level=0)
fig = px.pie(
    combined_cadences,
    names="cadence",
    color="cadence",
    values="count",
    facet_col="mode",
    facet_row="corpus",
    height=2000,
    color_discrete_map=plotting.CADENCE_COLORS,
)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
updated_layout = dict(utils.STD_LAYOUT, font=dict(size=40))
fig.update_layout(**updated_layout)
save_figure_as(fig, "couperin_corelli_cadences_modewise_pies")
fig.show()

Per phrase#

Number of cadences per phrase#

grouped_by_corpus = groupers.CorpusGrouper().process(D)
segmented = slicers.PhraseSlicer().process_data(grouped_by_corpus)
phrases = segmented.get_slice_info()
phrase_segments = segmented.get_facet("expanded")
phrase_gpb = phrase_segments.groupby(level=[0, 1, 2])
local_keys_per_phrase = phrase_gpb.localkey.unique().map(tuple)
n_local_keys_per_phrase = local_keys_per_phrase.map(len)
phrases_with_keys = pd.concat(
    [
        n_local_keys_per_phrase.rename("n_local_keys"),
        local_keys_per_phrase.rename("local_keys"),
        phrases,
    ],
    axis=1,
)
phrases_with_cadences = pd.concat(
    [
        phrase_gpb.cadence.nunique().rename("n_cadences"),
        phrase_gpb.cadence.unique()
        .rename("cadences")
        .map(lambda arr: tuple(e for e in arr if not pd.isnull(e))),
        phrases_with_keys,
    ],
    axis=1,
)
utils.value_count_df(phrases_with_cadences.n_cadences, counts_column="#phrases")
n_cad = (
    phrases_with_cadences.groupby(level="corpus")
    .n_cadences.value_counts()
    .rename("counts")
    .reset_index()
    .sort_values("n_cadences")
)
n_cad.n_cadences = n_cad.n_cadences.astype(str)
fig = px.bar(
    n_cad,
    x="corpus",
    y="counts",
    color="n_cadences",
    height=800,
    barmode="group",
    labels=dict(n_cadences="#cadences in a phrase"),
    category_orders=dict(corpus=chronological_corpus_names),
)
save_figure_as(fig, "n_cadences_per_phrase_corpuswise_absolute_grouped_bars")
fig.show()

Combinations of cadence types for phrases with more than one cadence#

utils.value_count_df(
    phrases_with_cadences[phrases_with_cadences.n_cadences > 1].cadences
)

Positioning of cadences within phrases#

df_rows = []
y_position = 0
# One scatter row per marker: the end of each phrase plus every cadence within it,
# with x measured in quarterbeats from the phrase's start.
for ix in (
    phrases_with_cadences[phrases_with_cadences.n_cadences > 0]
    .sort_values("duration_qb")
    .index
):
    df = phrase_segments.loc[ix]
    description = str(ix)
    if df.cadence.notna().any():
        interval = ix[2]
        df_rows.append((y_position, interval.length, "end of phrase", description))
        start_pos = interval.left
        cadences = df.loc[df.cadence.notna(), ["quarterbeats", "cadence"]]
        cadences.quarterbeats -= start_pos
        for cadence_x, cadence_type in cadences.itertuples(index=False, name=None):
            df_rows.append((y_position, cadence_x, cadence_type, description))
        y_position += 1
    # else:
    #    df_rows.append((y_position, pd.NA, pd.NA, description))

data = pd.DataFrame(df_rows, columns=["phrase_ix", "x", "marker", "description"])
fig = px.scatter(
    data[data.x.notna()],
    x="x",
    y="phrase_ix",
    color="marker",
    hover_name="description",
    height=3000,
    labels=dict(marker="legend"),
    color_discrete_map=plotting.CADENCE_COLORS,
)
fig.update_traces(marker_size=5)
fig.update_yaxes(autorange="reversed")
save_figure_as(fig, "cadence_positions_within_all_phrases")
fig.show()

Cadence ultima#

phrase_segments = segmented.get_facet("expanded")
# Cadence labels that were annotated without a chord label lack the derived chord features.
cadence_selector = phrase_segments.cadence.notna()
missing_chord_selector = phrase_segments.chord.isna()
cadence_with_missing_chord_selector = cadence_selector & missing_chord_selector
missing = phrase_segments[cadence_with_missing_chord_selector]
# Re-derive the chord features (numeral, bass_note, chord tones, ...) for those rows.
expanded = ms3.expand_dcml.expand_labels(
    missing,
    propagate=False,
    chord_tones=True,
    skip_checks=True,
)
phrase_segments.loc[cadence_with_missing_chord_selector] = expanded
print(
    f"Ultima harmony missing for {(phrase_segments.cadence.notna() & phrase_segments.bass_note.isna()).sum()} cadence "
    f"labels."
)

Ultimae as Roman numerals#

def highlight(row, color="#ffffb3"):
    if row.counts < 10:
        return [None] * 4
    else:
        return [f"background-color: {color};"] * 4


cadence_counts = all_labels.cadence.value_counts()
ultima_root = (
    phrase_segments.groupby(["localkey_is_minor", "cadence"])
    .numeral.value_counts()
    .rename("counts")
    .to_frame()
    .reset_index()
)
ultima_root.localkey_is_minor = ultima_root.localkey_is_minor.map(
    {False: "in major", True: "in minor"}
)
# ultima_root.style.apply(highlight, axis=1)
fig = px.pie(
    ultima_root,
    names="numeral",
    values="counts",
    facet_row="cadence",
    facet_col="localkey_is_minor",
    height=1500,
    category_orders={"cadence": cadence_counts.index},
)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_traces(textposition="inside", textinfo="percent+label")
fig.update_layout(**utils.STD_LAYOUT)
save_figure_as(fig, "ultima_root_distributions_over_cadence_types_maj_min_pies")
fig.show()
# phrase_segments.groupby(level=[0,1,2], group_keys=False).apply(lambda df: df if ((df.cadence == 'PAC') &
# (df.numeral == 'V')).any() else None)

Ultimae bass notes as scale degrees#

ultima_bass = (
    phrase_segments.groupby(["localkey_is_minor", "cadence"])
    .bass_note.value_counts()
    .rename("counts")
    .reset_index()
)
ultima_bass.bass_note = ms3.transform(
    ultima_bass, ms3.fifths2sd, dict(fifths="bass_note", minor="localkey_is_minor")
)
ultima_bass.localkey_is_minor = ultima_bass.localkey_is_minor.map(
    {False: "in major", True: "in minor"}
)
# ultima_bass.style.apply(highlight, axis=1)
fig = px.pie(
    ultima_bass,
    names="bass_note",
    values="counts",
    facet_row="cadence",
    facet_col="localkey_is_minor",
    height=1500,
    category_orders={"cadence": cadence_counts.index},
)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_traces(textposition="inside", textinfo="percent+label")
fig.update_layout(**utils.STD_LAYOUT)
save_figure_as(fig, "ultima_degree_distributions_over_cadence_types_maj_min_pies")
fig.show()

Chord progressions#

PACs with ultima I/i#

def remove_immediate_duplicates(lst):
    return tuple(a for a, b in zip(lst, (None,) + tuple(lst)) if a != b)
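# For example, remove_immediate_duplicates(("I", "I", "V", "V", "I")) returns
# ('I', 'V', 'I'): only immediate repetitions are dropped, later recurrences are kept.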


def get_progressions(
    selected="PAC",
    last_row=None,
    feature="chord",
    dataset=None,
    as_series=True,
    remove_duplicates=False,
):
    """For each cadence of type 'selected', collect the sequence of 'feature' values from the
    beginning of the phrase (or from the preceding cadence) up to and including the cadence
    label. 'last_row' optionally restricts the selection via feature values of the final row
    (e.g. the ultima's numeral). Relies on the global variable phrase_segments."""
    if last_row is None:
        last_row = {}
    last_row = {k: v if isinstance(v, tuple) else (v,) for k, v in last_row.items()}
    progressions = []

    for (corp, fname, *_), df in phrase_segments[
        phrase_segments[feature].notna()
    ].groupby(level=[0, 1, 2]):
        if dataset is not None and dataset not in corp:
            continue
        if (df.cadence == selected).fillna(False).any():
            # remove chords after the last cadence label
            df = df[df.cadence.bfill().notna()]
            # group segments leading up to a cadence label
            cadence_groups = df.cadence.notna().shift().fillna(False).cumsum()
            for i, cadence in df.groupby(cadence_groups):
                last_r = cadence.iloc[-1]
                typ = last_r.cadence
                if typ != selected:
                    continue
                if any(last_r[feat] not in values for feat, values in last_row.items()):
                    continue
                if remove_duplicates:
                    progressions.append(
                        remove_immediate_duplicates(cadence[feature].to_list())
                    )
                else:
                    progressions.append(tuple(cadence[feature]))
    if as_series:
        return pd.Series(progressions, dtype="object")
    return progressions
chord_progressions = get_progressions("PAC", dict(numeral=("I", "i")), "chord")
print(f"Progressions for {len(chord_progressions)} cadences:")
utils.value_count_df(chord_progressions, "chord progressions")
numeral_progressions = get_progressions("PAC", dict(numeral=("I", "i")), "numeral")
utils.value_count_df(numeral_progressions, "numeral progressions")
numeral_prog_no_dups = numeral_progressions.map(remove_immediate_duplicates)
utils.value_count_df(numeral_prog_no_dups)

PACs ending on scale degree 1#

Scale degrees are expressed with respect to the major scale, regardless of the actual key.
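
As a quick illustration (not part of the analysis pipeline), ms3 encodes bass degrees on the line of fifths relative to the local tonic, which is why bass_note == 0 below selects cadences whose ultima bass is scale degree 1. A minimal sketch of the conversion:

# 0 = local tonic, +1 = a fifth up, -1 = a fifth down, etc.
print(ms3.fifths2sd(0))   # '1' (tonic)
print(ms3.fifths2sd(1))   # '5' (dominant)
print(ms3.fifths2sd(-1))  # '4' (subdominant)
# minor=True would spell the degrees relative to the natural minor scale instead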

bass_progressions = get_progressions("PAC", dict(bass_note=0), "bass_note")
bass_prog = bass_progressions.map(ms3.fifths2sd)
print(f"Progressions for {len(bass_progressions)} cadences:")
utils.value_count_df(bass_prog, "bass progressions")
bass_prog_no_dups = bass_prog.map(remove_immediate_duplicates)
utils.value_count_df(bass_prog_no_dups)
def progressions2graph_data(progressions, cut_at_stage=None):
    stage_nodes = defaultdict(dict)
    edge_weights = Counter()
    node_counter = 0
    for progression in progressions:
        previous_node = None
        for stage, current in enumerate(reversed(progression)):
            if cut_at_stage and stage > cut_at_stage:
                break
            if current in stage_nodes[stage]:
                current_node = stage_nodes[stage][current]
            else:
                stage_nodes[stage][current] = node_counter
                current_node = node_counter
                node_counter += 1
            if previous_node is not None:
                edge_weights.update([(current_node, previous_node)])
            previous_node = current_node
    return stage_nodes, edge_weights


def plot_progressions(progressions, cut_at_stage=None, **kwargs):
    stage_nodes, edge_weights = progressions2graph_data(
        progressions, cut_at_stage=cut_at_stage
    )
    return utils.graph_data2sankey(stage_nodes, edge_weights, **kwargs)
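
To make the Sankey input explicit, here is a toy run of progressions2graph_data (the progressions are made up for illustration):

toy = [("V", "I"), ("ii", "V", "I")]
stage_nodes, edge_weights = progressions2graph_data(toy)
# Progressions are read in reverse, so stage 0 holds the ultimae, stage 1 the penultimae, etc.:
# stage_nodes  -> {0: {'I': 0}, 1: {'V': 1}, 2: {'ii': 2}}
# edge_weights -> Counter({(1, 0): 2, (2, 1): 1})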

Chordal roots for the last 3 stages#

fig = plot_progressions(
    numeral_prog_no_dups,
    cut_at_stage=3,
    font=dict(size=30),
)
save_figure_as(fig, "last_3_roots_before_pacs_ending_on_1_sankey", height=800)
fig.show()

Complete chords for the last four stages in major#

pac_major = get_progressions("PAC", dict(numeral="I", localkey_is_minor=False), "chord")
fig = plot_progressions(pac_major, cut_at_stage=4)
save_figure_as(fig, "last_4_stages_before_pacs_in_major_sankey")
fig.show()

Bass degrees for the last 7 stages#

fig = plot_progressions(bass_prog_no_dups, cut_at_stage=7)
save_figure_as(fig, "last_7_degrees_before_pacs_ending_on_1_sankey")
fig.show()

Bass degrees without accidentals#

def remove_sd_accidentals(t):
    return tuple(map(lambda sd: sd[-1], t))


bass_prog_no_acc_no_dup = bass_prog.map(remove_sd_accidentals).map(
    remove_immediate_duplicates
)
fig = plot_progressions(bass_prog_no_acc_no_dup, cut_at_stage=7)
save_figure_as(fig, "last_7_degrees_before_pacs_ending_on_1_without_accdentals_sankey")
fig.show()

HCs ending on V#

half = get_progressions("HC", dict(numeral="V"), "bass_note").map(ms3.fifths2sd)
print(f"Progressions for {len(half)} cadences:")
fig = plot_progressions(half.map(remove_immediate_duplicates), cut_at_stage=5)
save_figure_as(fig, "last_7_degrees_before_hcs_ending_on_V_sankey")
fig.show()