Annotations#

Hide imports
%load_ext autoreload
%autoreload 2

import os

import dimcat as dc
import ms3
import plotly.express as px
from dimcat import groupers, plotting

import utils
Hide source
# Absolute path of the "overview" subfolder inside utils.OUTPUT_FOLDER; it is the
# default target of the helpers below and is created here if it does not exist yet.
RESULTS_PATH = os.path.abspath(os.path.join(utils.OUTPUT_FOLDER, "overview"))
os.makedirs(RESULTS_PATH, exist_ok=True)


def make_output_path(
    filename: str,
    extension=None,
    path=RESULTS_PATH,
) -> str:
    """Delegate to ``utils.make_output_path`` with ``RESULTS_PATH`` as default ``path``.

    Args:
        filename: Forwarded unchanged as ``filename``.
        extension: Forwarded unchanged as ``extension`` (``None`` by default).
        path: Forwarded unchanged as ``path``; defaults to ``RESULTS_PATH``.

    Returns:
        The value returned by ``utils.make_output_path``.
    """
    forwarded = {"filename": filename, "extension": extension, "path": path}
    return utils.make_output_path(**forwarded)


def save_figure_as(
    fig, filename, formats=("png", "pdf"), directory=RESULTS_PATH, **kwargs
):
    """Write ``fig`` through ``plotting.write_image``, once per requested format.

    Args:
        fig: Figure object handed to ``plotting.write_image``.
        filename: File name handed to ``plotting.write_image``.
        formats: Iterable of format names; ``plotting.write_image`` is called once
            per entry with ``format=<entry>``. A single string (e.g. ``"png"``) is
            treated as one format. ``None`` results in a single call without the
            ``format`` argument.
        directory: Directory handed to ``plotting.write_image``; defaults to
            ``RESULTS_PATH``.
        **kwargs: Forwarded unchanged to every ``plotting.write_image`` call.
    """
    if formats is None:
        plotting.write_image(fig, filename, directory, **kwargs)
        return
    if isinstance(formats, str):
        # Iterating a bare string would yield one bogus "format" per character.
        formats = (formats,)
    for fmt in formats:
        plotting.write_image(fig, filename, directory, format=fmt, **kwargs)

Loading data

Hide source
# Load the dataset through the project helper, requesting corpus release "v2.4".
D = utils.get_dataset("frescobaldi_fiori_musicali", corpus_release="v2.4")
package = D.inputs.get_package()
# NOTE(review): this reads the private ``_package`` attribute — confirm whether a
# public accessor for the custom package info exists.
package_info = package._package.custom
git_tag = package_info.get("git_tag")
utils.print_heading("Data and software versions")
# NOTE(review): "v2.4" is hard-coded in this message as well as in the
# get_dataset() call above; both need to be changed together.
print("Girolamo Frescobaldi (1583-1643) – Fiori Musicali, op. 12 (1635) version v2.4")
print(f"Datapackage '{package.package_name}' @ {git_tag}")
print(f"dimcat version {dc.__version__}\n")
# Bare expression: the notebook displays the dataset's representation.
D
Data and software versions
--------------------------

Girolamo Frescobaldi (1583-1643) – Fiori Musicali, op. 12 (1635) version v2.4
Datapackage 'frescobaldi_fiori_musicali' @ v2.4
dimcat version 3.4.0
Dataset
=======
{'inputs': {'basepath': None,
            'packages': {'frescobaldi_fiori_musicali': ["'frescobaldi_fiori_musicali.measures' "
                                                        '(MuseScoreFacetName.MuseScoreMeasures)',
                                                        "'frescobaldi_fiori_musicali.notes' "
                                                        '(MuseScoreFacetName.MuseScoreNotes)',
                                                        "'frescobaldi_fiori_musicali.expanded' "
                                                        '(MuseScoreFacetName.MuseScoreHarmonies)',
                                                        "'frescobaldi_fiori_musicali.chords' "
                                                        '(MuseScoreFacetName.MuseScoreChords)',
                                                        "'frescobaldi_fiori_musicali.metadata' "
                                                        '(FeatureName.Metadata)']}},
 'outputs': {'basepath': None, 'packages': {}},
 'pipeline': []}
# Apply the "HasHarmonyLabelsFilter" step (presumably keeps only pieces that have
# harmony labels — confirm against the dimcat documentation).
filtered_D = D.apply_step("HasHarmonyLabelsFilter")
all_metadata = filtered_D.get_metadata()
# Abort the notebook run if the filtered dataset has no metadata rows.
assert len(all_metadata) > 0, "No pieces selected for analysis."
# NOTE(review): the variable name suggests chronological order — verify that
# get_corpus_names() actually returns the names in that order.
chronological_corpus_names = all_metadata.get_corpus_names()

DCML harmony labels#

Hide source
all_annotations = filtered_D.get_feature("DcmlAnnotations")
# Boolean mask over the metadata: True where label_count is greater than zero.
is_annotated_mask = all_metadata.label_count > 0
is_annotated_index = all_metadata.index[is_annotated_mask]
# Restrict the notes feature to the index entries selected by the mask.
annotated_notes = filtered_D.get_feature("notes").subselect(is_annotated_index)
print(f"The annotated pieces have {len(annotated_notes)} notes.")
The annotated pieces have 19370 notes.
# Fetch the "harmonylabels" feature and report its size next to the number of
# annotations obtained from the "DcmlAnnotations" feature.
all_chords = filtered_D.get_feature("harmonylabels")
print(
    f"{len(all_annotations)} annotations, of which {len(all_chords)} are harmony labels."
)
5119 annotations, of which 5086 are harmony labels.

Harmony labels#

Unigrams#

To compute unigram statistics, the tokens need to be grouped according to whether they occur within a major or a minor key, because the mode changes their meaning. To that end, the annotated corpus is sliced into contiguous localkey segments, which are then grouped into a major (is_minor=False) and a minor (is_minor=True) group.

# Summed duration_qb per (root, chord_type) pair, restricted to rows whose
# root passes between(-5, 6).
root_durations = (
    all_chords[all_chords.root.between(-5, 6)]
    .groupby(["root", "chord_type"])
    .duration_qb.sum()
)
# Optional alternative, kept disabled: sort by stacked bar length.
# root_durations = root_durations.sort_values(key=lambda S: S.index.get_level_values(0).map(S.groupby(level=0).sum()),
# ascending=False)
bar_data = root_durations.reset_index()
# Replace the numeric roots by the labels that ms3.fifths2iv maps them to.
bar_data["root"] = bar_data["root"].map(ms3.fifths2iv)
fig = px.bar(
    bar_data,
    x="root",
    y="duration_qb",
    color="chord_type",
    title="Distribution of chord types over chord roots",
    labels={
        "root": "Chord root expressed as interval above the local (or secondary) tonic",
        "duration_qb": "duration in quarter notes",
        "chord_type": "chord type",
    },
)
fig.update_layout(**utils.STD_LAYOUT)
save_figure_as(fig, "chord_type_distribution_over_scale_degrees_absolute_stacked_bars")
fig.show()
# Work on a copy of the five columns needed below so that all_chords stays untouched.
relative_roots = all_chords[
    ["numeral", "duration_qb", "relativeroot", "localkey_is_minor", "chord_type"]
].copy()
# ms3.transform is handed ms3.resolve_relative_keys together with the columns
# "relativeroot" and "localkey_is_minor" (presumably applied row-wise — confirm
# against the ms3 documentation).
relative_roots["relativeroot_resolved"] = ms3.transform(
    relative_roots, ms3.resolve_relative_keys, ["relativeroot", "localkey_is_minor"]
)
# For rows with a resolved relative root, overwrite the mode flag with whether
# the resolved value is lowercase.
has_rel = relative_roots.relativeroot_resolved.notna()
relative_roots.loc[has_rel, "localkey_is_minor"] = relative_roots.loc[
    has_rel, "relativeroot_resolved"
].str.islower()
# Derive "root" from the numeral and the (possibly updated) mode flag via
# ms3.roman_numeral2fifths.
relative_roots["root"] = ms3.transform(
    relative_roots, ms3.roman_numeral2fifths, ["numeral", "localkey_is_minor"]
)
chord_type_frequency = all_chords.chord_type.value_counts()
# Every chord type counted fewer than 500 times is mapped to "other".
replace_rare = ms3.map_dict(
    dict.fromkeys(chord_type_frequency[chord_type_frequency < 500].index, "other")
)
relative_roots["type_reduced"] = relative_roots.chord_type.map(replace_rare)
# Disabled special-casing of augmented sixth chords:
# is_special = relative_roots.chord_type.isin(('It', 'Ger', 'Fr'))
# relative_roots.loc[is_special, 'root'] = -4
# Summed duration_qb per (root, reduced type), largest first.
root_durations = relative_roots.groupby(["root", "type_reduced"])[
    "duration_qb"
].sum()
root_durations = root_durations.sort_values(ascending=False)
bar_data = root_durations.reset_index()
# Replace the numeric roots by the labels that ms3.fifths2iv maps them to.
bar_data["root"] = bar_data["root"].map(ms3.fifths2iv)
# Order of the x-axis categories: roots by descending summed duration.
root_order = (
    bar_data.groupby("root")["duration_qb"]
    .sum()
    .sort_values(ascending=False)
    .index.to_list()
)
fig = px.bar(
    bar_data,
    x="root",
    y="duration_qb",
    color="type_reduced",
    barmode="group",
    log_y=True,
    color_discrete_map=utils.TYPE_COLORS,
    category_orders={
        "root": root_order,
        "type_reduced": relative_roots.type_reduced.value_counts().index.to_list(),
    },
    labels={
        "root": "intervallic difference between chord root to the local or secondary tonic",
        "duration_qb": "duration in quarter notes",
        "type_reduced": "chord type",
    },
    width=1000,
    height=400,
)
# Horizontal legend anchored at the right edge (x=1, y=1).
fig.update_layout(
    **utils.STD_LAYOUT,
    legend={
        "orientation": "h",
        "xanchor": "right",
        "x": 1,
        "y": 1,
    },
)
save_figure_as(fig, "chord_type_distribution_over_scale_degrees_absolute_grouped_bars")
fig.show()
# Count the distinct value pairs in the first two columns of bar_data.
print(
    f"Reduced to {len(set(bar_data.iloc[:, :2].itertuples(index=False, name=None)))} types. "
    "Paper cites the sum of types in major and types in minor (see below), treating them as distinct."
)
Reduced to 29 types. Paper cites the sum of types in major and types in minor (see below), treating them as distinct.
# Summed duration of the rows whose root label starts with "a" or "d",
# compared with the summed duration of all rows.
dim_or_aug = bar_data.loc[
    bar_data.root.str.startswith("a") | bar_data.root.str.startswith("d"),
    "duration_qb",
].sum()
complete = bar_data["duration_qb"].sum()
print(
    f"On diminished or augmented scale degrees: {dim_or_aug} / {complete} = {dim_or_aug / complete}"
)
On diminished or augmented scale degrees: 4.0 / 10022.0 = 0.0003991219317501497
# Run all harmony labels through a ModeGrouper and switch the result's
# ``format`` attribute to "scale_degree".
chords_by_mode = groupers.ModeGrouper().process(all_chords)
chords_by_mode.format = "scale_degree"

Whole dataset#

# Keep the default analysis of the mode-grouped chords; the ranking table is the
# cell's last expression and is therefore displayed by the notebook.
unigram_proportions = chords_by_mode.get_default_analysis()
unigram_proportions.make_ranking_table()
mode major minor
chord_and_mode scale_degrees duration_qb proportion proportion_% chord_and_mode scale_degrees duration_qb proportion proportion_%
rank
1 I, major (1, 3, 5) 401.5 0.163411 16.34 % i, minor (1, 3, 5) 1147.00 0.151619 15.16 %
2 V, major (5, 7, 2) 279.5 0.113757 11.38 % v, minor (5, 7, 2) 610.00 0.080635 8.06 %
3 IV, major (4, 6, 1) 202.5 0.082418 8.24 % I, minor (1, #3, 5) 480.50 0.063516 6.35 %
4 vi, major (6, 1, 3) 181.5 0.073871 7.39 % i6, minor (3, 5, 1) 439.75 0.058130 5.81 %
5 I6, major (3, 5, 1) 130.0 0.052910 5.29 % iv, minor (4, 6, 1) 356.50 0.047125 4.71 %
... ... ... ... ... ... ... ... ... ... ...
376 NaN NaN NaN NaN NaN iv6(112), minor (6, 1, 5) 0.50 0.000066 0.01 %
377 NaN NaN NaN NaN NaN #vi%7, minor (#6, 1, 3, 5) 0.50 0.000066 0.01 %
378 NaN NaN NaN NaN NaN viio64/IV, minor (7, #3, 5) 0.50 0.000066 0.01 %
379 NaN NaN NaN NaN NaN viio64/V, minor (1, #4, #6) 0.50 0.000066 0.01 %
380 NaN NaN NaN NaN NaN V6/VI, minor (5, 7, 3) 0.50 0.000066 0.01 %

380 rows × 10 columns

# Apply the "Counter" step to the mode-grouped chords; the result is displayed
# as the cell output and not stored in a variable.
chords_by_mode.apply_step("Counter")
count
mode corpus piece chord_and_mode scale_degrees
major frescobaldi_fiori_musicali 12.17_Canzon_post_il_Comune I, major (1, 3, 5) 30
V, major (5, 7, 2) 26
vi, major (6, 1, 3) 18
I6, major (3, 5, 1) 14
ii, major (2, 4, 6) 14
... ... ... ... ... ...
minor frescobaldi_fiori_musicali 12.45_Toccata_per_l'Elevatione VI/iv, minor (b2, 4, 6) 1
V7/V/III, minor (4, #6, 1, 3) 1
V7/III, minor (7, 2, 4, 6) 1
#viio/iv, minor (#3, 5, 7) 1
vo6, minor (7, b2, 5) 1

2008 rows × 1 columns

# NOTE(review): ``format`` was already set to "scale_degree" when chords_by_mode
# was created, and the ranking table below is built by the same calls as the one
# from unigram_proportions — confirm whether this cell is still needed.
chords_by_mode.format = "scale_degree"
chords_by_mode.get_default_analysis().make_ranking_table()
mode major minor
chord_and_mode scale_degrees duration_qb proportion proportion_% chord_and_mode scale_degrees duration_qb proportion proportion_%
rank
1 I, major (1, 3, 5) 401.5 0.163411 16.34 % i, minor (1, 3, 5) 1147.00 0.151619 15.16 %
2 V, major (5, 7, 2) 279.5 0.113757 11.38 % v, minor (5, 7, 2) 610.00 0.080635 8.06 %
3 IV, major (4, 6, 1) 202.5 0.082418 8.24 % I, minor (1, #3, 5) 480.50 0.063516 6.35 %
4 vi, major (6, 1, 3) 181.5 0.073871 7.39 % i6, minor (3, 5, 1) 439.75 0.058130 5.81 %
5 I6, major (3, 5, 1) 130.0 0.052910 5.29 % iv, minor (4, 6, 1) 356.50 0.047125 4.71 %
... ... ... ... ... ... ... ... ... ... ...
376 NaN NaN NaN NaN NaN iv6(112), minor (6, 1, 5) 0.50 0.000066 0.01 %
377 NaN NaN NaN NaN NaN #vi%7, minor (#6, 1, 3, 5) 0.50 0.000066 0.01 %
378 NaN NaN NaN NaN NaN viio64/IV, minor (7, #3, 5) 0.50 0.000066 0.01 %
379 NaN NaN NaN NaN NaN viio64/V, minor (1, #4, #6) 0.50 0.000066 0.01 %
380 NaN NaN NaN NaN NaN V6/VI, minor (5, 7, 3) 0.50 0.000066 0.01 %

380 rows × 10 columns

# Call plot_grouped() on the default analysis stored in unigram_proportions.
unigram_proportions.plot_grouped()