Annotations

Loading data

Data and software versions
--------------------------

The Annotated Mozart Sonatas: Score, Harmony, and Cadence version v2.3
Datapackage 'mozart_piano_sonatas' @ v2.3
dimcat version 3.4.0

Dataset
=======
{'inputs': {'basepath': None,
            'packages': {'mozart_piano_sonatas': ["'mozart_piano_sonatas.measures' "
                                                  '(MuseScoreFacetName.MuseScoreMeasures)',
                                                  "'mozart_piano_sonatas.notes' (MuseScoreFacetName.MuseScoreNotes)",
                                                  "'mozart_piano_sonatas.expanded' "
                                                  '(MuseScoreFacetName.MuseScoreHarmonies)',
                                                  "'mozart_piano_sonatas.chords' (MuseScoreFacetName.MuseScoreChords)",
                                                  "'mozart_piano_sonatas.metadata' (FeatureName.Metadata)"]}},
 'outputs': {'basepath': None, 'packages': {}},
 'pipeline': []}

# Run the "HasHarmonyLabelsFilter" step on the dataset D; the filtered dataset is
# the basis for the metadata and features retrieved from here on.
# NOTE(review): presumably this drops pieces without harmony labels — confirm
# against the dimcat documentation.
filtered_D = D.apply_step("HasHarmonyLabelsFilter")
all_metadata = filtered_D.get_metadata()

# Stop right away if the filter left nothing to analyse.
assert len(all_metadata) > 0, "No pieces selected for analysis."
# NOTE(review): the variable name suggests a chronological ordering, which is not
# visible from this call — confirm.
chronological_corpus_names = all_metadata.get_corpus_names()

DCML harmony labels

The annotated pieces have 104774 notes.

# Extract the "harmonylabels" feature and report how it compares in size to the
# full set of annotations.
all_chords = filtered_D.get_feature("harmonylabels")
n_annotations = len(all_annotations)
n_harmony_labels = len(all_chords)
print(
    f"{n_annotations} annotations, of which {n_harmony_labels} are harmony labels."
)

15236 annotations, of which 14995 are harmony labels.

Harmony labels

Unigrams

To compute unigram statistics, the tokens need to be grouped by whether they occur in a major or a minor key, because the mode changes their meaning. To that end, the annotated corpus is sliced into contiguous localkey segments, which are then grouped into a major (is_minor=False) and a minor (is_minor=True) group.

# Summed duration per (root, chord type), restricted to chords whose root value
# lies between -5 and 6 (both inclusive).
root_in_range = all_chords.root.between(-5, 6)
chords_by_root_and_type = all_chords[root_in_range].groupby(["root", "chord_type"])
root_durations = chords_by_root_and_type["duration_qb"].sum()

# Optional alternative: sort by stacked bar length.
# root_durations = root_durations.sort_values(
#     key=lambda S: S.index.get_level_values(0).map(S.groupby(level=0).sum()),
#     ascending=False,
# )

# Flatten the grouped result into plain columns and convert the root values
# with ms3.fifths2iv for display on the x-axis.
bar_data = root_durations.reset_index()
bar_data["root"] = bar_data["root"].map(ms3.fifths2iv)

stacked_axis_labels = {
    "root": "Chord root expressed as interval above the local (or secondary) tonic",
    "duration_qb": "duration in quarter notes",
    "chord_type": "chord type",
}
fig = px.bar(
    bar_data,
    x="root",
    y="duration_qb",
    color="chord_type",
    title="Distribution of chord types over chord roots",
    labels=stacked_axis_labels,
)
fig.update_layout(**utils.STD_LAYOUT)
save_figure_as(fig, "chord_type_distribution_over_scale_degrees_absolute_stacked_bars")
fig.show()

# Work on an independent copy holding only the columns needed below.
needed_columns = [
    "numeral",
    "duration_qb",
    "relativeroot",
    "localkey_is_minor",
    "chord_type",
]
relative_roots = all_chords[needed_columns].copy()

# Resolve the relative-root annotation via ms3.resolve_relative_keys.
relative_roots["relativeroot_resolved"] = ms3.transform(
    relative_roots,
    ms3.resolve_relative_keys,
    ["relativeroot", "localkey_is_minor"],
)

# Wherever a resolved relative root exists, the mode flag is overwritten with
# the case of that resolved value (all-lowercase => True).
has_rel = relative_roots["relativeroot_resolved"].notna()
resolved_is_lowercase = relative_roots.loc[has_rel, "relativeroot_resolved"].str.islower()
relative_roots.loc[has_rel, "localkey_is_minor"] = resolved_is_lowercase

# Convert each numeral into a root value using the (possibly updated) mode flag.
relative_roots["root"] = ms3.transform(
    relative_roots,
    ms3.roman_numeral2fifths,
    ["numeral", "localkey_is_minor"],
)

# Chord types occurring fewer than 500 times are mapped to "other".
# NOTE(review): this relies on ms3.map_dict leaving unmapped values unchanged — confirm.
chord_type_frequency = all_chords.chord_type.value_counts()
rare_chord_types = chord_type_frequency[chord_type_frequency < 500].index
replace_rare = ms3.map_dict(dict.fromkeys(rare_chord_types, "other"))
relative_roots["type_reduced"] = relative_roots["chord_type"].map(replace_rare)

# Disabled: pin the special chord types to a fixed root value.
# is_special = relative_roots.chord_type.isin(('It', 'Ger', 'Fr'))
# relative_roots.loc[is_special, 'root'] = -4

# Summed duration per (root, reduced chord type), longest first.
duration_per_root_and_type = relative_roots.groupby(["root", "type_reduced"])[
    "duration_qb"
].sum()
root_durations = duration_per_root_and_type.sort_values(ascending=False)

# Flatten into plain columns and convert the root values with ms3.fifths2iv.
bar_data = root_durations.reset_index()
bar_data["root"] = bar_data["root"].map(ms3.fifths2iv)

# x-axis categories ordered by total duration per root; legend/bar order follows
# the frequency of the reduced chord types.
duration_per_root = bar_data.groupby("root")["duration_qb"].sum()
root_order = duration_per_root.sort_values(ascending=False).index.to_list()
type_order = relative_roots["type_reduced"].value_counts().index.to_list()

grouped_axis_labels = {
    "root": "intervallic difference between chord root to the local or secondary tonic",
    "duration_qb": "duration in quarter notes",
    "type_reduced": "chord type",
}
fig = px.bar(
    bar_data,
    x="root",
    y="duration_qb",
    color="type_reduced",
    barmode="group",
    log_y=True,
    color_discrete_map=utils.TYPE_COLORS,
    category_orders={"root": root_order, "type_reduced": type_order},
    labels=grouped_axis_labels,
    width=1000,
    height=400,
)

# Horizontal legend anchored at the top-right corner, on top of the standard layout.
legend_settings = {
    "orientation": "h",
    "xanchor": "right",
    "x": 1,
    "y": 1,
}
fig.update_layout(**utils.STD_LAYOUT, legend=legend_settings)
save_figure_as(fig, "chord_type_distribution_over_scale_degrees_absolute_grouped_bars")
fig.show()

# Count the distinct value pairs found in the first two columns of bar_data.
distinct_pairs = set(bar_data.iloc[:, :2].itertuples(index=False, name=None))
print(
    f"Reduced to {len(distinct_pairs)} types. "
    "Paper cites the sum of types in major and types in minor (see below), treating them as distinct."
)

Reduced to 33 types. Paper cites the sum of types in major and types in minor (see below), treating them as distinct.

# Share of the total duration that falls on roots whose interval name starts
# with "a" or "d".
# NOTE(review): the prefix match is case-sensitive; verify which casing
# ms3.fifths2iv emits for augmented/diminished intervals, since a mismatch
# would silently yield a share of 0.
root_names = bar_data["root"]
starts_with_a_or_d = root_names.str.startswith("a") | root_names.str.startswith("d")
dim_or_aug = bar_data.loc[starts_with_a_or_d, "duration_qb"].sum()
complete = bar_data["duration_qb"].sum()
dim_or_aug_share = dim_or_aug / complete
print(
    f"On diminished or augmented scale degrees: {dim_or_aug} / {complete} = {dim_or_aug_share}"
)

On diminished or augmented scale degrees: 0.0 / 22408.25 = 0.0

# Process the harmony labels with a ModeGrouper and select the "scale_degree"
# format for the grouped result.
mode_grouper = groupers.ModeGrouper()
chords_by_mode = mode_grouper.process(all_chords)
chords_by_mode.format = "scale_degree"

Whole dataset

# Run the default analysis of the mode-grouped chords and keep the result for
# later plotting; the ranking table is the cell's displayed value.
unigram_proportions = chords_by_mode.get_default_analysis()
unigram_proportions.make_ranking_table()

mode	major					minor
	chord_and_mode	scale_degrees	duration_qb	proportion	proportion_%	chord_and_mode	scale_degrees	duration_qb	proportion	proportion_%
rank
1	I, major	(1, 3, 5)	4259.375000	0.232166	23.22 %	i, minor	(1, 3, 5)	899.00	0.221320	22.13 %
2	V7, major	(5, 7, 2, 4)	1595.000000	0.086939	8.69 %	V, minor	(5, #7, 2)	456.50	0.112383	11.24 %
3	V, major	(5, 7, 2)	1545.250000	0.084227	8.42 %	V7, minor	(5, #7, 2, 4)	271.25	0.066777	6.68 %
4	I6, major	(3, 5, 1)	1442.583333	0.078631	7.86 %	i6, minor	(3, 5, 1)	251.50	0.061915	6.19 %
5	ii6, major	(4, 6, 2)	946.500000	0.051591	5.16 %	V(64), minor	(5, 1, 3)	152.25	0.037482	3.75 %
...	...	...	...	...	...	...	...	...	...	...
368	IV(2), major	(5, 6, 1)	0.500000	0.000027	0.0 %	NaN	NaN	NaN	NaN	NaN
369	IM7, major	(1, 3, 5, 7)	0.500000	0.000027	0.0 %	NaN	NaN	NaN	NaN	NaN
370	V65/V/vi, major	(#2, #4, 6, 7)	0.500000	0.000027	0.0 %	NaN	NaN	NaN	NaN	NaN
371	v2, major	(4, 5, b7, 2)	0.500000	0.000027	0.0 %	NaN	NaN	NaN	NaN	NaN
372	V64/IV, major	(5, 1, 3)	0.125000	0.000007	0.0 %	NaN	NaN	NaN	NaN	NaN

372 rows × 10 columns

# Apply the "Counter" step to the mode-grouped chords; the result is displayed
# as the cell output and not stored.
chords_by_mode.apply_step("Counter")

					count
mode	corpus	piece	chord_and_mode	scale_degrees
major	mozart_piano_sonatas	K279-1	I, major	(1, 3, 5)	42
			I6, major	(3, 5, 1)	21
			V7, major	(5, 7, 2, 4)	21
			ii6, major	(4, 6, 2)	20
			V, major	(5, 7, 2)	16
...	...	...	...	...	...
minor	mozart_piano_sonatas	K576-3	i, minor	(1, 3, 5)	1
			V65/V, minor	(#4, #6, 1, 2)	1
			V, minor	(5, #7, 2)	1
			#viio6, minor	(2, 4, #7)	1
			ii65, minor	(4, #6, 1, 2)	1

2970 rows × 1 columns

# NOTE(review): the same format value is already assigned where chords_by_mode
# is created; this re-assignment may be redundant — confirm whether an
# intermediate step can change it.
chords_by_mode.format = "scale_degree"
# Recompute the default analysis and display its ranking table.
chords_by_mode.get_default_analysis().make_ranking_table()

mode	major					minor
	chord_and_mode	scale_degrees	duration_qb	proportion	proportion_%	chord_and_mode	scale_degrees	duration_qb	proportion	proportion_%
rank
1	I, major	(1, 3, 5)	4259.375000	0.232166	23.22 %	i, minor	(1, 3, 5)	899.00	0.221320	22.13 %
2	V7, major	(5, 7, 2, 4)	1595.000000	0.086939	8.69 %	V, minor	(5, #7, 2)	456.50	0.112383	11.24 %
3	V, major	(5, 7, 2)	1545.250000	0.084227	8.42 %	V7, minor	(5, #7, 2, 4)	271.25	0.066777	6.68 %
4	I6, major	(3, 5, 1)	1442.583333	0.078631	7.86 %	i6, minor	(3, 5, 1)	251.50	0.061915	6.19 %
5	ii6, major	(4, 6, 2)	946.500000	0.051591	5.16 %	V(64), minor	(5, 1, 3)	152.25	0.037482	3.75 %
...	...	...	...	...	...	...	...	...	...	...
368	IV(2), major	(5, 6, 1)	0.500000	0.000027	0.0 %	NaN	NaN	NaN	NaN	NaN
369	IM7, major	(1, 3, 5, 7)	0.500000	0.000027	0.0 %	NaN	NaN	NaN	NaN	NaN
370	V65/V/vi, major	(#2, #4, 6, 7)	0.500000	0.000027	0.0 %	NaN	NaN	NaN	NaN	NaN
371	v2, major	(4, 5, b7, 2)	0.500000	0.000027	0.0 %	NaN	NaN	NaN	NaN	NaN
372	V64/IV, major	(5, 1, 3)	0.125000	0.000007	0.0 %	NaN	NaN	NaN	NaN	NaN

372 rows × 10 columns

# Plot the previously computed default analysis via its plot_grouped() method;
# the return value is the cell's displayed output.
unigram_proportions.plot_grouped()