Notes

Loading data

Data and software versions
--------------------------

Arcangelo Corelli – Trio Sonatas version v2.7
Datapackage 'corelli' @ v2.7
dimcat version 3.4.0

Dataset
=======
{'inputs': {'basepath': None,
            'packages': {'corelli': ["'corelli.measures' (MuseScoreFacetName.MuseScoreMeasures)",
                                     "'corelli.notes' (MuseScoreFacetName.MuseScoreNotes)",
                                     "'corelli.expanded' (MuseScoreFacetName.MuseScoreHarmonies)",
                                     "'corelli.chords' (MuseScoreFacetName.MuseScoreChords)",
                                     "'corelli.metadata' (FeatureName.Metadata)"]}},
 'outputs': {'basepath': None, 'packages': {}},
 'pipeline': []}

Metadata

# Run dataset D through a HasHarmonyLabelsFilter configured with keep_values=[True]
# (presumably keeps only pieces that have harmony labels -- confirm against the dimcat docs).
filtered_D = filters.HasHarmonyLabelsFilter(keep_values=[True]).process(D)

all_metadata = filtered_D.get_metadata()
# Preview: move index level 1 into a column, take the first row of every level-0 group,
# and show only the first 20 columns.
all_metadata.reset_index(level=1).groupby(level=0).nth(0).iloc[:, :20]

	piece	TimeSig	KeySig	last_mc	last_mn	length_qb	last_mc_unfolded	last_mn_unfolded	length_qb_unfolded	volta_mcs	all_notes_qb	n_onsets	n_onset_positions	guitar_chord_count	form_label_count	label_count	annotated_key	harmony_version	annotators	reviewers
corpus
corelli	op01n01a	{1: '4/4'}	{1: -1}	14	14	56.0	14	14	56.0	()	224.0	280	110	0	0	64	F	2.3.0	Lars Opfermann, Ya-Chuan Wu (2.1.1), Hanné Bec...	HB, JH

# Corpus order as returned by the project helper for the loaded metadata.
chronological_order = utils.chronological_corpus_order(all_metadata)
# Pair each corpus with a color by position. zip() stops at the shorter input, so corpora
# beyond the length of utils.CORPUS_COLOR_SCALE would end up without a color.
corpus_colors = dict(zip(chronological_order, utils.CORPUS_COLOR_SCALE))

# Fetch the "notes" feature from the filtered dataset and work on its DataFrame.
notes_feature = filtered_D.get_feature("notes")
all_notes = notes_feature.df
# Row count = number of notes; the number of groups over index levels 0 and 1 is reported as the file count.
print(f"{len(all_notes.index)} notes over {len(all_notes.groupby(level=[0,1]))} files.")
all_notes.head()

70322 notes over 149 files.

			mc	mn	quarterbeats	quarterbeats_all_endings	duration_qb	duration	mc_onset	mn_onset	timesig	staff	voice	chord_id	midi	name	nominal_duration	octave	scalar	tied	tpc_name	tpc
corpus	piece	i
corelli	op01n01a	0	1	1	0	0	1.0	1/4	0	0	4/4	3	1	8	53	F3	1/4	3	1	<NA>	F	-1
		1	1	1	0	0	1.0	1/4	0	0	4/4	4	1	14	53	F3	1/4	3	1	<NA>	F	-1
		2	1	1	0	0	1.0	1/4	0	0	4/4	2	1	4	81	A5	1/4	5	1	<NA>	A	3
		3	1	1	0	0	1.0	1/4	0	0	4/4	1	1	0	84	C6	1/4	6	1	<NA>	C	0
		4	1	1	1	1	1.0	1/4	1/4	1/4	4/4	3	1	9	55	G3	1/4	3	1	<NA>	G	1

def weight_notes(nl, group_col="midi", precise=True):
    """Expand a note list into group values repeated in proportion to their duration.

    The ``duration_qb`` values of ``nl`` are summed per ``group_col`` value and
    divided by the smallest positive sum, so that this smallest group ends up
    with a weight of 1. The weights are then passed on to
    ``repeat_notes_according_to_weights``.

    Args:
        nl: Object supporting ``groupby(group_col)`` and exposing a
            ``duration_qb`` column (a notes DataFrame in this notebook).
        group_col: Name of the column whose values are weighted,
            e.g. ``"midi"`` or ``"tpc"``.
        precise: If False, the weights are additionally divided by 1.9999999,
            which roughly halves the number of repetitions.

    Returns:
        The result of ``repeat_notes_according_to_weights`` for the weights.
    """
    duration_per_group = nl.groupby(group_col).duration_qb.sum()
    smallest_positive = duration_per_group[duration_per_group > 0].min()
    # Scale so that the group with the smallest positive total gets weight 1.
    weights = duration_per_group / smallest_positive
    if precise:
        return repeat_notes_according_to_weights(weights)
    # Cheaper but less precise variant. The divisor sits just below 2 so that
    # the smallest weight lands slightly above 0.5; at exactly 0.5 it would be
    # rounded down by repeat_notes_according_to_weights().
    return repeat_notes_according_to_weights(weights / 1.9999999)


def repeat_notes_according_to_weights(weights):
    """Repeat every index label of ``weights`` as often as its rounded weight.

    Args:
        weights: Series mapping a value (e.g. a MIDI pitch or tonal pitch
            class) to a numeric weight.

    Returns:
        A Series with a fresh default index in which each label of ``weights``
        occurs ``round(weight)`` times, in the order of ``weights``. Labels
        whose rounded weight is zero or negative are left out. An empty
        integer-typed Series is returned when the weights cannot be converted
        to integers (e.g. they contain NaN or inf) or when nothing is repeated.
    """
    try:
        counts = weights.round().astype(int)
    except (ValueError, TypeError):
        # Non-finite or non-numeric weights cannot be cast to integer counts;
        # treat this as "nothing to repeat" instead of failing the notebook.
        return pd.Series(dtype=int)
    # range() of a non-positive count is empty, so such labels are skipped.
    repeated = [label for label, count in counts.items() for _ in range(count)]
    if not repeated:
        # Keep the dtype consistent with the error path; pd.Series([]) would
        # otherwise yield an object-typed Series.
        return pd.Series(dtype=int)
    return pd.Series(repeated)

Ambitus

# Map each corpus identifier to its display name, iterating in chronological order.
corpus_names = {
    corp: utils.get_corpus_display_name(corp) for corp in chronological_order
}
chronological_corpus_names = list(corpus_names.values())
# Same colors as corpus_colors, but keyed by display name instead of corpus identifier.
corpus_name_colors = {
    corpus_names[corp]: color for corp, color in corpus_colors.items()
}
# Adds a column to all_notes in place, derived from index level 0 via the display-name mapping.
all_notes["corpus_name"] = all_notes.index.get_level_values(0).map(corpus_names)

grouped_notes = all_notes.groupby("corpus_name")
# One duration-weighted MIDI series per corpus_name group, concatenated with the group key as
# outer index level, which reset_index(level=0) then turns into a regular column.
# precise=False selects the coarser weighting (fewer repetitions).
weighted_midi = pd.concat(
    [weight_notes(nl, "midi", precise=False) for _, nl in grouped_notes],
    keys=grouped_notes.groups.keys(),
).reset_index(level=0)
weighted_midi.columns = ["dataset", "midi"]
weighted_midi

	dataset	midi
0	Corelli Trio Sonatas	36
1	Corelli Trio Sonatas	36
2	Corelli Trio Sonatas	36
3	Corelli Trio Sonatas	36
4	Corelli Trio Sonatas	36
...	...	...
16144	Corelli Trio Sonatas	86
16145	Corelli Trio Sonatas	86
16146	Corelli Trio Sonatas	88
16147	Corelli Trio Sonatas	88
16148	Corelli Trio Sonatas	88

16149 rows × 2 columns

# fig = px.violin(weighted_midi,
#                 x='dataset',
#                 y='midi',
#                 color='dataset',
#                 title="Corpus-wise distribution over registers (ambitus)",
#                 box=True,
#                 labels=dict(
#                     dataset='',
#                     midi='distribution of pitches by duration'
#                 ),
#                 category_orders=dict(dataset=chronological_corpus_names),
#                 color_discrete_map=corpus_name_colors,
#                 width=1000, height=600,
#                )
# fig.update_traces(spanmode='hard') # do not extend beyond outliers
# fig.update_layout(**utils.STD_LAYOUT,
#                  showlegend=False)
# fig.update_yaxes(
#     tickmode= 'array',
#     tickvals= [12, 24, 36, 48, 60, 72, 84, 96],
#     ticktext = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7"],
# )
# fig.update_xaxes(tickangle=45)
# save_figure_as(fig, "ambitus_corpuswise_violins")
# fig.show()

Tonal Pitch Classes (TPC)

# Same construction as for the MIDI values, but grouped by the "tpc" column and with
# weight_notes' default precise=True.
weighted_tpc = pd.concat(
    [weight_notes(nl, "tpc") for _, nl in grouped_notes],
    keys=grouped_notes.groups.keys(),
).reset_index(level=0)
weighted_tpc.columns = ["dataset", "tpc"]
weighted_tpc

	dataset	tpc
0	Corelli Trio Sonatas	-6
1	Corelli Trio Sonatas	-5
2	Corelli Trio Sonatas	-5
3	Corelli Trio Sonatas	-5
4	Corelli Trio Sonatas	-5
...	...	...
129164	Corelli Trio Sonatas	12
129165	Corelli Trio Sonatas	12
129166	Corelli Trio Sonatas	12
129167	Corelli Trio Sonatas	12
129168	Corelli Trio Sonatas	12

129169 rows × 2 columns

As violin plot

# fig = px.violin(weighted_tpc,
#                 x='dataset',
#                 y='tpc',
#                 color='dataset',
#                 title="Corpus-wise distribution over line of fifths (tonal pitch classes)",
#                 box=True,
#                 labels=dict(
#                     dataset='',
#                     tpc='distribution of tonal pitch classes by duration'
#                 ),
#                 category_orders=dict(dataset=chronological_corpus_names),
#                 color_discrete_map=corpus_name_colors,
#                 width=1000,
#                 height=600,
#                )
# fig.update_traces(spanmode='hard') # do not extend beyond outliers
# fig.update_layout(**utils.STD_LAYOUT,
#                  showlegend=False)
# fig.update_yaxes(
#     tickmode= 'array',
#     tickvals= [-12, -9, -6, -3, 0, 3, 6, 9, 12, 15, 18],
#     ticktext = ["Dbb", "Bbb", "Gb", "Eb", "C", "A", "F#", "D#", "B#", "G##", "E##"],
#     zerolinecolor='grey',
#     zeroline=True
# )
# fig.update_xaxes(tickangle=45)
# save_figure_as(fig, "pitch_class_distributions_corpuswise_violins")
# fig.show()

# Display the notes table (it carries the corpus_name column assigned earlier in the notebook).
(all_notes)

			mc	mn	quarterbeats	quarterbeats_all_endings	duration_qb	duration	mc_onset	mn_onset	timesig	staff	voice	chord_id	midi	name	nominal_duration	octave	scalar	tied	tpc_name	tpc	corpus_name
corpus	piece	i
corelli	op01n01a	0	1	1	0	0	1.0	1/4	0	0	4/4	3	1	8	53	F3	1/4	3	1	<NA>	F	-1	Corelli Trio Sonatas
		1	1	1	0	0	1.0	1/4	0	0	4/4	4	1	14	53	F3	1/4	3	1	<NA>	F	-1	Corelli Trio Sonatas
		2	1	1	0	0	1.0	1/4	0	0	4/4	2	1	4	81	A5	1/4	5	1	<NA>	A	3	Corelli Trio Sonatas
		3	1	1	0	0	1.0	1/4	0	0	4/4	1	1	0	84	C6	1/4	6	1	<NA>	C	0	Corelli Trio Sonatas
		4	1	1	1	1	1.0	1/4	1/4	1/4	4/4	3	1	9	55	G3	1/4	3	1	<NA>	G	1	Corelli Trio Sonatas
	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
	op04n12c	379	19	19	110	110	2.0	1/2	1/2	1/2	12/8	4	1	383	47	B2	1/2	2	1	<NA>	B	5	Corelli Trio Sonatas
		380	19	19	110	110	2.0	1/2	1/2	1/2	12/8	2	1	377	62	D4	1/2	4	1	<NA>	D	2	Corelli Trio Sonatas
		381	19	19	110	110	0.5	1/8	1/2	1/2	12/8	1	1	372	71	B4	1/8	4	1	<NA>	B	5	Corelli Trio Sonatas
		382	19	19	221/2	221/2	0.5	1/8	5/8	5/8	12/8	1	1	373	70	A#4	1/8	4	1	<NA>	A#	10	Corelli Trio Sonatas
		383	19	19	111	111	3.0	3/4	3/4	3/4	12/8	1	1	374	71	B4	1/2	4	3/2	<NA>	B	5	Corelli Trio Sonatas

70322 rows × 21 columns

# Figure size shared by the plot calls and the figure export below.
width = 1400
height = 800

# Stack the MIDI and TPC distributions into one long table: both value columns are renamed to
# "value", and the concat key becomes the "distribution" column after reset_index.
weighted_pitch_values = pd.concat(
    [
        weighted_midi.rename(columns={"midi": "value"}),
        weighted_tpc.rename(columns={"tpc": "value"}),
    ],
    keys=["MIDI pitch", "Tonal pitch class"],
    names=["distribution"],
).reset_index(level=[0, 1])

# One violin per dataset, with one facet row per distribution.
# NOTE(review): the labels dict has a "tpc" key, but the plotted column is "value" --
# confirm whether this label is still meant to have an effect.
fig = plotting.make_violin_plot(
    weighted_pitch_values,
    x_col="dataset",
    y_col="value",
    color="dataset",
    facet_row="distribution",
    box=True,
    labels=dict(dataset="", tpc="distribution of tonal pitch classes by duration"),
    category_orders=dict(dataset=chronological_corpus_names),
    # color_discrete_map=corpus_name_colors,
    color_discrete_sequence=px.colors.qualitative.Dark24,
    traces_settings=dict(
        spanmode="hard",
        width=1,
        # scalemode='width'
    ),
    layout=dict(
        showlegend=False,
        margin=dict(
            t=0,
            b=0,
            l=0,
            r=0,
        ),
    ),
    x_axis=dict(
        # tickangle=45,
        tickfont_size=15
    ),
    # A single tick list is passed for the y axes: values up to 15 carry tonal-pitch-class
    # names, values from 24 upwards carry octave names (C1..C7). The value 12 is labelled "B#".
    y_axis=dict(
        tickmode="array",
        tickvals=[-12, -9, -6, -3, 0, 3, 6, 9, 12, 15, 24, 36, 48, 60, 72, 84, 96],
        ticktext=[
            "Dbb",
            "Bbb",
            "Gb",
            "Eb",
            "C",
            "A",
            "F#",
            "D#",
            "B#",
            "G##",
            "C1",
            "C2",
            "C3",
            "C4",
            "C5",
            "C6",
            "C7",
        ],
        zerolinecolor="grey",
        zeroline=True,
    ),
    width=width,
    height=height,
)
# Passes an empty y-axis title to the project helper (presumably to blank the subplot
# axis titles -- helper not shown here).
utils.realign_subplot_axes(fig, y_axes=dict(title_text=""))
save_figure_as(fig, "notes_violin", width=width, height=height)
fig

# Box-plot counterpart of the violin figure: same long table, same facet rows and tick labels,
# but a different color sequence (Light24), rotated x tick labels and only the top margin set to 0.
# NOTE(review): the labels dict has a "tpc" key, but the plotted column is "value" --
# confirm whether this label is still meant to have an effect.
fig = plotting.make_box_plot(
    weighted_pitch_values,
    x_col="dataset",
    y_col="value",
    color="dataset",
    facet_row="distribution",
    # box=True,
    labels=dict(dataset="", tpc="distribution of tonal pitch classes by duration"),
    category_orders=dict(dataset=chronological_corpus_names),
    # color_discrete_map=corpus_name_colors,
    color_discrete_sequence=px.colors.qualitative.Light24,
    # traces_settings=dict(spanmode='hard'),
    layout=dict(showlegend=False, margin=dict(t=0)),
    x_axis=dict(tickangle=45, tickfont_size=15),
    # A single tick list is passed for the y axes: values up to 15 carry tonal-pitch-class
    # names, values from 24 upwards carry octave names (C1..C7). The value 12 is labelled "B#".
    y_axis=dict(
        tickmode="array",
        tickvals=[-12, -9, -6, -3, 0, 3, 6, 9, 12, 15, 24, 36, 48, 60, 72, 84, 96],
        ticktext=[
            "Dbb",
            "Bbb",
            "Gb",
            "Eb",
            "C",
            "A",
            "F#",
            "D#",
            "B#",
            "G##",
            "C1",
            "C2",
            "C3",
            "C4",
            "C5",
            "C6",
            "C7",
        ],
        zerolinecolor="grey",
        zeroline=True,
    ),
    width=width,
    height=height,
)
# Unlike the violin cell, y_axes=True is passed here rather than a dict of axis settings.
utils.realign_subplot_axes(fig, y_axes=True)
save_figure_as(fig, "notes_box", width=width, height=height)
fig

As bar plots

# Total duration (in quarter notes) per tonal pitch class over all notes.
bar_data = all_notes.groupby("tpc").duration_qb.sum().reset_index()
# Every integer between the smallest and largest occurring tpc, so that each position
# in that range gets a tick, together with the names ms3.fifths2name returns for them.
x_values = list(range(bar_data.tpc.min(), bar_data.tpc.max() + 1))
x_names = ms3.fifths2name(x_values)
fig = px.bar(
    bar_data,
    x="tpc",
    y="duration_qb",
    labels=dict(tpc="Named pitch class", duration_qb="Duration in quarter notes"),
    color_discrete_sequence=utils.CORPUS_COLOR_SCALE,
    width=1000,
    height=300,
)
fig.update_layout(**utils.STD_LAYOUT)
# Label the x axis with pitch names instead of the numeric tpc values.
fig.update_xaxes(
    zerolinecolor="grey",
    tickmode="array",
    tickvals=x_values,
    ticktext=x_names,
    dtick=1,
    ticks="outside",
    tickcolor="black",
    minor=dict(dtick=6, gridcolor="grey", showgrid=True),
)
save_figure_as(fig, "pitch_class_distribution_absolute_bars")
fig.show()

# Total duration per (corpus_name, tpc) pair; also reused by the scatter plot further down.
scatter_data = all_notes.groupby(["corpus_name", "tpc"]).duration_qb.sum().reset_index()
fig = px.bar(
    scatter_data,
    x="tpc",
    y="duration_qb",
    color="corpus_name",
    labels=dict(
        duration_qb="duration",
        tpc="named pitch class",
    ),
    # category_orders is keyed by column name; the corpus column of scatter_data is
    # "corpus_name" (there is no "dataset" column in this table).
    category_orders=dict(corpus_name=chronological_corpus_names),
    color_discrete_map=corpus_name_colors,
    width=1000,
    height=500,
)
fig.update_layout(**utils.STD_LAYOUT)
# Label the x axis with pitch names (x_values / x_names come from the absolute bar plot cell).
fig.update_xaxes(
    zerolinecolor="grey",
    tickmode="array",
    tickvals=x_values,
    ticktext=x_names,
    dtick=1,
    ticks="outside",
    tickcolor="black",
    minor=dict(dtick=6, gridcolor="grey", showgrid=True),
)
save_figure_as(fig, "pitch_class_distribution_corpuswise_absolute_bars")
fig.show()

As scatter plots

# One small line-and-marker plot per corpus, showing total duration per tonal pitch class.
fig = px.scatter(
    scatter_data,
    x="tpc",
    y="duration_qb",
    color="corpus_name",
    labels=dict(
        duration_qb="duration",
        tpc="named pitch class",
    ),
    # category_orders is keyed by column name; the corpus column of scatter_data is
    # "corpus_name" (there is no "dataset" column in this table).
    category_orders=dict(corpus_name=chronological_corpus_names),
    color_discrete_map=corpus_name_colors,
    facet_col="corpus_name",
    facet_col_wrap=3,
    facet_col_spacing=0.03,
    width=1000,
    height=1000,
)
fig.update_traces(mode="lines+markers")
# Keep only the part after the last "=" in each facet title (drops the column-name prefix).
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(**utils.STD_LAYOUT, showlegend=False)
fig.update_xaxes(
    zerolinecolor="grey",
    tickmode="array",
    tickvals=[-12, -6, 0, 6, 12, 18],
    ticktext=["Dbb", "Gb", "C", "F#", "B#", "E##"],
    visible=True,
)
# matches=None decouples the y axes of the facets; tick labels are shown on every facet.
fig.update_yaxes(zeroline=False, matches=None, showticklabels=True)
save_figure_as(fig, "pitch_class_distribution_corpuswise_scatter")
fig.show()

# Rows whose tpc lies in the closed interval [-1, 5] are counted as "without accidental";
# all remaining rows are counted as "with accidental".
natural_mask = bar_data.tpc.between(-1, 5)
no_accidental = bar_data.loc[natural_mask, "duration_qb"].sum()
with_accidental = bar_data.loc[~natural_mask, "duration_qb"].sum()

# Both parts together make up the summed duration of all rows in bar_data.
entire = no_accidental + with_accidental
(
    f"Fraction of note duration without accidental of the entire durations: {no_accidental} / {entire} = "
    f"{no_accidental / entire}"
)

'Fraction of note duration without accidental of the entire durations: 49521.083333333336 / 64585.083333333336 = 0.7667572878670385'

Notes and staves

# Tabulate the staff column of the notes table using the project helper.
print("Distribution of notes over staves:")
utils.value_count_df(all_notes.staff)

Distribution of notes over staves:

	counts	%
staff
1	20815	29.6
2	17593	25.02
3	16765	23.84
4	15149	21.54